def _build_write_tfrecord(
        args: Dict,
):
    """
    Builds and writes a TFRecord with image and segmentation (mask) features.

    :param args: dictionary containing the following function arguments:
        output_path: the path of the TFRecord file to be written
        shard_id: shard ID (for multi-shard TFRecord datasets)
        num_per_shard: number of images/masks per shard
        num_images: total number of images in the dataset
        file_ids: file IDs for the image/mask files
        images_dir: directory containing the image files
        masks_dir: directory containing the mask files corresponding to the images
    """

    with TFRecordWriter(args["output_path"]) as tfrecord_writer:
        start_idx = args["shard_id"] * args["num_per_shard"]
        end_idx = min((args["shard_id"] + 1) * args["num_per_shard"], args["num_images"])
        for i in range(start_idx, end_idx):
            print(
                f'\r>> Converting image {i + 1}/{len(args["file_ids"])} '
                f'of shard {args["shard_id"]}',
            )

            # read the image
            image_file_name = args["file_ids"][i] + ".jpg"
            image_path = os.path.join(args["images_dir"], image_file_name)
            image_data = tf.io.gfile.GFile(image_path, 'rb').read()
            width, height, _ = image_dimensions(image_path)

            # read the semantic segmentation annotation (mask)
            mask_path = os.path.join(args["masks_dir"], args["file_ids"][i] + ".png")
            seg_data = tf.io.gfile.GFile(mask_path, 'rb').read()
            seg_width, seg_height, _ = image_dimensions(mask_path)
            if (height != seg_height) or (width != seg_width):
                raise RuntimeError("Shape mismatch between image and mask.")

            # convert to a TensorFlow Example
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/encoded': _bytes_list_feature(image_data),
                'image/filename': _bytes_list_feature(image_file_name),
                'image/format': _bytes_list_feature('jpeg'),
                'image/height': _int64_list_feature(height),
                'image/width': _int64_list_feature(width),
                'image/channels': _int64_list_feature(3),
                'image/segmentation/class/encoded': _bytes_list_feature(seg_data),
                'image/segmentation/class/format': _bytes_list_feature('png'),
            }))
            tfrecord_writer.write(example.SerializeToString())
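
# A minimal driver sketch (an assumption about how this helper is invoked,
# not part of the original module): writes a multi-shard TFRecord dataset by
# calling _build_write_tfrecord once per shard. The shard count and the
# output file naming scheme are hypothetical.
def _example_write_tfrecord_shards(
        tfrecord_dir: str,
        file_ids: List[str],
        images_dir: str,
        masks_dir: str,
        num_shards: int = 4,
):
    num_images = len(file_ids)
    # ceiling division so the final shard absorbs any remainder
    num_per_shard = (num_images + num_shards - 1) // num_shards
    for shard_id in range(num_shards):
        output_path = os.path.join(
            tfrecord_dir,
            f"dataset-{shard_id:05d}-of-{num_shards:05d}.tfrecord",
        )
        _build_write_tfrecord({
            "output_path": output_path,
            "shard_id": shard_id,
            "num_per_shard": num_per_shard,
            "num_images": num_images,
            "file_ids": file_ids,
            "images_dir": images_dir,
            "masks_dir": masks_dir,
        })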
def _create_tf_example(
        label_indices: Dict,
        group: NamedTuple,
        images_dir: str,
) -> tf.train.Example:
    """
    Creates a TensorFlow Example object representation of a group of
    annotations for an image file.

    :param label_indices: dictionary mapping class labels to their integer indices
    :param group: namedtuple with a `filename` field and an `object` field
        holding the pandas DataFrame group of annotations for that file
    :param images_dir: directory containing dataset image files
    :return: TensorFlow Example object corresponding to the group of annotations
    """

    # read the image
    image_file_name = group.filename
    image_path = os.path.join(images_dir, group.filename)
    image_data = tf.io.gfile.GFile(image_path, 'rb').read()
    width, height, _ = image_dimensions(image_path)

    # lists of bounding box values for the example
    filename = group.filename.encode('utf8')
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # for each bounding box annotation add the values to the lists
    for index, row in group.object.iterrows():
        # normalize the bounding box coordinates to the range (0, 1)
        xmins.append(int(row['xmin']) / width)
        xmaxs.append(int(row['xmax']) / width)
        ymins.append(int(row['ymin']) / height)
        ymaxs.append(int(row['ymax']) / height)

        # get the class label and corresponding index
        classes_text.append(row['class'].encode('utf8'))
        classes.append(label_indices[row['class']])

    # build the Example from the lists of coordinates, class labels/indices, etc.
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': _int64_feature(height),
        'image/width': _int64_feature(width),
        'image/source_id': _bytes_feature(filename),
        'image/encoded': _bytes_list_feature(image_data),
        'image/filename': _bytes_list_feature(image_file_name),
        'image/format': _bytes_list_feature('jpeg'),
        'image/object/bbox/xmin': _float_list_feature(xmins),
        'image/object/bbox/xmax': _float_list_feature(xmaxs),
        'image/object/bbox/ymin': _float_list_feature(ymins),
        'image/object/bbox/ymax': _float_list_feature(ymaxs),
        'image/object/class/text': _string_bytes_list_feature(classes_text),
        'image/object/class/label': _int64_list_feature(classes),
    }))

    return tf_example
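
# A sketch (an assumption about the surrounding pipeline, not the original
# code) of how the `group` namedtuple consumed by _create_tf_example is
# typically produced: the bounding box DataFrame is grouped by filename and
# each group is wrapped in a namedtuple with `filename` and `object` fields.
def _example_group_annotations(examples_df: pd.DataFrame) -> List[NamedTuple]:
    from collections import namedtuple

    # each group pairs an image file name with its DataFrame of boxes
    data = namedtuple("data", ["filename", "object"])
    grouped = examples_df.groupby("filename")
    return [
        data(filename, grouped.get_group(filename))
        for filename in grouped.groups
    ]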
def add_images(
        self,
        data_source: str,
        idxs: List[int],
):
    """
    Adds images into the dataset based on indices of the image file paths list.

    :param data_source: name of the dataset source the images belong to
    :param idxs: indices of the file paths list for the images to be added
    """

    # loop over each of the class IDs/names and add them to the source dataset
    for (class_id, label) in self.class_ids_to_labels.items():
        self.add_class(data_source, class_id, label)

    # loop over the image path indices
    for i in idxs:

        # extract the image filename to serve as the unique image ID
        image_path = self.image_paths[i]
        filename = image_path.split(os.path.sep)[-1]

        # get the image dimensions
        width, height, _ = image_dimensions(image_path)

        # Get the x, y coordinates of the points of the polygons that make
        # up the outline of each object instance. These are stored in the
        # shape_attributes (see the JSON format above).
        # The if condition is needed to support VIA versions 1.x and 2.x.
        a = self.via_annotations[filename]
        if type(a['regions']) is dict:
            polygons = [r['shape_attributes'] for r in a['regions'].values()]
        else:
            polygons = [r['shape_attributes'] for r in a['regions']]

        # add the image to the dataset
        self.add_image(
            data_source,
            image_id=filename,
            path=image_path,
            width=width,
            height=height,
            polygons=polygons,
        )
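
# For illustration only (not part of the VIA export handled above): the shape
# of the `regions` field differs between VIA versions, which is why the type
# check in add_images is needed. VIA 1.x stores regions as a dict keyed by
# region index, while VIA 2.x stores them as a list.
_VIA_1X_REGIONS_EXAMPLE = {
    "0": {
        "shape_attributes": {
            "name": "polygon",
            "all_points_x": [10, 20, 15],
            "all_points_y": [10, 10, 20],
        },
    },
}
_VIA_2X_REGIONS_EXAMPLE = [
    {
        "shape_attributes": {
            "name": "polygon",
            "all_points_x": [10, 20, 15],
            "all_points_y": [10, 10, 20],
        },
    },
]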
def _write_bboxes_as_darknet(
        bboxes: List[List[float]],
        label_index: int,
        image_id: str,
        images_dir: str,
        darknet_dir: str,
) -> str:
    """
    Writes a Darknet annotation file containing the bounding boxes for an image.

    :param bboxes: iterable of lists of bounding box coordinates
        [xmin, xmax, ymin, ymax], normalized to the range (0, 1)
    :param label_index: class label index
    :param image_id: image ID (should be the image's file name minus the
        file extension ".jpg")
    :param images_dir: directory where the image file is located
    :param darknet_dir: directory where the Darknet annotation file should be written
    :return: path to the Darknet annotation file
    """

    # get the image's dimensions
    image_file_path = os.path.join(images_dir, image_id + ".jpg")
    image_width, image_height, _ = image_dimensions(image_file_path)

    # open the annotation file for writing bounding boxes, one per line,
    # appending if an annotation file already exists for this image
    darknet_file_path = os.path.join(darknet_dir, image_id + ".txt")
    open_mode = "a" if os.path.exists(darknet_file_path) else "w"
    with open(darknet_file_path, open_mode) as darknet_file:

        # for each bounding box get the corresponding center x and y
        # as well as the bounding box's width and height in terms of
        # a decimal fraction of the total image dimension
        for bbox in bboxes:

            # find the bounding box's center x and y, and width/height
            bbox_min_x, bbox_max_x, bbox_min_y, bbox_max_y = bbox
            bbox_width = (bbox_max_x - bbox_min_x) * image_width
            bbox_height = (bbox_max_y - bbox_min_y) * image_height
            bbox_width_fraction = bbox_width / image_width
            bbox_height_fraction = bbox_height / image_height
            bbox_center_x = (bbox_min_x * image_width) + (bbox_width / 2)
            bbox_center_y = (bbox_min_y * image_height) + (bbox_height / 2)
            bbox_center_fraction_x = bbox_center_x / image_width
            bbox_center_fraction_y = bbox_center_y / image_height

            # make sure we haven't overshot too much; if not then clip
            if bbox_width_fraction > 1.0:
                if (bbox_width_fraction - 1.0) > 0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further, so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid (too wide) "
                        "width fraction -- skipping this box",
                    )
                    continue
                else:
                    # clip to 1.0
                    bbox_width_fraction = 1.0
            if bbox_width_fraction < 0.0:
                if bbox_width_fraction < -0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further, so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid (negative) "
                        "width fraction -- skipping this box",
                    )
                    continue
                else:
                    # clip to zero
                    bbox_width_fraction = 0.0
            if bbox_height_fraction > 1.0:
                if (bbox_height_fraction - 1.0) > 0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further, so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid (too tall) "
                        "height fraction -- skipping this box",
                    )
                    continue
                else:
                    # clip to 1.0
                    bbox_height_fraction = 1.0
            if bbox_height_fraction < 0.0:
                if bbox_height_fraction < -0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further, so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid (negative) "
                        "height fraction -- skipping this box",
                    )
                    continue
                else:
                    # clip to zero
                    bbox_height_fraction = 0.0
            if (bbox_width < 0.0) or (bbox_height < 0.0):
                # something's off and we probably can't fix it without looking
                # into the issue further, so report it via the logger and skip
                _logger.warning(
                    "Creation of Darknet annotation for image "
                    f"{image_id} results in an invalid (negative) "
                    "width or height -- skipping this box",
                )
                continue

            # write the bounding box info into the file
            darknet_file.write(
                f"{label_index} {bbox_center_fraction_x} "
                f"{bbox_center_fraction_y} "
                f"{bbox_width_fraction} "
                f"{bbox_height_fraction}\n",
            )

    return darknet_file_path
def _write_bboxes_as_pascal(
        bboxes: List[List[float]],
        label: str,
        image_id: str,
        images_dir: str,
        pascal_dir: str,
) -> int:
    """
    Writes a PASCAL VOC (XML) annotation file containing the bounding boxes
    for an image.

    :param bboxes: iterable of lists of bounding box coordinates
        [xmin, xmax, ymin, ymax], normalized to the range (0, 1)
    :param label: class label
    :param image_id: ID of the image file (typically the image file name
        minus ".jpg" or ".png")
    :param images_dir: directory where the image file is located
    :param pascal_dir: directory where the PASCAL file should be written
    :return: 0 for success, 1 for failure
    """

    # get the image dimensions
    image_file_name = image_id + ".jpg"
    image_path = os.path.join(images_dir, image_file_name)
    try:
        img_width, img_height, img_depth = image_dimensions(image_path)
    except OSError as error:
        _logger.warning(
            "Unable to create PASCAL annotation for image "
            f"{image_file_name} -- skipping: {error}",
        )
        return 1

    normalized_image_path = os.path.normpath(image_path)
    folder_name, image_file_name = normalized_image_path.split(os.path.sep)[-2:]

    # TODO
    # The below creates a fresh tree in all cases for later writing to the
    # annotation XML file. We should instead first see if the annotation file
    # already exists, and if so then add the annotations (bounding boxes) to
    # the existing element tree before we rewrite the XML file.

    annotation = etree.Element('annotation')
    folder = etree.SubElement(annotation, "folder")
    folder.text = folder_name
    filename = etree.SubElement(annotation, "filename")
    filename.text = image_file_name
    path = etree.SubElement(annotation, "path")
    path.text = normalized_image_path
    source = etree.SubElement(annotation, "source")
    database = etree.SubElement(source, "database")
    database.text = "OpenImages"
    size = etree.SubElement(annotation, "size")
    width = etree.SubElement(size, "width")
    width.text = str(img_width)
    height = etree.SubElement(size, "height")
    height.text = str(img_height)
    depth = etree.SubElement(size, "depth")
    depth.text = str(img_depth)
    segmented = etree.SubElement(annotation, "segmented")
    segmented.text = "0"
    for bbox in bboxes:
        obj = etree.SubElement(annotation, "object")
        name = etree.SubElement(obj, "name")
        name.text = label
        pose = etree.SubElement(obj, "pose")
        pose.text = "Unspecified"
        truncated = etree.SubElement(obj, "truncated")
        truncated.text = "0"
        difficult = etree.SubElement(obj, "difficult")
        difficult.text = "0"
        bndbox = etree.SubElement(obj, "bndbox")
        xmin = etree.SubElement(bndbox, "xmin")
        xmin.text = str(max(0, int(bbox[0] * img_width)))
        xmax = etree.SubElement(bndbox, "xmax")
        xmax.text = str(min(img_width - 1, int(bbox[1] * img_width)))
        ymin = etree.SubElement(bndbox, "ymin")
        ymin.text = str(max(0, int(bbox[2] * img_height)))
        ymax = etree.SubElement(bndbox, "ymax")
        ymax.text = str(min(img_height - 1, int(bbox[3] * img_height)))

    # write the XML to file
    pascal_file_path = os.path.join(pascal_dir, image_id + ".xml")
    with open(pascal_file_path, 'w') as pascal_file:
        pascal_file.write(
            etree.tostring(annotation, pretty_print=True, encoding='utf-8').decode("utf-8"))

    return 0
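
# A minimal usage sketch (hypothetical paths and label, not part of the
# original module): writes a PASCAL VOC XML file for one image containing a
# single normalized box given as [xmin, xmax, ymin, ymax].
def _example_write_pascal() -> int:
    return _write_bboxes_as_pascal(
        [[0.25, 0.75, 0.2, 0.6]],
        label="dog",
        image_id="img_0001",
        images_dir="/data/images",
        pascal_dir="/data/pascal",
    )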
def vgg_to_masks(
        images_dir: str,
        annotations_file: str,
        masks_dir: str,
        class_labels_file: str,
        combine_into_one: bool = False,
):
    """
    Creates mask files from annotations specified in a JSON file exported
    from the VGG Image Annotator (VIA) tool.

    :param images_dir: directory containing JPG image files
    :param annotations_file: annotation file containing segmentation (mask)
        regions, expected to be in the JSON format created by the VGG Image
        Annotator tool
    :param masks_dir: directory where PNG mask files will be written
    :param class_labels_file: text file containing one class label per line
    :param combine_into_one: if True then combine all mask regions for an
        image into a single mask file
    """

    # argument validation
    if not os.path.exists(images_dir):
        raise ValueError(f"Invalid images directory path: {images_dir}")
    elif not os.path.exists(annotations_file):
        raise ValueError(f"Invalid annotations file path: {annotations_file}")

    # make the masks directory if it doesn't already exist
    os.makedirs(masks_dir, exist_ok=True)

    # load the contents of the annotation JSON file (created
    # using the VIA tool) and initialize the annotations dictionary
    with open(annotations_file) as annotations_json:
        annotations = json.load(annotations_json)
    image_annotations = {}

    # loop over the annotations (the values of the annotations dictionary)
    for data in annotations.values():
        # store the data in the dictionary using the filename as the key
        image_annotations[data["filename"]] = data

    # get a dictionary of class labels to class IDs
    class_labels = _class_labels_to_ids(class_labels_file)

    _logger.info("Generating mask files...")
    for image_file_name in tqdm(os.listdir(images_dir)):

        # skip any files without a *.jpg extension
        if not image_file_name.endswith(".jpg"):
            continue

        file_id = os.path.splitext(image_file_name)[0]

        # grab the annotation data for the current image
        annotation = image_annotations[image_file_name]

        # get the image's dimensions
        width, height, _ = image_dimensions(os.path.join(images_dir, image_file_name))

        # if combining all regions into a single mask file
        # then we'll only need to allocate the mask array once
        if combine_into_one:
            # allocate memory for the region mask
            region_mask = np.zeros((height, width, 3), dtype="uint8")

        # loop over each of the annotated regions
        for (i, region) in enumerate(annotation["regions"]):

            # if not combining all regions into a single mask file then
            # we'll need to reallocate the mask array for each mask region
            if not combine_into_one:
                # allocate memory for the region mask
                region_mask = np.zeros((height, width, 3), dtype="uint8")

            # grab the shape and region attributes
            shape_attributes = region["shape_attributes"]
            region_attributes = region["region_attributes"]

            # find the class ID corresponding to the region's class attribute
            class_label = region_attributes["class"]
            if class_label not in class_labels:
                raise ValueError(
                    "No corresponding class ID found for the class label "
                    f"found in the region attributes -- label: {class_label}",
                )
            class_id = class_labels[class_label]

            # get the array of (x, y)-coordinates of the region's mask polygon
            x_coords = shape_attributes["all_points_x"]
            y_coords = shape_attributes["all_points_y"]
            coords = zip(x_coords, y_coords)
            poly_coords = [[x, y] for x, y in coords]
            pts = np.array(poly_coords, np.int32)

            # reshape the points to (<# of coordinates>, 1, 2)
            pts = pts.reshape((-1, 1, 2))

            # draw the polygon mask, using the class ID as the mask value
            cv2.fillPoly(region_mask, [pts], color=[class_id] * 3)

            # if not combining all masks into a single file
            # then write this mask into its own file
            if not combine_into_one:
                # write the mask file
                mask_file_name = f"{file_id}_segmentation_{i}.png"
                cv2.imwrite(os.path.join(masks_dir, mask_file_name), region_mask)

        # write a combined mask file, if requested
        if combine_into_one:
            # write the mask file
            mask_file_name = f"{file_id}_segmentation.png"
            cv2.imwrite(os.path.join(masks_dir, mask_file_name), region_mask)

    _logger.info("Done")
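
# A minimal usage sketch (hypothetical paths, not part of the original
# module): converts a VIA annotation export into one combined PNG mask per
# image, with pixel values equal to the class IDs listed in the labels file.
def _example_vgg_to_masks():
    vgg_to_masks(
        images_dir="/data/images",
        annotations_file="/data/via_annotations.json",
        masks_dir="/data/masks",
        class_labels_file="/data/class_labels.txt",
        combine_into_one=True,
    )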
def kitti_to_darknet(
        images_dir: str,
        kitti_dir: str,
        darknet_dir: str,
        darknet_labels: str,
):
    """
    Creates equivalent Darknet (YOLO) annotation files corresponding to a
    dataset with KITTI annotations.

    :param images_dir: directory containing the dataset's images
    :param kitti_dir: directory containing the dataset's KITTI annotation files
    :param darknet_dir: directory where the equivalent Darknet annotation
        files will be written
    :param darknet_labels: labels file corresponding to the label indices used
        in the Darknet annotation files, will be written into the specified
        Darknet annotations directory
    """

    _logger.info("Converting annotations in KITTI format to Darknet format equivalents")

    # create the Darknet annotations directory in case it doesn't yet exist
    os.makedirs(darknet_dir, exist_ok=True)

    # get the list of file IDs of the KITTI annotations and corresponding images
    annotation_ext = ".txt"
    image_ext = ".jpg"
    file_ids = matching_ids(kitti_dir, images_dir, annotation_ext, image_ext)

    # dictionary of labels to indices
    label_indices = {}

    # build Darknet annotations from KITTI
    for file_id in tqdm(file_ids):

        # get the image's dimensions
        image_file_name = file_id + image_ext
        width, height, _ = image_dimensions(os.path.join(images_dir, image_file_name))

        # loop over all annotation lines in the KITTI file and compute Darknet equivalents
        annotation_file_name = file_id + annotation_ext
        with open(os.path.join(kitti_dir, annotation_file_name), "r") as kitti_file:
            darknet_bboxes = []
            for line in kitti_file:
                parts = line.split()
                label = parts[0]
                if label in label_indices:
                    label_index = label_indices[label]
                else:
                    label_index = len(label_indices)
                    label_indices[label] = label_index
                box_width_pixels = float(parts[6]) - float(parts[4]) + 1
                box_height_pixels = float(parts[7]) - float(parts[5]) + 1
                darknet_bbox = {
                    "label_index": label_index,
                    "center_x": ((box_width_pixels / 2) + float(parts[4])) / width,
                    "center_y": ((box_height_pixels / 2) + float(parts[5])) / height,
                    "box_width": box_width_pixels / width,
                    "box_height": box_height_pixels / height,
                }
                darknet_bboxes.append(darknet_bbox)

        # write the Darknet annotation boxes into a Darknet annotation file
        with open(os.path.join(darknet_dir, annotation_file_name), "w") as darknet_file:
            for darknet_bbox in darknet_bboxes:
                darknet_file.write(
                    f"{darknet_bbox['label_index']} {darknet_bbox['center_x']} "
                    f"{darknet_bbox['center_y']} {darknet_bbox['box_width']} "
                    f"{darknet_bbox['box_height']}\n"
                )

    # write the Darknet labels into a text file, one label per line,
    # in order according to the indices used in the annotation files
    with open(os.path.join(darknet_dir, darknet_labels), "w") as darknet_labels_file:
        index_labels = {v: k for k, v in label_indices.items()}
        for i in range(len(index_labels)):
            darknet_labels_file.write(f"{index_labels[i]}\n")
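
# A worked example (hypothetical values, not part of the original module) of
# the KITTI-to-Darknet conversion above: a KITTI box with pixel corners
# (xmin=100, ymin=50, xmax=299, ymax=249) in a 640x480 image becomes a
# Darknet box of normalized center/size values.
def _example_kitti_box_to_darknet() -> Dict:
    width, height = 640, 480
    xmin, ymin, xmax, ymax = 100.0, 50.0, 299.0, 249.0
    box_width_pixels = xmax - xmin + 1   # 200.0
    box_height_pixels = ymax - ymin + 1  # 200.0
    return {
        "center_x": ((box_width_pixels / 2) + xmin) / width,    # 0.3125
        "center_y": ((box_height_pixels / 2) + ymin) / height,  # 0.3125
        "box_width": box_width_pixels / width,                  # 0.3125
        "box_height": box_height_pixels / height,               # ~0.4167
    }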
def _dataset_bbox_examples(
        images_dir: str,
        annotations_dir: str,
        annotation_format: str,
        darknet_labels: str = None,
) -> pd.DataFrame:
    """
    Builds a DataFrame of the bounding boxes of an annotated dataset.

    :param images_dir: directory containing the dataset's *.jpg image files
    :param annotations_dir: directory containing the dataset's annotation files
    :param annotation_format: currently supported: "darknet", "kitti", and "pascal"
    :param darknet_labels: path to the class labels file corresponding to
        Darknet (YOLO) annotation files, only necessary if using the "darknet"
        annotation format
    :return: pandas DataFrame with rows corresponding to the dataset's bounding boxes
    """

    # we expect all images to use the *.jpg extension
    image_ext = ".jpg"

    # list of bounding box annotations we'll eventually write to CSV
    bboxes = []

    if annotation_format == "pascal":

        # get the file IDs for all matching image/PASCAL pairs (i.e. the dataset)
        annotation_ext = ".xml"
        for file_id in matching_ids(
                annotations_dir,
                images_dir,
                annotation_ext,
                image_ext,
        ):
            # add all bounding boxes from the PASCAL file to the list of boxes
            pascal_path = os.path.join(annotations_dir, file_id + annotation_ext)
            tree = ElementTree.parse(pascal_path)
            root = tree.getroot()
            for member in root.findall('object'):
                bbox_values = (
                    root.find('filename').text,
                    int(root.find('size')[0].text),
                    int(root.find('size')[1].text),
                    member[0].text,
                    int(member[4][0].text),
                    int(member[4][1].text),
                    int(member[4][2].text),
                    int(member[4][3].text),
                )
                bboxes.append(bbox_values)

    elif annotation_format == "kitti":

        # get the file IDs for all matching image/KITTI pairs (i.e. the dataset)
        annotation_ext = ".txt"
        for file_id in matching_ids(
                annotations_dir,
                images_dir,
                annotation_ext,
                image_ext,
        ):
            # get the image dimensions from the image file since this
            # info is not present in the corresponding KITTI annotation
            image_file_name = file_id + image_ext
            image_path = os.path.join(images_dir, image_file_name)
            width, height, _ = image_dimensions(image_path)

            # add all bounding boxes from the KITTI file to the list of boxes
            kitti_path = os.path.join(annotations_dir, file_id + annotation_ext)
            with open(kitti_path, "r") as kitti_file:
                for line in kitti_file:
                    kitti_parts = line.split()
                    bbox_values = (
                        image_file_name,
                        width,
                        height,
                        kitti_parts[0],
                        int(float(kitti_parts[4])),
                        int(float(kitti_parts[5])),
                        int(float(kitti_parts[6])),
                        int(float(kitti_parts[7])),
                    )
                    bboxes.append(bbox_values)

    elif annotation_format == "darknet":

        # read the class labels into an index-to-label dictionary
        darknet_index_labels = darknet_indices_to_labels(darknet_labels)

        # get the file IDs for all matching image/Darknet pairs (i.e. the dataset)
        annotation_ext = ".txt"
        file_ids = matching_ids(
            annotations_dir,
            images_dir,
            annotation_ext,
            image_ext,
        )

        # get the bounding boxes from the annotation files
        _logger.info("Extracting bounding box info from Darknet annotations...")
        for file_id in tqdm(file_ids):

            # get the image dimensions from the image file since this
            # info is not present in the corresponding Darknet annotation
            image_file_name = file_id + image_ext
            image_path = os.path.join(images_dir, image_file_name)
            width, height, _ = image_dimensions(image_path)

            # add all bounding boxes from the Darknet file to the list of boxes
            darknet_path = os.path.join(annotations_dir, file_id + annotation_ext)
            with open(darknet_path, "r") as darknet_file:
                for line in darknet_file:
                    darknet_box = line.split()
                    label_index = int(darknet_box[0])

                    # only use annotations corresponding to the specified labels
                    if label_index not in darknet_index_labels:
                        # skip this annotation line
                        continue

                    # convert the normalized center/size values
                    # back to pixel corner coordinates
                    center_x = float(darknet_box[1]) * width
                    center_y = float(darknet_box[2]) * height
                    box_width = float(darknet_box[3]) * width
                    box_height = float(darknet_box[4]) * height
                    bbox_values = (
                        image_file_name,
                        width,
                        height,
                        darknet_index_labels[label_index],
                        int(center_x - (box_width / 2)),
                        int(center_y - (box_height / 2)),
                        int(center_x + (box_width / 2)),
                        int(center_y + (box_height / 2)),
                    )
                    bboxes.append(bbox_values)

    else:
        raise ValueError(f"Unsupported annotation format: {annotation_format}")

    # stuff the bounding boxes into a pandas DataFrame
    column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(bboxes, columns=column_names)
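
# A minimal usage sketch (hypothetical paths, not part of the original
# module): builds the bounding box DataFrame from a directory of Darknet
# annotations and saves it as a CSV file.
def _example_bboxes_to_csv():
    examples_df = _dataset_bbox_examples(
        images_dir="/data/images",
        annotations_dir="/data/darknet",
        annotation_format="darknet",
        darknet_labels="/data/darknet/labels.txt",
    )
    examples_df.to_csv("/data/bboxes.csv", index=False)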
def load_mask(
        self,
        image_id: str,
) -> (np.ndarray, np.ndarray):
    """
    Generates instance masks for an image.

    :param image_id: image identifier
    :return: 1) a boolean array of shape [height, width, instance_count] with
        one mask per instance, and 2) a 1-D array of class IDs corresponding
        to the instance masks (of length instance_count)
    """

    # grab the image info and then grab the annotation data for
    # the current image based on the unique image ID
    info = self.image_info[image_id]
    annotation = self.via_annotations[info["id"]]

    # get the image's dimensions
    width, height, _ = image_dimensions(info["path"])

    # allocate memory for our [height, width, num_instances] 3-D array,
    # where each "instance" (region) effectively has its own "channel"
    num_instances = len(annotation["regions"])
    masks = np.zeros(shape=(height, width, num_instances), dtype="uint8")

    # allocate memory for our [num_instances] 1-D array to contain
    # the class IDs corresponding to each mask instance
    mask_class_ids = np.full(shape=(num_instances,), dtype="int32", fill_value=-1)

    # loop over each of the annotated regions
    for (i, region) in enumerate(annotation["regions"]):

        # allocate memory for the region mask
        region_mask = np.zeros(masks.shape[:2], dtype="uint8")

        # grab the shape and region attributes
        shape_attributes = region["shape_attributes"]
        region_attributes = region["region_attributes"]

        # find the class ID corresponding to the region's class attribute
        class_label = region_attributes["class"]
        class_id = -1
        for key, label in self.class_ids_to_labels.items():
            if label == class_label:
                class_id = key
                break
        if class_id == -1:
            raise ValueError(
                "No corresponding class ID found for the class label "
                f"found in the region attributes -- label: {class_label}",
            )

        # get the array of (x, y)-coordinates of the region's mask polygon
        x_coords = shape_attributes["all_points_x"]
        y_coords = shape_attributes["all_points_y"]
        coords = zip(x_coords, y_coords)
        poly_coords = [[x, y] for x, y in coords]
        pts = np.array(poly_coords, np.int32)

        # reshape the points to (<# of coordinates>, 1, 2)
        pts = pts.reshape((-1, 1, 2))

        # draw the polygon mask, using the class ID as the mask value
        cv2.fillPoly(region_mask, [pts], color=class_id)

        # store the mask in the masks array
        masks[:, :, i] = region_mask

        # store the class ID for this channel (mask region)
        mask_class_ids[i] = class_id

    # resize the masks to the dataset's configured width
    resized_mask = imutils.resize(masks[:, :, 0], width=self.width)
    new_height, new_width = resized_mask.shape[:2]
    resized_masks = np.zeros(
        [new_height, new_width, num_instances],
        dtype=np.uint8,
    )
    for i in range(num_instances):
        resized_masks[:, :, i] = imutils.resize(masks[:, :, i], width=self.width)

    # return the masks array and the array of mask class IDs
    return resized_masks.astype("bool"), mask_class_ids
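
# A small sketch (illustrative only, not part of the original module) of how
# the arrays returned by load_mask line up: one boolean [height, width]
# channel per annotated instance, with a matching class ID per channel.
def _example_inspect_masks(dataset, image_id):
    masks, class_ids = dataset.load_mask(image_id)
    assert masks.dtype == bool
    assert masks.shape[-1] == len(class_ids)
    return masks.shape, class_ids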