Example #1
def _build_write_tfrecord(
    args: Dict,
):
    """
    Builds and writes a TFRecord with image and segmentation (mask) features.

    :param args: dictionary containing the following function arguments:
         output_path: the path of the TFRecord file to be written
         shard_id: shard ID (for multi-shard TFRecord datasets)
         num_per_shard: number of images/masks per shard
         num_images: total number of images in dataset
         file_ids: file IDs for image/mask files
         images_dir: directory containing image files
         masks_dir: directory containing mask files corresponding to the images
    """

    with TFRecordWriter(args["output_path"]) as tfrecord_writer:
        start_idx = args["shard_id"] * args["num_per_shard"]
        end_idx = min((args["shard_id"] + 1) * args["num_per_shard"], args["num_images"])
        for i in range(start_idx, end_idx):
            print(f'\r>> Converting image {i + 1}/{len(args["file_ids"])}, '
                  f'shard {args["shard_id"]}', end="")

            # read the image
            image_file_name = args["file_ids"][i] + ".jpg"
            image_path = os.path.join(args["images_dir"], image_file_name)
            image_data = tf.io.gfile.GFile(image_path, 'rb').read()
            width, height, _ = image_dimensions(image_path)

            # read the semantic segmentation annotation (mask)
            mask_path = os.path.join(args["masks_dir"], args["file_ids"][i] + ".png")
            seg_data = tf.io.gfile.GFile(mask_path, 'rb').read()
            seg_width, seg_height, _ = image_dimensions(mask_path)
            if height != seg_height or width != seg_width:
                raise RuntimeError('Shape mismatch between image and mask.')

            # Convert to tf example.
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/encoded': _bytes_list_feature(image_data),
                'image/filename': _bytes_list_feature(image_file_name),
                'image/format': _bytes_list_feature('jpeg'),
                'image/height': _int64_list_feature(height),
                'image/width': _int64_list_feature(width),
                'image/channels': _int64_list_feature(3),
                'image/segmentation/class/encoded': (_bytes_list_feature(seg_data)),
                'image/segmentation/class/format': _bytes_list_feature('png'),
            }))
            tfrecord_writer.write(example.SerializeToString())
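
A minimal sketch of the helpers the example above assumes (_bytes_list_feature, _int64_list_feature, and image_dimensions); the names come from how they are called, but these bodies are assumptions rather than the original implementations.

import tensorflow as tf
from PIL import Image


def _bytes_list_feature(value) -> tf.train.Feature:
    # wrap a str/bytes value in a TFRecord bytes_list feature
    if isinstance(value, str):
        value = value.encode("utf-8")
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _int64_list_feature(value) -> tf.train.Feature:
    # wrap an int (or iterable of ints) in a TFRecord int64_list feature
    values = list(value) if isinstance(value, (list, tuple)) else [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def image_dimensions(image_path: str):
    # return (width, height, channels) for an image file
    with Image.open(image_path) as image:
        width, height = image.size
        channels = len(image.getbands())
    return width, height, channels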
Example #2
def _create_tf_example(
        label_indices: Dict,
        group: NamedTuple,
        images_dir: str,
) -> tf.train.Example:
    """
    Creates a TensorFlow Example object representation of a group of annotations
    for an image file.

    :param label_indices: dictionary mapping class labels to their integer indices
    :param group: namedtuple containing the image file name and the group of
        annotation rows (a pandas DataFrame) for that image
    :param images_dir: directory containing dataset image files
    :return: TensorFlow Example object corresponding to the group of annotations
    """

    # read the image
    image_file_name = group.filename
    image_path = os.path.join(images_dir, group.filename)
    image_data = tf.io.gfile.GFile(image_path, 'rb').read()
    width, height, _ = image_dimensions(image_path)

    # lists of bounding box values for the example
    filename = group.filename.encode('utf8')
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # for each bounding box annotation add the values into the lists
    for index, row in group.object.iterrows():
        # normalize the bounding box coordinates to within the range (0, 1)
        xmins.append(int(row['xmin']) / width)
        xmaxs.append(int(row['xmax']) / width)
        ymins.append(int(row['ymin']) / height)
        ymaxs.append(int(row['ymax']) / height)
        # get the class label and corresponding index
        classes_text.append(row['class'].encode('utf8'))
        classes.append(label_indices[row['class']])

    # build the Example from the lists of coordinates, class labels/indices, etc.
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': _int64_feature(height),
        'image/width': _int64_feature(width),
        'image/source_id': _bytes_feature(filename),
        'image/encoded': _bytes_list_feature(image_data),
        'image/filename': _bytes_list_feature(image_file_name),
        'image/format': _bytes_list_feature('jpeg'),
        'image/object/bbox/xmin': _float_list_feature(xmins),
        'image/object/bbox/xmax': _float_list_feature(xmaxs),
        'image/object/bbox/ymin': _float_list_feature(ymins),
        'image/object/bbox/ymax': _float_list_feature(ymaxs),
        'image/object/class/text': _string_bytes_list_feature(classes_text),
        'image/object/class/label': _int64_list_feature(classes),
    }))

    return tf_example
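
A hypothetical driver showing how _create_tf_example might be fed: grouping a bounding-box CSV by filename into (filename, object) namedtuples mirrors what group.filename and group.object imply, but the CSV name, its columns, the label mapping, and the TFRecord path are assumptions for illustration.

from collections import namedtuple

import pandas as pd
import tensorflow as tf

ExampleGroup = namedtuple("ExampleGroup", ["filename", "object"])


def split_into_groups(examples_df: pd.DataFrame):
    # group the per-box rows by image filename
    grouped = examples_df.groupby("filename")
    return [ExampleGroup(filename, grouped.get_group(filename))
            for filename in grouped.groups]


label_indices = {"cat": 1, "dog": 2}          # assumed label mapping
examples_df = pd.read_csv("annotations.csv")  # filename, class, xmin, ymin, xmax, ymax
with tf.io.TFRecordWriter("dataset.tfrecord") as writer:
    for group in split_into_groups(examples_df):
        tf_example = _create_tf_example(label_indices, group, "images")
        writer.write(tf_example.SerializeToString())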
Example #3
    def add_images(
        self,
        data_source: str,
        idxs: List[int],
    ):
        """
        Add images into the dataset based on indices of the image file paths list.

        :param data_source:
        :param idxs: indices of the file paths list for the images be added
        :return:
        """

        # loop over each of the class IDs/names and add to the source dataset
        for (class_id, label) in self.class_ids_to_labels.items():
            self.add_class(data_source, class_id, label)

        # loop over the image path indexes
        for i in idxs:
            # extract the image filename to serve as the unique image ID
            image_path = self.image_paths[i]
            filename = image_path.split(os.path.sep)[-1]

            # get the image dimensions
            width, height, _ = image_dimensions(image_path)

            # Get the x, y coordinates of points of the polygons that make up
            # the outline of each object instance. These are stored in the
            # shape_attributes (see json format above)
            # The if condition is needed to support VIA versions 1.x and 2.x.
            a = self.via_annotations[filename]
            if isinstance(a['regions'], dict):
                polygons = [
                    r['shape_attributes'] for r in a['regions'].values()
                ]
            else:
                polygons = [r['shape_attributes'] for r in a['regions']]

            # add the image to the dataset
            self.add_image(data_source,
                           image_id=filename,
                           path=image_path,
                           width=width,
                           height=height,
                           polygons=polygons)
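
A hypothetical sketch of how the attributes this method relies on (image_paths, via_annotations, class_ids_to_labels) might be prepared before calling add_images; the file names, the class mapping, and the 80% training split are illustrative assumptions, not part of the original class.

import json
import os

images_dir = "dataset/images"
image_paths = [
    os.path.join(images_dir, file_name)
    for file_name in sorted(os.listdir(images_dir))
    if file_name.endswith(".jpg")
]

# VIA exports a dict keyed by "<filename><filesize>"; re-key it by filename
with open("via_region_data.json") as annotations_file:
    via_export = json.load(annotations_file)
via_annotations = {entry["filename"]: entry for entry in via_export.values()}

class_ids_to_labels = {1: "object"}    # assumed single-class mapping

# typical call: add the first 80% of the images as the training split
# dataset.add_images("via_dataset", list(range(int(0.8 * len(image_paths)))))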
Example #4
def _write_bboxes_as_darknet(
    bboxes: List[List[float]],
    label_index: int,
    image_id: str,
    images_dir: str,
    darknet_dir: str,
) -> str:
    """
    Writes a Darknet annotation file containing the bounding boxes for an image.

    :param bboxes: iterable of lists of bounding box coordinates [xmin, xmax,
        ymin, ymax]
    :param label_index: class label index
    :param image_id: image ID (should be the image's file name minus the file
        extension ".jpg")
    :param images_dir: directory where the image file is located
    :param darknet_dir: directory where the Darknet annotation file should be written
    :return: path to the Darknet annotation file
    """

    # get the images' dimensions
    image_file_path = os.path.join(images_dir, image_id + ".jpg")
    image_width, image_height, _ = image_dimensions(image_file_path)

    # open the annotation file for writing bounding boxes one per line
    darknet_file_path = os.path.join(darknet_dir, image_id + ".txt")
    if os.path.exists(darknet_file_path):
        # an annotation file already exists for this image so append to it
        open_mode = "+a"
    else:
        # no annotation file exists yet for this image so create it
        open_mode = "+w"
    with open(darknet_file_path, open_mode) as darknet_file:

        # for each bounding box get the corresponding center x and y
        # as well as the bounding box's width and height in terms of
        # a decimal fraction of the total image dimension
        for bbox in bboxes:

            # get the label index based on the annotation's object name
            # find the bounding box's center X and Y, and width/height
            bbox_min_x, bbox_max_x, bbox_min_y, bbox_max_y = bbox
            bbox_width = (bbox_max_x - bbox_min_x) * image_width
            bbox_height = (bbox_max_y - bbox_min_y) * image_height
            bbox_width_fraction = bbox_width / image_width
            bbox_height_fraction = bbox_height / image_height
            bbox_center_x = (bbox_min_x * image_width) + (bbox_width / 2)
            bbox_center_y = (bbox_min_y * image_height) + (bbox_height / 2)
            bbox_center_fraction_x = bbox_center_x / image_width
            bbox_center_fraction_y = bbox_center_y / image_height

            # if a fraction overshoots the valid [0.0, 1.0] range only slightly
            # then clip it, otherwise report the issue and skip the box
            if bbox_width_fraction > 1.0:

                if (bbox_width_fraction - 1.0) > 0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid (too "
                        "wide) width fraction", )
                    continue

                else:
                    # clip to one
                    bbox_width_fraction = 1.0

            if bbox_width_fraction < 0.0:

                if bbox_width_fraction < -0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid ("
                        "negative) width fraction -- skipping this box", )
                    continue

                else:
                    # clip to zero
                    bbox_width_fraction = 0.0

            if bbox_height_fraction > 1.0:

                if (bbox_height_fraction - 1.0) > 0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid ("
                        "too tall) height fraction -- skipping this box", )
                    continue
                else:
                    # clip to 1.0
                    bbox_height_fraction = 1.0

            if bbox_height_fraction < 0.0:

                if bbox_height_fraction < -0.025:
                    # we have a significant overshoot, something's off and
                    # we probably can't fix it without looking into the issue
                    # further so report it via the logger and skip
                    _logger.warning(
                        "Creation of Darknet annotation for image "
                        f"{image_id} results in an invalid ("
                        "negative) height fraction -- skipping this box", )
                    continue

                else:
                    # clip to zero
                    bbox_height_fraction = 0.0

            if (bbox_width < 0.0) or (bbox_height < 0.0):
                # something's off and we probably can't fix it without looking
                # into the issue further so report it via the logger and skip
                _logger.warning(
                    "Creation of Darknet annotation for image "
                    f"{image_id} results in an invalid ("
                    "negative) width or height -- skipping this box", )
                continue

            # write the bounding box info into the file
            darknet_file.write(
                f"{label_index} {bbox_center_fraction_x} "
                f"{bbox_center_fraction_y} "
                f"{bbox_width_fraction} "
                f"{bbox_height_fraction}\n")

    return darknet_file_path
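
A small worked check of the center/width/height fraction math used above, with made-up image dimensions and a fractional box in the same [xmin, xmax, ymin, ymax] order:

# illustrative values only
image_width, image_height = 640, 480
bbox_min_x, bbox_max_x, bbox_min_y, bbox_max_y = 0.25, 0.75, 0.10, 0.50

bbox_width = (bbox_max_x - bbox_min_x) * image_width       # 320 px
bbox_height = (bbox_max_y - bbox_min_y) * image_height     # 192 px
center_x_fraction = ((bbox_min_x * image_width) + bbox_width / 2) / image_width     # 0.5
center_y_fraction = ((bbox_min_y * image_height) + bbox_height / 2) / image_height  # 0.3
width_fraction = bbox_width / image_width                   # 0.5
height_fraction = bbox_height / image_height                # 0.4

# Darknet line format: "<label_index> <center_x> <center_y> <width> <height>"
print(f"0 {center_x_fraction} {center_y_fraction} {width_fraction} {height_fraction}")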
Example #5
def _write_bboxes_as_pascal(
    bboxes: List[List[float]],
    label: str,
    image_id: str,
    images_dir: str,
    pascal_dir: str,
) -> int:
    """
    Writes a PASCAL VOC (XML) annotation file containing the bounding boxes for
    an image.

    :param bboxes: iterable of lists of bounding box coordinates [xmin, xmax, ymin, ymax]
    :param label: class label
    :param image_id: ID of the image file (typically the image file name
        minus ".jpg" or ".png")
    :param images_dir: directory where the image file is located
    :param pascal_dir: directory where the PASCAL file should be written
    :return: 0 for success, 1 for failure
    """

    # get the image dimensions
    image_file_name = image_id + ".jpg"
    image_path = os.path.join(images_dir, image_file_name)
    try:
        img_width, img_height, img_depth = image_dimensions(image_path)
    except OSError as error:
        _logger.warning(
            "Unable to create PASCAL annotation for image "
            f"{image_file_name} -- skipping: {error}")
        return 1

    normalized_image_path = os.path.normpath(image_path)
    folder_name, image_file_name = normalized_image_path.split(
        os.path.sep)[-2:]

    # TODO
    #  The below creates a fresh tree in all cases for later writing to the
    #  annotation XML file. We should instead first see if the annotation file
    #  already exists and if so then add the annotations (bounding boxes) to
    #  the existing element tree before we then rewrite the XML file.

    annotation = etree.Element('annotation')
    folder = etree.SubElement(annotation, "folder")
    folder.text = folder_name
    filename = etree.SubElement(annotation, "filename")
    filename.text = image_file_name
    path = etree.SubElement(annotation, "path")
    path.text = normalized_image_path
    source = etree.SubElement(annotation, "source")
    database = etree.SubElement(source, "database")
    database.text = "OpenImages"
    size = etree.SubElement(annotation, "size")
    width = etree.SubElement(size, "width")
    width.text = str(img_width)
    height = etree.SubElement(size, "height")
    height.text = str(img_height)
    depth = etree.SubElement(size, "depth")
    depth.text = str(img_depth)
    segmented = etree.SubElement(annotation, "segmented")
    segmented.text = "0"
    for bbox in bboxes:
        obj = etree.SubElement(annotation, "object")
        name = etree.SubElement(obj, "name")
        name.text = label
        pose = etree.SubElement(obj, "pose")
        pose.text = "Unspecified"
        truncated = etree.SubElement(obj, "truncated")
        truncated.text = "0"
        difficult = etree.SubElement(obj, "difficult")
        difficult.text = "0"
        bndbox = etree.SubElement(obj, "bndbox")
        xmin = etree.SubElement(bndbox, "xmin")
        xmin.text = str(max(0, int(bbox[0] * img_width)))
        xmax = etree.SubElement(bndbox, "xmax")
        xmax.text = str(min(img_width - 1, int(bbox[1] * img_width)))
        ymin = etree.SubElement(bndbox, "ymin")
        ymin.text = str(max(0, int(bbox[2] * img_height)))
        ymax = etree.SubElement(bndbox, "ymax")
        ymax.text = str(min(img_height - 1, int(bbox[3] * img_height)))

    # write the XML to file
    pascal_file_path = os.path.join(pascal_dir, image_id + ".xml")
    with open(pascal_file_path, 'w') as pascal_file:
        pascal_file.write(
            etree.tostring(annotation, pretty_print=True,
                           encoding='utf-8').decode("utf-8"))

    return 0
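
A hypothetical call to the function above; the paths and fractional box values are invented for illustration. Note that the etree used in the example is lxml.etree, since the standard library's ElementTree.tostring() does not accept a pretty_print argument.

from lxml import etree   # the example relies on lxml for pretty_print support

# a single box in the fractional [xmin, xmax, ymin, ymax] order the code expects
boxes = [[0.25, 0.75, 0.10, 0.50]]
status = _write_bboxes_as_pascal(boxes, "dog", "image_0001", "images", "pascal")
if status != 0:
    print("PASCAL annotation could not be written")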
Example #6
def vgg_to_masks(
        images_dir: str,
        annotations_file: str,
        masks_dir: str,
        class_labels_file: str,
        combine_into_one: bool = False,
):
    """
    Creates mask files from annotations specified in a JSON file exported from
    the VGG Image Annotator (VIA) tool.

    :param images_dir: directory containing JPG image files
    :param annotations_file: annotation file containing segmentation (mask)
        regions, expected to be in the JSON format created by the VGG Image
        Annotator tool
    :param masks_dir: directory where PNG mask files will be written
    :param class_labels_file: text file containing one class label per line
    :param combine_into_one: if True then combine all mask regions for an image
        into a single mask file
    """

    # arguments validation
    if not os.path.exists(images_dir):
        raise ValueError(f"Invalid images directory path: {images_dir}")
    elif not os.path.exists(annotations_file):
        raise ValueError(f"Invalid annotations file path: {annotations_file}")

    # make the masks directory if it doesn't already exist
    os.makedirs(masks_dir, exist_ok=True)

    # load the contents of the annotation JSON file (created
    # using the VIA tool) and initialize the annotations dictionary
    with open(annotations_file) as annotations_json:
        annotations = json.load(annotations_json)
    image_annotations = {}

    # loop over the file ID and annotations themselves (values)
    for data in annotations.values():

        # store the data in the dictionary using the filename as the key
        image_annotations[data["filename"]] = data

    # get a dictionary of class labels to class IDs
    class_labels = _class_labels_to_ids(class_labels_file)

    _logger.info("Generating mask files...")
    for image_file_name in tqdm(os.listdir(images_dir)):

        # skip any files without a *.jpg extension
        if not image_file_name.endswith(".jpg"):
            continue

        file_id = os.path.splitext(image_file_name)[0]

        # grab the image info and then grab the annotation data for
        # the current image based on the unique image ID
        annotation = image_annotations[image_file_name]

        # get the image's dimensions
        width, height, _ = image_dimensions(os.path.join(images_dir, image_file_name))

        # if combining all regions into a single mask file
        # then we'll only need to allocate the mask array once
        if combine_into_one:
            # allocate memory for the region mask
            region_mask = np.zeros((height, width, 3), dtype="uint8")

        # loop over each of the annotated regions
        for (i, region) in enumerate(annotation["regions"]):

            # if not combining all regions into a single mask file then
            # we'll need to reallocate the mask array for each mask region
            if not combine_into_one:
                # allocate memory for the region mask
                region_mask = np.zeros((height, width, 3), dtype="uint8")

            # grab the shape and region attributes
            shape_attributes = region["shape_attributes"]
            region_attributes = region["region_attributes"]

            # find the class ID corresponding to the region's class attribute
            class_label = region_attributes["class"]
            if class_label not in class_labels:
                raise ValueError(
                    "No corresponding class ID found for the class label "
                    f"found in the region attributes -- label: {class_label}",
                )
            else:
                class_id = class_labels[class_label]

            # get the array of (x, y)-coordinates for the region's mask polygon
            x_coords = shape_attributes["all_points_x"]
            y_coords = shape_attributes["all_points_y"]
            coords = zip(x_coords, y_coords)
            poly_coords = [[x, y] for x, y in coords]
            pts = np.array(poly_coords, np.int32)

            # reshape the points to (<# of coordinates>, 1, 2)
            pts = pts.reshape((-1, 1, 2))

            # draw the polygon mask, using the class ID as the mask value
            cv2.fillPoly(region_mask, [pts], color=[class_id]*3)

            # if not combining all masks into a single file
            # then write this mask into its own file
            if not combine_into_one:
                # write the mask file
                mask_file_name = f"{file_id}_segmentation_{i}.png"
                cv2.imwrite(os.path.join(masks_dir, mask_file_name), region_mask)

        # write a combined mask file, if requested
        if combine_into_one:
            # write the mask file
            mask_file_name = f"{file_id}_segmentation.png"
            cv2.imwrite(os.path.join(masks_dir, mask_file_name), region_mask)

    _logger.info("Done")
Example #7
def kitti_to_darknet(
        images_dir: str,
        kitti_dir: str,
        darknet_dir: str,
        darknet_labels: str,
):
    """
    Creates equivalent Darknet (YOLO) annotation files corresponding to a dataset
    with KITTI annotations.

    :param images_dir: directory containing the dataset's images
    :param kitti_dir: directory containing the dataset's KITTI annotation files
    :param darknet_dir: directory where the equivalent Darknet annotation files
        will be written
    :param darknet_labels: labels file corresponding to the label indices used
        in the Darknet annotation files, will be written into the specified
        Darknet annotations directory
    """

    _logger.info("Converting annotations in KITTI format to Darknet format equivalents")

    # create the Darknet annotations directory in case it doesn't yet exist
    os.makedirs(darknet_dir, exist_ok=True)

    # get list of file IDs of the KITTI annotations and corresponding images
    annotation_ext = ".txt"
    image_ext = ".jpg"
    file_ids = matching_ids(kitti_dir, images_dir, annotation_ext, image_ext)

    # dictionary of labels to indices
    label_indices = {}

    # build Darknet annotations from KITTI
    for file_id in tqdm(file_ids):

        # get the image's dimensions
        image_file_name = file_id + image_ext
        width, height, _ = image_dimensions(os.path.join(images_dir, image_file_name))

        # loop over all annotation lines in the KITTI file and compute Darknet equivalents
        annotation_file_name = file_id + annotation_ext
        with open(os.path.join(kitti_dir, annotation_file_name), "r") as kitti_file:
            darknet_bboxes = []
            for line in kitti_file:
                parts = line.split()
                label = parts[0]
                if label in label_indices:
                    label_index = label_indices[label]
                else:
                    label_index = len(label_indices)
                    label_indices[label] = label_index
                box_width_pixels = float(parts[6]) - float(parts[4]) + 1
                box_height_pixels = float(parts[7]) - float(parts[5]) + 1
                darknet_bbox = {
                    "label_index": label_index,
                    "center_x": ((box_width_pixels / 2) + float(parts[4])) / width,
                    "center_y": ((box_height_pixels / 2) + float(parts[5])) / height,
                    "box_width": box_width_pixels / width,
                    "box_height": box_height_pixels / height,
                }
                darknet_bboxes.append(darknet_bbox)

        # write the Darknet annotation boxes into a Darknet annotation file
        with open(os.path.join(darknet_dir, annotation_file_name), "w") as darknet_file:
            for darknet_bbox in darknet_bboxes:
                darknet_file.write(
                    f"{darknet_bbox['label_index']} {darknet_bbox['center_x']} "
                    f"{darknet_bbox['center_y']} {darknet_bbox['box_width']} "
                    f"{darknet_bbox['box_height']}\n"
                )

    # write the Darknet labels into a text file, one label per line,
    # in order according to the indices used in the annotation files
    with open(os.path.join(darknet_dir, darknet_labels), "w") as darknet_labels_file:
        index_labels = {v: k for k, v in label_indices.items()}
        for i in range(len(index_labels)):
            darknet_labels_file.write(f"{index_labels[i]}\n")
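
A minimal sketch of the matching_ids helper used above (and in the next example): it returns the file IDs, i.e. file names without extension, that have both an annotation file and an image file. The body is inferred from how the helper is called, so treat it as an assumption.

import os
from typing import List


def matching_ids(
    annotations_dir: str,
    images_dir: str,
    annotation_ext: str,
    image_ext: str,
) -> List[str]:
    # IDs present in both directories, with the given extensions
    annotation_ids = {
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(annotations_dir)
        if file_name.endswith(annotation_ext)
    }
    image_ids = {
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(images_dir)
        if file_name.endswith(image_ext)
    }
    return sorted(annotation_ids & image_ids)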
Example #8
def _dataset_bbox_examples(
        images_dir: str,
        annotations_dir: str,
        annotation_format: str,
        darknet_labels: str = None,
) -> pd.DataFrame:
    """

    :param images_dir: directory containing the dataset's *.jpg image files
    :param annotations_dir: directory containing the dataset's annotation files
    :param annotation_format: currently supported: "darknet", "kitti", and "pascal"
    :param darknet_labels: path to the class labels file corresponding to Darknet
        (YOLO) annotation files, only necessary if using "darknet" annotation format
    :return: pandas DataFrame with rows corresponding to the dataset's bounding boxes
    """

    # we expect all images to use the *.jpg extension
    image_ext = ".jpg"

    # list of bounding box annotations we'll eventually write to CSV
    bboxes = []

    if annotation_format == "pascal":

        # get the file IDs for all matching image/PASCAL pairs (i.e. the dataset)
        annotation_ext = ".xml"
        for file_id in matching_ids(
                annotations_dir,
                images_dir,
                annotation_ext,
                image_ext,
        ):
            # add all bounding boxes from the PASCAL file to the list of boxes
            pascal_path = os.path.join(annotations_dir, file_id + annotation_ext)
            tree = ElementTree.parse(pascal_path)
            root = tree.getroot()
            for member in root.findall('object'):
                bbox_values = (
                    root.find('filename').text,
                    int(root.find('size')[0].text),
                    int(root.find('size')[1].text),
                    member[0].text,
                    int(member[4][0].text),
                    int(member[4][1].text),
                    int(member[4][2].text),
                    int(member[4][3].text),
                )
                bboxes.append(bbox_values)

    elif annotation_format == "kitti":

        # get the file IDs for all matching image/KITTI pairs (i.e. the dataset)
        annotation_ext = ".txt"
        for file_id in matching_ids(
                annotations_dir,
                images_dir,
                annotation_ext,
                image_ext,
        ):
            # get the image dimensions from the image file since this
            # info is not present in the corresponding KITTI annotation
            image_file_name = file_id + image_ext
            image_path = os.path.join(images_dir, image_file_name)
            width, height, _ = image_dimensions(image_path)

            # add all bounding boxes from the KITTI file to the list of boxes
            kitti_path = os.path.join(annotations_dir, file_id + annotation_ext)
            with open(kitti_path, "r") as kitti_file:
                for line in kitti_file:
                    kitti_box = line.split()
                    bbox_values = (
                        image_file_name,
                        width,
                        height,
                        kitti_box[0],
                        int(float(kitti_box[4])),
                        int(float(kitti_box[5])),
                        int(float(kitti_box[6])),
                        int(float(kitti_box[7])),
                    )
                    bboxes.append(bbox_values)

    elif annotation_format == "darknet":

        # read class labels into index/label dictionary
        darknet_index_labels = darknet_indices_to_labels(darknet_labels)

        # get the file IDs for all matching image/Darknet pairs (i.e. the dataset)
        annotation_ext = ".txt"
        file_ids = matching_ids(
                annotations_dir,
                images_dir,
                annotation_ext,
                image_ext,
        )

        # get the bounding boxes from the annotation files
        _logger.info("Extracting bounding box info from Darknet annotations...")
        for file_id in tqdm(file_ids):
            # get the image dimensions from the image file since this
            # info is not present in the corresponding Darknet annotation
            image_file_name = file_id + image_ext
            image_path = os.path.join(images_dir, image_file_name)
            width, height, _ = image_dimensions(image_path)

            # add all bounding boxes from the Darknet file to the list of boxes
            darknet_path = os.path.join(annotations_dir, file_id + annotation_ext)
            with open(darknet_path, "r") as darknet_file:
                for line in darknet_file:
                    darknet_box = line.split()
                    label_index = int(darknet_box[0])
                    # only use annotations corresponding to the specified labels
                    if label_index not in darknet_index_labels:
                        # skip this annotation line
                        continue
                    center_x = float(darknet_box[1]) * width
                    center_y = float(darknet_box[2]) * height
                    box_width = float(darknet_box[3]) * width
                    box_height = float(darknet_box[4]) * height
                    bbox_values = (
                        image_file_name,
                        width,
                        height,
                        darknet_index_labels[label_index],
                        int(center_x - (box_width / 2)),
                        int(center_y - (box_height / 2)),
                        int(center_x + (box_width / 2)),
                        int(center_y + (box_height / 2)),
                    )
                    bboxes.append(bbox_values)

    else:
        raise ValueError(f"Unsupported annotation format: {annotation_format}")

    # stuff the bounding boxes into a pandas DataFrame
    column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(bboxes, columns=column_names)
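
A minimal sketch of the darknet_indices_to_labels helper assumed by the Darknet branch above; it simply inverts a labels file of the kind written by kitti_to_darknet in Example #7, with the 0-based line number serving as the label index. This body is an assumption inferred from how the helper is used.

from typing import Dict


def darknet_indices_to_labels(darknet_labels_path: str) -> Dict[int, str]:
    # map each Darknet label index (line number) to its class label string
    index_labels = {}
    with open(darknet_labels_path) as labels_file:
        for index, line in enumerate(labels_file):
            label = line.strip()
            if label:
                index_labels[index] = label
    return index_labels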
Example #9
    def load_mask(
        self,
        image_id: str,
    ) -> (np.ndarray, List):
        """
        Generate instance masks for an image.

        :param image_id: image identifier
        :return: 1) a boolean array of shape [height, width, instance_count]
            with one mask per instance, and 2) a 1-D array of class IDs
            corresponding to the instance masks (of length instance_count)
        """

        # grab the image info and then grab the annotation data for
        # the current image based on the unique image ID
        info = self.image_info[image_id]
        annotation = self.via_annotations[info["id"]]

        # get the image's dimensions
        width, height, _ = image_dimensions(info["path"])

        # allocate memory for our [height, width, num_instances] 3-D array
        # where each "instance" (region) effectively has its own "channel"
        num_instances = len(annotation["regions"])
        masks = np.zeros(shape=(height, width, num_instances), dtype="uint8")

        # allocate memory for our [num_instances] 1-D array to contain
        # the class IDs corresponding to each mask instance
        mask_class_ids = np.full(shape=(num_instances, ),
                                 dtype="int32",
                                 fill_value=-1)

        # loop over each of the annotated regions
        for (i, region) in enumerate(annotation["regions"]):

            # allocate memory for the region mask
            region_mask = np.zeros(masks.shape[:2], dtype="uint8")

            # grab the shape and region attributes
            shape_attributes = region["shape_attributes"]
            region_attributes = region["region_attributes"]

            # find the class ID corresponding to the region's class attribute
            class_label = region_attributes["class"]
            class_id = -1
            for key, label in self.class_ids_to_labels.items():
                if label == class_label:
                    class_id = key
                    break
            if class_id == -1:
                raise ValueError(
                    "No corresponding class ID found for the class label "
                    f"found in the region attributes -- label: {class_label}",
                )

            # get the array of (x, y)-coordinates for the region's mask polygon
            x_coords = shape_attributes["all_points_x"]
            y_coords = shape_attributes["all_points_y"]
            coords = zip(x_coords, y_coords)
            poly_coords = [[x, y] for x, y in coords]
            pts = np.array(poly_coords, np.int32)

            # reshape the points to (<# of coordinates>, 1, 2)
            pts = pts.reshape((-1, 1, 2))

            # draw the polygon mask, using the class ID as the mask value
            cv2.fillPoly(region_mask, [pts], color=[class_id] * 3)

            # store the mask in the masks array
            masks[:, :, i] = region_mask

            # store the class ID for this channel (mask region)
            mask_class_ids[i] = class_id

        # resize the masks
        resized_mask = imutils.resize(masks[:, :, 0], width=self.width)
        new_height, new_width = resized_mask.shape[:2]
        resized_masks = np.zeros(
            [new_height, new_width, num_instances],
            dtype=np.uint8,
        )
        for i in range(num_instances):
            resized_masks[:, :, i] = imutils.resize(masks[:, :, i],
                                                    width=self.width)

        # return the masks array and the array of mask class IDs
        return resized_masks.astype("bool"), mask_class_ids