def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support arrays with negative strides.
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
            )
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
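# Illustrative usage sketch (not part of the original snippet): build a single
# fake annotation and convert it. Assumes the detectron2 structures imported by
# this module (BoxMode, Boxes, Instances, PolygonMasks, ...).
demo_annos = [
    {
        "bbox": [10.0, 20.0, 50.0, 80.0],  # x, y, width, height
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 0,
        # one polygon per instance: [x0, y0, x1, y1, ...]
        "segmentation": [[10.0, 20.0, 60.0, 20.0, 60.0, 100.0, 10.0, 100.0]],
    }
]
demo_instances = annotations_to_instances(demo_annos, image_size=(480, 640))
print(demo_instances.gt_boxes)    # Boxes in XYXY_ABS, clipped to the image
print(demo_instances.gt_classes)  # tensor([0])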
Example #2
    def __getitem__(self, index):
        ann = self.coco[index]

        # bbox transform.
        bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                               BoxMode.XYXY_ABS)  # x1y1x2y2
        bbox = Boxes(bbox)

        # mask transform.
        mask = PolygonMasks([ann["segmentation"]])
        mask = mask.crop_and_resize(bbox.tensor, self.size).float()

        return mask
    def __getitem__(self, index):
        ann = self.all_annotations[index]

        # bbox transform.
        bbox = np.array([ann["bbox"]])  # xmin, ymin, xmax, ymax
        bbox = Boxes(bbox)

        # mask transform.
        # print(bbox)
        # print(ann["segmentation"])
        mask = PolygonMasks([ann["segmentation"]])
        mask = mask.crop_and_resize(bbox.tensor, self.size).float()

        return mask
def annotations_to_instances_rotated(annos, image_size):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Compared to `annotations_to_instances`, this function is for rotated boxes only

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            Containing fields "gt_boxes", "gt_classes", "gt_masks",
            if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [obj["bbox"] for obj in annos]
    masks = [obj["segmentation"] for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = RotatedBoxes(boxes)
    masks = target.gt_masks = PolygonMasks(masks)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    return target
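# Illustrative usage sketch (not from the original source): rotated boxes are
# passed through as-is in (cx, cy, w, h, angle) form; no BoxMode conversion happens here.
demo_annos = [
    {
        "bbox": [100.0, 100.0, 60.0, 30.0, 45.0],  # cx, cy, w, h, angle in degrees
        "category_id": 1,
        "segmentation": [[80.0, 80.0, 120.0, 80.0, 120.0, 120.0, 80.0, 120.0]],
    }
]
demo_instances = annotations_to_instances_rotated(demo_annos, image_size=(256, 256))
print(demo_instances.gt_boxes)  # RotatedBoxes
print(demo_instances.gt_masks)  # PolygonMasks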
Example #5
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    if len(annos) and "viewpoint" in annos[0]:
        viewpoints = np.array([obj["viewpoint"] for obj in annos])
        viewpoints_class = torch.tensor(viewpoints[:, 0], dtype=torch.int64)
        target.gt_viewpoint = viewpoints_class
        if len(annos[0]["viewpoint"]) == 2:
            viewpoints_rads = torch.tensor(viewpoints[:, 1],
                                           dtype=torch.float32)
            target.gt_viewpoint_rads = viewpoints_rads

    if len(annos) and "bbox3D" in annos[0]:
        bbox3D = [obj["bbox3D"] for obj in annos]
        bbox3D = torch.tensor(bbox3D, dtype=torch.float)
        target.gt_bbox3D = bbox3D

    if len(annos) and "height" in annos[0]:
        height = [obj["height"] for obj in annos]
        height = torch.tensor(height, dtype=torch.float)
        target.gt_height = height

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
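# Illustrative usage sketch (not from the original source): this variant also reads
# the optional per-instance "viewpoint" ([bin] or [bin, angle_in_radians]), "bbox3D"
# and "height" fields when they are present in the annotations.
demo_annos = [
    {
        "bbox": [10.0, 20.0, 50.0, 80.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 2,
        "viewpoint": [3, 0.52],  # viewpoint bin plus angle in radians
        "height": 1.6,           # hypothetical object height
    }
]
demo_instances = annotations_to_instances(demo_annos, image_size=(375, 1242))
print(demo_instances.gt_viewpoint)       # tensor([3])
print(demo_instances.gt_viewpoint_rads)  # tensor([0.5200])
print(demo_instances.gt_height)          # tensor([1.6000])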
Example #6
def dota_annotations_to_instances(annos, image_size):

    target = Instances(image_size)

    obb_boxes = [obj["boxes"] for obj in annos]
    obb_boxes = target.gt_boxes = RotatedBoxes(obb_boxes)
    obb_boxes.clip(image_size)

    pt_hbb, pt_inbox, polygons = [], [], []

    rotate_boxes = obb_boxes.tensor.numpy()
    data = [convRotaToPolyAndHbb(rotate_box) for rotate_box in rotate_boxes]
    for d in data:
        pt_hbb.append(d[0])
        pt_inbox.append(d[1])
        polygons.append(d[2])

    target.gt_pt_inbox_boxes = Boxes(pt_inbox)

    target.gt_pt_hbb_boxes = Boxes(pt_hbb)

    classes = [obj["category_id"] + 1 for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    masks = PolygonMasks(polygons)
    target.gt_masks = masks

    if len(target) > 2000:
        mask = random.sample(list(range(0, len(target))), 2000)
        target = target[mask]

    return target
Example #7
    def add_pseudo_label(self, targets, image_path, flip):
        new_targets = []
        if self.pseudo_gt is None:
            return targets
        if len(targets) > 0 and targets[
                0].gt_boxes.tensor.device != self.pseudo_gt.device:
            self.pseudo_gt = self.pseudo_gt.to(
                targets[0].gt_boxes.tensor.device)
        for i, (targets_per_image, path) in enumerate(zip(targets,
                                                          image_path)):
            H, W = targets_per_image._image_size
            gt_boxes = targets_per_image.gt_boxes
            gt_classes = targets_per_image.gt_classes
            p = int(path.split('/')[-1].split('.')[0])
            data = self.pseudo_gt[self.pseudo_gt[:, 0] == p]
            ld = len(data)
            if len(data) == 0:
                new_targets.append(targets_per_image)
                continue
            label = data[:, 1].long()
            boxes = data[:, 2:].clone()
            if flip[i] == 1:
                boxes[:, 0] = 1 - boxes[:, 0]
                boxes[:, 2] = 1 - boxes[:, 2]
                boxes = torch.index_select(
                    boxes, -1,
                    torch.as_tensor([2, 1, 0, 3], device=boxes.device))
            boxes = Boxes(boxes)
            boxes.scale(scale_x=W, scale_y=H)
            new_gt_boxes = gt_boxes.cat([gt_boxes, boxes])

            new_gt_masks = PolygonMasks([[]])
            if hasattr(targets_per_image, 'gt_masks'):
                gt_masks = targets_per_image.gt_masks
                new_gt_masks = new_gt_masks.cat([gt_masks] +
                                                [new_gt_masks] * ld)
            else:
                new_gt_masks = new_gt_masks.cat([new_gt_masks] * ld)
            new_gt_classes = torch.cat((gt_classes, label))

            new_target = Instances((H, W))
            new_target.gt_classes = new_gt_classes
            new_target.gt_masks = new_gt_masks
            new_target.gt_boxes = new_gt_boxes
            new_targets.append(new_target)
            lbl, cnt = label.unique(return_counts=True)
        return new_targets
Example #8
def valid_dct_source(coco, dct_dim, mask_size):
    dct_mask_encoding = DctMaskEncoding(dct_dim, mask_size)
    mIoU = []
    Number = 0
    for ann in coco:
        Number += 1
        bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
        w, h = bbox[0][2], bbox[0][3]
        w, h = round(w), round(h)
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                               BoxMode.XYXY_ABS)  # x1y1x2y2
        bbox = Boxes(bbox)

        # mask transform.
        mask = PolygonMasks([ann["segmentation"]])
        mask_source = rasterize_polygons_within_box_for_arbitrary_shape(
            mask.polygons[0], bbox.tensor[0].numpy(), h, w)
        mask_source = mask_source.numpy()  # numpy [h,w] binary

        mask_k = mask.crop_and_resize(
            bbox.tensor, mask_size).float()  # tensor [1,28,28],all 0 or 1
        mask_k = mask_k.view([mask_size, mask_size])
        dct_code = dct_mask_encoding.encode(mask_k)
        mask_re = dct_mask_encoding.decode(dct_code).numpy().squeeze()
        res = cv2.resize(mask_re.astype('float'),
                         dsize=(mask_source.shape[1], mask_source.shape[0]),
                         interpolation=cv2.INTER_LINEAR)

        res = np.where(res >= 0.5, 1, 0)
        res = np.reshape(res, [1, -1])
        mask_source = np.reshape(mask_source, [1, -1])
        res = res.astype(int)

        IoUevaluate = IOUMetric(2)
        IoUevaluate.add_batch(res, mask_source)

        _, _, _, mean_iu, _ = IoUevaluate.evaluate()
        mIoU.append(mean_iu)
        if Number % 1000 == 1:
            print(np.mean(mIoU))
    return np.mean(mIoU)
Example #9
    def __getitem__(self, index):
        ann = self.coco[index]

        # bbox transform.
        bbox = np.array([ann["bbox"]])  # xmin, ymin, w, h
        bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                               BoxMode.XYXY_ABS)  # x1y1x2y2
        bbox = Boxes(bbox)

        # mask transform.
        mask = PolygonMasks([ann["segmentation"]])
        mask = mask.crop_and_resize(bbox.tensor, self.size).float()
        if self.transform:
            if torch.rand(1) < 0.5:
                mask = mask.flip(2)

        # introduce several noise.
        noise_matrix = VALUE_NOISE * torch.rand(mask.shape)
        mask = torch.where(mask > noise_matrix, mask - noise_matrix,
                           noise_matrix)

        return mask
Example #10
    def process_annotation(self, ann, mask_side_len=28):
        # Parse annotation data
        img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
        height, width = img_info["height"], img_info["width"]
        gt_polygons = [
            np.array(p, dtype=np.float64) for p in ann["segmentation"]
        ]
        gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS,
                                  BoxMode.XYXY_ABS)
        gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

        # Run rasterize ..
        torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(
            -1, 4)
        box_bitmasks = {
            "polygon":
            PolygonMasks([gt_polygons
                          ]).crop_and_resize(torch_gt_bbox, mask_side_len)[0],
            "gridsample":
            rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox,
                                                mask_side_len),
            "roialign":
            BitMasks(torch.from_numpy(
                gt_bit_mask[None, :, :])).crop_and_resize(
                    torch_gt_bbox, mask_side_len)[0],
        }

        # Run paste ..
        results = defaultdict(dict)
        for k, box_bitmask in box_bitmasks.items():
            padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
            scaled_boxes = scale_boxes(torch_gt_bbox, scale)

            r = results[k]
            r["old"] = paste_mask_in_image_old(padded_bitmask[0],
                                               scaled_boxes[0],
                                               height,
                                               width,
                                               threshold=0.5)
            r["aligned"] = paste_masks_in_image(box_bitmask[None, :, :],
                                                Boxes(torch_gt_bbox),
                                                (height, width))[0]

        table = []
        for rasterize_method, r in results.items():
            for paste_method, mask in r.items():
                mask = np.asarray(mask)
                iou = iou_between_full_image_bit_masks(
                    gt_bit_mask.astype("uint8"), mask)
                table.append((rasterize_method, paste_method, iou))
        return table
Example #11
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    pan_ids = torch.tensor([obj["pan_id"] for obj in annos])
    target.pan_id = pan_ids

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        poly_masks = PolygonMasks(segms)
        masks = []
        for segm in segms:
            if isinstance(segm, list):
                # polygon
                masks.append(polygons_to_bitmask(segm, *image_size))
            elif isinstance(segm, dict):
                # COCO RLE
                masks.append(mask_util.decode(segm))
            elif isinstance(segm, np.ndarray):
                assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                    segm.ndim
                )
                # mask array
                masks.append(segm)
            else:
                raise ValueError(
                    "Cannot convert segmentation of type '{}' to BitMasks!"
                    "Supported types are: polygons as list[list[float] or ndarray],"
                    " COCO-style RLE as a dict, or a full-image segmentation mask "
                    "as a 2D ndarray.".format(type(segm))
                )
        # torch.from_numpy does not support array with negative stride.
        bit_masks = BitMasks(
            torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
        )
        if mask_format == "polygon":
            target.gt_masks = poly_masks
            target.bit_masks = bit_masks
        else:
            target.gt_masks = bit_masks
            target.poly_masks = poly_masks

    return target
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    if len(annos) and 'light' in annos[0]:
        light = [BoxMode.convert(obj['light'], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
        light = target.gt_light = Boxes(light)
        light.clip(image_size)
    boxes.clip(image_size)
    

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        polygons = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(polygons)
        else:
            assert mask_format == "bitmask", mask_format
            masks = BitMasks.from_polygon_masks(polygons, *image_size)
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
Example #13
    def test_polygon_area(self):
        # Draw polygon boxes
        for d in [5.0, 10.0, 1000.0]:
            polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
            area = polygon.area()[0]
            target = d**2
            self.assertEqual(area, target)

        # Draw polygon triangles
        for d in [5.0, 10.0, 1000.0]:
            polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
            area = polygon.area()[0]
            target = d**2 / 2
            self.assertEqual(area, target)
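# Cross-check sketch (not part of the original test): the expected areas follow from
# the shoelace formula, area = 0.5 * |sum_i (x_i * y_{i+1} - x_{i+1} * y_i)|.
import numpy as np

def shoelace_area(coords):
    # coords is a flat list [x0, y0, x1, y1, ...], as in PolygonMasks
    xy = np.asarray(coords, dtype=np.float64).reshape(-1, 2)
    x, y = xy[:, 0], xy[:, 1]
    return 0.5 * abs(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))

assert shoelace_area([0, 0, 0, 10, 10, 10, 10, 0]) == 100.0  # square with d = 10
assert shoelace_area([0, 0, 0, 10, 10, 10]) == 50.0          # triangle with d = 10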
Example #14
    def filter_mask_size(self,
                         min_thresh=100,
                         max_thresh=100000,
                         to_rle=False):
        """
        Remove instances with mask areas outside of the interval (min_thresh, max_thresh.)

        Useful for removing small instances (ie 1 or even 0 pixels in segmentation mask) or
        abnormally large outliers (ie many instances combined in a giant blob.) Note that
        this does not modify the InstanceSet in place and returns an Instances object.

        Parameters
        -----------
        min_thresh, max_thresh: int, float or None
            only instances with mask areas greater than min thresh and smaller than max_thresh are kept.
            If either threshold is None, it is not applied (ie if both min_thresh and max_thresh are None
            then all masks are kept.)

        to_rle: bool
            if True, masks are converted to RLE before filtering. The inlier masks will be returned as RLE.
            Otherwise, mask format is preserved.

        Returns
        ----------
        instances_filtered: detectron2.structures.Instances object
            Instances object only containing instances with mask areas in the threshold range.

        """

        masks = self.instances.masks
        if to_rle:
            masks = RLEMasks(masks_to_rle(masks, self.instances.image_size))
        masktype = type(masks)
        # determine which instances contain inlier masks
        areas = mask_areas(masks)

        if min_thresh is None:
            inlier_min = np.ones(areas.shape, bool)
        else:
            inlier_min = areas > min_thresh
        if max_thresh is None:
            inlier_max = np.ones(areas.shape, bool)
        else:
            inlier_max = areas < max_thresh

        inliers_bool = np.logical_and(inlier_min, inlier_max)

        # can't iterate through polygonmasks properly, case must be handled separately
        if masktype == PolygonMasks:
            polygons = [p for p, b in zip(masks.polygons, inliers_bool) if b]
            masks = PolygonMasks(polygons)
        else:
            masks = masks[inliers_bool]

        new_instance_fields = {}
        for key, value in self.instances._fields.items():
            if key == 'masks':
                new_instance_fields[key] = masks
            else:
                new_instance_fields[key] = value[inliers_bool]

        instances_filtered = Instances(self.instances.image_size,
                                       **new_instance_fields)
        return instances_filtered
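# Standalone sketch of the inlier selection above (illustrative, plain numpy):
# only instances whose mask area lies strictly inside (min_thresh, max_thresh) survive.
import numpy as np

areas = np.array([3, 250, 8_000, 450_000])  # hypothetical mask areas in pixels
min_thresh, max_thresh = 100, 100_000
inlier_min = np.ones(areas.shape, bool) if min_thresh is None else areas > min_thresh
inlier_max = np.ones(areas.shape, bool) if max_thresh is None else areas < max_thresh
print(np.logical_and(inlier_min, inlier_max))  # [False  True  True False]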
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    categories = [{
        "id": id,
        "name": name
    } for id, name in enumerate(
        MetadataCatalog.get(dataset_name).thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_dict in dataset_dicts:
        coco_image = {
            "id": image_dict["image_id"],
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes (Boxes expects XYXY coordinates)
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = image_dict["image_id"]
            coco_annotation["bbox"] = bbox
            coco_annotation["area"] = area
            coco_annotation["category_id"] = annotation["category_id"]
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
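# Illustrative usage sketch (not from the original source): dump the result with the
# standard json module. "my_dataset_train" is a hypothetical name that must already be
# registered in DatasetCatalog/MetadataCatalog.
import json

coco_dict = convert_to_coco_dict("my_dataset_train")
with open("my_dataset_train_coco_format.json", "w") as f:
    json.dump(coco_dict, f)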
Example #16
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes
    if len(annos) and "segmentation" in annos[0]:
        segm = [obj["segmentation"]
                for obj in annos]  # it may be bitmask instead of polygon
        visible_segm = [obj["visible_mask"] for obj in annos
                        ]  # it may be bitmask instead of polygon

        if mask_format == "polygon":
            masks = PolygonMasks(segm)
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                visible_masks = PolygonMasks(visible_segm)
        else:
            assert mask_format == "bitmask", mask_format

            if not isinstance(segm[0], list):
                masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in segm]))
                # visible_masks = visible_polygons
                # visible_masks = BitMasks(torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                masks = BitMasks.from_polygon_masks(segm, *image_size)
                # visible_masks = BitMasks.from_polygon_masks(visible_polygons, *image_size)
                # print('masks:{}'.format(polygons))
            if not isinstance(visible_segm[0], list):
                visible_masks = visible_segm
                visible_masks = BitMasks(
                    torch.stack([torch.from_numpy(x) for x in visible_masks]))
            else:
                # print('visible_masks:{}'.format(visible_polygons))
                visible_masks = BitMasks.from_polygon_masks(
                    visible_segm, *image_size)

        target.gt_masks = masks
        target.gt_visible_masks = visible_masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
Example #17
def convert_to_coco_dict_from_detdict(dataset_dicts, metadata):
    """
        See  `convert_to_coco_dict`.
    """
    # dataset_dicts = DatasetCatalog.get(dataset_name)
    # metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [
        {"id": reverse_id_mapper(id), "name": name}
        for id, name in enumerate(metadata.thing_classes)
    ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
Example #18
def convert_to_coco_dict(dataset_name):
    """
    Convert a generic dataset into COCO json format

    Generic dataset description can be found here:
    https://github.com/facebookresearch/detectron2/blob/master/docs/tutorials/datasets.md#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name: name of the source dataset
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    categories = [{
        "id": id,
        "name": name
    } for id, name in enumerate(
        MetadataCatalog.get(dataset_name).thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    images = []
    annotations = []

    # just for logging purposes
    _annotation_keys = Counter()

    for image_dict in dataset_dicts:
        image = {
            "id": image_dict["image_id"],
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }

        images.append(image)

        # deep-copying various annotations from the original format
        # can be bbox, segmentation, keypoint, etc.
        anns_per_image = deepcopy(image_dict["annotations"])
        for annotation in anns_per_image:
            # COCO requirement: linking annotations to images
            annotation["id"] = len(annotations) + 1
            annotation["image_id"] = image_dict["image_id"]

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)
            del annotation["bbox_mode"]
            # TODO: make BBOX_MODE serializable, otherwise remove it
            annotation["bbox"] = bbox
            annotation["iscrowd"] = 0

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0]
            else:
                # Computing areas using bounding boxes (Boxes expects XYXY coordinates)
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0]
            annotation["area"] = float(area)

            # Keeping track of fields present in instances
            _annotation_keys.update(annotation.keys())

            annotations.append(annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(images)}, num annotations: {len(annotations)}")
    logger.info(f"Annotation fields: {_annotation_keys}")
    if any(count != len(annotations) for count in _annotation_keys.values()):
        logger.warning(
            "Annotation fields are not homogeneous between instances")

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": images,
        "annotations": annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
Example #19
    def read_from_ddict(self, ddict, inplace=True):
        """
        test
        """
        """
        Read ground truth annotations from data dicts.
        """ """

        Reads data dicts and stores the information as attributes of the InstanceSet object.
        The descriptions of the attributes are provided in the documentation for self.__init__().

        Parameters
        -----------
        ddict: list
            List of data dicts in format described below in Notes.

        inplace: bool
            If True, the object is modified in-place. Else, the InstanceSet object is returned.

        Returns
        -----------
        self (optional): InstanceSet
            only returned if inplace == False

        Notes
        ------

        Data dicts should have the following format:
            -'file_name': str or Path object
                        path to image corresponding to annotations
            -'mask_format': str
                          'polygonmask' if segmentation masks are lists of XY coordinates, or
                          'bitmask'  if segmentation masks are RLE encoded segmentation masks
            -'height': int
                    image height in pixels
            -'width': int
                    image width in pixels
            -'annotations': list(dic)
                            list of annotations. See the annotation format below.
            -'num_instances': int
                        equal to len(annotations)- number of instances present in the image

        The dictionary format for the annotation dictionaries is as follows:
            -'category_id': int
                            numeric class label for the instance.
            -'bbox_mode': detectron2.structures.BoxMode object
                        describes the format of the bounding box coordinates.
                        The default is BoxMode.XYXY_ABS.
            -'bbox':  list(int)
                    4-element list of bbox coordinates
            -'segmentation': list
                            list containing:
                               - a list of polygon coordinates (mask format is polygonmasks)
                               - dictionaries  of RLE mask encodings (mask format is bitmasks)

        """

        # default values-always set
        self.pred_or_gt = 'gt'  # ddict assumed to be ground truth labels from get_ddict function

        # required values- function will error out if these are not set
        self.filepath = Path(ddict['file_name'])
        self.mask_format = ddict['mask_format']
        image_size = (ddict['height'], ddict['width'])
        # instances_gt = annotations_to_instances(ddict['annotations'], image_size, self.mask_format)

        class_idx = np.asarray(
            [anno['category_id'] for anno in ddict['annotations']], int)
        bbox = np.stack([anno['bbox'] for anno in ddict['annotations']])
        segs = [anno['segmentation'] for anno in ddict['annotations']]
        segtype = type(segs[0])
        if segtype == dict:
            # RLE encoded mask
            masks = RLEMasks(segs)

        elif segtype == np.ndarray:
            if segs[0].dtype == bool:
                #  bitmask
                masks = BitMasks(np.stack(segs))

        else:
            # list of (list or array) of coords in format [x0,y0,x1,y1,...xn,yn]
            masks = PolygonMasks(segs)

        instances = Instances(
            image_size, **{
                'masks': masks,
                'boxes': bbox,
                'class_idx': class_idx
            })
        self.instances = instances
        self.instances.colors = visualize.random_colors(
            len(instances), self.randomstate)

        # optional values- default to None if not in ddict
        self.dataset_class = ddict.get('dataset_class', None)
        HFW = ddict.get('HFW', None)
        HFW_units = None
        if HFW is not None:
            try:
                HFW = float(HFW)
            except ValueError:
                split = HFW.split(' ')
                if len(split) == 2:
                    HFW = float(split[0])
                    HFW_units = split[1]
        self.HFW = HFW
        self.HFW_units = HFW_units

        if not inplace:
            return self
        return
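# Minimal data-dict sketch matching the format documented above (illustrative values;
# assumes the BoxMode import used elsewhere in this module, and that `InstanceSet`
# is the class this method belongs to):
demo_ddict = {
    "file_name": "images/sample.png",
    "mask_format": "polygonmask",
    "height": 480,
    "width": 640,
    "num_instances": 1,
    "annotations": [
        {
            "category_id": 0,
            "bbox_mode": BoxMode.XYXY_ABS,
            "bbox": [10, 20, 60, 100],
            "segmentation": [[10.0, 20.0, 60.0, 20.0, 60.0, 100.0, 10.0, 100.0]],
        }
    ],
}
# instance_set = InstanceSet()
# instance_set.read_from_ddict(demo_ddict, inplace=True)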
Example #20
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segm = [obj["segmentation"] for obj in annos]
        visible = [obj["visible_mask"] for obj in annos] 
        invisible = []
        for obj in annos:
            if "invisible_mask" in obj:
                invisible.append(obj["invisible_mask"])
            else:
                invisible.append([[0.0,0.0,0.0,0.0,0.0,0.0]])
                
        if mask_format == "polygon":
            # gt amodal masks per image 
            a_masks = PolygonMasks(segm)
            # gt visible masks per image 
            v_masks = PolygonMasks(visible)
            # gt invisible masks per image 
            i_masks = PolygonMasks(invisible)  
        else:
            assert mask_format == "bitmask", mask_format
            a_masks = []
            v_masks = []
            i_masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    a_masks.append(polygons_to_bitmask(segm, *image_size))
                    v_masks.append(polygons_to_bitmask(visible, *image_size))
                    i_masks.append(polygons_to_bitmask(invisible, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    a_masks.append(mask_util.decode(segm))
                    v_masks.append(mask_util.decode(visible))
                    i_masks.append(mask_util.decode(invisible))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    a_masks.append(segm)
                    v_masks.append(visible)
                    i_masks.append(invisible)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            a_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in a_masks])
            )
            v_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in v_masks])
            )
            i_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in i_masks])
            )
            
        # original mask head now is amodal mask head 
        target.gt_masks = a_masks
        target.gt_v_masks = v_masks
        target.gt_i_masks = i_masks
     
    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extend the function annotations_to_instances() to support attributes
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones(
            (len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
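# Illustrative usage sketch (not from the original source): each annotation may carry
# "attribute_ids"; up to max_attr_per_ins ids are copied per instance and the remaining
# slots stay -1.
demo_annos = [
    {
        "bbox": [10.0, 20.0, 50.0, 80.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 5,
        "attribute_ids": [3, 17],
    }
]
demo_instances = annotations_to_instances_with_attributes(
    demo_annos, image_size=(480, 640), load_attributes=True)
print(demo_instances.gt_attributes.shape)   # torch.Size([1, 16])
print(demo_instances.gt_attributes[0, :3])  # tensor([ 3, 17, -1])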
Example #22
def convert_to_coco_dict(dataset_name):
    """
    Convert a dataset in detectron2's standard format into COCO json format

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name:
            name of the source dataset
            must be registered in DatasetCatalog and in detectron2's standard format
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {
            v: k
            for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
        }
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[
            contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

    categories = [{
        "id": reverse_id_mapper(id),
        "name": name
    } for id, name in enumerate(metadata.thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS,
                                          BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)
            coco_annotation["category_id"] = reverse_id_mapper(
                annotation["category_id"])

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description":
        "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict
Example #23
def convert_to_coco_dict(dataset_name):
    """
    Convert an instance detection/segmentation or keypoint detection dataset
    in detectron2's standard format into COCO json format.

    Generic dataset description can be found here:
    https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name (str):
            name of the source dataset
            Must be registered in DatasetCatalog and in detectron2's standard format.
            Must have corresponding metadata "thing_classes"
    Returns:
        coco_dict: serializable dict in COCO json format
    """

    dataset_dicts = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()}
        reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id]  # noqa
    else:
        reverse_id_mapper = lambda contiguous_id: contiguous_id  # noqa

#     categories = [
#         {"id": reverse_id_mapper(id), "name": name}
#         for id, name in enumerate(metadata.thing_classes)
#     ]
    categories = [ {"id": 1, "name": 'lesion'} ]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": int(image_dict["width"]),
            "height": int(image_dict["height"]),
            "file_name": str(image_dict["file_name"]),
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict.get("annotations", [])
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format for axis-align and XYWHA for rotated
            bbox = annotation["bbox"]
            if isinstance(bbox, np.ndarray):
                if bbox.ndim != 1:
                    raise ValueError(f"bbox has to be 1-dimensional. Got shape={bbox.shape}.")
                bbox = bbox.tolist()
            if len(bbox) not in [4, 5]:
                raise ValueError(f"bbox has to has length 4 or 5. Got {bbox}.")
            from_bbox_mode = annotation["bbox_mode"]
            to_bbox_mode = BoxMode.XYWH_ABS if len(bbox) == 4 else BoxMode.XYWHA_ABS
            bbox = BoxMode.convert(bbox, from_bbox_mode, to_bbox_mode)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances by counting the pixels
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                if isinstance(segmentation, list):
                    polygons = PolygonMasks([segmentation])
                    area = polygons.area()[0].item()
                elif isinstance(segmentation, dict):  # RLE
                    area = mask_util.area(segmentation).item()
                else:
                    raise TypeError(f"Unknown segmentation type {type(segmentation)}!")
            else:
                # Computing areas using bounding boxes
                if to_bbox_mode == BoxMode.XYWH_ABS:
                    bbox_xy = BoxMode.convert(bbox, to_bbox_mode, BoxMode.XYXY_ABS)
                    area = Boxes([bbox_xy]).area()[0].item()
                else:
                    area = RotatedBoxes([bbox]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # For COCO format consistency we subtract 0.5
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = float(area)
            coco_annotation["iscrowd"] = int(annotation.get("iscrowd", 0))
            coco_annotation["category_id"] = int(reverse_id_mapper(annotation["category_id"]))

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                seg = coco_annotation["segmentation"] = annotation["segmentation"]
                if isinstance(seg, dict):  # RLE
                    counts = seg["counts"]
                    if not isinstance(counts, str):
                        # make it json-serializable
                        seg["counts"] = counts.decode("ascii")

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for Detectron2.",
    }
    coco_dict = {"info": info, "images": coco_images, "categories": categories, "licenses": None}
    if len(coco_annotations) > 0:
        coco_dict["annotations"] = coco_annotations
    return coco_dict
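# Sketch of the rotated-box path above (illustrative): a 5-element bbox stays in
# XYWHA_ABS and its area comes from RotatedBoxes rather than Boxes.
rot_bbox = [100.0, 100.0, 60.0, 30.0, 45.0]  # cx, cy, w, h, angle in degrees
rot_bbox = BoxMode.convert(rot_bbox, BoxMode.XYWHA_ABS, BoxMode.XYWHA_ABS)
print(RotatedBoxes([rot_bbox]).area()[0].item())  # 1800.0 == 60 * 30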
def annotations_to_instances(annos,
                             image_size,
                             mask_format="polygon",
                             max_num_planes=20):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Args:
        annos (list[dict]): a list of annotations, one per instance.
        image_size (tuple): height, width
    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], BoxMode(obj["bbox_mode"]),
                        BoxMode.XYXY_ABS) for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert (
                        segm.ndim == 2
                    ), "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "plane" in annos[0]:
        plane = [torch.tensor(obj["plane"]) for obj in annos]
        plane_idx = [torch.tensor([i]) for i in range(len(plane))]
        target.gt_planes = torch.stack(plane, dim=0)
        target.gt_plane_idx = torch.stack(plane_idx, dim=0)
    return target
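# Illustrative usage sketch (not from the original source): each annotation may carry a
# per-instance "plane" (e.g. plane parameters); they are stacked into gt_planes along
# with a running index in gt_plane_idx.
demo_annos = [
    {
        "bbox": [10.0, 20.0, 50.0, 80.0],
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 0,
        "plane": [0.0, 0.0, 1.0],
    }
]
demo_instances = annotations_to_instances(demo_annos, image_size=(480, 640))
print(demo_instances.gt_planes)     # tensor([[0., 0., 1.]])
print(demo_instances.gt_plane_idx)  # tensor([[0]])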