Example #1
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            masks = BitMasks(torch.stack([torch.from_numpy(x) for x in masks]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
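As a quick orientation, here is a minimal, hypothetical call to the function above. The annotation values are invented for illustration; it assumes detectron2 and torch are importable and the function is in scope.

from detectron2.structures import BoxMode

annos = [{
    "bbox": [10.0, 20.0, 50.0, 80.0],          # XYWH in absolute pixels
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 3,
    "segmentation": [[10.0, 20.0, 60.0, 20.0, 60.0, 100.0, 10.0, 100.0]],
}]
instances = annotations_to_instances(annos, image_size=(480, 640))
print(instances.gt_boxes)       # Boxes converted to XYXY_ABS and clipped to the image
print(instances.gt_classes)     # tensor([3])
print(len(instances.gt_masks))  # 1 (PolygonMasks, since mask_format="polygon")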
Example #2
def get_octagon_mask(octagons, image_shape):
    # input: N x 16
    octagons_np = octagons.cpu().numpy()
    masks = [
        polygons_to_bitmask([p], image_shape[0], image_shape[1])
        for p in octagons_np
    ]
    if not masks:
        return octagons.new_empty((0, ) + image_shape, dtype=torch.uint8)
    return torch.stack([torch.from_numpy(x) for x in masks])
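A small illustrative call (inputs invented, assuming the function above and torch are available): one octagon given as eight (x, y) vertices flattened into 16 numbers, rasterized onto a 100x100 image.

import torch

octagons = torch.tensor([[30., 10., 70., 10., 90., 30., 90., 70.,
                          70., 90., 30., 90., 10., 70., 10., 30.]])
masks = get_octagon_mask(octagons, (100, 100))
print(masks.shape)  # torch.Size([1, 100, 100]), one boolean bitmask per octagon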
Example #3
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    pan_ids = torch.tensor([obj["pan_id"] for obj in annos])
    target.pan_id = pan_ids

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        poly_masks = PolygonMasks(segms)
        masks = []
        for segm in segms:
            if isinstance(segm, list):
                # polygon
                masks.append(polygons_to_bitmask(segm, *image_size))
            elif isinstance(segm, dict):
                # COCO RLE
                masks.append(mask_util.decode(segm))
            elif isinstance(segm, np.ndarray):
                assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                    segm.ndim
                )
                # mask array
                masks.append(segm)
            else:
                raise ValueError(
                    "Cannot convert segmentation of type '{}' to BitMasks!"
                    "Supported types are: polygons as list[list[float] or ndarray],"
                    " COCO-style RLE as a dict, or a full-image segmentation mask "
                    "as a 2D ndarray.".format(type(segm))
                )
        # torch.from_numpy does not support array with negative stride.
        bit_masks = BitMasks(
            torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks])
        )
        if mask_format == "polygon":
            target.gt_masks = poly_masks
            target.bit_masks = bit_masks
        else:
            target.gt_masks = bit_masks
            target.poly_masks = poly_masks

    return target
Example #4
def get_bbox_mask(bboxes, image_shape):
    # input: N x 4
    x1, y1 = bboxes[:, 0], bboxes[:, 1]
    x2, y2 = bboxes[:, 0], bboxes[:, 3]
    x3, y3 = bboxes[:, 2], bboxes[:, 3]
    x4, y4 = bboxes[:, 2], bboxes[:, 1]
    rectangles = torch.stack([x1, y1, x2, y2, x3, y3, x4, y4],
                             dim=1).cpu().numpy()
    masks = [
        polygons_to_bitmask([p], image_shape[0], image_shape[1])
        for p in rectangles
    ]
    if not masks:
        return bboxes.new_empty((0, ) + image_shape, dtype=torch.uint8)
    return torch.stack([torch.from_numpy(x) for x in masks])
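For illustration (values invented), the helper above turns each XYXY box into a rectangular polygon and rasterizes it, returning one full bitmask per box:

import torch

bboxes = torch.tensor([[5., 5., 20., 30.],
                       [10., 40., 50., 60.]])   # two boxes in XYXY format
box_masks = get_bbox_mask(bboxes, (64, 64))
print(box_masks.shape)  # torch.Size([2, 64, 64])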
Example #5
def rasterize_polygons_within_box_for_arbitrary_shape(
        polygons: List[np.ndarray], box: np.ndarray, mask_size_h: int,
        mask_size_w: int) -> torch.Tensor:
    """
    New rasterize. Rasterize polygons within box with specific size.
    Args:
        polygons (list[ndarray[float]]): a list of polygons, which represents an instance.
        box: 4-element numpy array
        mask_size_h (int):
        mask_size_w (int):

    Returns:
        Tensor: BoolTensor of shape (mask_size, mask_size)
    """
    # 1. Shift the polygons w.r.t the boxes
    w, h = box[2] - box[0], box[3] - box[1]

    polygons = copy.deepcopy(polygons)
    for p in polygons:
        p[0::2] = p[0::2] - box[0]
        p[1::2] = p[1::2] - box[1]

    # 2. Rescale the polygons to the new box size
    # max() to avoid division by small number
    ratio_h = mask_size_h / max(h, 0.1)
    ratio_w = mask_size_w / max(w, 0.1)

    if ratio_h == ratio_w:
        for p in polygons:
            p *= ratio_h
    else:
        for p in polygons:
            p[0::2] *= ratio_w
            p[1::2] *= ratio_h

    # 3. Rasterize the polygons with coco api
    mask = polygons_to_bitmask(polygons, mask_size_h, mask_size_w)
    mask = torch.from_numpy(mask)
    return mask
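A hedged usage sketch with made-up numbers: rasterize a square polygon lying inside the box [10, 10, 50, 50] onto a 28x28 mask (same aspect ratio here, so both scale factors coincide).

import numpy as np

polygon = np.array([15., 15., 45., 15., 45., 45., 15., 45.])
box = np.array([10., 10., 50., 50.])
mask = rasterize_polygons_within_box_for_arbitrary_shape([polygon], box, 28, 28)
print(mask.shape, mask.dtype)  # torch.Size([28, 28]) torch.bool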
Example #6
def AP_match0(_gts, _dts, ious, catIds, imgIds, imgs):
    _gts = copy.deepcopy(_gts)
    _dts = copy.deepcopy(_dts)
    
    for imgId in imgIds:
        for catId in catIds:
            gt = _gts[imgId, catId]
            dt = _dts[imgId, catId]
            if len(gt) == 0:
                continue
            if 'poly' in gt[0].keys():
                polygons = [convert(gt_['poly']) for gt_ in gt]
            else:
                polygons = [convert(gt_['segmentation']) for gt_ in gt]
            iou = ious[imgId, catId]
            per_im_bitmasks = []
            for per_polygon in polygons:
                if len(per_polygon) == 0:
                    continue
                # crowd annotations come as RLE dicts; a rough conversion is fine here
                if isinstance(per_polygon, dict):
                    compact_rle = mask_util.frPyObjects([per_polygon], imgs[imgId]['height'], imgs[imgId]['width'])
                    bitmask = mask_util.decode(compact_rle).squeeze(2)
                else:
                    bitmask = polygons_to_bitmask(per_polygon, imgs[imgId]['height'], imgs[imgId]['width'])

                per_im_bitmasks.append(bitmask)
            gt_bitmasks = np.stack(per_im_bitmasks, axis=0)
            box_level = cal_box_levels(gt_bitmasks, max_level=2)
            if len(iou) != 0:
                max_idx = np.argmax(iou, axis=1)
                dt_box_level = box_level[max_idx]
                keep_dt = dt_box_level < 1
                _dts[imgId, catId] = list(np.array(dt)[keep_dt])
            keep_gt = box_level < 1
            _gts[imgId, catId] = list(np.array(gt)[keep_gt])

    return _gts, _dts
Example #7
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segm = [obj["segmentation"] for obj in annos]
        visible = [obj["visible_mask"] for obj in annos] 
        invisible = []
        for obj in annos:
            if "invisible_mask" in obj:
                invisible.append(obj["invisible_mask"])
            else:
                invisible.append([[0.0,0.0,0.0,0.0,0.0,0.0]])
                
        if mask_format == "polygon":
            # gt amodal masks per image 
            a_masks = PolygonMasks(segms)
            # gt visible masks per image 
            v_masks = PolygonMasks(visible)
            # gt invisible masks per image 
            i_masks = PolygonMasks(invisible)  
        else:
            assert mask_format == "bitmask", mask_format
            a_masks = []
            v_masks = []
            i_masks = []
            for segm, vis, invis in zip(segms, visible, invisible):
                if isinstance(segm, list):
                    # polygon
                    a_masks.append(polygons_to_bitmask(segm, *image_size))
                    v_masks.append(polygons_to_bitmask(vis, *image_size))
                    i_masks.append(polygons_to_bitmask(invis, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    a_masks.append(mask_util.decode(segm))
                    v_masks.append(mask_util.decode(vis))
                    i_masks.append(mask_util.decode(invis))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim
                    )
                    # mask array
                    a_masks.append(segm)
                    v_masks.append(vis)
                    i_masks.append(invis)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm))
                    )
            # torch.from_numpy does not support array with negative stride.
            a_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in a_masks])
            )
            v_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in v_masks])
            )
            i_masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in i_masks])
            )
            
        # original mask head now is amodal mask head 
        target.gt_masks = a_masks
        target.gt_v_masks = v_masks
        target.gt_i_masks = i_masks
     
    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    return target
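The amodal variant expects each annotation to also carry "visible_mask" and (optionally) "invisible_mask" polygons. A minimal, invented input in polygon format might look like this, assuming the function above and detectron2 are in scope:

from detectron2.structures import BoxMode

annos = [{
    "bbox": [10.0, 10.0, 40.0, 40.0],
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 0,
    "segmentation": [[10., 10., 50., 10., 50., 50., 10., 50.]],   # amodal mask
    "visible_mask": [[10., 10., 50., 10., 50., 30., 10., 30.]],
    "invisible_mask": [[10., 30., 50., 30., 50., 50., 10., 50.]],
}]
target = annotations_to_instances(annos, (64, 64), mask_format="polygon")
print(target.gt_masks, target.gt_v_masks, target.gt_i_masks)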
Example #8
def annotations_to_instances_with_attributes(annos,
                                             image_size,
                                             mask_format="polygon",
                                             load_attributes=False,
                                             max_attr_per_ins=16):
    """
    Extend the function annotations_to_instances() to support attributes
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = [obj.get("keypoints", []) for obj in annos]
        target.gt_keypoints = Keypoints(kpts)

    if len(annos) and load_attributes:
        attributes = -torch.ones(
            (len(annos), max_attr_per_ins), dtype=torch.int64)
        for idx, anno in enumerate(annos):
            if "attribute_ids" in anno:
                for jdx, attr_id in enumerate(anno["attribute_ids"]):
                    attributes[idx, jdx] = attr_id
        target.gt_attributes = attributes

    return target
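An invented example for the attribute-aware variant: each instance may carry "attribute_ids", and unset slots remain -1 in the resulting (N, max_attr_per_ins) tensor.

from detectron2.structures import BoxMode

annos = [{
    "bbox": [0.0, 0.0, 30.0, 30.0],
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 7,
    "attribute_ids": [2, 5],
}]
target = annotations_to_instances_with_attributes(
    annos, (64, 64), load_attributes=True, max_attr_per_ins=16)
print(target.gt_attributes[0, :4])  # tensor([ 2,  5, -1, -1])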
Example #9
def load_coco_json(json_file,
                   image_root,
                   dataset_name=None,
                   extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exists.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See
        `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(
            json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        print('meta:', meta)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [
            c["name"] for c in sorted(cats, key=lambda x: x["id"])
        ]
        meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).

        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning("""
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
""")
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        ann_ids = [
            ann["id"] for anns_per_image in anns for ann in anns_per_image
        ]
        assert len(set(ann_ids)) == len(
            ann_ids), "Annotation ids in '{}' are not unique!".format(
                json_file)

    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in COCO format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []

    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"
                ] + (extra_annotation_keys or [])

    sum_box = 0
    sum_co_box = 0
    intersect_rate = 0.0
    intersect_num = 0

    num_instances_without_valid_segmentation = 0
    index_c = 0

    for jdex, (img_dict, anno_dict_list) in enumerate(imgs_anns):
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]
        print('file name:', jdex, ':', record["file_name"])
        objs = []
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0

            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # either list[list[float]] or dict(RLE)
                if not isinstance(segm, dict):
                    # filter out invalid polygons (< 3 points)
                    segm = [
                        poly for poly in segm
                        if len(poly) % 2 == 0 and len(poly) >= 6
                    ]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        seg_list = []
        for obj in objs:
            seg_list.append(obj['segmentation'])
        #print('seg list:', seg_list)

        #dirname = "mask-vis"
        #os.makedirs(dirname, exist_ok=True)

        bitmask_list = []
        if len(seg_list) > 0:
            for index, seg in enumerate(seg_list):
                #print('seg len:', len(seg))
                invalid = False
                for sub_seg in seg:
                    #print('seg len:', len(sub_seg))
                    if len(sub_seg) < 6:
                        invalid = True
                if not invalid:
                    bitmask = polygons_to_bitmask(seg, img_dict["height"],
                                                  img_dict["width"])
                else:
                    bitmask = np.zeros(
                        (int(img_dict["height"]), int(img_dict["width"])),
                        dtype=bool)

                bitmask_list.append(bitmask.astype('int'))

        box_list = []
        for obj in objs:
            box_list.append([
                obj['bbox'][0], obj['bbox'][1],
                obj['bbox'][0] + obj['bbox'][2],
                obj['bbox'][1] + obj['bbox'][3]
            ])

        box_mask_list = []
        for index, obj in enumerate(objs):
            box_mask = np.zeros(
                (int(img_dict["height"]), int(img_dict["width"])), dtype=int)
            box_mask[int(box_list[index][1]):int(box_list[index][3]),
                     int(box_list[index][0]):int(box_list[index][2])] = 1
            box_mask_list.append(box_mask)

        sum_box += len(box_list)

        for index1, a_box in enumerate(box_list):
            union_mask_whole = np.zeros(
                (int(img_dict["height"]), int(img_dict["width"])), dtype=int)
            for index2, b_box in enumerate(box_list):
                if index1 != index2:
                    iou = bb_intersection_over_union(a_box, b_box)
                    if iou > 0.05:
                        union_mask = np.multiply(box_mask_list[index1],
                                                 bitmask_list[index2])
                        union_mask_whole += union_mask

            print("===========================================")
            print('bit mask area:', bitmask_list[index1].sum())
            union_mask_whole[union_mask_whole > 1.0] = 1.0
            print('cropped union mask area:', union_mask_whole.sum())
            intersect_mask = union_mask_whole * bitmask_list[index1]
            print('intersect mask area:', intersect_mask.sum())
            print('intersect rate:',
                  intersect_mask.sum() / float(bitmask_list[index1].sum()))
            print("===========================================")

            if intersect_mask.sum() >= 1.0:
                intersect_num += 1

            if float(bitmask_list[index1].sum()) > 1.0:
                intersect_rate += intersect_mask.sum() / float(
                    bitmask_list[index1].sum())

            union_mask_non_zero_num = np.count_nonzero(
                union_mask_whole.astype(int))
            record["annotations"][index1]['bg_object_segmentation'] = []
            if union_mask_non_zero_num > 20:
                sum_co_box += 1
                contours = measure.find_contours(union_mask_whole.astype(int),
                                                 0)
                for contour in contours:
                    if contour.shape[0] > 500:
                        contour = np.flip(contour, axis=1)[::10, :]
                    elif contour.shape[0] > 200:
                        contour = np.flip(contour, axis=1)[::5, :]
                    elif contour.shape[0] > 100:
                        contour = np.flip(contour, axis=1)[::3, :]
                    elif contour.shape[0] > 50:
                        contour = np.flip(contour, axis=1)[::2, :]
                    else:
                        contour = np.flip(contour, axis=1)

                    segmentation = contour.ravel().tolist()
                    record["annotations"][index1][
                        'bg_object_segmentation'].append(segmentation)

        dataset_dicts.append(record)
        #if jdex > 10000:
        #    break

    #print('sum intersect rate:', intersect_rate)
    #print('sum box:', sum_box)

    avg_intersect_rate = intersect_rate / float(sum_box)
    avg_intersect_rate_over_inter = intersect_rate / float(intersect_num)
    #print('avg rate:', avg_intersect_rate)
    #print('avg rate over intersect:', avg_intersect_rate_over_inter)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. "
            "There might be issues in your dataset generation process.".format(
                num_instances_without_valid_segmentation))
    return dataset_dicts
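For reference, the loader above is typically called as below; the paths and dataset name are placeholders, and the returned dicts follow the standard Detectron2 dataset format plus the extra "bg_object_segmentation" polygons computed above.

dataset_dicts = load_coco_json(
    "datasets/coco/annotations/instances_train2017.json",  # placeholder path
    "datasets/coco/train2017",                             # placeholder image root
    dataset_name="coco_2017_train",
)
print(len(dataset_dicts), sorted(dataset_dicts[0].keys()))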
Example #10
def annotations_to_instances(annos,
                             image_size,
                             mask_format="polygon",
                             max_num_planes=20):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.
    Args:
        annos (list[dict]): a list of annotations, one per instance.
        image_size (tuple): height, width
    Returns:
        Instances: It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], BoxMode(obj["bbox_mode"]),
                        BoxMode.XYXY_ABS) for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert (
                        segm.ndim == 2
                    ), "Expect segmentation of 2 dimensions, got {}.".format(
                        segm.ndim)
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks!"
                        "Supported types are: polygons as list[list[float] or ndarray],"
                        " COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "plane" in annos[0]:
        plane = [torch.tensor(obj["plane"]) for obj in annos]
        plane_idx = [torch.tensor([i]) for i in range(len(plane))]
        target.gt_planes = torch.stack(plane, dim=0)
        target.gt_plane_idx = torch.stack(plane_idx, dim=0)
    return target
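A minimal, invented input for the plane-aware variant above: each annotation additionally carries a "plane" vector, which ends up in gt_planes / gt_plane_idx.

from detectron2.structures import BoxMode

annos = [{
    "bbox": [5.0, 5.0, 20.0, 20.0],
    "bbox_mode": BoxMode.XYWH_ABS,
    "category_id": 1,
    "plane": [0.0, 0.0, 1.0],
}]
target = annotations_to_instances(annos, (32, 32))
print(target.gt_planes.shape)  # torch.Size([1, 3])
print(target.gt_plane_idx)     # tensor([[0]])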