def process_annotation(self, ann, mask_side_len=28):
    # Parse annotation data
    img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0]
    height, width = img_info["height"], img_info["width"]
    gt_polygons = [
        np.array(p, dtype=np.float64) for p in ann["segmentation"]
    ]
    gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
    gt_bbox = np.array(gt_bbox)
    gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width)

    # Run rasterize ..
    torch_gt_bbox = torch.Tensor(gt_bbox)[None, :].to(dtype=torch.float32)
    box_bitmasks = {
        "polygon": PolygonMasks([gt_polygons]).crop_and_resize(
            torch_gt_bbox, mask_side_len)[0],
        "gridsample": rasterize_polygons_with_grid_sample(
            gt_bit_mask, gt_bbox, mask_side_len),
        "roialign": BitMasks(torch.from_numpy(
            gt_bit_mask[None, :, :])).crop_and_resize(
                torch_gt_bbox, mask_side_len)[0],
    }

    # Run paste ..
    results = defaultdict(dict)
    for k, box_bitmask in box_bitmasks.items():
        padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1)
        scaled_boxes = scale_boxes(torch_gt_bbox, scale)

        r = results[k]
        r["old"] = paste_mask_in_image_old(
            padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5)
        r["aligned"] = paste_masks_in_image(
            box_bitmask[None, :, :], Boxes(gt_bbox[None, :]),
            (height, width))[0]

    table = []
    for rasterize_method, r in results.items():
        for paste_method, mask in r.items():
            mask = np.asarray(mask)
            iou = iou_between_full_image_bit_masks(
                gt_bit_mask.astype("uint8"), mask)
            table.append((rasterize_method, paste_method, iou))
    return table
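# Hypothetical usage sketch (not from the original tests): run
# `process_annotation` on a few annotations and check that every
# rasterize/paste combination stays close to the ground-truth mask.
# The method name, the annotation selection and the 0.9 IoU floor
# below are illustrative assumptions.
def _example_check_rasterize_paste_iou(self):
    ann_ids = self.coco.getAnnIds(iscrowd=False)[:2]
    for ann in self.coco.loadAnns(ann_ids):
        for rasterize_method, paste_method, iou in self.process_annotation(ann):
            self.assertGreaterEqual(
                iou, 0.9,
                "low IoU for rasterize={}, paste={}".format(
                    rasterize_method, paste_method))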
def test_polygon_area(self):
    # Draw polygon boxes
    for d in [5.0, 10.0, 1000.0]:
        polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]])
        area = polygon.area()[0]
        target = d**2
        self.assertEqual(area, target)

    # Draw polygon triangles
    for d in [5.0, 10.0, 1000.0]:
        polygon = PolygonMasks([[[0, 0, 0, d, d, d]]])
        area = polygon.area()[0]
        target = d**2 / 2
        self.assertEqual(area, target)
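# Hedged reference check (not from the original file): PolygonMasks.area is
# expected to agree with the shoelace formula,
#   area = 0.5 * |sum_i (x_i * y_{i+1} - x_{i+1} * y_i)|,
# which is what the triangle target above (d**2 / 2) comes from. A direct
# NumPy computation for d = 10, using a made-up helper name:
def _shoelace_area_sketch():
    d = 10.0
    xs = np.array([0.0, 0.0, d])  # triangle (0, 0), (0, d), (d, d)
    ys = np.array([0.0, d, d])
    area = 0.5 * abs(np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1)))
    assert area == d ** 2 / 2  # 50.0
    return area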
def annotations_to_instances(annos, image_size, mask_format="polygon"):
    """
    Create an :class:`Instances` object used by the models,
    from instance annotations in the dataset dict.

    Args:
        annos (list[dict]): a list of instance annotations in one image, each
            element for one instance.
        image_size (tuple): height, width

    Returns:
        Instances:
            It will contain fields "gt_boxes", "gt_classes",
            "gt_masks", "gt_keypoints", if they can be obtained from `annos`.
            This is the format that builtin models expect.
    """
    boxes = [
        BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
        for obj in annos
    ]
    target = Instances(image_size)
    boxes = target.gt_boxes = Boxes(boxes)
    boxes.clip(image_size)

    classes = [obj["category_id"] for obj in annos]
    classes = torch.tensor(classes, dtype=torch.int64)
    target.gt_classes = classes

    if len(annos) and "segmentation" in annos[0]:
        segms = [obj["segmentation"] for obj in annos]
        if mask_format == "polygon":
            masks = PolygonMasks(segms)
        else:
            assert mask_format == "bitmask", mask_format
            masks = []
            for segm in segms:
                if isinstance(segm, list):
                    # polygon
                    masks.append(polygons_to_bitmask(segm, *image_size))
                elif isinstance(segm, dict):
                    # COCO RLE
                    masks.append(mask_util.decode(segm))
                elif isinstance(segm, np.ndarray):
                    assert segm.ndim == 2, (
                        "Expect segmentation of 2 dimensions, got {}.".format(segm.ndim))
                    # mask array
                    masks.append(segm)
                else:
                    raise ValueError(
                        "Cannot convert segmentation of type '{}' to BitMasks! "
                        "Supported types are: polygons as list[list[float] or ndarray], "
                        "COCO-style RLE as a dict, or a full-image segmentation mask "
                        "as a 2D ndarray.".format(type(segm)))
            # torch.from_numpy does not support array with negative stride.
            masks = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x)) for x in masks
                ]))
        target.gt_masks = masks

    if len(annos) and "keypoints" in annos[0]:
        kpts = np.array([obj.get("keypoints", []) for obj in annos])  # (N, K, 3)
        # Set all out-of-boundary points to "unlabeled"
        kpts_xy = kpts[:, :, :2]
        inside = (kpts_xy >= np.array([0, 0])) & (kpts_xy <= np.array(image_size[::-1]))
        inside = inside.all(axis=2)
        kpts[:, :, :2] = kpts_xy
        kpts[:, :, 2][~inside] = 0
        target.gt_keypoints = Keypoints(kpts)

    return target
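# Minimal usage sketch (illustrative, not part of the original module): build an
# Instances object from a single hand-written annotation. The box, category id
# and square polygon below are made-up values for demonstration only.
def _example_annotations_to_instances():
    annos = [{
        "bbox": [10.0, 10.0, 20.0, 20.0],  # XYWH, converted to XYXY internally
        "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 0,
        "segmentation": [[10.0, 10.0, 30.0, 10.0, 30.0, 30.0, 10.0, 30.0]],
    }]
    instances = annotations_to_instances(annos, image_size=(100, 100))
    # "gt_boxes", "gt_classes" and "gt_masks" are now populated on `instances`
    return instances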
def convert_to_coco_dict(dataset_name, dataset_dicts, metadata):
    """
    Convert a dataset in cvpods's standard format into COCO json format.

    COCO data format description can be found here:
    http://cocodataset.org/#format-data

    Args:
        dataset_name: name of the source dataset;
            must be registered in DatasetCatalog and in cvpods's standard format.

    Returns:
        coco_dict: serializable dict in COCO json format
    """
    if dataset_name not in [
        "citypersons_train", "citypersons_val",
        "crowdhuman_train", "crowdhuman_val",
        "coco_2017_train", "coco_2017_val",
        "widerface_2019_train", "widerface_2019_val",
    ]:
        raise NotImplementedError(
            "Dataset name '{}' not supported".format(dataset_name))

    # unmap the category mapping ids for COCO
    if hasattr(metadata, "thing_dataset_id_to_contiguous_id"):
        reverse_id_mapping = {
            v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
        }

        def reverse_id_mapper(contiguous_id):
            return reverse_id_mapping[contiguous_id]  # noqa
    else:

        def reverse_id_mapper(contiguous_id):
            return contiguous_id  # noqa

    categories = [{
        "id": reverse_id_mapper(id),
        "name": name
    } for id, name in enumerate(metadata.thing_classes)]

    logger.info("Converting dataset dicts into COCO format")
    coco_images = []
    coco_annotations = []

    for image_id, image_dict in enumerate(dataset_dicts):
        coco_image = {
            "id": image_dict.get("image_id", image_id),
            "width": image_dict["width"],
            "height": image_dict["height"],
            "file_name": image_dict["file_name"],
        }
        coco_images.append(coco_image)

        anns_per_image = image_dict["annotations"]
        for annotation in anns_per_image:
            # create a new dict with only COCO fields
            coco_annotation = {}

            # COCO requirement: XYWH box format
            bbox = annotation["bbox"]
            bbox_mode = annotation["bbox_mode"]
            bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS)

            # COCO requirement: instance area
            if "segmentation" in annotation:
                # Computing areas for instances from their segmentation polygons
                segmentation = annotation["segmentation"]
                # TODO: check segmentation type: RLE, BinaryMask or Polygon
                polygons = PolygonMasks([segmentation])
                area = polygons.area()[0].item()
            else:
                # Computing areas using bounding boxes
                bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
                area = Boxes([bbox_xy]).area()[0].item()

            if "keypoints" in annotation:
                keypoints = annotation["keypoints"]  # list[int]
                for idx, v in enumerate(keypoints):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1].
                        # For COCO format consistency we subtract 0.5:
                        # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163
                        keypoints[idx] = v - 0.5
                if "num_keypoints" in annotation:
                    num_keypoints = annotation["num_keypoints"]
                else:
                    num_keypoints = sum(kp > 0 for kp in keypoints[2::3])

            # COCO requirement:
            #   linking annotations to images
            #   "id" field must start with 1
            coco_annotation["id"] = len(coco_annotations) + 1
            coco_annotation["image_id"] = coco_image["id"]
            coco_annotation["bbox"] = [round(float(x), 3) for x in bbox]
            coco_annotation["area"] = area
            coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"])
            coco_annotation["iscrowd"] = annotation.get("iscrowd", 0)

            # Add optional fields
            if "keypoints" in annotation:
                coco_annotation["keypoints"] = keypoints
                coco_annotation["num_keypoints"] = num_keypoints

            if "segmentation" in annotation:
                coco_annotation["segmentation"] = annotation["segmentation"]

            coco_annotations.append(coco_annotation)

    logger.info(
        "Conversion finished, "
        f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}"
    )

    info = {
        "date_created": str(datetime.datetime.now()),
        "description": "Automatically generated COCO json file for cvpods.",
    }
    coco_dict = {
        "info": info,
        "images": coco_images,
        "annotations": coco_annotations,
        "categories": categories,
        "licenses": None,
    }
    return coco_dict