def test_json_deserializable(self): payload = '{"box_mode": 2}' obj = json.loads(payload) try: obj["box_mode"] = BoxMode(obj["box_mode"]) except Exception: self.fail("JSON deserialization failed")
def transform_instance_annotations(annotation, transforms, image_size, *, keypoint_hflip_indices=None): """ Apply transforms to box, segmentation and keypoints annotations of a single instance. It will use `transforms.apply_box` for the box, and `transforms.apply_coords` for segmentation polygons & keypoints. If you need anything more specially designed for each data structure, you'll need to implement your own version of this function or the transforms. Args: annotation (dict): dict of instance annotations for a single instance. It will be modified in-place. transforms (TransformList): image_size (tuple): the height, width of the transformed image keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. Returns: dict: the same input dict with fields "bbox", "segmentation", "keypoints" transformed according to `transforms`. The "bbox_mode" field will be set to XYXY_ABS. """ bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) # Note that bbox is 1d (per-instance bounding box) annotation["bbox"] = transforms.apply_box([bbox])[0] annotation["bbox_mode"] = BoxMode.XYXY_ABS if "segmentation" in annotation: # each instance contains 1 or more polygons segm = annotation["segmentation"] if isinstance(segm, list): # polygons polygons = [np.asarray(p).reshape(-1, 2) for p in segm] annotation["segmentation"] = [ p.reshape(-1) for p in transforms.apply_polygons(polygons) ] elif isinstance(segm, dict): # RLE mask = mask_util.decode(segm) mask = transforms.apply_segmentation(mask) assert tuple(mask.shape[:2]) == image_size annotation["segmentation"] = mask else: raise ValueError( "Cannot transform segmentation of type '{}'!" "Supported types are: polygons as list[list[float] or ndarray]," " COCO-style RLE as a dict.".format(type(segm))) if "keypoints" in annotation: keypoints = transform_keypoint_annotations(annotation["keypoints"], transforms, image_size, keypoint_hflip_indices) annotation["keypoints"] = keypoints return annotation
def gen_crop_transform_with_instance(crop_size, image_size, instance): """ Generate a CropTransform so that the cropping region contains the center of the given instance. Args: crop_size (tuple): h, w in pixels image_size (tuple): h, w instance (dict): an annotation dict of one instance, in mydl's dataset format. """ crop_size = np.asarray(crop_size, dtype=np.int32) bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 assert (image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] ), "The annotation bounding box is outside of the image!" assert (image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] ), "Crop size is larger than image size!" min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) y0 = np.random.randint(min_yx[0], max_yx[0] + 1) x0 = np.random.randint(min_yx[1], max_yx[1] + 1) return T.CropTransform(x0, y0, crop_size[1], crop_size[0])
def load_proposals_into_dataset(dataset_dicts, proposal_file): """ Load precomputed object proposals into the dataset. The proposal file should be a pickled dict with the following keys: - "ids": list[int] or list[str], the image ids - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores corresponding to the boxes. - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. Args: dataset_dicts (list[dict]): annotations in mydl Dataset format. proposal_file (str): file path of pre-computed proposals, in pkl format. Returns: list[dict]: the same format as dataset_dicts, but added proposal field. """ logger = logging.getLogger(__name__) logger.info("Loading proposals from: {}".format(proposal_file)) with PathManager.open(proposal_file, "rb") as f: proposals = pickle.load(f, encoding="latin1") # Rename the key names in D1 proposal files rename_keys = {"indexes": "ids", "scores": "objectness_logits"} for key in rename_keys: if key in proposals: proposals[rename_keys[key]] = proposals.pop(key) # Fetch the indexes of all proposals that are in the dataset # Convert image_id to str since they could be int. img_ids = set({str(record["image_id"]) for record in dataset_dicts}) id_to_index = { str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids } # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' bbox_mode = BoxMode(proposals["bbox_mode"] ) if "bbox_mode" in proposals else BoxMode.XYXY_ABS for record in dataset_dicts: # Get the index of the proposal i = id_to_index[str(record["image_id"])] boxes = proposals["boxes"][i] objectness_logits = proposals["objectness_logits"][i] # Sort the proposals in descending order of the scores inds = objectness_logits.argsort()[::-1] record["proposal_boxes"] = boxes[inds] record["proposal_objectness_logits"] = objectness_logits[inds] record["proposal_bbox_mode"] = bbox_mode return dataset_dicts
def process_annotation(self, ann, mask_side_len=28): # Parse annotation data img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] height, width = img_info["height"], img_info["width"] gt_polygons = [ np.array(p, dtype=np.float64) for p in ann["segmentation"] ] gt_bbox = BoxMode.convert(np.array(ann["bbox"]), BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) # Run rasterize .. torch_gt_bbox = torch.from_numpy( gt_bbox[None, :]).to(dtype=torch.float32) box_bitmasks = { "polygon": PolygonMasks([gt_polygons ]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), "roialign": BitMasks(torch.from_numpy( gt_bit_mask[None, :, :])).crop_and_resize( torch_gt_bbox, mask_side_len)[0], } # Run paste .. results = defaultdict(dict) for k, box_bitmask in box_bitmasks.items(): padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) scaled_boxes = scale_boxes(torch_gt_bbox, scale) r = results[k] r["old"] = paste_mask_in_image_old(padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5) r["aligned"] = paste_masks_in_image(box_bitmask[None, :, :], Boxes(gt_bbox[None, :]), (height, width))[0] table = [] for rasterize_method, r in results.items(): for paste_method, mask in r.items(): mask = np.asarray(mask) iou = iou_between_full_image_bit_masks( gt_bit_mask.astype("uint8"), mask) table.append((rasterize_method, paste_method, iou)) return table
def process(self, inputs, outputs): """ Args: inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). It is a list of dict. Each dict corresponds to an image and contains keys like "height", "width", "file_name", "image_id". outputs: the outputs of a COCO model. It is a list of dicts with key "instances" that contains :class:`Instances`. The :class:`Instances` object needs to have `densepose` field. """ for input, output in zip(inputs, outputs): instances = output["instances"].to(self._cpu_device) boxes = instances.pred_boxes.tensor.clone() boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) instances.pred_densepose = instances.pred_densepose.to_result( boxes) json_results = prediction_to_json(instances, input["image_id"]) self._predictions.extend(json_results)
def boxlist_to_tensor(boxlist, output_box_dim): if type(boxlist) == np.ndarray: box_tensor = torch.from_numpy(boxlist) elif type(boxlist) == list: if boxlist == []: return torch.zeros((0, output_box_dim), dtype=torch.float32) else: box_tensor = torch.FloatTensor(boxlist) else: raise Exception("Unrecognized boxlist type") input_box_dim = box_tensor.shape[1] if input_box_dim != output_box_dim: if input_box_dim == 4 and output_box_dim == 5: box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) else: raise Exception( "Unable to convert from {}-dim box to {}-dim box".format( input_box_dim, output_box_dim)) return box_tensor
def create_instances(predictions, image_size): ret = Instances(image_size) score = np.asarray([x["score"] for x in predictions]) chosen = (score > args.conf_threshold).nonzero()[0] score = score[chosen] bbox = np.asarray([predictions[i]["bbox"] for i in chosen]) bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) labels = np.asarray( [dataset_id_map(predictions[i]["category_id"]) for i in chosen]) ret.scores = score ret.pred_boxes = Boxes(bbox) ret.pred_classes = labels try: ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] except KeyError: pass return ret
def draw_dataset_dict(self, dic): """ Draw annotations/segmentaions in mydl Dataset format. Args: dic (dict): annotation/segmentation data of one image, in mydl Dataset format. Returns: output (VisImage): image object with visualizations. """ annos = dic.get("annotations", None) if annos: if "segmentation" in annos[0]: masks = [x["segmentation"] for x in annos] else: masks = None if "keypoints" in annos[0]: keypts = [x["keypoints"] for x in annos] keypts = np.array(keypts).reshape(len(annos), -1, 3) else: keypts = None boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos] labels = [x["category_id"] for x in annos] names = self.metadata.get("thing_classes", None) if names: labels = [names[i] for i in labels] labels = [ "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") for i, a in zip(labels, annos) ] self.overlay_instances(labels=labels, boxes=boxes, masks=masks, keypoints=keypts) sem_seg = dic.get("sem_seg", None) if sem_seg is None and "sem_seg_file_name" in dic: sem_seg = cv2.imread(dic["sem_seg_file_name"], cv2.IMREAD_GRAYSCALE) if sem_seg is not None: self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) return self.output
def transform_proposals(dataset_dict, image_shape, transforms, min_box_side_len, proposal_topk): """ Apply transformations to the proposals in dataset_dict, if any. Args: dataset_dict (dict): a dict read from the dataset, possibly contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" image_shape (tuple): height, width transforms (TransformList): min_box_side_len (int): keep proposals with at least this size proposal_topk (int): only keep top-K scoring proposals The input dict is modified in-place, with abovementioned keys removed. A new key "proposals" will be added. Its value is an `Instances` object which contains the transformed proposals in its field "proposal_boxes" and "objectness_logits". """ if "proposal_boxes" in dataset_dict: # Transform proposal boxes boxes = transforms.apply_box( BoxMode.convert( dataset_dict.pop("proposal_boxes"), dataset_dict.pop("proposal_bbox_mode"), BoxMode.XYXY_ABS, )) boxes = Boxes(boxes) objectness_logits = torch.as_tensor( dataset_dict.pop("proposal_objectness_logits").astype("float32")) boxes.clip(image_shape) keep = boxes.nonempty(threshold=min_box_side_len) boxes = boxes[keep] objectness_logits = objectness_logits[keep] proposals = Instances(image_shape) proposals.proposal_boxes = boxes[:proposal_topk] proposals.objectness_logits = objectness_logits[:proposal_topk] dataset_dict["proposals"] = proposals
def instances_to_json(self, instances, img_id): num_instance = len(instances) if num_instance == 0: return [] boxes = instances.pred_boxes.tensor.numpy() if boxes.shape[1] == 4: boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) boxes = boxes.tolist() scores = instances.scores.tolist() classes = instances.pred_classes.tolist() results = [] for k in range(num_instance): result = { "image_id": img_id, "category_id": classes[k], "bbox": boxes[k], "score": scores[k], } results.append(result) return results
def _convert_xywha_to_xyxy(self, x): return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): """ Evaluate detection proposal recall metrics. This function is a much faster alternative to the official COCO API recall evaluation code. However, it produces slightly different results. """ # Record max overlap value for each gt box # Return vector of overlap values areas = { "all": 0, "small": 1, "medium": 2, "large": 3, "96-128": 4, "128-256": 5, "256-512": 6, "512-inf": 7, } area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2], ] # 512-inf assert area in areas, "Unknown area range: {}".format(area) area_range = area_ranges[areas[area]] gt_overlaps = [] num_pos = 0 for prediction_dict in dataset_predictions: predictions = prediction_dict["proposals"] # sort predictions in descending order # TODO maybe remove this and make it explicit in the documentation inds = predictions.objectness_logits.sort(descending=True)[1] predictions = predictions[inds] ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) anno = coco_api.loadAnns(ann_ids) gt_boxes = [ BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno if obj["iscrowd"] == 0 ] gt_boxes = torch.as_tensor(gt_boxes).reshape( -1, 4) # guard against no boxes gt_boxes = Boxes(gt_boxes) gt_areas = torch.as_tensor( [obj["area"] for obj in anno if obj["iscrowd"] == 0]) if len(gt_boxes) == 0 or len(predictions) == 0: continue valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) gt_boxes = gt_boxes[valid_gt_inds] num_pos += len(gt_boxes) if len(gt_boxes) == 0: continue if limit is not None and len(predictions) > limit: predictions = predictions[:limit] overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) _gt_overlaps = torch.zeros(len(gt_boxes)) for j in range(min(len(predictions), len(gt_boxes))): # find which proposal box maximally covers each gt box # and get the iou amount of coverage for each gt box max_overlaps, argmax_overlaps = overlaps.max(dim=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ovr, gt_ind = max_overlaps.max(dim=0) assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps.append(_gt_overlaps) gt_overlaps = (torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)) gt_overlaps, _ = torch.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) recalls = torch.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return { "ar": ar, "recalls": recalls, "thresholds": thresholds, "gt_overlaps": gt_overlaps, "num_pos": num_pos, }
def instances_to_coco_json(instances, img_id): """ Dump an "Instances" object to a COCO-format json that's used for evaluation. Args: instances (Instances): img_id (int): the image id Returns: list[dict]: list of json annotations in COCO format. """ num_instance = len(instances) if num_instance == 0: return [] boxes = instances.pred_boxes.tensor.numpy() boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) boxes = boxes.tolist() scores = instances.scores.tolist() classes = instances.pred_classes.tolist() has_mask = instances.has("pred_masks") if has_mask: # use RLE to encode the masks, because they are too large and takes memory # since this evaluator stores outputs of the entire dataset rles = [ mask_util.encode( np.array(mask[:, :, None], order="F", dtype="uint8"))[0] for mask in instances.pred_masks ] for rle in rles: # "counts" is an array encoded by mask_util as a byte-stream. Python3's # json writer which always produces strings cannot serialize a bytestream # unless you decode it. Thankfully, utf-8 works out (which is also what # the pycocotools/_mask.pyx does). rle["counts"] = rle["counts"].decode("utf-8") has_keypoints = instances.has("pred_keypoints") if has_keypoints: keypoints = instances.pred_keypoints results = [] for k in range(num_instance): result = { "image_id": img_id, "category_id": classes[k], "bbox": boxes[k], "score": scores[k], } if has_mask: result["segmentation"] = rles[k] if has_keypoints: # In COCO annotations, # keypoints coordinates are pixel indices. # However our predictions are floating point coordinates. # Therefore we subtract 0.5 to be consistent with the annotation format. # This is the inverse of data loading logic in `datasets/coco.py`. keypoints[k][:, :2] -= 0.5 result["keypoints"] = keypoints[k].flatten().tolist() results.append(result) return results
def _convert_xywh_to_xywha(self, x): return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
def convert_to_coco_dict(dataset_name): """ Convert an instance detection/segmentation or keypoint detection dataset in mydl's standard format into COCO json format. Generic dataset description can be found here: https://mydl.readthedocs.io/tutorials/datasets.html#register-a-dataset COCO data format description can be found here: http://cocodataset.org/#format-data Args: dataset_name (str): name of the source dataset Must be registered in DatastCatalog and in mydl's standard format. Must have corresponding metadata "thing_classes" Returns: coco_dict: serializable dict in COCO json format """ dataset_dicts = DatasetCatalog.get(dataset_name) metadata = MetadataCatalog.get(dataset_name) # unmap the category mapping ids for COCO if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): reverse_id_mapping = { v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items() } reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[ contiguous_id] # noqa else: reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa categories = [{ "id": reverse_id_mapper(id), "name": name } for id, name in enumerate(metadata.thing_classes)] logger.info("Converting dataset dicts into COCO format") coco_images = [] coco_annotations = [] for image_id, image_dict in enumerate(dataset_dicts): coco_image = { "id": image_dict.get("image_id", image_id), "width": image_dict["width"], "height": image_dict["height"], "file_name": image_dict["file_name"], } coco_images.append(coco_image) anns_per_image = image_dict["annotations"] for annotation in anns_per_image: # create a new dict with only COCO fields coco_annotation = {} # COCO requirement: XYWH box format bbox = annotation["bbox"] bbox_mode = annotation["bbox_mode"] bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS) # COCO requirement: instance area if "segmentation" in annotation: # Computing areas for instances by counting the pixels segmentation = annotation["segmentation"] # TODO: check segmentation type: RLE, BinaryMask or Polygon if isinstance(segmentation, list): polygons = PolygonMasks([segmentation]) area = polygons.area()[0].item() elif isinstance(segmentation, dict): # RLE area = mask_util.area(segmentation) else: raise TypeError( f"Unknown segmentation type {type(segmentation)}!") else: # Computing areas using bounding boxes bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) area = Boxes([bbox_xy]).area()[0].item() if "keypoints" in annotation: keypoints = annotation["keypoints"] # list[int] for idx, v in enumerate(keypoints): if idx % 3 != 2: # COCO's segmentation coordinates are floating points in [0, H or W], # but keypoint coordinates are integers in [0, H-1 or W-1] # For COCO format consistency we substract 0.5 # https://github.com/facebookresearch/mydl/pull/175#issuecomment-551202163 keypoints[idx] = v - 0.5 if "num_keypoints" in annotation: num_keypoints = annotation["num_keypoints"] else: num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) # COCO requirement: # linking annotations to images # "id" field must start with 1 coco_annotation["id"] = len(coco_annotations) + 1 coco_annotation["image_id"] = coco_image["id"] coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] coco_annotation["area"] = area coco_annotation["iscrowd"] = annotation.get("iscrowd", 0) coco_annotation["category_id"] = reverse_id_mapper( annotation["category_id"]) # Add optional fields if "keypoints" in annotation: coco_annotation["keypoints"] = keypoints coco_annotation["num_keypoints"] = num_keypoints if "segmentation" in annotation: coco_annotation["segmentation"] = annotation["segmentation"] coco_annotations.append(coco_annotation) logger.info( "Conversion finished, " f"num images: {len(coco_images)}, num annotations: {len(coco_annotations)}" ) info = { "date_created": str(datetime.datetime.now()), "description": "Automatically generated COCO json file for mydl.", } coco_dict = { "info": info, "images": coco_images, "annotations": coco_annotations, "categories": categories, "licenses": None, } return coco_dict
def annotations_to_instances(annos, image_size, mask_format="polygon"): """ Create an :class:`Instances` object used by the models, from instance annotations in the dataset dict. Args: annos (list[dict]): a list of instance annotations in one image, each element for one instance. image_size (tuple): height, width Returns: Instances: It will contain fields "gt_boxes", "gt_classes", "gt_masks", "gt_keypoints", if they can be obtained from `annos`. This is the format that builtin models expect. """ boxes = [ BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos ] target = Instances(image_size) boxes = target.gt_boxes = Boxes(boxes) boxes.clip(image_size) classes = [obj["category_id"] for obj in annos] classes = torch.tensor(classes, dtype=torch.int64) target.gt_classes = classes if len(annos) and "segmentation" in annos[0]: segms = [obj["segmentation"] for obj in annos] if mask_format == "polygon": masks = PolygonMasks(segms) else: assert mask_format == "bitmask", mask_format masks = [] for segm in segms: if isinstance(segm, list): # polygon masks.append(polygons_to_bitmask(segm, *image_size)) elif isinstance(segm, dict): # COCO RLE masks.append(mask_util.decode(segm)) elif isinstance(segm, np.ndarray): assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( segm.ndim) # mask array masks.append(segm) else: raise ValueError( "Cannot convert segmentation of type '{}' to BitMasks!" "Supported types are: polygons as list[list[float] or ndarray]," " COCO-style RLE as a dict, or a full-image segmentation mask " "as a 2D ndarray.".format(type(segm))) # torch.from_numpy does not support array with negative stride. masks = BitMasks( torch.stack([ torch.from_numpy(np.ascontiguousarray(x)) for x in masks ])) target.gt_masks = masks if len(annos) and "keypoints" in annos[0]: kpts = [obj.get("keypoints", []) for obj in annos] target.gt_keypoints = Keypoints(kpts) return target
def _convert_xy_to_wh(self, x): return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)