def test_group_bbox2d_per_label():
    count1, count2 = 10, 11
    bbox1 = BBox2D(label="car", x=1, y=1, w=2, h=3)
    bbox2 = BBox2D(label="pedestrian", x=7, y=6, w=3, h=4)
    bboxes = []
    bboxes.extend([bbox1] * count1)
    bboxes.extend([bbox2] * count2)

    bboxes_per_label = group_bbox2d_per_label(bboxes)

    assert len(bboxes_per_label["car"]) == count1
    assert len(bboxes_per_label["pedestrian"]) == count2
def test_convert2torchvision_format():
    """test convert to torchvision format."""
    boxes = [
        BBox2D(label=0, x=10, y=10, w=10, h=10),
        BBox2D(label=1, x=20, y=20, w=10, h=10),
    ]
    actual_targets = prepare_bboxes(boxes)
    expected_targets = {
        "boxes": torch.Tensor([[10, 10, 20, 20], [20, 20, 30, 30]]),
        "labels": torch.LongTensor([0, 1]),
    }
    assert _same_dict(expected_targets, actual_targets)
def _load_bounding_boxes(self, raw_record):
    """Convert one decoded TFRecord example into a list of BBox2D objects.

    Corner coordinates in the record are normalized to [0, 1], so they are
    scaled back to pixels before the (x, y, w, h) conversion.
    """
    img_width = raw_record["image/width"].numpy()
    img_height = raw_record["image/height"].numpy()
    label = tf.sparse.to_dense(
        raw_record["image/object/class/label"]
    ).numpy()
    xmin = (
        tf.sparse.to_dense(raw_record["image/object/bbox/xmin"]).numpy()
        * img_width
    )
    xmax = (
        tf.sparse.to_dense(raw_record["image/object/bbox/xmax"]).numpy()
        * img_width
    )
    ymin = (
        tf.sparse.to_dense(raw_record["image/object/bbox/ymin"]).numpy()
        * img_height
    )
    ymax = (
        tf.sparse.to_dense(raw_record["image/object/bbox/ymax"]).numpy()
        * img_height
    )
    width = xmax - xmin
    height = ymax - ymin

    bboxes = [
        BBox2D(label, x, y, w, h)
        for label, x, y, w, h in zip(label, xmin, ymin, width, height)
    ]
    return bboxes
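# A hedged sketch of the coordinate math above, stripped of TensorFlow:
# corners arrive normalized to [0, 1], get scaled by the image size, and
# become an (x, y, w, h) box. The values here are made up for illustration.
img_width, img_height = 100, 200
xmin, xmax, ymin, ymax = 0.1, 0.5, 0.2, 0.4
x, y = xmin * img_width, ymin * img_height
w = (xmax - xmin) * img_width
h = (ymax - ymin) * img_height
assert (x, y, w, h) == (10.0, 40.0, 40.0, 40.0)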
def test_read_bounding_box_2d():
    annotation = [
        {
            "instance_id": "...",
            "label_id": 27,
            "label_name": "car",
            "x": 30,
            "y": 50,
            "width": 100,
            "height": 100,
        }
    ]
    definition = {
        "id": 1243,
        "name": "...",
        "description": "...",
        "format": "JSON",
        "spec": [{"label_id": 27, "label_name": "car"}],
    }
    label_mappings = {
        m["label_id"]: m["label_name"] for m in definition["spec"]
    }

    bbox = read_bounding_box_2d(annotation, label_mappings)

    assert bbox == [BBox2D(27, 30, 50, 100, 100)]
def test_plot_bboxes():
    cur_dir = pathlib.Path(__file__).parent.absolute()
    img = Image.open(
        str(cur_dir / "mock_data" / "simrun" / "captures" / "camera_000.png")
    )
    label_mappings = {1: "car", 2: "tree", 3: "light"}
    boxes = [
        BBox2D(label=1, x=1, y=1, w=2, h=3),
        BBox2D(label=1, x=7, y=6, w=3, h=4),
        BBox2D(label=1, x=2, y=6, w=2, h=4),
    ]
    colors = ["green", "red", "green"]
    with patch(
        "datasetinsights.stats.visualization.plots.add_single_bbox_on_image"
    ) as mock:
        plot_bboxes(img, boxes, label_mappings=label_mappings, colors=colors)
        assert mock.call_count == len(boxes)
def test_gather_gt_preds(mock_all_gather, mock_get_world_size):
    """test gather preds."""
    box_a, box_b = (
        BBox2D(label=0, x=10, y=10, w=10, h=10),
        BBox2D(label=1, x=20, y=20, w=10, h=10),
    )
    uneven_list = [
        ([box_a], []),
        ([box_a, box_b], [box_b]),
        ([box_b], [box_a, box_b]),
        ([box_b], [box_a]),
    ]
    mock_get_world_size.return_value = 1
    mock_all_gather.return_value = MagicMock()

    actual_result = gather_gt_preds(
        gt_preds=uneven_list, device=torch.device("cpu"), max_boxes=3
    )

    assert len(actual_result) == 4
def test_convert2canonical():
    """test convert to canonical."""
    boxes_rcnn_format = [
        {
            "boxes": torch.Tensor(
                [[10.5, 10.5, 20.5, 20.5], [20.5, 20.5, 30.5, 30.5]]
            ),
            "labels": torch.Tensor([0, 1]),
            "scores": torch.FloatTensor([0.3, 0.9]),
        }
    ]
    actual_result = convert_bboxes2canonical(boxes_rcnn_format)
    expected_result = [
        [
            BBox2D(label=0, x=10.5, y=10.5, w=10, h=10, score=0.3),
            BBox2D(label=1, x=20.5, y=20.5, w=10, h=10, score=0.9),
        ]
    ]
    assert same_list_of_list_of_bboxes(actual_result, expected_result)
def test_plot_bboxes():
    cur_dir = pathlib.Path(__file__).parent.absolute()
    img = Image.open(
        str(cur_dir / "mock_data" / "simrun" / "captures" / "camera_000.png")
    )
    boxes = [
        BBox2D(label="car", x=1, y=1, w=2, h=3),
        BBox2D(label="tree", x=7, y=6, w=3, h=4),
        BBox2D(label="light", x=2, y=6, w=2, h=4),
    ]
    colors = [
        ImageColor.getcolor("green", "RGB"),
        ImageColor.getcolor("red", "RGB"),
        ImageColor.getcolor("green", "RGB"),
    ]
    with patch(
        "datasetinsights.stats.visualization.plots.ImageDraw.Draw"
    ) as mock:
        instance = mock.return_value
        plot_bboxes(img, boxes, colors)
        assert instance.rectangle.call_count == len(boxes)
        assert instance.multiline_text.call_count == len(boxes)
def test_pad_box_lists():
    """test pad box lists."""
    box_a, box_b = (
        BBox2D(label=0, x=10, y=10, w=10, h=10),
        BBox2D(label=1, x=20, y=20, w=10, h=10),
    )
    uneven_list = [
        ([box_a], []),
        ([box_a, box_b], [box_b]),
        ([box_b], [box_a, box_b]),
        ([box_b], [box_a]),
    ]
    actual_result = pad_box_lists(uneven_list, max_boxes_per_img=3)
    expected_result = [
        (
            [box_a, padding_box, padding_box],
            [padding_box, padding_box, padding_box],
        ),
        ([box_a, box_b, padding_box], [box_b, padding_box, padding_box]),
        ([box_b, padding_box, padding_box], [box_a, box_b, padding_box]),
        ([box_b, padding_box, padding_box], [box_a, padding_box, padding_box]),
    ]
    for i in range(len(expected_result)):
        assert len(expected_result[i][0]) == len(actual_result[i][0])
        assert len(expected_result[i][1]) == len(actual_result[i][1])
        for t_index in range(2):
            for j in range(len(expected_result[i][t_index])):
                # NaN != NaN, so padding boxes need an explicit isnan check.
                if np.isnan(expected_result[i][t_index][j].label):
                    assert np.isnan(actual_result[i][t_index][j].label)
                else:
                    assert (
                        expected_result[i][t_index][j]
                        == actual_result[i][t_index][j]
                    )
def __init__(self, image_size=(256, 256), transform=None):
    """Initialize the dataset.

    Args:
        image_size : size of the generated images
        transform : transform applied to the images and bounding boxes
    """
    # Random grayscale fixtures; cast to uint8 so PIL's "L" mode gets the
    # 8-bit pixel data it expects.
    self.images = [
        Image.fromarray(
            (np.random.random(image_size) * 255).astype(np.uint8), "L"
        ),
        Image.fromarray(
            (np.random.random(image_size) * 255).astype(np.uint8), "L"
        ),
    ]
    self.bboxes = [
        [
            BBox2D(label=1, x=10, y=20, w=30, h=40),
            BBox2D(label=2, x=50, y=50, w=10, h=10),
        ],
        [
            BBox2D(label=1, x=30, y=40, w=20, h=20),
            BBox2D(label=2, x=20, y=10, w=40, h=10),
        ],
    ]
    self.label_mappings = {"1": "car", "2": "bike"}
    self.transform = transform
def test_convert2canonical_batch():
    """test convert to canonical batch."""
    boxes_rcnn_format = [
        {
            "boxes": torch.Tensor([[10.0, 10, 20, 20], [20, 20, 30, 30]]),
            "labels": torch.LongTensor([0, 1]),
        },
        {
            "boxes": torch.Tensor([[10, 10, 20, 20], [20, 20, 30, 30]]),
            "labels": torch.LongTensor([2, 3]),
        },
    ]
    actual_result = convert_bboxes2canonical(boxes_rcnn_format)
    expected_result = [
        [
            BBox2D(label=0, x=10, y=10, w=10, h=10),
            BBox2D(label=1, x=20, y=20, w=10, h=10),
        ],
        [
            BBox2D(label=2, x=10, y=10, w=10, h=10),
            BBox2D(label=3, x=20, y=20, w=10, h=10),
        ],
    ]
    assert same_list_of_list_of_bboxes(actual_result, expected_result)
def convert_bboxes2canonical(bboxes):
    """Convert bounding boxes to the canonical format.

    Convert bounding boxes from the format used by pytorch torchvision's
    Faster R-CNN model into our canonical format, a list of lists of
    BBox2Ds. Faster R-CNN format:
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/faster_rcnn.py#L45

    Args:
        bboxes (List[Dict[str, torch.Tensor]]): A list of dictionaries.
            Each item in the list corresponds to the bounding boxes for one
            example. The dictionary must have the keys 'boxes' and 'labels'.
            The value for 'boxes' is (``FloatTensor[N, 4]``): the
            ground-truth boxes in ``[x1, y1, x2, y2]`` format, with values
            between ``0`` and ``H`` and ``0`` and ``W``. The value for
            'labels' is (``Int64Tensor[N]``): the class label for each
            ground-truth box. If the dictionary has the key 'scores', then
            those values are used for the confidence score of the BBox2D;
            otherwise the score is set to 1.

    Returns (List[List[BBox2D]]):
        Each element in the list corresponds to the list of bounding boxes
        for an example.
    """
    bboxes_batch = []
    for example in bboxes:
        all_coords = example["boxes"]
        num_boxes = all_coords.shape[0]
        labels = example["labels"]
        if "scores" in example.keys():
            scores = example["scores"]
        else:
            scores = torch.FloatTensor([1.0] * num_boxes)
        bboxes_example = []
        for i in range(num_boxes):
            coords = all_coords[i]
            x, y = coords[0].item(), coords[1].item()
            # Corners [x1, y1, x2, y2] become an origin plus width/height.
            canonical_box = BBox2D(
                x=x,
                y=y,
                w=coords[2].item() - x,
                h=coords[3].item() - y,
                label=labels[i].item(),
                score=scores[i].item(),
            )
            bboxes_example.append(canonical_box)
        bboxes_batch.append(bboxes_example)
    return bboxes_batch
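# A minimal usage sketch of convert_bboxes2canonical; the tensor values
# below are made up for illustration.
import torch

rcnn_output = [
    {
        "boxes": torch.Tensor([[10.0, 10.0, 20.0, 20.0]]),
        "labels": torch.LongTensor([0]),
        "scores": torch.FloatTensor([0.8]),
    }
]
canonical = convert_bboxes2canonical(rcnn_output)
# One list per image, one BBox2D per detection:
# canonical == [[BBox2D(label=0, x=10.0, y=10.0, w=10.0, h=10.0, score=0.8)]]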
def list2canonical(box_list):
    """Convert a list into a BBox2D.

    Args:
        box_list: box represented in list format, ordered
            [label, score, x, y, w, h]

    Returns:
        BBox2D
    """
    return BBox2D(
        label=box_list[0],
        score=box_list[1],
        x=box_list[2],
        y=box_list[3],
        w=box_list[4],
        h=box_list[5],
    )
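# Round-trip sketch: a flat [label, score, x, y, w, h] row (the layout the
# tensor helpers in this module use) maps back onto a BBox2D. This assumes
# BBox2D exposes its constructor arguments as attributes.
row = [0, 0.9, 10.0, 10.0, 5.0, 5.0]
box = list2canonical(row)
assert box.label == 0 and box.score == 0.9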
def convert_coco2canonical(coco_annotation):
    """Convert a COCO-style annotation into the canonical format.

    Convert from a tuple of image and COCO-style dictionaries describing
    the bboxes to a tuple of image, List of BBox2D.

    Args:
        coco_annotation (tuple): image and COCO-style dictionary

    Returns:
        a tuple of image, List of BBox2D
    """
    image, targets = coco_annotation
    all_bboxes = []
    for t in targets:
        label = t["category_id"]
        bbox = t["bbox"]
        b = BBox2D(x=bbox[0], y=bbox[1], w=bbox[2], h=bbox[3], label=label)
        all_bboxes.append(b)
    return image, all_bboxes
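# A short sketch of the expected input: each COCO target carries a
# "category_id" and a "bbox" already in [x, y, width, height] order, so the
# fields map straight onto BBox2D. The image stand-in is hypothetical.
fake_image = object()  # any PIL image would do here
targets = [{"category_id": 3, "bbox": [2.0, 4.0, 10.0, 12.0]}]
image, bboxes = convert_coco2canonical((fake_image, targets))
assert bboxes[0].label == 3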
def _convert_to_bbox2d(single_bbox):
    """Convert the bbox record to BBox2D objects.

    Args:
        single_bbox (dict): raw bounding box information

    Returns:
        canonical_bbox (BBox2D): canonical bounding box
    """
    label = single_bbox["label_id"]
    bbox = single_bbox["bbox"]
    canonical_bbox = BBox2D(
        x=bbox[0], y=bbox[1], w=bbox[2], h=bbox[3], label=label
    )
    return canonical_bbox
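# Sketch of a single raw record: a "label_id" plus a "bbox" in
# [x, y, w, h] order, mapping field-for-field onto BBox2D. The record
# values are made up for illustration.
record = {"label_id": 2, "bbox": [3.0, 4.0, 5.0, 6.0]}
box = _convert_to_bbox2d(record)
assert box.label == 2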
def get_gt_pred_bbox():
    gt_bbox1 = BBox2D(label="car", x=1, y=1, w=2, h=3)
    gt_bbox2 = BBox2D(label="car", x=7, y=6, w=3, h=4)
    gt_bbox3 = BBox2D(label="car", x=2, y=6, w=2, h=4)

    pred_bbox1 = BBox2D(label="car", x=1, y=2, w=3, h=3, score=0.93)
    pred_bbox2 = BBox2D(label="car", x=6, y=5, w=3, h=4, score=0.94)
    pred_bbox3 = BBox2D(label="car", x=2, y=5, w=2, h=4, score=0.79)

    gt_bboxes = [gt_bbox1, gt_bbox2, gt_bbox3]
    pred_bboxes = [pred_bbox1, pred_bbox2, pred_bbox3]

    return gt_bboxes, pred_bboxes
def pad_box_lists(
    gt_preds: List[Tuple[List[BBox2D], List[BBox2D]]],
    max_boxes_per_img=MAX_BOXES_PER_IMAGE,
):
    """Pad the lists of boxes.

    Pad the lists of boxes and targets with placeholder boxes so that all
    targets and predictions have the same number of elements.

    Args:
        gt_preds (List[Tuple[List[BBox2D], List[BBox2D]]]): A list of
            tuples where the first element in each tuple is a list of
            bounding boxes corresponding to the targets in an example, and
            the second element in the tuple corresponds to the predictions
            in that example
        max_boxes_per_img: maximum number of target boxes and predicted
            boxes per image

    Returns:
        Same format as gt_preds, but all examples will have the same
        number of targets and predictions. If there are fewer targets or
        predictions than max_boxes_per_img, then boxes with NaN values
        are added.
    """
    padding_box = BBox2D(
        label=np.nan, score=np.nan, x=np.nan, y=np.nan, w=np.nan, h=np.nan
    )
    for tup in gt_preds:
        target_list, pred_list = tup
        if len(target_list) > max_boxes_per_img:
            raise ValueError(
                f"max boxes per image set to {max_boxes_per_img},"
                f" but there were {len(target_list)} targets found."
            )
        if len(pred_list) > max_boxes_per_img:
            raise ValueError(
                f"max boxes per image set to {max_boxes_per_img},"
                f" but there were {len(pred_list)} predictions found."
            )
        # Top up both lists in place with NaN placeholder boxes.
        for i in range(max_boxes_per_img - len(target_list)):
            target_list.append(padding_box)
        for i in range(max_boxes_per_img - len(pred_list)):
            pred_list.append(padding_box)
    return gt_preds
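# A small sketch of pad_box_lists (hypothetical maximum of 2): shorter
# lists are topped up with NaN placeholder boxes so every example carries
# the same number of targets and predictions.
real_box = BBox2D(label=0, x=1, y=1, w=2, h=2)
padded = pad_box_lists([([real_box], [])], max_boxes_per_img=2)
targets, preds = padded[0]
assert len(targets) == 2 and len(preds) == 2
assert np.isnan(preds[0].label)  # pure padding on the prediction side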
def read_bounding_box_2d(annotation, label_mappings=None):
    """Convert dictionary representations of 2d bounding boxes into objects
    of the BBox2D class.

    Args:
        annotation (List[dict]): 2D bounding box annotation
        label_mappings (dict): a dict of {label_id: label_name} mapping

    Returns:
        A list of 2D bounding box objects
    """
    bboxes = []
    for b in annotation:
        label_id = b["label_id"]
        x = b["x"]
        y = b["y"]
        w = b["width"]
        h = b["height"]
        # Skip boxes whose label is not in the provided mapping.
        if label_mappings and label_id not in label_mappings:
            continue
        box = BBox2D(label=label_id, x=x, y=y, w=w, h=h)
        bboxes.append(box)
    return bboxes
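# A minimal sketch: when label_mappings is given, annotations whose
# label_id is missing from the mapping are silently filtered out. The
# annotation values are made up for illustration.
annotation = [
    {"label_id": 1, "x": 0, "y": 0, "width": 4, "height": 4},
    {"label_id": 99, "x": 5, "y": 5, "width": 2, "height": 2},
]
boxes = read_bounding_box_2d(annotation, label_mappings={1: "car"})
assert len(boxes) == 1 and boxes[0].label == 1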
def get_gt_pred_bbox():
    gt_bbox1 = BBox2D(label="car", x=1, y=1, w=2, h=3)
    gt_bbox2 = BBox2D(label="car", x=7, y=6, w=3, h=4)
    gt_bbox11 = BBox2D(label="pedestrian", x=1, y=6, w=2, h=4)
    gt_bbox3 = BBox2D(label="car", x=2, y=2, w=2, h=2)
    gt_bbox4 = BBox2D(label="car", x=2, y=6, w=2, h=4)
    gt_bbox5 = BBox2D(label="car", x=6, y=5, w=4, h=3)
    gt_bbox14 = BBox2D(label="bike", x=6, y=1, w=3, h=2)
    gt_bbox6 = BBox2D(label="car", x=2, y=1, w=2, h=3)
    gt_bbox7 = BBox2D(label="car", x=6, y=3, w=3, h=5)
    gt_bbox8 = BBox2D(label="car", x=2, y=1, w=5, h=2)
    gt_bbox9 = BBox2D(label="car", x=2, y=4, w=3, h=4)
    gt_bbox10 = BBox2D(label="car", x=5, y=1, w=5, h=4)
    gt_bbox12 = BBox2D(label="pedestrian", x=1, y=5, w=3, h=4)
    gt_bbox13 = BBox2D(label="pedestrian", x=8, y=7, w=2, h=2)

    pred_bbox1 = BBox2D(label="car", x=1, y=2, w=3, h=3, score=0.93)
    pred_bbox2 = BBox2D(label="car", x=6, y=5, w=3, h=4, score=0.94)
    pred_bbox13 = BBox2D(label="pedestrian", x=1, y=6, w=2, h=3, score=0.70)
    pred_bbox16 = BBox2D(label="pedestrian", x=1, y=7, w=2, h=3, score=0.80)
    pred_bbox3 = BBox2D(label="car", x=2, y=5, w=2, h=4, score=0.79)
    pred_bbox4 = BBox2D(label="car", x=5, y=4, w=4, h=2, score=0.39)
    pred_bbox5 = BBox2D(label="car", x=5, y=7, w=4, h=2, score=0.49)
    pred_bbox6 = BBox2D(label="car", x=2, y=2, w=2, h=2, score=0.59)
    pred_bbox7 = BBox2D(label="car", x=2, y=6, w=2, h=2, score=0.69)
    pred_bbox8 = BBox2D(label="car", x=6, y=3, w=4, h=4, score=0.79)
    pred_bbox9 = BBox2D(label="car", x=1, y=1, w=7, h=2, score=0.99)
    pred_bbox10 = BBox2D(label="car", x=4, y=5, w=3, h=4, score=0.90)
    pred_bbox11 = BBox2D(label="car", x=1, y=1, w=2, h=3, score=0.80)
    pred_bbox12 = BBox2D(label="car", x=4, y=4, w=5, h=2, score=0.70)
    pred_bbox14 = BBox2D(label="pedestrian", x=3, y=7, w=3, h=3, score=0.40)
    pred_bbox15 = BBox2D(label="pedestrian", x=8, y=7, w=2, h=3, score=0.30)

    gt_bboxes = [
        [gt_bbox1, gt_bbox2, gt_bbox11],
        [gt_bbox3, gt_bbox4, gt_bbox5, gt_bbox14],
        [gt_bbox6, gt_bbox7],
        [gt_bbox8, gt_bbox9],
        [gt_bbox10, gt_bbox12, gt_bbox13],
    ]
    pred_bboxes = [
        [pred_bbox1, pred_bbox2, pred_bbox13, pred_bbox16],
        [pred_bbox3, pred_bbox4, pred_bbox5],
        [pred_bbox6, pred_bbox7, pred_bbox8],
        [pred_bbox9, pred_bbox10],
        [pred_bbox11, pred_bbox12, pred_bbox14, pred_bbox15],
    ]
    return gt_bboxes, pred_bboxes
    _gt_preds2tensor,
    canonical2list,
    convert_bboxes2canonical,
    gather_gt_preds,
    list2canonical,
    list3d_2canonical,
    metric_per_class_plot,
    pad_box_lists,
    prepare_bboxes,
    reduce_dict,
    tensorlist2canonical,
)
from datasetinsights.io.bbox import BBox2D

padding_box = BBox2D(
    label=np.nan, score=np.nan, x=np.nan, y=np.nan, w=np.nan, h=np.nan
)
def test_gt_preds2tensor():
    """test prediction to tensor conversion."""
    box_a, box_b = (
        BBox2D(label=0, x=10, y=10, w=10, h=10),
        BBox2D(label=1, x=20, y=20, w=10, h=10),
    )
    uneven_list = [
        ([box_a], []),
        ([box_a, box_b], [box_b]),
        ([box_b], [box_a, box_b]),
        ([box_b], [box_a]),
    ]
    actual_result = _gt_preds2tensor(uneven_list, 3)

    # Rows are [label, score, x, y, w, h]; NaN rows are padding.
    nan_row = [np.nan] * 6
    row_a = [0.0, 1.0, 10.0, 10.0, 10.0, 10.0]  # box_a
    row_b = [1.0, 1.0, 20.0, 20.0, 10.0, 10.0]  # box_b
    expected_result = torch.Tensor(
        [
            [[row_a, nan_row, nan_row], [nan_row, nan_row, nan_row]],
            [[row_a, row_b, nan_row], [row_b, nan_row, nan_row]],
            [[row_b, nan_row, nan_row], [row_a, row_b, nan_row]],
            [[row_b, nan_row, nan_row], [row_a, nan_row, nan_row]],
        ]
    )
    # NaN-aware equality check: NaN != NaN under plain torch.eq.
    torch.testing.assert_close(actual_result, expected_result, equal_nan=True)
def test_canonical2list():
    """test canonical to list."""
    bbox = BBox2D(label=0, x=10, y=10, w=10, h=10)
    actual_result = canonical2list(bbox)
    expected_result = [0, 1.0, 10, 10, 10, 10]
    assert actual_result == expected_result
def get_image_and_bbox():
    """prepare an image and bounding box."""
    bbox = BBox2D(label=1, x=1, y=1, w=2, h=3)
    image = np.zeros((100, 200, 3))
    return image, bbox
def test_list3d_2canonical():
    """test list 3d to canonical."""
    box_a, box_b = (
        BBox2D(label=0, x=10, y=10, w=10, h=10),
        BBox2D(label=1, x=20, y=20, w=10, h=10),
    )
    # Rows are [label, score, x, y, w, h]; NaN rows are padding.
    nan_row = [np.nan] * 6
    row_a = [0.0, 1.0, 10.0, 10.0, 10.0, 10.0]  # box_a
    row_b = [1.0, 1.0, 20.0, 20.0, 10.0, 10.0]  # box_b
    list3d = [
        [[row_a, nan_row, nan_row], [nan_row, nan_row, nan_row]],
        [[row_a, row_b, nan_row], [row_b, nan_row, nan_row]],
        [[row_b, nan_row, nan_row], [row_a, row_b, nan_row]],
        [[row_b, nan_row, nan_row], [row_a, nan_row, nan_row]],
    ]
    expected_result = [
        ([box_a], []),
        ([box_a, box_b], [box_b]),
        ([box_b], [box_a, box_b]),
        ([box_b], [box_a]),
    ]
    actual_result = list3d_2canonical(list3d)
    assert actual_result == expected_result