def test_bounding_box(self):
    def resize(input: features.BoundingBox, size: torch.Tensor) -> features.BoundingBox:
        old_height, old_width = input.image_size
        new_height, new_width = size

        height_scale = new_height / old_height
        width_scale = new_width / old_width

        old_x1, old_y1, old_x2, old_y2 = input.convert("xyxy").to_parts()

        new_x1 = old_x1 * width_scale
        new_y1 = old_y1 * height_scale
        new_x2 = old_x2 * width_scale
        new_y2 = old_y2 * height_scale

        return features.BoundingBox.from_parts(
            new_x1, new_y1, new_x2, new_y2, like=input, format="xyxy", image_size=tuple(size.tolist())
        )

    def horizontal_flip(input: features.BoundingBox) -> features.BoundingBox:
        x, y, w, h = input.convert("xywh").to_parts()
        x = input.image_size[1] - (x + w)
        return features.BoundingBox.from_parts(x, y, w, h, like=input, format="xywh")

    def compose(input: features.BoundingBox, size: torch.Tensor) -> features.BoundingBox:
        return horizontal_flip(resize(input, size)).convert("xyxy")

    image_size = (8, 6)
    input = features.BoundingBox([2, 4, 2, 4], format="cxcywh", image_size=image_size)
    size = torch.tensor((4, 12))
    expected = features.BoundingBox([6, 1, 10, 3], format="xyxy", image_size=image_size)

    actual_eager = compose(input, size)
    assert_close(actual_eager, expected)

    sample_inputs = (features.BoundingBox(torch.zeros((4,)), image_size=(10, 10)), torch.tensor((20, 5)))
    actual_jit = torch.jit.trace(compose, sample_inputs, check_trace=False)(input, size)
    assert_close(actual_jit, expected)

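# Hand-derived check of the fixed values used above (illustrative only, not part of the original
# test; the helper name is hypothetical): the cxcywh box [2, 4, 2, 4] on an (8, 6) image is xyxy
# [1, 2, 3, 6]; resizing (8, 6) -> (4, 12) scales x by 2 and y by 0.5, giving [2, 1, 6, 3]; a
# horizontal flip on the new width 12 maps x -> 12 - (x + w), which yields the expected [6, 1, 10, 3].
def _check_expected_bbox_by_hand():
    x1, y1, x2, y2 = 1.0, 2.0, 3.0, 6.0  # [2, 4, 2, 4] converted from cxcywh to xyxy
    x1, x2 = x1 * 12 / 6, x2 * 12 / 6    # width 6 -> 12
    y1, y2 = y1 * 4 / 8, y2 * 4 / 8      # height 8 -> 4
    x1, x2 = 12 - x2, 12 - x1            # horizontal flip, keeping x1 < x2
    assert (x1, y1, x2, y2) == (6.0, 1.0, 10.0, 3.0)
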
def make_bounding_box(*, format, image_size=(32, 32), extra_dims=(), dtype=torch.int64):
    if isinstance(format, str):
        format = features.BoundingBoxFormat[format]

    height, width = image_size

    if format == features.BoundingBoxFormat.XYXY:
        x1 = torch.randint(0, width // 2, extra_dims)
        y1 = torch.randint(0, height // 2, extra_dims)
        x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
        y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
        parts = (x1, y1, x2, y2)
    elif format == features.BoundingBoxFormat.XYWH:
        x = torch.randint(0, width // 2, extra_dims)
        y = torch.randint(0, height // 2, extra_dims)
        w = randint_with_tensor_bounds(1, width - x)
        h = randint_with_tensor_bounds(1, height - y)
        parts = (x, y, w, h)
    elif format == features.BoundingBoxFormat.CXCYWH:
        cx = torch.randint(1, width - 1, ())
        cy = torch.randint(1, height - 1, ())
        w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
        h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
        parts = (cx, cy, w, h)
    else:
        raise pytest.UsageError()

    return features.BoundingBox(torch.stack(parts, dim=-1).to(dtype), format=format, image_size=image_size)

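# `randint_with_tensor_bounds` is called above but not defined in this snippet. A minimal sketch
# of the behavior the call sites assume (torch.randint with per-element tensor bounds); only the
# name and call signature come from the code above, the body is an assumption.
import torch


def randint_with_tensor_bounds(arg1, arg2=None, **kwargs):
    # Interpret a single argument as the exclusive upper bound, two arguments as (low, high),
    # and broadcast both bounds to a common shape.
    low, high = torch.broadcast_tensors(
        *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))]
    )
    # Draw one integer per element and restore the broadcast shape.
    return torch.stack(
        [
            torch.randint(int(low_scalar), int(high_scalar), (), **kwargs)
            for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist())
        ]
    ).reshape(low.shape)
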
def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
    # Check transformation against known expected output
    image_size = (64, 64)
    # xyxy format
    in_boxes = [
        [1, 1, 5, 5],
        [1, image_size[0] - 6, 5, image_size[0] - 2],
        [image_size[1] - 6, image_size[0] - 6, image_size[1] - 2, image_size[0] - 2],
        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
    ]
    in_boxes = features.BoundingBox(
        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, dtype=torch.float64, device=device
    )
    # Tested parameters
    angle = 45
    center = None if expand else [12, 23]

    # Expected bboxes computed using Detectron2:
    # from detectron2.data.transforms import RotationTransform, AugmentationList
    # from detectron2.data.transforms import AugInput
    # import cv2
    # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32"))
    # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ])
    # out = augs(inpt)
    # print(inpt.boxes)
    if expand:
        expected_bboxes = [
            [1.65937957, 42.67157288, 7.31623382, 48.32842712],
            [41.96446609, 82.9766594, 47.62132034, 88.63351365],
            [82.26955262, 42.67157288, 87.92640687, 48.32842712],
            [31.35786438, 31.35786438, 59.64213562, 59.64213562],
        ]
    else:
        expected_bboxes = [
            [-11.33452378, 12.39339828, -5.67766953, 18.05025253],
            [28.97056275, 52.69848481, 34.627417, 58.35533906],
            [69.27564928, 12.39339828, 74.93250353, 18.05025253],
            [18.36396103, 1.07968978, 46.64823228, 29.36396103],
        ]

    output_boxes = F.rotate_bounding_box(
        in_boxes,
        in_boxes.format,
        in_boxes.image_size,
        angle,
        expand=expand,
        center=center,
    )

    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)

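# Back-of-the-envelope note on the expand=True branch above (illustrative only, not part of the
# original test; the helper name is hypothetical): with expand=True the output canvas grows to
# roughly W*|cos(a)| + H*|sin(a)| by H*|cos(a)| + W*|sin(a)|, and every rotated point is shifted
# by the translation that keeps the rotated image inside that canvas, which is why the expand=True
# expected boxes sit at larger coordinates than the expand=False ones.
import math


def _expanded_canvas(height, width, angle_deg):
    a = math.radians(angle_deg)
    new_w = width * abs(math.cos(a)) + height * abs(math.sin(a))
    new_h = height * abs(math.cos(a)) + width * abs(math.sin(a))
    return new_h, new_w


# For the fixed input above: _expanded_canvas(64, 64, 45) ~= (90.5, 90.5)
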
def test_correctness_affine_bounding_box_on_fixed_input(device):
    # Check transformation against known expected output
    image_size = (64, 64)
    # xyxy format
    in_boxes = [
        [20, 25, 35, 45],
        [50, 5, 70, 22],
        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
        [1, 1, 5, 5],
    ]
    in_boxes = features.BoundingBox(
        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, dtype=torch.float64
    ).to(device)
    # Tested parameters
    angle = 63
    scale = 0.89
    dx = 0.12
    dy = 0.23

    # Expected bboxes computed using albumentations:
    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
    # expected_bboxes = []
    # for in_box in in_boxes:
    #     n_in_box = normalize_bbox(in_box, *image_size)
    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *image_size)
    #     out_box = denormalize_bbox(n_out_box, *image_size)
    #     expected_bboxes.append(out_box)
    expected_bboxes = [
        (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
        (54.88288587110401, 50.08453280875634, 76.44484547743795, 72.81332520036864),
        (27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844),
        (48.56528888843238, 9.611532109828834, 53.35347829361575, 14.39972151501221),
    ]

    output_boxes = F.affine_bounding_box(
        in_boxes,
        in_boxes.format,
        in_boxes.image_size,
        angle,
        (dx * image_size[1], dy * image_size[0]),
        scale,
        shear=(0, 0),
    )

    assert len(output_boxes) == len(expected_bboxes)
    for a_out_box, out_box in zip(expected_bboxes, output_boxes.cpu()):
        np.testing.assert_allclose(out_box.cpu().numpy(), a_out_box)

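# Hand-derived note on the translate argument above (not part of the original test): the
# albumentations reference appears to take dx/dy as fractions of the image size (it works on
# normalized boxes), while F.affine_bounding_box is given a pixel offset, hence the conversion
# (dx * image_size[1], dy * image_size[0]):
#   dx * 64 = 0.12 * 64 = 7.68 px horizontally, dy * 64 = 0.23 * 64 = 14.72 px vertically.
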
def test_features_bounding_box(self, p):
    input = features.BoundingBox([0, 0, 5, 5], format=features.BoundingBoxFormat.XYXY, image_size=(10, 10))
    transform = transforms.RandomVerticalFlip(p=p)

    actual = transform(input)

    expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
    expected = features.BoundingBox.new_like(input, data=expected_image_tensor)
    assert_equal(expected, actual)
    assert actual.format == expected.format
    assert actual.image_size == expected.image_size

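# Hand-derived check of the fixed values above (illustrative only, not part of the original test;
# the helper name is hypothetical): a vertical flip on a height-10 image maps y -> 10 - y, so the
# xyxy box [0, 0, 5, 5] becomes [0, 10 - 5, 5, 10 - 0] = [0, 5, 5, 10], matching the expected
# tensor used for p == 1.0.
def _check_vertical_flip_by_hand():
    x1, y1, x2, y2 = 0, 0, 5, 5
    height = 10
    assert [x1, height - y2, x2, height - y1] == [0, 5, 5, 10]
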
def test_correctness_rotate_bounding_box(angle, expand, center):
    def _compute_expected_bbox(bbox, angle_, expand_, center_):
        affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_)
        affine_matrix = affine_matrix[:2, :]

        image_size = bbox.image_size
        bbox_xyxy = convert_bounding_box_format(
            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
        )
        points = np.array(
            [
                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
                # image frame
                [0.0, 0.0, 1.0],
                [0.0, image_size[0], 1.0],
                [image_size[1], image_size[0], 1.0],
                [image_size[1], 0.0, 1.0],
            ]
        )
        transformed_points = np.matmul(points, affine_matrix.T)
        out_bbox = [
            np.min(transformed_points[:4, 0]),
            np.min(transformed_points[:4, 1]),
            np.max(transformed_points[:4, 0]),
            np.max(transformed_points[:4, 1]),
        ]
        if expand_:
            tr_x = np.min(transformed_points[4:, 0])
            tr_y = np.min(transformed_points[4:, 1])
            out_bbox[0] -= tr_x
            out_bbox[1] -= tr_y
            out_bbox[2] -= tr_x
            out_bbox[3] -= tr_y
        out_bbox = features.BoundingBox(
            out_bbox,
            format=features.BoundingBoxFormat.XYXY,
            image_size=image_size,
            dtype=torch.float32,
            device=bbox.device,
        )
        return convert_bounding_box_format(
            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
        )

    image_size = (32, 38)

    for bboxes in make_bounding_boxes(image_sizes=[image_size], extra_dims=((4,),)):
        bboxes_format = bboxes.format
        bboxes_image_size = bboxes.image_size

        output_bboxes = F.rotate_bounding_box(
            bboxes,
            bboxes_format,
            image_size=bboxes_image_size,
            angle=angle,
            expand=expand,
            center=center,
        )

        if center is None:
            center = [s // 2 for s in bboxes_image_size[::-1]]

        if bboxes.ndim < 2:
            bboxes = [bboxes]

        expected_bboxes = []
        for bbox in bboxes:
            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
            expected_bboxes.append(_compute_expected_bbox(bbox, -angle, expand, center))
        if len(expected_bboxes) > 1:
            expected_bboxes = torch.stack(expected_bboxes)
        else:
            expected_bboxes = expected_bboxes[0]
        torch.testing.assert_close(output_bboxes, expected_bboxes)

def test_correctness_affine_bounding_box(angle, translate, scale, shear, center):
    def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_, center_):
        affine_matrix = _compute_affine_matrix(angle_, translate_, scale_, shear_, center_)
        affine_matrix = affine_matrix[:2, :]

        bbox_xyxy = convert_bounding_box_format(
            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
        )
        points = np.array(
            [
                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
            ]
        )
        transformed_points = np.matmul(points, affine_matrix.T)
        out_bbox = [
            np.min(transformed_points[:, 0]),
            np.min(transformed_points[:, 1]),
            np.max(transformed_points[:, 0]),
            np.max(transformed_points[:, 1]),
        ]
        out_bbox = features.BoundingBox(
            out_bbox,
            format=features.BoundingBoxFormat.XYXY,
            image_size=bbox.image_size,
            dtype=torch.float32,
            device=bbox.device,
        )
        return convert_bounding_box_format(
            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
        )

    image_size = (32, 38)

    for bboxes in make_bounding_boxes(image_sizes=[image_size], extra_dims=((4,),)):
        bboxes_format = bboxes.format
        bboxes_image_size = bboxes.image_size

        output_bboxes = F.affine_bounding_box(
            bboxes,
            bboxes_format,
            image_size=bboxes_image_size,
            angle=angle,
            translate=(translate, translate),
            scale=scale,
            shear=(shear, shear),
            center=center,
        )

        if center is None:
            center = [s // 2 for s in bboxes_image_size[::-1]]

        if bboxes.ndim < 2:
            bboxes = [bboxes]

        expected_bboxes = []
        for bbox in bboxes:
            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
            expected_bboxes.append(
                _compute_expected_bbox(bbox, angle, (translate, translate), scale, (shear, shear), center)
            )
        if len(expected_bboxes) > 1:
            expected_bboxes = torch.stack(expected_bboxes)
        else:
            expected_bboxes = expected_bboxes[0]
        torch.testing.assert_close(output_bboxes, expected_bboxes)

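# `_compute_affine_matrix` is used by the expected-bbox helpers in the rotate and affine tests
# above but is not shown in this snippet. A minimal sketch under a common convention (rotate,
# scale, and shear about `center`, then translate); the name and argument order come from the
# call sites above, the body itself is an assumption.
import math

import numpy as np


def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
    rot = math.radians(angle_)
    cx, cy = center_
    tx, ty = translate_
    sx, sy = [math.radians(s) for s in shear_]

    # Move the center to the origin, apply rotation/scale/shear, move back, then translate.
    c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
    t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
    c_matrix_inv = np.linalg.inv(c_matrix)
    rs_matrix = np.array(
        [
            [scale_ * math.cos(rot), -scale_ * math.sin(rot), 0],
            [scale_ * math.sin(rot), scale_ * math.cos(rot), 0],
            [0, 0, 1],
        ]
    )
    shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
    shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
    rss_matrix = np.matmul(rs_matrix, np.matmul(shear_x_matrix, shear_y_matrix))
    return np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv)))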