def test_bounding_box(self):
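        # Run a small resize + horizontal-flip pipeline on a BoundingBox,
        # both eagerly and through torch.jit.trace, and compare the results.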
        def resize(input: features.BoundingBox,
                   size: torch.Tensor) -> features.BoundingBox:
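            # `size` is (new_height, new_width); scale each coordinate by the
            # per-axis ratio to the old image size.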
            old_height, old_width = input.image_size
            new_height, new_width = size

            height_scale = new_height / old_height
            width_scale = new_width / old_width

            old_x1, old_y1, old_x2, old_y2 = input.convert("xyxy").to_parts()

            new_x1 = old_x1 * width_scale
            new_y1 = old_y1 * height_scale

            new_x2 = old_x2 * width_scale
            new_y2 = old_y2 * height_scale

            return features.BoundingBox.from_parts(new_x1,
                                                   new_y1,
                                                   new_x2,
                                                   new_y2,
                                                   like=input,
                                                   format="xyxy",
                                                   image_size=tuple(
                                                       size.tolist()))

        def horizontal_flip(
                input: features.BoundingBox) -> features.BoundingBox:
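            # Mirror the box across the vertical axis; image_size is
            # (height, width), so index 1 is the image width.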
            x, y, w, h = input.convert("xywh").to_parts()
            x = input.image_size[1] - (x + w)
            return features.BoundingBox.from_parts(x,
                                                   y,
                                                   w,
                                                   h,
                                                   like=input,
                                                   format="xywh")

        def compose(input: features.BoundingBox,
                    size: torch.Tensor) -> features.BoundingBox:
            return horizontal_flip(resize(input, size)).convert("xyxy")

        image_size = (8, 6)
        input = features.BoundingBox([2, 4, 2, 4],
                                     format="cxcywh",
                                     image_size=image_size)
        size = torch.tensor((4, 12))
        expected = features.BoundingBox([6, 1, 10, 3],
                                        format="xyxy",
                                        image_size=image_size)

        actual_eager = compose(input, size)
        assert_close(actual_eager, expected)

        sample_inputs = (features.BoundingBox(torch.zeros((4, )),
                                              image_size=(10, 10)),
                         torch.tensor((20, 5)))
        actual_jit = torch.jit.trace(compose, sample_inputs,
                                     check_trace=False)(input, size)
        assert_close(actual_jit, expected)
def make_bounding_box(*,
                      format,
                      image_size=(32, 32),
                      extra_dims=(),
                      dtype=torch.int64):
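    # Sample a random box in the requested format whose coordinates stay
    # inside `image_size`; `extra_dims` prepends extra batch dimensions.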
    if isinstance(format, str):
        format = features.BoundingBoxFormat[format]

    height, width = image_size

    if format == features.BoundingBoxFormat.XYXY:
        x1 = torch.randint(0, width // 2, extra_dims)
        y1 = torch.randint(0, height // 2, extra_dims)
        x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1
        y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1
        parts = (x1, y1, x2, y2)
    elif format == features.BoundingBoxFormat.XYWH:
        x = torch.randint(0, width // 2, extra_dims)
        y = torch.randint(0, height // 2, extra_dims)
        w = randint_with_tensor_bounds(1, width - x)
        h = randint_with_tensor_bounds(1, height - y)
        parts = (x, y, w, h)
    elif format == features.BoundingBoxFormat.CXCYWH:
        cx = torch.randint(1, width - 1, extra_dims)
        cy = torch.randint(1, height - 1, extra_dims)
        w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1)
        h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1)
        parts = (cx, cy, w, h)
    else:
        raise pytest.UsageError()

    return features.BoundingBox(torch.stack(parts, dim=-1).to(dtype),
                                format=format,
                                image_size=image_size)
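# Minimal usage sketch (hypothetical values; assumes this helper and
# `randint_with_tensor_bounds` live in the same test module):
#   box = make_bounding_box(format="XYWH", image_size=(16, 16), extra_dims=(2,))
#   assert box.shape == (2, 4)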
def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
    # Check transformation against known expected output
    image_size = (64, 64)
    # xyxy format
    in_boxes = [
        [1, 1, 5, 5],
        [1, image_size[0] - 6, 5, image_size[0] - 2],
        [
            image_size[1] - 6, image_size[0] - 6, image_size[1] - 2,
            image_size[0] - 2
        ],
        [
            image_size[1] // 2 - 10, image_size[0] // 2 - 10,
            image_size[1] // 2 + 10, image_size[0] // 2 + 10
        ],
    ]
    in_boxes = features.BoundingBox(in_boxes,
                                    format=features.BoundingBoxFormat.XYXY,
                                    image_size=image_size,
                                    dtype=torch.float64,
                                    device=device)
    # Tested parameters
    angle = 45
    center = None if expand else [12, 23]

    # # Expected bboxes computed using Detectron2:
    # from detectron2.data.transforms import RotationTransform, AugmentationList
    # from detectron2.data.transforms import AugInput
    # import cv2
    # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32"))
    # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ])
    # out = augs(inpt)
    # print(inpt.boxes)
    if expand:
        expected_bboxes = [
            [1.65937957, 42.67157288, 7.31623382, 48.32842712],
            [41.96446609, 82.9766594, 47.62132034, 88.63351365],
            [82.26955262, 42.67157288, 87.92640687, 48.32842712],
            [31.35786438, 31.35786438, 59.64213562, 59.64213562],
        ]
    else:
        expected_bboxes = [
            [-11.33452378, 12.39339828, -5.67766953, 18.05025253],
            [28.97056275, 52.69848481, 34.627417, 58.35533906],
            [69.27564928, 12.39339828, 74.93250353, 18.05025253],
            [18.36396103, 1.07968978, 46.64823228, 29.36396103],
        ]

    output_boxes = F.rotate_bounding_box(
        in_boxes,
        in_boxes.format,
        in_boxes.image_size,
        angle,
        expand=expand,
        center=center,
    )

    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
def test_correctness_affine_bounding_box_on_fixed_input(device):
    # Check transformation against known expected output
    image_size = (64, 64)
    # xyxy format
    in_boxes = [
        [20, 25, 35, 45],
        [50, 5, 70, 22],
        [
            image_size[1] // 2 - 10, image_size[0] // 2 - 10,
            image_size[1] // 2 + 10, image_size[0] // 2 + 10
        ],
        [1, 1, 5, 5],
    ]
    in_boxes = features.BoundingBox(in_boxes,
                                    format=features.BoundingBoxFormat.XYXY,
                                    image_size=image_size,
                                    dtype=torch.float64).to(device)
    # Tested parameters
    angle = 63
    scale = 0.89
    dx = 0.12
    dy = 0.23

    # Expected bboxes computed using albumentations:
    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
    # expected_bboxes = []
    # for in_box in in_boxes:
    #     n_in_box = normalize_bbox(in_box, *image_size)
    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *image_size)
    #     out_box = denormalize_bbox(n_out_box, *image_size)
    #     expected_bboxes.append(out_box)
    expected_bboxes = [
        (24.522435977922218, 34.375689508290854, 46.443125279998114,
         54.3516575015695),
        (54.88288587110401, 50.08453280875634, 76.44484547743795,
         72.81332520036864),
        (27.709526487041554, 34.74952648704156, 51.650473512958435,
         58.69047351295844),
        (48.56528888843238, 9.611532109828834, 53.35347829361575,
         14.39972151501221),
    ]

    output_boxes = F.affine_bounding_box(
        in_boxes,
        in_boxes.format,
        in_boxes.image_size,
        angle,
        (dx * image_size[1], dy * image_size[0]),
        scale,
        shear=(0, 0),
    )

    assert len(output_boxes) == len(expected_bboxes)
    for a_out_box, out_box in zip(expected_bboxes, output_boxes.cpu()):
        np.testing.assert_allclose(out_box.cpu().numpy(), a_out_box)
    def test_features_bounding_box(self, p):
        input = features.BoundingBox([0, 0, 5, 5], format=features.BoundingBoxFormat.XYXY, image_size=(10, 10))
        transform = transforms.RandomVerticalFlip(p=p)

        actual = transform(input)

        expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
        expected = features.BoundingBox.new_like(input, data=expected_image_tensor)
        assert_equal(expected, actual)
        assert actual.format == expected.format
        assert actual.image_size == expected.image_size
def test_correctness_rotate_bounding_box(angle, expand, center):
    def _compute_expected_bbox(bbox, angle_, expand_, center_):
        affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0,
                                               [0.0, 0.0], center_)
        affine_matrix = affine_matrix[:2, :]

        image_size = bbox.image_size
        bbox_xyxy = convert_bounding_box_format(
            bbox,
            old_format=bbox.format,
            new_format=features.BoundingBoxFormat.XYXY)
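        # Transform the four bbox corners together with the four image-frame
        # corners; the image-frame points are only used to re-anchor the box
        # when expand_ is True.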
        points = np.array([
            [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
            [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
            [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
            [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
            # image frame
            [0.0, 0.0, 1.0],
            [0.0, image_size[0], 1.0],
            [image_size[1], image_size[0], 1.0],
            [image_size[1], 0.0, 1.0],
        ])
        transformed_points = np.matmul(points, affine_matrix.T)
        out_bbox = [
            np.min(transformed_points[:4, 0]),
            np.min(transformed_points[:4, 1]),
            np.max(transformed_points[:4, 0]),
            np.max(transformed_points[:4, 1]),
        ]
        if expand_:
            tr_x = np.min(transformed_points[4:, 0])
            tr_y = np.min(transformed_points[4:, 1])
            out_bbox[0] -= tr_x
            out_bbox[1] -= tr_y
            out_bbox[2] -= tr_x
            out_bbox[3] -= tr_y

        out_bbox = features.BoundingBox(
            out_bbox,
            format=features.BoundingBoxFormat.XYXY,
            image_size=image_size,
            dtype=torch.float32,
            device=bbox.device,
        )
        return convert_bounding_box_format(
            out_bbox,
            old_format=features.BoundingBoxFormat.XYXY,
            new_format=bbox.format,
            copy=False)

    image_size = (32, 38)

    for bboxes in make_bounding_boxes(
            image_sizes=[
                image_size,
            ],
            extra_dims=((4, ), ),
    ):
        bboxes_format = bboxes.format
        bboxes_image_size = bboxes.image_size

        output_bboxes = F.rotate_bounding_box(
            bboxes,
            bboxes_format,
            image_size=bboxes_image_size,
            angle=angle,
            expand=expand,
            center=center,
        )

        if center is None:
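            # The kernel rotates about the image center when center is None;
            # image_size is (height, width), so reverse it to get (x, y).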
            center = [s // 2 for s in bboxes_image_size[::-1]]

        if bboxes.ndim < 2:
            bboxes = [bboxes]

        expected_bboxes = []
        for bbox in bboxes:
            bbox = features.BoundingBox(bbox,
                                        format=bboxes_format,
                                        image_size=bboxes_image_size)
            expected_bboxes.append(
                _compute_expected_bbox(bbox, -angle, expand, center))
        if len(expected_bboxes) > 1:
            expected_bboxes = torch.stack(expected_bboxes)
        else:
            expected_bboxes = expected_bboxes[0]
        torch.testing.assert_close(output_bboxes, expected_bboxes)
def test_correctness_affine_bounding_box(angle, translate, scale, shear,
                                         center):
    def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_,
                               center_):
        affine_matrix = _compute_affine_matrix(angle_, translate_, scale_,
                                               shear_, center_)
        affine_matrix = affine_matrix[:2, :]

        bbox_xyxy = convert_bounding_box_format(
            bbox,
            old_format=bbox.format,
            new_format=features.BoundingBoxFormat.XYXY)
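        # Transform the four bbox corners with the affine matrix and take the
        # axis-aligned envelope (min/max over x and y) as the expected box.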
        points = np.array([
            [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
            [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
            [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
            [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
        ])
        transformed_points = np.matmul(points, affine_matrix.T)
        out_bbox = [
            np.min(transformed_points[:, 0]),
            np.min(transformed_points[:, 1]),
            np.max(transformed_points[:, 0]),
            np.max(transformed_points[:, 1]),
        ]
        out_bbox = features.BoundingBox(
            out_bbox,
            format=features.BoundingBoxFormat.XYXY,
            image_size=bbox.image_size,
            dtype=torch.float32,
            device=bbox.device,
        )
        return convert_bounding_box_format(
            out_bbox,
            old_format=features.BoundingBoxFormat.XYXY,
            new_format=bbox.format,
            copy=False)

    image_size = (32, 38)

    for bboxes in make_bounding_boxes(
            image_sizes=[
                image_size,
            ],
            extra_dims=((4, ), ),
    ):
        bboxes_format = bboxes.format
        bboxes_image_size = bboxes.image_size

        output_bboxes = F.affine_bounding_box(
            bboxes,
            bboxes_format,
            image_size=bboxes_image_size,
            angle=angle,
            translate=(translate, translate),
            scale=scale,
            shear=(shear, shear),
            center=center,
        )

        if center is None:
            center = [s // 2 for s in bboxes_image_size[::-1]]

        if bboxes.ndim < 2:
            bboxes = [bboxes]

        expected_bboxes = []
        for bbox in bboxes:
            bbox = features.BoundingBox(bbox,
                                        format=bboxes_format,
                                        image_size=bboxes_image_size)
            expected_bboxes.append(
                _compute_expected_bbox(bbox, angle, (translate, translate),
                                       scale, (shear, shear), center))
        if len(expected_bboxes) > 1:
            expected_bboxes = torch.stack(expected_bboxes)
        else:
            expected_bboxes = expected_bboxes[0]
        torch.testing.assert_close(output_bboxes, expected_bboxes)