Example #1
    def test_converts_thwc_to_PIL_video(self, shape):
        t, h, w = shape
        video = self.make_uint8_ndarray((t, h, w, 3))
        transform = Compose([NDArrayToPILVideo(), CollectFrames()])

        pil_video = transform(video)

        assert len(pil_video) == t
        assert pil_video[0].size[0] == w
        assert pil_video[0].size[1] == h
        assert all([f.mode == "RGB" for f in pil_video])
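make_uint8_ndarray is a helper that is not shown in this example; a minimal sketch of what it presumably does, assuming numpy is imported as np in the test module (the project's actual helper may differ):

    @staticmethod
    def make_uint8_ndarray(shape):
        # Hypothetical helper: random uint8 pixels in [0, 255] with the
        # requested (T, H, W, C) shape.
        return np.random.randint(0, 256, size=shape, dtype=np.uint8)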
Example #2
    def test_raises_error_if_target_is_not_passed_when_a_transform_requires_target(
            self):
        transforms = [
            MockFramesAndRequiredTargetTransform(None,
                                                 None,
                                                 name="MyTransform")
        ]
        composed_transform = Compose(transforms)
        frames = pil_video().example()

        with pytest.raises(TypeError, match="MyTransform"):
            composed_transform(frames)
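pil_video() is a Hypothesis strategy used throughout these tests to generate short lists of PIL frames; it is not part of the example. A rough, self-contained sketch of such a strategy (the project's real strategy is likely more elaborate):

from hypothesis import strategies as st
from PIL import Image

@st.composite
def pil_video(draw, min_length=1, max_length=4, min_size=1, max_size=8):
    # Hypothetical stand-in: draw a frame count and a frame size, then build
    # that many blank RGB frames of that size.
    t = draw(st.integers(min_length, max_length))
    w = draw(st.integers(min_size, max_size))
    h = draw(st.integers(min_size, max_size))
    return [Image.new("RGB", (w, h)) for _ in range(t)]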
Example #3
    def test_calls_frames_only_transforms_sequentially(self):
        frames = pil_video().example()
        transforms, results = self.gen_transforms(5)
        composed_transform = Compose(transforms)

        transformed_frames = composed_transform(frames)

        transforms[0].assert_called_once_with(frames)
        transforms[1].assert_called_once_with(results[0])
        transforms[2].assert_called_once_with(results[1])
        transforms[3].assert_called_once_with(results[2])
        transforms[4].assert_called_once_with(results[3])
        assert transformed_frames == results[-1]
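gen_transforms is another helper that is not shown; judging from Example #4, it plausibly builds a chain of MockFramesOnlyTransform instances whose sentinel return values let the test verify the call order. A sketch under that assumption:

    def gen_transforms(self, n):
        # Hypothetical helper: each mock returns a distinct sentinel so the
        # test can assert that transform i receives the output of transform i - 1.
        results = [f"transform_result_{i}" for i in range(n)]
        transforms = [MockFramesOnlyTransform(result) for result in results]
        return transforms, results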
Example #4
    def test_passes_target_to_supporting_transforms(self):
        results = [
            "transform_result_0", "transform_result_1", "transform_result_2"
        ]
        transforms = [
            MockFramesOnlyTransform(results[0]),
            MockFramesAndOptionalTargetTransform(results[1],
                                                 target_return_value=-2),
            MockFramesAndRequiredTargetTransform(results[2],
                                                 target_return_value=-3),
        ]
        composed_transform = Compose(transforms)
        frames = pil_video().example()

        target = -1
        transformed_frames, transformed_target = composed_transform(
            frames, target)

        transforms[0].assert_called_once_with(frames)
        transforms[1].assert_called_once_with(results[0], target=target)
        transforms[2].assert_called_once_with(results[1], -2)
        assert results[-1] == transformed_frames
        assert transformed_target == -3
Example #5
    def test_nested_repr(self):
        t1 = CenterCropVideo(224)
        t2 = CenterCropVideo(16)
        assert (repr(Compose([
            t1, Compose([t2])
        ])) == f"Compose(transforms=[{t1!r}, Compose(transforms=[{t2!r}])])")
Example #6
    def test_single_level_repr(self):
        t = CenterCropVideo(224)
        assert repr(Compose([t])) == f"Compose(transforms=[{t!r}])"
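Outside the tests, a pipeline like the ones above is built and applied directly. A minimal usage sketch, assuming the transform classes shown in these examples come from torchvideo.transforms and that frames is an iterable of PIL images:

from torchvideo.transforms import (Compose, ResizeVideo, CenterCropVideo,
                                   PILVideoToTensor)

transform = Compose([
    ResizeVideo(256),      # an int preserves aspect ratio (see Example #7)
    CenterCropVideo(224),  # crop every frame to 224x224
    PILVideoToTensor(),    # convert the list of PIL frames to a tensor
])
clip = transform(frames)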
Example #7
def get_transforms(
    args, model_settings: RGB2DModelSettings
) -> Tuple[Callable[[Any], torch.Tensor], Callable[[Any], torch.Tensor]]:
    train_transforms = []

    # model_settings.input_size is to be interpreted based on model_settings.input_order
    input_order = model_settings.input_order.lower()
    if input_order.endswith("hw"):
        input_height, input_width = model_input_size = model_settings.input_size[
            -2:]
    else:
        raise NotImplementedError(
            "Unsupported input ordering: {}".format(input_order))

    if args.augment_hflip:
        LOG.info("Using horizontal flipping")
        train_transforms.append(RandomHorizontalFlipVideo())
    if args.preserve_aspect_ratio:
        LOG.info(f"Preserving aspect ratio of videos")
        rescaled_size: Union[int, Tuple[int, int]] = int(
            input_height * args.image_scale_factor)
    else:
        rescaled_size = (
            int(input_height * args.image_scale_factor),
            int(input_width * args.image_scale_factor),
        )
        LOG.info(f"Squashing videos to {rescaled_size}")
    train_transforms.append(ResizeVideo(rescaled_size))
    LOG.info(f"Resizing videos to {rescaled_size}")
    if args.augment_crop:
        LOG.info(f"Using multiscale cropping "
                 f"(scales: {args.augment_crop_scales}, "
                 f"fixed_crops: {args.augment_crop_fixed_crops}, "
                 f"more_fixed_crops: {args.augment_crop_more_fixed_crops}"
                 f")")
        train_transforms.append(
            MultiScaleCropVideo(
                model_input_size,
                scales=args.augment_crop_scales,
                fixed_crops=args.augment_crop_fixed_crops,
                more_fixed_crops=args.augment_crop_more_fixed_crops,
            ))
    else:
        LOG.info(f"Cropping videos to {model_input_size}")
        train_transforms.append(RandomCropVideo(model_input_size))

    channel_dim = input_order.find("c")
    if channel_dim == -1:
        raise ValueError(
            f"Could not determine channel position in input_order {input_order!r}"
        )
    if model_settings.input_space == "BGR":
        LOG.info(f"Flipping channels from RGB to BGR")
        channel_transform = FlipChannels(channel_dim)
    else:
        assert model_settings.input_space == "RGB"
        channel_transform = IdentityTransform()
    common_transforms = [
        PILVideoToTensor(
            rescale=model_settings.input_range[-1] != 255,
            ordering=input_order,
        ),
        channel_transform,
        NormalizeVideo(mean=model_settings.mean,
                       std=model_settings.std,
                       channel_dim=channel_dim),
    ]
    train_transform = Compose(train_transforms + common_transforms)
    LOG.info(f"Training transform: {train_transform!r}")
    validation_transform = Compose(
        [ResizeVideo(rescaled_size),
         CenterCropVideo(model_input_size)] + common_transforms)
    LOG.info(f"Validation transform: {validation_transform!r}")
    return train_transform, validation_transform
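For reference, a sketch of how the returned pair might be consumed; args, model_settings and the frame variables below are placeholders, not part of the example:

train_transform, validation_transform = get_transforms(args, model_settings)

# Each returned callable maps a PIL video (an iterable of PIL frames) to a
# torch.Tensor laid out according to model_settings.input_order.
train_clip = train_transform(train_frames)
validation_clip = validation_transform(validation_frames)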