def test_propagates_label_unchanged(self):
        video = tensor_video(min_width=1, min_height=1).example()
        channel_count = video.shape[0]
        transform = NormalizeVideo(torch.ones(channel_count),
                                   torch.ones(channel_count))

        assert_preserves_label(transform, video)
    def test_distribution_is_normal_after_transform(self, ndim):
        """Basically a direct copy of
        https://github.com/pytorch/vision/blob/master/test/test_transforms.py#L753"""
        def kstest(tensor):
            return stats.kstest(tensor.view(-1).numpy(), "norm",
                                args=(0, 1)).pvalue

        significance = 0.0001
        for channel_count in [1, 3]:
            # video is normally distributed ~ N(5, 10)
            if ndim == 2:
                shape = [channel_count, 500]
            elif ndim == 3:
                shape = [channel_count, 10, 50]
            else:
                shape = [channel_count, 5, 10, 10]
            video = torch.randn(*shape) * 10 + 5
            # We want the video not to be sampled from N(0, 1)
            # i.e. we want to reject the null hypothesis that video is from this
            # distribution
            assert kstest(video) <= significance

            mean = [video[c].mean() for c in range(channel_count)]
            std = [video[c].std() for c in range(channel_count)]
            normalized = NormalizeVideo(mean, std)(video)

            # Check the video *is* sampled from N(0, 1)
            # i.e. we want to maintain the null hypothesis that the normalised video is
            # from this distribution
            assert kstest(normalized) >= significance
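
The kstest check above can be sanity-checked in isolation. Here is a minimal sketch of the same technique, assuming only numpy and scipy (it is not part of the test suite): standardising samples drawn from N(10, 5) with their empirical mean and standard deviation should yield data the test accepts as N(0, 1).

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
samples = rng.standard_normal(10_000) * 5 + 10  # ~ N(10, 5)
# Before standardisation, N(0, 1) should be rejected at this significance...
assert stats.kstest(samples, "norm", args=(0, 1)).pvalue < 0.0001
# ...and afterwards the null hypothesis should be maintained.
normalised = (samples - samples.mean()) / samples.std()
assert stats.kstest(normalised, "norm", args=(0, 1)).pvalue >= 0.0001
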
    def test_preserves_channel_count(self, data):
        video = data.draw(tensor_video())
        input_channel_count = video.size(0)
        mean = np.random.randn(input_channel_count)
        std = np.random.randn(input_channel_count)
        note(mean)
        note(std)
        transform = NormalizeVideo(mean, std)

        transformed_video = transform(video)

        output_channel_count = transformed_video.size(0)
        assert input_channel_count == output_channel_count
    def test_distribution_is_normal_after_transform(self, ndim, data):
        """Basically a direct copy of
        https://github.com/pytorch/vision/blob/master/test/test_transforms.py#L753"""
        def samples_from_standard_normal(tensor: torch.Tensor,
                                         significance: float) -> bool:
            p_value = stats.kstest(tensor.view(-1).numpy(),
                                   "norm",
                                   args=(0, 1)).pvalue
            return p_value >= significance

        significance = 0.0001
        for channel_count in [1, 3]:
            # video is normally distributed ~ N(10, 5)
            channel_dim = 0
            if ndim == 2:
                shape = [channel_count, 500]
            elif ndim == 3:
                shape = [channel_count, 10, 50]
            else:
                channel_dim = data.draw(st.sampled_from([0, 1]))
                if channel_dim == 0:
                    shape = [channel_count, 5, 10, 10]
                else:
                    shape = [5, channel_count, 10, 10]

            video = torch.from_numpy(np.random.randn(*shape)).to(
                torch.float32) * 5 + 10
            # We want the video not to be sampled from N(0, 1)
            # i.e. we want to reject the null hypothesis that video is from this
            # distribution
            assume(not samples_from_standard_normal(video, significance))

            def get_stats(video: torch.Tensor, channel_dim, channel_count):
                video = video.transpose(
                    0, channel_dim)  # put channel dim at 0th index
                mean = [video[c].mean() for c in range(channel_count)]
                std = [video[c].std() for c in range(channel_count)]
                return mean, std

            mean, std = get_stats(video, channel_dim, channel_count)
            normalized = NormalizeVideo(mean, std,
                                        channel_dim=channel_dim)(video)

            # Check the video *is* sampled from N(0, 1)
            # i.e. we want to maintain the null hypothesis that the normalised video is
            # from this distribution
            assert samples_from_standard_normal(normalized, significance)
    def test_transform_not_inplace(self):
        transform = NormalizeVideo([10], [5], inplace=False)
        pre_transform_tensor = torch.randn(1, 2, 3, 4)
        post_transform_tensor = transform(pre_transform_tensor)

        assert not torch.equal(pre_transform_tensor, post_transform_tensor)
    def test_raises_value_error_when_length_of_mean_is_not_equal_to_channel_count(
            self):
        transform = NormalizeVideo([10, 10], [5, 5])

        with pytest.raises(ValueError):
            transform(torch.randn(3, 1, 1, 1))
    def test_raises_value_error_when_length_of_std_and_mean_dont_match(self):
        with pytest.raises(ValueError):
            NormalizeVideo([10], [5, 0])
    def test_raises_value_error_on_0_element_in_std_vector(self):
        with pytest.raises(ValueError):
            NormalizeVideo([10, 10], [5, 0])
    def test_raises_value_error_on_0_std(self):
        with pytest.raises(ValueError):
            NormalizeVideo(10, 0)
    def test_vector_statistics_smoke(self, video):
        mean = [128] * video.shape[0]
        std = [1] * video.shape[0]
        NormalizeVideo(mean, std)(video)
    def test_scalar_statistics_smoke(self, video):
        NormalizeVideo(128, 1)(video)
    def test_repr(self):
        assert (repr(NormalizeVideo(128, 15, channel_dim=0)) ==
                "NormalizeVideo(mean=128, std=15, channel_dim=0)")
    def test_repr(self):
        assert repr(NormalizeVideo(128,
                                   15)) == "NormalizeVideo(mean=128, std=15)"
def get_transforms(
    args, model_settings: RGB2DModelSettings
) -> Tuple[Callable[[Any], torch.Tensor], Callable[[Any], torch.Tensor]]:
    train_transforms = []

    # model_settings.input_size is to be interpreted based on model_settings.input_order
    input_order = model_settings.input_order.lower()
    if input_order.endswith("hw"):
        model_input_size = model_settings.input_size[-2:]
        input_height, input_width = model_input_size
    else:
        raise NotImplementedError(
            f"Unsupported input ordering: {input_order!r}")

    if args.augment_hflip:
        LOG.info("Using horizontal flipping")
        train_transforms.append(RandomHorizontalFlipVideo())
    if args.preserve_aspect_ratio:
        LOG.info(f"Preserving aspect ratio of videos")
        rescaled_size: Union[int, Tuple[int, int]] = int(
            input_height * args.image_scale_factor)
    else:
        rescaled_size = (
            int(input_height * args.image_scale_factor),
            int(input_width * args.image_scale_factor),
        )
        LOG.info(f"Squashing videos to {rescaled_size}")
    train_transforms.append(ResizeVideo(rescaled_size))
    LOG.info(f"Resizing videos to {rescaled_size}")
    if args.augment_crop:
        LOG.info(f"Using multiscale cropping "
                 f"(scales: {args.augment_crop_scales}, "
                 f"fixed_crops: {args.augment_crop_fixed_crops}, "
                 f"more_fixed_crops: {args.augment_crop_more_fixed_crops}"
                 f")")
        train_transforms.append(
            MultiScaleCropVideo(
                model_input_size,
                scales=args.augment_crop_scales,
                fixed_crops=args.augment_crop_fixed_crops,
                more_fixed_crops=args.augment_crop_more_fixed_crops,
            ))
    else:
        LOG.info(f"Cropping videos to {model_input_size}")
        train_transforms.append(RandomCropVideo(model_input_size))

    channel_dim = input_order.find("c")
    if channel_dim == -1:
        raise ValueError(
            f"Could not determine channel position in input_order {input_order!r}"
        )
    if model_settings.input_space == "BGR":
        LOG.info(f"Flipping channels from RGB to BGR")
        channel_transform = FlipChannels(channel_dim)
    else:
        assert model_settings.input_space == "RGB"
        channel_transform = IdentityTransform()
    common_transforms = [
        PILVideoToTensor(
            rescale=model_settings.input_range[-1] != 255,
            ordering=input_order,
        ),
        channel_transform,
        NormalizeVideo(mean=model_settings.mean,
                       std=model_settings.std,
                       channel_dim=channel_dim),
    ]
    train_transform = Compose(train_transforms + common_transforms)
    LOG.info(f"Training transform: {train_transform!r}")
    validation_transform = Compose(
        [ResizeVideo(rescaled_size),
         CenterCropVideo(model_input_size)] + common_transforms)
    LOG.info(f"Validation transform: {validation_transform!r}")
    return train_transform, validation_transform
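
A hedged usage sketch of get_transforms follows: the args flags and the RGB2DModelSettings fields are inferred from the attribute accesses in the function body above, and every value here is illustrative only.

from argparse import Namespace
from types import SimpleNamespace

# Stand-in for an RGB2DModelSettings instance; field names mirror the
# attribute accesses in get_transforms, values are hypothetical.
settings = SimpleNamespace(
    input_order="CTHW",           # channels first, spatial dims last
    input_size=(3, 8, 224, 224),  # interpreted via input_order
    input_space="RGB",
    input_range=(0, 1),           # max != 255, so PILVideoToTensor rescales
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
args = Namespace(
    augment_hflip=True,
    preserve_aspect_ratio=True,
    image_scale_factor=1.15,      # shorter side becomes int(224 * 1.15)
    augment_crop=False,
    augment_crop_scales=None,
    augment_crop_fixed_crops=None,
    augment_crop_more_fixed_crops=None,
)
train_transform, validation_transform = get_transforms(args, settings)
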