Esempio n. 1
0
    def test_uniform_crop(self):
        """Check ``uniform_crop`` against hand-computed slices for every ``spatial_idx``.

        Two dummy clips are used, one wider than tall and one taller than wide
        (per the dimensions passed to ``create_dummy_video_frames``).  For each
        clip, ``spatial_idx`` 0, 1 and 2 must select the start, middle and end
        window along the longer side, while the shorter side is always cropped
        to ``5:25``.
        """
        crop_size = 20
        short_side_window = slice(5, 25)

        # Height < width: the crop window slides along the last (width) axis.
        wide_clip = thwc_to_cthw(
            create_dummy_video_frames(20, 30, 40)
        ).to(dtype=torch.float32)
        width_windows = (
            (0, slice(None, 20)),  # Left crop.
            (1, slice(10, 30)),    # Center crop.
            (2, slice(20, None)),  # Right crop.
        )
        for spatial_idx, width_window in width_windows:
            cropped = uniform_crop(
                wide_clip, size=crop_size, spatial_idx=spatial_idx)
            expected = wide_clip[:, :, short_side_window, width_window]
            self.assertTrue(cropped.equal(expected))

        # Height > width: the crop window slides along the height axis instead.
        tall_clip = thwc_to_cthw(
            create_dummy_video_frames(20, 40, 30)
        ).to(dtype=torch.float32)
        height_windows = (
            (0, slice(None, 20)),  # Top crop.
            (1, slice(10, 30)),    # Center crop.
            (2, slice(20, None)),  # Bottom crop.
        )
        for spatial_idx, height_window in height_windows:
            cropped = uniform_crop(
                tall_clip, size=crop_size, spatial_idx=spatial_idx)
            expected = tall_clip[:, :, height_window, short_side_window]
            self.assertTrue(cropped.equal(expected))
    def test_reading_from_directory_structure(self, decoder):
        """Round-trip two lossless videos through a class-per-directory dataset.

        A temporary root is populated with one sub-directory per class
        ("running" and "cleaning windows"), each holding a single ``.mp4``.
        The dataset built from that root must yield the videos with labels 0
        and 1 and with frames equal to the data that was written.

        Args:
            decoder: Passed straight through to ``LabeledVideoDataset`` as its
                ``decoder`` argument (presumably supplied by a parameterizing
                decorator that is not visible here).
        """
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:
            root_path = pathlib.Path(root_dir)

            # One directory per class, one dummy clip per directory.
            running_dir = root_path / "running"
            running_dir.mkdir()
            running_frames = create_dummy_video_frames(15, 10, 10)
            cleaning_dir = root_path / "cleaning windows"
            cleaning_dir.mkdir()
            cleaning_frames = create_dummy_video_frames(20, 15, 15)

            with tempfile.NamedTemporaryFile(
                    suffix=".mp4", dir=running_dir) as running_file:
                with tempfile.NamedTemporaryFile(
                        suffix=".mp4", dir=cleaning_dir) as cleaning_file:
                    # Only the generated names are needed; the handles are
                    # closed before the videos are written to those paths.
                    running_file.close()
                    cleaning_file.close()

                    # Write lossless video for each class.
                    videos_to_write = (
                        (running_file.name, running_frames),
                        (cleaning_file.name, cleaning_frames),
                    )
                    for video_path, frames in videos_to_write:
                        io.write_video(
                            video_path,
                            frames,
                            fps=30,
                            video_codec="libx264rgb",
                            options={"crf": "0"},
                        )

                    dataset = LabeledVideoDataset(
                        LabeledVideoPaths.from_path(root_dir),
                        clip_sampler=make_clip_sampler("uniform", 3),
                        video_sampler=SequentialSampler,
                        decode_audio=False,
                        decoder=decoder,
                    )

                    # Videos are sorted alphabetically so "cleaning windows"
                    # comes back first (label 0), followed by "running".
                    expected_samples = (
                        (0, cleaning_frames),
                        (1, running_frames),
                    )
                    for expected_label, expected_frames in expected_samples:
                        sample = next(dataset)
                        self.assertEqual(sample["label"], expected_label)
                        self.assertTrue(sample["video"].equal(
                            thwc_to_cthw(expected_frames).to(torch.float32)))
Esempio n. 3
0
    def test_uniform_temporal_subsample(self):
        """Exercise ``uniform_temporal_subsample`` for full, half and single-frame requests."""

        def make_clip():
            # Each scenario builds its own clip, exactly as a standalone test would.
            return thwc_to_cthw(
                create_dummy_video_frames(20, 30, 40)
            ).to(dtype=torch.float32)

        # Asking for as many samples as dim 1 holds must return the clip unchanged.
        clip = make_clip()
        subsampled = uniform_temporal_subsample(clip, clip.shape[1])
        self.assertTrue(subsampled.equal(clip))

        # Asking for half as many samples must pick these exact indices along dim 1.
        clip = make_clip()
        subsampled = uniform_temporal_subsample(clip, clip.shape[1] // 2)
        kept_indices = [0, 2, 4, 6, 8, 10, 12, 14, 16, 19]
        self.assertTrue(subsampled.equal(clip[:, kept_indices]))

        # Asking for a single sample must keep only index 0 (dim 1 is preserved).
        clip = make_clip()
        subsampled = uniform_temporal_subsample(clip, 1)
        self.assertTrue(subsampled.equal(clip[:, 0:1]))
Esempio n. 4
0
    def test_compose_with_video_transforms(self):
        """Chain several video transforms in one ``Compose`` and check the output shape.

        The pipeline is applied to the ``"video"`` entry of a sample dict via
        ``ApplyTransformToKey``; the result must have 3 channels,
        ``num_subsample`` frames and a 10x10 spatial size.
        """
        clip = thwc_to_cthw(
            create_dummy_video_frames(20, 30, 40)
        ).to(dtype=torch.float32)
        sample = {"video": clip, "label": 0}

        # Mix subsampling, normalization, scaling, cropping and flipping
        # transforms in a single pipeline to ensure they interact correctly.
        num_subsample = 10
        video_pipeline = Compose([
            UniformTemporalSubsample(num_subsample),
            NormalizeVideo([clip.mean()] * 3, [clip.std()] * 3),
            RandomShortSideScale(min_size=15, max_size=25),
            RandomCropVideo(10),
            RandomHorizontalFlipVideo(p=0.5),
        ])
        transform = Compose([
            ApplyTransformToKey(key="video", transform=video_pipeline),
        ])

        result = transform(sample)
        channels, frames, height, width = result["video"].shape
        dimension_checks = (
            (channels, 3),
            (frames, num_subsample),
            (height, 10),
            (width, 10),
        )
        for observed, expected in dimension_checks:
            self.assertEqual(observed, expected)
Esempio n. 5
0
 def test_repeat_temporal_frames_subsample(self):
     """Check the shapes produced by ``repeat_temporal_frames_subsample``.

     A 32-frame dummy clip is passed with the argument ``(1, 4)``; the result
     must contain exactly two tensors, shaped ``(3, 32, 10, 10)`` and
     ``(3, 8, 10, 10)`` respectively.
     """
     video = thwc_to_cthw(create_dummy_video_frames(
         32, 10, 10)).to(dtype=torch.float32)
     actual = repeat_temporal_frames_subsample(video, (1, 4))
     expected_shapes = ((3, 32, 10, 10), (3, 8, 10, 10))

     # Guard against a vacuous pass: without this check an empty or too-short
     # result would run the loop below fewer times and never fail.
     self.assertEqual(len(actual), len(expected_shapes))
     for clip, expected_shape in zip(actual, expected_shapes):
         self.assertEqual(clip.shape, expected_shape)
Esempio n. 6
0
    def test_normalize(self):
        """Normalizing a clip by its own mean and std must give mean ~0 and std ~1."""
        frames = create_dummy_video_frames(10, 30, 40)
        clip = thwc_to_cthw(frames).to(dtype=torch.float32)

        # Use the clip's own global statistics as the normalization parameters.
        normalizer = Normalize(clip.mean(), clip.std())
        normalized = normalizer(clip)

        self.assertAlmostEqual(normalized.mean().item(), 0)
        self.assertAlmostEqual(normalized.std().item(), 1)
Esempio n. 7
0
        def _init_benchmark_short_side_scale(**kwargs) -> Callable:
            """Prepare a zero-argument callable that runs ``short_side_scale`` once.

            The input clip is built a single time here, so the returned
            callable only performs the scaling call.

            Keyword Args:
                temporal_size: First argument to ``create_dummy_video_frames``.
                ori_spatial_size: Indexable pair giving the second and third
                    arguments to ``create_dummy_video_frames``.
                dst_short_size: Second argument to ``short_side_scale``; it is
                    looked up each time the returned callable runs.

            Returns:
                A callable taking no arguments and returning ``None``.
            """
            num_frames = kwargs["temporal_size"]
            spatial_size = kwargs["ori_spatial_size"]
            frames = create_dummy_video_frames(
                num_frames, spatial_size[0], spatial_size[1])
            x = thwc_to_cthw(frames).to(dtype=torch.float32)

            def func_to_benchmark() -> None:
                # The result is discarded on purpose; only the call itself matters.
                short_side_scale(x, kwargs["dst_short_size"])

            return func_to_benchmark
Esempio n. 8
0
    def test_uniform_crop_transform(self):
        """Apply ``UniformCropVideo(20)`` to a sample dict and check shape and content.

        The sample carries ``"aug_index": 1``; the cropped video must equal
        the ``[5:25, 10:30]`` window of the last two axes of the input.
        """
        clip = thwc_to_cthw(
            create_dummy_video_frames(10, 30, 40)
        ).to(dtype=torch.float32)
        sample = {"video": clip, "aug_index": 1, "label": 0}

        cropper = UniformCropVideo(20)
        result = cropper(sample)

        channels, frames, height, width = result["video"].shape
        dimension_checks = (
            (channels, 3),
            (frames, 10),
            (height, 20),
            (width, 20),
        )
        for observed, expected in dimension_checks:
            self.assertEqual(observed, expected)

        expected_crop = clip[:, :, 5:25, 10:30]
        self.assertTrue(result["video"].equal(expected_crop))
Esempio n. 9
0
    def test_torchscriptable_input_output(self):
        """Scripted transforms must match their eager counterparts on the same input."""
        clip = thwc_to_cthw(
            create_dummy_video_frames(20, 30, 40)
        ).to(dtype=torch.float32)

        # Transforms that are expected to compile with ``torch.jit.script``.
        scriptable_transforms = [
            UniformTemporalSubsample(10),
            RandomShortSideScale(10, 20),
        ]

        for eager_transform in scriptable_transforms:
            scripted_transform = torch.jit.script(eager_transform)
            self.assertTrue(
                isinstance(scripted_transform, torch.jit.ScriptModule))

            # Re-seed before each call so any randomness is drawn identically
            # by the eager and the scripted version.
            torch.manual_seed(0)
            eager_output = eager_transform(clip)
            torch.manual_seed(0)
            scripted_output = scripted_transform(clip)
            self.assertTrue(eager_output.equal(scripted_output))
Esempio n. 10
0
 def test_short_side_scale_height_shorter_opencv(self):
     """Scale a 10x20 clip to short side 5 with the ``"opencv"`` backend and check the shape."""
     frames = create_dummy_video_frames(20, 10, 20)
     clip = thwc_to_cthw(frames).to(dtype=torch.float32)

     scaled = short_side_scale(clip, 5, backend="opencv")

     # The shorter spatial side becomes 5 and the longer one halves to 10.
     expected_shape = (3, 20, 5, 10)
     self.assertEqual(scaled.shape, expected_shape)
Esempio n. 11
0
 def test_short_side_scale_equal_size_pytorch(self):
     """Scale a 10x10 clip to short side 10 with the ``"pytorch"`` backend; shape must not change."""
     frames = create_dummy_video_frames(20, 10, 10)
     clip = thwc_to_cthw(frames).to(dtype=torch.float32)

     scaled = short_side_scale(clip, 10, backend="pytorch")

     expected_shape = (3, 20, 10, 10)
     self.assertEqual(scaled.shape, expected_shape)
    def test_reading_from_directory_structure_hmdb51(self, decoder):
        """Read two lossless videos through ``Hmdb51`` and verify label, tags and frames.

        A temporary root is populated with ``videos/<action>/`` holding one
        ``.avi`` per action and ``folds/`` holding one
        ``<action>_test_split1.txt`` file per action.  Both samples yielded by
        the dataset must carry the action name as label, five meta tags, and
        frames equal to the data that was written.

        Args:
            decoder: Passed straight through to ``Hmdb51`` as its ``decoder``
                argument (presumably supplied by a parameterizing decorator
                that is not visible here).
        """
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:

            # Create test directory structure with two classes and a video in each.
            root_dir_name = pathlib.Path(root_dir)
            action_1 = "running"
            action_2 = "cleaning_windows"

            videos_root_dir = root_dir_name / "videos"
            videos_root_dir.mkdir()

            test_class_1 = videos_root_dir / action_1
            test_class_1.mkdir()
            data_1 = create_dummy_video_frames(15, 10, 10)
            test_class_2 = videos_root_dir / action_2
            test_class_2.mkdir()
            data_2 = create_dummy_video_frames(20, 15, 15)

            test_splits = root_dir_name / "folds"
            test_splits.mkdir()

            with tempfile.NamedTemporaryFile(
                    suffix="_u_nm_np1_ba_goo_19.avi",
                    dir=test_class_1) as f_1, tempfile.NamedTemporaryFile(
                        suffix="_u_nm_np1_fr_med_1.avi",
                        dir=test_class_2) as f_2:
                # Only the generated names are needed; the handles are closed
                # before the videos are written to those paths.
                f_1.close()
                f_2.close()

                # Write lossless video for each class.
                for video_path, frames in ((f_1.name, data_1),
                                           (f_2.name, data_2)):
                    io.write_video(
                        video_path,
                        frames,
                        fps=30,
                        video_codec="libx264rgb",
                        options={"crf": "0"},
                    )

                # Write one split file per action listing its video with id 1.
                for action, video_path in ((action_1, f_1.name),
                                           (action_2, f_2.name)):
                    video_name = os.path.basename(video_path)
                    split_path = os.path.join(
                        test_splits, action + "_test_split1.txt")
                    with open(split_path, "w") as f:
                        f.write(f"{video_name} 1\n")

                clip_sampler = make_clip_sampler("uniform", 3)
                dataset = Hmdb51(
                    data_path=test_splits,
                    video_path_prefix=videos_root_dir,
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    split_id=1,
                    split_type="train",
                    decode_audio=False,
                    decoder=decoder,
                )

                # The order in which the two videos are yielded is not relied
                # upon: accept either and normalize so that sample_1 is the
                # "running" clip before comparing contents.
                sample_1 = next(dataset)
                sample_2 = next(dataset)

                self.assertIn(sample_1["label"], [action_1, action_2])
                if sample_1["label"] == action_2:
                    sample_1, sample_2 = sample_2, sample_1

                expectations = (
                    (sample_1, action_1, data_1),
                    (sample_2, action_2, data_2),
                )
                for sample, expected_action, expected_frames in expectations:
                    self.assertEqual(sample["label"], expected_action)
                    self.assertEqual(5, len(sample["meta_tags"]))
                    self.assertTrue(sample["video"].equal(
                        thwc_to_cthw(expected_frames).to(torch.float32)))