Example #1
0
    def test_uniform_crop(self):
        """Check the windows returned by uniform_crop for every spatial index.

        Two 20-frame clips are used, one with height < width (30x40) and one
        with height > width (40x30). For each clip, spatial_idx 0, 1 and 2
        must yield the leading, centered and trailing 20-pixel window along
        the longer side, with indices 5..24 kept along the shorter side.
        """
        crop_size = 20
        # Each entry: (frame height, frame width) followed by the expected
        # (row slice, column slice) for spatial_idx 0, 1 and 2 in that order.
        scenarios = (
            # Height < width: left, center, right crop.
            (
                (30, 40),
                (
                    (slice(5, 25), slice(None, 20)),
                    (slice(5, 25), slice(10, 30)),
                    (slice(5, 25), slice(20, None)),
                ),
            ),
            # Height > width: top, center, bottom crop.
            (
                (40, 30),
                (
                    (slice(None, 20), slice(5, 25)),
                    (slice(10, 30), slice(5, 25)),
                    (slice(20, None), slice(5, 25)),
                ),
            ),
        )
        for (height, width), windows in scenarios:
            frames = create_dummy_video_frames(20, height, width)
            video = thwc_to_cthw(frames).to(dtype=torch.float32)
            for spatial_idx, (rows, cols) in enumerate(windows):
                cropped = uniform_crop(
                    video, size=crop_size, spatial_idx=spatial_idx)
                self.assertTrue(cropped.equal(video[:, :, rows, cols]))
    def test_reading_from_directory_structure(self, decoder):
        """Read a two-class directory tree through LabeledVideoDataset.

        A lossless video is written into each of two class folders. The
        dataset built from the root folder must then yield two samples whose
        labels are 0 and 1 and whose frames equal the data that was written.

        Args:
            decoder: passed through unchanged as the `decoder` argument of
                LabeledVideoDataset.
        """
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:
            # Create test directory structure with two classes and a video in each.
            base_dir = pathlib.Path(root_dir)

            running_dir = base_dir / "running"
            running_dir.mkdir()
            running_frames = create_dummy_video_frames(15, 10, 10)

            cleaning_dir = base_dir / "cleaning windows"
            cleaning_dir.mkdir()
            cleaning_frames = create_dummy_video_frames(20, 15, 15)

            with tempfile.NamedTemporaryFile(
                    suffix=".mp4", dir=running_dir
            ) as running_file, tempfile.NamedTemporaryFile(
                    suffix=".mp4", dir=cleaning_dir
            ) as cleaning_file:
                # Close the handles right away; only the generated paths are
                # used from here on.
                running_file.close()
                cleaning_file.close()

                # Write lossless video for each class.
                for target, frames in (
                        (running_file, running_frames),
                        (cleaning_file, cleaning_frames),
                ):
                    io.write_video(
                        target.name,
                        frames,
                        fps=30,
                        video_codec="libx264rgb",
                        options={"crf": "0"},
                    )

                sampler = make_clip_sampler("uniform", 3)
                video_paths = LabeledVideoPaths.from_path(root_dir)
                dataset = LabeledVideoDataset(
                    video_paths,
                    clip_sampler=sampler,
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                # Videos are sorted alphabetically so "cleaning windows" is
                # expected first (label 0), followed by "running" (label 1).
                expected_samples = (
                    (0, cleaning_frames),
                    (1, running_frames),
                )
                for expected_label, frames in expected_samples:
                    sample = next(dataset)
                    self.assertEqual(sample["label"], expected_label)
                    self.assertTrue(sample["video"].equal(
                        thwc_to_cthw(frames).to(torch.float32)))
Example #3
0
def temp_encoded_video(num_frames: int,
                       fps: int,
                       height=10,
                       width=10,
                       prefix=None,
                       directory=None):
    """Creates a temporary lossless, mp4 video with synthetic content.

    Uses a context which deletes the video after exit. The video is removed
    even when writing fails or when the code consuming the yielded values
    raises.

    NOTE(review): presumably wrapped with contextlib.contextmanager outside
    of this view -- confirm at the definition site.

    Args:
        num_frames: number of synthetic frames to generate.
        fps: frame rate handed to the video writer.
        height: frame height passed to create_dummy_video_frames.
        width: frame width passed to create_dummy_video_frames.
        prefix: optional file name prefix for the temporary file.
        directory: optional directory in which the file is created.

    Yields:
        A tuple of the video file path and the generated frames after
        thwc_to_cthw, cast to float32.
    """
    # Lossless options.
    video_codec = "libx264rgb"
    options = {"crf": "0"}
    data = create_dummy_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(prefix=prefix,
                                     suffix=".mp4",
                                     dir=directory) as f:
        # Only the unique name is needed. Once closed, the temporary file
        # object no longer manages whatever gets written to that path, so the
        # video has to be deleted explicitly below.
        f.close()
        try:
            io.write_video(f.name,
                           data,
                           fps=fps,
                           video_codec=video_codec,
                           options=options)
            yield f.name, thwc_to_cthw(data).to(torch.float32)
        finally:
            # Runs on normal exit and when an exception is thrown in at the
            # yield, so the video never outlives the context.
            try:
                os.unlink(f.name)
            except FileNotFoundError:
                # Nothing was written, or the consumer already removed it;
                # do not mask an in-flight exception with a cleanup error.
                pass
Example #4
0
    def test_uniform_temporal_subsample(self):
        """Exercise uniform_temporal_subsample with all, half and one frame."""

        def fresh_clip():
            # Every scenario starts from its own newly built 20-frame clip.
            frames = create_dummy_video_frames(20, 30, 40)
            return thwc_to_cthw(frames).to(dtype=torch.float32)

        # Requesting as many frames as the clip holds returns it unchanged.
        clip = fresh_clip()
        everything = uniform_temporal_subsample(clip, clip.shape[1])
        self.assertTrue(everything.equal(clip))

        # Requesting half of the frames picks these indices along dim 1.
        clip = fresh_clip()
        expected_indices = [0, 2, 4, 6, 8, 10, 12, 14, 16, 19]
        halved = uniform_temporal_subsample(clip, clip.shape[1] // 2)
        self.assertTrue(halved.equal(clip[:, expected_indices]))

        # Requesting a single frame keeps only the first one.
        clip = fresh_clip()
        single = uniform_temporal_subsample(clip, 1)
        self.assertTrue(single.equal(clip[:, 0:1]))
Example #5
0
    def test_compose_with_video_transforms(self):
        """Run a composed video pipeline on a clip dict and check the shape.

        The pipeline is applied to the "video" entry only and must produce a
        tensor of shape (3, num_subsample, 10, 10).
        """
        num_subsample = 10
        frames = create_dummy_video_frames(20, 30, 40)
        video = thwc_to_cthw(frames).to(dtype=torch.float32)
        test_clip = {"video": video, "label": 0}

        # Compose using torchvision and pytorchvideo transforms to ensure they
        # interact correctly.
        video_pipeline = Compose([
            UniformTemporalSubsample(num_subsample),
            NormalizeVideo([video.mean()] * 3, [video.std()] * 3),
            RandomShortSideScale(min_size=15, max_size=25),
            RandomCropVideo(10),
            RandomHorizontalFlipVideo(p=0.5),
        ])
        transform = Compose([
            ApplyTransformToKey(key="video", transform=video_pipeline),
        ])

        result = transform(test_clip)
        c, t, h, w = result["video"].shape
        self.assertEqual((c, t, h, w), (3, num_subsample, 10, 10))
Example #6
0
 def test_repeat_temporal_frames_subsample(self):
     """Check the output shapes of repeat_temporal_frames_subsample.

     A 32-frame 10x10 clip is passed together with (1, 4); the call is
     expected to return exactly two tensors shaped (3, 32, 10, 10) and
     (3, 8, 10, 10).
     """
     video = thwc_to_cthw(create_dummy_video_frames(
         32, 10, 10)).to(dtype=torch.float32)
     actual = repeat_temporal_frames_subsample(video, (1, 4))
     expected_shape = ((3, 32, 10, 10), (3, 8, 10, 10))
     # Compare the counts first: iterating over the produced outputs alone
     # would let a missing output slip through unnoticed.
     self.assertEqual(len(actual), len(expected_shape))
     for output, shape in zip(actual, expected_shape):
         self.assertEqual(output.shape, shape)
Example #7
0
    def test_normalize(self):
        """Normalizing a clip with its own mean and std gives mean 0, std 1."""
        frames = create_dummy_video_frames(10, 30, 40)
        video = thwc_to_cthw(frames).to(dtype=torch.float32)

        normalize = Normalize(video.mean(), video.std())
        normalized = normalize(video)

        self.assertAlmostEqual(normalized.mean().item(), 0)
        self.assertAlmostEqual(normalized.std().item(), 1)
Example #8
0
def temp_frame_video(frame_image_file_names, height=10, width=10):
    """Write synthetic frames as image files into a temporary directory.

    One frame is generated per entry of `frame_image_file_names` and saved
    under that name. The directory exists only while the surrounding `with`
    block over tempfile.TemporaryDirectory is active.

    Args:
        frame_image_file_names: file names, one per frame to write.
        height: frame height passed to create_dummy_video_frames.
        width: frame width passed to create_dummy_video_frames.

    Yields:
        A tuple of the directory (as pathlib.Path) and the frames after
        thwc_to_cthw, cast to float32.
    """
    num_frames = len(frame_image_file_names)
    frames = thwc_to_cthw(
        create_dummy_video_frames(num_frames, height, width))
    to_pil_image = transforms.ToPILImage()
    with tempfile.TemporaryDirectory() as tmp_dir:
        frame_dir = pathlib.Path(tmp_dir)
        frame_dir.mkdir(exist_ok=True)
        for index, file_name in enumerate(frame_image_file_names):
            # Index along dim 1 to pick a single frame of the converted clip.
            image = to_pil_image(frames[:, index])
            image.save(frame_dir / file_name, compress_level=0, optimize=False)
        yield frame_dir, frames.to(torch.float32)
Example #9
0
        def _init_benchmark_short_side_scale(**kwargs) -> Callable:
            """Build a zero-argument callable that runs short_side_scale once.

            The input clip is created up front from kwargs["temporal_size"]
            and the first two entries of kwargs["ori_spatial_size"];
            kwargs["dst_short_size"] is looked up each time the returned
            callable runs.
            """
            frames = create_dummy_video_frames(
                kwargs["temporal_size"],
                kwargs["ori_spatial_size"][0],
                kwargs["ori_spatial_size"][1],
            )
            clip = thwc_to_cthw(frames).to(dtype=torch.float32)

            def func_to_benchmark() -> None:
                # Only the call itself is of interest; its result is dropped.
                short_side_scale(clip, kwargs["dst_short_size"])

            return func_to_benchmark
Example #10
0
    def test_uniform_crop_transform(self):
        """UniformCropVideo(20) on a clip dict with aug_index 1.

        The resulting "video" entry must have shape (3, 10, 20, 20) and equal
        the window [5:25, 10:30] of the 30x40 input frames.
        """
        frames = create_dummy_video_frames(10, 30, 40)
        video = thwc_to_cthw(frames).to(dtype=torch.float32)
        test_clip = {"video": video, "aug_index": 1, "label": 0}

        crop = UniformCropVideo(20)
        cropped = crop(test_clip)["video"]

        c, t, h, w = cropped.shape
        self.assertEqual((c, t, h, w), (3, 10, 20, 20))
        self.assertTrue(cropped.equal(video[:, :, 5:25, 10:30]))
Example #11
0
    def test_torchscriptable_input_output(self):
        """Scripted transforms must reproduce the eager output exactly."""
        frames = create_dummy_video_frames(20, 30, 40)
        video = thwc_to_cthw(frames).to(dtype=torch.float32)

        # Test all the torchscriptable transforms.
        scriptable_transforms = (
            UniformTemporalSubsample(10),
            RandomShortSideScale(10, 20),
        )
        for eager in scriptable_transforms:
            scripted = torch.jit.script(eager)
            self.assertIsInstance(scripted, torch.jit.ScriptModule)

            # Seed before each transform to force determinism.
            torch.manual_seed(0)
            eager_output = eager(video)
            torch.manual_seed(0)
            scripted_output = scripted(video)
            self.assertTrue(eager_output.equal(scripted_output))
Example #12
0
def temp_encoded_video_with_audio(
    num_frames: int,
    fps: int,
    num_audio_samples: int,
    audio_rate: int = 48000,
    height=10,
    width=10,
    prefix=None,
):
    """Creates a temporary avi video with synthetic frames and an audio track.

    The written file is deleted once the generator finishes or is closed,
    including when writing fails or the consuming code raises.

    Args:
        num_frames: number of synthetic video frames to generate.
        fps: frame rate handed to write_audio_video.
        num_audio_samples: number of audio samples to generate.
        audio_rate: audio sample rate handed to write_audio_video.
        height: frame height passed to create_dummy_video_frames.
        width: frame width passed to create_dummy_video_frames.
        prefix: optional file name prefix for the temporary file.

    Yields:
        A tuple of the video file path, the frames after thwc_to_cthw cast
        to float32, and the first row of the audio tensor cast to float32.
    """
    # Imported locally because this helper did not use `os` before.
    import os

    # NOTE(review): np.random.rand draws from [0, 1), so the int16 cast
    # truncates every sample to 0 -- confirm that silent audio is intended.
    audio_data = torch.from_numpy(
        np.random.rand(1, num_audio_samples).astype("<i2"))
    video_data = create_dummy_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=".avi") as f:
        # Only the unique name is needed. Once closed, the temporary file
        # object no longer manages whatever gets written to that path, so the
        # video has to be deleted explicitly below.
        f.close()
        try:
            write_audio_video(f.name,
                              video_data,
                              audio_data,
                              fps=fps,
                              audio_rate=audio_rate)
            cthw_video_data = thwc_to_cthw(video_data).to(torch.float32)
            yield f.name, cthw_video_data, audio_data[0].to(torch.float32)
        finally:
            # Without this the written video would be left behind on disk.
            try:
                os.unlink(f.name)
            except FileNotFoundError:
                # Nothing was written, or the consumer already removed it;
                # do not mask an in-flight exception with a cleanup error.
                pass
Example #13
0
 def test_short_side_scale_height_shorter_opencv(self):
     """Scale a 20-frame 10x20 clip to short side 5 with the opencv backend."""
     frames = create_dummy_video_frames(20, 10, 20)
     video = thwc_to_cthw(frames).to(dtype=torch.float32)
     scaled = short_side_scale(video, 5, backend="opencv")
     # Height is the shorter side here; both spatial sizes are halved.
     expected_shape = (3, 20, 5, 10)
     self.assertEqual(scaled.shape, expected_shape)
Example #14
0
 def test_short_side_scale_equal_size_pytorch(self):
     """Scaling a 10x10 clip to short side 10 keeps its shape (pytorch)."""
     frames = create_dummy_video_frames(20, 10, 10)
     video = thwc_to_cthw(frames).to(dtype=torch.float32)
     scaled = short_side_scale(video, 10, backend="pytorch")
     # The requested size equals the current one, so nothing should change.
     expected_shape = (3, 20, 10, 10)
     self.assertEqual(scaled.shape, expected_shape)
    def test_reading_from_directory_structure_hmdb51(self, decoder):
        """Read a videos-plus-split-files layout through the Hmdb51 dataset.

        Two action folders under "videos" each receive one lossless video,
        and a matching "<action>_test_split1.txt" file is written under
        "folds". The two samples produced by the dataset must carry the
        action names as labels, five meta tags each, and frames equal to the
        data that was written.

        Args:
            decoder: passed through unchanged as the `decoder` argument of
                Hmdb51.
        """
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:
            # Create test directory structure with two classes and a video in each.
            base_dir = pathlib.Path(root_dir)
            action_1 = "running"
            action_2 = "cleaning_windows"

            videos_root_dir = base_dir / "videos"
            videos_root_dir.mkdir()

            class_dir_1 = videos_root_dir / action_1
            class_dir_1.mkdir()
            data_1 = create_dummy_video_frames(15, 10, 10)
            class_dir_2 = videos_root_dir / action_2
            class_dir_2.mkdir()
            data_2 = create_dummy_video_frames(20, 15, 15)

            splits_dir = base_dir / "folds"
            splits_dir.mkdir()

            with tempfile.NamedTemporaryFile(
                    suffix="_u_nm_np1_ba_goo_19.avi", dir=class_dir_1
            ) as f_1, tempfile.NamedTemporaryFile(
                    suffix="_u_nm_np1_fr_med_1.avi", dir=class_dir_2
            ) as f_2:
                # Close the handles right away; only the generated paths are
                # used from here on.
                f_1.close()
                f_2.close()

                # Write lossless video for each class.
                for video_file, frames in ((f_1, data_1), (f_2, data_2)):
                    io.write_video(
                        video_file.name,
                        frames,
                        fps=30,
                        video_codec="libx264rgb",
                        options={"crf": "0"},
                    )

                # One split file per action, listing that action's video
                # file name followed by the tag 1.
                for action, video_file in ((action_1, f_1), (action_2, f_2)):
                    video_name = os.path.basename(video_file.name)
                    split_path = os.path.join(
                        splits_dir, action + "_test_split1.txt")
                    with open(split_path, "w") as split_file:
                        split_file.write(f"{video_name} 1\n")

                sampler = make_clip_sampler("uniform", 3)
                dataset = Hmdb51(
                    data_path=splits_dir,
                    video_path_prefix=videos_root_dir,
                    clip_sampler=sampler,
                    video_sampler=SequentialSampler,
                    split_id=1,
                    split_type="train",
                    decode_audio=False,
                    decoder=decoder,
                )

                # The order in which the two videos come back is not relied
                # upon: the samples are swapped if action_2 arrives first.
                first = next(dataset)
                second = next(dataset)

                self.assertIn(first["label"], [action_1, action_2])
                if first["label"] == action_2:
                    first, second = second, first

                expected_samples = (
                    (first, action_1, data_1),
                    (second, action_2, data_2),
                )
                for sample, action, frames in expected_samples:
                    self.assertEqual(sample["label"], action)
                    self.assertEqual(5, len(sample["meta_tags"]))
                    self.assertTrue(sample["video"].equal(
                        thwc_to_cthw(frames).to(torch.float32)))