def test_uniform_crop(self):
    # For videos with height < width.
    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )
    # Left crop.
    actual = uniform_crop(video, size=20, spatial_idx=0)
    self.assertTrue(actual.equal(video[:, :, 5:25, :20]))
    # Center crop.
    actual = uniform_crop(video, size=20, spatial_idx=1)
    self.assertTrue(actual.equal(video[:, :, 5:25, 10:30]))
    # Right crop.
    actual = uniform_crop(video, size=20, spatial_idx=2)
    self.assertTrue(actual.equal(video[:, :, 5:25, 20:]))

    # For videos with height > width.
    video = thwc_to_cthw(create_dummy_video_frames(20, 40, 30)).to(
        dtype=torch.float32
    )
    # Top crop.
    actual = uniform_crop(video, size=20, spatial_idx=0)
    self.assertTrue(actual.equal(video[:, :, :20, 5:25]))
    # Center crop.
    actual = uniform_crop(video, size=20, spatial_idx=1)
    self.assertTrue(actual.equal(video[:, :, 10:30, 5:25]))
    # Bottom crop.
    actual = uniform_crop(video, size=20, spatial_idx=2)
    self.assertTrue(actual.equal(video[:, :, 20:, 5:25]))

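# A minimal sketch (hypothetical helper, not pytorchvideo's implementation) of
# the offset arithmetic the assertions above assume: the shorter spatial side
# is always center-cropped, while spatial_idx selects the left/center/right
# (or top/center/bottom) position along the longer side.
def _uniform_crop_offsets_sketch(height, width, size, spatial_idx):
    # Default to a center crop on both axes.
    y_offset = (height - size) // 2
    x_offset = (width - size) // 2
    if width >= height:
        # Landscape clips: spatial_idx moves the crop horizontally.
        x_offset = (0, (width - size) // 2, width - size)[spatial_idx]
    else:
        # Portrait clips: spatial_idx moves the crop vertically.
        y_offset = (0, (height - size) // 2, height - size)[spatial_idx]
    return y_offset, x_offset


# E.g. _uniform_crop_offsets_sketch(30, 40, 20, 2) == (5, 20), matching the
# video[:, :, 5:25, 20:] slice in the right-crop assertion above.
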
def test_reading_from_directory_structure(self, decoder):
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_dir_name = pathlib.Path(root_dir)
        test_class_1 = root_dir_name / "running"
        test_class_1.mkdir()
        data_1 = create_dummy_video_frames(15, 10, 10)
        test_class_2 = root_dir_name / "cleaning windows"
        test_class_2.mkdir()
        data_2 = create_dummy_video_frames(20, 15, 15)
        with tempfile.NamedTemporaryFile(
            suffix=".mp4", dir=test_class_1
        ) as f_1, tempfile.NamedTemporaryFile(
            suffix=".mp4", dir=test_class_2
        ) as f_2:
            f_1.close()
            f_2.close()

            # Write lossless video for each class.
            io.write_video(
                f_1.name,
                data_1,
                fps=30,
                video_codec="libx264rgb",
                options={"crf": "0"},
            )
            io.write_video(
                f_2.name,
                data_2,
                fps=30,
                video_codec="libx264rgb",
                options={"crf": "0"},
            )

            clip_sampler = make_clip_sampler("uniform", 3)
            labeled_video_paths = LabeledVideoPaths.from_path(root_dir)
            dataset = LabeledVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Videos are sorted alphabetically so "cleaning windows" (i.e. data_2)
            # will be first.
            sample_1 = next(dataset)
            self.assertEqual(sample_1["label"], 0)
            self.assertTrue(
                sample_1["video"].equal(thwc_to_cthw(data_2).to(torch.float32))
            )

            sample_2 = next(dataset)
            self.assertEqual(sample_2["label"], 1)
            self.assertTrue(
                sample_2["video"].equal(thwc_to_cthw(data_1).to(torch.float32))
            )

def test_uniform_temporal_subsample(self):
    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )
    actual = uniform_temporal_subsample(video, video.shape[1])
    self.assertTrue(actual.equal(video))

    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )
    actual = uniform_temporal_subsample(video, video.shape[1] // 2)
    self.assertTrue(actual.equal(video[:, [0, 2, 4, 6, 8, 10, 12, 14, 16, 19]]))

    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )
    actual = uniform_temporal_subsample(video, 1)
    self.assertTrue(actual.equal(video[:, 0:1]))

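# A minimal sketch of where the expected indices in the middle assertion come
# from (an assumption about uniform_temporal_subsample's index selection):
# num_samples points are spaced evenly over [0, t - 1] and truncated to ints,
# which is why the last kept frame is 19 rather than 18.
def _uniform_subsample_indices_sketch(t, num_samples):
    return torch.linspace(0, t - 1, num_samples).long().tolist()


# E.g. _uniform_subsample_indices_sketch(20, 10)
# == [0, 2, 4, 6, 8, 10, 12, 14, 16, 19]
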
def test_compose_with_video_transforms(self):
    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )
    test_clip = {"video": video, "label": 0}

    # Compose using torchvision and pytorchvideo transforms to ensure they
    # interact correctly.
    num_subsample = 10
    transform = Compose(
        [
            ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [
                        UniformTemporalSubsample(num_subsample),
                        NormalizeVideo([video.mean()] * 3, [video.std()] * 3),
                        RandomShortSideScale(min_size=15, max_size=25),
                        RandomCropVideo(10),
                        RandomHorizontalFlipVideo(p=0.5),
                    ]
                ),
            )
        ]
    )

    actual = transform(test_clip)
    c, t, h, w = actual["video"].shape
    self.assertEqual(c, 3)
    self.assertEqual(t, num_subsample)
    self.assertEqual(h, 10)
    self.assertEqual(w, 10)

def test_repeat_temporal_frames_subsample(self):
    video = thwc_to_cthw(create_dummy_video_frames(32, 10, 10)).to(
        dtype=torch.float32
    )
    actual = repeat_temporal_frames_subsample(video, (1, 4))
    expected_shape = ((3, 32, 10, 10), (3, 8, 10, 10))
    for idx in range(len(actual)):
        self.assertEqual(actual[idx].shape, expected_shape[idx])

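# The expected shapes above follow from the (1, 4) subsample factors: each
# factor k is assumed to keep t // k uniformly spaced frames while leaving
# C, H, W untouched. A minimal sketch of that arithmetic:
def _expected_subsampled_shapes_sketch(c, t, h, w, factors):
    return tuple((c, t // k, h, w) for k in factors)


# E.g. _expected_subsampled_shapes_sketch(3, 32, 10, 10, (1, 4))
# == ((3, 32, 10, 10), (3, 8, 10, 10))
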
def test_normalize(self):
    video = thwc_to_cthw(create_dummy_video_frames(10, 30, 40)).to(
        dtype=torch.float32
    )
    transform = Normalize(video.mean(), video.std())

    actual = transform(video)
    self.assertAlmostEqual(actual.mean().item(), 0)
    self.assertAlmostEqual(actual.std().item(), 1)

def _init_benchmark_short_side_scale(**kwargs) -> Callable:
    x = thwc_to_cthw(
        create_dummy_video_frames(
            kwargs["temporal_size"],
            kwargs["ori_spatial_size"][0],
            kwargs["ori_spatial_size"][1],
        )
    ).to(dtype=torch.float32)

    def func_to_benchmark() -> None:
        _ = short_side_scale(x, kwargs["dst_short_size"])
        return

    return func_to_benchmark

def test_uniform_crop_transform(self):
    video = thwc_to_cthw(create_dummy_video_frames(10, 30, 40)).to(
        dtype=torch.float32
    )
    test_clip = {"video": video, "aug_index": 1, "label": 0}

    transform = UniformCropVideo(20)
    actual = transform(test_clip)
    c, t, h, w = actual["video"].shape
    self.assertEqual(c, 3)
    self.assertEqual(t, 10)
    self.assertEqual(h, 20)
    self.assertEqual(w, 20)
    self.assertTrue(actual["video"].equal(video[:, :, 5:25, 10:30]))

def test_torchscriptable_input_output(self):
    video = thwc_to_cthw(create_dummy_video_frames(20, 30, 40)).to(
        dtype=torch.float32
    )

    # Test all the torchscriptable transforms.
    for transform in [UniformTemporalSubsample(10), RandomShortSideScale(10, 20)]:
        transform_script = torch.jit.script(transform)
        self.assertTrue(isinstance(transform_script, torch.jit.ScriptModule))

        # Seed before each transform to force determinism.
        torch.manual_seed(0)
        output = transform(video)
        torch.manual_seed(0)
        script_output = transform_script(video)
        self.assertTrue(output.equal(script_output))

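# A short usage sketch building on the scripting check above: once scripted, a
# transform can be serialized and reloaded without its Python definition (the
# helper name and file name here are illustrative, not part of pytorchvideo).
def _save_and_reload_scripted_transform_sketch(transform, path="transform.pt"):
    scripted = torch.jit.script(transform)
    scripted.save(path)  # serialize the ScriptModule to disk
    return torch.jit.load(path)  # reload; callable like the eager transform
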
def test_short_side_scale_height_shorter_opencv(self):
    video = thwc_to_cthw(create_dummy_video_frames(20, 10, 20)).to(
        dtype=torch.float32
    )
    actual = short_side_scale(video, 5, backend="opencv")
    self.assertEqual(actual.shape, (3, 20, 5, 10))

def test_short_side_scale_equal_size_pytorch(self):
    video = thwc_to_cthw(create_dummy_video_frames(20, 10, 10)).to(
        dtype=torch.float32
    )
    actual = short_side_scale(video, 10, backend="pytorch")
    self.assertEqual(actual.shape, (3, 20, 10, 10))

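# A minimal sketch (an assumption about short_side_scale, including rounding)
# of the output size the two tests above expect: the clip is resized so that
# its shorter spatial side equals `size`, preserving the aspect ratio.
def _short_side_scale_dims_sketch(height, width, size):
    if height < width:
        return size, int(width * size / height)
    return int(height * size / width), size


# E.g. _short_side_scale_dims_sketch(10, 20, 5) == (5, 10), while equal sides
# (10, 10) with size=10 stay (10, 10).
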
def test_reading_from_directory_structure_hmdb51(self, decoder):
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_dir_name = pathlib.Path(root_dir)
        action_1 = "running"
        action_2 = "cleaning_windows"

        videos_root_dir = root_dir_name / "videos"
        videos_root_dir.mkdir()

        test_class_1 = videos_root_dir / action_1
        test_class_1.mkdir()
        data_1 = create_dummy_video_frames(15, 10, 10)
        test_class_2 = videos_root_dir / action_2
        test_class_2.mkdir()
        data_2 = create_dummy_video_frames(20, 15, 15)

        test_splits = root_dir_name / "folds"
        test_splits.mkdir()

        with tempfile.NamedTemporaryFile(
            suffix="_u_nm_np1_ba_goo_19.avi", dir=test_class_1
        ) as f_1, tempfile.NamedTemporaryFile(
            suffix="_u_nm_np1_fr_med_1.avi", dir=test_class_2
        ) as f_2:
            f_1.close()
            f_2.close()

            # Write lossless video for each class.
            io.write_video(
                f_1.name,
                data_1,
                fps=30,
                video_codec="libx264rgb",
                options={"crf": "0"},
            )
            io.write_video(
                f_2.name,
                data_2,
                fps=30,
                video_codec="libx264rgb",
                options={"crf": "0"},
            )

            _, video_name_1 = os.path.split(f_1.name)
            _, video_name_2 = os.path.split(f_2.name)

            with open(
                os.path.join(test_splits, action_1 + "_test_split1.txt"), "w"
            ) as f:
                f.write(f"{video_name_1} 1\n")

            with open(
                os.path.join(test_splits, action_2 + "_test_split1.txt"), "w"
            ) as f:
                f.write(f"{video_name_2} 1\n")

            clip_sampler = make_clip_sampler("uniform", 3)
            dataset = Hmdb51(
                data_path=test_splits,
                video_path_prefix=root_dir_name / "videos",
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                split_id=1,
                split_type="train",
                decode_audio=False,
                decoder=decoder,
            )

            # Videos are sorted alphabetically so "cleaning windows" (i.e. data_2)
            # will be first, but the checks below are order-agnostic.
            sample_1 = next(dataset)
            sample_2 = next(dataset)

            self.assertTrue(sample_1["label"] in [action_1, action_2])
            if sample_1["label"] == action_2:
                sample_1, sample_2 = sample_2, sample_1

            self.assertEqual(sample_1["label"], action_1)
            self.assertEqual(5, len(sample_1["meta_tags"]))
            self.assertTrue(
                sample_1["video"].equal(thwc_to_cthw(data_1).to(torch.float32))
            )

            self.assertEqual(sample_2["label"], action_2)
            self.assertEqual(5, len(sample_2["meta_tags"]))
            self.assertTrue(
                sample_2["video"].equal(thwc_to_cthw(data_2).to(torch.float32))
            )