def run_distributed(rank, size, decoder, clip_duration, data_name,
                    return_dict):
    """
    This function is run by each distributed process. It samples videos
    based on the distributed split (determined by the
    DistributedSampler) and returns the dataset clips in the return_dict.
    """
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=size)
    clip_sampler = make_clip_sampler("uniform", clip_duration)
    labeled_video_paths = LabeledVideoPaths.from_path(data_name)
    dataset = EncodedVideoDataset(
        labeled_video_paths,
        clip_sampler=clip_sampler,
        video_sampler=DistributedSampler,
        decode_audio=False,
        decoder=decoder,
    )
    test_dataloader = DataLoader(dataset, batch_size=None, num_workers=1)

    # Run two epochs, simulating use in a training loop
    dataset.video_sampler.set_epoch(0)
    epoch_1 = [(sample["label"], sample["video"])
               for sample in test_dataloader]
    dataset.video_sampler.set_epoch(1)
    epoch_2 = [(sample["label"], sample["video"])
               for sample in test_dataloader]
    return_dict[rank] = {"epoch_1": epoch_1, "epoch_2": epoch_2}
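
# A minimal sketch (not part of the original tests) of how run_distributed
# might be launched. The world size of 2, the "pyav" decoder choice, the 2.0s
# clip duration, and the "dataset.csv" path are assumptions for illustration.
import torch.multiprocessing as mp

def spawn_example():
    size = 2
    manager = mp.Manager()
    return_dict = manager.dict()  # shared dict collecting each rank's clips
    processes = []
    for rank in range(size):
        p = mp.Process(
            target=run_distributed,
            args=(rank, size, "pyav", 2.0, "dataset.csv", return_dict),
        )
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # return_dict[rank] now holds {"epoch_1": [...], "epoch_2": [...]}
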
    def test_video_name_with_whitespace_works(self, decoder):
        num_frames = 10
        fps = 5
        with temp_encoded_video(num_frames=num_frames,
                                fps=fps,
                                prefix="pre fix") as (
                                    video_file_name,
                                    data,
                                ):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name} 0\n".encode())
                f.write(f"{video_file_name} 1\n".encode())

            total_duration = num_frames / fps
            clip_sampler = make_clip_sampler("uniform", total_duration)
            labeled_video_paths = LabeledVideoPaths.from_path(f.name)
            dataset = EncodedVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            expected = [(0, data), (1, data)]
            for i, sample in enumerate(dataset):
                self.assertTrue(sample["video"].equal(expected[i][1]))
                self.assertEqual(sample["label"], expected[i][0])

    def test_sampling_with_more_processes_than_videos(self, decoder):
        with mock_encoded_video_dataset_file() as (
                mock_csv,
                label_videos,
                total_duration,
        ):
            half_duration = total_duration / 2 - self._EPS
            clip_sampler = make_clip_sampler("uniform", half_duration)
            labeled_video_paths = LabeledVideoPaths.from_path(mock_csv)
            dataset = EncodedVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Split each full video into two clips.
            expected = []
            for label, data in label_videos:
                num_frames = data.shape[0]
                half_frames = num_frames // 2
                first_half_data = data[:, :half_frames]
                second_half_data = data[:, half_frames:]
                expected.append((label, first_half_data))
                expected.append((label, second_half_data))

            test_dataloader = DataLoader(dataset,
                                         batch_size=None,
                                         num_workers=16)
            actual = [(sample["label"], sample["video"])
                      for sample in test_dataloader]
            assert_unordered_list_compare_true(self, expected, actual)
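
# assert_unordered_list_compare_true is a helper referenced by the tests above
# but not defined in this listing. A plausible sketch, under the assumption
# that it checks two collections of (label, video_tensor) pairs for equality
# irrespective of order:
def assert_unordered_list_compare_true(self, expected, actual):
    # Every expected pair must appear in actual exactly once, and vice versa.
    self.assertEqual(len(expected), len(actual))
    remaining = list(actual)
    for label, video in expected:
        match = next(
            (i for i, (a_label, a_video) in enumerate(remaining)
             if a_label == label and video.equal(a_video)),
            None,
        )
        self.assertIsNotNone(match, f"Missing clip for label {label}")
        remaining.pop(match)
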
    def test_reading_from_directory_structure(self, decoder):
        # For an unknown reason this import has to be here for `buck test` to work.
        import torchvision.io as io

        with tempfile.TemporaryDirectory() as root_dir:

            # Create test directory structure with two classes and a video in each.
            root_dir_name = pathlib.Path(root_dir)
            test_class_1 = root_dir_name / "running"
            test_class_1.mkdir()
            data_1 = create_dummy_video_frames(15, 10, 10)
            test_class_2 = root_dir_name / "cleaning windows"
            test_class_2.mkdir()
            data_2 = create_dummy_video_frames(20, 15, 15)
            with tempfile.NamedTemporaryFile(
                    suffix=".mp4",
                    dir=test_class_1) as f_1, tempfile.NamedTemporaryFile(
                        suffix=".mp4", dir=test_class_2) as f_2:
                # Close the handles so io.write_video can create files at
                # these paths; the enclosing TemporaryDirectory cleans up.
                f_1.close()
                f_2.close()

                # Write lossless video for each class.
                io.write_video(
                    f_1.name,
                    data_1,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )
                io.write_video(
                    f_2.name,
                    data_2,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

                clip_sampler = make_clip_sampler("uniform", 3)
                labeled_video_paths = LabeledVideoPaths.from_path(root_dir)
                dataset = EncodedVideoDataset(
                    labeled_video_paths,
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                # Videos are sorted alphabetically so "cleaning windows" (i.e. data_2)
                # will be first.
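                # Illustration (inferred from the setup above), layout on disk:
                #   root_dir/
                #       cleaning windows/<tmp>.mp4  -> label 0 (data_2)
                #       running/<tmp>.mp4           -> label 1 (data_1)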
                sample_1 = next(dataset)
                self.assertEqual(sample_1["label"], 0)
                self.assertTrue(sample_1["video"].equal(
                    thwc_to_cthw(data_2).to(torch.float32)))

                sample_2 = next(dataset)
                self.assertEqual(sample_2["label"], 1)
                self.assertTrue(sample_2["video"].equal(
                    thwc_to_cthw(data_1).to(torch.float32)))

    def test_sampling_with_non_divisible_processes_by_clips(self, decoder):
        # Make one video with 15 frames and one with 10 frames. At 5 fps these
        # are 3s and 2s long, so with ~1s clips the uniform sampler produces 3
        # and 2 clips respectively.
        num_frames = 10
        fps = 5
        with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
                video_file_name_1,
                data_1,
        ):
            with temp_encoded_video(num_frames=num_frames, fps=fps) as (
                    video_file_name_2,
                    data_2,
            ):
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix=".txt") as f:
                    f.write(f"{video_file_name_1} 0\n".encode())
                    f.write(f"{video_file_name_2} 1\n".encode())

                total_duration = num_frames / fps
                half_duration = total_duration / 2 - self._EPS
                clip_sampler = make_clip_sampler("uniform", half_duration)
                labeled_video_paths = LabeledVideoPaths.from_path(f.name)
                dataset = EncodedVideoDataset(
                    labeled_video_paths,
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                half_frames = num_frames // 2
                expected = {
                    (0, data_1[:, half_frames * 2:]),  # 3/3 clip (last third)
                    (0, data_1[:, half_frames:half_frames * 2]),  # 2/3 clip
                    (0, data_1[:, :half_frames]),  # 1/3 clip (first third)
                    (1, data_2[:, :half_frames]),  # First half
                    (1, data_2[:, half_frames:]),  # Second half
                }

                test_dataloader = DataLoader(dataset,
                                             batch_size=None,
                                             num_workers=2)
                actual = [(sample["label"], sample["video"])
                          for sample in test_dataloader]
                assert_unordered_list_compare_true(self, expected, actual)

    def test_constant_clips_per_video_sampling_works(self, decoder):
        # Make one video with 15 frames and one with 10 frames. With
        # "constant_clips_per_video" and clips_per_video=2 (the third argument
        # to make_clip_sampler below), each video yields exactly 2 clips.
        num_frames = 10
        fps = 5
        with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
                video_file_name_1,
                data_1,
        ):
            with temp_encoded_video(num_frames=num_frames, fps=fps) as (
                    video_file_name_2,
                    data_2,
            ):
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix=".txt") as f:
                    f.write(f"{video_file_name_1} 0\n".encode())
                    f.write(f"{video_file_name_2} 1\n".encode())

                clip_frames = 2
                duration_for_frames = clip_frames / fps - self._EPS
                clip_sampler = make_clip_sampler("constant_clips_per_video",
                                                 duration_for_frames, 2)
                labeled_video_paths = LabeledVideoPaths.from_path(f.name)
                dataset = EncodedVideoDataset(
                    labeled_video_paths,
                    clip_sampler=clip_sampler,
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                # Dataset has 2 videos. Each video has two evenly spaced clips of size
                # clip_frames sampled. The first clip of each video will always be
                # sampled at second 0. The second clip of the video is the next frame
                # from time: (total_duration - clip_duration) / 2
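                # For example (values inferred from the setup above): data_1
                # has 15 frames, so half_frames_1 = ceil((15 - 2) / 2) = 7 and
                # its second clip covers frames 7-8; data_2 has 10 frames, so
                # half_frames_2 = ceil((10 - 2) / 2) = 4, covering frames 4-5.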
                half_frames_1 = math.ceil((data_1.shape[1] - clip_frames) / 2)
                half_frames_2 = math.ceil((data_2.shape[1] - clip_frames) / 2)
                expected = [
                    (0, data_1[:, :clip_frames]),
                    (0, data_1[:, half_frames_1:half_frames_1 + clip_frames]),
                    (1, data_2[:, :clip_frames]),
                    (1, data_2[:, half_frames_2:half_frames_2 + clip_frames]),
                ]
                for i, sample in enumerate(dataset):
                    self.assertTrue(sample["video"].equal(expected[i][1]))
                    self.assertEqual(sample["label"], expected[i][0])
    def _make_encoded_video_dataset(self, data: SampleCollection) -> 'EncodedVideoDataset':
        classes = self._get_classes(data)
        label_to_class_mapping = dict(enumerate(classes))
        class_to_label_mapping = {c: lab for lab, c in label_to_class_mapping.items()}

        filepaths = data.values("filepath")
        labels = data.values(self.label_field + ".label")
        targets = [class_to_label_mapping[lab] for lab in labels]
        labeled_video_paths = LabeledVideoPaths(list(zip(filepaths, targets)))

        ds: EncodedVideoDataset = EncodedVideoDataset(
            labeled_video_paths,
            self.clip_sampler,
            video_sampler=self.video_sampler,
            decode_audio=self.decode_audio,
            decoder=self.decoder,
        )
        return ds
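
# Illustration (not from the original source) of the label-mapping logic in
# _make_encoded_video_dataset above, using hypothetical class names:
classes = ["cleaning windows", "running"]
label_to_class_mapping = dict(enumerate(classes))
# -> {0: "cleaning windows", 1: "running"}
class_to_label_mapping = {c: lab for lab, c in label_to_class_mapping.items()}
# -> {"cleaning windows": 0, "running": 1}
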
    def test_random_clip_sampling_works(self, decoder):
        with mock_encoded_video_dataset_file() as (
                mock_csv,
                label_videos,
                total_duration,
        ):
            half_duration = total_duration / 2 - self._EPS
            clip_sampler = make_clip_sampler("random", half_duration)
            labeled_video_paths = LabeledVideoPaths.from_path(mock_csv)
            dataset = EncodedVideoDataset(
                labeled_video_paths,
                clip_sampler=clip_sampler,
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            expected_labels = [label for label, _ in label_videos]
            for i, sample in enumerate(dataset):
                # Each clip is half the mock video's duration, which
                # corresponds to 5 frames for the mock videos.
                expected_t_shape = 5
                self.assertEqual(sample["video"].shape[1], expected_t_shape)
                self.assertEqual(sample["label"], expected_labels[i])