def mock_encoded_video_dataset_file():
    """
    Creates a temporary mock encoded video dataset: two encoded videos, each
    listed twice in a labeled video file, giving 4 entries labeled 0 - 3.
    Yields a tuple of (labeled video file path, ordered (label, video tensor)
    pairs, video duration in seconds).

    NOTE(review): the labeled video file is created with ``delete=False`` and
    is never removed here — presumably callers/fixtures tolerate or clean up
    the leftover temp file; confirm cleanup happens elsewhere.
    """
    num_frames = 10
    fps = 5
    with temp_encoded_video(num_frames=num_frames, fps=fps) as (
            video_file_name_1,
            data_1,
    ):
        with temp_encoded_video(num_frames=num_frames, fps=fps) as (
                video_file_name_2,
                data_2,
        ):
            # Each video appears twice so that labels 0-3 alternate between
            # the two videos.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
                f.write(f"{video_file_name_1} 0\n".encode())
                f.write(f"{video_file_name_2} 1\n".encode())
                f.write(f"{video_file_name_1} 2\n".encode())
                f.write(f"{video_file_name_2} 3\n".encode())

            # Ordered to match the lines written to the label file above.
            label_videos = [
                (0, data_1),
                (1, data_2),
                (2, data_1),
                (3, data_2),
            ]
            # Duration in seconds implied by the encoding parameters above.
            video_duration = num_frames / fps
            yield f.name, label_videos, video_duration
    def test_video_name_with_whitespace_works(self, decoder):
        """A video path containing a space must survive the labeled-video
        text file format and still decode into the expected frames."""
        frame_count = 10
        frame_rate = 5
        with temp_encoded_video(num_frames=frame_count,
                                fps=frame_rate,
                                prefix="pre fix") as (
                                    spaced_path,
                                    frames,
                                ):
            # Two entries, labels 0 and 1, both pointing at the same video.
            with tempfile.NamedTemporaryFile(delete=False,
                                             suffix=".txt") as label_file:
                label_file.write(f"{spaced_path} 0\n".encode())
                label_file.write(f"{spaced_path} 1\n".encode())

            duration = frame_count / frame_rate
            dataset = LabeledVideoDataset(
                LabeledVideoPaths.from_path(label_file.name),
                clip_sampler=make_clip_sampler("uniform", duration),
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            expected = [(0, frames), (1, frames)]
            for index, sample in enumerate(dataset):
                self.assertTrue(sample["video"].equal(expected[index][1]))
                self.assertEqual(sample["label"], expected[index][0])
    def test_video_works(self):
        """EncodedVideo reports the right duration and serves full, partial,
        and out-of-range clips for a video-only (no audio) file."""
        frame_count = 11
        frame_rate = 5
        with temp_encoded_video(num_frames=frame_count,
                                fps=frame_rate) as (path, frames):
            video = EncodedVideo.from_path(path)
            self.assertAlmostEqual(video.duration, frame_count / frame_rate)

            # Full span: every frame comes back, audio is absent.
            full_clip = video.get_clip(0, video.duration)
            self.assertTrue(full_clip["video"].equal(frames))
            self.assertEqual(full_clip["audio"], None)

            # First half of the duration returns the first half of the frames.
            half_clip = video.get_clip(0, video.duration / 2)
            self.assertTrue(
                half_clip["video"].equal(frames[:, :round(frame_count / 2)]))
            self.assertEqual(half_clip["audio"], None)

            # A window entirely past the end yields no frames at all.
            empty_clip = video.get_clip(video.duration + 1,
                                        video.duration + 3)
            self.assertEqual(empty_clip["video"], None)
            self.assertEqual(empty_clip["audio"], None)
            video.close()
    def test_sampling_with_non_divisible_processes_by_clips(self, decoder):
        """With 2 dataloader workers and 5 total clips, every clip is still
        produced exactly once even though 5 does not divide evenly by 2."""
        frame_count = 10
        frame_rate = 5
        # Video 1 has 15 frames (3 clips); video 2 has 10 frames (2 clips).
        with temp_encoded_video(num_frames=int(frame_count * 1.5),
                                fps=frame_rate) as (
                                    path_1,
                                    frames_1,
                                ):
            with temp_encoded_video(num_frames=frame_count,
                                    fps=frame_rate) as (
                                        path_2,
                                        frames_2,
                                    ):
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix=".txt") as label_file:
                    label_file.write(f"{path_1} 0\n".encode())
                    label_file.write(f"{path_2} 1\n".encode())

                clip_duration = (frame_count / frame_rate) / 2 - self._EPS
                dataset = LabeledVideoDataset(
                    LabeledVideoPaths.from_path(label_file.name),
                    clip_sampler=make_clip_sampler("uniform", clip_duration),
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                half = frame_count // 2
                expected = {
                    (0, frames_1[:, half * 2:]),  # 1/3 clip
                    (0, frames_1[:, half:half * 2]),  # 2/3 clip
                    (0, frames_1[:, :half]),  # 3/3 clip
                    (1, frames_2[:, :half]),  # First half
                    (1, frames_2[:, half:]),  # Second half
                }

                loader = DataLoader(dataset, batch_size=None, num_workers=2)
                actual = [(item["label"], item["video"]) for item in loader]
                assert_unordered_list_compare_true(self, expected, actual)
    def test_constant_clips_per_video_sampling_works(self, decoder):
        """The constant_clips_per_video sampler draws exactly two evenly
        spaced, fixed-length clips from each video regardless of length."""
        frame_count = 10
        frame_rate = 5
        # Video 1 has 15 frames, video 2 has 10; each yields 2 clips.
        with temp_encoded_video(num_frames=int(frame_count * 1.5),
                                fps=frame_rate) as (
                                    path_1,
                                    frames_1,
                                ):
            with temp_encoded_video(num_frames=frame_count,
                                    fps=frame_rate) as (
                                        path_2,
                                        frames_2,
                                    ):
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix=".txt") as label_file:
                    label_file.write(f"{path_1} 0\n".encode())
                    label_file.write(f"{path_2} 1\n".encode())

                clip_frames = 2
                clip_duration = clip_frames / frame_rate - self._EPS
                dataset = LabeledVideoDataset(
                    LabeledVideoPaths.from_path(label_file.name),
                    clip_sampler=make_clip_sampler("constant_clips_per_video",
                                                   clip_duration, 2),
                    video_sampler=SequentialSampler,
                    decode_audio=False,
                    decoder=decoder,
                )

                # The first clip of each video always starts at second 0; the
                # second starts at the frame nearest:
                # (total_duration - clip_duration) / 2
                offset_1 = math.ceil((frames_1.shape[1] - clip_frames) / 2)
                offset_2 = math.ceil((frames_2.shape[1] - clip_frames) / 2)
                expected = [
                    (0, frames_1[:, :clip_frames]),
                    (0, frames_1[:, offset_1:offset_1 + clip_frames]),
                    (1, frames_2[:, :clip_frames]),
                    (1, frames_2[:, offset_2:offset_2 + clip_frames]),
                ]
                for index, sample in enumerate(dataset):
                    self.assertTrue(sample["video"].equal(expected[index][1]))
                    self.assertEqual(sample["label"], expected[index][0])
    def test_file_api(self):
        """EncodedVideoPyAV accepts an open binary file object directly and
        decodes the same frames as the path-based constructor."""
        frame_count = 11
        frame_rate = 5
        with temp_encoded_video(num_frames=frame_count,
                                fps=frame_rate) as (path, frames):
            with open(path, "rb") as handle:
                video = EncodedVideoPyAV(handle)

            self.assertAlmostEqual(video.duration, frame_count / frame_rate)
            whole = video.get_clip(0, video.duration)
            self.assertTrue(whole["video"].equal(frames))
            self.assertEqual(whole["audio"], None)
    def test_sampling_with_distributed_sampler(self, decoder):
        """
        Runs the dataset under a PyTorch distributed process group with two
        ranks and checks that, for each of two epochs, the union of the clips
        returned by both ranks is exactly the full expected set of
        (label, video) pairs.
        """

        # Make one video with 15 frames and one with 10 frames, producing 3 clips and 2
        # clips respectively.
        num_frames = 10
        fps = 5
        with temp_encoded_video(num_frames=int(num_frames * 1.5), fps=fps) as (
                video_file_name_1,
                data_1,
        ):
            with temp_encoded_video(num_frames=num_frames, fps=fps) as (
                    video_file_name_2,
                    data_2,
            ):
                with tempfile.NamedTemporaryFile(delete=False,
                                                 suffix=".txt") as f:
                    f.write(f"{video_file_name_1} 0\n".encode())
                    f.write(f"{video_file_name_2} 1\n".encode())

                total_duration = num_frames / fps
                half_duration = total_duration / 2 - self._EPS

                # Create several processes initialized in a PyTorch distributed process
                # group so that distributed sampler is setup correctly when dataset is
                # constructed.
                num_processes = 2
                processes = []
                # Shared dict through which each rank reports its per-epoch
                # results back to this parent process.
                return_dict = multiprocessing.Manager().dict()
                for rank in range(num_processes):
                    p = Process(
                        target=run_distributed,
                        args=(
                            rank,
                            num_processes,
                            decoder,
                            half_duration,
                            f.name,
                            return_dict,
                        ),
                    )
                    p.start()
                    processes.append(p)

                # All ranks must be started before joining so the process
                # group can rendezvous.
                for p in processes:
                    p.join()

                # After joining all distributed processes we expect all these label,
                # video pairs to be returned in random order.
                half_frames = num_frames // 2
                expected = {
                    (0, data_1[:, :half_frames]),  # 1/3 clip
                    (0, data_1[:, half_frames:half_frames * 2]),  # 2/3 clip
                    (0, data_1[:, half_frames * 2:]),  # 3/3 clip
                    (1, data_2[:, :half_frames]),  # First half
                    (1, data_2[:, half_frames:]),  # Second half
                }

                # Merge the per-rank results into one combined list per epoch.
                epoch_results = collections.defaultdict(list)
                for v in return_dict.values():
                    for k_2, v_2 in v.items():
                        epoch_results[k_2].extend(v_2)

                assert_unordered_list_compare_true(self, expected,
                                                   epoch_results["epoch_1"])
                assert_unordered_list_compare_true(self, expected,
                                                   epoch_results["epoch_2"])