def test_keyframe_reading(self, test_video, config):
        full_path = os.path.join(VIDEO_DIR, test_video)

        av_reader = av.open(full_path)
        # reduce streams to only keyframes
        av_stream = av_reader.streams.video[0]
        av_stream.codec_context.skip_frame = "NONKEY"

        av_keyframes = []
        vr_keyframes = []
        if av_reader.streams.video:

            # Collect every keyframe pts with PyAV. Then seek between
            # consecutive keyframes with VideoReader and assert that the
            # returned pts values all land on entries of av_keyframes.

            for av_frame in av_reader.decode(av_stream):
                av_keyframes.append(float(av_frame.pts * av_frame.time_base))

        if len(av_keyframes) > 1:
            video_reader = VideoReader(full_path, "video")
            for i in range(1, len(av_keyframes)):
                seek_val = (av_keyframes[i] + av_keyframes[i - 1]) / 2
                data = next(video_reader.seek(seek_val, True))
                vr_keyframes.append(data["pts"])

            data = next(video_reader.seek(config.duration, True))
            vr_keyframes.append(data["pts"])

            assert len(av_keyframes) == len(vr_keyframes)
            # NOTE: this video yields a different keyframe depending on the
            # loader (0.333 for pyav, 0.666 for us)
            if test_video != "TrumanShow_wave_f_nm_np1_fr_med_26.avi":
                for i in range(len(av_keyframes)):
                    assert av_keyframes[i] == approx(vr_keyframes[i], rel=0.001)
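These snippets all share the same test scaffolding. Below is a minimal sketch of the imports and fixtures they assume; VIDEO_DIR, GroundTruth, and the contents of test_videos are illustrative stand-ins, not the suite's exact definitions:

import collections
import math
import os
import urllib.error

import av
import pytest
import torch
import torchvision
from pytest import approx
from torchvision.io import VideoReader

# Illustrative stand-ins for the real fixtures: the directory holding the
# test clips, and a mapping from file name to its expected properties.
VIDEO_DIR = os.path.join(os.path.dirname(__file__), "assets", "videos")
GroundTruth = collections.namedtuple(
    "GroundTruth", ["duration", "video_fps", "check_aframes"])
test_videos = {
    # file name -> GroundTruth(duration=..., video_fps=..., check_aframes=...)
}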
Example #2
    def test_fate_suite(self):
        video_path = fate("sub/MovText_capability_tester.mp4", VIDEO_DIR)
        vr = VideoReader(video_path)
        metadata = vr.get_metadata()

        self.assertIsNotNone(metadata["subtitles"]["duration"])
        os.remove(video_path)
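The fate() helper used above is not shown in these snippets. Here is a minimal sketch, assuming it simply downloads and caches a sample from FFmpeg's FATE suite (https://fate-suite.ffmpeg.org/); the URL base, the caching behaviour, and the file-name flattening are all assumptions:

import os
import urllib.request

FATE_BASE_URL = "https://fate-suite.ffmpeg.org/"  # assumed asset mirror

def fate(name, path="."):
    # Download a FATE sample once and return its local path; flattening
    # "sub/..." into a single file name is a guess at the helper's layout.
    local_path = os.path.join(path, name.replace("/", "_"))
    if not os.path.exists(local_path):
        urllib.request.urlretrieve(FATE_BASE_URL + name, local_path)
    return local_path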
Example #3
    def test_frame_reading(self):
        for test_video, config in test_videos.items():
            full_path = os.path.join(VIDEO_DIR, test_video)

            av_reader = av.open(full_path)

            if av_reader.streams.video:
                video_reader = VideoReader(full_path, "video")
                for av_frame in av_reader.decode(av_reader.streams.video[0]):
                    vr_frame = next(video_reader)

                    assert float(av_frame.pts * av_frame.time_base) == approx(vr_frame["pts"], abs=0.1)

                    av_array = torch.tensor(av_frame.to_rgb().to_ndarray()).permute(2, 0, 1)
                    vr_array = vr_frame["data"]
                    mean_delta = torch.mean(torch.abs(av_array.float() - vr_array.float()))
                    # on average the difference is very small and caused
                    # by decoding (around 1%)
                    # TODO: assess empirically how to set this threshold; at
                    # the moment it is ~1% averaged over all frames
                    assert mean_delta.item() < 2.5

            av_reader = av.open(full_path)
            if av_reader.streams.audio:
                video_reader = VideoReader(full_path, "audio")
                for av_frame in av_reader.decode(av_reader.streams.audio[0]):
                    vr_frame = next(video_reader)
                    assert float(av_frame.pts * av_frame.time_base) == approx(vr_frame["pts"], abs=0.1)

                    av_array = torch.tensor(av_frame.to_ndarray()).permute(1, 0)
                    vr_array = vr_frame["data"]

                    max_delta = torch.max(torch.abs(av_array.float() - vr_array.float()))
                    # ensure the signals never differ by more than 0.001
                    # (0.1% of full scale)
                    assert max_delta.item() < 0.001
Example #4
    def test_frame_reading(self, test_video):
        full_path = os.path.join(VIDEO_DIR, test_video)
        with av.open(full_path) as av_reader:
            if av_reader.streams.video:
                av_frames, vr_frames = [], []
                av_pts, vr_pts = [], []
                # get av frames
                for av_frame in av_reader.decode(av_reader.streams.video[0]):
                    av_frames.append(torch.tensor(av_frame.to_rgb().to_ndarray(), dtype=torch.uint8).permute(2, 0, 1))
                    av_pts.append(av_frame.pts * av_frame.time_base)

                # get vr frames
                video_reader = VideoReader(full_path, "video")
                for vr_frame in video_reader:
                    vr_frames.append(vr_frame["data"])
                    vr_pts.append(vr_frame["pts"])

                # same number of frames
                assert len(vr_frames) == len(av_frames)
                assert len(vr_pts) == len(av_pts)

                # compare the frames and pts values
                for i in range(len(vr_frames)):
                    assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
                    mean_delta = torch.mean(torch.abs(av_frames[i].float() - vr_frames[i].float()))
                    # on average the difference is very small and caused
                    # by decoding (around 1%)
                    # TODO: assess empirically how to set this threshold; at
                    # the moment it is ~1% averaged over all frames
                    assert mean_delta.item() < 2.55

                del vr_frames, av_frames, vr_pts, av_pts

        # test audio reading compared to PYAV
        with av.open(full_path) as av_reader:
            if av_reader.streams.audio:
                av_frames, vr_frames = [], []
                av_pts, vr_pts = [], []
                # get av frames
                for av_frame in av_reader.decode(av_reader.streams.audio[0]):
                    av_frames.append(torch.tensor(av_frame.to_ndarray(), dtype=torch.float32).permute(1, 0))
                    av_pts.append(av_frame.pts * av_frame.time_base)

                # get vr frames
                video_reader = VideoReader(full_path, "audio")
                for vr_frame in video_reader:
                    vr_frames.append(vr_frame["data"])
                    vr_pts.append(vr_frame["pts"])

                # same number of frames
                assert len(vr_frames) == len(av_frames)
                assert len(vr_pts) == len(av_pts)

                # compare the frames and pts values
                for i in range(len(vr_frames)):
                    assert float(av_pts[i]) == approx(vr_pts[i], abs=0.1)
                    max_delta = torch.max(torch.abs(av_frames[i].float() - vr_frames[i].float()))
                    # ensure the signals never differ by more than 0.001
                    # (0.1% of full scale)
                    assert max_delta.item() < 0.001
Example #5
 def test_metadata(self, video_file):
     full_path = os.path.join(VIDEO_DIR, video_file)
     decoder = VideoReader(full_path, device="cuda")
     video_metadata = decoder.get_metadata()["video"]
     with av.open(full_path) as container:
         video = container.streams.video[0]
         av_duration = float(video.duration * video.time_base)
         assert math.isclose(video_metadata["duration"], av_duration, rel_tol=1e-2)
         assert math.isclose(video_metadata["fps"], video.base_rate, rel_tol=1e-2)
Example #6
 def test_metadata(self, test_video, config):
     """
     Test that the metadata returned via pyav corresponds to the one returned
     by the new video decoder API
     """
     full_path = os.path.join(VIDEO_DIR, test_video)
     reader = VideoReader(full_path, "video")
     reader_md = reader.get_metadata()
     assert config.video_fps == approx(reader_md["video"]["fps"][0], abs=0.0001)
     assert config.duration == approx(reader_md["video"]["duration"][0], abs=0.5)
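For context, get_metadata() returns a nested dict keyed by stream type, with one list entry per stream of that type, which is why the assertions above index [0]. An illustrative shape with made-up values:

# Illustrative only; keys depend on which streams the file contains.
reader_md = {
    "video": {"duration": [10.9], "fps": [29.97]},
    "subtitles": {"duration": [10.9]},
}
assert reader_md["video"]["fps"][0] == 29.97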
Example #7
    def test_fate_suite(self):
        # TODO: remove the try-except statement once the connectivity issues are resolved
        try:
            video_path = fate("sub/MovText_capability_tester.mp4", VIDEO_DIR)
        except (urllib.error.URLError, ConnectionError) as error:
            pytest.skip(f"Skipping due to connectivity issues: {error}")
        vr = VideoReader(video_path)
        metadata = vr.get_metadata()

        assert metadata["subtitles"]["duration"] is not None
        os.remove(video_path)
Example #8
 def test_seek_reading(self, keyframes, full_path, duration):
     decoder = VideoReader(full_path, device="cuda")
     time = duration / 2
     decoder.seek(time, keyframes_only=keyframes)
     with av.open(full_path) as container:
         # with no stream specified, PyAV seeks in av.time_base units (microseconds)
         container.seek(int(time * 1000000), any_frame=not keyframes, backward=False)
         for av_frame in container.decode(container.streams.video[0]):
             av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray())
             vision_frames = next(decoder)["data"]
             mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float()))
             assert mean_delta < 0.75
Example #9
 def test_frame_reading(self):
     for test_video in test_videos:
         full_path = os.path.join(VIDEO_DIR, test_video)
         decoder = VideoReader(full_path, device="cuda:0")
         with av.open(full_path) as container:
             for av_frame in container.decode(container.streams.video[0]):
                 av_frames = torch.tensor(av_frame.to_ndarray().flatten())
                 vision_frames = next(decoder)["data"]
                 mean_delta = torch.mean(
                     torch.abs(av_frames.float() -
                               decoder._reformat(vision_frames).float()))
                 assert mean_delta < 0.1
Example #10
 def test_metadata(self):
     """
     Test that the metadata returned via pyav corresponds to the one returned
     by the new video decoder API
     """
     for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         reader = VideoReader(full_path, "video")
         reader_md = reader.get_metadata()
         self.assertAlmostEqual(config.video_fps,
                                reader_md["video"]["fps"][0],
                                delta=0.0001)
         self.assertAlmostEqual(config.duration,
                                reader_md["video"]["duration"][0],
                                delta=0.5)
Example #11
    def test_video_reading_fn(self):
        """
        Test that the pyav and ffmpeg outputs are mostly the same
        """
        for test_video, config in test_videos.items():
            full_path = os.path.join(VIDEO_DIR, test_video)

            ref_result = _decode_frames_by_av_module(full_path)

            reader = VideoReader(full_path, "video")
            newapi_result = _template_read_video(reader)

            # First we check if the frames are approximately the same
            # (note that every codec context has signature artefacts which
            # make a direct comparison not feasible)
            if newapi_result.vframes.numel() > 0 and ref_result.vframes.numel() > 0:
                mean_delta = torch.mean(
                    torch.abs(newapi_result.vframes.float() -
                              ref_result.vframes.float()))
                # the assertion must stay inside the guard: mean_delta is only
                # defined when both decoders returned frames
                self.assertAlmostEqual(mean_delta.item(), 0, delta=8.0)

            # Just a sanity check: are the two of the correct size?
            self.assertEqual(newapi_result.vframes.size(),
                             ref_result.vframes.size())

            # Lastly, we compare the resulting audio streams
            if (config.check_aframes and newapi_result.aframes.numel() > 0
                    and ref_result.aframes.numel() > 0):
                # audio stream is available and the decoder is expected to
                # return audio frames
                is_same = torch.all(
                    torch.eq(newapi_result.aframes,
                             ref_result.aframes)).item()
                self.assertTrue(is_same)
Example #12
    def test_accurateseek_middle(self):
        for test_video, config in test_videos.items():
            full_path = os.path.join(VIDEO_DIR, test_video)

            stream = "video"
            video_reader = VideoReader(full_path, stream)
            md = video_reader.get_metadata()
            duration = md[stream]["duration"][0]
            if duration is not None:

                num_frames = 0
                for frame in video_reader:
                    num_frames += 1

                video_reader.seek(duration / 2)
                middle_num_frames = 0
                for frame in video_reader:
                    middle_num_frames += 1

                assert middle_num_frames < num_frames
                assert middle_num_frames == approx(num_frames // 2, abs=1)

                video_reader.seek(duration / 2)
                frame = next(video_reader)
                lb = duration / 2 - 1 / md[stream]["fps"][0]
                ub = duration / 2 + 1 / md[stream]["fps"][0]
                assert lb <= frame["pts"] <= ub
Example #13
    def test_accurateseek_middle(self):
        for test_video, config in test_videos.items():
            full_path = os.path.join(VIDEO_DIR, test_video)

            stream = "video"
            video_reader = VideoReader(full_path, stream)
            md = video_reader.get_metadata()
            duration = md[stream]["duration"][0]
            if duration is not None:

                num_frames = 0
                for frame in video_reader:
                    num_frames += 1

                video_reader.seek(duration / 2)
                middle_num_frames = 0
                for frame in video_reader:
                    middle_num_frames += 1

                self.assertTrue(middle_num_frames < num_frames)
                self.assertAlmostEqual(middle_num_frames,
                                       num_frames // 2,
                                       delta=1)

                video_reader.seek(duration / 2)
                frame = next(video_reader)
                lb = duration / 2 - 1 / md[stream]["fps"][0]
                ub = duration / 2 + 1 / md[stream]["fps"][0]
                self.assertTrue(lb <= frame["pts"] <= ub)
Example #14
 def test_frame_reading(self, video_file):
     full_path = os.path.join(VIDEO_DIR, video_file)
     decoder = VideoReader(full_path, device="cuda")
     with av.open(full_path) as container:
         for av_frame in container.decode(container.streams.video[0]):
             av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray())
             vision_frames = next(decoder)["data"]
             mean_delta = torch.mean(torch.abs(av_frames.float() - vision_frames.cpu().float()))
             assert mean_delta < 0.75
Example #15
    def test_seek_start(self):
        for test_video, config in test_videos.items():
            full_path = os.path.join(VIDEO_DIR, test_video)

            video_reader = VideoReader(full_path, "video")
            num_frames = 0
            for frame in video_reader:
                num_frames += 1

            # now seek the container back to 0 and count again;
            # the initial seek is often imprecise and may not land
            # exactly at 0
            video_reader.seek(0)
            start_num_frames = 0
            for frame in video_reader:
                start_num_frames += 1

            assert start_num_frames == num_frames

            # now seek the container to < 0 to check for unexpected behaviour
            video_reader.seek(-1)
            start_num_frames = 0
            for frame in video_reader:
                start_num_frames += 1

            assert start_num_frames == num_frames
Example #16
 def test_read_video_tensor(self):
     """
     Check if reading the video using the `next` based API yields the
     same sized tensors as the pyav alternative.
     """
     torchvision.set_video_backend("pyav")
     for test_video, config in test_videos.items():
         full_path = os.path.join(VIDEO_DIR, test_video)
         # pass 1: decode all frames using existing TV decoder
         tv_result, _, _ = torchvision.io.read_video(full_path,
                                                     pts_unit="sec")
         tv_result = tv_result.permute(0, 3, 1, 2)
         # pass 2: decode all frames using new api
         reader = VideoReader(full_path, "video")
         frames = []
         for frame in reader:
             frames.append(frame['data'])
         new_api = torch.stack(frames, 0)
         self.assertEqual(tv_result.size(), new_api.size())
Example #17
def video_gen(video_path, S):
    reader = VideoReader(video_path)
    for frame in reader:
        yield frame['data']
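A usage sketch for the generator above. Note that S is unused in the snippet as shown; the file name and the batch size here are hypothetical:

import itertools

import torch

# Take the first 8 decoded frames and stack them into one (8, C, H, W) clip.
frames = list(itertools.islice(video_gen("example.mp4", S=8), 8))
clip = torch.stack(frames)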