Exemple #1
0
    def test_read_partial_video_pts_unit_sec(self):
        """Check partial reads addressed by timestamps expressed in seconds.

        For every start frame in 0-4 and every window length in 1-3, the
        frames returned by ``io.read_video`` must equal the matching slice of
        the frames the temporary video was created from.  A final read uses a
        start time one time-base tick after the timestamp of frame 4, i.e. a
        start that does not match any frame timestamp.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

            for start in range(5):
                for offset in range(1, 4):
                    lv, _, _ = io.read_video(f_name,
                                             pts[start],
                                             pts[start + offset - 1],
                                             pts_unit='sec')
                    s_data = data[start:(start + offset)]
                    self.assertEqual(len(lv), offset)
                    self.assertTrue(s_data.equal(lv))

            # Use a context manager so the container is closed even when one
            # of the assertions below fails.
            with av.open(f_name) as container:
                stream = container.streams[0]
                # Convert the timestamp of frame 4 to time-base ticks, move
                # one tick forward, and convert back to seconds.
                start_sec = int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base
                lv, _, _ = io.read_video(f_name,
                                         start_sec,
                                         pts[7],
                                         pts_unit='sec')
            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                self.assertEqual(len(lv), 4)
                self.assertTrue(data[4:8].equal(lv))
Exemple #2
0
    def test_read_partial_video_pts_unit_sec(self, start, offset):
        """Check one partial read addressed by timestamps expressed in seconds.

        ``start`` is the index of the first frame to read and ``offset`` the
        number of frames expected back.  A second read starts one time-base
        tick after the timestamp of frame 4; its result is only verified for
        the "pyav" backend.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

            first_pts = pts[start]
            last_pts = pts[start + offset - 1]
            frames, _, _ = io.read_video(f_name, first_pts, last_pts, pts_unit='sec')
            expected = data[start:(start + offset)]
            assert len(frames) == offset
            assert_equal(expected, frames)

            with av.open(f_name) as container:
                time_base = container.streams[0].time_base
                # timestamp of frame 4 in ticks, shifted one tick forward
                shifted_ticks = int(pts[4] * (1.0 / time_base) + 1)
                frames, _, _ = io.read_video(f_name,
                                             shifted_ticks * time_base,
                                             pts[7],
                                             pts_unit='sec')
            if get_video_backend() != "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                return
            assert len(frames) == 4
            assert_equal(data[4:8], frames)
Exemple #3
0
    def test_read_packed_b_frames_divx_file(self):
        """Timestamps of the bundled DivX sample must be sorted and its fps 30."""
        video_path = os.path.join(
            VIDEO_DIR,
            "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi",
        )
        timestamps, frame_rate = io.read_video_timestamps(video_path)

        # The returned timestamps must already be in ascending order.
        self.assertEqual(timestamps, sorted(timestamps))
        self.assertEqual(frame_rate, 30)
Exemple #4
0
def vgpt(invid):
    """Reconstruct a video with the module-level ``vqvae`` and save the result.

    Any existing ``output.mp4`` in the working directory is deleted first.
    The input is read, passed through ``preprocess``, encoded and decoded by
    ``vqvae``, and the reconstruction is written to ``output.mp4`` at the
    (integer-truncated) frame rate of the input clip.

    Args:
        invid: Path of the input video file.

    Returns:
        The string ``'./output.mp4'``.
    """
    try:
        os.remove("output.mp4")
    except FileNotFoundError:
        pass

    # The clip is only needed for its metadata; close it right away so the
    # underlying reader is not left open.
    clip = VideoFileClip(invid)
    try:
        rate = clip.fps
        sequence_length = int(rate * clip.duration)
    finally:
        clip.close()

    pts = read_video_timestamps(invid, pts_unit='sec')[0]
    # The frame count above is estimated from fps * duration and may exceed
    # the number of timestamps actually found; clamp it so the end index
    # stays inside ``pts``.
    sequence_length = min(sequence_length, len(pts))
    video = read_video(invid,
                       pts_unit='sec',
                       start_pts=pts[0],
                       end_pts=pts[sequence_length - 1])[0]
    video = preprocess(video, resolution,
                       sequence_length).unsqueeze(0).to(device)

    with torch.no_grad():
        encodings = vqvae.encode(video)
        video_recon = vqvae.decode(encodings)
        video_recon = torch.clamp(video_recon, -0.5, 0.5)

    videos = video_recon[0].permute(1, 2, 3, 0)  # CTHW -> THWC
    # Map the clamped [-0.5, 0.5] range to 8-bit values in [0, 255].
    videos = ((videos + 0.5) * 255).cpu().numpy().astype('uint8')
    imageio.mimwrite('output.mp4', videos, fps=int(rate))
    return './output.mp4'
Exemple #5
0
 def _compute_frame_pts(self):
     """Collect timestamps and frame rate for every path in ``self.video_paths``.

     Stores one tensor of timestamps per video in ``self.video_pts`` and the
     matching frame rate in ``self.video_fps``, in the order of the paths.
     """
     self.video_pts = pts_per_video = []
     self.video_fps = fps_per_video = []
     # TODO maybe parallelize this
     for path in self.video_paths:
         frame_pts, frame_rate = read_video_timestamps(path)
         pts_per_video.append(torch.as_tensor(frame_pts))
         fps_per_video.append(frame_rate)
Exemple #6
0
 def __getitem__(self, idx):
     """Return the timestamps of the video at ``idx``, or ``None`` on failure.

     Args:
         idx: Index into ``self.x``.

     Returns:
         Whatever ``read_video_timestamps`` returns for ``self.x[idx]``.  If
         reading fails, the offending entry is printed and ``None`` is
         returned so that one unreadable video does not abort the caller.
     """
     path = self.x[idx]
     try:
         return read_video_timestamps(path)
     except Exception:
         # Deliberately best-effort, but do not swallow KeyboardInterrupt or
         # SystemExit the way a bare ``except:`` would.
         print('Got a problem at:', path)
         return None
Exemple #7
0
def _make_dataset(directory,
                  extensions=None,
                  is_valid_file=None,
                  pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory:
            Root directory path (should not contain subdirectories).
        extensions:
            Tuple of valid extensions. Matched case-insensitively against
            the end of each file name.
        is_valid_file:
            Callable receiving a file name and returning whether to keep it.
            Takes precedence over ``extensions`` when both are given.
        pts_unit:
            Unit of the timestamps.

    Returns:
        A tuple ``(instances, timestamps, offsets, fpss)``: the paths of the
        selected files, the timestamps and fps read for each of them, and for
        each file the total number of timestamps of all files before it.
        ``offsets`` is ``[0]`` when no file is selected.

    Raises:
        ValueError: If both ``extensions`` and ``is_valid_file`` are None.

    """

    # Without any filter there is nothing to decide validity with; fail early
    # instead of hitting an unbound local at the first directory entry.
    if extensions is None and is_valid_file is None:
        raise ValueError('Both extensions and is_valid_file cannot be None')

    if is_valid_file is not None:
        # an explicit predicate overrides the extension check
        _is_valid_file = is_valid_file
    else:

        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)

    # find all instances (no subdirectories)
    instances = [
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if _is_valid_file(fname)
    ]

    # get timestamps
    timestamps, fpss = [], []
    for instance in instances:
        ts, fps = read_video_timestamps(instance, pts_unit=pts_unit)
        timestamps.append(ts)
        fpss.append(fps)

    # get frame offsets: running total of the timestamp counts of all
    # preceding videos
    offsets = [0]
    for ts in timestamps[:-1]:
        offsets.append(offsets[-1] + len(ts))

    return instances, timestamps, offsets, fpss
Exemple #8
0
    def test_read_timestamps_pts_unit_sec(self):
        """Check timestamps in seconds against values derived from stream metadata.

        The expected timestamps are built from the stream's average rate,
        time base and duration as reported by PyAV.
        """
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

            # Use a context manager so the container does not stay open after
            # the metadata has been read.
            with av.open(f_name) as container:
                stream = container.streams[0]
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
                # multiply by the time base to convert ticks to seconds
                expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)]

            self.assertEqual(pts, expected_pts)
Exemple #9
0
    def test_read_partial_video(self):
        """Check that reading a pts range returns exactly the expected frames.

        Covers every start frame in 0-4 with window lengths 1-3, then one
        read whose start pts is one unit past the pts of frame 4.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            windows = [(first, count) for first in range(5) for count in range(1, 4)]
            for first, count in windows:
                stop = first + count
                frames, _, _ = io.read_video(f_name, pts[first], pts[stop - 1])
                expected = data[first:stop]
                self.assertEqual(len(frames), count)
                self.assertTrue(expected.equal(frames))

            # start pts that falls between the pts of frames 4 and 5
            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            self.assertEqual(len(frames), 4)
            self.assertTrue(data[4:8].equal(frames))
Exemple #10
0
 def test_read_packed_b_frames_divx_file(self):
     """Download the DivX sample and check its timestamps are sorted at 30 fps.

     The test is skipped (with a RuntimeWarning) when the download fails
     with ``URLError``.
     """
     video_name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
     video_url = "https://download.pytorch.org/vision_tests/io/" + video_name
     with get_tmp_dir() as temp_dir:
         local_path = os.path.join(temp_dir, video_name)
         try:
             utils.download_url(video_url, temp_dir)
             timestamps, frame_rate = io.read_video_timestamps(local_path)
             self.assertEqual(timestamps, sorted(timestamps))
             self.assertEqual(frame_rate, 30)
         except URLError:
             skip_reason = "could not download test file '{}'".format(video_url)
             warnings.warn(skip_reason, RuntimeWarning)
             raise unittest.SkipTest(skip_reason)
Exemple #11
0
    def test_read_timestamps(self):
        """Check ``read_video_timestamps`` against pts derived from stream metadata."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            # The context manager closes the container even if the assertion
            # below fails.
            with av.open(f_name) as container:
                stream = container.streams[0]
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
            expected_pts = [i * pts_step for i in range(num_frames)]

            self.assertEqual(pts, expected_pts)
Exemple #12
0
    def test_read_partial_video_bframes(self):
        """Check partial reads on a video encoded with B-frames.

        Frames are compared to the source within ``self.TOLERANCE`` because
        the encoding is not lossless.
        """
        # do not use lossless encoding, to test the presence of B-frames
        encoder_options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'}
        with temp_video(100, 300, 300, 5, options=encoder_options) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            for first in range(0, 80, 20):
                for count in range(1, 4):
                    stop = first + count
                    frames, _, _ = io.read_video(f_name, pts[first], pts[stop - 1])
                    expected = data[first:stop]
                    self.assertEqual(len(frames), count)
                    max_error = (expected.float() - frames.float()).abs().max()
                    self.assertTrue(max_error < self.TOLERANCE)

            # start pts that falls between the pts of frames 4 and 5
            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            self.assertEqual(len(frames), 4)
            max_error = (data[4:8].float() - frames.float()).abs().max()
            self.assertTrue(max_error < self.TOLERANCE)
Exemple #13
0
    def test_read_timestamps_from_packet(self):
        """Check timestamps of an mpeg4-encoded video against stream metadata.

        Also asserts that the codec extradata contains ``b'Lavc'``, which the
        test uses as evidence that the optimized code path was taken.
        """
        with temp_video(10, 300, 300, 5, video_codec='mpeg4') as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            # The context manager closes the container even if an assertion
            # fails.
            with av.open(f_name) as container:
                stream = container.streams[0]
                # make sure we went through the optimized codepath
                self.assertIn(b'Lavc', stream.codec_context.extradata)
                pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
            expected_pts = [i * pts_step for i in range(num_frames)]

            self.assertEqual(pts, expected_pts)
Exemple #14
0
    def test_read_partial_video(self):
        """Check that reading a pts range returns exactly the expected frames.

        The read with a start pts that matches no frame is only performed
        for the "pyav" backend.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            windows = [(first, count) for first in range(5) for count in range(1, 4)]
            for first, count in windows:
                stop = first + count
                frames, _, _ = io.read_video(f_name, pts[first], pts[stop - 1])
                expected = data[first:stop]
                self.assertEqual(len(frames), count)
                self.assertTrue(expected.equal(frames))

            if get_video_backend() != "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                return
            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            self.assertEqual(len(frames), 4)
            self.assertTrue(data[4:8].equal(frames))
Exemple #15
0
    def test_read_partial_video(self, start, offset):
        """Check one partial read of ``offset`` frames beginning at frame ``start``.

        The read with a start pts that matches no frame is only performed
        for the "pyav" backend.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            stop = start + offset
            frames, _, _ = io.read_video(f_name, pts[start], pts[stop - 1])
            expected = data[start:stop]
            assert len(frames) == offset
            assert_equal(expected, frames)

            if get_video_backend() != "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                return
            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            assert len(frames) == 4
            assert_equal(data[4:8], frames)
Exemple #16
0
    def test_read_partial_video_pts_unit_sec(self):
        """Check partial reads addressed by timestamps expressed in seconds.

        For every start frame in 0-4 and every window length in 1-3, the
        frames returned by ``io.read_video`` must equal the matching slice of
        the source frames.  A final read uses a start time one time-base tick
        after the timestamp of frame 4 and must return frames 4 to 7.
        """
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

            for start in range(5):
                for offset in range(1, 4):
                    lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit='sec')
                    s_data = data[start:(start + offset)]
                    self.assertEqual(len(lv), offset)
                    self.assertTrue(s_data.equal(lv))

            # Use a context manager so the container does not stay open after
            # the time base has been read.
            with av.open(f_name) as container:
                stream = container.streams[0]
                # Convert the timestamp of frame 4 to time-base ticks, move
                # one tick forward, and convert back to seconds.
                start_sec = int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base
                lv, _, _ = io.read_video(f_name, start_sec, pts[7], pts_unit='sec')
            self.assertEqual(len(lv), 4)
            self.assertTrue(data[4:8].equal(lv))
Exemple #17
0
    def test_read_partial_video_bframes(self):
        """Check partial reads on a video encoded with B-frames.

        Frames are compared to the source with an absolute tolerance of
        ``self.TOLERANCE``.  For the final read, whose start pts matches no
        frame, the "pyav" backend is expected to return frames 4-7 and any
        other backend frames 5-7.
        """
        # do not use lossless encoding, to test the presence of B-frames
        encoder_options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'}
        with temp_video(100, 300, 300, 5, options=encoder_options) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            for first in range(0, 80, 20):
                for count in range(1, 4):
                    stop = first + count
                    frames, _, _ = io.read_video(f_name, pts[first], pts[stop - 1])
                    expected = data[first:stop]
                    self.assertEqual(len(frames), count)
                    assert_equal(expected, frames, rtol=0.0, atol=self.TOLERANCE)

            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            # TODO fix this
            first_expected = 4 if get_video_backend() == 'pyav' else 5
            self.assertEqual(len(frames), 8 - first_expected)
            assert_equal(data[first_expected:8], frames, rtol=0.0, atol=self.TOLERANCE)
Exemple #18
0
    def test_read_partial_video(self):
        """Write frames to a temporary mp4 and check partial reads by pts range.

        Decoded frames are compared to the written ones within
        ``self.TOLERANCE``.
        """

        def max_abs_diff(expected, actual):
            # largest absolute per-element difference between two frame tensors
            return (expected.float() - actual.float()).abs().max()

        with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
            data = self._create_video_frames(10, 300, 300)
            io.write_video(f.name, data, fps=5)

            pts = io.read_video_timestamps(f.name)

            windows = [(first, count) for first in range(5) for count in range(1, 4)]
            for first, count in windows:
                stop = first + count
                frames, _, _ = io.read_video(f.name, pts[first], pts[stop - 1])
                expected = data[first:stop]
                self.assertEqual(len(frames), count)
                self.assertTrue(max_abs_diff(expected, frames) < self.TOLERANCE)

            # start pts that falls between the pts of frames 4 and 5
            frames, _, _ = io.read_video(f.name, pts[4] + 1, pts[7])
            self.assertEqual(len(frames), 4)
            self.assertTrue(max_abs_diff(data[4:8], frames) < self.TOLERANCE)
Exemple #19
0
    def test_read_partial_video_bframes(self, start, offset):
        """Check one partial read on a video encoded with B-frames.

        ``start`` is the index of the first frame to read and ``offset`` the
        number of frames expected back.  For the final read, whose start pts
        matches no frame, the "pyav" backend is expected to return frames 4-7
        and any other backend frames 5-7.
        """
        # do not use lossless encoding, to test the presence of B-frames
        encoder_options = {"bframes": "16", "keyint": "10", "min-keyint": "4"}
        with temp_video(100, 300, 300, 5, options=encoder_options) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            stop = start + offset
            frames, _, _ = io.read_video(f_name, pts[start], pts[stop - 1])
            expected = data[start:stop]
            assert len(frames) == offset
            assert_equal(expected, frames, rtol=0.0, atol=self.TOLERANCE)

            frames, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            # TODO fix this
            first_expected = 4 if get_video_backend() == "pyav" else 5
            assert len(frames) == 8 - first_expected
            assert_equal(data[first_expected:8], frames, rtol=0.0, atol=self.TOLERANCE)
def get_video_stats(filename):
    """Summarize a video file as a dict of basic statistics.

    Args:
        filename: Path of the video file.

    Returns:
        A dict with the keys ``'filename'`` (base name of the path),
        ``'video-duration'`` (frame count divided by fps), ``'fps'`` and
        ``'video-frames'``.  When the fps read from the file is falsy, a
        warning is printed and the last three values are ``None``.
    """
    pts, video_fps = read_video_timestamps(filename=filename, pts_unit='sec')
    base_name = os.path.basename(filename)

    if not video_fps:
        # no usable frame rate: report the file but leave the numbers empty
        print(
            f'WARNING: {filename} has an issue. video_fps = {video_fps}, len(pts) = {len(pts)}.'
        )
        return {
            'filename': base_name,
            'video-duration': None,
            'fps': None,
            'video-frames': None
        }

    frame_count = len(pts)
    return {
        'filename': base_name,
        'video-duration': frame_count / video_fps,
        'fps': video_fps,
        'video-frames': frame_count
    }
Exemple #21
0
    def test_read_timestamps(self):
        """Write frames to a temporary mp4 and check the timestamps read back.

        The expected timestamps are derived from the stream's average rate,
        time base and duration as reported by PyAV.
        """
        with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
            data = self._create_video_frames(10, 300, 300)
            io.write_video(f.name, data, fps=5)

            pts = io.read_video_timestamps(f.name)

            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            # Use a context manager so the container does not stay open after
            # the metadata has been read.
            with av.open(f.name) as container:
                stream = container.streams[0]
                pts_step = int(
                    round(float(1 / (stream.average_rate * stream.time_base))))
                num_frames = int(
                    round(
                        float(stream.average_rate * stream.time_base *
                              stream.duration)))
            expected_pts = [i * pts_step for i in range(num_frames)]

            self.assertEqual(pts, expected_pts)
Exemple #22
0
 def test_read_video_timestamps_corrupted_file(self):
     """Reading timestamps from a non-video file must yield ``([], None)``."""
     with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
         f.write(b"This is not an mpg4 file")
         # The temporary file is buffered: flush so the bytes are on disk
         # before the file is re-opened by name below.  Without this the
         # reader would see an empty file instead of the garbage content.
         f.flush()
         video_pts, video_fps = io.read_video_timestamps(f.name)
         assert video_pts == []
         assert video_fps is None
Exemple #23
0
 def test_read_video_timestamps_corrupted_file(self):
     """Reading timestamps from a non-video file must yield ``([], None)``."""
     with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
         f.write(b'This is not an mpg4 file')
         # The temporary file is buffered: flush so the bytes are on disk
         # before the file is re-opened by name below.  Without this the
         # reader would see an empty file instead of the garbage content.
         f.flush()
         video_pts, video_fps = io.read_video_timestamps(f.name)
         self.assertEqual(video_pts, [])
         self.assertIs(video_fps, None)
Exemple #24
0
def _make_dataset(directory,
                  extensions=None,
                  is_valid_file=None,
                  pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Exactly one of ``extensions`` and ``is_valid_file`` must be given.

    Args:
        directory:
            Root directory path (should not contain subdirectories).
        extensions:
            Tuple of valid extensions.
        is_valid_file:
            Used to find valid files.
        pts_unit:
            Unit of the timestamps.

    Returns:
        A list of video files, timestamps, frame offsets, and fps.

    Raises:
        ValueError: If ``extensions`` and ``is_valid_file`` are both None or
            both not None.

    """

    # handle is_valid_file and extensions the same way torchvision handles them:
    # https://pytorch.org/docs/stable/_modules/torchvision/datasets/folder.html#ImageFolder
    if (extensions is None) == (is_valid_file is None):
        raise ValueError('Both extensions and is_valid_file cannot be None or '
                         'not None at the same time')

    if is_valid_file is None:
        # select files by the end of their lower-cased name
        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)
    else:
        _is_valid_file = is_valid_file

    # candidate videos in the directory itself (no subdirectories)
    candidates = [
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if _is_valid_file(fname)
    ]

    # get timestamps; videos without a duration are dropped from the result
    video_instances, timestamps, fpss = [], [], []
    for instance in candidates:
        use_pyav = AV_AVAILABLE and torchvision.get_video_backend() == 'pyav'
        if not use_pyav:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)
        else:
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]

                # check if we can extract the video duration
                if not stream.duration:
                    print(
                        f'Video {instance} has no timestamp and will be skipped...'
                    )
                    continue  # skip this broken video

                fps = stream.base_rate
                # duration is truncated to whole time units before
                # multiplying by the frame rate
                whole_duration = int(stream.duration * stream.time_base)
                n_frames = int(whole_duration * fps)

            ts = [Fraction(i, fps) for i in range(n_frames)]

        video_instances.append(instance)
        timestamps.append(ts)
        fpss.append(fps)

    # get frame offsets: running total of the timestamp counts of all
    # preceding videos
    offsets = [0]
    for ts in timestamps[:-1]:
        offsets.append(offsets[-1] + len(ts))

    return video_instances, timestamps, offsets, fpss
Exemple #25
0
 def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]:
     """Return the result of ``read_video_timestamps`` for the video at ``idx``."""
     video_path = self.video_paths[idx]
     return read_video_timestamps(video_path)
Exemple #26
0
def _make_dataset(directory,
                  extensions=None,
                  is_valid_file=None,
                  pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory:
            Root directory path. It is walked recursively, so files in
            subdirectories are included.
        extensions:
            Tuple of valid extensions. Matched case-insensitively against
            the end of each file path.
        is_valid_file:
            Callable receiving a file path and returning whether to keep it.
            When ``extensions`` is given as well, a file must pass both
            checks.
        pts_unit:
            Unit of the timestamps. Only used when timestamps are read with
            ``io.read_video_timestamps`` (i.e. not on the PyAV path).

    Returns:
        A tuple ``(video_instances, timestamps, offsets, fpss)``: the paths
        of the selected videos, the timestamps and fps for each of them, and
        for each video the total number of timestamps of all videos before
        it. ``offsets`` is ``[0]`` when no video is selected.

    Raises:
        ValueError: If both ``extensions`` and ``is_valid_file`` are None.

    """

    if extensions is None:
        if is_valid_file is None:
            # This error used to be constructed but never raised, which led
            # to an unbound-local failure at the first file instead.
            raise ValueError('Both extensions and is_valid_file cannot be None')
        _is_valid_file = is_valid_file
    else:

        def is_valid_file_extension(filepath):
            return filepath.lower().endswith(extensions)

        if is_valid_file is None:
            _is_valid_file = is_valid_file_extension
        else:

            def _is_valid_file(filepath):
                return is_valid_file_extension(filepath) and is_valid_file(
                    filepath)

    def on_error(error):
        # os.walk ignores listing errors by default; surface them instead
        raise error

    # find all candidate videos (including subdirectories)
    candidates = []
    for root, _, files in os.walk(directory, onerror=on_error):
        for fname in files:
            path = os.path.join(root, fname)
            if _is_valid_file(path):
                candidates.append(path)

    # get timestamps; videos without a duration are dropped from the result
    video_instances, timestamps, fpss = [], [], []
    for instance in candidates:

        if AV_AVAILABLE and torchvision.get_video_backend() == 'pyav':
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]

                # check if we can extract the video duration
                if not stream.duration:
                    print(
                        f'Video {instance} has no timestamp and will be skipped...'
                    )
                    continue  # skip this broken video

                duration = stream.duration * stream.time_base
                fps = stream.base_rate
                # duration is truncated to whole time units before
                # multiplying by the frame rate
                n_frames = int(int(duration) * fps)

            ts = [Fraction(i, fps) for i in range(n_frames)]
        else:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)

        video_instances.append(instance)
        timestamps.append(ts)
        fpss.append(fps)

    # get frame offsets: running total of the timestamp counts of all
    # preceding videos
    offsets = [0]
    for ts in timestamps[:-1]:
        offsets.append(offsets[-1] + len(ts))

    return video_instances, timestamps, offsets, fpss
Exemple #27
0
 def __getitem__(self, idx):
     """Return the result of ``read_video_timestamps`` for the video at ``idx``."""
     video_path = self.video_paths[idx]
     return read_video_timestamps(video_path)
Exemple #28
0
def _make_dataset(directory,
                  extensions=None,
                  is_valid_file=None,
                  pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory:
            Root directory path (should not contain subdirectories).
        extensions:
            Tuple of valid extensions. Matched case-insensitively against
            the end of each file name.
        is_valid_file:
            Callable receiving a file name and returning whether to keep it.
            Takes precedence over ``extensions`` when both are given.
        pts_unit:
            Unit of the timestamps. Only used when timestamps are read with
            ``io.read_video_timestamps`` (i.e. not on the PyAV path).

    Returns:
        A tuple ``(instances, timestamps, offsets, fpss)``: the paths of the
        selected videos, the timestamps and fps for each of them, and for
        each video the total number of timestamps of all videos before it.
        ``offsets`` is ``[0]`` when no video is selected.

    Raises:
        ValueError: If both ``extensions`` and ``is_valid_file`` are None.

    """

    # Without any filter there is nothing to decide validity with; fail early
    # instead of hitting an unbound local at the first directory entry.
    if extensions is None and is_valid_file is None:
        raise ValueError('Both extensions and is_valid_file cannot be None')

    if is_valid_file is not None:
        # an explicit predicate overrides the extension check
        _is_valid_file = is_valid_file
    else:
        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)

    # find all candidate videos (no subdirectories)
    candidates = [
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if _is_valid_file(fname)
    ]

    # get timestamps; videos without a duration are dropped from the result
    instances, timestamps, fpss = [], [], []
    for instance in candidates:

        if AV_AVAILABLE and torchvision.get_video_backend() == 'pyav':
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]

                # A missing duration would make the arithmetic below fail,
                # so skip such videos instead of crashing.
                if not stream.duration:
                    print(
                        f'Video {instance} has no timestamp and will be skipped...'
                    )
                    continue

                duration = stream.duration * stream.time_base
                fps = stream.base_rate
                # duration is truncated to whole time units before
                # multiplying by the frame rate
                n_frames = int(int(duration) * fps)

            ts = [Fraction(i, fps) for i in range(n_frames)]
        else:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)

        instances.append(instance)
        timestamps.append(ts)
        fpss.append(fps)

    # get frame offsets: running total of the timestamp counts of all
    # preceding videos
    offsets = [0]
    for ts in timestamps[:-1]:
        offsets.append(offsets[-1] + len(ts))

    return instances, timestamps, offsets, fpss
Exemple #29
0
 def __getitem__(self, idx):
     """Return the timestamps of the video at ``idx`` using the configured backend.

     ``read_video_timestamps`` is used when ``self._backend`` is "pyav",
     ``_read_video_timestamps_from_file`` otherwise.
     """
     use_pyav = self._backend == "pyav"
     reader = read_video_timestamps if use_pyav else _read_video_timestamps_from_file
     return reader(self.x[idx])