def test_read_partial_video_pts_unit_sec(self):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')
        for start in range(5):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1], pts_unit='sec')
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue(s_data.equal(lv))

        container = av.open(f_name)
        stream = container.streams[0]
        lv, _, _ = io.read_video(
            f_name,
            int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base,
            pts[7],
            pts_unit='sec')
        if get_video_backend() == "pyav":
            # for "video_reader" backend, we don't decode the closest early frame
            # when the given start pts is not matching any frame pts
            self.assertEqual(len(lv), 4)
            self.assertTrue(data[4:8].equal(lv))
        container.close()
def test_read_partial_video_pts_unit_sec(self, start, offset):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

        lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit='sec')
        s_data = data[start:(start + offset)]
        assert len(lv) == offset
        assert_equal(s_data, lv)

        with av.open(f_name) as container:
            stream = container.streams[0]
            lv, _, _ = io.read_video(
                f_name,
                int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base,
                pts[7],
                pts_unit='sec')
            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                assert len(lv) == 4
                assert_equal(data[4:8], lv)
def test_read_packed_b_frames_divx_file(self):
    name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
    f_name = os.path.join(VIDEO_DIR, name)
    pts, fps = io.read_video_timestamps(f_name)

    self.assertEqual(pts, sorted(pts))
    self.assertEqual(fps, 30)
def vgpt(invid):
    try:
        os.remove("output.mp4")
    except FileNotFoundError:
        pass

    clip = VideoFileClip(invid)
    rate = clip.fps
    sequence_length = int(clip.fps * clip.duration)

    pts = read_video_timestamps(invid, pts_unit='sec')[0]
    video = read_video(invid, pts_unit='sec', start_pts=pts[0], end_pts=pts[sequence_length - 1])[0]
    video = preprocess(video, resolution, sequence_length).unsqueeze(0).to(device)

    with torch.no_grad():
        encodings = vqvae.encode(video)
        video_recon = vqvae.decode(encodings)
        video_recon = torch.clamp(video_recon, -0.5, 0.5)

    videos = video_recon[0].permute(1, 2, 3, 0)  # CTHW -> THWC
    videos = ((videos + 0.5) * 255).cpu().numpy().astype('uint8')

    imageio.mimwrite('output.mp4', videos, fps=int(rate))
    return './output.mp4'
def _compute_frame_pts(self):
    self.video_pts = []
    self.video_fps = []

    # TODO maybe parallelize this
    for video_file in self.video_paths:
        clips, fps = read_video_timestamps(video_file)
        self.video_pts.append(torch.as_tensor(clips))
        self.video_fps.append(fps)
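# The TODO above suggests parallelising the timestamp pass. A minimal sketch of one way
# to do that, following the __getitem__ wrappers shown further below: put the path list
# behind a small Dataset and let DataLoader workers call read_video_timestamps
# concurrently. _TimestampsDataset and compute_frame_pts_parallel are illustrative
# names introduced here, not part of any library.
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.io import read_video_timestamps


class _TimestampsDataset(Dataset):
    def __init__(self, video_paths):
        self.video_paths = video_paths

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        return read_video_timestamps(self.video_paths[idx])


def _unwrap_collate(batch):
    # keep the per-video (pts, fps) tuples as-is instead of stacking them
    return batch


def compute_frame_pts_parallel(video_paths, num_workers=4):
    loader = DataLoader(
        _TimestampsDataset(video_paths),
        batch_size=1,
        num_workers=num_workers,
        collate_fn=_unwrap_collate,
    )
    video_pts, video_fps = [], []
    for batch in loader:
        pts, fps = batch[0]
        video_pts.append(torch.as_tensor(pts))
        video_fps.append(fps)
    return video_pts, video_fps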
def __getitem__(self, idx):
    # print(self.x[idx])
    try:
        testoutput = read_video_timestamps(self.x[idx])
        return testoutput
    except Exception:
        # import pdb; pdb.set_trace()
        print('Got a problem at:', self.x[idx])
def _make_dataset(directory, extensions=None, is_valid_file=None, pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory: Root directory path (should not contain subdirectories).
        extensions: Tuple of valid extensions.
        is_valid_file: Used to find valid files.
        pts_unit: Unit of the timestamps.

    Returns:
        A list of video files, timestamps, frame offsets, and fps.
    """

    # use filename to find valid files
    if extensions is not None:
        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)

    # overwrite function to find valid files
    if is_valid_file is not None:
        _is_valid_file = is_valid_file

    # find all instances (no subdirectories)
    instances = []
    for fname in os.listdir(directory):
        # skip invalid files
        if not _is_valid_file(fname):
            continue
        # keep track of valid files
        path = os.path.join(directory, fname)
        instances.append(path)

    # get timestamps
    timestamps, fpss = [], []
    for instance in instances:
        ts, fps = read_video_timestamps(instance, pts_unit=pts_unit)
        timestamps.append(ts)
        fpss.append(fps)

    # get frame offsets
    offsets = [len(ts) for ts in timestamps]
    offsets = [0] + offsets[:-1]
    for i in range(1, len(offsets)):
        offsets[i] = offsets[i - 1] + offsets[i]  # cumsum

    return instances, timestamps, offsets, fpss
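# The cumulative offsets computed above map a flat, dataset-wide frame index back to a
# particular video and a frame inside it. A minimal sketch, assuming instances,
# timestamps and offsets come from a _make_dataset-style call; get_frame_location is an
# illustrative helper introduced here, not part of the original code.
import bisect


def get_frame_location(offsets, timestamps, global_idx):
    # offsets[i] is the index of video i's first frame in the flattened dataset,
    # so the owning video is the right-most offset that is <= global_idx
    video_idx = bisect.bisect_right(offsets, global_idx) - 1
    frame_idx = global_idx - offsets[video_idx]
    return video_idx, frame_idx, timestamps[video_idx][frame_idx]


# example: with offsets = [0, 120, 250], global index 130 falls in video 1, frame 10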
def test_read_timestamps_pts_unit_sec(self):
    with temp_video(10, 300, 300, 5) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

        container = av.open(f_name)
        stream = container.streams[0]
        pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
        num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
        expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)]

        self.assertEqual(pts, expected_pts)
def test_read_partial_video(self):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)
        for start in range(5):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1])
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue(s_data.equal(lv))

        lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
        self.assertEqual(len(lv), 4)
        self.assertTrue(data[4:8].equal(lv))
def test_read_packed_b_frames_divx_file(self):
    with get_tmp_dir() as temp_dir:
        name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
        f_name = os.path.join(temp_dir, name)
        url = "https://download.pytorch.org/vision_tests/io/" + name
        try:
            utils.download_url(url, temp_dir)
            pts, fps = io.read_video_timestamps(f_name)

            self.assertEqual(pts, sorted(pts))
            self.assertEqual(fps, 30)
        except URLError:
            msg = "could not download test file '{}'".format(url)
            warnings.warn(msg, RuntimeWarning)
            raise unittest.SkipTest(msg)
def test_read_timestamps(self):
    with temp_video(10, 300, 300, 5) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)

        # note: not all formats/codecs provide accurate information for computing the
        # timestamps. For the format that we use here, this information is available,
        # so we use it as a baseline
        container = av.open(f_name)
        stream = container.streams[0]
        pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
        num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
        expected_pts = [i * pts_step for i in range(num_frames)]

        self.assertEqual(pts, expected_pts)
        container.close()
def test_read_partial_video_bframes(self):
    # do not use lossless encoding, to test the presence of B-frames
    options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'}
    with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)
        for start in range(0, 80, 20):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1])
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue((s_data.float() - lv.float()).abs().max() < self.TOLERANCE)

        lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
        self.assertEqual(len(lv), 4)
        self.assertTrue((data[4:8].float() - lv.float()).abs().max() < self.TOLERANCE)
def test_read_timestamps_from_packet(self):
    with temp_video(10, 300, 300, 5, video_codec='mpeg4') as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)

        # note: not all formats/codecs provide accurate information for computing the
        # timestamps. For the format that we use here, this information is available,
        # so we use it as a baseline
        container = av.open(f_name)
        stream = container.streams[0]
        # make sure we went through the optimized codepath
        self.assertIn(b'Lavc', stream.codec_context.extradata)
        pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
        num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
        expected_pts = [i * pts_step for i in range(num_frames)]

        self.assertEqual(pts, expected_pts)
        container.close()
def test_read_partial_video(self):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)
        for start in range(5):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1])
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue(s_data.equal(lv))

        if get_video_backend() == "pyav":
            # for "video_reader" backend, we don't decode the closest early frame
            # when the given start pts is not matching any frame pts
            lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            self.assertEqual(len(lv), 4)
            self.assertTrue(data[4:8].equal(lv))
def test_read_partial_video(self, start, offset):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)

        lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
        s_data = data[start:(start + offset)]
        assert len(lv) == offset
        assert_equal(s_data, lv)

        if get_video_backend() == "pyav":
            # for "video_reader" backend, we don't decode the closest early frame
            # when the given start pts is not matching any frame pts
            lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            assert len(lv) == 4
            assert_equal(data[4:8], lv)
def test_read_partial_video_pts_unit_sec(self):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')
        for start in range(5):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1], pts_unit='sec')
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue(s_data.equal(lv))

        container = av.open(f_name)
        stream = container.streams[0]
        lv, _, _ = io.read_video(
            f_name,
            int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base,
            pts[7],
            pts_unit='sec')

        self.assertEqual(len(lv), 4)
        self.assertTrue(data[4:8].equal(lv))
def test_read_partial_video_bframes(self):
    # do not use lossless encoding, to test the presence of B-frames
    options = {'bframes': '16', 'keyint': '10', 'min-keyint': '4'}
    with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)
        for start in range(0, 80, 20):
            for offset in range(1, 4):
                lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
                s_data = data[start:(start + offset)]
                self.assertEqual(len(lv), offset)
                assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE)

        lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
        # TODO fix this
        if get_video_backend() == 'pyav':
            self.assertEqual(len(lv), 4)
            assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE)
        else:
            self.assertEqual(len(lv), 3)
            assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE)
def test_read_partial_video(self):
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        data = self._create_video_frames(10, 300, 300)
        io.write_video(f.name, data, fps=5)

        pts = io.read_video_timestamps(f.name)
        for start in range(5):
            for l in range(1, 4):
                lv, _, _ = io.read_video(f.name, pts[start], pts[start + l - 1])
                s_data = data[start:(start + l)]
                self.assertEqual(len(lv), l)
                self.assertTrue((s_data.float() - lv.float()).abs().max() < self.TOLERANCE)

        lv, _, _ = io.read_video(f.name, pts[4] + 1, pts[7])
        self.assertEqual(len(lv), 4)
        self.assertTrue(
            (data[4:8].float() - lv.float()).abs().max() < self.TOLERANCE)
def test_read_partial_video_bframes(self, start, offset):
    # do not use lossless encoding, to test the presence of B-frames
    options = {"bframes": "16", "keyint": "10", "min-keyint": "4"}
    with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name)

        lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
        s_data = data[start:(start + offset)]
        assert len(lv) == offset
        assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE)

        lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
        # TODO fix this
        if get_video_backend() == "pyav":
            assert len(lv) == 4
            assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE)
        else:
            assert len(lv) == 3
            assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE)
def get_video_stats(filename):
    pts, video_fps = read_video_timestamps(filename=filename, pts_unit='sec')
    if video_fps:
        stats = {
            'filename': os.path.basename(filename),
            'video-duration': len(pts) / video_fps,
            'fps': video_fps,
            'video-frames': len(pts)
        }
    else:
        stats = {
            'filename': os.path.basename(filename),
            'video-duration': None,
            'fps': None,
            'video-frames': None
        }
        print(
            f'WARNING: {filename} has an issue. video_fps = {video_fps}, len(pts) = {len(pts)}.'
        )
    return stats
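# A small driver can apply get_video_stats to every clip in a folder and tabulate the
# results; this is only a sketch, and the directory layout, the '*.mp4' glob and the
# output filename are assumptions made for illustration.
import csv
import glob
import os


def summarize_videos(video_dir, out_csv='video_stats.csv'):
    rows = [get_video_stats(path)
            for path in sorted(glob.glob(os.path.join(video_dir, '*.mp4')))]
    with open(out_csv, 'w', newline='') as f:
        writer = csv.DictWriter(
            f, fieldnames=['filename', 'video-duration', 'fps', 'video-frames'])
        writer.writeheader()
        writer.writerows(rows)
    return rows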
def test_read_timestamps(self):
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        data = self._create_video_frames(10, 300, 300)
        io.write_video(f.name, data, fps=5)

        pts = io.read_video_timestamps(f.name)

        # note: not all formats/codecs provide accurate information for computing the
        # timestamps. For the format that we use here, this information is available,
        # so we use it as a baseline
        container = av.open(f.name)
        stream = container.streams[0]
        pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
        num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
        expected_pts = [i * pts_step for i in range(num_frames)]

        self.assertEqual(pts, expected_pts)
def test_read_video_timestamps_corrupted_file(self): with tempfile.NamedTemporaryFile(suffix=".mp4") as f: f.write(b"This is not an mpg4 file") video_pts, video_fps = io.read_video_timestamps(f.name) assert video_pts == [] assert video_fps is None
def test_read_video_timestamps_corrupted_file(self):
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        f.write(b'This is not an mpg4 file')
        video_pts, video_fps = io.read_video_timestamps(f.name)
        self.assertEqual(video_pts, [])
        self.assertIs(video_fps, None)
def _make_dataset(directory, extensions=None, is_valid_file=None, pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory: Root directory path (should not contain subdirectories).
        extensions: Tuple of valid extensions.
        is_valid_file: Used to find valid files.
        pts_unit: Unit of the timestamps.

    Returns:
        A list of video files, timestamps, frame offsets, and fps.
    """

    # handle is_valid_file and extensions the same way torchvision handles them:
    # https://pytorch.org/docs/stable/_modules/torchvision/datasets/folder.html#ImageFolder
    both_none = extensions is None and is_valid_file is None
    both_something = extensions is not None and is_valid_file is not None
    if both_none or both_something:
        raise ValueError('Both extensions and is_valid_file cannot be None or '
                         'not None at the same time')

    # use filename to find valid files
    if extensions is not None:
        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)

    # overwrite function to find valid files
    if is_valid_file is not None:
        _is_valid_file = is_valid_file

    # find all video instances (no subdirectories)
    video_instances = []
    for fname in os.listdir(directory):
        # skip invalid files
        if not _is_valid_file(fname):
            continue
        # keep track of valid files
        path = os.path.join(directory, fname)
        video_instances.append(path)

    # get timestamps
    timestamps, fpss = [], []
    for instance in video_instances[:]:  # video_instances[:] creates a copy
        if AV_AVAILABLE and torchvision.get_video_backend() == 'pyav':
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]

                # check if we can extract the video duration
                if not stream.duration:
                    print(f'Video {instance} has no timestamp and will be skipped...')
                    video_instances.remove(instance)  # remove from original list (not copy)
                    continue  # skip this broken video

                duration = stream.duration * stream.time_base
                fps = stream.base_rate
                n_frames = int(int(duration) * fps)

                timestamps.append([Fraction(i, fps) for i in range(n_frames)])
                fpss.append(fps)
        else:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)
            timestamps.append(ts)
            fpss.append(fps)

    # get frame offsets
    offsets = [len(ts) for ts in timestamps]
    offsets = [0] + offsets[:-1]
    for i in range(1, len(offsets)):
        offsets[i] = offsets[i - 1] + offsets[i]  # cumsum

    return video_instances, timestamps, offsets, fpss
def __getitem__(self, idx: int) -> Tuple[List[int], Optional[float]]:
    return read_video_timestamps(self.video_paths[idx])
def _make_dataset(directory, extensions=None, is_valid_file=None, pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory: Root directory path.
        extensions: Tuple of valid extensions.
        is_valid_file: Used to find valid files.
        pts_unit: Unit of the timestamps.

    Returns:
        A list of video files, timestamps, frame offsets, and fps.
    """
    if extensions is None:
        if is_valid_file is None:
            raise ValueError('Both extensions and is_valid_file cannot be None')
        else:
            _is_valid_file = is_valid_file
    else:
        def is_valid_file_extension(filepath):
            return filepath.lower().endswith(extensions)

        if is_valid_file is None:
            _is_valid_file = is_valid_file_extension
        else:
            def _is_valid_file(filepath):
                return is_valid_file_extension(filepath) and is_valid_file(filepath)

    # find all video instances (walk subdirectories)
    video_instances = []

    def on_error(error):
        raise error

    for root, _, files in os.walk(directory, onerror=on_error):
        for fname in files:
            # skip invalid files
            if not _is_valid_file(os.path.join(root, fname)):
                continue
            # keep track of valid files
            path = os.path.join(root, fname)
            video_instances.append(path)

    # get timestamps
    timestamps, fpss = [], []
    for instance in video_instances[:]:  # video_instances[:] creates a copy
        if AV_AVAILABLE and torchvision.get_video_backend() == 'pyav':
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]

                # check if we can extract the video duration
                if not stream.duration:
                    print(f'Video {instance} has no timestamp and will be skipped...')
                    video_instances.remove(instance)  # remove from original list (not copy)
                    continue  # skip this broken video

                duration = stream.duration * stream.time_base
                fps = stream.base_rate
                n_frames = int(int(duration) * fps)

                timestamps.append([Fraction(i, fps) for i in range(n_frames)])
                fpss.append(fps)
        else:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)
            timestamps.append(ts)
            fpss.append(fps)

    # get frame offsets
    offsets = [len(ts) for ts in timestamps]
    offsets = [0] + offsets[:-1]
    for i in range(1, len(offsets)):
        offsets[i] = offsets[i - 1] + offsets[i]  # cumsum

    return video_instances, timestamps, offsets, fpss
def __getitem__(self, idx):
    return read_video_timestamps(self.video_paths[idx])
def _make_dataset(directory, extensions=None, is_valid_file=None, pts_unit='sec'):
    """Returns a list of all video files, timestamps, and offsets.

    Args:
        directory: Root directory path (should not contain subdirectories).
        extensions: Tuple of valid extensions.
        is_valid_file: Used to find valid files.
        pts_unit: Unit of the timestamps.

    Returns:
        A list of video files, timestamps, frame offsets, and fps.
    """

    # use filename to find valid files
    if extensions is not None:
        def _is_valid_file(filename):
            return filename.lower().endswith(extensions)

    # overwrite function to find valid files
    if is_valid_file is not None:
        _is_valid_file = is_valid_file

    # find all instances (no subdirectories)
    instances = []
    for fname in os.listdir(directory):
        # skip invalid files
        if not _is_valid_file(fname):
            continue
        # keep track of valid files
        path = os.path.join(directory, fname)
        instances.append(path)

    # get timestamps
    timestamps, fpss = [], []
    for instance in instances:
        if AV_AVAILABLE and torchvision.get_video_backend() == 'pyav':
            # This is a hacky solution to estimate the timestamps.
            # When using the video_reader this approach fails because the
            # estimated timestamps are not correct.
            with av.open(instance) as av_video:
                stream = av_video.streams.video[0]
                duration = stream.duration * stream.time_base
                fps = stream.base_rate
                n_frames = int(int(duration) * fps)
            timestamps.append([Fraction(i, fps) for i in range(n_frames)])
            fpss.append(fps)
        else:
            ts, fps = io.read_video_timestamps(instance, pts_unit=pts_unit)
            timestamps.append(ts)
            fpss.append(fps)

    # get frame offsets
    offsets = [len(ts) for ts in timestamps]
    offsets = [0] + offsets[:-1]
    for i in range(1, len(offsets)):
        offsets[i] = offsets[i - 1] + offsets[i]  # cumsum

    return instances, timestamps, offsets, fpss
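# Quick sanity check of the pyav estimation branch above, with illustrative numbers:
# for a stream whose base_rate is 30 fps and whose duration works out to 10 seconds,
# the estimated timestamps are Fraction(i, 30), i.e. frame i sits at i/30 seconds.
from fractions import Fraction

fps = Fraction(30)
duration = 10  # seconds, already truncated like int(duration) in the code above
n_frames = int(duration * fps)          # 300
estimated = [Fraction(i, fps) for i in range(n_frames)]
assert estimated[30] == Fraction(1, 1)  # frame 30 is at exactly 1 second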
def __getitem__(self, idx):
    if self._backend == "pyav":
        return read_video_timestamps(self.x[idx])
    else:
        return _read_video_timestamps_from_file(self.x[idx])