def test_probe_video_from_file(self):
    """Probe a temporary video file and check the reported duration and fps.

    Each probed value is compared against its expected value with an
    absolute tolerance of 0.1.
    """
    # NOTE(review): the temp_video arguments presumably describe 10 frames
    # of 300x300 at 5 fps, which would match the expected 2 s duration and
    # 5 fps below -- confirm against temp_video's signature.
    expected_by_field = (("video_duration", 2), ("video_fps", 5))
    with temp_video(10, 300, 300, 5) as (path, _unused):
        probed = io._probe_video_from_file(path)
        # Checks stay inside the context so the file still exists while
        # the probe result is being inspected.
        for field, expected in expected_by_field:
            self.assertAlmostEqual(getattr(probed, field), expected, delta=0.1)
def get_clip(self, idx):
    """
    Decode and return one subclip out of the indexed videos.

    Args:
        idx (int): index of the subclip. Must be smaller than
            ``num_clips()``; only this upper bound is checked here.

    Returns:
        video (Tensor)
        audio (Tensor)
        info (Dict): holds "video_fps" and, on the non-pyav path when the
            file has audio, "audio_fps"
        video_idx (int): index of the video in `video_paths`

    Raises:
        IndexError: if ``idx >= self.num_clips()``.
        ValueError: if the "pyav" backend is active and one of the
            resize / audio-sample options is non-zero.
        AssertionError: if the decoded clip does not contain
            ``self.num_frames`` frames.
    """
    if not idx < self.num_clips():
        raise IndexError("Index {} out of range "
                         "({} number of clips)".format(
                             idx, self.num_clips()))

    video_idx, clip_idx = self.get_clip_location(idx)
    video_path = self.video_paths[video_idx]
    clip_pts = self.clips[video_idx][clip_idx]

    # Imported inside the method, as in the original.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from torchvision import get_video_backend

    if get_video_backend() == "pyav":
        # Options the pyav backend cannot honour; checked in this order so
        # the first offending option is the one reported.
        unsupported = (
            ("_video_width",
             "pyav backend doesn't support _video_width != 0"),
            ("_video_height",
             "pyav backend doesn't support _video_height != 0"),
            ("_video_min_dimension",
             "pyav backend doesn't support _video_min_dimension != 0"),
            ("_video_max_dimension",
             "pyav backend doesn't support _video_max_dimension != 0"),
            ("_audio_samples",
             "pyav backend doesn't support _audio_samples != 0"),
        )
        for option, message in unsupported:
            if getattr(self, option) != 0:
                raise ValueError(message)

        first_pts = clip_pts[0].item()
        last_pts = clip_pts[-1].item()
        video, audio, info = read_video(video_path, first_pts, last_pts)
    else:
        meta = _probe_video_from_file(video_path)
        video_fps = meta.video_fps
        first_pts = clip_pts[0].item()
        last_pts = clip_pts[-1].item()
        video_timebase = Fraction(meta.video_timebase.numerator,
                                  meta.video_timebase.denominator)

        if meta.has_audio:
            audio_timebase = Fraction(meta.audio_timebase.numerator,
                                      meta.audio_timebase.denominator)
            # Convert the video pts window into the audio timebase,
            # rounding outwards (floor for the start, ceil for the end).
            audio_pts_range = (
                pts_convert(first_pts, video_timebase,
                            audio_timebase, math.floor),
                pts_convert(last_pts, video_timebase,
                            audio_timebase, math.ceil),
            )
            audio_fps = meta.audio_sample_rate
        else:
            # Defaults used when the file has no audio stream.
            audio_timebase = Fraction(0, 1)
            audio_pts_range = (0, -1)
            audio_fps = None

        # The info returned by the reader is discarded; a fresh dict is
        # built from the probed metadata instead.
        video, audio, _ = _read_video_from_file(
            video_path,
            video_width=self._video_width,
            video_height=self._video_height,
            video_min_dimension=self._video_min_dimension,
            video_max_dimension=self._video_max_dimension,
            video_pts_range=(first_pts, last_pts),
            video_timebase=video_timebase,
            audio_samples=self._audio_samples,
            audio_channels=self._audio_channels,
            audio_pts_range=audio_pts_range,
            audio_timebase=audio_timebase,
        )
        info = {"video_fps": video_fps}
        if audio_fps is not None:
            info["audio_fps"] = audio_fps

    if self.frame_rate is not None:
        frame_selector = self.resampling_idxs[video_idx][clip_idx]
        if isinstance(frame_selector, torch.Tensor):
            # Shift the indices so the first one is 0 before indexing
            # into the decoded clip.
            frame_selector = frame_selector - frame_selector[0]
        video = video[frame_selector]
        info["video_fps"] = self.frame_rate

    assert len(video) == self.num_frames, "{} x {}".format(
        video.shape, self.num_frames)
    return video, audio, info, video_idx
def test_probe_video_from_file(self):
    """Probe a temporary video file and check the reported duration and fps.

    Comparisons use ``pytest.approx`` with a purely absolute tolerance of
    0.1 (the relative tolerance is set to 0.0).
    """
    tolerance = {"rel": 0.0, "abs": 0.1}
    # NOTE(review): the temp_video arguments presumably describe 10 frames
    # of 300x300 at 5 fps, which would match the expected 2 s duration and
    # 5 fps below -- confirm against temp_video's signature.
    with temp_video(10, 300, 300, 5) as (path, _unused):
        probed = io._probe_video_from_file(path)

        about_two = pytest.approx(2, **tolerance)
        assert about_two == probed.video_duration

        about_five = pytest.approx(5, **tolerance)
        assert about_five == probed.video_fps