def subsample_frames(video: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Pick a subset of frames from *video* via the module-level sampler.

    Returns a tuple of (sampled frame indices, the frames at those indices).
    Raises ValueError when the video holds fewer than ``n_frames`` frames.

    NOTE(review): relies on ``n_frames``, ``frame_sampler`` and
    ``frame_idx_to_list`` from the enclosing scope — confirm they are in scope
    at the definition site.
    """
    total = len(video)
    if total < n_frames:
        raise ValueError(f"Video too short to sample {n_frames} from")
    # Flatten whatever index form the sampler produced into a plain array.
    sample_idxs = np.array(frame_idx_to_list(frame_sampler.sample(total)))
    return sample_idxs, video[sample_idxs]
def lintel_loader(
        file: Union[str, Path, IO[bytes]],
        frames_idx: Union[slice, List[slice], List[int]]) -> Iterator[Image.Image]:
    """Decode the frames named by *frames_idx* from a video using lintel.

    Accepts either a path (``str``/``Path``) or an already-open binary
    file object; the whole encoded video is read into memory before
    decoding. Returns a lazy iterator of PIL Images, one per requested
    frame (duplicates in *frames_idx* are decoded once and repeated).
    """
    import lintel

    # Normalise path-like inputs and slurp the raw encoded bytes.
    if isinstance(file, str):
        file = Path(file)
    if isinstance(file, Path):
        _LOG.debug("Loading data from {}".format(file))
        with file.open("rb") as f:
            video = f.read()
    else:
        video = file.read()

    frames_idx = np.array(frame_idx_to_list(frames_idx))
    assert isinstance(frames_idx, np.ndarray)
    # Decode each distinct frame exactly once; reconstruction_idx maps the
    # unique decoded frames back onto the originally requested ordering.
    load_idx, reconstruction_idx = np.unique(frames_idx, return_inverse=True)
    _LOG.debug("Converted frames_idx {} to load_idx {}".format(
        frames_idx, load_idx))
    raw, width, height = lintel.loadvid_frame_nums(video,
                                                   frame_nums=load_idx,
                                                   should_seek=False)
    pixels = np.frombuffer(raw, dtype=np.uint8)
    # TODO: Support 1 channel grayscale video
    pixels = np.reshape(pixels, newshape=(len(load_idx), height, width, 3))
    pixels = pixels[reconstruction_idx]
    return (Image.fromarray(frame) for frame in pixels)
def test_frame_sampler_generates_sequential_idx(self, frame_sampler, frame_count):
    """Sampled indices must be in ascending order and lie in [0, frame_count)."""
    idx_list = frame_idx_to_list(frame_sampler.sample(frame_count))
    assert_ordered(idx_list)
    assert_elems_lt(idx_list, frame_count)
    assert_elems_gte(idx_list, 0)
def _load_frames(self, frames_idx: Union[slice, List[slice], List[int]],
                 video_folder: Path) -> Iterator[Image]:
    """Lazily load the frame images selected by *frames_idx* from *video_folder*.

    Returns a generator of PIL Images; files are only opened when the
    generator is consumed.
    """
    # Frame files on disk are 1-indexed, hence the ``+ 1``.
    paths = [video_folder / self.filename_template.format(i + 1)
             for i in frame_idx_to_list(frames_idx)]
    # shape: (n_frames, height, width, channels)
    return (self._load_image(path) for path in paths)
def test_clip_is_oversampled_when_video_is_shorter_than_clip_length(
        self, data):
    """When the video is shorter than the clip, indices must stay in range."""
    # Draw order matters for hypothesis reproducibility — keep it unchanged.
    clip_length = data.draw(st.integers(2, 1000))
    video_length = data.draw(st.integers(1, clip_length - 1))
    frame_idx = frame_idx_to_list(
        ClipSampler(clip_length=clip_length).sample(video_length))
    assert_elems_gte(frame_idx, 0)
    assert_elems_lt(frame_idx, clip_length - 1)
def _load_frames(self, frames_idx, video_file):
    """Load the frames selected by *frames_idx* from *video_file*.

    ``.npy`` files are loaded with numpy and returned as a generator of
    PIL Images; any other extension is delegated to torchvideo's
    ``default_loader``.
    """
    from torchvideo.internal.readers import default_loader
    from torchvideo.samplers import frame_idx_to_list

    # Case-insensitive extension check: the original only matched the exact
    # spellings '.NPY' and '.npy', so mixed-case files (e.g. '.Npy') were
    # wrongly routed to the default video decoder.
    if os.path.splitext(video_file)[-1].lower() == '.npy':
        vid = np.load(video_file)
        vid = vid[np.array(frame_idx_to_list(frames_idx))]
        return (Image.fromarray(frame) for frame in vid)
    return default_loader(video_file, frames_idx)
def test_random_sampler(data):
    """RandomSampler yields snippet_length * frame_sample_count indices."""
    # Draw order matters for hypothesis reproducibility — keep it unchanged.
    video_length = data.draw(st.integers(min_value=1, max_value=1e3),
                             label="Video length")
    snippet_length = data.draw(st.integers(min_value=1, max_value=video_length),
                               label="Snippet length")
    frame_sample_count = data.draw(st.integers(min_value=1, max_value=100),
                                   label="Frame sample count")
    test = data.draw(st.booleans(), label="Test mode?")

    sampler = RandomSampler(frame_sample_count, snippet_length, test=test)
    sampled = frame_idx_to_list(sampler.sample(video_length))
    assert len(sampled) == snippet_length * frame_sample_count
def test_oversampling_segments_test(self):
    """In test mode a 5-frame video split into 4 segments of 2 oversamples."""
    video_length, segment_count, snippet_length = 5, 4, 2
    snippet_idx = self.sample(video_length, segment_count, snippet_length,
                              test=True)
    assert_valid_snippet_index(
        snippet_idx,
        expected_snippet_length=snippet_length,
        expected_segment_count=segment_count,
        video_length=video_length,
    )
    # Adjacent snippets overlap because 4 segments * 2 frames > 5 frames.
    assert frame_idx_to_list(snippet_idx) == [0, 1, 1, 2, 2, 3, 3, 4]
def __getitem__(self, index: int) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Return ``(sampled feature frames, labels)`` for the example at *index*.

    Raises IndexError for out-of-range indices so that iteration works,
    e.g. ``for x in hdf_feature_reader``.
    """
    if index >= len(self):
        raise IndexError

    if self.features is None:
        # Features stored flat on disk: restore (n_frames, feature_dim).
        features: np.ndarray = self.feature_dataset[index].reshape(
            -1, self.feature_dim)
    else:
        features = self.features[index]

    frame_idxs = self.sampler.sample(len(features))

    def _decode(val: Any):
        # Label values may arrive as raw bytes; decode those to str.
        if isinstance(val, bytes):
            return val.decode('utf8')  # decode from bytes to string
        return val

    labels = {name: _decode(values[index])
              for name, values in self.label_sets.items()}
    selected = features[np.array(frame_idx_to_list(frame_idxs))]
    return selected, labels
def sample(self, video_length, segment_count, snippet_length, test=False):
    """Run a TemporalSegmentSampler over *video_length* frames.

    Returns the sampled frame indices flattened into a plain list.
    """
    sampler = TemporalSegmentSampler(segment_count, snippet_length, test=test)
    return frame_idx_to_list(sampler.sample(video_length))
def _load_mock_frames(self, frames_idx, video_file):
    """Return zero-filled stand-in frames, one (10, 20, 3) frame per index."""
    n = len(frame_idx_to_list(frames_idx))
    return numpy.zeros((n, 10, 20, 3))