def check_generated_files():
    """Verify the frames of the gulped output, sequentially and by index.

    First walks every chunk of the ``GulpDirectory`` opened on the
    module-level ``output_dir`` and passes each video's frames to
    ``check_frames``.  Then fetches three videos by index (two of them with
    a frame slice) and checks each against a named pattern.
    """
    gulp_dir = GulpDirectory(output_dir)

    # Sequential pass: every video in every chunk.
    for chunk in gulp_dir.chunks():
        with chunk.open('rb'):
            for frames, _meta in chunk:
                check_frames(frames)

    # Random access for a few videos: (lookup key, second argument handed
    # to check_frames).
    random_access_cases = (
        (0, 'alternating'),
        ((11, slice(0, None, 2)), 'black'),
        ((21, slice(1, None, 2)), 'white'),
    )
    for key, pattern in random_access_cases:
        frames, _meta = gulp_dir[key]
        check_frames(frames, pattern)
def __init__(
    self,
    root,
    csv_file_input,
    csv_file_labels,
    clip_size,
    nclips,
    step_size,
    is_val,
    transform=None,
):
    """Load the label tables, open the gulp directory and store the options.

    Args:
        root: Passed to ``GulpDirectory``.
        csv_file_input: First argument given to ``GulpDataset``.
        csv_file_labels: Second argument given to ``GulpDataset``.
        clip_size: Stored on ``self.clip_size``.
        nclips: Stored on ``self.nclips``.
        step_size: Stored on ``self.step_size``.
        is_val: Stored on ``self.is_val``.
        transform: Stored on ``self.transform``. Defaults to ``None``.
    """
    # Plain option storage.
    self.transform = transform
    self.clip_size = clip_size
    self.nclips = nclips
    self.step_size = step_size
    self.is_val = is_val

    # Label information is re-exported from the GulpDataset helper object.
    label_source = GulpDataset(csv_file_input, csv_file_labels)
    self.dataset_object = label_source
    self.csv_data = label_source.csv_data
    self.classes_dict = label_source.classes_dict
    self.classes = label_source.classes

    # Frame storage, plus a shortcut to its merged metadata.
    gulp_directory = GulpDirectory(root)
    self.gulp_directory = gulp_directory
    self.merged_meta_dict = gulp_directory.merged_meta_dict
def __init__(
    self,
    gulp_path: Path,
    sample_transform: Optional[Callable[[PIL.Image.Image], FramesTypeVar]] = None,
    filter_fn: Optional[Callable[[str], bool]] = None,
):
    """Open a gulped dataset and build its video records.

    Args:
        gulp_path: Path to the gulped dataset; must exist.
        sample_transform: Transformation applied to the sampled
            ``[PIL.Image]`` at a specific index.  When omitted, an identity
            function is stored instead.
        filter_fn: Callable used to remove examples from the dataset.  It
            receives a sample id and returns whether that sample should be
            kept.
    """
    super().__init__()
    assert gulp_path.exists(), "Could not find the path {}".format(gulp_path)

    def _identity(sample):
        return sample

    absolute_path = str(gulp_path.absolute())
    self.gulp_dir = GulpDirectory(absolute_path)
    self.filter_fn = filter_fn
    self.sample_transform = (
        _identity if sample_transform is None else sample_transform
    )

    merged_meta = self.gulp_dir.merged_meta_dict
    self._video_records = self._read_video_records(merged_meta, filter_fn)
def __init__(self, path='../../epic/data/processed/gulp/rgb_train/',
             data_type='verb_class'):
    """Open the gulp directory at ``path`` and index its metadata keys.

    Args:
        path: Location handed to ``GulpDirectory``.
        data_type: Stored on ``self.data_type``; not interpreted here.
    """
    self.path = path
    self.data_type = data_type

    gulp_directory = GulpDirectory(self.path)
    meta_keys = list(gulp_directory.merged_meta_dict.keys())

    self.dataset = gulp_directory
    self.gdict = meta_keys
    # Dataset length is the number of entries in the merged metadata.
    self.datalen = len(meta_keys)
def test_creating_gulp_video_dataset_from_gulp_directory(self, gulp_path):
    """A dataset built from an existing GulpDirectory reuses that directory."""
    directory = GulpDirectory(gulp_path)
    video_dataset = GulpVideoDataset(
        directory.output_dir, gulp_directory=directory
    )

    attached_dir = video_dataset.gulp_dir
    assert attached_dir == directory
    assert attached_dir.output_dir == video_dataset.root_path
    # One dataset example per entry in the directory's merged metadata.
    assert len(directory.merged_meta_dict) == len(video_dataset)
def __init__(self, transforms=None):
    """Open the hard-coded EPIC RGB training gulp directory and label CSV.

    Args:
        transforms: Stored on ``self.transforms``; not used in this
            constructor.
    """
    # Both locations are relative, so they resolve against the current
    # working directory.
    rgb_train_dir = '../../epic/data/processed/gulp/rgb_train/'
    action_labels_csv = './input_csv/EPIC_train_action_labels.csv'

    self.gulp_dataset = GulpDirectory(rgb_train_dir)
    # NOTE(review): _get_metadict is defined elsewhere; it presumably reads
    # self.gulp_dataset, so it is called only after that attribute is set.
    self.meta_dict = self._get_metadict()
    self.id_dict = pd.read_csv(action_labels_csv)
    self.transforms = transforms
    self.len = len(self.meta_dict)
def test_dataset_throws_error_if_root_path_is_different_from_gulp_dir_path(
    self, gulp_path
):
    """Passing a root path that differs from the directory's must fail."""
    directory = GulpDirectory(gulp_path)
    with pytest.raises(ValueError):
        # Sibling path: same parent as the real directory, different name.
        mismatched_root = Path(directory.output_dir).with_name("not-a-gulp-dir")
        GulpVideoDataset(mismatched_root, gulp_directory=directory)
def __init__(
    self,
    root_path: Union[str, Path],
    *,
    gulp_directory: Optional[GulpDirectory] = None,
    filter: Optional[Callable[[str], bool]] = None,
    label_field: Optional[str] = None,
    label_set: Optional[LabelSet] = None,
    sampler: FrameSampler = _default_sampler(),
    transform: Optional[NDArrayVideoTransform] = None,
):
    """Build a video dataset backed by a GulpIO directory.

    Args:
        root_path: Path to GulpIO dataset folder on disk. The ``.gulp`` and
            ``.gmeta`` files are direct children of this directory.
        gulp_directory: Optional, already constructed gulp directory
            residing at ``root_path``. Handy when a custom ``label_set`` was
            built from that directory: passing it in avoids reading the
            gulp metadata twice.
        filter: Filter function that determines whether a video is included
            in the dataset. It is called on each video id and should return
            ``True`` to include the video and ``False`` to exclude it.
        label_field: Meta data field name that stores the label of an
            example; used to construct a :class:`GulpLabelSet` that performs
            the example labelling. Defaults to ``'label'``.
        label_set: Optional label set for labelling examples. Mutually
            exclusive with ``label_field``.
        sampler: Optional sampler for drawing frames from each video.
        transform: Optional transform over the :class:`ndarray` with layout
            ``THWC``. You will probably want to remap the channels to
            ``CTHW`` at the end of this transform.

    Raises:
        ValueError: If ``gulp_directory`` is given and its ``output_dir``
            differs from ``root_path``.
    """
    if transform is None:

        def _to_scaled_tensor(frames):
            # Move the last axis to the front, then divide by 255 in place.
            channels_first = np.rollaxis(frames, -1, 0)
            return torch.Tensor(channels_first).div_(255)

        transform = _to_scaled_tensor

    if gulp_directory is None:
        self.gulp_dir = GulpDirectory(str(root_path))
    else:
        if Path(gulp_directory.output_dir) != Path(root_path):
            raise ValueError(
                "Expected gulp_dir.output ({}) to be the same as "
                "root_path ({})".format(gulp_directory.output_dir, root_path)
            )
        self.gulp_dir = gulp_directory

    label_set = self._get_label_set(self.gulp_dir, label_field, label_set)
    super().__init__(
        root_path, label_set=label_set, sampler=sampler, transform=transform
    )
    self._video_ids = self._get_video_ids(self.gulp_dir, filter)
    self.labels = self._label_examples(self._video_ids, self.label_set)
def __init__(self, datadir, metafile, mode="train", num_samples=None, **kwargs):
    """Initialise the parent dataset, then open its data directory with gulp.

    Args:
        datadir: Forwarded to the parent constructor as ``datadir``.
        metafile: Forwarded to the parent constructor as ``metafile``.
        mode: Forwarded to the parent constructor as ``mode``.
        num_samples: Forwarded to the parent constructor as ``num_samples``.
        **kwargs: Accepted but not used by this constructor.
    """
    parent_options = dict(
        datadir=datadir,
        metafile=metafile,
        mode=mode,
        num_samples=num_samples,
    )
    super(MolImageGulpDataset, self).__init__(**parent_options)
    # self.datadir is read back from the instance rather than taken from the
    # argument -- presumably the parent constructor sets it; verify there.
    self.gulpdir = GulpDirectory(self.datadir)
def __init__(self, datadir, metafile, savedir="examples", gulp=False):
    """Read the metadata CSV, create the save directory and pick a backend.

    Args:
        datadir: Stored on ``self.datadir``; also opened as a
            ``GulpDirectory`` when ``gulp`` is truthy.
        metafile: CSV file read into ``self.metadata`` with
            ``pandas.read_csv``.
        savedir: Directory created if it does not already exist.
        gulp: When truthy, ``self.gulpdir`` is a ``GulpDirectory`` over
            ``datadir``; otherwise it is ``None``.
    """
    self.datadir = datadir
    self.metadata = pd.read_csv(metafile)
    self.savedir = savedir
    os.makedirs(savedir, exist_ok=True)
    self.gulpdir = GulpDirectory(datadir) if gulp else None
    self.transforms = get_spot_check_transform()
def retrieve_nfrms_from_gulp(gulp_dir):
    """Map every video id in a gulp directory to its number of frames.

    Args:
        gulp_dir: Location handed to ``GulpDirectory``.

    Returns:
        dict: ``meta['id']`` -> ``len(frames)`` for every ``(frames, meta)``
        pair found while iterating the directory's chunks.
    """
    directory = GulpDirectory(gulp_dir)
    progress = pb.ProgressBar(
        widgets=[pb.Percentage(), pb.Bar()],
        maxval=directory.num_chunks,
    ).start()

    frame_counts = {}
    # The bar is sized by chunk count, so it advances once per finished chunk.
    for chunks_done, chunk in enumerate(directory, start=1):
        for frames, meta in chunk:
            frame_counts[meta['id']] = len(frames)
        progress.update(chunks_done)
    return frame_counts
def __init__(self, transform, path='../../epic/data/processed/gulp/rgb_train/',
             frame_size=16, class_type='verb'):
    """Store the loader options and index the gulp directory at ``path``.

    Args:
        transform: Stored on ``self.transform``.
        path: Location handed to ``GulpDirectory``.
        frame_size: Stored on ``self.frame_size``.
        class_type: Stored on ``self.class_type``; not interpreted here.
    """
    self.transform = transform
    self.path = path
    self.class_type = class_type
    self.frame_size = frame_size

    gulp_directory = GulpDirectory(self.path)
    meta_keys = list(gulp_directory.merged_meta_dict.keys())

    self.dataset = gulp_directory
    self.gdict = meta_keys
    # Dataset length is the number of entries in the merged metadata.
    self.datalen = len(meta_keys)
def __init__(
    self,
    gulp_path: Union[Path, str],
    class_type: str,
    *,
    with_metadata: bool = False,
    class_getter: Optional[ClassGetter] = None,
    segment_filter: Optional[SegmentFilter] = None,
    sample_transform: Optional[VideoTransform] = None
) -> None:
    """Open a gulped EPIC directory and read its video segments.

    Args:
        gulp_path: Path to the gulp directory containing the gulped EPIC RGB
            or flow frames. Must exist.
        class_type: One of verb, noun, verb+noun, None; determines what
            label the segment returns. ``None`` should be used for loading
            test datasets. It is the lookup key into ``_class_count`` and,
            when no ``class_getter`` is given, into ``_class_getters``.
        with_metadata: When True the segments yield a tuple
            ``(metadata, class)``, where the class comes from the class
            getter and the metadata is the raw dictionary stored in the
            gulp file.
        class_getter: Optional callable that takes the gulp dict
            representing a segment and returns the class that segment
            should have.
        segment_filter: Optional callable that takes a segment and returns
            True to keep it in the dataset, False to exclude it.
        sample_transform: Optional function which takes a list of PIL
            images and transforms each of them. Applied to the frames just
            before returning from :meth:`load_frames`.
    """
    super().__init__(
        _class_count[class_type],
        segment_filter=segment_filter,
        sample_transform=sample_transform,
    )

    if isinstance(gulp_path, str):
        gulp_path = Path(gulp_path)
    assert gulp_path.exists(), "Could not find the path {}".format(gulp_path)
    self.gulp_dir = GulpDirectory(str(gulp_path))

    if class_getter is None:
        class_getter = _class_getters[class_type]

    if with_metadata:
        # Keep a separate handle on the plain getter so the wrapper below
        # can still call it after ``class_getter`` is rebound.
        plain_getter = copy.copy(class_getter)

        def _metadata_and_class(metadata):
            return (metadata, plain_getter(metadata))

        class_getter = _metadata_and_class

    merged_meta = self.gulp_dir.merged_meta_dict
    self._video_segments = self._read_segments(merged_meta, class_getter)
def __init__(
        self,
        path='/home/yanai-lab/ide-k/ide-k/epic/data/processed/gulp/rgb_train/',
        frame_size=1,
        class_type='noun'):
    """Construct the epic-kitchen video dataset loader.

    Args:
        path (str): Video path for the epic dataset in gulpio format.
        frame_size (int): Accepted, but not stored or used by this
            constructor.
        class_type (str): Options include 'noun', 'verb', 'noun+verb'.
    """
    self.path = path
    self.class_type = class_type

    gulp_directory = GulpDirectory(path)
    meta_keys = list(gulp_directory.merged_meta_dict.keys())

    self.dataset = gulp_directory
    self.gdict = meta_keys
    # Dataset length is the number of entries in the merged metadata.
    self.datalen = len(meta_keys)
def main(args):
    """Dump the frames of gulped videos to per-video folders of JPEG files.

    Reads ``args.gulp_dir``, ``args.frames_root``, ``args.uids_csv`` and
    ``args.quality``.  When ``args.uids_csv`` is given, only the ids in its
    ``uid`` column are exported; otherwise every key in the gulp directory's
    merged metadata is exported.
    """
    gulp_dir = GulpDirectory(args.gulp_dir)
    frames_root = args.frames_root

    if args.uids_csv is None:
        uids = np.array(list(gulp_dir.merged_meta_dict.keys()))
    else:
        # Parse the uid column as str so ids are not converted to numbers.
        uid_table = pd.read_csv(args.uids_csv, converters={"uid": str})
        uids = uid_table["uid"].values

    for uid in tqdm(uids, dynamic_ncols=True, unit="video"):
        video = gulp_dir[uid]
        frames = video[0]

        frames_dir: Path = frames_root / uid
        frames_dir.mkdir(exist_ok=True, parents=True)

        for frame_idx, frame in enumerate(frames):
            image = PIL.Image.fromarray(frame)
            image.save(
                frames_dir / f"frame_{frame_idx:06d}.jpg",
                quality=args.quality,
            )
def __init__(self, path='../../epic/data/processed/gulp/rgb_train/',
             frame_size=1, class_type='noun'):
    """Construct the Epic-kitchen video dataset loader.

    Args:
        path (str): Video path for the epic dataset in gulp format.
        frame_size (int): Number of frames to retrieve from the video
            segment.
        class_type (str): Options include 'noun', 'verb', 'noun+verb'.
    """
    self.frame_size = frame_size
    self.path = path
    self.class_type = class_type

    gulp_directory = GulpDirectory(path)
    meta_keys = list(gulp_directory.merged_meta_dict.keys())

    self.dataset = gulp_directory
    self.gdict = meta_keys
    # Dataset length is the number of entries in the merged metadata.
    self.datalen = len(meta_keys)
def __init__(
    self,
    root_path: Union[str, Path],
    filter: Optional[Callable[[str], bool]] = None,
    label_field: Optional[str] = None,
    label_set: Optional[LabelSet] = None,
    sampler: FrameSampler = _default_sampler(),
    transform: Optional[NDArrayVideoTransform] = None,
):
    """Build a video dataset backed by a GulpIO directory.

    Args:
        root_path: Path to GulpIO dataset folder on disk. The ``.gulp`` and
            ``.gmeta`` files are direct children of this directory.
        filter: Filter function that determines whether a video is included
            in the dataset. It is called on each video id and should return
            ``True`` to include the video and ``False`` to exclude it.
        label_field: Meta data field name that stores the label of an
            example; used to construct a :class:`GulpLabelSet` that performs
            the example labelling. Defaults to ``'label'``.
        label_set: Optional label set for labelling examples. Mutually
            exclusive with ``label_field``.
        sampler: Optional sampler for drawing frames from each video.
        transform: Optional transform over the :class:`ndarray` with layout
            ``THWC``. You will probably want to remap the channels to
            ``CTHW`` at the end of this transform.
    """
    # Imported here rather than at module level, so gulpio is only needed
    # once an instance is actually created.
    from gulpio import GulpDirectory

    if transform is None:

        def _to_scaled_tensor(frames):
            # Move the last axis to the front, then divide by 255 in place.
            channels_first = np.rollaxis(frames, -1, 0)
            return torch.Tensor(channels_first).div_(255)

        transform = _to_scaled_tensor

    self.gulp_dir = GulpDirectory(str(root_path))
    label_set = self._get_label_set(self.gulp_dir, label_field, label_set)
    super().__init__(
        root_path, label_set=label_set, sampler=sampler, transform=transform
    )
    self._video_ids = self._get_video_ids(self.gulp_dir, filter)
    self.labels = self._label_examples(self._video_ids, self.label_set)
def __init__(self, root, tsv, vocab, ids, transform=None, random_crop=False,
             max_video_len=8, max_sentence_len=24):
    """Load sentences and frame counts from a TSV file and open the gulp dir.

    Args:
        root: Stored on ``self.root`` and passed to ``GulpDirectory``.
        tsv: Path of a tab-separated file. For each row, column 0 is passed
            through ``get_uid_tgif`` to obtain the key, column 1 is the
            sentence and column 2 is parsed as the integer frame count.
        vocab: Stored on ``self.vocab``.
        ids: Stored on ``self.ids``.
        transform: Stored on ``self.transform``.
        random_crop: Stored on ``self.random_crop``.
        max_video_len: Stored on ``self.max_video_len``.
        max_sentence_len: Stored on ``self.max_sentence_len``.
    """
    self.root = root
    self.vocab = vocab
    self.ids = ids
    self.transform = transform
    self.random_crop = random_crop
    self.max_video_len = max_video_len
    self.max_sentence_len = max_sentence_len

    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(tsv, 'r') as tsv_file:
        rows = [line.strip().split('\t') for line in tsv_file]

    self.sentences = {get_uid_tgif(row[0]): row[1] for row in rows}
    self.num_frames = {get_uid_tgif(row[0]): int(row[2]) for row in rows}
    self.gulp = GulpDirectory(root)
def __init__(self, root, json, vocab, ids, transform=None, random_crop=False,
             max_video_len=8, max_sentence_len=24):
    """Load sentences and frame counts from a JSON file and open the gulp dir.

    Args:
        root: Stored on ``self.root`` and passed to ``GulpDirectory``.
        json: Path of a JSON file holding a sequence of records; each record
            is read for its ``'id'``, ``'sentence'`` and
            ``'gulp_num_frames'`` entries.  The parameter name shadows the
            ``json`` module inside this method, which is why the module is
            referenced as ``jsonmod``.
        vocab: Stored on ``self.vocab``.
        ids: Stored on ``self.ids``.
        transform: Stored on ``self.transform``.
        random_crop: Stored on ``self.random_crop``.
        max_video_len: Stored on ``self.max_video_len``.
        max_sentence_len: Stored on ``self.max_sentence_len``.
    """
    self.root = root
    self.vocab = vocab
    self.ids = ids
    self.transform = transform
    self.random_crop = random_crop
    self.max_video_len = max_video_len
    self.max_sentence_len = max_sentence_len

    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(json, 'r') as json_file:
        dataset = jsonmod.load(json_file)
    self.dataset = dataset

    # Both lookups are keyed by the record id converted to str.
    self.sentences = {str(d['id']): d['sentence'] for d in dataset}
    self.num_frames = {str(d['id']): d['gulp_num_frames'] for d in dataset}
    self.gulp = GulpDirectory(root)
def __init__(self, data_path, num_frames, step_size, is_val, transform=None,
             stack=True, random_offset=True):
    r"""Simple data loader for GulpIO format.

    Args:
        data_path (str): path to GulpIO dataset folder
        num_frames (int): number of frames to be fetched.
        step_size (int): number of frames skipped while picking
            sequence of frames from each video.
        is_val (bool): sets the necessary augmentation procedure.
        transform (object): set of augmentation steps defined by
            Compose(). Default is None.
        stack (bool): stack frames into a numpy.array. Default is True.
        random_offset (bool): random offsetting to pick frames, if
            number of frames are more than what is necessary.

    Raises:
        GulpIOEmptyFolder: If the gulp directory reports zero chunks.
    """
    self.gd = GulpDirectory(data_path)
    self.items = list(self.gd.merged_meta_dict.items())
    self.num_chunks = self.gd.num_chunks
    if self.num_chunks == 0:
        # The placeholder must be part of the string being formatted;
        # previously .format() was applied to a fragment without "{}", so
        # the offending path never appeared in the message.
        raise GulpIOEmptyFolder(
            "Found 0 data binaries in subfolders of: {}".format(data_path))

    print(" > Found {} chunks".format(self.num_chunks))
    self.data_path = data_path
    self.transform_video = transform
    self.num_frames = num_frames
    self.step_size = step_size
    self.is_val = is_val
    self.stack = stack
    self.random_offset = random_offset
def __init__(self, gulp_dir, size=None, *args, **kwargs):
    """Initialise the parent class and open the gulp directory.

    Args:
        gulp_dir: Location handed to ``GulpDirectory``.
        size: Optional iterable stored as a tuple on ``self.size``; any
            falsy value is stored as ``None``.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    self.gulp_dir = GulpDirectory(gulp_dir)
    if size:
        self.size = tuple(size)
    else:
        self.size = None