Ejemplo n.º 1
0
def check_generated_files():
    """Check the gulped output sequentially and then through random access.

    Every video of every chunk found under ``output_dir`` is handed to
    ``check_frames``. Afterwards three lookups by key are performed and each
    result is checked against a named pattern.
    """
    directory = GulpDirectory(output_dir)

    # Sequential pass: a chunk is opened for binary reading while it is
    # iterated; the per-video metadata is not needed here.
    for gulp_chunk in directory.chunks():
        with gulp_chunk.open('rb'):
            for video_frames, _ in gulp_chunk:
                check_frames(video_frames)

    # Random access: (lookup key, pattern name passed to check_frames).
    spot_checks = (
        (0, 'alternating'),
        ((11, slice(0, None, 2)), 'black'),
        ((21, slice(1, None, 2)), 'white'),
    )
    for lookup_key, pattern in spot_checks:
        video_frames, _ = directory[lookup_key]
        check_frames(video_frames, pattern)
Ejemplo n.º 2
0
    def __init__(
        self,
        root,
        csv_file_input,
        csv_file_labels,
        clip_size,
        nclips,
        step_size,
        is_val,
        transform=None,
    ):
        """Load the label tables, open the gulp directory and keep the clip settings.

        Args:
            root: Location passed to ``GulpDirectory``.
            csv_file_input: First argument given to ``GulpDataset``.
            csv_file_labels: Second argument given to ``GulpDataset``.
            clip_size: Stored as ``self.clip_size``.
            nclips: Stored as ``self.nclips``.
            step_size: Stored as ``self.step_size``.
            is_val: Stored as ``self.is_val``.
            transform: Stored as ``self.transform``; defaults to ``None``.
        """
        # CSV-backed part: the sample table and the class tables are re-exported
        # on this object for direct access.
        labelled = GulpDataset(csv_file_input, csv_file_labels)
        self.dataset_object = labelled
        self.csv_data = labelled.csv_data
        self.classes_dict = labelled.classes_dict
        self.classes = labelled.classes

        # Gulp-backed part: keep the directory and its merged metadata.
        gulp = GulpDirectory(root)
        self.gulp_directory = gulp
        self.merged_meta_dict = gulp.merged_meta_dict

        self.transform = transform

        self.clip_size, self.nclips = clip_size, nclips
        self.step_size, self.is_val = step_size, is_val
Ejemplo n.º 3
0
    def __init__(
        self,
        gulp_path: Path,
        sample_transform: Optional[Callable[[PIL.Image.Image], FramesTypeVar]] = None,
        filter_fn: Optional[Callable[[str], bool]] = None,
    ):
        """Open a gulped dataset and read its video records.

        Args:
            gulp_path: Path to the gulped dataset; it has to exist.
            sample_transform: Transformation applied to sampled `[PIL.Image]` at a
                specific index. When omitted, an identity function is used.
            filter_fn: A callable used to remove examples from the dataset. It
                should return whether the sample with the given id is to be kept.
        """
        super().__init__()
        assert gulp_path.exists(), "Could not find the path {}".format(gulp_path)

        # GulpDirectory is given the absolute location as a plain string.
        absolute_root = gulp_path.absolute()
        self.gulp_dir = GulpDirectory(str(absolute_root))
        self.filter_fn = filter_fn
        self.sample_transform = (
            sample_transform if sample_transform is not None else (lambda x: x)
        )

        merged_meta = self.gulp_dir.merged_meta_dict
        self._video_records = self._read_video_records(merged_meta, filter_fn)
Ejemplo n.º 4
0
 def __init__(self,
              path='../../epic/data/processed/gulp/rgb_train/',
              data_type='verb_class'):
     """Open the gulp directory at ``path`` and list the ids it contains.

     Args:
         path: Location passed to ``GulpDirectory``.
         data_type: Stored as ``self.data_type``.
     """
     self.path, self.data_type = path, data_type
     self.dataset = GulpDirectory(path)
     # The keys of the merged metadata are kept as a list; its length is
     # cached in ``self.datalen``.
     meta_keys = self.dataset.merged_meta_dict.keys()
     self.gdict = list(meta_keys)
     self.datalen = len(self.gdict)
Ejemplo n.º 5
0
    def test_creating_gulp_video_dataset_from_gulp_directory(self, gulp_path):
        """A dataset built around an existing GulpDirectory keeps using it."""
        existing_dir = GulpDirectory(gulp_path)
        root = existing_dir.output_dir

        dataset = GulpVideoDataset(root, gulp_directory=existing_dir)

        # The dataset holds a directory equal to the one passed in, rooted at
        # the dataset's own root path, with one example per metadata entry.
        assert dataset.gulp_dir == existing_dir
        assert dataset.gulp_dir.output_dir == dataset.root_path
        assert len(existing_dir.merged_meta_dict) == len(dataset)
Ejemplo n.º 6
0
 def __init__(self,
              transforms=None,
              gulp_path='../../epic/data/processed/gulp/rgb_train/',
              labels_csv='./input_csv/EPIC_train_action_labels.csv'):
     """Open the gulped frames and the action-label table.

     Args:
         transforms: Stored as ``self.transforms``; defaults to ``None``.
         gulp_path: Location passed to ``GulpDirectory``. The default is the
             path that used to be hard-coded here.
         labels_csv: CSV file read with ``pandas.read_csv`` into
             ``self.id_dict``. The default is the path that used to be
             hard-coded here.
     """
     self.gulp_dataset = GulpDirectory(gulp_path)
     # Called after ``self.gulp_dataset`` is set, as in the original order;
     # presumably the helper reads it -- verify against _get_metadict.
     self.meta_dict = self._get_metadict()
     self.id_dict = pd.read_csv(labels_csv)
     self.transforms = transforms
     self.len = len(self.meta_dict)
Ejemplo n.º 7
0
    def test_dataset_throws_error_if_root_path_is_different_from_gulp_dir_path(
        self, gulp_path
    ):
        """A root path that differs from the directory's own path is rejected."""
        existing_dir = GulpDirectory(gulp_path)

        with pytest.raises(ValueError):
            # Same parent as the real directory, different final component.
            mismatched_root = Path(existing_dir.output_dir).with_name("not-a-gulp-dir")
            GulpVideoDataset(mismatched_root, gulp_directory=existing_dir)
Ejemplo n.º 8
0
    def __init__(
        self,
        root_path: Union[str, Path],
        *,
        gulp_directory: Optional[GulpDirectory] = None,
        filter: Optional[Callable[[str], bool]] = None,
        label_field: Optional[str] = None,
        label_set: Optional[LabelSet] = None,
        sampler: FrameSampler = _default_sampler(),
        transform: Optional[NDArrayVideoTransform] = None,
    ):
        """
        Args:
            root_path: Path to GulpIO dataset folder on disk. The ``.gulp`` and
                ``.gmeta`` files are direct children of this directory.
            gulp_directory: Optional, already opened gulp directory located at
                ``root_path``. Passing it avoids reading the gulp metadata a
                second time, e.g. when it was used to build a custom
                ``label_set`` beforehand.
            filter: Predicate called on each video id; return ``True`` to
                include the video and ``False`` to exclude it.
            label_field: Name of the meta data field holding the label of an
                example, used to construct a :class:`GulpLabelSet`. Defaults
                to ``'label'``.
            label_set: Optional label set for labelling examples. Mutually
                exclusive with ``label_field``.
            sampler: Optional sampler for drawing frames from each video.
            transform: Optional transform over the :class:`ndarray` with layout
                ``THWC``. You will probably want to remap the channels to
                ``CTHW`` at the end of this transform. When omitted, the last
                axis is moved to the front and the values are divided by 255.

        Raises:
            ValueError: If ``gulp_directory`` is given and its ``output_dir``
                is not equal to ``root_path``.
        """

        if transform is None:

            def transform(frames):
                rolled = np.rollaxis(frames, -1, 0)
                return torch.Tensor(rolled).div_(255)

        if gulp_directory is None:
            # Nothing supplied: open the directory ourselves.
            gulp_directory = GulpDirectory(str(root_path))
        elif Path(gulp_directory.output_dir) != Path(root_path):
            mismatch = (
                "Expected gulp_dir.output ({}) to be the same as "
                "root_path ({})"
            ).format(gulp_directory.output_dir, root_path)
            raise ValueError(mismatch)
        self.gulp_dir = gulp_directory

        resolved_labels = self._get_label_set(self.gulp_dir, label_field, label_set)
        super().__init__(
            root_path,
            label_set=resolved_labels,
            sampler=sampler,
            transform=transform,
        )
        self._video_ids = self._get_video_ids(self.gulp_dir, filter)
        self.labels = self._label_examples(self._video_ids, self.label_set)
Ejemplo n.º 9
0
 def __init__(self,
              datadir,
              metafile,
              mode="train",
              num_samples=None,
              **kwargs):
     """Initialise the parent dataset, then open the gulp directory.

     Args:
         datadir: Forwarded to the parent constructor as ``datadir``.
         metafile: Forwarded to the parent constructor as ``metafile``.
         mode: Forwarded to the parent constructor; defaults to ``"train"``.
         num_samples: Forwarded to the parent constructor.
         **kwargs: Accepted but not forwarded anywhere.
     """
     parent_options = dict(datadir=datadir,
                           metafile=metafile,
                           mode=mode,
                           num_samples=num_samples)
     super(MolImageGulpDataset, self).__init__(**parent_options)
     # ``self.datadir`` is read back from the instance -- presumably set by
     # the parent constructor; verify against the base class.
     self.gulpdir = GulpDirectory(self.datadir)
Ejemplo n.º 10
0
    def __init__(self, datadir, metafile, savedir="examples", gulp=False):
        """Read the metadata table and prepare the output folder.

        Args:
            datadir: Data location; also passed to ``GulpDirectory`` when
                ``gulp`` is truthy.
            metafile: CSV file read with ``pandas.read_csv``.
            savedir: Output folder, created if it does not exist yet.
            gulp: When truthy a ``GulpDirectory`` is opened on ``datadir``;
                otherwise ``self.gulpdir`` is ``None``.
        """
        self.datadir = datadir
        self.metadata = pd.read_csv(metafile)
        self.savedir = savedir
        os.makedirs(savedir, exist_ok=True)

        self.gulpdir = GulpDirectory(datadir) if gulp else None

        self.transforms = get_spot_check_transform()
Ejemplo n.º 11
0
def retrieve_nfrms_from_gulp(gulp_dir):
    """Map the ``'id'`` of every gulped item to its number of frames.

    Args:
        gulp_dir: Location passed to ``GulpDirectory``.

    Returns:
        dict: ``meta['id']`` -> ``len(frames)`` for each ``(frames, meta)``
        pair yielded by the chunks of the directory.
    """
    gulp = GulpDirectory(gulp_dir)
    progress = pb.ProgressBar(widgets=[pb.Percentage(), pb.Bar()],
                              maxval=gulp.num_chunks).start()
    frame_counts = {}
    # The bar advances once per finished chunk, counting from 1.
    for chunks_done, chunk in enumerate(gulp, start=1):
        frame_counts.update(
            (meta['id'], len(frames)) for frames, meta in chunk)
        progress.update(chunks_done)
    return frame_counts
Ejemplo n.º 12
0
 def __init__(self,
              transform,
              path='../../epic/data/processed/gulp/rgb_train/',
              frame_size=16,
              class_type='verb'):
     """Store the settings and open the gulp directory at ``path``.

     Args:
         transform: Stored as ``self.transform``.
         path: Location passed to ``GulpDirectory``.
         frame_size: Stored as ``self.frame_size``.
         class_type: Stored as ``self.class_type``.
     """
     self.transform, self.path = transform, path
     self.class_type, self.frame_size = class_type, frame_size

     gulp = GulpDirectory(path)
     self.dataset = gulp
     # Keys of the merged metadata, kept as a list, and their count.
     self.gdict = list(gulp.merged_meta_dict.keys())
     self.datalen = len(self.gdict)
Ejemplo n.º 13
0
    def __init__(
        self,
        gulp_path: Union[Path, str],
        class_type: str,
        *,
        with_metadata: bool = False,
        class_getter: Optional[ClassGetter] = None,
        segment_filter: Optional[SegmentFilter] = None,
        sample_transform: Optional[VideoTransform] = None
    ) -> None:
        """
        Args:
            gulp_path: Path to the gulp directory containing the gulped EPIC RGB
                or flow frames. It has to exist.

            class_type: One of verb, noun, verb+noun, None; determines what label
                a segment returns. ``None`` should be used for loading test
                datasets.

            with_metadata: When True the class of a segment becomes the tuple
                ``(metadata, class)``, where ``class`` comes from the class getter
                and ``metadata`` is the raw dictionary stored in the gulp file.

            class_getter: Optional callable that receives the gulp dict of a
                segment and returns the class that segment should have. Defaults
                to the getter registered for ``class_type``.

            segment_filter: Optional callable that receives a segment and returns
                True to keep it in the dataset or False to exclude it.

            sample_transform: Optional function taking a list of PIL images and
                transforming each of them. It is applied to the frames just
                before returning from :meth:`load_frames`.
        """
        super().__init__(
            _class_count[class_type],
            segment_filter=segment_filter,
            sample_transform=sample_transform,
        )

        gulp_root = Path(gulp_path) if isinstance(gulp_path, str) else gulp_path
        assert gulp_root.exists(), "Could not find the path {}".format(gulp_root)
        self.gulp_dir = GulpDirectory(str(gulp_root))

        getter = _class_getters[class_type] if class_getter is None else class_getter
        if with_metadata:
            # Wrap a shallow copy of the getter so the metadata travels along
            # with the class it produces.
            plain_getter = copy.copy(getter)

            def getter(metadata):
                return metadata, plain_getter(metadata)

        self._video_segments = self._read_segments(
            self.gulp_dir.merged_meta_dict, getter
        )
Ejemplo n.º 14
0
 def __init__(
         self,
         path='/home/yanai-lab/ide-k/ide-k/epic/data/processed/gulp/rgb_train/',
         frame_size=1,
         class_type='noun'):
     """
     Build the epic-kitchen video dataset loader.

     Args:
         path (str): location of the epic dataset in gulpio format.
         frame_size (int): accepted, but not used by this constructor.
         class_type (str): Options includes 'noun', 'verb', 'noun+verb'.
     """
     self.path, self.class_type = path, class_type

     gulp = GulpDirectory(path)
     self.dataset = gulp
     # Keys of the merged metadata, kept as a list, and their count.
     self.gdict = list(gulp.merged_meta_dict.keys())
     self.datalen = len(self.gdict)
Ejemplo n.º 15
0
def main(args):
    """Write the frames of gulped videos to disk as numbered JPEG files.

    Args:
        args: Object providing ``gulp_dir``, ``frames_root``, ``uids_csv`` and
            ``quality``. When ``uids_csv`` is ``None`` every id in the gulp
            directory is exported; otherwise only the ids listed in the
            ``uid`` column of that CSV file.
    """
    gulp_dir = GulpDirectory(args.gulp_dir)
    frames_root = args.frames_root

    if args.uids_csv is None:
        uids: np.ndarray = np.array(list(gulp_dir.merged_meta_dict.keys()))
    else:
        # The ``uid`` column is converted with ``str`` while parsing.
        uid_table = pd.read_csv(args.uids_csv, converters={"uid": str})
        uids = uid_table["uid"].values

    for uid in tqdm(uids, dynamic_ncols=True, unit="video"):
        video_frames = gulp_dir[uid][0]
        # One folder per video id below ``frames_root``.
        target_dir: Path = frames_root / uid
        target_dir.mkdir(exist_ok=True, parents=True)
        for position, image_array in enumerate(video_frames):
            destination = target_dir / f"frame_{position:06d}.jpg"
            PIL.Image.fromarray(image_array).save(destination, quality=args.quality)
Ejemplo n.º 16
0
 def __init__(self,
              path='../../epic/data/processed/gulp/rgb_train/',
              frame_size=1,
              class_type='noun'):
     """
     Build the Epic-kitchen video dataset loader.

     Args:
         path (str): location of the epic dataset in gulp format.
         frame_size (int): Number of frames to retrieve from the video segment.
         class_type (str): Options includes 'noun', 'verb', 'noun+verb'.
     """
     self.frame_size, self.path = frame_size, path
     self.class_type = class_type

     gulp = GulpDirectory(path)
     self.dataset = gulp
     # Keys of the merged metadata, kept as a list, and their count.
     self.gdict = list(gulp.merged_meta_dict.keys())
     self.datalen = len(self.gdict)
Ejemplo n.º 17
0
    def __init__(
        self,
        root_path: Union[str, Path],
        filter: Optional[Callable[[str], bool]] = None,
        label_field: Optional[str] = None,
        label_set: Optional[LabelSet] = None,
        sampler: FrameSampler = _default_sampler(),
        transform: Optional[NDArrayVideoTransform] = None,
    ):
        """
        Args:
            root_path: Path to GulpIO dataset folder on disk. The ``.gulp`` and
                ``.gmeta`` files are direct children of this directory.
            filter: Predicate called on each video id; return ``True`` to
                include the video and ``False`` to exclude it.
            label_field: Name of the meta data field holding the label of an
                example, used to construct a :class:`GulpLabelSet`. Defaults
                to ``'label'``.
            label_set: Optional label set for labelling examples. Mutually
                exclusive with ``label_field``.
            sampler: Optional sampler for drawing frames from each video.
            transform: Optional transform over the :class:`ndarray` with layout
                ``THWC``. You will probably want to remap the channels to
                ``CTHW`` at the end of this transform. When omitted, the last
                axis is moved to the front and the values are divided by 255.
        """
        # Imported here rather than at module level, as in the original code.
        from gulpio import GulpDirectory

        if transform is None:

            def transform(frames):
                rolled = np.rollaxis(frames, -1, 0)
                return torch.Tensor(rolled).div_(255)

        gulp = GulpDirectory(str(root_path))
        self.gulp_dir = gulp

        resolved_labels = self._get_label_set(gulp, label_field, label_set)
        super().__init__(
            root_path,
            label_set=resolved_labels,
            sampler=sampler,
            transform=transform,
        )
        self._video_ids = self._get_video_ids(self.gulp_dir, filter)
        self.labels = self._label_examples(self._video_ids, self.label_set)
Ejemplo n.º 18
0
    def __init__(self,
                 root,
                 tsv,
                 vocab,
                 ids,
                 transform=None,
                 random_crop=False,
                 max_video_len=8,
                 max_sentence_len=24):
        """Load captions and frame counts from a TSV file and open the gulp data.

        Args:
            root: Location passed to ``GulpDirectory``; stored as ``self.root``.
            tsv: Tab-separated file. Column 0 is mapped through
                ``get_uid_tgif`` to obtain the key, column 1 is the sentence
                and column 2 is parsed as an integer frame count.
            vocab: Stored as ``self.vocab``.
            ids: Stored as ``self.ids``.
            transform: Stored as ``self.transform``.
            random_crop: Stored as ``self.random_crop``.
            max_video_len: Stored as ``self.max_video_len``.
            max_sentence_len: Stored as ``self.max_sentence_len``.
        """
        self.root = root
        self.vocab = vocab
        self.ids = ids
        self.transform = transform
        self.random_crop = random_crop
        self.max_video_len = max_video_len
        self.max_sentence_len = max_sentence_len

        # Read the table inside a context manager so the file handle is
        # closed as soon as parsing is done instead of being leaked.
        with open(tsv, 'r') as tsv_file:
            rows = [line.strip().split('\t') for line in tsv_file]
        self.sentences = {get_uid_tgif(row[0]): row[1] for row in rows}
        self.num_frames = {get_uid_tgif(row[0]): int(row[2]) for row in rows}
        self.gulp = GulpDirectory(root)
Ejemplo n.º 19
0
    def __init__(self,
                 root,
                 json,
                 vocab,
                 ids,
                 transform=None,
                 random_crop=False,
                 max_video_len=8,
                 max_sentence_len=24):
        """Load captions and frame counts from a JSON file and open the gulp data.

        Args:
            root: Location passed to ``GulpDirectory``; stored as ``self.root``.
            json: Path of a JSON file holding a sequence of records, each with
                the keys ``'id'``, ``'sentence'`` and ``'gulp_num_frames'``.
                (The parameter shadows the ``json`` module name, which is why
                the module is referenced as ``jsonmod``.)
            vocab: Stored as ``self.vocab``.
            ids: Stored as ``self.ids``.
            transform: Stored as ``self.transform``.
            random_crop: Stored as ``self.random_crop``.
            max_video_len: Stored as ``self.max_video_len``.
            max_sentence_len: Stored as ``self.max_sentence_len``.
        """
        self.root = root
        self.vocab = vocab
        self.ids = ids
        self.transform = transform
        self.random_crop = random_crop
        self.max_video_len = max_video_len
        self.max_sentence_len = max_sentence_len

        # Parse inside a context manager so the file handle is closed as soon
        # as loading is done instead of being leaked.
        with open(json, 'r') as json_file:
            dataset = jsonmod.load(json_file)
        self.dataset = dataset
        # Both lookup tables are keyed by the record id converted to ``str``.
        self.sentences = {str(d['id']): d['sentence'] for d in dataset}
        self.num_frames = {str(d['id']): d['gulp_num_frames'] for d in dataset}
        self.gulp = GulpDirectory(root)
Ejemplo n.º 20
0
    def __init__(self,
                 data_path,
                 num_frames,
                 step_size,
                 is_val,
                 transform=None,
                 stack=True,
                 random_offset=True):
        r"""Simple data loader for GulpIO format.

        Args:
            data_path (str): path to GulpIO dataset folder.
            num_frames (int): number of frames to be fetched.
            step_size (int): number of frames skipped while picking a
                sequence of frames from each video.
            is_val (bool): sets the necessary augmentation procedure.
            transform (object): set of augmentation steps defined by
                Compose(). Default is None. Stored as
                ``self.transform_video``.
            stack (bool): stack frames into a numpy.array. Default is True.
            random_offset (bool): random offsetting to pick frames, if the
                number of frames is more than what is necessary.

        Raises:
            GulpIOEmptyFolder: If the gulp directory reports zero chunks.
        """
        self.gd = GulpDirectory(data_path)
        self.items = list(self.gd.merged_meta_dict.items())
        self.num_chunks = self.gd.num_chunks

        if self.num_chunks == 0:
            # The message now carries a ``{}`` placeholder; previously
            # ``.format`` was applied to a string without one, so the
            # offending path never appeared in the error.
            raise GulpIOEmptyFolder(
                "Found 0 data binaries in subfolders of: {}".format(data_path))

        print(" > Found {} chunks".format(self.num_chunks))
        self.data_path = data_path
        self.transform_video = transform
        self.num_frames = num_frames
        self.step_size = step_size
        self.is_val = is_val
        self.stack = stack
        self.random_offset = random_offset
    def __init__(self, gulp_dir, size=None, *args, **kwargs):
        """Initialise the parent class, then open the gulp directory.

        Args:
            gulp_dir: Location passed to ``GulpDirectory``.
            size: Optional iterable converted to a tuple; any falsy value is
                stored as ``None``.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)

        # Open the gulp directory located at ``gulp_dir``.
        self.gulp_dir = GulpDirectory(gulp_dir)
        if size:
            self.size = tuple(size)
        else:
            self.size = None