Example no. 1
 def __init__(self,
              dataset_path,
              annotation_path,
              clip_length,
              frame_stride,
              video_transform=None,
              name="<NO_NAME>",
              return_item_subpath=False,
              shuffle_list_seed=None):
     super(VideoIterVal, self).__init__()
     # load params
     self.frames_stride = frame_stride
     self.dataset_path = dataset_path
     self.video_transform = video_transform
     self.return_item_subpath = return_item_subpath
     self.rng = np.random.RandomState(
         shuffle_list_seed if shuffle_list_seed else 0)
     # load video list
     self.video_list = self._get_video_list(dataset_path=self.dataset_path,
                                            annotation_path=annotation_path)
     self.total_clip_length_in_frames = clip_length * frame_stride
     self.video_clips = VideoClips(
         video_paths=self.video_list,
         clip_length_in_frames=self.total_clip_length_in_frames,
         frames_between_clips=self.total_clip_length_in_frames)
     logging.info(
         "VideoIter:: iterator initialized (phase: '{:s}', num: {:d})".
         format(name, len(self.video_list)))
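The `__getitem__` of this iterator is not shown in the snippet. Since each clip spans clip_length * frame_stride consecutive frames, the usual next step is to keep every frame_stride-th frame so the model still receives clip_length frames. A minimal sketch of that step (the helper name is illustrative, and the slicing is an assumption about the omitted code):

def get_strided_clip(video_clips, index, frame_stride):
    # get_clip returns (video, audio, info, video_idx); video is (T, H, W, C) uint8
    video, _, _, _ = video_clips.get_clip(index)
    # keep clip_length frames out of the clip_length * frame_stride frames requested above
    return video[::frame_stride]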
    def test_compute_clips_for_video(self):
        video_pts = torch.arange(30)
        # case 1: single clip
        num_frames = 13
        orig_fps = 30
        duration = float(len(video_pts)) / orig_fps
        new_fps = 13
        clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps)
        resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
        assert len(clips) == 1
        assert_equal(clips, idxs)
        assert_equal(idxs[0], resampled_idxs)

        # case 2: all frames appear only once
        num_frames = 4
        orig_fps = 30
        duration = float(len(video_pts)) / orig_fps
        new_fps = 12
        clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps)
        resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
        assert len(clips) == 3
        assert_equal(clips, idxs)
        assert_equal(idxs.flatten(), resampled_idxs)

        # case 3: frames aren't enough for a clip
        num_frames = 32
        orig_fps = 30
        new_fps = 13
        with pytest.warns(UserWarning):
            clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, orig_fps, new_fps)
        assert len(clips) == 0
        assert len(idxs) == 0
Example no. 3
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=('mp4', ),
                 transform=None,
                 cached=None,
                 _precomputed_metadata=None):
        super(Kinetics400, self).__init__(root)
        extensions = extensions

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}

        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
        )
        self.transform = transform
    def test_compute_clips_for_video(self):
        video_pts = torch.arange(30)
        # case 1: single clip
        num_frames = 13
        orig_fps = 30
        duration = float(len(video_pts)) / orig_fps
        new_fps = 13
        clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames,
                                                         orig_fps, new_fps)
        resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
        self.assertEqual(len(clips), 1)
        self.assertTrue(clips.equal(idxs))
        self.assertTrue(idxs[0].equal(resampled_idxs))

        # case 2: all frames appear only once
        num_frames = 4
        orig_fps = 30
        duration = float(len(video_pts)) / orig_fps
        new_fps = 12
        clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames,
                                                         orig_fps, new_fps)
        resampled_idxs = VideoClips._resample_video_idx(int(duration * new_fps), orig_fps, new_fps)
        self.assertEqual(len(clips), 3)
        self.assertTrue(clips.equal(idxs))
        self.assertTrue(idxs.flatten().equal(resampled_idxs))
Example no. 5
    def __init__(self,
                 clip_length,
                 frame_stride,
                 dataset_path=None,
                 video_transform=None,
                 return_label=False):
        super(VideoIter, self).__init__()
        # video clip properties
        self.frames_stride = frame_stride
        self.total_clip_length_in_frames = clip_length * frame_stride
        self.video_transform = video_transform

        # IO
        self.dataset_path = dataset_path
        self.video_list = self._get_video_list(dataset_path=self.dataset_path)
        self.return_label = return_label

        # data loading
        if os.path.exists('video_clips.file'):
            with open('video_clips.file', 'rb') as fp:
                self.video_clips = pickle.load(fp)
        else:
            self.video_clips = VideoClips(
                video_paths=self.video_list,
                clip_length_in_frames=self.total_clip_length_in_frames,
                frames_between_clips=self.total_clip_length_in_frames,
            )
            with open('video_clips.file', 'wb') as fp:
                pickle.dump(self.video_clips,
                            fp,
                            protocol=pickle.HIGHEST_PROTOCOL)
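Pickling the whole VideoClips object works, but Examples no. 11 and 23 below cache only video_clips.metadata (torch.save / torch.load) and pass it back as _precomputed_metadata, which skips re-scanning the videos while keeping the cache file small. A sketch of that variant as a free-standing helper (the function and cache-file names are illustrative):

import os

import torch
from torchvision.datasets.video_utils import VideoClips


def build_video_clips(video_list, clip_length_in_frames,
                      cache_path='video_clips_metadata.pt'):
    # Reuse previously computed per-video timestamps if available; decoding
    # the videos to gather them is the slow part of building VideoClips.
    metadata = torch.load(cache_path) if os.path.exists(cache_path) else None
    video_clips = VideoClips(
        video_paths=video_list,
        clip_length_in_frames=clip_length_in_frames,
        frames_between_clips=clip_length_in_frames,
        _precomputed_metadata=metadata,
    )
    if metadata is None:
        torch.save(video_clips.metadata, cache_path)
    return video_clips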
Example no. 6
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 fold=1,
                 train=True,
                 framewiseTransform=False,
                 transform=None):
        super(HMDB51, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        video_clips = VideoClips(video_list, frames_per_clip,
                                 step_between_clips)
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.video_list = [video_list[i] for i in self.indices]
        self.framewiseTransform = framewiseTransform
        self.transform = transform
    def __init__(self,
                 clip_length,
                 frame_stride,
                 frame_rate=None,
                 dataset_path=None,
                 spatial_transform=None,
                 temporal_transform=None,
                 return_label=False,
                 video_formats=["avi", "mp4"]):
        super(VideoDataset, self).__init__()
        # video clip properties
        self.frames_stride = frame_stride
        self.total_clip_length_in_frames = clip_length * frame_stride
        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform
        self.video_formats = video_formats
        # IO
        self.dataset_path = dataset_path
        self.video_list = self._get_video_list(dataset_path=self.dataset_path)
        # print("video_list:", self.video_list, len(self.video_list))
        self.return_label = return_label

        # data loading
        self.video_clips = VideoClips(video_paths=self.video_list,
                                      clip_length_in_frames=self.total_clip_length_in_frames,
                                      frames_between_clips=self.total_clip_length_in_frames,
                                      frame_rate=frame_rate)
Example no. 8
    def __init__(self, root, train, frames_per_clip=16, step_between_clips=1, frame_rate=16, transform=None,
                 extensions=('mp4',), label_fn=lambda x, *_: x, local_rank=-1, get_label_only=False):
        train_or_val = 'train' if train else 'val'
        root = os.path.join(root, train_or_val)
        self.root = root

        super().__init__(root)

        self.transform = transform
        # Function that takes in __getitem__ idx and returns auxiliary label information in the form of a tensor
        self.label_fn = MethodType(label_fn, self)
        self.get_label_only = get_label_only

        clips_fn = os.path.join(root, f'clips_{train_or_val}_{frames_per_clip}_{step_between_clips}_{frame_rate}.pt')

        try:
            self.video_clips = torch.load(clips_fn)
        except FileNotFoundError:
            video_list = list(
                map(str, itertools.chain.from_iterable(Path(root).rglob(f'*.{ext}') for ext in extensions)))
            random.shuffle(video_list)
            if local_rank <= 0:
                print('Generating video clips file: ' + clips_fn)
            self.video_clips = VideoClips(
                video_list,
                frames_per_clip,
                step_between_clips,
                frame_rate,
                num_workers=32
            )
            torch.save(self.video_clips, clips_fn)

        clip_lengths = torch.as_tensor([len(v) for v in self.video_clips.clips])
        self.video_clips.clip_sizes = clip_lengths
Example no. 9
    def __init__(self, video_paths, clip_length_in_frames, stride, frame_rate,
                 refresh, cache_dir):

        self.frame_rate = frame_rate
        self.clip_length_in_frames = clip_length_in_frames
        self.stride = stride
        self.video_paths = video_paths
        fname = f"fps-{frame_rate}-clip_length-{clip_length_in_frames}-stride{stride}"
        video_str_bytes = '-'.join(sorted(video_paths)).encode("utf-8")
        hashed = hashlib.sha256(video_str_bytes).hexdigest()
        fname += f"num-videos{len(video_paths)}-{hashed}"
        cached_clips_path = Path(cache_dir) / fname
        if cached_clips_path.exists() and not refresh:
            print(f"Reloading cached clips object")
            with open(cached_clips_path, "rb") as f:
                self.video_clips = pickle.load(f)
        else:
            print(f"Building new video clips object")
            self.video_clips = VideoClips(
                frame_rate=frame_rate,
                video_paths=video_paths,
                frames_between_clips=stride,
                clip_length_in_frames=clip_length_in_frames,
            )
            cached_clips_path.parent.mkdir(exist_ok=True, parents=True)
            print(f"Writing object to cache at {cached_clips_path}")
            with open(cached_clips_path, "wb") as f:
                pickle.dump(self.video_clips, f)
Example no. 10
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(MYUCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )

        meta_data_str_ = os.path.join(
            root,
            f"meta_data_train_{train}_fold_{fold}_frames_{frames_per_clip}_skip_"
            f"{step_between_clips}.pickle")
        if not os.path.exists(meta_data_str_):
            with open(meta_data_str_, 'wb') as ff:
                pickle.dump(video_clips.metadata, ff)

        self.video_clips_metadata = video_clips.metadata
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
Example no. 11
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(UCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]

        metadata_filepath = os.path.join(root, 'ucf101_metadata.pt')
        if os.path.exists(metadata_filepath):
            metadata = torch.load(metadata_filepath)
        else:
            metadata = None
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        if not os.path.exists(metadata_filepath):
            torch.save(video_clips.metadata, metadata_filepath)

        self.video_clips_metadata = video_clips.metadata
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
Example no. 12
 def test_video_clips_custom_fps(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
     num_frames = 4
     for fps in [1, 3, 4, 10]:
         video_clips = VideoClips(video_list, num_frames, num_frames, fps, num_workers=2)
         for i in range(video_clips.num_clips()):
             video, audio, info, video_idx = video_clips.get_clip(i)
             assert video.shape[0] == num_frames
             assert info["video_fps"] == fps
Example no. 13
 def test_video_clips_custom_fps(self):
     with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list:
         num_frames = 4
         for fps in [1, 3, 4, 10]:
             video_clips = VideoClips(video_list, num_frames, num_frames, fps)
             for i in range(video_clips.num_clips()):
                 video, audio, info, video_idx = video_clips.get_clip(i)
                 self.assertEqual(video.shape[0], num_frames)
                 self.assertEqual(info["video_fps"], fps)
Example no. 14
class Mice(VisionDataset):
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=("mp4", ),
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0,
                 _audio_channels=0):
        super(Mice, self).__init__(root)
        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]

        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
            _audio_channels=_audio_channels,
        )
        self.transform = transform

    @property
    def metadata(self):
        return self.video_clips.metadata

    def __len__(self):
        return self.video_clips.num_clips()

    def __getitem__(self, idx):
        video, _, _, video_idx = self.video_clips.get_clip(idx)
        video_idx, clip_idx = self.video_clips.get_clip_location(idx)
        label = self.samples[video_idx][1]

        if self.transform is not None:
            video = self.transform(video)

        return video, label, video_idx, clip_idx
Example no. 15
def DownsampleClipSampler(video_clips: VideoClips, labels: List[int]):
    vc_labels = [
        labels[video_clips.get_clip_location(idx)[0]]
        for idx in range(video_clips.num_clips())
    ]
    cnt = min(vc_labels.count(a) for a in set(labels))
    indices = []
    for a in set(labels):
        indices += random.sample(
            [i for i, c in enumerate(vc_labels) if c == a], cnt)
    return SubsetRandomSampler(indices)
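A usage sketch, assuming the Mice dataset from Example no. 14 above (the dataset root and batch size are placeholders): the returned SubsetRandomSampler plugs directly into a DataLoader, so every epoch draws the same number of clips per class.

from torch.utils.data import DataLoader

dataset = Mice('datasets/mice', frames_per_clip=16)      # placeholder root path
video_labels = [label for _, label in dataset.samples]   # one label per video path
loader = DataLoader(
    dataset,
    batch_size=8,
    sampler=DownsampleClipSampler(dataset.video_clips, video_labels),
)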
Example no. 16
def BalancedClipSampler(video_clips: VideoClips,
                        clip_labels: List[int],
                        num_samples=None,
                        log_weight=False):
    assert len(video_clips.clips) == len(clip_labels)
    vc_labels = [
        clip_labels[video_clips.get_clip_location(idx)[0]]
        for idx in range(video_clips.num_clips())
    ]
    if num_samples is None:
        num_samples = len(video_clips.video_paths)
    return BalancedSampler(vc_labels, num_samples, log_weight)
    def init_data(self,
                  root,
                  frames_per_clip,
                  step_between_clips=6,
                  frame_rate=6,
                  train=True,
                  transform=None,
                  _precomputed_metadata=None,
                  num_workers=1,
                  _video_width=0,
                  _video_height=0,
                  _video_min_dimension=0,
                  _audio_samples=0):
        super(HMDB51, self).__init__(root)
        extensions = ('avi', )
        if train:
            root = root + "/train"
        else:
            root = root + "/test"
        classes = sorted(list_dir(root))
        class_to_idx = {class_: i for (i, class_) in enumerate(classes)}
        print(class_to_idx)
        self.samples = []
        for target_class in sorted(class_to_idx.keys()):
            class_index = class_to_idx[target_class]
            target_dir = os.path.join(root, target_class)
            for root_curr, _, fnames in sorted(
                    os.walk(target_dir, followlinks=True)):
                for fname in sorted(fnames):
                    path = os.path.join(root_curr, fname)
                    if os.path.isfile(path):
                        item = path, class_index
                        self.samples.append(item)

        video_paths = [path for (path, _) in self.samples]
        video_clips = VideoClips(
            video_paths,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        self.train = train
        self.classes = classes
        self.video_clips_metadata = video_clips.metadata
        self.indices = self.get_indices(video_paths)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
Example no. 18
class MyVideoDataset(object):
    def __init__(self, video_paths):
        self.video_clips = VideoClips(video_paths,
                                      clip_length_in_frames=16,
                                      frames_between_clips=1,
                                      frame_rate=15)

    def __getitem__(self, idx):
        video, audio, info, video_idx = self.video_clips.get_clip(idx)
        return video, audio

    def __len__(self):
        return self.video_clips.num_clips()
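For reference, a short usage sketch of this minimal dataset (the file names are placeholders); it indexes clips directly, and the same object can be handed to a DataLoader as usual:

dataset = MyVideoDataset(['videos/a.mp4', 'videos/b.mp4'])  # placeholder paths
print(len(dataset), 'clips')                                # total clips across all videos
video, audio = dataset[0]
print(video.shape)  # (16, H, W, C) uint8, resampled to 15 fps by VideoClips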
Example no. 19
def BalancedPathSampler(video_clips: VideoClips,
                        clip_labels: List[int],
                        num_samples=None,
                        log_weight=False):
    assert len(video_clips.clips) == len(clip_labels)
    vc_labels = []
    for idx in range(video_clips.num_clips()):
        vidx, _ = video_clips.get_clip_location(idx)
        vc_labels.append((clip_labels[vidx], video_clips.video_paths[vidx]))

    if num_samples is None:
        num_samples = len(video_clips.video_paths)
    return BalancedSampler(vc_labels, num_samples, log_weight)
Example no. 20
    def test_distributed_sampler_and_uniform_clip_sampler(self):
        with get_list_of_videos(num_videos=3, sizes=[25, 25,
                                                     25]) as video_list:
            video_clips = VideoClips(video_list, 5, 5)
            clip_sampler = UniformClipSampler(video_clips, 3)

            distributed_sampler_rank0 = DistributedSampler(
                clip_sampler,
                num_replicas=2,
                rank=0,
                group_size=3,
            )
            indices = torch.tensor(list(iter(distributed_sampler_rank0)))
            self.assertEqual(len(distributed_sampler_rank0), 6)
            self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 10, 12, 14])))

            distributed_sampler_rank1 = DistributedSampler(
                clip_sampler,
                num_replicas=2,
                rank=1,
                group_size=3,
            )
            indices = torch.tensor(list(iter(distributed_sampler_rank1)))
            self.assertEqual(len(distributed_sampler_rank1), 6)
            self.assertTrue(indices.equal(torch.tensor([5, 7, 9, 0, 2, 4])))
Example no. 21
    def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir):
        video_list = get_list_of_videos(tmpdir,
                                        num_videos=3,
                                        sizes=[25, 25, 25])
        video_clips = VideoClips(video_list, 5, 5)
        clip_sampler = UniformClipSampler(video_clips, 3)

        distributed_sampler_rank0 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=0,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank0)))
        assert len(distributed_sampler_rank0) == 6
        assert_equal(indices, torch.tensor([0, 2, 4, 10, 12, 14]))

        distributed_sampler_rank1 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=1,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank1)))
        assert len(distributed_sampler_rank1) == 6
        assert_equal(indices, torch.tensor([5, 7, 9, 0, 2, 4]))
Example no. 22
 def test_uniform_clip_sampler_insufficient_clips(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
     video_clips = VideoClips(video_list, 5, 5)
     sampler = UniformClipSampler(video_clips, 3)
     assert len(sampler) == 3 * 3
     indices = torch.tensor(list(iter(sampler)))
     assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
Example no. 23
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=('avi', ),
                 transform=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(Kinetics400, self).__init__(root)

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        split = root.split('/')[-1].strip('/')
        metadata_filepath = os.path.join(
            root, 'kinetics_metadata_{}.pt'.format(split))

        if os.path.exists(metadata_filepath):
            metadata = torch.load(metadata_filepath)

        else:
            metadata = None

        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        self.transform = transform
        if not os.path.exists(metadata_filepath):
            torch.save(self.video_clips.metadata, metadata_filepath)
Example no. 24
def sliding_window(video_path, save_path, epoch_id, preprocess=[]):
    T = 300

    videoclips = VideoClips([video_path],
                            clip_length_in_frames=T,
                            frames_between_clips=1)

    filenames = []
    rotation = 0
    sample_count = -1
    sample_dir = ""
    sample_id = ""
    for i in range(len(videoclips)):

        sample_count = hash(
            str(sample_count + 1 + epoch_id *
                (len(videoclips) / T))) % ((sys.maxsize + 1) * 2)

        # create new preprocess values
        rnd = np.random.uniform(-1, 1)
        rotation = 5 * rnd
        scale_factor = np.random.uniform(0.8, 1.2)
        crop_scale_y = np.random.uniform(0.5, 1)
        crop_scale_x = np.random.uniform(0.5, 1)

        # Preprocess
        clip, _, _, _ = videoclips.get_clip(i)

        clip = clip.numpy()

        for f in range(len(clip)):
            for p in preprocess:
                clip[f] = p(clip[f],
                            rotation=rotation,
                            scale_factor=scale_factor,
                            crop_scale=(crop_scale_y, crop_scale_x))

        clip = torch.tensor(clip)

        # Save
        filename = "{}.mp4".format(hex(sample_count))
        filepath = join(save_path, filename)
        torchvision.io.write_video(filepath, clip, 30)
        filenames.append(filename)
        print("{}, {}, {}/{}".format(filepath, epoch_id, i, len(videoclips)))

    return filenames
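A hedged call example (paths are placeholders); with an empty preprocess list the function simply re-encodes each 300-frame window, and the written .mp4 files can be picked up again by any of the VideoClips-based datasets above:

filenames = sliding_window('videos/session01.mp4', 'augmented_clips/',
                           epoch_id=0, preprocess=[])
print(len(filenames), 'windows written')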
Example no. 25
class MyVideoDataset(data.Dataset):
    def __init__(self,
                 root,
                 data_dirs,
                 labels,
                 n_frames=30,
                 fps=5,
                 spatial_transform=None,
                 temporal_transform=None,
                 random_slice_size=0):
        data_dirs = [os.path.join(root, d + ".mp4") for d in data_dirs]
        self.videos = data_dirs
        self.labels = labels
        self.video_clips = VideoClips(self.videos,
                                      clip_length_in_frames=n_frames,
                                      frames_between_clips=n_frames,
                                      frame_rate=fps,
                                      num_workers=2)

        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform
        self.data_mean = None
        self.data_std = None
        self.random_slice_size = random_slice_size

    def set_stats(self, mean, std):
        self.data_mean, self.data_std = mean, std

    def __getitem__(self, idx):
        video, audio, info, video_idx = self.video_clips.get_clip(idx)
        if self.random_slice_size:
            video = T.RandomSlice(self.random_slice_size)(video)
        if self.temporal_transform is not None:
            video = self.temporal_transform(video)
        if self.spatial_transform is not None:
            video = self.spatial_transform(video)
        if self.data_mean is not None and self.data_std is not None:
            video = T.Normalize(mean=self.data_mean, std=self.data_std)(video)

        label = self.labels[video_idx]
        print(video_idx, "--- ", self.video_clips.video_paths[video_idx],
              "--- ", label)
        return idx, video, label, video_idx

    def __len__(self):
        return self.video_clips.num_clips()
Example no. 26
 def test_uniform_clip_sampler_insufficient_clips(self):
     with get_list_of_videos(num_videos=3, sizes=[10, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = UniformClipSampler(video_clips, 3)
         self.assertEqual(len(sampler), 3 * 3)
         indices = torch.tensor(list(iter(sampler)))
         assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
    def __init__(
        self,
        root,
        data_file,
        frames_per_clip,
        step_between_clips=1,
        frame_rate=None,
        extension="mp4",
        transform=None,
        _precomputed_metadata=None,
        num_workers=1,
        _video_width=0,
        _video_height=0,
        _video_min_dimension=0,
        _audio_samples=0,
        _audio_channels=0,
    ) -> "MiniKinetics200Dataset":
        assert os.path.exists(data_file), f"Data file {data_file} is missing"
        self.samples = []
        with open(data_file, "r") as fp:
            for line in fp.readlines():
                video_id, class_name, class_label = line.strip().split(",")
                class_name = class_name.replace("_", " ")
                video_path = os.path.join(
                    root,
                    class_name,
                    f"{video_id}.{extension}",
                )
                if os.path.exists(video_path):
                    self.samples.append([video_path, int(class_label)])

        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
            _audio_channels=_audio_channels,
        )
        self.transform = transform
    def __init__(self,
                 transforms=None,
                 train=True,
                 test=False,
                 count_videos=-1,
                 count_clips=-1,
                 skip_videoframes=5,
                 num_videoframes=100,
                 dist_videoframes=50,
                 video_directory=None,
                 fps=5):
        # If count_videos <= 0, use all the videos. If count_clips <= 0, use
        # all the clips from all the videos.
        self.train = train
        self.transforms = transforms
        self.video_directory = video_directory
        self.skip_videoframes = skip_videoframes
        self.num_videoframes = num_videoframes
        self.dist_videoframes = dist_videoframes

        self.video_files = sorted([
            os.path.join(video_directory, f) for f in os.listdir(video_directory) \
            if f.endswith('mp4')
        ])
        if count_videos > 0:
            self.video_files = self.video_files[:count_videos]

        clip_length_in_frames = self.num_videoframes * self.skip_videoframes
        frames_between_clips = self.dist_videoframes
        self.saved_video_clips = os.path.join(
            video_directory, 'video_clips.%dnf.%df.%ds.pkl' %
            (count_videos, clip_length_in_frames, frames_between_clips))
        if os.path.exists(self.saved_video_clips):
            print('Path Exists for video_clips: ', self.saved_video_clips)
            self.video_clips = pickle.load(open(self.saved_video_clips, 'rb'))
        else:
            print('Path does NOT exist for video_clips: ',
                  self.saved_video_clips)
            self.video_clips = VideoClips(
                self.video_files,
                clip_length_in_frames=clip_length_in_frames,
                frames_between_clips=frames_between_clips,
                frame_rate=fps)
            pickle.dump(self.video_clips, open(self.saved_video_clips, 'wb'))
        self.datums = self._retrieve_valid_datums(count_videos, count_clips)
        print(self.datums)
    def __init__(
        self,
        video_dir,
        label_map_json,
        labels_json,
        frames_per_clip,
        step_between_clips=1,
        frame_rate=None,
        transform=None,
        _precomputed_metadata=None,
        num_workers=1,
        _video_width=0,
        _video_height=0,
        _video_min_dimension=0,
    ) -> "_SomethingSomethingV2Dataset":
        for data_file in [label_map_json, labels_json]:
            assert os.path.exists(
                data_file), f"Data file {data_file} is missing"

        with open(label_map_json, "r") as fp:
            label_map = json.load(fp)

        with open(labels_json, "r") as fp:
            samples = json.load(fp)
            self.samples = []
            for sample in samples:
                video_id = sample["id"]
                label = sample["template"].replace("[", "").replace("]", "")
                assert label in label_map, f"Unknown label: {label}"
                video_path = os.path.join(video_dir, f"{video_id}.webm")
                assert os.path.exists(video_path), f"{video_path} is missing"
                self.samples.append((video_path, int(label_map[label])))

        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
        )
        self.transform = transform
Example no. 30
    def __init__(self,
                 root,
                 index_path,
                 *,
                 frames_per_clip,
                 step_between_clips,
                 frame_rate,
                 extensions=('mp4', ),
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(Kinetics400Indexed, self).__init__(root)

        self.index_path = index_path

        with open(index_path) as f:
            index = json.load(f)
            classes = index['classes']
            self.classes = classes
            self.samples = [(os.path.join(root, path), label)
                            for path, label in index['samples']]
        '''
        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
        self.classes = classes
        '''
        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        self.transform = transform