def test_video_clips(self):
        with get_list_of_videos(num_videos=3) as video_list:
            video_clips = VideoClips(video_list, 5, 5)
            self.assertEqual(video_clips.num_clips(), 1 + 2 + 3)
            for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0),
                                                (2, 1), (2, 2)]):
                video_idx, clip_idx = video_clips.get_clip_location(i)
                self.assertEqual(video_idx, v_idx)
                self.assertEqual(clip_idx, c_idx)

            video_clips = VideoClips(video_list, 6, 6)
            self.assertEqual(video_clips.num_clips(), 0 + 1 + 2)
            for i, (v_idx, c_idx) in enumerate([(1, 0), (2, 0), (2, 1)]):
                video_idx, clip_idx = video_clips.get_clip_location(i)
                self.assertEqual(video_idx, v_idx)
                self.assertEqual(clip_idx, c_idx)

            video_clips = VideoClips(video_list, 6, 1)
            self.assertEqual(video_clips.num_clips(),
                             0 + (10 - 6 + 1) + (15 - 6 + 1))
            for i, v_idx, c_idx in [(0, 1, 0), (4, 1, 4), (5, 2, 0),
                                    (6, 2, 1)]:
                video_idx, clip_idx = video_clips.get_clip_location(i)
                self.assertEqual(video_idx, v_idx)
                self.assertEqual(clip_idx, c_idx)
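
The clip counts asserted above follow the sliding-window arithmetic VideoClips applies per video: with n frames, clip length L and step S, a video contributes max(0, (n - L) // S + 1) clips. A minimal sketch of that bookkeeping (the per-video frame counts of 5, 10 and 15 are an assumption inferred from the assertions, not stated by get_list_of_videos here):

def expected_num_clips(num_frames, clip_length, step):
    # sliding-window count: zero clips when the video is shorter than one clip
    return max(0, (num_frames - clip_length) // step + 1)

sizes = [5, 10, 15]  # assumed default frame counts of the three test videos
assert sum(expected_num_clips(n, 5, 5) for n in sizes) == 1 + 2 + 3
assert sum(expected_num_clips(n, 6, 6) for n in sizes) == 0 + 1 + 2
assert sum(expected_num_clips(n, 6, 1) for n in sizes) == 0 + 5 + 10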
Example #2
 def __init__(self,
              dataset_path,
              annotation_path,
              clip_length,
              frame_stride,
              video_transform=None,
              name="<NO_NAME>",
              return_item_subpath=False,
              shuffle_list_seed=None):
     super(VideoIterVal, self).__init__()
     # load params
     self.frames_stride = frame_stride
     self.dataset_path = dataset_path
     self.video_transform = video_transform
     self.return_item_subpath = return_item_subpath
     self.rng = np.random.RandomState(
         shuffle_list_seed if shuffle_list_seed else 0)
     # load video list
     self.video_list = self._get_video_list(dataset_path=self.dataset_path,
                                            annotation_path=annotation_path)
     self.total_clip_length_in_frames = clip_length * frame_stride
     self.video_clips = VideoClips(
         video_paths=self.video_list,
         clip_length_in_frames=self.total_clip_length_in_frames,
         frames_between_clips=self.total_clip_length_in_frames)
     logging.info(
         "VideoIter:: iterator initialized (phase: '{:s}', num: {:d})".
         format(name, len(self.video_list)))
 def test_uniform_clip_sampler_insufficient_clips(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
     video_clips = VideoClips(video_list, 5, 5)
     sampler = UniformClipSampler(video_clips, 3)
     assert len(sampler) == 3 * 3
     indices = torch.tensor(list(iter(sampler)))
     assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
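
The repeated index 0 above comes from UniformClipSampler resampling when a video has fewer clips than requested: with clip counts of 2, 5 and 5, it spreads num_clips_per_video indices evenly over each video's clip range. The torch.linspace reconstruction below is an assumption about the sampler's internals, but it reproduces the asserted indices:

import torch

clip_counts = [2, 5, 5]    # clips per video for sizes [10, 25, 25], clip length 5, step 5
num_clips_per_video = 3
indices, start = [], 0
for count in clip_counts:
    # evenly spaced positions within this video's block of clip indices
    sampled = torch.linspace(start, start + count - 1, steps=num_clips_per_video)
    indices.append(sampled.floor().to(torch.int64))
    start += count
print(torch.cat(indices))  # tensor([ 0,  0,  1,  2,  4,  6,  7,  9, 11])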
Example #4
    def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir):
        video_list = get_list_of_videos(tmpdir,
                                        num_videos=3,
                                        sizes=[25, 25, 25])
        video_clips = VideoClips(video_list, 5, 5)
        clip_sampler = UniformClipSampler(video_clips, 3)

        distributed_sampler_rank0 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=0,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank0)))
        assert len(distributed_sampler_rank0) == 6
        assert_equal(indices, torch.tensor([0, 2, 4, 10, 12, 14]))

        distributed_sampler_rank1 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=1,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank1)))
        assert len(distributed_sampler_rank1) == 6
        assert_equal(indices, torch.tensor([5, 7, 9, 0, 2, 4]))
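
The rank assignments asserted above can be reconstructed from the group_size logic: the nine uniformly sampled indices form three groups of three (one per video), the group list is padded by wrapping until it divides evenly across the replicas, and each rank takes every second group. This is a hedged reconstruction, not the sampler's actual code:

import torch

uniform_indices = torch.tensor([0, 2, 4, 5, 7, 9, 10, 12, 14])  # UniformClipSampler output
groups = uniform_indices.reshape(-1, 3)   # one group of 3 clip indices per video
padded = torch.cat([groups, groups[:1]])  # pad by wrapping to a multiple of num_replicas
rank0 = padded[0::2].flatten()            # tensor([ 0,  2,  4, 10, 12, 14])
rank1 = padded[1::2].flatten()            # tensor([ 5,  7,  9,  0,  2,  4])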
Example #5
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 fold=1,
                 train=True,
                 framewiseTransform=False,
                 transform=None):
        super(HMDB51, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        video_clips = VideoClips(video_list, frames_per_clip,
                                 step_between_clips)
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.video_list = [video_list[i] for i in self.indices]
        self.framewiseTransform = framewiseTransform
        self.transform = transform
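
The constructor stores both a whole-clip and a per-frame transform mode, but no retrieval logic is shown; a plausible __getitem__ for this class would follow the usual VideoClips pattern, as in the hedged sketch below (an assumption, not the original method):

    def __getitem__(self, idx):
        # Hedged sketch, not the original implementation: decode one clip and
        # apply the transform either per frame or on the whole clip.
        video, audio, info, video_idx = self.video_clips.get_clip(idx)
        if self.transform is not None:
            if self.framewiseTransform:
                video = torch.stack([self.transform(frame) for frame in video])
            else:
                video = self.transform(video)
        # video_idx indexes the fold subset, so map it back through self.indices
        label = self.samples[self.indices[video_idx]][1]
        return video, label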
Example #6
    def __init__(self, root, train, frames_per_clip=16, step_between_clips=1, frame_rate=16, transform=None,
                 extensions=('mp4',), label_fn=lambda x, *_: x, local_rank=-1, get_label_only=False):
        train_or_val = 'train' if train else 'val'
        root = os.path.join(root, train_or_val)
        self.root = root

        super().__init__(root)

        self.transform = transform
        # Function that takes in __getitem__ idx and returns auxiliary label information in the form of a tensor
        self.label_fn = MethodType(label_fn, self)
        self.get_label_only = get_label_only

        clips_fn = os.path.join(root, f'clips_{train_or_val}_{frames_per_clip}_{step_between_clips}_{frame_rate}.pt')

        try:
            self.video_clips = torch.load(clips_fn)
        except FileNotFoundError:
            video_list = list(
                map(str, itertools.chain.from_iterable(Path(root).rglob(f'*.{ext}') for ext in extensions)))
            random.shuffle(video_list)
            if local_rank <= 0:
                print('Generating video clips file: ' + clips_fn)
            self.video_clips = VideoClips(
                video_list,
                frames_per_clip,
                step_between_clips,
                frame_rate,
                num_workers=32
            )
            torch.save(self.video_clips, clips_fn)

        clip_lengths = torch.as_tensor([len(v) for v in self.video_clips.clips])
        self.video_clips.clip_sizes = clip_lengths
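
This example caches the entire VideoClips object with torch.save. Several of the other examples here cache only video_clips.metadata and rebuild the index through _precomputed_metadata, which avoids serializing the object's internal state; a hedged sketch of that alternative using the same variable names (metadata_fn is a hypothetical cache path):

metadata_fn = clips_fn.replace('.pt', '_metadata.pt')  # hypothetical cache path
metadata = torch.load(metadata_fn) if os.path.exists(metadata_fn) else None
video_clips = VideoClips(
    video_list,
    frames_per_clip,
    step_between_clips,
    frame_rate,
    _precomputed_metadata=metadata,  # skips re-decoding when metadata was cached
    num_workers=32,
)
if metadata is None:
    torch.save(video_clips.metadata, metadata_fn)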
Example #7
    def test_distributed_sampler_and_uniform_clip_sampler(self):
        with get_list_of_videos(num_videos=3, sizes=[25, 25,
                                                     25]) as video_list:
            video_clips = VideoClips(video_list, 5, 5)
            clip_sampler = UniformClipSampler(video_clips, 3)

            distributed_sampler_rank0 = DistributedSampler(
                clip_sampler,
                num_replicas=2,
                rank=0,
                group_size=3,
            )
            indices = torch.tensor(list(iter(distributed_sampler_rank0)))
            self.assertEqual(len(distributed_sampler_rank0), 6)
            self.assertTrue(indices.equal(torch.tensor([0, 2, 4, 10, 12, 14])))

            distributed_sampler_rank1 = DistributedSampler(
                clip_sampler,
                num_replicas=2,
                rank=1,
                group_size=3,
            )
            indices = torch.tensor(list(iter(distributed_sampler_rank1)))
            self.assertEqual(len(distributed_sampler_rank1), 6)
            self.assertTrue(indices.equal(torch.tensor([5, 7, 9, 0, 2, 4])))
Example #8
    def __init__(self,
                 clip_length,
                 frame_stride,
                 dataset_path=None,
                 video_transform=None,
                 return_label=False):
        super(VideoIter, self).__init__()
        # video clip properties
        self.frames_stride = frame_stride
        self.total_clip_length_in_frames = clip_length * frame_stride
        self.video_transform = video_transform

        # IO
        self.dataset_path = dataset_path
        self.video_list = self._get_video_list(dataset_path=self.dataset_path)
        self.return_label = return_label

        # data loading: reuse a pickled VideoClips index if one exists,
        # otherwise build it once and cache it for subsequent runs
        if os.path.exists('video_clips.file'):
            with open('video_clips.file', 'rb') as fp:
                self.video_clips = pickle.load(fp)
        else:
            self.video_clips = VideoClips(
                video_paths=self.video_list,
                clip_length_in_frames=self.total_clip_length_in_frames,
                frames_between_clips=self.total_clip_length_in_frames,
            )
            with open('video_clips.file', 'wb') as fp:
                pickle.dump(self.video_clips,
                            fp,
                            protocol=pickle.HIGHEST_PROTOCOL)
Example #9
    def __init__(self, video_paths, clip_length_in_frames, stride, frame_rate,
                 refresh, cache_dir):

        self.frame_rate = frame_rate
        self.clip_length_in_frames = clip_length_in_frames
        self.stride = stride
        self.video_paths = video_paths
        fname = f"fps-{frame_rate}-clip_length-{clip_length_in_frames}-stride{stride}"
        video_str_bytes = '-'.join(sorted(video_paths)).encode("utf-8")
        hashed = hashlib.sha256(video_str_bytes).hexdigest()
        fname += f"num-videos{len(video_paths)}-{hashed}"
        cached_clips_path = Path(cache_dir) / fname
        if cached_clips_path.exists() and not refresh:
            print(f"Reloading cached clips object")
            with open(cached_clips_path, "rb") as f:
                self.video_clips = pickle.load(f)
        else:
            print(f"Building new video clips object")
            self.video_clips = VideoClips(
                frame_rate=frame_rate,
                video_paths=video_paths,
                frames_between_clips=stride,
                clip_length_in_frames=clip_length_in_frames,
            )
            cached_clips_path.parent.mkdir(exist_ok=True, parents=True)
            print(f"Writing object to cache at {cached_clips_path}")
            with open(cached_clips_path, "wb") as f:
                pickle.dump(self.video_clips, f)
Example #10
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=('mp4', ),
                 transform=None,
                 cached=None,
                 _precomputed_metadata=None):
        super(Kinetics400, self).__init__(root)

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}

        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
        )
        self.transform = transform
    def __init__(self,
                 clip_length,
                 frame_stride,
                 frame_rate=None,
                 dataset_path=None,
                 spatial_transform=None,
                 temporal_transform=None,
                 return_label=False,
                 video_formats=["avi", "mp4"]):
        super(VideoDataset, self).__init__()
        # video clip properties
        self.frames_stride = frame_stride
        self.total_clip_length_in_frames = clip_length * frame_stride
        self.spatial_transform = spatial_transform
        self.temporal_transform = temporal_transform
        self.video_formats = video_formats
        # IO
        self.dataset_path = dataset_path
        self.video_list = self._get_video_list(dataset_path=self.dataset_path)
        # print("video_list:", self.video_list, len(self.video_list))
        self.return_label = return_label

        # data loading
        self.video_clips = VideoClips(video_paths=self.video_list,
                                      clip_length_in_frames=self.total_clip_length_in_frames,
                                      frames_between_clips=self.total_clip_length_in_frames,
                                      frame_rate=frame_rate)
 def test_uniform_clip_sampler_insufficient_clips(self):
     with get_list_of_videos(num_videos=3, sizes=[10, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = UniformClipSampler(video_clips, 3)
         self.assertEqual(len(sampler), 3 * 3)
         indices = torch.tensor(list(iter(sampler)))
         assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
Example #13
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(UCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]

        metadata_filepath = os.path.join(root, 'ucf101_metadata.pt')
        if os.path.exists(metadata_filepath):
            metadata = torch.load(metadata_filepath)
        else:
            metadata = None
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        if not os.path.exists(metadata_filepath):
            torch.save(video_clips.metadata, metadata_filepath)

        self.video_clips_metadata = video_clips.metadata
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
Example #14
    def __init__(self,
                 root,
                 annotation_path,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 fold=1,
                 train=True,
                 transform=None,
                 _precomputed_metadata=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(MYUCF101, self).__init__(root)
        if not 1 <= fold <= 3:
            raise ValueError(
                "fold should be between 1 and 3, got {}".format(fold))

        extensions = ('avi', )
        self.fold = fold
        self.train = train

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )

        meta_data_str_ = os.path.join(
            root,
            f"meta_data_train_{train}_fold_{fold}_frames_{frames_per_clip}_skip_"
            f"{step_between_clips}.pickle")
        if not os.path.exists(meta_data_str_):
            with open(meta_data_str_, 'wb') as ff:
                pickle.dump(video_clips.metadata, ff)

        self.video_clips_metadata = video_clips.metadata
        self.indices = self._select_fold(video_list, annotation_path, fold,
                                         train)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
 def test_video_clips_custom_fps(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6])
     num_frames = 4
     for fps in [1, 3, 4, 10]:
         video_clips = VideoClips(video_list, num_frames, num_frames, fps, num_workers=2)
         for i in range(video_clips.num_clips()):
             video, audio, info, video_idx = video_clips.get_clip(i)
             assert video.shape[0] == num_frames
             assert info["video_fps"] == fps
Example #16
 def test_video_clips_custom_fps(self):
     with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list:
         num_frames = 4
         for fps in [1, 3, 4, 10]:
             video_clips = VideoClips(video_list, num_frames, num_frames, fps)
             for i in range(video_clips.num_clips()):
                 video, audio, info, video_idx = video_clips.get_clip(i)
                 self.assertEqual(video.shape[0], num_frames)
                 self.assertEqual(info["video_fps"], fps)
 def test_random_clip_sampler(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])
     video_clips = VideoClips(video_list, 5, 5)
     sampler = RandomClipSampler(video_clips, 3)
     assert len(sampler) == 3 * 3
     indices = torch.tensor(list(iter(sampler)))
     videos = torch.div(indices, 5, rounding_mode="floor")
     v_idxs, count = torch.unique(videos, return_counts=True)
     assert_equal(v_idxs, torch.tensor([0, 1, 2]))
     assert_equal(count, torch.tensor([3, 3, 3]))
Example #18
 def test_random_clip_sampler(self):
     with get_list_of_videos(num_videos=3, sizes=[25, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = RandomClipSampler(video_clips, 3)
         self.assertEqual(len(sampler), 3 * 3)
         indices = torch.tensor(list(iter(sampler)))
         videos = indices // 5
         v_idxs, count = torch.unique(videos, return_counts=True)
         self.assertTrue(v_idxs.equal(torch.tensor([0, 1, 2])))
         self.assertTrue(count.equal(torch.tensor([3, 3, 3])))
 def test_random_clip_sampler(self):
     with get_list_of_videos(num_videos=3, sizes=[25, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = RandomClipSampler(video_clips, 3)
         self.assertEqual(len(sampler), 3 * 3)
         indices = torch.tensor(list(iter(sampler)))
         videos = torch.div(indices, 5, rounding_mode='floor')
         v_idxs, count = torch.unique(videos, return_counts=True)
         assert_equal(v_idxs, torch.tensor([0, 1, 2]))
         assert_equal(count, torch.tensor([3, 3, 3]))
    def init_data(self,
                  root,
                  frames_per_clip,
                  step_between_clips=6,
                  frame_rate=6,
                  train=True,
                  transform=None,
                  _precomputed_metadata=None,
                  num_workers=1,
                  _video_width=0,
                  _video_height=0,
                  _video_min_dimension=0,
                  _audio_samples=0):
        super(HMDB51, self).__init__(root)
        extensions = ('avi', )
        if train:
            root = root + "/train"
        else:
            root = root + "/test"
        classes = sorted(list_dir(root))
        class_to_idx = {class_: i for (i, class_) in enumerate(classes)}
        print(class_to_idx)
        self.samples = []
        for target_class in sorted(class_to_idx.keys()):
            class_index = class_to_idx[target_class]
            target_dir = os.path.join(root, target_class)
            for root_curr, _, fnames in sorted(
                    os.walk(target_dir, followlinks=True)):
                for fname in sorted(fnames):
                    path = os.path.join(root_curr, fname)
                    if os.path.isfile(path):
                        item = path, class_index
                        self.samples.append(item)

        video_paths = [path for (path, _) in self.samples]
        video_clips = VideoClips(
            video_paths,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        self.train = train
        self.classes = classes
        self.video_clips_metadata = video_clips.metadata
        self.indices = self.get_indices(video_paths)
        self.video_clips = video_clips.subset(self.indices)
        self.transform = transform
 def test_uniform_clip_sampler(self):
     with get_list_of_videos(num_videos=3, sizes=[25, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = UniformClipSampler(video_clips, 3)
         assert len(sampler) == 3 * 3
         indices = torch.tensor(list(iter(sampler)))
         videos = torch.div(indices, 5, rounding_mode='floor')
         v_idxs, count = torch.unique(videos, return_counts=True)
         assert_equal(v_idxs, torch.tensor([0, 1, 2]))
         assert_equal(count, torch.tensor([3, 3, 3]))
         assert_equal(indices, torch.tensor([0, 2, 4, 5, 7, 9, 10, 12, 14]))
    def test_video_clips(self, tmpdir):
        video_list = get_list_of_videos(tmpdir, num_videos=3)
        video_clips = VideoClips(video_list, 5, 5, num_workers=2)
        assert video_clips.num_clips() == 1 + 2 + 3
        for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]):
            video_idx, clip_idx = video_clips.get_clip_location(i)
            assert video_idx == v_idx
            assert clip_idx == c_idx

        video_clips = VideoClips(video_list, 6, 6)
        assert video_clips.num_clips() == 0 + 1 + 2
        for i, (v_idx, c_idx) in enumerate([(1, 0), (2, 0), (2, 1)]):
            video_idx, clip_idx = video_clips.get_clip_location(i)
            assert video_idx == v_idx
            assert clip_idx == c_idx

        video_clips = VideoClips(video_list, 6, 1)
        assert video_clips.num_clips() == 0 + (10 - 6 + 1) + (15 - 6 + 1)
        for i, v_idx, c_idx in [(0, 1, 0), (4, 1, 4), (5, 2, 0), (6, 2, 1)]:
            video_idx, clip_idx = video_clips.get_clip_location(i)
            assert video_idx == v_idx
            assert clip_idx == c_idx
Example #23
    def __init__(self,
                 data_folder,
                 sequence_length,
                 train=True,
                 resolution=64):
        """
        Args:
            data_folder: path to the folder with videos. The folder
                should contain a 'train' and a 'test' directory,
                each with corresponding videos stored
            sequence_length: length of extracted video sequences
        """
        super().__init__()
        self.train = train
        self.sequence_length = sequence_length
        self.resolution = resolution

        folder = osp.join(data_folder, 'train' if train else 'test')
        files = sum([
            glob.glob(osp.join(folder, '**', f'*.{ext}'), recursive=True)
            for ext in self.exts
        ], [])

        # hacky way to compute # of classes (count # of unique parent directories)
        self.classes = list(set([get_parent_dir(f) for f in files]))
        self.classes.sort()
        self.class_to_label = {c: i for i, c in enumerate(self.classes)}

        warnings.filterwarnings('ignore')
        cache_file = osp.join(folder, f"metadata_{sequence_length}.pkl")
        if not osp.exists(cache_file):
            clips = VideoClips(files, sequence_length, num_workers=32)
            pickle.dump(clips.metadata, open(cache_file, 'wb'))
        else:
            metadata = pickle.load(open(cache_file, 'rb'))
            clips = VideoClips(files,
                               sequence_length,
                               _precomputed_metadata=metadata)
        self._clips = clips
Example #24
    def from_ds_folder(cls,
                       dataset_root,
                       metadata_path=None,
                       extract_groom=False,
                       **kwargs):
        # make dataset
        classes = list(sorted(list_dir(dataset_root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        items = make_dataset(dataset_root, class_to_idx, extensions=(".mp4", ))
        logger.info('class2idx: %s', class_to_idx)
        samples = [s[0] for s in items]
        labels = [s[1] for s in items]

        precomputed = None
        if metadata_path is not None and os.path.exists(metadata_path):
            precomputed = torch.load(metadata_path)
        metadata = VideoClips(
            samples,  # for computing timestamps
            _precomputed_metadata=precomputed,
            num_workers=8,
        ).metadata
        if metadata_path is not None and precomputed is None:
            # cache the freshly computed metadata for later runs
            torch.save(metadata, metadata_path)

        if extract_groom:  # special case: groom under dataset_root/groom
            groom_folder = f'{dataset_root}/groom'
            annots = read_annotations(groom_folder)
            extracted_metadata, extracted_labels = extract_metadata_from_annotations(
                metadata, annots)
            # extracted_paths = set(item['video_paths'] for item in extracted_metadata)
            extracted_labels = [class_to_idx[a] for a in extracted_labels]
            logger.info('Extracted groom video count: %s',
                        len(extracted_labels))
            for vid_path, vid_pts, vid_fps, lab in zip(metadata['video_paths'],
                                                       metadata['video_pts'],
                                                       metadata['video_fps'],
                                                       labels):
                if 'not_groom' not in vid_path:
                    continue
                extracted_metadata['video_paths'].append(vid_path)
                extracted_metadata['video_pts'].append(vid_pts)
                extracted_metadata['video_fps'].append(vid_fps)
                extracted_labels.append(lab)
            metadata, labels = extracted_metadata, extracted_labels
        return cls(metadata=metadata,
                   labels=labels,
                   dataset_root=dataset_root,
                   **kwargs)
 def test_random_clip_sampler_unequal(self, tmpdir):
     video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
     video_clips = VideoClips(video_list, 5, 5)
     sampler = RandomClipSampler(video_clips, 3)
     assert len(sampler) == 2 + 3 + 3
     indices = list(iter(sampler))
     assert 0 in indices
     assert 1 in indices
     # remove elements of the first video, to simplify testing
     indices.remove(0)
     indices.remove(1)
     indices = torch.tensor(indices) - 2
     videos = torch.div(indices, 5, rounding_mode="floor")
     v_idxs, count = torch.unique(videos, return_counts=True)
     assert_equal(v_idxs, torch.tensor([0, 1]))
     assert_equal(count, torch.tensor([3, 3]))
Example #26
    def __init__(self,
                 root,
                 frames_per_clip,
                 step_between_clips=1,
                 frame_rate=None,
                 extensions=('avi', ),
                 transform=None,
                 num_workers=1,
                 _video_width=0,
                 _video_height=0,
                 _video_min_dimension=0,
                 _audio_samples=0):
        super(Kinetics400, self).__init__(root)

        classes = list(sorted(list_dir(root)))
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        self.samples = make_dataset(self.root,
                                    class_to_idx,
                                    extensions,
                                    is_valid_file=None)
        self.classes = classes
        video_list = [x[0] for x in self.samples]
        split = root.rstrip('/').split('/')[-1]
        metadata_filepath = os.path.join(
            root, 'kinetics_metadata_{}.pt'.format(split))

        if os.path.exists(metadata_filepath):
            metadata = torch.load(metadata_filepath)

        else:
            metadata = None

        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
        )
        self.transform = transform
        if not os.path.exists(metadata_filepath):
            torch.save(self.video_clips.metadata, metadata_filepath)
Example #27
def sliding_window(video_path, save_path, epoch_id, preprocess=[]):
    T = 300

    videoclips = VideoClips([video_path],
                            clip_length_in_frames=T,
                            frames_between_clips=1)

    filenames = []
    rotation = 0
    sample_count = -1
    sample_dir = ""
    sample_id = ""
    for i in range(len(videoclips)):

        sample_count = hash(
            str(sample_count + 1 + epoch_id *
                (len(videoclips) / T))) % ((sys.maxsize + 1) * 2)

        # create new preprocess values
        rnd = np.random.uniform(-1, 1)
        rotation = 5 * rnd
        scale_factor = np.random.uniform(0.8, 1.2)
        crop_scale_y = np.random.uniform(0.5, 1)
        crop_scale_x = np.random.uniform(0.5, 1)

        # Preprocess
        clip, _, _, _ = videoclips.get_clip(i)

        clip = clip.numpy()

        for f in range(len(clip)):
            for p in preprocess:
                clip[f] = p(clip[f],
                            rotation=rotation,
                            scale_factor=scale_factor,
                            crop_scale=(crop_scale_y, crop_scale_x))

        clip = torch.tensor(clip)

        # Save
        filename = "{}.mp4".format(hex(sample_count))
        filepath = join(save_path, filename)
        torchvision.io.write_video(filepath, clip, 30)
        filenames.append(filename)
        print("{}, {}, {}/{}".format(filepath, epoch_id, i, len(videoclips)))

    return filenames
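
A hedged usage example for sliding_window (the paths are placeholders, not from the source): each preprocess callable receives the frame plus the rotation, scale_factor and crop_scale keywords generated inside the loop, so an identity op simply ignores them.

def identity(frame, **kwargs):
    # placeholder preprocess op: accept and ignore the augmentation keywords
    return frame

filenames = sliding_window(
    video_path="input.mp4",        # placeholder input video
    save_path="augmented_clips",   # placeholder output directory
    epoch_id=0,
    preprocess=[identity],
)
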
    def __init__(
        self,
        video_dir,
        label_map_json,
        labels_json,
        frames_per_clip,
        step_between_clips=1,
        frame_rate=None,
        transform=None,
        _precomputed_metadata=None,
        num_workers=1,
        _video_width=0,
        _video_height=0,
        _video_min_dimension=0,
    ) -> "_SomethingSomethingV2Dataset":
        for data_file in [label_map_json, labels_json]:
            assert os.path.exists(
                data_file), f"Data file {data_file} is missing"

        with open(label_map_json, "r") as fp:
            label_map = json.load(fp)

        with open(labels_json, "r") as fp:
            samples = json.load(fp)
            self.samples = []
            for sample in samples:
                video_id = sample["id"]
                label = sample["template"].replace("[", "").replace("]", "")
                assert label in label_map, f"Unknown label: {label}"
                video_path = os.path.join(video_dir, f"{video_id}.webm")
                assert os.path.exists(video_path), f"{video_path} is missing"
                self.samples.append((video_path, int(label_map[label])))

        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
        )
        self.transform = transform
Example #29
 def test_random_clip_sampler_unequal(self):
     with get_list_of_videos(num_videos=3, sizes=[10, 25,
                                                  25]) as video_list:
         video_clips = VideoClips(video_list, 5, 5)
         sampler = RandomClipSampler(video_clips, 3)
         self.assertEqual(len(sampler), 2 + 3 + 3)
         indices = list(iter(sampler))
         self.assertIn(0, indices)
         self.assertIn(1, indices)
         # remove elements of the first video, to simplify testing
         indices.remove(0)
         indices.remove(1)
         indices = torch.tensor(indices) - 2
         videos = indices // 5
         v_idxs, count = torch.unique(videos, return_counts=True)
         self.assertTrue(v_idxs.equal(torch.tensor([0, 1])))
         self.assertTrue(count.equal(torch.tensor([3, 3])))
    def __init__(
        self,
        root,
        data_file,
        frames_per_clip,
        step_between_clips=1,
        frame_rate=None,
        extension="mp4",
        transform=None,
        _precomputed_metadata=None,
        num_workers=1,
        _video_width=0,
        _video_height=0,
        _video_min_dimension=0,
        _audio_samples=0,
        _audio_channels=0,
    ) -> "MiniKinetics200Dataset":
        assert os.path.exists(data_file), f"Data file {data_file} is missing"
        self.samples = []
        with open(data_file, "r") as fp:
            for line in fp.readlines():
                video_id, class_name, class_label = line.strip().split(",")
                class_name = class_name.replace("_", " ")
                video_path = os.path.join(
                    root,
                    class_name,
                    f"{video_id}.{extension}",
                )
                if os.path.exists(video_path):
                    self.samples.append([video_path, int(class_label)])

        video_list = [x[0] for x in self.samples]
        self.video_clips = VideoClips(
            video_list,
            frames_per_clip,
            step_between_clips,
            frame_rate,
            _precomputed_metadata,
            num_workers=num_workers,
            _video_width=_video_width,
            _video_height=_video_height,
            _video_min_dimension=_video_min_dimension,
            _audio_samples=_audio_samples,
            _audio_channels=_audio_channels,
        )
        self.transform = transform
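
Finally, a hedged sketch of how datasets built on VideoClips are typically consumed, combining a dataset like the one above with the clip samplers from the earlier tests. The dataset choice and paths are placeholders, and the loop assumes a __getitem__ that returns (clip, label):

from torch.utils.data import DataLoader
from torchvision.datasets.samplers import RandomClipSampler

# Hypothetical wiring (paths and dataset are placeholders): sample up to three
# clips per video and batch them with a standard DataLoader.
dataset = MiniKinetics200Dataset("videos/", "train_list.csv", frames_per_clip=16)
sampler = RandomClipSampler(dataset.video_clips, max_clips_per_video=3)
loader = DataLoader(dataset, batch_size=8, sampler=sampler, num_workers=4)

for clips, labels in loader:
    pass  # clips are (B, T, H, W, C) uint8 tensors before any transform is applied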