Example #1
def load_real_and_fake_frame(real, fake, random):
    # `random` is the number of frame indices to sample (with replacement).
    vr = decord.VideoReader(real, ctx=decord.cpu())
    frame_index = list(np.random.choice(range(len(vr)), random))
    real = vr.get_batch(frame_index).asnumpy()
    # Reuse the same indices so the real and fake frames stay aligned.
    vr = decord.VideoReader(fake, ctx=decord.cpu())
    fake = vr.get_batch(frame_index).asnumpy()
    return real, fake, frame_index
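A minimal usage sketch for the loader above; the file paths and frame count are placeholders, and the imports mirror what the snippet assumes:

import decord
import numpy as np

# Hypothetical call: sample 8 aligned frames from a real/fake video pair.
real_frames, fake_frames, idxs = load_real_and_fake_frame(
    "real.mp4", "fake.mp4", 8)
print(real_frames.shape, fake_frames.shape)  # e.g. (8, H, W, 3) for each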
Example #2
def decord_sequential_cpu_benchmark(config):
    """Benchmarking decord library with seqeuential read"""
    device = "cpu"
    if device == "gpu":
        ctx = decord.gpu(0)
    else:
        ctx = decord.cpu()

    video_reader = decord.VideoReader(config["video_path"], ctx)
    assert config["resize_shape"] is False, "TODO: implement tranformation of image size for " \
                                            "decord_sequential_cpu_benchmark; note it has inbuilt" \
                                            "support for this. "
    assert config["downsample"] == 1, "TODO: implement downsampling," \
                                      " note that decord has options " \
                                      "to sample frames every N frames" \
                                      " https://github.com/dmlc/decord#videoloader" \
                                      "Also the video reader has " \
                                      "video_reader.skip_frames(N) function"
    # video_reader = decord.VideoReader(config["video_path"], ctx,
    #                        width=resize_width,
    #                        height=resize_height)

    for timer in tqdm(
            _TIME.measure_many(inspect.currentframe().f_code.co_name,
                               samples=config["repeats"])):
        frames_read = 0
        with tqdm(total=config["n_frames"]) as pbar:
            while frames_read < config["n_frames"]:
                try:
                    img = video_reader.next()
                except StopIteration:
                    break

                img = cv2.cvtColor(img.asnumpy(), cv2.COLOR_RGB2BGR)  # decord yields RGB; OpenCV expects BGR

                if config["show_img"]:
                    cv2.imshow("img", img)
                    k = cv2.waitKey(1)
                    if ord("q") == k:
                        break

                blocking_call(config["consumer_blocking_config"]["io_limited"],
                              config["consumer_blocking_config"]["duration"])

                frames_read += 1
                pbar.update()
        assert frames_read == config["n_frames"]
        timer.stop()
        del img
        del video_reader
        video_reader = decord.VideoReader(config["video_path"], ctx)
Example #3
    def __call__(self, results):
        """
        Perform mp4 decode operations.
        return:
            List where each item is a numpy array after decoder.
        """
        filepath = results['filename']
        temporal_sample_index = results['temporal_sample_index']
        temporal_num_clips = results['temporal_num_clips']

        vr = de.VideoReader(filepath)
        videolen = len(vr)

        fps = vr.get_avg_fps()
        clip_size = self.num_frames * self.sampling_rate * fps / self.target_fps

        start_idx, end_idx = self.get_start_end_idx(videolen, clip_size,
                                                    temporal_sample_index,
                                                    temporal_num_clips)
        index = np.linspace(start_idx, end_idx,
                            self.num_frames).astype("int64")
        index = np.clip(index, 0, videolen - 1)  # clamp to valid frame indices

        frames_select = vr.get_batch(index)  #1 for buffer

        # dearray_to_img
        np_frames = frames_select.asnumpy()
        frames_select_list = []
        for i in range(np_frames.shape[0]):
            imgbuf = np_frames[i]
            frames_select_list.append(Image.fromarray(imgbuf, mode='RGB'))
        results['imgs'] = frames_select_list
        return results
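get_start_end_idx is not shown in this snippet; a common implementation in PySlowFast-style pipelines places a random window when the temporal index is -1 and otherwise spaces the clips uniformly. A hedged sketch of that convention:

import numpy as np

def get_start_end_idx(video_size, clip_size, clip_idx, num_clips):
    # Sketch under the PySlowFast convention: clip_idx == -1 means a random
    # window; otherwise take the clip_idx-th of num_clips uniform windows.
    delta = max(video_size - clip_size, 0)
    if clip_idx == -1:
        start_idx = np.random.uniform(0, delta)
    else:
        start_idx = delta * clip_idx / num_clips
    end_idx = start_idx + clip_size - 1
    return start_idx, end_idx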
Example #4
    def next_train_batch(self):
        input_batch = np.zeros(shape=(self._config.batch_size,
                                      self._config.time_dimen,
                                      self._config.frame_height,
                                      self._config.frame_width,
                                      self._config.frame_channels))
        gt_batch = np.zeros(shape=(self._config.batch_size, self._config.ncls))

        for b_idx, sample in enumerate(
                self._train_samples[self._train_batch_index *
                                    self._config.batch_size:
                                    (1 + self._train_batch_index) *
                                    self._config.batch_size]):
            video_path = os.path.join(self._config.trainval_set_dir,
                                      sample['video_path'])
            label = sample['label']
            video = decord.VideoReader(video_path)
            assert len(video) == self._config.time_dimen
            for t_idx, frame in enumerate(video):
                frame = frame.asnumpy()  # (height, width, channels)
                assert frame.shape[0] == self._config.frame_height
                assert frame.shape[1] == self._config.frame_width
                input_batch[b_idx][t_idx] = frame

            gt_batch[b_idx][self._label_mapping[label]] = 1.0

        self._train_batch_index += 1
        return input_batch, gt_batch
Example #5
    def next_val_batch(self):
        input_batch = np.zeros(shape=(self._config.batch_size,
                                      self._config.time_dimen,
                                      self._config.frame_height,
                                      self._config.frame_width,
                                      self._config.frame_channels))
        gt_batch = np.zeros(shape=(self._config.batch_size, self._config.ncls))

        for b_idx, sample in enumerate(
                self._val_samples[self._val_batch_index *
                                  self._config.batch_size:
                                  (1 + self._val_batch_index) *
                                  self._config.batch_size]):
            video_path = os.path.join(self._config.trainval_set_dir, 'videos',
                                      sample['video_name'] + '.mp4')
            label = sample['gt']

            video = decord.VideoReader(video_path)
            sampled_frame_index_list = self.sample_t_dimen(
                len(video), target_frames=self._config.time_dimen)
            for t_idx, index in enumerate(sampled_frame_index_list):
                frame = video[index]
                frame = frame.asnumpy()  # (height, width, channels)
                resized_frame = cv2.resize(
                    frame,
                    (self._config.frame_width, self._config.frame_height))
                input_batch[b_idx][t_idx] = resized_frame

            gt_batch[b_idx][self._label_mapping[label]] = 1.0

        self._val_batch_index += 1
        return input_batch, gt_batch
Example #6
    def _get_record(self, idx):
        record = self.video_infos[idx]

        video_reader = decord.VideoReader(osp.join(self.img_prefix, record.path))
        record.num_frames = len(video_reader)

        return record, video_reader
Example #7
    def __getitem__(self, index):
        record = self.video_list[index]
        
        if 'something' in self.dataset:
            decode_boo = False
            video_list = os.listdir(record.path)
        else:
            decode_boo = True
            try:
                directory = record.path
                if directory[-4:] != ".mp4":
                    video_path = directory + ".mp4"
                else:
                    video_path = directory
                video_list = decord.VideoReader(video_path)
            except UnicodeDecodeError:
                # Fall back to reading the frames from a directory of images.
                decode_boo = False
                video_list = os.listdir(record.path)

        if not self.test_mode:
            if self.I3D_sample:
                segment_indices = self._sample_indices(video_list)
            else:
                segment_indices = self._sample_indices(video_list) if self.random_shift else self._get_val_indices(video_list)
        else:
            if self.dataset == 'kinetics':
                segment_indices = self._sample_indices(video_list)
            else:
                segment_indices = self._get_test_indices(video_list)

        return self.get(record, video_list, segment_indices, decode_boo)
Example #8
def read_data(video_name, transform):

    decord_vr = decord.VideoReader(video_name, width=opt['new_width'], height=opt['new_height'])
    duration = len(decord_vr)

    opt['skip_length'] = opt['new_length'] * opt['new_step']
    segment_indices, skip_offsets = sample_indices(duration)

    if opt['video_loader']:
        if opt['slowfast']:
            clip_input = video_TSN_decord_slowfast_loader(video_name, decord_vr, duration, segment_indices, skip_offsets)
        else:
            clip_input = video_TSN_decord_batch_loader(video_name, decord_vr, duration, segment_indices, skip_offsets)

    clip_input = transform(clip_input)

    if opt['slowfast']:
        sparse_samples = len(clip_input) // (opt['num_segments'] * opt['num_crop'])
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1,) + (sparse_samples, 3, opt['input_size'], opt['input_size']))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1,) + (opt['new_length'], 3, opt['input_size'], opt['input_size']))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))

    if opt['new_length'] == 1:
        clip_input = np.squeeze(clip_input, axis=2)    # this is for 2D input case

    return nd.array(clip_input)
Example #9
    def __getitem__(self, idx: int) -> Tuple[torch.tensor, int]:
        """
        Return:
            (clips (torch.tensor), label (int))
        """
        record = self.video_records[idx]
        try:
            video_reader = decord.VideoReader(
                "{}.{}".format(os.path.join(self.root, record.path),
                               self.video_ext),
                # TODO try to add `ctx=decord.ndarray.gpu(0) or .cuda(0)`
            )

        except Exception:
            # Log the failing path and re-raise; `video_reader` would be
            # undefined below otherwise.
            print("{}.{}".format(os.path.join(self.root, record.path),
                                 self.video_ext))
            raise
        record._num_frames = len(video_reader)

        offsets = self._sample_indices(record)
        clips = np.array([self._get_frames(video_reader, o) for o in offsets])

        if self.num_samples == 1:
            return (
                # [T, H, W, C] -> [C, T, H, W]
                self.transforms(torch.from_numpy(clips[0])),
                record.label,
                record.path)

        else:
            return (
                # [S, T, H, W, C] -> [S, C, T, H, W]
                torch.stack(
                    [self.transforms(torch.from_numpy(c)) for c in clips]),
                record.label,
                record.path)
Example #10
def key_frames(
    video_file=None,
    out_dir=None,
    ctx=None,
    sub_clip=False,
    start_seconds=None,
    end_seconds=None,
):

    if sub_clip and start_seconds is not None and end_seconds is not None:
        video_file = extract_subclip(video_file, start_seconds, end_seconds)

    vr = de.VideoReader(video_file)
    key_idxs = vr.get_key_indices()

    video_name = Path(video_file).stem
    video_name = video_name.replace(' ', '_')
    if out_dir is None:
        out_dir = Path(video_file).parent / video_name
        out_dir = out_dir.with_suffix('')
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    for ki in key_idxs:
        frame = vr[ki].asnumpy()
        out_frame_file = out_dir / f"{video_name}_{ki:08}.png"
        # decord returns RGB frames; convert to BGR for cv2.imwrite
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        cv2.imwrite(str(out_frame_file), frame)
        print(f"Saved {str(out_frame_file)}")
    print(f"Please check your frames located at {out_dir}")
    return out_dir
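A hedged usage sketch for key_frames; the input path is a placeholder. It writes one PNG per keyframe and returns the output directory as a Path:

frames_dir = key_frames(video_file="holiday clip.mp4")  # placeholder path
print(sorted(frames_dir.glob("*.png"))[:3])  # first few extracted keyframes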
Example #11
 def __call__(self, results):
     try:
         import decord
     except ImportError:
         raise ImportError(
             'Please run "pip install decord" to install Decord first.')
     decord.logging.set_level(5)
     if results['frame_inds'].ndim != 1:
         results['frame_inds'] = np.squeeze(results['frame_inds'])
     try:
         container = decord.VideoReader(
             results['filename'], num_threads=self.num_threads)
         num_frames = len(container)  # decord num_frames
         frame_inds = results['frame_inds']
         # Generate frame index mapping in order
         # frame_dict = {idx: container[idx % num_frames].asnumpy() for idx in np.unique(frame_inds)}
         # img_group = [frame_dict[idx] for idx in frame_inds]
         img_group = container.get_batch(
             [idx % num_frames for idx in frame_inds]).asnumpy()
         del container
         results['img_group'] = img_group
         results['ori_shape'] = img_group[0].shape
         results['img_shape'] = img_group[0].shape
     except Exception as e:
         print("Failed to decode {} with exception: {}".format(
             results['filename'], e))
         return None
     return results
Example #12
 def __getitem__(self, i):
     try:
         vr = decord.VideoReader(self.videos[i], ctx=decord.cpu())
         start = np.random.choice(len(vr)-self.NUMFRAMES)
         vid = vr.get_batch(list(range(start, start+self.NUMFRAMES))).asnumpy()
     except Exception:
         # Corrupt or too-short video: signal failure with a sentinel value.
         return 0
     return (vid, start)
Example #13
def load_video(filepath, num_frames, scale_factor):
    vr = decord.VideoReader(filepath, ctx=decord.cpu())
    vid = vr[:num_frames].asnumpy()
    if scale_factor != 1:
        vid = zoom(vid, [1, scale_factor, scale_factor, 1],
                   prefilter=False,
                   order=0)
    return vid
Example #14
 def __getitem__(self, i):
     try:
         vr = decord.VideoReader(self.videos[i], ctx=decord.cpu())
         vid = np.asarray([vr[t].asnumpy() for t in range(self.NUMFRAMES)])  # avoid shadowing the index parameter
         start = 0
     except Exception as e:
         print(e)
         return 0
     return (vid, start)
Example #15
 def __init__(self,
              video_url=None):
     super().__init__()
     self.video_url = video_url
     self.run_flag = True
     self.back_flag = False
     self.pause_flag = False
     self.vr = de.VideoReader(self.video_url, ctx=de.cpu(0))
     self.frame_numbers = range(len(self.vr))
Example #16
def load_frames(vid, clip_len, root):
    vname = os.path.join(root, f'{vid}')
    with open(vname, 'rb') as f:
        vr = decord.VideoReader(f, width=342, height=256, num_threads=1)
    frame_rate = int(np.floor(float(len(vr)) / float(clip_len)))
    start_frame = 1
    # start_frame = random.randint(1, num_frames - clip_len * frame_rate + 1)
    idx_list = [start_frame + i * frame_rate for i in range(clip_len)]
    # assumes decord.bridge.set_bridge('torch'), so get_batch returns a torch tensor
    frames = vr.get_batch(idx_list)
    frames = frames.type(torch.float32) / 255.

    return frames.permute((3, 0, 1, 2))
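The snippet above calls torch methods on the decoded batch, so it assumes decord's torch bridge is active; enabling it is a one-liner (decord.bridge.set_bridge is the library's real API):

import decord

# Make VideoReader indexing and get_batch return torch tensors.
decord.bridge.set_bridge('torch')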
Example #17
    def __init__(self, path: str, extractor: str):
        """
        Parameters
        ----------
        path: str
            The path to this video

        extractor: str
            The name of the feature extractor currently used
        """
        self._path = path
        self.reader = decord.VideoReader(self._path)
        self.features = Features(self._path, extractor)
Example #18
    def analyze(self):
        for index, sample in enumerate(self._samples):
            video_path = os.path.join(self._config.trainval_set_dir, 'videos',
                                      sample['video_name'] + '.mp4')
            video = decord.VideoReader(video_path)
            print(index + 1, sample['video_name'], len(video), video[0].shape)

            frame = video[0]
            frame = frame.asnumpy()  # (height, width, channels)
            print(frame.shape)
            resized = cv2.resize(
                frame, (self._config.frame_width, self._config.frame_height))
            print(resized.shape)
Example #19
    def preprocess(self, data):
        videos = []

        for row in data:
            video = row.get('data') or row.get('body')
            if isinstance(video, str):
                video = base64.b64decode(video)
            # First save the bytes as a tmp file
            with open('/tmp/tmp.mp4', 'wb') as fout:
                fout.write(video)

            video = decord.VideoReader('/tmp/tmp.mp4')
            frames = [x.asnumpy() for x in video]
            videos.append(np.stack(frames))

        return videos
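As an aside, decord can also read from an in-memory buffer (as Example #22 does), which avoids the fixed /tmp path being a race under concurrent requests. A minimal sketch:

import io
import decord
import numpy as np

def decode_bytes(video_bytes):
    # Decode directly from memory instead of a shared temp file.
    vr = decord.VideoReader(io.BytesIO(video_bytes))
    return np.stack([frame.asnumpy() for frame in vr])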
Example #20
    def __getitem__(self, index):
        sample = self.samples[index]
        try:
            vr = decord.VideoReader(
                str(sample.video_path),
                width=self.video_width,
                height=self.video_height,
                num_threads=1,
            )
        except Exception:
            print('invalid video')
            print(f'{sample.video_path}')
            print('falling back to previous sample')
            return self.prev

        num_frames = len(vr)
        if num_frames == 0:
            raise Exception(f'Empty video: {sample.video_path}')
        frame_indices = np.arange(num_frames)  # [0, 1, 2, ..., N - 1]

        if self.frame_rate is not None:
            frame_indices = self.resample_fps(frame_indices, vr.get_avg_fps())

        # same temporal frame but different spatial transform
        clip_frame_indices_list = [self.temporal_transform(frame_indices)]
        clip_frame_indices_list = clip_frame_indices_list * self.num_clips_per_sample

        # print('#' * 20)
        # print(clip_frame_indices_list[0])
        # print(clip_frame_indices_list[1])
        # print('#' * 20)

        # Fetch all frames in one `vr.get_batch` call
        clip_frame_indices = np.concatenate(
            clip_frame_indices_list)  # [a1, a2, ..., an, b1, b2, ...,bn]
        # assumes decord's torch bridge is active, so get_batch returns a torch tensor
        clips: torch.Tensor = vr.get_batch(
            clip_frame_indices)  # [N*T, H, W, C]
        clip_list = clips.chunk(len(clip_frame_indices_list),
                                dim=0)  # List[Tensor[T, H, W, C]]

        clip_list = [self.spatial_transform(clip) for clip in clip_list]
        for c in clip_list:
            assert isinstance(c, torch.Tensor)

        self.prev = (clip_list, sample.class_index)
        return clip_list, sample.class_index
Example #21
def read_data(opt, video_name, transform):

    decord_vr = decord.VideoReader(video_name,
                                   width=opt.new_width,
                                   height=opt.new_height)
    duration = len(decord_vr)

    opt.skip_length = opt.new_length * opt.new_step
    segment_indices, skip_offsets = sample_indices(opt, duration)

    if opt.video_loader:
        if opt.slowfast:
            clip_input = video_TSN_decord_slowfast_loader(
                opt, video_name, decord_vr, duration, segment_indices,
                skip_offsets)
        else:
            clip_input = video_TSN_decord_batch_loader(opt, video_name,
                                                       decord_vr, duration,
                                                       segment_indices,
                                                       skip_offsets)

    clip_input = transform(clip_input)

    if opt.slowfast:
        sparse_samples = len(clip_input) // (opt.num_segments * opt.num_crop)
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, ) +
                                        (sparse_samples, 3, opt.input_size,
                                         opt.input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1, ) +
                                        (opt.new_length, 3, opt.input_size,
                                         opt.input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))

    if opt.new_length == 1:
        clip_input = np.squeeze(clip_input,
                                axis=2)  # this is for 2D input case

    return nd.array(clip_input)
Example #22
    def __call__(self, results):
        """Perform the PyAV loading.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        try:
            import decord
        except ImportError:
            raise ImportError(
                'Please run "pip install decord" to install Decord first.')

        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        file_obj = io.BytesIO(self.file_client.get(results['filename']))
        container = decord.VideoReader(file_obj, num_threads=self.num_threads)
        results['video_reader'] = container
        results['total_frames'] = len(container)
        return results
Example #23
    def __getitem__(self, idx):
        """
        Return:
            clips (torch.tensor), label (int)
        """
        record = self.video_records[idx]
        video_reader = decord.VideoReader(
            "{}.{}".format(os.path.join(self.video_dir, record.path),
                           self.video_ext),
            # TODO try to add `ctx=decord.ndarray.gpu(0) or .cuda(0)`
        )
        record._num_frames = len(video_reader)

        offsets = self._sample_indices(record)
        clips = np.array([self._get_frames(video_reader, o) for o in offsets])

        if self.num_segments == 1:
            # [T, H, W, C] -> [C, T, H, W]
            return self.transforms(torch.from_numpy(clips[0])), record.label
        else:
            # [S, T, H, W, C] -> [S, C, T, H, W]
            return (torch.stack([
                self.transforms(torch.from_numpy(c)) for c in clips
            ]), record.label)
Example #24
def play_video(results: Dict[int, List[TrackingBbox]],
               input_video: str) -> None:
    """
     Plot the predicted tracks on the input video. Displays to front-end as sequence of images stringed together in a video.

    Args:
        results: dictionary mapping frame id to a list of predicted TrackingBboxes
        input_video: path to the input video
    """

    results = OrderedDict(sorted(results.items()))

    # assign bbox color per id
    unique_ids = list(
        set([bb.track_id for frame in results.values() for bb in frame]))
    color_map = assign_colors(unique_ids)

    # read video and initialize new tracking video
    video_reader = decord.VideoReader(input_video)

    # set up ipython jupyter display
    d_video = IPython.display.display("", display_id=1)

    # Read each frame, add bbox+track id, display frame
    for frame_idx in range(len(results) - 1):
        cur_tracks = results[frame_idx]
        im = video_reader.next().asnumpy()

        if len(cur_tracks) > 0:
            im = draw_boxes(im, cur_tracks, color_map)

        f = io.BytesIO()
        im = Image.fromarray(im)
        im.save(f, "jpeg")
        d_video.update(IPython.display.Image(data=f.getvalue()))
        sleep(0.000001)
Example #25
    def __getitem__(self, idx):
        record = self.video_infos[idx]
        label = record.label
        if self.use_decord:
            video_reader = decord.VideoReader('{}.{}'.format(
                osp.join(self.img_prefix, record.path), self.video_ext))
            record.num_frames = len(video_reader)
        else:
            video_reader = mmcv.VideoReader('{}.{}'.format(
                osp.join(self.img_prefix, record.path), self.video_ext))
            record.num_frames = len(video_reader)
        # record.num_frames = 231

        if self.test_mode:
            segment_indices, skip_offsets = self._get_test_indices(record)
        else:
            segment_indices, skip_offsets = self._sample_indices(
                record) if self.random_shift else self._get_val_indices(record)
        # handle the first modality
        modality = self.modalities[0]
        image_tmpl = self.image_tmpls[0]
        img_group = self._get_frames(record, video_reader, image_tmpl,
                                     modality, segment_indices, skip_offsets)

        flip = np.random.rand() < self.flip_ratio
        if (self.img_scale_dict is not None
                and record.path in self.img_scale_dict):
            img_scale = self.img_scale_dict[record.path]
        else:
            img_scale = self.img_scale
        (img_group, img_shape, pad_shape, scale_factor,
         crop_quadruple) = self.img_group_transform(
             img_group,
             img_scale,
             crop_history=None,
             flip=flip,
             keep_ratio=self.resize_keep_ratio,
             div_255=self.div_255,
             is_flow=True if modality == 'Flow' else False)
        ori_shape = (256, 340, 3)
        img_meta = dict(ori_shape=ori_shape,
                        img_shape=img_shape,
                        pad_shape=pad_shape,
                        scale_factor=scale_factor,
                        crop_quadruple=crop_quadruple,
                        flip=flip)
        # [M x C x H x W]
        # M = 1 * N_oversample * N_seg * L
        if self.input_format == "NCTHW":
            img_group = img_group.reshape((-1, self.num_segments,
                                           self.new_length) +
                                          img_group.shape[1:])
            # N_over x N_seg x L x C x H x W
            img_group = np.transpose(img_group, (0, 1, 3, 2, 4, 5))
            # N_over x N_seg x C x L x H x W
            img_group = img_group.reshape((-1, ) + img_group.shape[2:])
            # M' x C x L x H x W

        # handle the rest modalities using the same
        for i, (modality, image_tmpl) in enumerate(
                zip(self.modalities[1:], self.image_tmpls[1:])):
            img_group = self._get_frames(record, video_reader, image_tmpl,
                                         modality, segment_indices,
                                         skip_offsets)

            # apply transforms, reusing the crop and flip from the first modality
            (img_group, img_shape, pad_shape, scale_factor,
             crop_quadruple) = self.img_group_transform(
                 img_group,
                 img_scale,
                 crop_history=img_meta['crop_quadruple'],
                 flip=img_meta['flip'],
                 keep_ratio=self.resize_keep_ratio,
                 div_255=self.div_255,
                 is_flow=True if modality == 'Flow' else False)
            if self.input_format == "NCTHW":
                # Convert [M x C x H x W] to [M' x C x T x H x W]
                # M = 1 * N_oversample * N_seg * L
                # M' = 1 * N_oversample * N_seg, T = L
                img_group = img_group.reshape((-1, self.num_segments,
                                               self.new_length) +
                                              img_group.shape[1:])
                img_group = np.transpose(img_group, (0, 1, 3, 2, 4, 5))
                img_group = img_group.reshape((-1, ) + img_group.shape[2:])

        return img_group, label
Example #26
parser.add_argument('--gpu', type=int, default=-1, help='context to run, use --gpu=-1 to use cpu only')
parser.add_argument('--file', type=str, default='/tmp/testsrc_h264_100s_default.mp4', help='Test video')
parser.add_argument('--seed', type=int, default=666, help='numpy random seed for random access indices')
parser.add_argument('--random-frames', type=int, default=300, help='number of random frames to run')
parser.add_argument('--width', type=int, default=320, help='resize frame width')
parser.add_argument('--height', type=int, default=240, help='resize frame height')

args = parser.parse_args()

test_video = args.file
if args.gpu > -1:
    ctx = de.gpu(args.gpu)
else:
    ctx = de.cpu()

vr = de.VideoReader(test_video, ctx, width=args.width, height=args.height)
cnt = 0
tic = time.time()
while True:
    try:
        frame = vr.next()
    except StopIteration:
        break
    cnt += 1
print(cnt, ' frames, elapsed time for sequential read: ', time.time() - tic)

np.random.seed(args.seed)  # fix seed for all random tests
acc_indices = np.arange(len(vr))
np.random.shuffle(acc_indices)
if args.random_frames > len(vr):
    warnings.warn('Number of random frames reduced to {} to fit test video'.format(len(vr)))
Example #27
    def decord_video_loader(self, path):

        video_loader = decord.VideoReader(path)
        return video_loader, len(video_loader)
Example #28
 def __init__(self, video_path):
     self.video_path = video_path
     self.video = decord.VideoReader(video_path)
Example #29
def get_output(video_path,
               out_filename,
               label,
               fps=30,
               font_scale=0.5,
               font_color='white',
               target_resolution=None,
               resize_algorithm='bicubic',
               use_frames=False):
    """Get demo output using ``moviepy``.

    This function will generate video file or gif file from raw video or
    frames, by using ``moviepy``. For more information of some parameters,
    you can refer to: https://github.com/Zulko/moviepy.

    Args:
        video_path (str): The video file path or the rawframes directory path.
            If ``use_frames`` is set to True, it should be rawframes directory
            path. Otherwise, it should be video file path.
        out_filename (str): Output filename for the generated file.
        label (str): Predicted label of the generated file.
        fps (int): Number of picture frames to read per second. Default: 30.
        font_scale (float): Font scale of the label. Default: 0.5.
        font_color (str): Font color of the label. Default: 'white'.
        target_resolution (None | tuple[int | None]): Set to
            (desired_width, desired_height) to have resized frames. If either
            dimension is None, the frames are resized by keeping the existing
            aspect ratio. Default: None.
        resize_algorithm (str): Support "bicubic", "bilinear", "neighbor",
            "lanczos", etc. Default: 'bicubic'. For more information,
            see https://ffmpeg.org/ffmpeg-scaler.html
        use_frames (bool): Whether to use rawframes as input. Default: False.
    """

    if video_path.startswith(('http://', 'https://')):
        raise NotImplementedError

    try:
        from moviepy.editor import ImageSequenceClip
    except ImportError:
        raise ImportError('Please install moviepy to enable output file.')

    # Channel Order is BGR
    if use_frames:
        frame_list = sorted(
            [osp.join(video_path, x) for x in os.listdir(video_path)])
        frames = [cv2.imread(x) for x in frame_list]
    else:
        video = decord.VideoReader(video_path)
        frames = [x.asnumpy()[..., ::-1] for x in video]

    if target_resolution:
        w, h = target_resolution
        frame_h, frame_w, _ = frames[0].shape
        if w == -1:
            w = int(h / frame_h * frame_w)
        if h == -1:
            h = int(w / frame_w * frame_h)
        frames = [cv2.resize(f, (w, h)) for f in frames]

    textsize = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, font_scale,
                               1)[0]
    textheight = textsize[1]
    padding = 10
    location = (padding, padding + textheight)

    if isinstance(font_color, str):
        font_color = webcolors.name_to_rgb(font_color)[::-1]

    frames = [np.array(frame) for frame in frames]
    for frame in frames:
        cv2.putText(frame, label, location, cv2.FONT_HERSHEY_DUPLEX,
                    font_scale, font_color, 1)

    # RGB order
    frames = [x[..., ::-1] for x in frames]
    video_clips = ImageSequenceClip(frames, fps=fps)

    out_type = osp.splitext(out_filename)[1][1:]
    if out_type == 'gif':
        video_clips.write_gif(out_filename)
    else:
        video_clips.write_videofile(out_filename, remove_temp=True)
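A hedged call of the helper above; the file names and label are placeholders:

# Write an annotated gif, downscaling to width 340 while keeping aspect ratio.
get_output('demo.mp4', 'demo_labeled.gif', label='dancing',
           fps=24, target_resolution=(340, -1))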
Example #30
    def __call__(self, results):
        """
        Perform mp4 decode operations.
        return:
            List where each item is a numpy array after decoder.
        """
        file_path = results['filename']
        results['format'] = 'video'
        results['backend'] = self.backend

        if self.backend == 'cv2':
            cap = cv2.VideoCapture(file_path)
            videolen = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            sampledFrames = []
            for i in range(videolen):
                ret, frame = cap.read()
                # maybe first frame is empty
                if not ret:
                    continue
                img = frame[:, :, ::-1]
                sampledFrames.append(img)
            results['frames'] = sampledFrames
            results['frames_len'] = len(sampledFrames)

        elif self.backend == 'decord':
            container = de.VideoReader(file_path)
            frames_len = len(container)
            results['frames'] = container
            results['frames_len'] = frames_len

        elif self.backend == 'pyav':  # for TimeSformer
            if self.mode in ["train", "valid"]:
                clip_idx = -1
            elif self.mode in ["test"]:
                clip_idx = 0
            else:
                raise NotImplementedError

            container = av.open(file_path)

            num_clips = 1  # always be 1

            # decode process
            fps = float(container.streams.video[0].average_rate)

            frames_length = container.streams.video[0].frames
            duration = container.streams.video[0].duration

            if duration is None:
                # If failed to fetch the decoding information, decode the entire video.
                decode_all_video = True
                video_start_pts, video_end_pts = 0, math.inf
            else:
                decode_all_video = False
                start_idx, end_idx = get_start_end_idx(
                    frames_length,
                    self.sampling_rate * self.num_seg / self.target_fps * fps,
                    clip_idx, num_clips)
                timebase = duration / frames_length
                video_start_pts = int(start_idx * timebase)
                video_end_pts = int(end_idx * timebase)

            frames = None
            # If video stream was found, fetch video frames from the video.
            if container.streams.video:
                margin = 1024
                seek_offset = max(video_start_pts - margin, 0)

                container.seek(seek_offset,
                               any_frame=False,
                               backward=True,
                               stream=container.streams.video[0])
                tmp_frames = {}
                buffer_count = 0
                max_pts = 0
                for frame in container.decode(**{"video": 0}):
                    max_pts = max(max_pts, frame.pts)
                    if frame.pts < video_start_pts:
                        continue
                    if frame.pts <= video_end_pts:
                        tmp_frames[frame.pts] = frame
                    else:
                        buffer_count += 1
                        tmp_frames[frame.pts] = frame
                        if buffer_count >= 0:
                            break
                video_frames = [tmp_frames[pts] for pts in sorted(tmp_frames)]

                container.close()

                frames = [
                    frame.to_rgb().to_ndarray() for frame in video_frames
                ]
                clip_sz = self.sampling_rate * self.num_seg / self.target_fps * fps

                start_idx, end_idx = get_start_end_idx(
                    len(frames),  # frame_len
                    clip_sz,
                    clip_idx if decode_all_video else
                    0,  # If decoding the whole video: -1 in train and valid, 0 in test;
                    # else always 0, as we have already selected clip-size frames during decoding.
                    1)
                results['frames'] = frames
                results['frames_len'] = len(frames)
                results['start_idx'] = start_idx
                results['end_idx'] = end_idx
        else:
            raise NotImplementedError
        return results