def extractSlides(videoPath):
    print(f"Reading {videoPath.as_posix()}...")
    vr = VideoReader(videoPath.as_posix(), ctx=cpu(0))
    fps = vr.get_avg_fps()
    print(f"Successfully read. FPS: {fps}")
    slides = []
    prevImageHash = None
    imageChanged = False
    # Sample roughly one frame per second of video.
    for i in trange(0, len(vr), int(fps)):
        frame = vr[i].asnumpy()
        pilImage = Image.fromarray(frame)
        currentImageHash = imagehash.average_hash(pilImage)
        if prevImageHash is None:
            prevImageHash = currentImageHash
        imageDiff = currentImageHash - prevImageHash
        # Capture a slide once the image has changed and then settled again.
        if imageChanged and imageDiff < DIFF_THRESHOLD:
            slides.append(pilImage)
            imageChanged = False
        if imageDiff > DIFF_THRESHOLD:
            imageChanged = True
        prevImageHash = currentImageHash
    return slides
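# Usage sketch (assumption, not part of the original source): the imports and
# the module-level DIFF_THRESHOLD that extractSlides relies on. The threshold
# value (10) and the file name are hypothetical.
from pathlib import Path

import imagehash
from decord import VideoReader, cpu
from PIL import Image
from tqdm import trange

DIFF_THRESHOLD = 10  # hamming distance between average hashes; tune per video

slides = extractSlides(Path("lecture.mp4"))
print(f"extracted {len(slides)} slides")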
def test_whole_video(video_path):
    from decord import VideoReader

    vr = VideoReader(video_path)
    for frame in read(video_path):
        frame_decord = vr.next().asnumpy()
        np.testing.assert_equal(frame, frame_decord)
def loadvideo_decord(self, sample, sample_rate_scale=1):
    """Load video content using Decord"""
    # pylint: disable=line-too-long, bare-except, unnecessary-comprehension
    fname = self.data_path + sample

    if not os.path.exists(fname):
        return []

    # avoid hanging issue
    if os.path.getsize(fname) < 1 * 1024:
        print('SKIP: ', fname, " - ", os.path.getsize(fname))
        return []
    try:
        if self.keep_aspect_ratio:
            vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
        else:
            vr = VideoReader(fname, width=self.new_width, height=self.new_height,
                             num_threads=1, ctx=cpu(0))
    except:
        print("video cannot be loaded by decord: ", fname)
        return []

    if self.mode == 'test':
        all_index = [x for x in range(0, len(vr), self.frame_sample_rate)]
        while len(all_index) < self.clip_len:
            all_index.append(all_index[-1])
        vr.seek(0)
        buffer = vr.get_batch(all_index).asnumpy()
        return buffer

    # handle temporal segments
    converted_len = int(self.clip_len * self.frame_sample_rate)
    seg_len = len(vr) // self.num_segment

    all_index = []
    for i in range(self.num_segment):
        if seg_len <= converted_len:
            index = np.linspace(0, seg_len, num=seg_len // self.frame_sample_rate)
            index = np.concatenate(
                (index, np.ones(self.clip_len - seg_len // self.frame_sample_rate) * seg_len))
            index = np.clip(index, 0, seg_len - 1).astype(np.int64)
        else:
            end_idx = np.random.randint(converted_len, seg_len)
            str_idx = end_idx - converted_len
            index = np.linspace(str_idx, end_idx, num=self.clip_len)
            index = np.clip(index, str_idx, end_idx - 1).astype(np.int64)
        index = index + i * seg_len
        all_index.extend(list(index))

    all_index = all_index[::int(sample_rate_scale)]
    vr.seek(0)
    buffer = vr.get_batch(all_index).asnumpy()
    return buffer
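# Worked sketch (assumption, not from the original source): the segment
# sampling above for clip_len=8, frame_sample_rate=2, num_segment=2 and a
# 100-frame video. Each segment spans seg_len=50 frames; converted_len=16, so
# a random 16-frame window is picked per segment, 8 indices are drawn evenly
# from it, and segment i is offset by i * seg_len.
import numpy as np

clip_len, frame_sample_rate, num_segment, n = 8, 2, 2, 100
converted_len = clip_len * frame_sample_rate   # 16
seg_len = n // num_segment                     # 50
end_idx = np.random.randint(converted_len, seg_len)
str_idx = end_idx - converted_len
index = np.linspace(str_idx, end_idx, num=clip_len).astype(np.int64)
print(index, index + seg_len)  # frame indices for segment 0 and segment 1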
def test_video(video_name):
    """loads the given video and feeds frames through the inference engine"""
    f = os.path.join(cachedir, os.path.basename(os.path.splitext(video_name)[0]))
    if os.path.isfile(f + ".npy"):
        print(f"FOUND EXISTING CLASSIFICATIONS: {f}.npy")
        return np.load(f + ".npy")
    vr = VideoReader(video_name, ctx=cpu(0))
    num_frames = len(vr)
    print("video frames:", num_frames)
    decord.bridge.set_bridge('tensorflow')
    # Assuming 60 fps
    sample_rate = 60
    images_per_batch = 32
    samples = int(num_frames / sample_rate)
    batches = int(samples / images_per_batch)
    persample = np.empty((batches * images_per_batch, 4), dtype=np.uint32)
    for i in range(batches):
        print("batch", i, "of", batches)
        # Create a collection of frame indexes at each sample rate within the batch
        frameIdxs = [(x * sample_rate) + (i * images_per_batch * sample_rate)
                     for x in range(images_per_batch)]
        frames = vr.get_batch(frameIdxs)
        res = inferLocal(frameIdxs, frames)
        persample[i * images_per_batch:(i + 1) * images_per_batch, :] = res
    print("saving to", f)
    np.save(f, persample)
    return persample
def extract_frames(video, hi_dir, hi_size, times):
    info = get_video_info(video)
    w, h = info['coded_width'], info['coded_height']
    aspect_ratio = w / h
    if aspect_ratio > hi_size[0] / hi_size[1]:
        # Wide format
        wo, ho = hi_size[0], int(hi_size[0] // aspect_ratio)
    else:
        wo, ho = int(hi_size[1] * aspect_ratio), hi_size[1]
    framerate = int(info['nb_frames']) / float(info['duration'])
    nframes = []
    for time in times:
        nframes.append(int(framerate * (2 * (time + 1))))
    vr = VideoReader(video, ctx=cpu(0))
    nframes = [min(len(vr) - 1, x) for x in nframes]
    frames = vr.get_batch(nframes).asnumpy()
    for i in range(len(nframes)):
        frame = frames[i, :, :, :]
        # Decord returns RGB; swap channels to BGR for cv2.imwrite.
        frame = frame[:, :, np.array([2, 1, 0])]
        assert frame.ndim == 3
        assert frame.shape[-1] == 3
        cv2.imwrite(os.path.join(hi_dir, f'thumb-{times[i]+1:04}.png'),
                    cv2.resize(frame, (wo, ho)))
def worker_func(idx, data_queue, msg_queue, anno_lst):
    while True:
        msg = msg_queue.get()
        if msg == 'stop':
            break
        elif msg == 'new_epoch':
            for anno in anno_lst:
                if Enable_Time_Log:
                    t1 = time.time()
                anno_copy = {k: v for k, v in anno.items()}
                vr = VideoReader(anno['Video'], ctx=cpu(idx))
                h, w, _ = Cfg.input_frame_shape
                # Decord yields RGB frames; [:, :, ::-1] flips them to BGR
                # before resizing and pickling for the queue.
                anno_copy['Frames'] = [
                    pickle.dumps(cv2.resize(img[:, :, ::-1], (w, h)))
                    for img in
                    list(vr.get_batch(anno['FrameIDs']).asnumpy())]
                data_queue.put(anno_copy)
                if Enable_Time_Log:
                    t2 = time.time()
                    print('Decord reader takes {:.3f}s'.format(t2 - t1))
        elif len(msg) == 2 and msg[0] == 'update':
            anno_lst = msg[1]
def get_train_clip(opts, video_path):
    """
    Chooses a random clip from a video for training/validation
    Args:
        opts       : config options
        video_path : path to the video file
    Returns:
        list(frames) : random clip (list of frames of length sample_duration)
                       from a video for training/validation
    """
    clip = []
    i = 0
    loop = False
    vr = VideoReader(video_path, width=-1, height=-1)
    # h, w = vr[0].shape[:2]
    # if h > w:
    #     r_w = 256
    #     r_h = int(h/w*256)
    # else:
    #     r_h = 256
    #     r_w = int(w/h*256)
    # vr = VideoReader(video_path, width=r_w, height=r_h)
    total_frames = len(vr)

    if total_frames > 300:
        interval = int(total_frames / (300 / opts.sample_duration))
        s_frame = np.random.randint(0, total_frames - interval)
        # np.int is removed in recent NumPy; use the builtin int
        f_stamp = list(np.linspace(s_frame, s_frame + interval,
                                   opts.sample_duration).astype(int))
        clip = vr.get_batch(f_stamp).asnumpy()
        return torch.from_numpy(clip.transpose(3, 0, 1, 2).astype(np.float32))
    else:
        # choosing a random frame
        if total_frames <= opts.sample_duration:
            loop = True
            start_frame = 0
        else:
            start_frame = np.random.randint(0, total_frames - opts.sample_duration)

        if opts.modality == 'RGB':
            while len(clip) < opts.sample_duration:
                clip.append(vr.get_batch([start_frame + i]).asnumpy()[0])  # revised
                i += 1
                if loop and i == total_frames:
                    i = 0
        return torch.from_numpy(np.array(clip, dtype=np.float32).transpose(3, 0, 1, 2))
def load_video(self, path):
    '''
    https://github.com/dmlc/decord#installation
    https://github.com/dmlc/decord/blob/master/examples/video_reader.ipynb
    A decord wrapper implemented per the instructions.
    Loads the video as an object.
    Args:
        path: the path to the video file
    Returns:
        none
    '''
    self.vr = VideoReader(path, width=320, height=240, ctx=cpu(0))
def __getitem__(self, idx):
    if idx < 0:
        return torch.zeros(1, 1, 1, 1), self.flattened_data_dir[idx]
    result = False
    vid = None
    # idx = 3456
    # deal with corrupted videos in list or videos which are just too long for us to process
    while not result:
        try:
            vid = VideoReader(self.flattened_data_dir[idx])
            if int(len(vid)) > self.temporal_depth:
                result = True
            else:
                # idx = random.randint(0, len(self.flattened_data_dir)-1)
                del vid
                gc.collect()
                return torch.zeros(901, 1, 1, 1), -1
        except:
            # idx = random.randint(0, len(self.flattened_data_dir)-1)
            del vid
            gc.collect()
            return torch.zeros(901, 1, 1, 1), -1

    frames = self.transform(vid, self.split)
    # vid.close()
    del vid
    gc.collect()
    return frames, self.flattened_data_dir[idx]
def __getitem__(self, idx):
    result = False
    vid = None
    cls = None
    # idx = None  # random.randint(0,400)
    # deal with corrupted videos in list
    # print(self.flattened_data_dir[idx])
    while not result:
        try:
            # vid = pims.PyAVVideoReader(self.flattened_data_dir[idx])
            vid = VideoReader(self.flattened_data_dir[idx])
            cls = self.idx_per_file[idx]
            test_frame = vid[1]
            if int(len(vid)) > self.temporal_depth:
                result = True
            else:
                idx = random.randint(0, len(self.flattened_data_dir) - 1)
        except:
            idx = random.randint(0, len(self.flattened_data_dir) - 1)

    frames = self.transform(vid, self.split)
    # del rand_vid, vid
    # del vid
    # print(frames.shape)
    return frames, cls
def get(self, record, indices, path):
    images = list()
    if not self.video_source:
        # print(path)
        for seg_ind in indices:
            p = int(seg_ind)
            seg_imgs = self._load_image(path, p)
            images.extend(seg_imgs)
    else:
        vr = VideoReader(os.path.join(self.root_path, record.path), ctx=cpu(0))
        for seg_ind in indices:
            try:
                images.append(Image.fromarray(vr[seg_ind - 1].asnumpy()))
            except Exception as e:
                images.append(Image.fromarray(vr[0].asnumpy()))
    process_data = self.transform(images)
    # import ipdb; ipdb.set_trace()
    # print(path)
    if self.multi_class:
        # print(record.mlabel)
        return process_data, record.mlabel
    else:
        return process_data, record.label
@contextmanager  # from contextlib; needed so the generator works in a `with` block
def video_reader(*args, **kwds):
    # Code to acquire resource, e.g.:
    resource = VideoReader(*args, **kwds)
    try:
        yield resource
    finally:
        # Drop the reference so decord can release the decoder.
        del resource
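# Usage sketch (assumption, not from the original source): exercising the
# context manager above; "sample.mp4" is a hypothetical path.
from contextlib import contextmanager  # required by the decorator above
from decord import VideoReader

with video_reader("sample.mp4", num_threads=1) as vr:
    first = vr[0].asnumpy()
    print(len(vr), first.shape)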
def test_bytes_io():
    fn = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                      '..', '..', '..', 'examples', 'flipping_a_pancake.mkv'))
    with open(fn, 'rb') as f:
        vr = VideoReader(f)
        assert len(vr) == 310
        vr2 = _get_default_test_video()
        # average pixel diff < 2
        assert np.mean(np.abs(vr[10].asnumpy().astype('float')
                              - vr2[10].asnumpy().astype('float'))) < 2
def _get_rotated_test_video(rot, height=-1, width=-1, ctx=CTX):
    return VideoReader(
        os.path.abspath(os.path.join(os.path.dirname(__file__),
                                     '..', '..', 'test_data', f'video_{rot}.mov')),
        height=height, width=width, ctx=ctx)
def frames(self):
    vr = VideoReader(self._path)
    trans = torchvision.transforms.ToPILImage(mode='RGB')
    images = []
    # Assumes decord.bridge.set_bridge('torch'), so vr[idx] is an HWC torch
    # tensor that can be permuted to CHW for ToPILImage.
    for idx in range(len(vr)):
        images.append(trans(vr[idx].permute(2, 0, 1)).convert('RGB'))
    return images
def get_decord(path):
    images_d = []
    vr = VideoReader(path, ctx=cpu(0))
    for i in range(len(vr)):
        # the video reader will handle seeking and skipping in the most efficient manner
        images_d.append(vr[i])
    print("decord", len(images_d))
def __init__(self, url, num_threads=1, batch=64):
    # Note: the num_threads argument is overridden by the machine's CPU count.
    self.num_threads = multiprocessing.cpu_count()
    print("cpu count ", self.num_threads)
    self.vr = VideoReader(url, ctx=cpu(0))
    self.img_size = 640
    # self.detection = YOLOV5()
    self.n = len(self.vr)
    self.batch = batch
def test_same_behavior_as_decord(video_path):
    from decord import VideoReader

    frame = read(video_path).__next__()
    decord_frame = VideoReader(video_path).next().asnumpy()
    assert frame.shape == decord_frame.shape
    np.testing.assert_equal(frame, decord_frame)
class DecordVideoReader():
    def __init__(self, video_file, img_size=(416, 416), gpu=None,
                 num_threads=8, offset=0, is_torch=True):
        self.is_torch = is_torch
        if is_torch:
            decord.bridge.set_bridge('torch')
        if type(img_size) is tuple:
            self.img_size = img_size
        else:
            self.img_size = (img_size, img_size)
        self.offset = offset
        if gpu is None:
            ctx = decord.cpu()
        else:
            ctx = decord.gpu(gpu)
        if type(img_size) == int:
            img_size = (img_size, img_size)
        self._vr = VideoReader(video_file, ctx=ctx, width=img_size[0],
                               height=img_size[1], num_threads=num_threads)

    def __len__(self):
        return len(self._vr) - self.offset

    def __getitem__(self, idx):
        if self.is_torch:
            return self._vr[idx + self.offset].permute(2, 0, 1).contiguous().float().div(255)
        else:
            return self._vr[idx + self.offset].asnumpy()

    def get_batch(self, batch):
        batch = [b + self.offset for b in batch]
        if self.is_torch:
            return self._vr.get_batch(batch).permute(0, 3, 1, 2).contiguous().float().div(255)
        else:
            return self._vr.get_batch(batch).asnumpy()
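# Usage sketch (assumption, not from the original source): decode two frames
# as normalized CHW torch tensors on the CPU; "clip.mp4" is a hypothetical file.
reader = DecordVideoReader("clip.mp4", img_size=416, gpu=None)
pair = reader.get_batch([0, 1])  # torch tensor of shape (2, 3, 416, 416), values in [0, 1]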
def test_bytes_io():
    fn = os.path.abspath(
        os.path.join(os.path.dirname(__file__),
                     '..', '..', '..', 'examples', 'flipping_a_pancake.mkv'))
    with open(fn, 'rb') as f:
        vr = VideoReader(f)
        assert len(vr) == 310
        vr2 = _get_default_test_video()
        assert np.allclose(vr[10].asnumpy(), vr2[10].asnumpy())
def run():
    for f in files:
        file = open(f, "rb")
        bs = file.read()
        file.close()

        # Warm-up pass: copy the file to a ramdisk and open it there
        # (this timing is discarded; `a` is reassigned below).
        a = datetime.datetime.now()
        with open('/dev/shm/a.mp4', 'wb') as bf:
            bf.write(bs)
        vr = VideoReader('/dev/shm/a.mp4', ctx=cpu(0), num_threads=0)

        a = datetime.datetime.now()
        vr = VideoReader(f, ctx=cpu(0), num_threads=0)
        b = datetime.datetime.now()
        print("init: ", (b - a).microseconds, "us")
        for i in np.array([10, 12, 14, 60, 62, 64]) + 0:
            vr[i]
        c = datetime.datetime.now()
        print(f, "decode: ", (c - b).microseconds, "us")
def __getitem__(self, idx):
    """
    Returns:
        tuple_frame (tensor): [tuple_len x channel x height x width]
        tuple_order (tensor): [tuple_len]
    """
    if self.train:
        videoname = self.train_split[idx]
    else:
        videoname = self.test_split[idx]

    filename = os.path.join(self.root_dir, 'video', videoname)
    # videodata = skvideo.io.vread(filename)
    # length, height, width, channel = videodata.shape
    videodata = VideoReader(filename, ctx=cpu(0))
    length = len(videodata)
    height = videodata[0].shape[0]
    width = videodata[0].shape[1]
    channel = videodata[0].shape[2]

    tuple_frame = []
    tuple_order = list(range(0, self.tuple_len))

    # random select frame for train, deterministic random select for test
    if self.train:
        tuple_start = random.randint(0, length - self.tuple_total_frames)
    else:
        random.seed(idx)
        tuple_start = random.randint(0, length - self.tuple_total_frames)

    frame_idx = tuple_start
    for _ in range(self.tuple_len):
        tuple_frame.append(videodata[frame_idx])
        frame_idx = frame_idx + self.interval

    frame_and_order = list(zip(tuple_frame, tuple_order))
    # random shuffle for train, the same shuffle for test
    if self.train:
        random.shuffle(frame_and_order)
    else:
        random.seed(idx)
        random.shuffle(frame_and_order)
    tuple_frame, tuple_order = zip(*frame_and_order)

    if self.transforms_:
        trans_tuple = []
        for frame in tuple_frame:
            frame = self.toPIL(frame)        # PIL image
            frame = self.transforms_(frame)  # tensor [C x H x W]
            trans_tuple.append(frame)
        tuple_frame = trans_tuple
    else:
        tuple_frame = [torch.tensor(frame) for frame in tuple_frame]

    return torch.stack(tuple_frame), torch.tensor(tuple_order)
def test_same_behavior_as_decord_with_resize(video_path):
    from decord import VideoReader

    height, width = 540, 960
    frame = read(video_path, height=height, width=width).__next__()
    decord_frame = VideoReader(video_path, width=width, height=height).next().asnumpy()
    assert frame.shape == decord_frame.shape
    np.testing.assert_equal(frame, decord_frame)
def _check_video(video_filename):
    ok = False
    try:
        container = VideoReader(video_filename, num_threads=1)
        if len(container) > 0:
            ok = True
        del container
    except:
        pass
    return ok
def extract_frames_from_video(video_file, video_id, target_dir):
    """
    For each video file, creates the corresponding directory if it does not exist.
    csv item: video_id;path_to_frame;frame_index;avg_fps;yolo3_classes;caption;score;
    """
    results = []
    # a file-like object works as well, for in-memory decoding
    with open(video_file, 'rb') as f:
        vr = VideoReader(f, ctx=cpu(0))
        print('video frames:', len(vr))
        avg_fps = int(vr.get_avg_fps())
        # 1. the simplest way is to directly access frames
        print('get_avg_fps=', vr.get_avg_fps())
        # sample one frame per second; the video reader will handle seeking
        # and skipping in the most efficient manner
        for i in range(0, len(vr), avg_fps):
            frame = vr[i]
            save_path = os.path.join(target_dir, "{:010d}.jpg".format(i))
            if not os.path.exists(save_path):
                print(frame.shape)
                img = frame.asnumpy()
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                detected_classes_list = detect_objects_single_image(img)
                words = get_caption_single_image(img)
                score = get_score(words, detected_classes_list)
                if score > 2:
                    frame_result = [video_id, save_path, i, avg_fps,
                                    detected_classes_list, words, score]
                    cv2.imwrite(save_path, img)
                    # append one csv row per kept frame (extend would flatten the fields)
                    results.append(frame_result)
    return results
def __init__(self, video_file, frame_idxs=None):
    """
    :param video_file: video file path
    :param frame_idxs: frames that are to be processed, a list of integers
    """
    self.vr = VideoReader(video_file, ctx=cpu(0))
    self._rotation = check_rotation(video_file)
    if frame_idxs is None:
        self._frame_idxs = np.arange(len(self.vr))
    else:
        self._frame_idxs = sorted(frame_idxs)
def get_frames(video_path: Path,
               num_frames: int,
               resize_coeff: Tuple[int, int],
               transform: albu.Compose,
               decode_gpu: bool) -> Dict[str, Any]:
    try:
        if decode_gpu:
            video = VideoReader(str(video_path), ctx=gpu(0))
        else:
            video = VideoReader(str(video_path), ctx=cpu(0))

        len_video = len(video)

        if num_frames is None:
            frame_ids = list(range(len_video))
        else:
            if len_video < num_frames:
                step = 1
            else:
                step = int(len_video / num_frames)
            frame_ids = list(range(0, len_video, step))[:num_frames]

        frames = video.get_batch(frame_ids).asnumpy()

        torched_frames, resize_factor = prepare_frames(frames, resize_coeff, transform)

        result = {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        result = {}

    return result
def _check_video(video_filename):
    ok = False
    try:
        container = VideoReader(video_filename, num_threads=1)
        if len(container) > 0:
            ok = True
        del container
    except:
        pass

    if ok:
        return video_filename, True, "Checked"
    else:
        remove(video_filename)
        return video_filename, False, "Invalid video file"
def __getitem__(self, idx):
    """
    Returns:
        clip (tensor): [channel x time x height x width]
        class_idx (tensor): class index [0-50]
    """
    if self.train:
        videoname = self.train_split[idx]
    else:
        videoname = self.test_split[idx]
    class_idx = self.class_label2idx[videoname[:videoname.find('/')]] - 1
    filename = os.path.join(self.root_dir, 'video', videoname)
    videodata = VideoReader(filename, ctx=cpu(0))
    # length, height, width, channel = videodata.shape
    length = len(videodata)
    height = videodata[0].shape[0]
    width = videodata[0].shape[1]
    channel = videodata[0].shape[2]

    all_clips = []
    all_idx = []
    for i in np.linspace(self.clip_len / 2, length - self.clip_len / 2, self.sample_num):
        clip_start = int(i - self.clip_len / 2)
        clip = videodata[clip_start:clip_start + self.clip_len]
        if self.transforms_:
            trans_clip = []
            # fix seed, apply the same `random transformation` for all frames in the clip
            seed = random.random()
            for frame in clip.asnumpy():
                random.seed(seed)
                frame = self.toPIL(frame)        # PIL image
                frame = self.transforms_(frame)  # tensor [C x H x W]
                trans_clip.append(frame)
            # (T x C x H x W) to (C x T x H x W)
            clip = torch.stack(trans_clip).permute([1, 0, 2, 3])
            # frequency
            clip_mean = torch.mean(clip, 1, keepdim=True)
            clip = clip - clip_mean
        else:
            clip = torch.tensor(clip)
        all_clips.append(clip)
        all_idx.append(torch.tensor(int(class_idx)))

    return torch.stack(all_clips), torch.stack(all_idx)