def loadvideo_decord(self, sample, sample_rate_scale=1):
    """Load the frames of one video with Decord.

    Args:
        sample: Path suffix of the video; it is concatenated as-is to
            ``self.data_path``.
        sample_rate_scale: Stride applied to the final list of frame indices
            in the non-test modes.

    Returns:
        The array produced by ``vr.get_batch(...).asnumpy()`` for the selected
        frame indices, or an empty list when the file is missing, smaller
        than 1 KiB, cannot be opened by Decord, or contains no frames.
    """
    # pylint: disable=line-too-long
    fname = self.data_path + sample

    if not os.path.exists(fname):
        return []

    # avoid hanging issue
    file_size = os.path.getsize(fname)
    if file_size < 1 * 1024:
        print('SKIP: ', fname, " - ", file_size)
        return []
    try:
        if self.keep_aspect_ratio:
            vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
        else:
            vr = VideoReader(fname,
                             width=self.new_width,
                             height=self.new_height,
                             num_threads=1,
                             ctx=cpu(0))
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("video cannot be loaded by decord: ", fname)
        return []

    total_frames = len(vr)
    if total_frames == 0:
        # Treated like an unreadable file; otherwise the padding below would
        # index an empty list.
        return []

    if self.mode == 'test':
        all_index = list(range(0, total_frames, self.frame_sample_rate))
        # Pad short videos by repeating the last sampled index.
        all_index.extend([all_index[-1]] * (self.clip_len - len(all_index)))
        vr.seek(0)
        buffer = vr.get_batch(all_index).asnumpy()
        return buffer

    # handle temporal segments
    converted_len = int(self.clip_len * self.frame_sample_rate)
    seg_len = total_frames // self.num_segment

    all_index = []
    for i in range(self.num_segment):
        if seg_len <= converted_len:
            # Segment is too short for a full clip: sample what exists and
            # pad with the segment's last frame.
            n_sampled = seg_len // self.frame_sample_rate
            index = np.linspace(0, seg_len, num=n_sampled)
            index = np.concatenate(
                (index, np.ones(self.clip_len - n_sampled) * seg_len))
            index = np.clip(index, 0, seg_len - 1).astype(np.int64)
        else:
            # Random window of converted_len frames inside the segment.
            end_idx = np.random.randint(converted_len, seg_len)
            str_idx = end_idx - converted_len
            index = np.linspace(str_idx, end_idx, num=self.clip_len)
            index = np.clip(index, str_idx, end_idx - 1).astype(np.int64)
        # Shift segment-local indices to positions in the whole video.
        index = index + i * seg_len
        all_index.extend(list(index))

    all_index = all_index[::int(sample_rate_scale)]
    vr.seek(0)
    buffer = vr.get_batch(all_index).asnumpy()
    return buffer
def get_train_clip(opts, video_path):
    """
    Chooses a random clip from a video for training/validation

    Args:
        opts       : config options; ``opts.sample_duration`` (frames per
                     clip) and ``opts.modality`` are read
        video_path : path of the video file, opened with decord
    Returns:
        float32 tensor built from ``opts.sample_duration`` frames with the
        axes transposed by (3, 0, 1, 2), i.e. the last axis of the decoded
        batch is moved to the front
    Raises:
        ValueError : when ``opts.modality`` is not 'RGB' in the short-video
                     branch (the only modality handled there)
    """
    vr = VideoReader(video_path, width=-1, height=-1)
    total_frames = len(vr)

    if total_frames > 300:
        # Long video: spread the clip evenly over a random window whose
        # length scales with the video length.
        interval = int(total_frames / (300 / opts.sample_duration))
        s_frame = np.random.randint(0, total_frames - interval)
        # np.int64 instead of np.int: the alias was removed in NumPy 1.24.
        f_stamp = list(
            np.linspace(s_frame, s_frame + interval,
                        opts.sample_duration).astype(np.int64))
        clip = vr.get_batch(f_stamp).asnumpy()
        return torch.from_numpy(clip.transpose(3, 0, 1, 2).astype(np.float32))

    # choosing a random frame
    loop = total_frames <= opts.sample_duration
    if loop:
        start_frame = 0
    else:
        start_frame = np.random.randint(0, total_frames - opts.sample_duration)

    if opts.modality != 'RGB':
        # Previously this fell through with an empty clip and failed inside
        # transpose() with an opaque ValueError; same type, clearer message.
        raise ValueError('unsupported modality: {}'.format(opts.modality))

    # Consecutive frames from start_frame; when the video is not longer than
    # the clip, wrap around to frame 0 until the clip is full.
    if loop:
        stamps = [start_frame + (k % total_frames)
                  for k in range(opts.sample_duration)]
    else:
        stamps = [start_frame + k for k in range(opts.sample_duration)]
    # One batched decode instead of one get_batch call per frame.
    clip = vr.get_batch(stamps).asnumpy()

    return torch.from_numpy(
        np.array(clip, dtype=np.float32).transpose(3, 0, 1, 2))
def worker_func(idx, data_queue, msg_queue, anno_lst):
    """Serve decode requests from ``msg_queue`` until 'stop' is received.

    Messages handled:
      * 'stop'            -- leave the loop and return.
      * 'new_epoch'       -- for every annotation in the current list, decode
                             ``anno['FrameIDs']`` from ``anno['Video']``,
                             reverse the channel axis, resize to
                             ``Cfg.input_frame_shape``, pickle each frame and
                             put a copy of the annotation (with the pickled
                             frames under 'Frames') on ``data_queue``.
      * ('update', lst)   -- replace the current annotation list with ``lst``.

    ``idx`` is used as the decord CPU context index.
    """
    # iter(callable, sentinel) stops as soon as a message equals 'stop'.
    for message in iter(msg_queue.get, 'stop'):
        if message == 'new_epoch':
            for anno in anno_lst:
                if Enable_Time_Log:
                    t1 = time.time()

                record = dict(anno)
                reader = VideoReader(anno['Video'], ctx=cpu(idx))
                h, w, _ = Cfg.input_frame_shape
                decoded = reader.get_batch(anno['FrameIDs']).asnumpy()
                pickled = []
                for img in decoded:
                    resized = cv2.resize(img[:, :, ::-1], (w, h))
                    pickled.append(pickle.dumps(resized))
                record['Frames'] = pickled
                data_queue.put(record)

                if Enable_Time_Log:
                    t2 = time.time()
                    print('Decord reader takes {:.3f}s'.format(t2 - t1))
        elif len(message) == 2 and message[0] == 'update':
            anno_lst = message[1]
def test_video(video_name, sample_rate=60, images_per_batch=32):
    """loads the given video and feeds frames through the inference engine

    Results are cached as ``<cachedir>/<video basename without extension>.npy``;
    when that file already exists it is loaded and returned without decoding.

    Args:
        video_name: path of the video to classify.
        sample_rate: take one frame every ``sample_rate`` frames (the default
            of 60 assumes a 60 fps video, i.e. one sample per second).
        images_per_batch: number of sampled frames passed to ``inferLocal``
            per call.

    Returns:
        uint32 array of shape ``(batches * images_per_batch, 4)`` holding the
        rows returned by ``inferLocal``. Samples beyond the last full batch
        are not processed.
    """
    cache_stem = os.path.join(
        cachedir, os.path.basename(os.path.splitext(video_name)[0]))
    cache_file = cache_stem + ".npy"
    if os.path.isfile(cache_file):
        print(f"FOUND EXISTING CLASSIFICATIONS: {cache_stem}.npy")
        return np.load(cache_file)

    vr = VideoReader(video_name, ctx=cpu(0))
    total_frames = len(vr)
    print("video frames:", total_frames)
    decord.bridge.set_bridge('tensorflow')

    samples = int(total_frames / sample_rate)
    batches = int(samples / images_per_batch)
    # Distance, in frames, between the first samples of consecutive batches.
    batch_stride = images_per_batch * sample_rate
    persample = np.empty((batches * images_per_batch, 4), dtype=np.uint32)
    for i in range(batches):
        print("batch", i, "of", batches)
        # Create a collection of frame indexes at each sample rate within the batch
        frame_idxs = [x * sample_rate + i * batch_stride
                      for x in range(images_per_batch)]
        batch_frames = vr.get_batch(frame_idxs)
        res = inferLocal(frame_idxs, batch_frames)
        persample[i * images_per_batch:(i + 1) * images_per_batch, :] = res

    print("saving to", cache_stem)
    # np.save appends ".npy", which is the name the cache lookup above expects.
    np.save(cache_stem, persample)
    return persample
def extract_frames(video, hi_dir, hi_size, times):
    """Write one resized PNG thumbnail per entry of ``times``.

    Args:
        video: path of the video file.
        hi_dir: directory the thumbnails are written to.
        hi_size: ``(width, height)`` bounding box; thumbnails keep the
            video's aspect ratio and fit inside this box.
        times: integers ``t``; the frame closest to ``2 * (t + 1)`` seconds
            (clamped to the last frame) is saved as ``thumb-<t+1:04>.png``.
    """
    info = get_video_info(video)
    w, h = info['coded_width'], info['coded_height']
    aspect_ratio = w / h
    if aspect_ratio > hi_size[0] / hi_size[1]:
        # Wide format
        wo, ho = hi_size[0], int(hi_size[0] // aspect_ratio)
    else:
        wo, ho = int(hi_size[1] * aspect_ratio), hi_size[1]

    # Average frames per second over the whole file.
    framerate = int(info['nb_frames']) / float(info['duration'])

    vr = VideoReader(video, ctx=cpu(0))
    # len(vr) is the public frame count; avoid the private _num_frame field.
    last_frame = len(vr) - 1
    frame_ids = [min(last_frame, int(framerate * (2 * (t + 1)))) for t in times]
    frames = vr.get_batch(frame_ids).asnumpy()

    for t, frame in zip(times, frames):
        # Reverse the channel order before writing: cv2.imwrite stores the
        # array as BGR (decord frames are presumably RGB -- confirm).
        frame = frame[:, :, np.array([2, 1, 0])]
        assert frame.ndim == 3
        assert frame.shape[-1] == 3
        cv2.imwrite(os.path.join(hi_dir, f'thumb-{t + 1:04}.png'),
                    cv2.resize(frame, (wo, ho)))
class DecordVideoReader():
    """Thin wrapper around decord's ``VideoReader`` with a frame offset.

    Indices passed to ``__getitem__`` / ``get_batch`` are shifted by
    ``offset`` before being handed to decord. With ``is_torch=True`` the torch
    bridge is enabled and frames are returned as float tensors divided by 255
    with the last axis moved in front of the two spatial axes; otherwise
    frames are returned via ``asnumpy()``.
    """

    def __init__(self,
                 video_file,
                 img_size=(416, 416),
                 gpu=None,
                 num_threads=8,
                 offset=0,
                 is_torch=True):
        """Open ``video_file``.

        Args:
            video_file: path (or object) accepted by decord's VideoReader.
            img_size: ``(width, height)`` pair, or a single int used for both.
            gpu: GPU index to decode on; ``None`` decodes on the CPU.
            num_threads: forwarded to VideoReader.
            offset: number of leading frames hidden from the caller.
            is_torch: select torch tensors (True) or numpy arrays (False).
        """
        self.is_torch = is_torch
        if is_torch:
            decord.bridge.set_bridge('torch')
        self.img_size = self._normalize_size(img_size)
        self.offset = offset
        ctx = decord.cpu() if gpu is None else decord.gpu(gpu)
        self._vr = VideoReader(video_file,
                               ctx=ctx,
                               width=self.img_size[0],
                               height=self.img_size[1],
                               num_threads=num_threads)

    @staticmethod
    def _normalize_size(img_size):
        """Return ``img_size`` as a 2-tuple; a scalar is used for both sides."""
        # The previous exact-type check turned a list such as [w, h] into
        # ([w, h], [w, h]); accept any tuple/list pair.
        if isinstance(img_size, (tuple, list)):
            return tuple(img_size)
        return (img_size, img_size)

    def __len__(self):
        # Frames before the offset are not addressable through this wrapper.
        return len(self._vr) - self.offset

    def __getitem__(self, idx):
        frame = self._vr[idx + self.offset]
        if self.is_torch:
            return frame.permute(2, 0, 1).contiguous().float().div(255)
        return frame.asnumpy()

    def get_batch(self, batch):
        """Return the frames at ``batch`` (indices relative to the offset)."""
        shifted = [b + self.offset for b in batch]
        frames = self._vr.get_batch(shifted)
        if self.is_torch:
            return frames.permute(0, 3, 1, 2).contiguous().float().div(255)
        return frames.asnumpy()
def main():
    """Time decoding the first 16 frames of every ``*.mp4`` under ``BASE``.

    The first 16 files are read once as a warm-up, then the full file list is
    read three times and the wall-clock duration of each pass is printed.
    """
    files = list(BASE.glob('**/*.mp4'))
    frames = list(range(16))

    def read_clip(path):
        # Open the file and decode the fixed set of frame indices.
        VideoReader(str(path)).get_batch(frames)

    # Warm up
    for path in files[:16]:
        read_clip(path)

    print(f"Reading {len(frames)} frames from {len(files)} files")
    for pass_no in range(1, 4):
        print(f'pass {pass_no}')
        started = time.perf_counter()
        for path in files:
            read_clip(path)
        finished = time.perf_counter()
        print(f'Time: {finished-started}')
    print('Done')
def get_frames(video_path: Path, num_frames: int, resize_coeff: Tuple[int, int], transform: albu.Compose, decode_gpu: bool) -> Dict[str, Any]:
    """Decode evenly spaced frames of a video and run ``prepare_frames`` on them.

    Args:
        video_path: video file to open.
        num_frames: how many frames to take; ``None`` takes every frame.
        resize_coeff: forwarded to ``prepare_frames``.
        transform: forwarded to ``prepare_frames``.
        decode_gpu: decode on ``gpu(0)`` when true, else on ``cpu(0)``.

    Returns:
        Dict with keys "torched_frames", "resize_factor", "video_path",
        "frame_ids" and "frames"; an empty dict when a DECORDError is raised.
    """
    try:
        ctx = gpu(0) if decode_gpu else cpu(0)
        video = VideoReader(str(video_path), ctx=ctx)
        len_video = len(video)

        if num_frames is None:
            frame_ids = list(range(len_video))
        else:
            # A video shorter than the request is read frame by frame.
            step = 1 if len_video < num_frames else int(len_video / num_frames)
            frame_ids = list(range(0, len_video, step))[:num_frames]

        frames = video.get_batch(frame_ids).asnumpy()
        torched_frames, resize_factor = prepare_frames(frames, resize_coeff, transform)
        return {
            "torched_frames": torched_frames,
            "resize_factor": resize_factor,
            "video_path": video_path,
            "frame_ids": np.array(frame_ids),
            "frames": frames,
        }
    except DECORDError:
        print(f"{video_path} is broken")
        return {}
def extract_frames(video_path, frames_dir, overwrite=False, start=-1, end=-1, every=1):
    """
    Extract frames from a video using decord's VideoReader

    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param overwrite: to overwrite frames that already exist?
    :param start: start frame (negative means frame 0)
    :param end: end frame, exclusive (negative means the end of the video)
    :param every: frame spacing, counted from ``start``
    :return: count of images actually written
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)

    assert os.path.exists(video_path)  # assert the video file exists

    # load the VideoReader
    vr = VideoReader(video_path, ctx=cpu(0))  # can set to cpu or gpu .. ctx=gpu(0)

    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, every))
    saved_count = 0

    if every > 25 and len(frames_list) < 1000:
        # this is faster for every > 25 frames and can fit in memory
        frames = vr.get_batch(frames_list).asnumpy()
        for index, frame in zip(frames_list, frames):
            save_path = os.path.join(frames_dir, video_filename,
                                     "{:010d}.jpg".format(index))
            if overwrite or not os.path.exists(save_path):
                # cv2.imwrite returns False on failure (e.g. missing
                # directory); only count files that were really written.
                if cv2.imwrite(save_path,
                               cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)):
                    saved_count += 1
    else:
        # this is faster for every <25 and consumes small memory
        for index in range(start, end):
            frame = vr[index]  # read every frame in order, wanted or not
            # Select relative to ``start`` so this branch picks exactly the
            # frames in ``frames_list``, like the batch branch above.
            if (index - start) % every == 0:
                # NOTE(review): unlike the batch branch, this path omits the
                # video_filename sub-directory -- confirm which layout
                # callers expect before unifying.
                save_path = os.path.join(frames_dir,
                                         "{:010d}.jpg".format(index))
                if overwrite or not os.path.exists(save_path):
                    if cv2.imwrite(
                            save_path,
                            cv2.cvtColor(frame.asnumpy(),
                                         cv2.COLOR_RGB2BGR)):
                        saved_count += 1

    return saved_count  # and return the count of the images we saved
def get_test_clip(opts, video_path):
    """
    Cuts a whole video into clips for testing

    Args:
        opts       : config options; ``opts.sample_duration`` and
                     ``opts.modality`` are read
        video_path : path of the video file, opened with decord
    Returns:
        float32 tensor stacking every clip, with the axes transposed by
        (0, 4, 1, 2, 3), i.e. the last axis of each decoded batch is moved
        right after the clip axis
    Raises:
        Exception  : whatever VideoReader raised when the video cannot be
                     opened (the path is first appended to
                     'un_opened_file.txt')
        ValueError : when ``opts.modality`` is not 'RGB'
    """
    clip_stamps = []

    try:
        vr = VideoReader(video_path, width=-1, height=-1)
    except Exception:
        print('video path {} cannot be opened'.format(video_path))
        with open('un_opened_file.txt', 'a') as f:
            f.write(video_path)
            f.write('\n')
        # Previously execution continued and crashed on the unbound ``vr``;
        # surface the real decoding error instead.
        raise

    total_frames = len(vr)
    if total_frames > 300:
        # in case video FPS >> 30
        # NOTE(review): 16 is hard-coded here instead of opts.sample_duration
        # -- confirm this is intended.
        # np.int64 instead of np.int: the alias was removed in NumPy 1.24.
        s_stamp = np.linspace(0, total_frames, int(300 / 16) + 1).astype(np.int64)
        for seg_start, seg_end in zip(s_stamp[:-1], s_stamp[1:]):
            clip_stamps.append(
                list(np.linspace(seg_start, seg_end - 1, 16).astype(np.int64)))
    elif total_frames < opts.sample_duration:
        # Short video: use every frame, then pad by cycling from frame 0.
        single_clip_stamp = list(range(0, total_frames))
        i = 0
        while len(single_clip_stamp) < opts.sample_duration:
            single_clip_stamp.append(i)
            i += 1
            if i >= total_frames - 1:
                i = 0
        clip_stamps.append(single_clip_stamp)
    else:
        # Non-overlapping windows, plus one final window aligned to the end
        # of the video.
        s_stamp = list(range(0, total_frames, opts.sample_duration))[:-1]
        s_stamp.append(total_frames - opts.sample_duration)
        for f_start in s_stamp:
            clip_stamps.append(
                list(range(f_start, f_start + opts.sample_duration)))

    if opts.modality != 'RGB':
        # Previously this fell through with no clips and failed inside
        # transpose() with an opaque ValueError; same type, clearer message.
        raise ValueError('unsupported modality: {}'.format(opts.modality))

    clip = [vr.get_batch(stamps).asnumpy() for stamps in clip_stamps]
    return torch.from_numpy(
        np.array(clip, dtype=np.float32).transpose(0, 4, 1, 2, 3))
def __getitem__(self, idx):
    """
    Returns:
        tuple_clip (tensor): [tuple_len x channel x time x height x width]
            when ``self.transforms_`` is set (each transformed frame is
            stacked and permuted to channel-first)
        tuple_order (tensor): [tuple_len], the shuffled clip positions
        idx: the index that was requested
    """
    split = self.train_split if self.train else self.test_split
    videoname = split[idx].split()[0]

    filename = os.path.join(self.root_dir, 'video', videoname)
    videodata = VideoReader(filename, ctx=cpu(0))
    # Only the frame count is needed; frame 0 is no longer decoded three
    # times just to read shape values that were never used.
    length = len(videodata)

    # random select tuple for train, deterministic random select for test
    if not self.train:
        random.seed(idx)
    # NOTE(review): abs() hides the case length < tuple_total_frames, where
    # the indices below can run past the end of the video -- confirm that
    # inputs are always long enough.
    tuple_start = random.randint(0, abs(length - self.tuple_total_frames))

    tuple_clip = []
    tuple_order = list(range(0, self.tuple_len))
    clip_start = tuple_start
    for _ in range(self.tuple_len):
        # Every third frame, clip_len frames per clip.
        clip = videodata.get_batch(
            list(range(clip_start, clip_start + 3 * self.clip_len, 3)))
        tuple_clip.append(clip)
        clip_start = clip_start + 3 * self.clip_len + self.interval

    clip_and_order = list(zip(tuple_clip, tuple_order))
    # random shuffle for train, the same shuffle for test
    if not self.train:
        random.seed(idx)
    random.shuffle(clip_and_order)
    tuple_clip, tuple_order = zip(*clip_and_order)

    if self.transforms_:
        trans_tuple = []
        for clip in tuple_clip:
            trans_clip = []
            # fix seed, apply the same `random transformation` for all frames in the clip
            seed = random.random()
            for frame in clip.asnumpy():
                random.seed(seed)
                frame = self.toPIL(frame)  # PIL image
                frame = self.transforms_(frame)  # tensor [C x H x W]
                trans_clip.append(frame)
            # (T x C X H x W) to (C X T x H x W)
            trans_clip = torch.stack(trans_clip).permute([1, 0, 2, 3])
            trans_tuple.append(trans_clip)
        tuple_clip = trans_tuple
    else:
        tuple_clip = [torch.tensor(clip) for clip in tuple_clip]

    return torch.stack(tuple_clip), torch.tensor(tuple_order), idx
shuffle=1) ex = vl.next() vr = VideoReader(path, ctx=cpu(0)) # a file like object works as well, for in-memory decoding with open(path, 'rb') as f: vr = VideoReader(f, ctx=cpu(0)) print('video frames:', len(vr)) # 1. the simplest way is to directly access frames for i in range(len(vr)): # the video reader will handle seeking and skipping in the most efficient manner frame = vr[i] print(frame.shape) # To get multiple frames at once, use get_batch # this is the efficient way to obtain a long list of frames frames = vr.get_batch([1, 3, 5, 7, 9]) print(frames.shape) # (5, 240, 320, 3) # duplicate frame indices will be accepted and handled internally to avoid duplicate decoding frames2 = vr.get_batch([1, 2, 3, 2, 3, 4, 3, 4, 5]) print(frames2.shape) # (9, 240, 320, 3) # 2. you can do cv2 style reading as well # skip 100 frames vr.skip_frames(100) # seek to start vr.seek(0) batch = vr.next() print('frame shape:', batch.shape)
class Generator:
    """Turns a video into frames, runs a translation model and builds a video.

    Working directories are relative to the current directory: frames are
    dumped to 'data/raw', translated images are read from
    'data/tmp/<model>/test_latest/images', copied to 'data/translated', and
    the final video is written to 'results/dream.mp4'.
    """

    def __init__(self, dataset_path):
        self.counter = 0  # number of frames written for the current dream
        self.dataset = dataset_path
        self.models = os.listdir('checkpoints')  # available model names
        self.dream_len = 0  # target length in frames
        self.frame_rate = 0
        self.vr = None  # decord VideoReader, set by set_new_video()

    def set_dream_len(self, len_in_seconds=60, frame_rate=30):
        """Set the target length (seconds * frame rate) and reset the counter."""
        self.dream_len = len_in_seconds * frame_rate
        self.frame_rate = frame_rate
        self.counter = 0

    def is_dream_terminated(self):
        """Return True once ``counter`` has reached ``dream_len``."""
        return not self.counter < self.dream_len

    def set_new_video(self, path):
        """Open ``path`` with decord, decoding at 320x240."""
        self.vr = VideoReader(path, width=320, height=240, ctx=cpu(0))

    def get_total_frame_count(self):
        return len(self.vr)

    def get_a_frame(self, frame):
        """Return frame ``frame`` as an array, or None when out of range."""
        if frame < self.get_total_frame_count():
            # NOTE(review): "- 1" is applied to the pixel values, not to the
            # frame index -- confirm this is intended.
            return self.vr[frame].asnumpy() - 1
        print('frame is out of range')
        return None

    def get_frames(self, skipping=1):
        """Return every ``skipping``-th frame as one decord batch."""
        # NOTE(review): the range stops at len - 1, so the last frame is
        # never included -- confirm this is intended.
        return self.vr.get_batch(range(0, len(self.vr) - 1, skipping))

    def process(self, model_name):
        """Dump frames to 'data/raw', translate them, collect the results.

        The raw directory is recreated on every call, even when the model
        turns out to be unavailable (in which case nothing else happens).
        """
        # make a new directory for storing temp data
        if os.path.exists('data/raw'):
            shutil.rmtree('data/raw')
        # makedirs also creates 'data' itself when it does not exist yet.
        os.makedirs('data/raw')

        # check to make sure the model is available
        if model_name not in self.models:
            print('specified model not available')
            return

        frame_count = 0
        total_frames = self.get_total_frame_count()
        # decompose a video into frames
        while not self.is_dream_terminated() and frame_count < total_frames:
            img = Image.fromarray(self.get_a_frame(frame_count))
            img.save('data/raw/' + '{num:05d}'.format(num=self.counter) +
                     '.png')
            frame_count += 1
            self.counter += 1

        # start translating the images
        transform(model_name, total_frames)

        # copy the results to the translated folder
        path = os.path.join('data/tmp', model_name, 'test_latest/images')
        # Without the directory, shutil.copy would write every image onto a
        # single file named 'data/translated'.
        os.makedirs('data/translated', exist_ok=True)
        for file in os.listdir(path):
            if file.endswith(".png") and 'fake' in file:
                # copy the translated images to the translated folder
                shutil.copy(os.path.join(path, file), 'data/translated')

    def generate_video(self, path):
        """Assemble the PNG files in ``path`` (sorted by name) into a video."""
        image_files = sorted(path + '/' + img for img in os.listdir(path)
                             if img.endswith(".png"))
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
            image_files, fps=self.frame_rate)
        clip.write_videofile('results/dream.mp4')
def get_decord_batch(path):
    """Decode every frame of ``path`` in one batch and print the batch shape."""
    reader = VideoReader(path, ctx=cpu(0))
    every_frame = range(len(reader))
    decoded = reader.get_batch(every_frame)
    print("decord", decoded.shape)
} ] indices = [0] + indices + [len(scene)] breakpoints = [(indices[i], indices[i + 1]) for i in range(0, len(indices) - 1)] breakpoints = [ _breakpoint for _breakpoint in breakpoints if _breakpoint[0] + 1 != _breakpoint[1] and _breakpoint[0] != _breakpoint[1] ] for itr, (scene_start, scene_end) in enumerate(breakpoints): # validate the scene if scene_end - scene_start > 20: # convert this to iterations frames = scene.get_batch(range(scene_start, scene_end)).asnumpy() start_time, end_time = ( frame_count_to_timestamp(scene_start, fps), frame_count_to_timestamp(scene_end, fps), ) # extract the clip and save it in the final folder if not os.path.exists(CURRENT_DOWNLOAD_PATH): os.makedirs(CURRENT_DOWNLOAD_PATH, exist_ok=True) clip = VideoFileClip(scene_path).subclip( str(start_time), str(end_time)) clip.write_videofile( os.path.join( CURRENT_DOWNLOAD_PATH, f"start_time-{scene_start}-start_end-{scene_end}-"
duration = len(vr)
print(f'The video contains {duration} frames')

################################################################
# Frames are indexed from zero, so the tenth frame is ``vr[9]``:
frame = vr[9]
print(frame.shape)

################################################################
# For deep learning we usually want several frames at once, which is what
# ``get_batch`` is for. Here we take a 32-frame clip by keeping every second
# frame of the first 64:
frame_id_list = range(0, 64, 2)
frames = vr.get_batch(frame_id_list).asnumpy()
print(frames.shape)

################################################################
# As a more advanced feature, the indices of all key frames can be queried
# and then decoded in one batch:
key_indices = vr.get_key_indices()
key_frames = vr.get_batch(key_indices)
print(key_frames.shape)

################################################################
# Pretty flexible, right? Try it on your videos.

################################################################
# Speed comparison
# ----------------
def loadvideo_decord(self, sample, sample_rate_scale=1):
    """Load the frames of one video with Decord.

    Args:
        sample: Path suffix of the video; it is concatenated as-is to
            ``self.data_path``.
        sample_rate_scale: Stride applied to the final list of frame indices
            in the non-test modes.

    Returns:
        The array produced by ``vr.get_batch(...).asnumpy()`` for the selected
        frame indices, or an empty list when the file is missing, smaller
        than 1 KiB, cannot be opened by Decord, or contains no frames.
    """
    # pylint: disable=line-too-long
    fname = self.data_path + sample

    if not os.path.exists(fname):
        return []

    # avoid hanging issue
    file_size = os.path.getsize(fname)
    if file_size < 1 * 1024:
        print('SKIP: ', fname, " - ", file_size)
        return []
    try:
        if self.keep_aspect_ratio:
            vr = VideoReader(fname, num_threads=1, ctx=cpu(0))
        else:
            vr = VideoReader(fname,
                             width=self.new_width,
                             height=self.new_height,
                             num_threads=1,
                             ctx=cpu(0))
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("video cannot be loaded by decord: ", fname)
        return []

    total_frames = len(vr)
    if total_frames == 0:
        # Treated like an unreadable file; otherwise the padding below would
        # index an empty list.
        return []

    if self.mode == 'test':
        all_index = list(range(0, total_frames, self.frame_sample_rate))
        # Pad short videos by repeating the last sampled index.
        all_index.extend([all_index[-1]] * (self.clip_len - len(all_index)))
        vr.seek(0)
        buffer = vr.get_batch(all_index).asnumpy()
        return buffer

    # handle temporal segments
    converted_len = int(self.clip_len * self.frame_sample_rate)
    seg_len = total_frames // self.num_segment

    all_index = []
    for i in range(self.num_segment):
        # Both branches start from every frame_sample_rate-th frame of the
        # segment, skipping frame 0.
        index = list(range(1, seg_len))[::self.frame_sample_rate]
        if seg_len <= converted_len:
            # Too few samples: add unused frame numbers, searching upwards
            # from the middle of the segment, until clip_len is reached.
            diff = self.clip_len - len(index)
            if diff > 0:
                temp = int(seg_len / 2)
                for _ in range(diff):
                    while temp in index:
                        temp += 1
                    index.append(temp)
                    if temp >= seg_len:
                        temp = 0
                index.sort()
        else:
            # Too many samples: drop frames alternately from the front and
            # the back until clip_len remain.
            diff = len(index) - self.clip_len
            if diff > 0:
                front = 0
                back = seg_len - 1
                start_front = True
                for _ in range(diff):
                    if start_front:
                        while front not in index:
                            front += 1
                        index.remove(front)
                        start_front = False
                    else:
                        while back not in index:
                            back -= 1
                        index.remove(back)
                        start_front = True
                index.sort()
        # Shift segment-local indices to positions in the whole video.
        index = np.array(index) + i * seg_len
        all_index.extend(list(index))

    all_index = all_index[::int(sample_rate_scale)]
    vr.seek(0)
    if all_index[-1] >= seg_len:
        # Last index is not below seg_len: replace it with the smallest
        # unused index (or the segment middle) and log the details.
        # NOTE(review): the bound is seg_len, not len(vr) -- with more than
        # one segment this triggers for valid indices too; confirm intent.
        print(all_index)
        t = 0
        while t in all_index:
            t += 1
            if t == seg_len:
                t = int(seg_len / 2)
                break
        all_index[-1] = t
        all_index.sort()
        print(all_index)
        print(fname)
        print(len(all_index))
        print(seg_len)
    buffer = vr.get_batch(all_index).asnumpy()
    return buffer
def extract_frames(video_path, frames_dir, custom_coordinates, start=-1, end=-1, seconds=0.1, meet=True):
    """
    Extract de-duplicated (slide) frames from a video using decord's VideoReader

    Frames are sampled every ``seconds`` seconds, cropped, and compared with
    the previously sampled frame via ``CheckSimilarity``; the last frame of
    every run of similar frames is written as
    ``<frames_dir>/<video filename>/frame<N>.jpg``.

    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param custom_coordinates: dict with "top", "bottom", "left", "right"
        fractions to cut from each side; only read when ``meet`` is False
    :param start: start frame (negative means frame 0)
    :param end: end frame, exclusive (negative means the end of the video)
    :param seconds: frame spacing in seconds
    :param meet: use the fixed Google Meet crop (rows 12.5%-87.5%, columns
        0%-75%) instead of ``custom_coordinates``
    :return: False when OpenCV reports an FPS of 0, otherwise True
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)

    assert os.path.exists(video_path)  # assert the video file exists

    # OpenCV is only used to read the frame rate; release the handle at once.
    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    finally:
        vidcap.release()
    if fps == 0:
        return False

    # Spacing in frames; at least 1, because range() rejects a step of 0
    # (which happened whenever seconds * fps < 1).
    step = max(1, int(seconds * fps))

    try:
        vr = VideoReader(video_path, ctx=gpu(0))  # can set to cpu or gpu
    except Exception:
        # GPU decoding unavailable: fall back to the CPU.
        vr = VideoReader(video_path, ctx=cpu(0))

    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0:  # if end isn't specified assume the end of the video
        end = len(vr)

    frames_list = list(range(start, end, step))
    if not frames_list:
        return True  # nothing to sample

    frames = vr.get_batch(frames_list).asnumpy()
    out_dir = os.path.join(frames_dir, video_filename)
    saved_count = 0
    pending = None  # last frame of the current run, not yet written

    for frame in frames:
        new_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        h, w, _ = new_frame.shape
        if meet:
            top, bottom = int(0.125 * h), int(0.875 * h)
            left, right = int(0), int(0.75 * w)
        else:
            top = int(custom_coordinates["top"] * h)
            bottom = int((1 - custom_coordinates["bottom"]) * h)
            left = int(custom_coordinates["left"] * w)
            right = int((1 - custom_coordinates["right"]) * w)
        # to crop Google meet slides frame only and ignore the speaker part of screen
        new_frame = new_frame[top:bottom, left:right]

        # have seen at least 1 frame before: save it when the content changed
        if pending is not None and not CheckSimilarity(pending, new_frame):
            cv2.imwrite(os.path.join(out_dir, f"frame{saved_count}.jpg"),
                        pending)
            saved_count += 1
        pending = new_frame

    # The final run has never been written at this point. It is always saved,
    # under a fresh name; previously it was dropped when the last frames were
    # similar, and otherwise overwrote the image saved just before it.
    if pending is not None:
        cv2.imwrite(os.path.join(out_dir, f"frame{saved_count}.jpg"), pending)
        saved_count += 1
    return True
class Processor:
    def __init__(self, model, categories, labeled_dataset=None):
        '''
        https://pytorch.org/hub/pytorch_vision_resnet/
        Classification algorithm based on the ResNet model
        Args:
            model: the trained model (switched to eval mode here)
            categories: classification labels
            labeled_dataset: accepted but not used by this class
        '''
        self.model = model
        self.categories = categories
        self.model.eval()

    def classify(self, image):
        '''
        Classifies the input image
        Args:
            image: the input image object
        Returns:
            (ids, probabilities): two numpy arrays holding the indices and
            the softmax probabilities of the 3 most probable classes
        '''
        # input image must be resized and normalized so that it is the same as the trained model
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(image)
        # create a mini-batch as expected by the model
        input_batch = input_tensor.unsqueeze(0)

        # move the input and model to GPU for speed if available
        if torch.cuda.is_available():
            input_batch = input_batch.to('cuda')
            self.model.to('cuda')

        with torch.no_grad():
            output = self.model(input_batch)
        # The output has unnormalized scores. To get probabilities, you can run a softmax on it.
        probabilities = torch.nn.functional.softmax(output[0], dim=0)
        # Show the most probable class
        top_prob, top_catid = torch.topk(probabilities, 3)
        # .cpu() first: .numpy() raises for tensors that live on the GPU,
        # which is where they are whenever CUDA is available.
        return top_catid.cpu().numpy(), top_prob.cpu().numpy()

    @staticmethod
    def _top_labels(results, k=3):
        '''Return the ``k`` keys of ``results`` with the largest values.'''
        ranked = sorted(results.items(), key=lambda x: x[1], reverse=True)
        return [label for label, _ in ranked[:k]]

    def classify_frames(self, skipping=1):
        '''
        function that can iterate through the video frames and classify each frame.
        Args:
            skipping: number of frames to skip while iterating through the video
        Returns:
            The top 3 labels with the highest summed probabilities.
        '''
        # transform the frames to numpy format
        frames = self.get_frames(skipping).asnumpy()
        results = dict()  # label -> probability summed over all frames

        for frame in frames:
            labels, prob = self.classify(Image.fromarray(frame))
            for label, p in zip(labels, prob):
                results[label] = results.get(label, 0) + p

        # only return the labels, in descending order of summed probability
        return self._top_labels(results)

    def load_video(self, path):
        '''
        https://github.com/dmlc/decord#installation
        https://github.com/dmlc/decord/blob/master/examples/video_reader.ipynb
        A decord wrapper implemented per the instruction
        Load the video as an object (decoded at 320x240, stored in self.vr)
        Args:
            path: the path to the video file
        Returns:
            none
        '''
        self.vr = VideoReader(path, width=320, height=240, ctx=cpu(0))

    def get_frames(self, skipping=1):
        '''
        Get the sampled frames from the input video
        Args:
            skipping: number of frames to skip in when sampling
        Returns:
            the frames sampled from the video
        '''
        # NOTE(review): the range stops at len - 1, so the last frame is
        # never included -- confirm this is intended.
        return self.vr.get_batch(range(0, len(self.vr) - 1, skipping))

    def get_average_rgb(self):
        '''
        Get the average RGB values for each color channel
        Args: None
        Returns:
            average RGB values
        '''
        frames = self.get_frames().asnumpy()
        rgb = np.zeros(3)
        for channel in range(3):
            rgb[channel] = np.mean(frames[:, :, :, channel])
        return rgb
def run_data_worker(idx, args, data_queue, safe_gap=10000, nframes=10):
    """Produce labelled frame windows from annotated videos onto ``data_queue``.

    Every ``.mp4`` file in ``args.data_dir`` whose position in the directory
    listing is congruent to ``idx`` modulo ``args.num_data_threads`` is
    handled by this worker. For each video the first file matching
    ``<video>_*.txt`` is read with ``read_anno_txt``; each annotation exposes
    a 'Time' value that is compared against the frame timestamps.

    Items put on the queue are ``(frames, label, vid, t)`` where ``frames`` is
    a list of ``nframes`` images (channel axis reversed, resized to 640x360),
    ``label`` is 0 for a negative and 1 for a positive example, ``vid`` is the
    video's base name and ``t`` the time of the window's last frame.

    Args:
        idx: worker index; also used as the decord CPU context index.
        args: namespace providing ``data_dir``, ``num_data_threads`` and
            ``interval``.
        data_queue: queue receiving the produced examples.
        safe_gap: margin kept around every annotation time; negatives are
            only taken outside of it. Presumably milliseconds, like the
            timestamp table -- TODO confirm.
        nframes: number of frames per example.
    """
    # A whole window must fit inside the safety margin.
    assert args.interval * nframes < safe_gap
    videos = [i for i in os.listdir(args.data_dir) if i.endswith('.mp4')]
    videos = [os.path.join(args.data_dir, i) for i in videos]
    # Round-robin split of the videos between the workers.
    sub_videos = []
    for i, v in enumerate(videos):
        if i % args.num_data_threads == idx:
            sub_videos.append(v)
    for video in sub_videos:
        # Raises IndexError when the video has no matching annotation file.
        txt = glob.glob(video + '_*.txt')[0]
        annos = read_anno_txt(txt)
        vr = VideoReader(video, ctx=cpu(idx))
        vid = os.path.basename(video)
        # Second element of every frame's timestamp pair, scaled by 1000
        # (presumably seconds -> milliseconds -- TODO confirm).
        frame_ts_table = [
            int(vr.get_frame_timestamp(i)[1] * 1000) for i in range(len(vr))
        ]
        frame_ids = []
        # t: current time, fid: current frame index, aid: current annotation
        t = fid = aid = 0  # three trace pointers
        # O(N+M), N=len(intervals), M=len(vr)
        while aid < len(annos) and \
                t < max(args.interval // 2, annos[aid]['Time'] - safe_gap):
            # Extract negative example
            # NOTE: the loop ends once the last annotation is consumed, so
            # the segment from the last positive anno to the video end
            # yields no examples
            while fid < len(vr) and frame_ts_table[fid] < t:
                fid += 1
            # Sliding window over the most recent nframes frame indices.
            # NOTE(review): fid can equal len(vr) here, which is not a valid
            # frame index -- confirm this cannot happen in practice.
            if len(frame_ids) < nframes:
                frame_ids.append(fid)
            else:
                frame_ids.pop(0)
                frame_ids.append(fid)
            if len(frame_ids) == nframes:
                frames = [
                    cv2.resize(img[:, :, ::-1], (640, 360))
                    for img in list(vr.get_batch(frame_ids).asnumpy())
                ]
                data_queue.put((frames, 0, vid, t))
            t += args.interval
            if t >= annos[aid]['Time'] - safe_gap:
                # Extract positive example
                frame_ids.clear()
                for i in range(nframes - 1, -1, -1):
                    # i = 9, 8, ..., 0 when nframes = 10
                    t = annos[aid]['Time'] - args.interval * i
                    # NOTE(review): no bound check on fid in this loop.
                    while frame_ts_table[fid] < t:
                        fid += 1
                    frame_ids.append(fid)
                frames = [
                    cv2.resize(img[:, :, ::-1], (640, 360))
                    for img in list(vr.get_batch(frame_ids).asnumpy())
                ]
                data_queue.put((frames, 1, vid, t))
                frame_ids.clear()
                # Resume after the margin and skip every annotation whose
                # own margin already covers the new time.
                t = annos[aid]['Time'] + safe_gap
                while aid < len(annos) and t >= annos[aid]['Time'] - safe_gap:
                    aid += 1