def read_video(video_path):
    """
    Read a video file, resizing frames so the minimum side is 256 pixels.

    Args:
        video_path: Path to video file

    Returns:
        list: Decoded frames (one array per frame, as yielded by
        FFmpegReader), resized so the shorter side is 256 pixels.
        (The previous docstring claimed a numpy array; the function has
        always returned a list of frames.)
    """
    vinfo = ffprobe(video_path)['video']
    width = int(vinfo['@width'])
    height = int(vinfo['@height'])

    # Scale so the shorter side becomes exactly 256 px; ceil keeps both
    # dimensions at least 256.
    scaling = 256.0 / min(width, height)
    new_width = int(math.ceil(scaling * width))
    new_height = int(math.ceil(scaling * height))

    # Let ffmpeg perform the resize while decoding.
    reader = FFmpegReader(video_path,
                          outputdict={'-s': "{}x{}".format(new_width,
                                                           new_height)})
    try:
        frames = list(reader.nextFrame())
    finally:
        # Release the ffmpeg subprocess even if decoding fails.
        reader.close()
    return frames
def read_video(self, path):
    """
    Read a video file into a 5-d tensor.

    Parameters:
        path (str): Path of the video to be read.

    Returns:
        Numpy.ndarray: 5-d tensor with shape
        (1, <No. of frames>, <height>, <width>, <channels>)
    """
    capt = FFmpegReader(filename=path)
    self.fps = int(capt.inputfps)

    list_of_frames = []
    for index, frame in enumerate(capt.nextFrame()):
        # frame -> (<height>, <width>, 3)
        capture_frame = True
        # Fixed: compare to None with `is not` (identity), not `!=`.
        if self.required_fps is not None:
            # Keep the first `required_fps` frames out of each run of
            # `fps` consecutive frames (i.e. of each second of video).
            is_valid = range(self.required_fps)
            capture_frame = (index % self.fps) in is_valid

        if capture_frame:
            if self.target_size is not None:
                # Resize via PIL, then convert back to a uint8 array.
                temp_image = image.array_to_img(frame)
                frame = image.img_to_array(
                    temp_image.resize(self.target_size,
                                      Image.ANTIALIAS)).astype('uint8')
            list_of_frames.append(frame)

    temp_video = np.stack(list_of_frames)
    capt.close()

    if self.to_gray:
        temp_video = rgb2gray(temp_video)
    if self.max_frames is not None:
        temp_video = self.process_video(video=temp_video)

    # Prepend a leading batch axis of size 1.
    return np.expand_dims(temp_video, axis=0)
def get_frames(self, filename, wanted):
    """
    Decode `filename` and return the processed frames whose decode
    indices are in `wanted`.

    Parameters:
        filename: Path of the video to decode.
        wanted: Collection of frame indices to grab (membership-tested).

    Returns:
        list: ``self.process_frame(n, filename, frame)`` for each of the
        ``self.grab_n_frames`` grabbed (and bar-cropped) frames.

    Raises:
        RuntimeError: If fewer than ``self.grab_n_frames`` wanted frames
        were found in the video.
    """
    v = FFmpegReader(filename)  # , outputdict={'-pix_fmt': 'yuv444p'})
    frames = None
    n_frames = 0
    for n, frame in enumerate(v.nextFrame()):
        # the FFmpegReader API actually renders every frame; so it's rather
        # slow; but it ensures that every frame is rendered, not just
        # i-frames... getting i-frames would be faster, but might increase
        # false-negative rate due to picking out different frames from
        # different encodings
        if n not in wanted:
            continue
        if frames is None:
            # Lazily allocate once the frame shape is known.
            frames = np.ndarray(shape=(self.grab_n_frames, ) + frame.shape,
                                dtype=np.float64)
        frames[n_frames] = frame
        n_frames += 1
        if n_frames == self.grab_n_frames:
            break
    v.close()
    if n_frames != self.grab_n_frames:
        # BUG FIX: report `n_frames` (frames actually grabbed).  The old
        # message used `len(frames)`, which was always the preallocated
        # grab_n_frames, and crashed with TypeError when no wanted frame
        # was found at all (frames still None).
        raise RuntimeError(
            'Video has invalid number of frames: {}: {}'.format(
                filename, n_frames))
    frames = self._crop_bars(frames)
    return [
        self.process_frame(n, filename, frame)
        for n, frame in enumerate(frames)
    ]
def read_mj2_frames(fname):
    """Decode an MJ2 file as 16-bit little-endian grayscale.

    Returns the frames stacked into one array with singleton
    dimensions squeezed out.
    """
    from skvideo.io import FFmpegReader

    reader = FFmpegReader(fname, outputdict={'-pix_fmt': 'gray16le'})
    collected = list(reader)
    reader.close()
    return np.stack(collected).squeeze()
def _read_video(self, path):
    """
    Parameters:
        path (str): Required
            Path of the video to be read

    Returns:
        Numpy.ndarray
            A 5-d tensor with shape
            (1, <No. of frames>, <height>, <width>, <channels>)
    """
    cap = FFmpegReader(filename=path)
    list_of_frames = []
    self.fps = int(cap.inputfps)  # Frame Rate

    for index, frame in enumerate(cap.nextFrame()):
        capture_frame = True
        # Fixed: compare to None with `is not` (identity), not `!=`.
        if self.required_fps is not None:
            # Keep the first `required_fps` frames out of each run of
            # `fps` consecutive frames (i.e. of each second of video).
            is_valid = range(self.required_fps)
            capture_frame = (index % self.fps) in is_valid

        if capture_frame:
            if self.target_size is not None:
                # Resize via PIL, then convert back to a uint8 array.
                temp_image = image.array_to_img(frame)
                frame = image.img_to_array(
                    temp_image.resize(
                        self.target_size,
                        Image.ANTIALIAS)).astype('uint8')
            # Shape of each frame -> (<height>, <width>, 3)
            list_of_frames.append(frame)

    temp_video = np.stack(list_of_frames)
    cap.close()

    if self.to_gray:
        temp_video = rgb2gray(temp_video)
    if self.max_frames is not None:
        temp_video = self._process_video(video=temp_video)

    return temp_video
def get_frame_count(self, paths):
    """
    Can be used to determine the value of `max_frames`.

    Parameters:
        paths (list): Required
            A list of paths of the videos to be read.

    Returns:
        dict (python dictionary)
            Maps each video path to the total number of frames in that
            video.
    """
    counts = {}
    for video_path in paths:
        reader = FFmpegReader(filename=video_path)
        counts[video_path] = reader.inputframenum
        reader.close()
    return counts
def get_mp4_frames(mp4_path, skip_frames, num_frames_per_event, do_flip, brighten_val, is_high_res, do_aug):
    """
    Sample `num_frames_per_event` frames from the middle of an mp4,
    taking every `skip_frames`-th frame, centered spatially inside a
    fixed target shape.

    Parameters:
        mp4_path: Path of the mp4 file to read.
        skip_frames (int): Temporal stride; keep one frame out of every
            `skip_frames` within the sampled window.
        num_frames_per_event (int): Number of frames to return.
        do_flip (bool): If True, horizontally flip each kept frame
            (via hflip_img).
        brighten_val (float): If < 1.0, applied to each kept frame via
            adj_brightness.
        is_high_res (bool): Selects EXPECTED_HIGH_RES vs EXPECTED_LOW_RES
            as the target frame shape.
        do_aug: Unused in this function.
            # NOTE(review): confirm whether do_aug should gate the
            # flip/brightness augmentation above — it is never read here.

    Returns:
        numpy.ndarray of shape (num_frames_per_event,) + frame_shape,
        dtype uint8; or None if the file cannot be opened, the video is
        too short for the window, or any frame dimension exceeds the
        target shape.
    """
    # Get mp4 reader
    try:
        reader = FFmpegReader(mp4_path)
    except Exception as e:
        # Best-effort: unreadable files are reported (optionally) and skipped.
        if PRINT_ERRORS:
            print(e)
        return None

    # Get starting frame and offsets
    frame_shape = EXPECTED_HIGH_RES if is_high_res else EXPECTED_LOW_RES
    # First decode index such that the sampled window is centered in the video.
    start_frame = (reader.inputframenum - (num_frames_per_event * skip_frames)) // 2
    if start_frame <= 0:
        # Video too short to hold the full window.
        reader.close()
        return None
    # Spatial offsets that center the decoded frame inside frame_shape;
    # negative means the frame is larger than the target, so bail out.
    start_x = int((frame_shape[0] - reader.outputheight) // 2)
    if start_x < 0:
        reader.close()
        return None
    start_y = int((frame_shape[1] - reader.outputwidth) // 2)
    if start_y < 0:
        reader.close()
        return None
    start_z = int((frame_shape[2] - reader.outputdepth) // 2)
    if start_z < 0:
        reader.close()
        return None

    # Put middle (num_frames_per_event * skip_frames) input frames in numpy array
    cur_i = 0      # next slot to fill in the output array
    cur_frame = 0  # index of the frame currently being decoded
    frame_array = np.zeros(shape=((num_frames_per_event, ) + frame_shape), dtype=np.uint8)
    for frame in reader.nextFrame():
        if cur_frame >= start_frame:
            cur_offset = cur_frame - start_frame
            # Keep every `skip_frames`-th frame until the output is full;
            # zero padding (from np.zeros) remains around smaller frames.
            if cur_i < num_frames_per_event and (cur_offset % skip_frames) == 0:
                frame_array[cur_i, start_x:start_x+reader.outputheight, start_y:start_y+reader.outputwidth, start_z:start_z+reader.outputdepth] = frame
                if brighten_val < 1.0:
                    frame_array[cur_i, :, :, :] = adj_brightness(frame_array[cur_i, :, :, :], brighten_val)
                if do_flip:
                    frame_array[cur_i, :, :, :] = hflip_img(frame_array[cur_i, :, :, :])
                cur_i += 1
        cur_frame += 1
    reader.close()

    # Return array with frames
    return frame_array