def __init__(
    self,
    filename: str,
    trim: Tuple[int, int],
    crop: Tuple[int, int, int, int],
    frame_rate: float = 15,
) -> None:
    """Load one trimmed, cropped video clip and its audio spectrogram.

    Args:
        filename: Path stem shared by both inputs; ``".mp4"`` and ``".wav"``
            are appended to it.
        trim: ``(first, stop)`` frame indices. Frames with
            ``first <= index < stop`` are kept.
        crop: Crop box applied to every kept frame before resizing:
            ``crop[1]:crop[3]`` slices the first frame axis and
            ``crop[0]:crop[2]`` the second.
        frame_rate: Value passed to FFmpeg as ``-r`` on both the input and
            the output side.
    """
    super().__init__()

    # Get video frames with scikit-video
    reader = FFmpegReader(
        filename + ".mp4",
        inputdict={"-r": str(frame_rate)},
        outputdict={"-r": str(frame_rate)},
    )
    frames = []
    try:
        for frame_idx, frame in enumerate(reader.nextFrame()):
            # Trim video (time)
            if frame_idx < trim[0]:
                continue
            if frame_idx >= trim[1]:
                break
            # Crop frames (space)
            frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
            frames.append(cv2.resize(frame, (140, 140)))
    finally:
        # Always release the reader, including when we stop early at
        # trim[1] or when cropping/resizing raises.
        reader.close()

    # Change to NumPy array with PyTorch dimension format: axis 3 of the
    # stacked frames is moved to position 1.
    self.frames: np.ndarray = np.transpose(
        np.array(frames, dtype=float), axes=(0, 3, 1, 2)
    )

    # Magnitude of the short-time Fourier transform of the audio track.
    y, _ = librosa.load(filename + ".wav", sr=2000)
    D = librosa.core.stft(y, n_fft=510)
    self.samples = np.abs(D)
def read_video(self, path):
    """Decode a video file into a stacked frame array with a batch axis.

    Sets ``self.fps`` to the integer input frame rate reported by the
    reader. When ``self.required_fps`` is set, a frame is kept only if
    ``index % self.fps`` is below ``required_fps``. Kept frames are
    optionally resized to ``self.target_size``, stacked, optionally
    converted with ``rgb2gray`` and passed through ``self.process_video``.

    Args:
        path: Path of the video to read.

    Returns:
        The processed video with a new leading axis of length 1. The
        original comment describes this as a 5-d array of shape
        (1, <No. of frames>, <height>, <width>, <channels>); whether the
        grayscale / ``process_video`` steps preserve that rank is not
        visible here -- TODO confirm.

    Raises:
        ValueError: From ``np.stack`` when no frame was kept.
    """
    capt = FFmpegReader(filename=path)
    try:
        self.fps = int(capt.inputfps)
        # Offsets within each block of `fps` frames that should be kept;
        # None means "keep every frame".
        keep_offsets = (
            None if self.required_fps is None else range(self.required_fps)
        )

        list_of_frames = []
        for index, frame in enumerate(capt.nextFrame()):
            # frame -> (<height>, <width>, 3)
            if keep_offsets is not None and (index % self.fps) not in keep_offsets:
                continue
            if self.target_size is not None:
                temp_image = image.array_to_img(frame)
                # LANCZOS is the filter formerly exposed as ANTIALIAS
                # (the old alias was removed in Pillow 10).
                frame = image.img_to_array(
                    temp_image.resize(self.target_size, Image.LANCZOS)
                ).astype('uint8')
            list_of_frames.append(frame)

        temp_video = np.stack(list_of_frames)
    finally:
        # Release the reader even if decoding or stacking fails.
        capt.close()

    if self.to_gray:
        temp_video = rgb2gray(temp_video)
    if self.max_frames is not None:
        temp_video = self.process_video(video=temp_video)
    return np.expand_dims(temp_video, axis=0)
def _get_frame(self, seek, video_idx, last):
    """Return the frame at position ``seek`` of video ``video_idx``.

    Open readers are cached in ``self.opened_videos[video_idx]`` as
    ``[next_seek, frame_iterator, reader]`` items. A request whose ``seek``
    equals an item's ``next_seek`` continues on that iterator; otherwise a
    new reader is opened and advanced to ``seek``.

    Args:
        seek: Zero-based index of the frame to return.
        video_idx: Index into ``self.videos`` / ``self.opened_videos``.
        last: When true, the reader used for this request is closed and
            dropped from the cache after the frame has been read.

    Returns:
        The next frame produced by the selected iterator.

    Raises:
        StopIteration: If the iterator has no frame left. The reader is
            closed and removed from the cache before re-raising.
    """
    handles = self.opened_videos[video_idx]  # cached items for this video
    # Look for an item whose seek pointer matches the requested frame.
    opened_video = next((ov for ov in handles if ov[0] == seek), None)

    if opened_video is None:  # no (matching) handle found
        video_path = join(self.root, self.videos[video_idx][1][0])
        video_file = FFmpegReader(video_path)
        # Skip the first `seek` frames so the next one read is the target.
        opened_video = [seek, islice(video_file.nextFrame(), seek, None), video_file]
        handles.append(opened_video)

    opened_video[0] = seek + 1  # update seek pointer
    try:
        frame = next(opened_video[1])
    except StopIteration:
        # An exhausted iterator can never serve another request; release it
        # instead of leaving an open reader in the cache.
        opened_video[2].close()
        handles.remove(opened_video)
        raise

    if last:
        opened_video[2].close()  # public close(), not the private _close()
        handles.remove(opened_video)

    return frame
def read_video(video_path):
    """
    Read all frames of a video file.

    Frames are resized by FFmpeg so that the shorter side is 256 pixels
    (both sides are scaled by the same factor and rounded up).

    Args:
        video_path: Path to video file

    Returns:
        frames: List of the frames yielded by the reader, in decode order
    """
    vinfo = ffprobe(video_path)['video']
    width = int(vinfo['@width'])
    height = int(vinfo['@height'])

    scaling = 256.0 / min(width, height)
    new_width = int(math.ceil(scaling * width))
    new_height = int(math.ceil(scaling * height))

    # Resize frames
    reader = FFmpegReader(
        video_path,
        outputdict={'-s': "{}x{}".format(new_width, new_height)},
    )
    try:
        return list(reader.nextFrame())
    finally:
        # Release the reader even if decoding fails part-way.
        reader.close()
def PreProcessVideo(fmt, filename, output, start=250, n_frames=5):
    """Flatten a window of video frames into per-pixel rows and save them.

    For frames ``start <= t < start + n_frames`` every pixel becomes one row
    ``[t_scaled, x_scaled, y_scaled, c1, c2, c3]``. The resulting array is
    written to ``output`` with ``np.save``.

    Args:
        fmt: Pixel format passed to FFmpeg as ``-pix_fmt``.
        filename: Path of the video to read.
        output: Destination passed to ``np.save``.
        start: Index of the first frame to encode.
        n_frames: Number of consecutive frames to encode.
    """
    vinfo = ffprobe(filename)['video']
    width = float(vinfo['@width'])
    n_pixels = int(vinfo['@height']) * int(vinfo['@width'])

    # One row per pixel per frame. Sized from n_frames and zero-filled so
    # rows that are never written (short video) are not garbage.
    X = np.zeros((n_pixels * n_frames, 6))

    v = FFmpegReader(filename, outputdict={'-pix_fmt': fmt})
    n = 0
    try:
        for t, frame in enumerate(v.nextFrame()):
            if t < start:
                continue
            if t >= start + n_frames:
                break
            print(t)
            sys.stdout.flush()

            # NOTE(review): time and row coordinates are both normalised by
            # the frame *width*, as in the original code -- confirm this is
            # intended rather than n_frames / height.
            t_scaled = (float(t - start) / width) * 255.0
            for row_n, line in enumerate(frame):
                y_scaled = (float(row_n) / width) * 255.0
                for col_n, pixel in enumerate(line):
                    c1, c2, c3 = pixel
                    x_scaled = (float(col_n) / width) * 255.0
                    X[n] = np.array([t_scaled, x_scaled, y_scaled, c1, c2, c3])
                    n += 1
    finally:
        # Release the reader, including after the early break above.
        v.close()

    print("Done with the encode part")
    np.save(output, X, allow_pickle=False, fix_imports=False)
def iterate_video(filename, x1, y1, x2, y2, x3, y3, x4, y4, down_scale=True):
    """
    Iterate over all frames of the video.
    Count the number of cars that drive through one of the two rectangles.
    The visualisation is saved as a video in traffic.avi.
    Press Q to stop.
    :param filename: file name of the video
    :param x1, y1, x2, y2: two corner points of the first rectangle
    :param x3, y3, x4, y4: two corner points of the second rectangle
    :param down_scale: boolean: if True the resolution of the video is halved
    :return: None
    :raises FileNotFoundError: if ``filename`` is not an existing file
    """
    queue = collections.deque()
    if not os.path.isfile(filename):
        raise FileNotFoundError("file not found")

    reader = FFmpegReader(filename)
    shape = reader.getShape()[1:3]
    if down_scale:
        shape = [shape[0] // 2, shape[1] // 2]
    # OpenCV sizes are (width, height). The same tuple is used for the
    # writer and for resizing so every written frame matches the writer
    # size, including for odd source dimensions.
    frame_size = (shape[1], shape[0])
    stepsize = 5

    video_writer = cv2.VideoWriter(
        'traffic.avi', cv2.VideoWriter_fourcc(*'XVID'), 30.0, frame_size)
    try:
        for frame in reader.nextFrame():
            if down_scale:
                frame = cv2.resize(frame, frame_size,
                                   interpolation=cv2.INTER_AREA)
            # Reverse the channel axis before queueing the frame.
            queue.append(frame[:, :, ::-1])

            if len(queue) > 2 * stepsize:
                # Compare three frames spaced `stepsize` apart.
                res = traffic(queue[0], queue[stepsize], queue[stepsize * 2],
                              x1, y1, x2, y2, x3, y3, x4, y4)
                cv2.imshow("Traffic", res)
                # Mask to the low byte: some platforms set higher bits.
                k = cv2.waitKey(1) & 0xFF
                queue.popleft()
                video_writer.write(res)
                if k == ord('q'):  # press Q to break
                    break
    finally:
        # Finalise the output file and release the decoder even on error.
        video_writer.release()
        reader.close()
def get_frames(self, filename, wanted):
    """Collect and process the first ``self.grab_n_frames`` wanted frames.

    Args:
        filename: Path of the video to read.
        wanted: Container of zero-based frame indices to grab; only
            membership (``n in wanted``) is used.

    Returns:
        A list with ``self.process_frame(n, filename, frame)`` for each
        frame of the array returned by ``self._crop_bars``.

    Raises:
        RuntimeError: If fewer than ``self.grab_n_frames`` wanted frames
            were found in the video.
    """
    v = FFmpegReader(filename)  # , outputdict={'-pix_fmt': 'yuv444p'})
    frames = None
    n_frames = 0
    try:
        for n, frame in enumerate(v.nextFrame()):
            # the FFmpegReader API actually renders every frame; so it's
            # rather slow; but it ensures that every frame is rendered, not
            # just i-frames... getting i-frames would be faster, but might
            # increase false-negative rate due to picking out different
            # frames from different encodings
            if n not in wanted:
                continue
            if frames is None:
                # Allocate lazily: the frame shape is only known once the
                # first wanted frame has been decoded.
                frames = np.ndarray(
                    shape=(self.grab_n_frames, ) + frame.shape,
                    dtype=np.float64)
            frames[n_frames] = frame
            n_frames += 1
            if n_frames == self.grab_n_frames:
                break
    finally:
        v.close()

    if n_frames != self.grab_n_frames:
        # Report the number actually found; `frames` may still be None
        # here, and its length is the preallocated size anyway.
        raise RuntimeError(
            'Video has invalid number of frames: {}: {}'.format(
                filename, n_frames))

    frames = self._crop_bars(frames)
    return [
        self.process_frame(n, filename, frame)
        for n, frame in enumerate(frames)
    ]
def __getitem__(self, index):
    """Decode, augment and clip one video sample.

    The video at ``self.json_data[index].path`` is decoded at a jittered
    frame rate, run through ``transform_pre``, the augmentor and
    ``transform_post``, then cut to a window of
    ``clip_size * nclips * step_size`` frames (the whole video when
    ``nclips`` is -1), sub-sampled every ``step_size`` frames and padded by
    repeating the last frame up to ``clip_size * nclips`` frames.

    Args:
        index: Position of the sample in ``self.json_data``.

    Returns:
        ``(data, target_idx)``, or ``(data, target_idx, item.id)`` when
        ``self.get_item_id`` is set. ``data`` is the stacked frames with
        axes 0 and 1 swapped (presumably channels-first, then time --
        confirm against ``transform_post``).

    Raises:
        IndexError: If padding is required but no frame is available.
    """
    item = self.json_data[index]

    framerate_sampled = self.augmentor.jitter_fps(FRAMERATE)
    optional_args = {"-r": "%d" % framerate_sampled}
    duration = self.get_duration(item.path)
    if duration is not None:
        # Cap the number of decoded frames at the clip's nominal length.
        nframes = int(duration * framerate_sampled)
        optional_args["-vframes"] = "%d" % nframes

    # Open video file
    reader = FFmpegReader(item.path, inputdict={}, outputdict=optional_args)
    imgs = []
    try:
        for img in reader.nextFrame():
            imgs.append(img)
    except (RuntimeError, ZeroDivisionError) as exception:
        # Best effort: continue with whatever was decoded so far.
        print('{}: WEBM reader cannot open {}. Empty '
              'list returned.'.format(type(exception).__name__, item.path))
    finally:
        # One reader is created per item; release it every time.
        reader.close()

    imgs = self.transform_pre(imgs)
    imgs, label = self.augmentor(imgs, item.label)
    imgs = self.transform_post(imgs)

    num_frames = len(imgs)
    target_idx = self.classes_dict[label]

    if self.nclips > -1:
        num_frames_necessary = self.clip_size * self.nclips * self.step_size
    else:
        num_frames_necessary = num_frames

    offset = 0
    if num_frames_necessary < num_frames:
        # If there are more frames, then sample starting offset.
        diff = (num_frames - num_frames_necessary)
        # temporal augmentation
        if not self.is_val:
            offset = np.random.randint(0, diff)

    imgs = imgs[offset:num_frames_necessary + offset:self.step_size]

    target_len = self.clip_size * self.nclips
    if len(imgs) < target_len:
        if len(imgs) == 0:
            raise IndexError(
                'no frames available to pad clip for {}'.format(item.path))
        # Pad short clips by repeating the last frame.
        imgs.extend([imgs[-1]] * (target_len - len(imgs)))

    # format data to torch
    data = torch.stack(imgs)
    data = data.permute(1, 0, 2, 3)
    if self.get_item_id:
        return (data, target_idx, item.id)
    return (data, target_idx)
def __init__(
    self,
    filenames: List[str],
    trims: List[Tuple[int, int]],
    crops: List[Tuple[int, int, int, int]],
    frame_rate: float = 15,
):
    """Preprocess the video frames and audio spectrogram of every source.

    Args:
        filenames: Path stems; ``".mp4"`` and ``".wav"`` are appended to
            each one.
        trims: Per-source ``(first, stop)`` frame indices. Frames with
            ``first <= index < stop`` are kept.
        crops: Per-source crop box applied before resizing:
            ``crop[1]:crop[3]`` slices the first frame axis and
            ``crop[0]:crop[2]`` the second.
        frame_rate: Value passed to FFmpeg as ``-r`` on both the input and
            the output side.
    """
    # TDCCMCDataset is an unconventional dataset, where each data is
    # dynamically sampled whenever needed instead of a static dataset.
    # Therefore, in `__init__`, we do not define a static dataset. Instead,
    # we simply preprocess the video and audio for faster `__getitem__`.
    super().__init__()

    # One (frames, spectrogram) pair per source, in input order.
    self.sources: List[Tuple[np.ndarray, np.ndarray]] = []
    for filename, trim, crop in zip(filenames, trims, crops):
        # Get video frames with scikit-video
        reader = FFmpegReader(
            filename + ".mp4",
            inputdict={"-r": str(frame_rate)},
            outputdict={"-r": str(frame_rate)},
        )
        frames = []
        try:
            for frame_idx, frame in enumerate(reader.nextFrame()):
                # Trim video (time)
                if frame_idx < trim[0]:
                    continue
                if frame_idx >= trim[1]:
                    break
                # Crop frames (space)
                frame = frame[crop[1]:crop[3], crop[0]:crop[2], :]
                frames.append(cv2.resize(frame, (140, 140)))
        finally:
            # Release each reader before opening the next one, including
            # when we stop early at trim[1].
            reader.close()

        # Change to NumPy array with PyTorch dimension format: axis 3 of
        # the stacked frames is moved to position 1.
        frames = np.array(frames, dtype=float)
        frames = np.transpose(frames, axes=(0, 3, 1, 2))

        # STFT audio
        # TODO Magic number sr=2000, n_fft=510
        y, _ = librosa.load(filename + ".wav", sr=2000)
        D = librosa.core.stft(y, n_fft=510)
        D = np.abs(D)

        # Save video frames and audio
        self.sources.append((frames, D))
def _read_video(self, path):
    """
    Decode a video file into a stacked frame array.

    Sets ``self.fps`` to the integer input frame rate reported by the
    reader. When ``self.required_fps`` is set, a frame is kept only if
    ``index % self.fps`` is below ``required_fps``.

    Parameters:
        path (str): Required
            Path of the video to be read

    Returns:
        Numpy.ndarray
            The kept frames stacked along a new first axis, i.e.
            (<No. of frames>, <height>, <width>, <channels>) before the
            optional ``rgb2gray`` and ``self._process_video`` steps. No
            batch axis is added in this method.
            NOTE(review): the previous docstring promised a 5-d tensor with
            a leading axis of 1 -- confirm whether ``_process_video`` or
            the caller adds it.

    Raises:
        ValueError: From ``np.stack`` when no frame was kept.
    """
    cap = FFmpegReader(filename=path)
    try:
        list_of_frames = []
        self.fps = int(cap.inputfps)  # Frame Rate
        # Offsets within each block of `fps` frames that should be kept;
        # None means "keep every frame".
        keep_offsets = (
            None if self.required_fps is None else range(self.required_fps)
        )

        for index, frame in enumerate(cap.nextFrame()):
            if keep_offsets is not None and (index % self.fps) not in keep_offsets:
                continue
            if self.target_size is not None:
                temp_image = image.array_to_img(frame)
                # LANCZOS is the filter formerly exposed as ANTIALIAS
                # (the old alias was removed in Pillow 10).
                frame = image.img_to_array(
                    temp_image.resize(
                        self.target_size,
                        Image.LANCZOS)).astype('uint8')
            # Shape of each frame -> (<height>, <width>, 3)
            list_of_frames.append(frame)

        temp_video = np.stack(list_of_frames)
    finally:
        # Release the reader even if decoding or stacking fails.
        cap.close()

    if self.to_gray:
        temp_video = rgb2gray(temp_video)
    if self.max_frames is not None:
        temp_video = self._process_video(video=temp_video)
    return temp_video
# Dump every frame of the video `fnm` as a PNG: the first `val_split`
# frames go to the training image directory, the rest to the validation
# directory (renumbered from 0). `fnm` and `num` come from the enclosing
# scope.
vid = FFmpegReader(fnm)
val_split = 2200
count = 0
try:
    for frame_num, frame in enumerate(vid.nextFrame()):
        if frame_num % 100 == 0:
            print(frame_num)  # progress indicator
        if frame_num < val_split:
            imsave('/mnt/data/pigs/imgs/{}/{}.png'.format(num, frame_num),
                   frame)
        else:
            imsave(
                '/mnt/data/pigs/val_imgs/{}/{}.png'.format(
                    num, frame_num - val_split), frame)
finally:
    # Release the reader even if writing an image fails.
    vid.close()
def get_mp4_frames(mp4_path, skip_frames, num_frames_per_event, do_flip,
                   brighten_val, is_high_res, do_aug):
    """Extract evenly spaced frames from the middle of an mp4 file.

    Every ``skip_frames``-th frame of the middle
    ``num_frames_per_event * skip_frames`` input frames is copied, centred,
    into a zero-filled array of the expected frame shape.

    Args:
        mp4_path: Path of the video to read.
        skip_frames: Stride between sampled frames.
        num_frames_per_event: Number of frames to return.
        do_flip: When true, each sampled frame is passed through
            ``hflip_img``.
        brighten_val: Values below 1.0 pass each sampled frame through
            ``adj_brightness`` with this value.
        is_high_res: Selects ``EXPECTED_HIGH_RES`` rather than
            ``EXPECTED_LOW_RES`` as the frame shape.
        do_aug: Unused by this function.

    Returns:
        A ``uint8`` array of shape ``(num_frames_per_event,) + frame_shape``,
        or ``None`` when the reader cannot be created, when the computed
        start frame is not positive, or when the decoded frame exceeds the
        expected shape along any axis.
    """
    # Get mp4 reader
    try:
        reader = FFmpegReader(mp4_path)
    except Exception as e:
        if PRINT_ERRORS:
            print(e)
        return None

    try:
        # Get starting frame and offsets
        frame_shape = EXPECTED_HIGH_RES if is_high_res else EXPECTED_LOW_RES
        start_frame = (reader.inputframenum
                       - (num_frames_per_event * skip_frames)) // 2
        if start_frame <= 0:
            return None

        height = reader.outputheight
        width = reader.outputwidth
        depth = reader.outputdepth
        # Offsets that centre the decoded frame inside the expected shape;
        # a negative offset means the frame does not fit.
        start_x = int((frame_shape[0] - height) // 2)
        start_y = int((frame_shape[1] - width) // 2)
        start_z = int((frame_shape[2] - depth) // 2)
        if start_x < 0 or start_y < 0 or start_z < 0:
            return None

        # Put middle (num_frames_per_event * skip_frames) input frames in
        # numpy array
        frame_array = np.zeros(
            shape=((num_frames_per_event, ) + frame_shape), dtype=np.uint8)
        cur_i = 0
        for cur_frame, frame in enumerate(reader.nextFrame()):
            if cur_i >= num_frames_per_event:
                # All slots are filled; stop decoding the rest of the file.
                break
            if cur_frame < start_frame:
                continue
            if (cur_frame - start_frame) % skip_frames != 0:
                continue

            frame_array[cur_i,
                        start_x:start_x + height,
                        start_y:start_y + width,
                        start_z:start_z + depth] = frame
            if brighten_val < 1.0:
                frame_array[cur_i, :, :, :] = adj_brightness(
                    frame_array[cur_i, :, :, :], brighten_val)
            if do_flip:
                frame_array[cur_i, :, :, :] = hflip_img(
                    frame_array[cur_i, :, :, :])
            cur_i += 1

        # Return array with frames
        return frame_array
    finally:
        # Single exit point for releasing the reader on every path.
        reader.close()
nClasses, subDir, nFrames, nVideos)) # Create class directories if they do not exist classDir = os.path.join(rootDirSave, subDir) if not os.path.exists(classDir): os.makedirs(classDir) pbar3 = trange(nFrames, ncols=100, position=4, desc='Video progress ') frameCount = 0 # Tensor to save all the frames of a video frameCollection = torch.FloatTensor(nFrames // args.skip, 3, iHeight, iWidth) for frame in reader.nextFrame(): # Garb each frame frameCount += 1 if (frameCount % args.skip) == 0: # Original resolution -> desired resolution tempImg = resize(frame, (iHeight, iWidth)) if getImgs: imgName = '{:02}_{:04}.png'.format( nVideos, frameCount) # Ignore warning regarding float64 being converted into uint8 with warnings.catch_warnings(): warnings.simplefilter("ignore") imsave( os.path.join(rootDirSave, subDir, imgName), tempImg)