def _load_video(full_path, size=None, resize_mode='resize_and_crop', cut_edges=False, cut_edges_thresh=0):
    """
    Load a video into a numpy array
    :param full_path: Full path to the video
    :param size: A 2-tuple of width-height, indicating the desired size of the output
    :param resize_mode: The mode with which to get the video to the desired size.  Can be:
        'squeeze', 'preserve_aspect', 'crop', 'scale_crop'.  See resize_image in image_ops.py for more info.
    :param cut_edges: True if you want to cut the dark edges from the video
    :param cut_edges_thresh: If cut_edges, this is the threshold at which you'd like to cut them.
    :return: A list of (height, width, 3) frame arrays (stack with np.array for an (n_frames, height, width, 3) array)
    """
    try:
        from moviepy.video.io.VideoFileClip import VideoFileClip
    except ImportError:
        raise ImportError("You need to install moviepy to read videos.  In the virtualenv, go `pip install moviepy`")
    assert os.path.exists(full_path)
    video = VideoFileClip(full_path)
    images = []
    edge_crops = None
    for frame in video.iter_frames():
        if cut_edges:
            if edge_crops is None:
                edge_crops = get_dark_edge_slice(frame, cut_edges_thresh=cut_edges_thresh)
            else:
                frame = frame[edge_crops[0], edge_crops[1]]
        if size is not None:
            width, height = size
            frame = resize_image(frame, width=width, height=height, mode=resize_mode)
        images.append(frame)
    return images
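A minimal usage sketch for the loader above; the path and size are placeholders, and `get_dark_edge_slice` / `resize_image` are assumed to come from the same module:

import numpy as np

# Illustrative call; '/path/to/clip.mp4' is a placeholder.
frames = _load_video('/path/to/clip.mp4', size=(320, 240), cut_edges=False)
video_array = np.array(frames)  # (n_frames, height, width, 3) when all frames share a shape
print(video_array.shape)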
class VideoStim(Stim, CollectionStimMixin): ''' A video. ''' def __init__(self, filename, onset=None): self.clip = VideoFileClip(filename) self.fps = self.clip.fps self.width = self.clip.w self.height = self.clip.h self.n_frames = int(self.fps * self.clip.duration) duration = self.clip.duration super(VideoStim, self).__init__(filename, onset, duration) def __iter__(self): """ Frame iteration. """ for i, f in enumerate(self.clip.iter_frames()): yield VideoFrameStim(self, i, data=f) @property def frames(self): return [f for f in self.clip.iter_frames()] def get_frame(self, index=None, onset=None): if index is not None: onset = float(index) / self.fps else: index = int(onset * self.fps) return VideoFrameStim(self, index, data=self.clip.get_frame(onset))
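A brief usage sketch for `VideoStim`; the filename is a placeholder and `VideoFrameStim` is assumed to be defined alongside the class:

video = VideoStim('movie.mp4')  # placeholder filename
print(video.fps, video.width, video.height, video.n_frames)
frame_at_2s = video.get_frame(onset=2.0)   # VideoFrameStim nearest to t = 2 s
for frame_stim in video:                   # iterates over VideoFrameStim objects
    pass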
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4): video_clip = VideoFileClip(path_in, audio=False) video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=path_in, threads=None, ffmpeg_params=None) g = tf.Graph() soft_config = tf.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), g.device(device_t), \ tf.Session(config=soft_config) as sess: batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3) img_placeholder = tf.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') preds = transform.net(img_placeholder) saver = tf.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception("No checkpoint found...") else: saver.restore(sess, checkpoint_dir) X = np.zeros(batch_shape, dtype=np.float32) def style_and_write(count): for i in range(count, batch_size): X[i] = X[count - 1] # Use last frame to fill X _preds = sess.run(preds, feed_dict={img_placeholder: X}) for i in range(0, count): video_writer.write_frame(np.clip(_preds[i], 0, 255).astype(np.uint8)) frame_count = 0 # The frame count that written to X for frame in video_clip.iter_frames(): X[frame_count] = frame frame_count += 1 if frame_count == batch_size: style_and_write(frame_count) frame_count = 0 if frame_count != 0: style_and_write(frame_count) video_writer.close()
def video_style_transfer_gatys(video_path, style_path, output_path, batch_s=4): video = VideoFileClip(video_path, audio=False) video_w = ffmpeg_writer.FFMPEG_VideoWriter(output_path, video.size, video.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=video_path, threads=None, ffmpeg_params=None) style = Image.load_image(style_path) content = [c for c in video.iter_frames()] batch_l = [content[i:i + batch_s] for i in range(0, len(content), batch_s)] for b in batch_l: frames = run_style_transfer(b, style) for f in frames: video_w.write_frame(f) video_w.close()
def load_video(filepath, sample=6): clip = VideoFileClip(filepath) video = [] skip = 0 for frame in clip.iter_frames(): skip += 1 if skip % sample != 0: continue img = Image.fromarray(frame) img = img.resize((224, 224)) norm = np.divide(np.array(img), 255) norm = np.reshape(norm, [1, 224, 224, 3]) video.append(norm) return np.array(video)
def get_video_feat(path): feat = [] if use_VGG: frames = load_video(path) for f in frames: f5_3 = sess.run([vgg.fc6], feed_dict={image_holder: f}) feat.append(f5_3) else: clip = VideoFileClip(path) skip = 6 count = 0 for f in clip.iter_frames(): count += 1 if count % skip != 0: continue feat.append(model.extract_PIL(Image.fromarray(f))) return feat
def video_style_transfer(input_path, model_path, output_path, batch_s=4):
    video = VideoFileClip(input_path, audio=False)
    video_w = ffmpeg_writer.FFMPEG_VideoWriter(output_path, video.size, video.fps, codec="libx264",
                                               preset="medium", bitrate="2000k", audiofile=input_path,
                                               threads=None, ffmpeg_params=None)
    with tf.Graph().as_default(), tf.Session() as session:
        video_iter = list(video.iter_frames())
        batch_l = [video_iter[i:i + batch_s] for i in range(0, len(video_iter), batch_s)]
        # Pad the last batch with copies of its final frame so every batch has batch_s frames
        while len(batch_l[-1]) < batch_s:
            batch_l[-1].append(batch_l[-1][-1])
        print("Loading model, it may take some time")
        video_wip = np.array(batch_l, dtype=np.float32)
        place_holder = tf.placeholder(tf.float32, shape=video_wip.shape[1:], name='place_holder')
        wip = Transform.net(place_holder)
        p_loader = tf.train.Saver()
        if os.path.isdir(model_path):
            model = tf.train.get_checkpoint_state(model_path)
            if model is not None and model.model_checkpoint_path:
                p_loader.restore(session, model.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            p_loader.restore(session, model_path)

        # The size stored in the video file is (width, height);
        # in *** the dimensions are (height, width)
        # shape = (batch_s, video.size[1], video.size[0], 3)
        # TODO check if it's ok without shape
        for i in range(len(video_wip)):
            r_res = session.run(wip, feed_dict={place_holder: video_wip[i]})
            for r in r_res:
                video_w.write_frame(np.clip(r, 0, 255).astype(np.uint8))
            print("processed " + str(i + 1) + " out of " + str(len(video_wip)) + " batches", end='\r')
    video_w.close()
def get_frames_from_video(path_to_video, from_sec=0, to_sec=None, undistort=False):
    """ Generator that reads a video file from disk and yields one color-corrected frame at a time """
    camera_calibration = CameraCalibration(CAMERA_TYPE)
    fullpath = os.path.abspath(path_to_video)
    video = VideoFileClip(fullpath, audio=False).subclip(from_sec, to_sec)
    for frame in video.iter_frames():
        # Switch the channel order, since OpenCV expects a different order than the frames delivered by the camera
        color_corrected_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if undistort:
            color_corrected_frame = camera_calibration.undistort(color_corrected_frame)
        yield color_corrected_frame
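A hedged usage sketch for the generator above; the video path is a placeholder, and `cv2` plus the module-level `CameraCalibration` setup are assumed to be available as in the snippet:

for frame in get_frames_from_video('recording.mp4', from_sec=5, to_sec=15, undistort=True):
    cv2.imshow('frame', frame)   # frames arrive one at a time, already color corrected
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()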
class VideoStim(DynamicStim): ''' A video. ''' def __init__(self, filename): self.clip = VideoFileClip(filename) self.fps = self.clip.fps self.width = self.clip.w self.height = self.clip.h self.frames = [] self.frames = [f for f in self.clip.iter_frames()] self.n_frames = len(self.frames) super(VideoStim, self).__init__(filename) def _extract_duration(self): self.duration = self.n_frames * 1. / self.fps def __iter__(self): """ Frame iteration. """ for i, f in enumerate(self.frames): yield VideoFrameStim(self, i, data=f) def extract(self, extractors, merge_events=True, **kwargs): period = 1. / self.fps timeline = Timeline(period=period) for ext in extractors: # For VideoExtractors, pass the entire stim if ext.target.__name__ == self.__class__.__name__: events = ext.apply(self, **kwargs) for ev in events: timeline.add_event(ev, merge=merge_events) # Otherwise, for images, loop over frames else: c = 0 for frame in self: if frame.data is not None: event = Event(onset=c * period) event.add_value(ext.apply(frame)) timeline.add_event(event, merge=merge_events) c += 1 return timeline
def load_video(filepath, sample=6, use_VGG=True): clip = VideoFileClip(filepath) video = [] skip = 0 for frame in clip.iter_frames(): skip += 1 if skip % sample != 0: continue img = Image.fromarray(frame) img = img.resize((224, 224)) if use_VGG else img.resize((299, 299)) if use_VGG: norm = np.divide(np.array(img), 255) norm = np.reshape(norm, [1, 224, 224, 3]) video.append(norm) else: # keras will handle input normalization for InceptionV3 video.append(np.array(img)) return np.array(video)
def open(cls, path: Union[str, Path], method: str = "pillow") -> 'GifSequence': """ Create a GifSequence from a GIF file using Pillow or MoviePy :param path: path to GIF file :param method: method to load GIF frames (pillow or mpy) :return: the opened GifSequence """ image_file = Image.open(path) assert type(image_file) is GifImagePlugin.GifImageFile if method == "pillow": return cls(ImageSequence.Iterator(image_file)) elif method == "mpy": durations = [image.info['duration'] for image in ImageSequence.Iterator(image_file)] clip = VideoFileClip(path) frames = [] for frame, duration in zip(clip.iter_frames(), durations): frames.append(GifFrame.from_array(array=frame, duration=duration)) return GifSequence.from_frames(frames) else: raise ValueError("Method must be either pillow or mpy (moviepy)")
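An illustrative call of the loader above; the GIF path is a placeholder:

seq_pillow = GifSequence.open('animation.gif')                 # default Pillow-based loading
seq_moviepy = GifSequence.open('animation.gif', method='mpy')  # MoviePy-backed loading with per-frame durations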
def _extract_frames(self, person, video):
    video_frames_dir = self._video_frames_path(video)
    video_clip = VideoFileClip(self._video_path(video))

    start_time = time.time()
    print('[extract-frames] about to extract_frames for {}, fps {}, length {}s'.format(
        video_frames_dir, video_clip.fps, video_clip.duration))

    if os.path.exists(video_frames_dir):
        print('[extract-frames] frames already exist, skipping extraction: {}'.format(video_frames_dir))
        return
    os.makedirs(video_frames_dir)

    frame_num = 0
    for frame in tqdm.tqdm(video_clip.iter_frames(fps=video['fps']),
                           total=int(video['fps'] * video_clip.duration)):
        video_frame_file = os.path.join(video_frames_dir, 'frame_{:03d}.jpg'.format(frame_num))
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # Swap RGB to BGR to work with OpenCV
        cv2.imwrite(video_frame_file, frame)
        frame_num += 1

    print('[extract] finished extract_frames for {}, total frames {}, time taken {:.0f}s'.format(
        video_frames_dir, frame_num, time.time() - start_time))
def _get_frames(self): """ Extract frames from the video """ path_to_vid = os.path.join(self.vid_path, self.vid_id) assert os.path.exists(path_to_vid), "{} file not found".format( path_to_vid) try: # Load video video_clip = VideoFileClip(path_to_vid, audio=False, fps_source="fps") except Exception as e: logger.info("Failed to load video from {} with error {}".format( path_to_vid, e)) self.orig_width, self.orig_height = video_clip.size self.frames = None for in_frame in video_clip.iter_frames(fps=self.in_fps): if self.frames is None: self.frames = in_frame[None, ...] else: self.frames = np.concatenate( (self.frames, in_frame[None, ...]), axis=0) # convert to tensor self.frames = torch.from_numpy(self.frames).float() # Normalize the values self.frames = self.frames / 255.0 self.frames = self.frames - torch.tensor(self.cfg.DATA.MEAN) self.frames = self.frames / torch.tensor(self.cfg.DATA.STD) # T H W C -> C T H W. self.frames = self.frames.permute(3, 0, 1, 2)
def cartoonize(file_in, smoothing=3):
    print(file_in)
    path_in = os.path.join(dir_in, file_in)
    path_out = os.path.join(dir_out, file_in)
    if os.path.exists(path_out):
        os.remove(path_out)
    video_clip = VideoFileClip(path_in, audio=False)
    i = 0
    for img in video_clip.iter_frames():
        print(i)
        i += 1
        # if i == 100:
        #     break
        # 1) Edges (MoviePy yields RGB frames, so convert from RGB rather than BGR)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        gray = cv2.medianBlur(gray, 5)
        edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
        # 2) Color
        color = cv2.bilateralFilter(img, 9, 200, 200)
        # 3) Cartoon
        cartoon = cv2.bitwise_and(color, color, mask=edges)
        output = cv2.GaussianBlur(cartoon, (smoothing, smoothing), 0)
        # output = cartoon
        cv2.imwrite("images/im%04d.png" % (i), cv2.cvtColor(output, cv2.COLOR_RGB2BGR))
    fps = int(video_clip.fps)
    cv2.destroyAllWindows()
    os.system("ffmpeg -r %d -i images/im%%04d.png -vb 40M -vcodec mpeg4 -r %d %s" % (fps, fps, path_out))
    # Clean up the intermediate frames written to images/
    files = glob.glob("images/im*.png")
    for f in files:
        os.remove(f)
def feed_forward_video(path_in, path_out, checkpoint_dir): # initialize video cap video_cap = VideoFileClip(path_in, audio=False) # initialize writer video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_cap.size, video_cap.fps, codec='libx264', preset='medium', bitrate='2000k', audiofile=path_in, threads=None, ffmpeg_params=None) g = tf.Graph() soft_config = tf.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), tf.Session(config=soft_config) as sess: batch_shape = (None, video_cap.size[1], video_cap.size[0], 3) img_placeholder = tf.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') model = Transfer() pred = model(img_placeholder) saver = tf.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception('No checkpoint found...') else: saver.restore(sess, checkpoint_dir) frame_id = 0 for frame in video_cap.iter_frames(): print('frame id: {}'.format(frame_id)) _pred = sess.run(pred, feed_dict={img_placeholder: np.asarray([frame]).astype(np.float32)}) video_writer.write_frame(np.clip(_pred, 0, 255).astype(np.uint8)) frame_id += 1 video_writer.close()
def transform_video(self, input_path, output_path, batch_size=4, start=0, end=0):
    '''
    Transform a video to animation version
    https://github.com/lengstrom/fast-style-transfer/blob/master/evaluate.py#L21
    '''
    # Force to None
    end = end or None

    if not os.path.isfile(input_path):
        raise FileNotFoundError(f'{input_path} does not exist')

    output_dir = "/".join(output_path.split("/")[:-1])
    os.makedirs(output_dir, exist_ok=True)
    is_gg_drive = '/drive/' in output_path
    temp_file = ''

    if is_gg_drive:
        # Writing directly into google drive can be inefficient
        temp_file = f'tmp_anime.{output_path.split(".")[-1]}'

    def transform_and_write(frames, count, writer):
        anime_images = denormalize_input(self.transform(frames), dtype=np.uint8)
        for i in range(0, count):
            img = np.clip(anime_images[i], 0, 255)
            writer.write_frame(img)

    video_clip = VideoFileClip(input_path, audio=False)
    if start or end:
        video_clip = video_clip.subclip(start, end)

    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(temp_file or output_path, video_clip.size, video_clip.fps,
                                                    codec="libx264", preset="medium", bitrate="2000k",
                                                    audiofile=input_path, threads=None, ffmpeg_params=None)

    total_frames = round(video_clip.fps * video_clip.duration)
    print(f'Transforming video {input_path}, {total_frames} frames, size: {video_clip.size}')

    batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
    frame_count = 0
    frames = np.zeros(batch_shape, dtype=np.float32)
    for frame in tqdm(video_clip.iter_frames()):
        try:
            frames[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                transform_and_write(frames, frame_count, video_writer)
                frame_count = 0
        except Exception as e:
            print(e)
            break

    # The last frames
    if frame_count != 0:
        transform_and_write(frames, frame_count, video_writer)

    # Close the writer before moving the finished file
    video_writer.close()

    if temp_file:
        # Move to output path
        shutil.move(temp_file, output_path)

    print(f'Animation video saved to {output_path}')
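An illustrative call of the method above; the owning class name (`Transformer` here) and the file paths are placeholders:

model = Transformer()  # hypothetical class exposing transform() / transform_video()
model.transform_video('input.mp4', 'output/anime.mp4', batch_size=4, start=0, end=10)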
source_video = movie_resize(source_video, (width, height)) x_center, y_center = 128, height // 2 else: width = int(256 * width / height) height = 256 source_video = movie_resize(source_video, (width, height)) x_center, y_center = width // 2, 128 source_video = movie_crop(source_video, x_center=x_center, y_center=y_center, width=256, height=256) else: raise NotImplementedError("Invalid Video Resize Mode") driving_video = [(frame / 255) for frame in source_video.iter_frames()] print() print("Generating Video") predictions = make_animation(source_image, driving_video, generator, kp_detector, **options) print() output_clip = VideoClip(make_frame, duration=source_duration) output_clip = output_clip.set_fps(source_fps) output_clip = output_clip.set_audio(source_audio) if args.image_resize == 'fill' and args.crop_output: print(f"Cropping output video to {unfill_width}x{unfill_height}") output_clip = movie_crop(output_clip, x_center=256 // 2,
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4): """Creates a stylized video. Code from lengstrom's repo found here: https://github.com/lengstrom/fast-style-transfer and the specific file is found here: https://github.com/lengstrom/fast-style-transfer/blob/master/evaluate.py Parameters ---------- path_in : str The path to the video to read in to stylize. path_out : str The path to save the stylized video. checkpoint_dir : str The checkpoint dir holding the neural style transfer model. This should be a .ckpt file. device_t : str, optional The device you want to run the model on. batch_size : int, optional The batch size you want to use for the model. """ video_clip = VideoFileClip(path_in, audio=False) video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=path_in, threads=None, ffmpeg_params=None) g = tf.Graph() soft_config = tf.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), g.device(device_t), \ tf.Session(config=soft_config) as sess: batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3) img_placeholder = tf.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') preds = transform.net(img_placeholder) saver = tf.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception("No checkpoint found...") else: saver.restore(sess, checkpoint_dir) X = np.zeros(batch_shape, dtype=np.float32) def style_and_write(count): for i in range(count, batch_size): X[i] = X[count - 1] # Use last frame to fill X _preds = sess.run(preds, feed_dict={img_placeholder: X}) for i in range(0, count): video_writer.write_frame(np.clip(_preds[i], 0, 255).astype(np.uint8)) frame_count = 0 # The frame count that written to X pbar = tqdm(total=int(video_clip.fps * video_clip.duration)) for frame in video_clip.iter_frames(): X[frame_count] = frame frame_count += 1 if frame_count == batch_size: style_and_write(frame_count) pbar.update(frame_count) frame_count = 0 if frame_count != 0: style_and_write(frame_count) pbar.update(frame_count) pbar.close() video_writer.close()
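A minimal call sketch for `ffwd_video` as documented above; the file names and checkpoint path are placeholders:

ffwd_video(path_in='input.mp4',
           path_out='stylized.mp4',
           checkpoint_dir='models/wave.ckpt',  # a .ckpt file, or a directory containing checkpoints
           device_t='/gpu:0',
           batch_size=4)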
if __name__ == '__main__':
    options, _ = getopt.getopt(sys.argv[1:], '', ['file='])
    for opt in options:
        if opt[0] == '--file':
            video_path = opt[1]

    clip = VideoFileClip(video_path, audio=False)
    coun = 0
    max_frame_cout = 2000
    start_count = 60 * 20  # 60 fps * 20 sec
    imgs_path = []
    for clip in clip.iter_frames():
        coun += 1
        if coun % 60 != 0 or coun < start_count:
            continue
        elif len(imgs_path) >= max_frame_cout:
            break
        img = Image.fromarray(clip)
        step = 30
        sample_size = (150, 200)
        margin = 80
        for x in range(0 + margin, img.size[0] - sample_size[0] - margin, step):
            for y in range(0 + margin, img.size[1] - sample_size[1] - margin,
from CarDetection import detect_vehicle from LicensePlateDetection import LicensePlateDetection from Utils import get_image_patch, save_debug_image if __name__ == "__main__": start = time.time() fullpath = os.path.abspath("testFiles/IMG_2993.m4v") clip = VideoFileClip(fullpath, audio=False).subclip(0, 3) frame_counter = 0 car_counter = 0 car_detection_total_duration = 0 lp_extraction_total_duration = 0 for frame in clip.iter_frames(): frame_counter += 1 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_copy = np.copy(frame) car_detection_start = time.time() car_boxes = detect_vehicle(frame) car_detection_total_duration += time.time() - car_detection_start print("Found " + str(len(car_boxes)) + " cars in frame " + str(frame_counter)) car_counter_per_frame = 0 for car_box in car_boxes: car_counter_per_frame += 1 car_counter += 1 frame_copy = cv2.rectangle(frame_copy, car_box[0], car_box[3],
def load_movie(filename): clip = VideoFileClip(filename) frameList = [_make_grayscale(a) for a in clip.iter_frames()] mat = np.asarray(frameList, dtype=np.uint8) return mat
y2=P1_HEALTH_BAR_TL[1] + HEALTH_BAR_DIM[1]) P2_health_clip = crop(high_res_clip, x1=P2_HEALTH_BAR_TL[0], y1=P2_HEALTH_BAR_TL[1], x2=P2_HEALTH_BAR_TL[0] + HEALTH_BAR_DIM[0], y2=P2_HEALTH_BAR_TL[1] + HEALTH_BAR_DIM[1]) # Find match start timestamps ########################################################################### sec_clip_frames_buffer = collections.deque() vs_sec_clip_frames = [] sec_matches = [] match_titles = [] next_sec = 0 for sec, clip_frame in clip.iter_frames(with_times=True, logger='bar'): ''' if detect_health(high_res_clip, sec): print("hit") ''' while sec_clip_frames_buffer: (buffer_sec, _) = sec_clip_frames_buffer[0] if sec - buffer_sec > CLIP_FRAME_BUFFER_MAX_SECS: sec_clip_frames_buffer.popleft() else: break sec_clip_frames_buffer.append((sec, clip_frame)) if sec < next_sec: continue
def save_results_video(loader, model, args): from moviepy.video.io.VideoFileClip import VideoFileClip model.eval() with torch.no_grad(): for i, video_paths in enumerate( loader): # Must be a batch size of 1 (video) print(f"Processing {i} of {len(loader)}: {video_paths[0]}") clip = VideoFileClip(video_paths[0]) for frame_id, frame in enumerate( clip.iter_frames()): # HxWx3 numpy array frame = transforms.ToTensor()(frame) if args.gpu is not None: images = [frame.cuda(args.gpu, non_blocking=True)] outputs = model(images) # Plots: # do_plot = True do_plot = False if do_plot: import visualize_maskrcnn_predictions as vis_preds top_predictions = vis_preds.select_top_predictions( outputs[0], 0.7) top_predictions = { k: v.cpu() for k, v in top_predictions.items() } cv_img = (images[0].cpu().numpy().transpose( (1, 2, 0)) * 255).astype("uint8") result = cv_img.copy() result = vis_preds.overlay_boxes(result, top_predictions) result = vis_preds.overlay_keypoints( result, top_predictions) result = vis_preds.overlay_class_names( result, top_predictions) plt.imshow(result) plt.show() for j, output in enumerate(outputs): keypoints_all = output["keypoints"].cpu( ) # Nx17x3 (most confident detection first) keypoints_scores_all = output["keypoints_scores"].cpu( ) # Nx17 (most confident detection first) boxes = output["boxes"].cpu() # Nx4 labels = output["labels"].cpu() # N scores = output["scores"].cpu() # N video_path = video_paths[j] # If using small dataset, scale keypoints by 3 to get original coordinates if 'small' in args.data: keypoints_all[:, :, :2] *= args.image_scale_factor # Rearrange into dictionary for writing: # OPENPOSE ANNOTATION STYLE people = [] for k, keypoints in enumerate(keypoints_all): if labels[k] == 1: keypoint_visibility = keypoints[:, 2].tolist() keypoints_scores = keypoints_scores_all[k, :] keypoints[:, 2] = keypoints_scores # Swap visibility and keypoint scores keypoints = keypoints.reshape(-1).tolist() box = boxes[k].tolist() score = scores[k].item() person = { "person_id": [-1], "pose_keypoints_2d": keypoints, "keypoint_visibility": keypoint_visibility, "boxes": box, "score": score } people.append(person) if not people: # Output all zero if no detection people = [{ "person_id": [-1], "pose_keypoints_2d": [0.] * 17 * 3, "keypoint_visibility": [0.] * 17, "boxes": [0.] * 4, "score": 0. }] print( f"No people detected in frame {frame_id} of {video_path}" ) output_dict = {} output_dict["format"] = "ikea" output_dict["people"] = people video_path_split = video_path.split( '/' ) # eg <root>/Lack_TV_Bench/0007_white_floor_08_04_2019_08_28_10_47/dev3/images/scan_video.avi furniture_type = video_path_split[-5] experiment_id = video_path_split[-4] cam_id = video_path_split[-3] json_name = f"scan_video_000000{frame_id:06d}_keypoints.json" output_path = os.path.join(args.out_data_dir, furniture_type, experiment_id, cam_id, 'predictions', 'pose2d', 'keypoint_rcnn_ft_all') os.makedirs(output_path, exist_ok=True) json_file = os.path.join(output_path, json_name) # print(f"Writing: {json_file}") with open(json_file, 'w') as f: json.dump(output_dict, f) clip.close()
if args.export_type != 0: video_writer = ffmpeg_writer.FFMPEG_VideoWriter( f'images/outputs/{out_dir}/{video_name}.mp4', video_clip.size, video_clip.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=None, threads=None, ffmpeg_params=None) try: fnum = 0 stylized_frames = [] for frame in tqdm.tqdm(video_clip.iter_frames(), desc="Processing frames"): outframe = stylizer.stylize_with_octaves( frame, args.max_size, args.overlap, args.octave_num, args. octave_scale) if args.octave_num else stylizer.stylize_image( frame, args.max_size, args.overlap) if args.export_type != 0: video_writer.write_frame(outframe) if args.export_type != 1: save(f"{out_dir}/frame_{fnum}.jpg", np.asarray(outframe, dtype='float32')) save(f"{out_dir}/latest.jpg", np.asarray(outframe, dtype='float32') ) #open image viewer on this to see video progress along fnum += 1
high_res_clip = VideoFileClip( args.tmp_filepath, audio=False, resize_algorithm='fast_bilinear', ) # Find match start timestamps ########################################################################### sec_clip_frames_buffer = collections.deque() vs_sec_clip_frames = [] sec_matches = [] match_titles = [] next_sec = 0 for sec, clip_frame in clip.iter_frames(with_times=True): while sec_clip_frames_buffer: (buffer_sec, _) = sec_clip_frames_buffer[0] if sec - buffer_sec > CLIP_FRAME_BUFFER_MAX_SECS: sec_clip_frames_buffer.popleft() else: break sec_clip_frames_buffer.append((sec, clip_frame)) if sec < next_sec: continue clip_frame_img = clip_frame_to_image(clip_frame) # Detect VS splash screen start/continuing vs_img_hash_diff = imagehash.average_hash(clip_frame_img.crop(box=VS_IMAGE_BOX)) - VS_IMAGE_HASH
def process_video_file(filepath, output_dir=None, suffix=None, audio_model=None, image_model=None, input_repr="mel256", content_type="music", audio_embedding_size=6144, audio_center=True, audio_hop_size=0.1, image_embedding_size=8192, audio_batch_size=32, image_batch_size=32, overwrite=False, verbose=True): """ Computes and saves L3 audio and video frame embeddings for a given video file Note that image embeddings are computed for every frame of the video. Also note that embeddings for the audio and images are not temporally aligned. Please refer to the timestamps in the output files for the corresponding timestamps for each set of embeddings. Parameters ---------- filepath : str or list[str] Path or list of paths to video file(s) to be processed. output_dir : str or None Path to directory for saving output files. If None, output files will be saved to the directory containing the input file. suffix : str or None String to be appended to the output filename, i.e. <base filename>_<modality>_<suffix>.npz. If None, then no suffix will be added, i.e. <base filename>_<modality>.npz. audio_model : keras.models.Model or None Loaded audio model object. If a model is provided, then `input_repr`, `content_type`, and `embedding_size` will be ignored. If None is provided, the model will be loaded using the provided values of `input_repr`, `content_type` and `embedding_size`. image_model : keras.models.Model or None Loaded audio model object. If a model is provided, then `input_repr`, `content_type`, and `embedding_size` will be ignored. If None is provided, the model will be loaded using the provided values of `input_repr`, `content_type` and `embedding_size`. input_repr : "linear", "mel128", or "mel256" Spectrogram representation used for audio model. Ignored if `model` is a valid Keras model. content_type : "music" or "env" Type of content used to train the embedding model. Ignored if `model` is a valid Keras model. audio_embedding_size : 6144 or 512 Audio embedding dimensionality. Ignored if `model` is a valid Keras model. audio_center : boolean If True, pads beginning of audio signal so timestamps correspond to center of window. audio_hop_size : float Hop size in seconds. image_embedding_size : 8192 or 512 Video frame embedding dimensionality. Ignored if `model` is a valid Keras model. audio_batch_size : int Batch size used for input to audio embedding model image_batch_size : int Batch size used for input to image embedding model overwrite : bool If True, overwrites existing output files verbose : bool If True, prints verbose messages. Returns ------- """ if isinstance(filepath, str): filepath_list = [filepath] elif isinstance(filepath, list): filepath_list = filepath else: err_msg = 'filepath should be type str or list[str], but got {}.' 
raise OpenL3Error(err_msg.format(filepath)) # Load models if not audio_model: audio_model = load_audio_embedding_model(input_repr, content_type, audio_embedding_size) if not image_model: image_model = load_image_embedding_model(input_repr, content_type, image_embedding_size) audio_suffix, image_suffix = "audio", "image" if suffix: audio_suffix += "_" + suffix image_suffix += "_" + suffix audio_list = [] sr_list = [] audio_batch_filepath_list = [] total_audio_batch_size = 0 image_list = [] frame_rate_list = [] image_batch_filepath_list = [] num_files = len(filepath_list) for file_idx, filepath in enumerate(filepath_list): if not os.path.exists(filepath): raise OpenL3Error('File "{}" could not be found.'.format(filepath)) if verbose: print("openl3: Processing {} ({}/{})".format( filepath, file_idx + 1, num_files)) # Skip if overwriting isn't enabled and output file exists audio_output_path = get_output_path(filepath, audio_suffix + ".npz", output_dir=output_dir) image_output_path = get_output_path(filepath, image_suffix + ".npz", output_dir=output_dir) skip_audio = os.path.exists(audio_output_path) and not overwrite skip_image = os.path.exists(image_output_path) and not overwrite if skip_audio and skip_image: err_msg = "openl3: {} and {} exist, skipping." print(err_msg.format(audio_output_path, image_output_path)) continue try: clip = VideoFileClip(filepath, target_resolution=(256, 256), audio_fps=TARGET_SR) audio = clip.audio.to_soundarray(fps=TARGET_SR) images = np.array([frame for frame in clip.iter_frames()]) except Exception: err_msg = 'Could not open file "{}":\n{}' raise OpenL3Error(err_msg.format(filepath, traceback.format_exc())) if not skip_audio: audio_list.append(audio) sr_list.append(TARGET_SR) audio_batch_filepath_list.append(filepath) audio_len = audio.shape[0] audio_hop_length = int(audio_hop_size * TARGET_SR) num_windows = 1 + max( ceil((audio_len - TARGET_SR) / float(audio_hop_length)), 0) total_audio_batch_size += num_windows else: err_msg = "openl3: {} exists, skipping audio embedding extraction." print(err_msg.format(audio_output_path)) if not skip_image: image_list.append(images) frame_rate_list.append(int(clip.fps)) image_batch_filepath_list.append(filepath) else: err_msg = "openl3: {} exists, skipping image embedding extraction." 
print(err_msg.format(image_output_path)) if (total_audio_batch_size >= audio_batch_size or file_idx == (num_files - 1)) and len(audio_list) > 0: embedding_list, ts_list \ = get_audio_embedding(audio_list, sr_list, model=audio_model, input_repr=input_repr, content_type=content_type, embedding_size=audio_embedding_size, center=audio_center, hop_size=audio_hop_size, batch_size=audio_batch_size, verbose=verbose) for fpath, embedding, ts in zip(audio_batch_filepath_list, embedding_list, ts_list): output_path = get_output_path(fpath, audio_suffix + ".npz", output_dir=output_dir) np.savez(output_path, embedding=embedding, timestamps=ts) assert os.path.exists(output_path) if verbose: print("openl3: Saved {}".format(output_path)) audio_list = [] sr_list = [] audio_batch_filepath_list = [] total_audio_batch_size = 0 if (len(image_list) >= image_batch_size or file_idx == (num_files - 1)) and len(image_list) > 0: embedding_list, ts_list \ = get_image_embedding(image_list, frame_rate_list, model=image_model, input_repr=input_repr, content_type=content_type, embedding_size=image_embedding_size, batch_size=image_batch_size, verbose=verbose) for fpath, embedding, ts in zip(image_batch_filepath_list, embedding_list, ts_list): output_path = get_output_path(fpath, image_suffix + ".npz", output_dir=output_dir) np.savez(output_path, embedding=embedding, timestamps=ts) assert os.path.exists(output_path) if verbose: print("openl3: Saved {}".format(output_path)) image_list = [] frame_rate_list = [] image_batch_filepath_list = []
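A hedged example of invoking `process_video_file` per its docstring; the input files and output directory are placeholders:

process_video_file(['clip_01.mp4', 'clip_02.mp4'],
                   output_dir='embeddings/',
                   suffix='l3',
                   input_repr='mel256',
                   content_type='music',
                   audio_embedding_size=6144,
                   image_embedding_size=8192,
                   audio_batch_size=32,
                   image_batch_size=32,
                   overwrite=False,
                   verbose=True)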
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4): video_clip = VideoFileClip(path_in, audio=False) # Create a temporary file to store the audio. fp = tempfile.NamedTemporaryFile(suffix='.aac') temp_audio_file_name = fp.name fp.close() # Create a temporary file to store the video. fp = tempfile.NamedTemporaryFile(suffix='.mp4') temp_video_file_name = fp.name fp.close() # Extract the audio. ffmpeg_tools.ffmpeg_extract_audio(path_in, temp_audio_file_name) video_writer = ffmpeg_writer.FFMPEG_VideoWriter( temp_video_file_name, video_clip.size, video_clip.fps, codec="libx264", preset="medium", audiofile=None, threads=None, ffmpeg_params=["-b:v", "2000k"]) g = tf.Graph() soft_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), g.device(device_t), \ tf.compat.v1.Session(config=soft_config) as sess: batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3) img_placeholder = tf.compat.v1.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') preds = src.transform.net(img_placeholder) saver = tf.compat.v1.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception("No checkpoint found...") else: saver.restore(sess, checkpoint_dir) X = np.zeros(batch_shape, dtype=np.float32) def style_and_write(count): for i in range(count, batch_size): X[i] = X[count - 1] # Use last frame to fill X _preds = sess.run(preds, feed_dict={img_placeholder: X}) for i in range(0, count): video_writer.write_frame( np.clip(_preds[i], 0, 255).astype(np.uint8)) frame_count = 0 # The frame count that written to X for frame in video_clip.iter_frames(): X[frame_count] = frame frame_count += 1 if frame_count == batch_size: style_and_write(frame_count) frame_count = 0 if frame_count != 0: style_and_write(frame_count) video_writer.close() # Merge audio and video ffmpeg_tools.ffmpeg_merge_video_audio(temp_video_file_name, temp_audio_file_name, path_out) # Delete temporary files os.remove(temp_video_file_name) os.remove(temp_audio_file_name)
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4):
    ''' feed forward video '''
    # defining video rendering variables
    video_clip = VideoFileClip(path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps,
                                                    codec='libx264', preset='medium', bitrate='2000k',
                                                    audiofile=path_in, threads=None, ffmpeg_params=None)

    # defining tensorflow variables
    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True

    # starting the tensorflow session
    with g.as_default(), g.device(device_t), tf.Session(config=soft_config) as sess:
        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)

        # defining placeholder
        vid_ph = tf.placeholder(tf.float32, shape=batch_shape, name='vid_ph')

        # forward propagation (building the graph)
        preds = transform_net.net(vid_ph)

        # defining saver
        saver = tf.train.Saver()

        # restoring the saved model
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        x = np.zeros(batch_shape, dtype=np.float32)

        # function to style a batch of frames and write them out
        def style_and_write(count):
            # when the batch is not full, fill the remaining slots of x with the last frame received from .iter_frames
            for i in range(count, batch_size):
                x[i] = x[count - 1]

            # running the graph to style the batch
            _preds = sess.run(preds, feed_dict={vid_ph: x})
            for i in range(0, count):
                # write each styled frame individually, not the whole batch at once
                video_writer.write_frame(np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # the number of frames written to x so far
        for frame in video_clip.iter_frames():
            x[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                style_and_write(frame_count)
                frame_count = 0

        # for the last batch, where the number of frames is less than batch_size
        if frame_count != 0:
            style_and_write(frame_count)

        video_writer.close()
def stylize_objects(seg_model_path, orig_path_in, style_path_in, path_out, device_t="/gpu:0", target_class=1): """Generates a video where objects are segmented out and stylized. An outline is also drawn around the person and noise is added in proportion to the amount of base. Parameters ---------- seg_model_path : str The path to the segmentation model. Should be a .pb file. orig_path_in : str The path to the original un-stylized video file. style_path_in : str The path to the stylized video file. path_out : str The path to save the new video with only the objects stylized. device_t : str, optional The device to run the network on. target_class : int, optional The target you want generate masks for and stylize. Example ------- stylize_objects("models/model.pb", "video.mp4", "inter_styled_video.mp4", "styled_video.mp4") """ video_clip = VideoFileClip(orig_path_in, audio=True) style_video_clip = VideoFileClip(style_path_in, audio=False) video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=orig_path_in, threads=None, ffmpeg_params=None) ch1, ch2 = get_base_bumps(video_clip) # load model g = tf.Graph() with g.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(seg_model_path, "rb") as f: serialized_graph = f.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name="") # code adapted from https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb with g.as_default(), g.device(device_t), tf.Session() as sess: ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in ["num_detections", "detection_boxes", "detection_scores", "detection_classes", "detection_masks"]: tensor_name = key + ":0" if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict["detection_boxes"], [0]) detection_masks = tf.squeeze(tensor_dict["detection_masks"], [0]) # Reframe is required to translate mask from box coordinates to image # coordinates and fit the image size. 
real_num_detection = tf.cast(tensor_dict["num_detections"][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, video_clip.size[1], video_clip.size[0]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict["detection_masks"] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name("image_tensor:0") pbar = tqdm(total=int(video_clip.fps * video_clip.duration)) for i, (frame, style_frame), in enumerate(zip(video_clip.iter_frames(), style_video_clip.iter_frames())): output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(frame, 0)}) # assume batch size = 1 classes = output_dict["detection_classes"][0][:int(output_dict["num_detections"][0])] # if no target class then have to use a 0 mask if target_class not in classes: mask = np.zeros((video_clip.size[1], video_clip.size[0])) to_style_frame = False else: mask = merge_classes(output_dict["detection_masks"][0, :, :, :], 1, classes) to_style_frame = True mask = draw_random_triangles(mask, size=(ch1[i]*30 + 1e-8)) outline = Image.fromarray(get_outline(mask)) mask = Image.fromarray(255*mask) nframe = Image.fromarray(frame) # can't paste with 0 mask if to_style_frame: nframe.paste(Image.fromarray(style_frame), mask=mask) nframe.paste(outline, mask=outline) video_writer.write_frame(nframe) pbar.update(1) pbar.close() video_writer.close()
shape = (BATCH_SIZE, video.size[1], video.size[0], 3) image = tf.compat.v1.placeholder(tf.float32, shape=shape, name="image") pred = transformer.net(image) saver = tf.compat.v1.train.Saver() saver.restore(sess, STYLE_MODEL) images = np.zeros(shape, dtype=np.float32) def write(tot): for i in range(tot, BATCH_SIZE): images[i] = images[i - 1] pred_n = sess.run(pred, feed_dict={image: images}) for i in range(tot): styled.append(np.clip(pred_n[i], 0, 255).astype(np.uint8)) tot = 0 for frame in video.iter_frames(): images[tot] = frame tot += 1 if tot == BATCH_SIZE: write(tot) tot = 0 if tot != 0: write(tot) end_time = time.time() convert_to_video(styled) print(f"Execution time {end_time - start_time}") print("The video has been styled!")
def ffwd_video( path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4, data_format='NHWC', num_base_channels=32, # more cli params evaluate=False): video_clip = VideoFileClip(path_in, audio=False) video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps, codec="libx264", preset="medium", bitrate="2000k", audiofile=path_in, threads=None, ffmpeg_params=None) g = tf.Graph() soft_config = tf.compat.v1.ConfigProto(allow_soft_placement=True) soft_config.gpu_options.allow_growth = True with g.as_default(), g.device(device_t), \ tf.compat.v1.Session(config=soft_config) as sess: batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3) img_placeholder = tf.compat.v1.placeholder(tf.float32, shape=batch_shape, name='img_placeholder') #preds = transform.net(img_placeholder) if data_format == 'NHWC': #NHWC path preds = transform.net(img_placeholder, data_format=data_format, num_base_channels=num_base_channels, evaluate=evaluate) else: #NCHW path img_placeholder_nchw = tf.transpose(a=img_placeholder, perm=[0, 3, 1, 2]) preds_nchw = transform.net(img_placeholder_nchw, data_format=data_format, num_base_channels=num_base_channels) preds = tf.transpose(a=preds_nchw, perm=[0, 2, 3, 1]) # add output node preds = tf.identity(preds, "output") #print("tf.identity: {}".format(preds)) saver = tf.compat.v1.train.Saver() if os.path.isdir(checkpoint_dir): ckpt = tf.train.get_checkpoint_state(checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise Exception("No checkpoint found...") else: saver.restore(sess, checkpoint_dir) X = np.zeros(batch_shape, dtype=np.float32) def style_and_write(count): for i in range(count, batch_size): X[i] = X[count - 1] # Use last frame to fill X _preds = sess.run(preds, feed_dict={img_placeholder: X}) for i in range(0, count): video_writer.write_frame( np.clip(_preds[i], 0, 255).astype(np.uint8)) frame_count = 0 # The frame count that written to X for frame in video_clip.iter_frames(): X[frame_count] = frame frame_count += 1 if frame_count == batch_size: style_and_write(frame_count) frame_count = 0 if frame_count != 0: style_and_write(frame_count) video_writer.close()