import librosa
import numpy as np
from moviepy.editor import AudioFileClip, VideoClip


def make_video(audio, filename, progan, n_bins=60, random_state=0, imgs_per_batch=20):
    y, sr = librosa.load(audio)
    song_length = len(y) / sr
    z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins,
                               random_state=random_state)
    fps = z_audio.shape[0] / song_length
    res = progan.get_cur_res()
    shape = (res, res * 16 // 9, 3)  # 16:9 output frame
    imgs = np.zeros(shape=[imgs_per_batch, *shape], dtype=np.float32)

    def make_frame(t):
        nonlocal imgs  # was `global imgs`, but imgs lives in the enclosing function scope
        cur_frame_idx = int(t * fps)
        if cur_frame_idx >= len(z_audio):
            return np.zeros(shape=shape, dtype=np.uint8)
        # Generate a fresh batch of frames whenever the current one is exhausted.
        if cur_frame_idx % imgs_per_batch == 0:
            imgs = progan.generate(z_audio[cur_frame_idx:cur_frame_idx + imgs_per_batch])
            # Keep the left 8/9 of each square frame and mirror it to fill 16:9.
            imgs = imgs[:, :, :res * 8 // 9, :]
            imgs_rev = np.flip(imgs, 2)
            imgs = np.concatenate((imgs, imgs_rev), 2)
        return imgs[cur_frame_idx % imgs_per_batch]

    video_clip = VideoClip(make_frame=make_frame, duration=song_length)
    audio_clip = AudioFileClip(audio)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(filename, fps=fps)
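# get_z_from_audio is not defined in this file. Below is a minimal sketch of one
# plausible implementation, assuming it maps a constant-Q spectrogram frame by
# frame to latent vectors through a fixed random projection; the author's actual
# feature extraction may differ.
def get_z_from_audio(y, z_length, n_bins=60, random_state=0):
    # One CQT column per audio frame; relies on librosa's default sr=22050,
    # which matches the librosa.load default used above.
    spec = np.abs(librosa.cqt(y, n_bins=n_bins))         # (n_bins, n_frames)
    spec /= spec.max() + 1e-8                            # normalize to [0, 1]
    # Fixed random projection from n_bins to z_length, seeded for reproducibility.
    rng = np.random.RandomState(random_state)
    projection = rng.randn(n_bins, z_length)
    return (spec.T @ projection).astype(np.float32)      # (n_frames, z_length)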
# Alternative, unbatched version: generates one frame per make_frame call.
def make_video(audio, filename, progan, n_bins=84, random_state=0):
    y, sr = librosa.load(audio)
    song_length = len(y) / sr
    z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins,
                               random_state=random_state)
    fps = z_audio.shape[0] / song_length
    shape = progan.generate(z_audio[0]).shape

    def make_frame(t):
        cur_frame_idx = int(t * fps)
        if cur_frame_idx < len(z_audio):
            img = progan.generate(z_audio[cur_frame_idx])
        else:
            img = np.zeros(shape=shape, dtype=np.uint8)
        return img

    video_clip = VideoClip(make_frame=make_frame, duration=song_length)
    audio_clip = AudioFileClip(audio)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(filename, fps=fps)
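# Usage sketch: `progan` stands for a trained ProGAN wrapper exposing z_length,
# get_cur_res() and generate(); the loader and the paths below are hypothetical.
#
#   progan = load_progan('checkpoints/progan')   # hypothetical loader
#   make_video('song.mp3', 'visualization.mp4', progan)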
import os.path as osp

import cv2
import matplotlib.pyplot as plt
import numpy as np
import progressbar
from moviepy.editor import AudioFileClip, VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage


def visualize_and_eval(video_name, face_detector, ahegao_classifier=None, output_file=None):
    enable_ahegao_classification = ahegao_classifier is not None
    cv2.ocl.setUseOpenCL(False)
    cap = cv2.VideoCapture(osp.join(VIDEOS_DIR, video_name))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    max_faces_probs = 3
    if output_file is None:
        plt.ion()  # interactive mode for the live preview
    fig = plt.figure(figsize=(15, 8))
    ax0 = plt.subplot2grid((2, 2), (0, 1))  # number of faces detected
    ax1 = plt.subplot2grid((2, 2), (1, 1))  # emotion distribution or face probs
    ax2 = plt.subplot2grid((2, 2), (0, 0), rowspan=2)  # current video frame
    axarr = [ax0, ax1, ax2]
    plt.tight_layout()
    axarr[0].set_title('num faces detected')
    face_line, = axarr[0].plot([], [], 'r-')
    face_probs_lines = []
    if enable_ahegao_classification:
        axarr[1].stackplot([], [])
    else:
        for i in range(max_faces_probs):
            face_probs_line, = axarr[1].plot([], [], 'r-')
            face_probs_lines.append(face_probs_line)
    axarr[1].set_ylim(-0.05, 1.05)
    axarr[1].yaxis.grid(True)
    im = axarr[2].imshow(np.zeros((height, width)))
    axarr[2].grid(False)
    axarr[2].axis('off')

    i = 0
    face_data_x = []
    face_data_y = []
    emotion_data_x = []
    emotion_data_y = np.empty(0)
    face_probs_x = []
    face_probs_y = np.empty((0, 3))
    j = 0

    if output_file is None:
        # Live preview: update the existing line artists in place.
        def update_face_probs(face_probs_x, face_probs_y):
            for k, face_probs_line in enumerate(face_probs_lines):
                face_probs_line.set_xdata(face_probs_x)
                face_probs_line.set_ydata(face_probs_y[:, k])

        def update_face_line(face_data_x, face_data_y):
            face_line.set_xdata(face_data_x)
            face_line.set_ydata(face_data_y)
            # Update xlim and ylim to show all points:
            axarr[0].set_xlim(min(face_data_x) - 0.5, max(face_data_x) + 0.5)
            axarr[0].set_ylim(min(face_data_y) - 0.5, max(face_data_y) + 0.5)

        should_stop = False
        while not should_stop:
            should_stop, emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j = process_frame(
                ahegao_classifier, axarr, cap, emotion_data_x, emotion_data_y,
                enable_ahegao_classification, face_data_x, face_data_y, face_detector,
                face_probs_x, face_probs_y, i, im, j, max_faces_probs,
                update_face_probs, update_face_line)
            plt.draw()
            plt.pause(0.0001)
    else:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps
        widgets = [progressbar.Percentage(), ' ', progressbar.Counter(), ' ',
                   progressbar.Bar(), ' ', progressbar.FileTransferSpeed()]
        pbar = progressbar.ProgressBar(widgets=widgets, max_value=frame_count).start()

        # Offline rendering: redraw the axes from scratch on every frame.
        def update_face_probs(face_probs_x, face_probs_y):
            axarr[1].clear()
            for k, face_probs_line in enumerate(face_probs_lines):
                axarr[1].plot(face_probs_x, face_probs_y[:, k], 'r-')
            axarr[1].set_ylim(-0.05, 1.05)
            axarr[1].yaxis.grid(True)

        def update_face_line(face_data_x, face_data_y):
            axarr[0].clear()
            axarr[0].set_title('num faces detected')
            axarr[0].plot(face_data_x, face_data_y, 'r-')
            axarr[0].set_xlim(min(face_data_x) - 0.5, max(face_data_x) + 0.5)
            axarr[0].set_ylim(min(face_data_y) - 0.5, max(face_data_y) + 0.5)

        def make_frame(t):
            nonlocal emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j
            pbar.update(i)
            should_stop, emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j = process_frame(
                ahegao_classifier, axarr, cap, emotion_data_x, emotion_data_y,
                enable_ahegao_classification, face_data_x, face_data_y, face_detector,
                face_probs_x, face_probs_y, i, im, j, max_faces_probs,
                update_face_probs, update_face_line)
            return mplfig_to_npimage(fig)

        orig_audio = AudioFileClip(osp.join(VIDEOS_DIR, video_name))
        animation = VideoClip(make_frame, duration=duration)
        animation = animation.set_audio(orig_audio)  # set_audio returns a new clip
        animation.write_videofile(output_file, fps=fps)
        pbar.finish()  # frames are generated lazily by write_videofile, so finish afterwards

    cap.release()
    cv2.destroyAllWindows()
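# Usage sketch: construction of the detector/classifier is not shown in this
# file; the argument names below are placeholders.
#
#   visualize_and_eval('clip.mp4', face_detector)        # live preview window
#   visualize_and_eval('clip.mp4', face_detector, ahegao_classifier,
#                      output_file='annotated.mp4')      # render to a video file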
# NOTE: this excerpt begins mid-statement; the call being closed on the first
# line (a resize in the preceding branch) is elided above.
                             width=256, height=256)
else:
    raise NotImplementedError("Invalid Video Resize Mode")

driving_video = [(frame / 255) for frame in source_video.iter_frames()]

print()
print("Generating Video")
predictions = make_animation(source_image, driving_video, generator, kp_detector, **options)
print()

output_clip = VideoClip(make_frame, duration=source_duration)
output_clip = output_clip.set_fps(source_fps)
output_clip = output_clip.set_audio(source_audio)

if args.image_resize == 'fill' and args.crop_output:
    print(f"Cropping output video to {unfill_width}x{unfill_height}")
    output_clip = movie_crop(output_clip, x_center=256 // 2, y_center=256 // 2,
                             width=unfill_width, height=unfill_height)

print("Saving Video...")
output_clip.write_videofile(output_video_path, logger=None, verbose=False, **codecs[args.codec])
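# `codecs` is defined elsewhere in the script; a minimal sketch of its likely
# shape, mapping a codec name to write_videofile keyword arguments (the values
# below are illustrative placeholders, not the project's actual settings):
#
#   codecs = {
#       'h264': {'codec': 'libx264', 'audio_codec': 'aac'},
#       'mpeg4': {'codec': 'mpeg4', 'audio_codec': 'libmp3lame'},
#   }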