Example #1
import numpy as np
import librosa
from moviepy.editor import VideoClip, AudioFileClip


def make_video(audio, filename, progan, n_bins=60, random_state=0, imgs_per_batch=20):
    y, sr = librosa.load(audio)
    song_length = len(y) / sr
    z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins, random_state=random_state)
    fps = z_audio.shape[0] / song_length
    res = progan.get_cur_res()
    # frame shape: (height, width, channels) for a 16:9 output canvas
    shape = (res, res * 16 // 9, 3)

    imgs = np.zeros(shape=[imgs_per_batch, *shape], dtype=np.float32)

    def make_frame(t):
        nonlocal imgs  # rebind the batch buffer defined in make_video, not a module global
        cur_frame_idx = int(t * fps)

        if cur_frame_idx >= len(z_audio):
            return np.zeros(shape=shape, dtype=np.uint8)

        if cur_frame_idx % imgs_per_batch == 0:
            # Generate the next batch, keep 8/9 of the width and mirror it below to fill 16:9
            imgs = progan.generate(z_audio[cur_frame_idx:cur_frame_idx + imgs_per_batch])
            imgs = imgs[:, :, :res * 8 // 9, :]
            imgs_rev = np.flip(imgs, 2)
            imgs = np.concatenate((imgs, imgs_rev), 2)

        return imgs[cur_frame_idx % imgs_per_batch]

    video_clip = VideoClip(make_frame=make_frame, duration=song_length)
    audio_clip = AudioFileClip(audio)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(filename, fps=fps)
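The first two examples call a helper get_z_from_audio that is not shown here. A minimal sketch of such a helper, assuming it maps a constant-Q spectrogram of the waveform onto latent vectors of length z_length through a fixed random projection (the projection and normalization below are illustrative assumptions, not the original implementation):

import numpy as np
import librosa


def get_z_from_audio(y, z_length, n_bins=84, random_state=0):
    # Constant-Q transform of the waveform: shape (n_bins, n_frames)
    cqt = np.abs(librosa.cqt(y, n_bins=n_bins))
    # Fixed random projection from n_bins spectral bins to z_length latent dims
    rng = np.random.RandomState(random_state)
    projection = rng.randn(n_bins, z_length)
    z = cqt.T @ projection  # (n_frames, z_length)
    # Keep the latent vectors roughly unit-length so the generator input stays bounded
    z /= np.linalg.norm(z, axis=1, keepdims=True) + 1e-8
    return z.astype(np.float32)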
Example #2
import numpy as np
import librosa
from moviepy.editor import VideoClip, AudioFileClip


def make_video(audio, filename, progan, n_bins=84, random_state=0):
    y, sr = librosa.load(audio)
    song_length = len(y) / sr
    z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins, random_state=random_state)
    fps = z_audio.shape[0] / song_length
    shape = progan.generate(z_audio[0]).shape

    def make_frame(t):
        cur_frame_idx = int(t * fps)
        if cur_frame_idx < len(z_audio):
            img = progan.generate(z_audio[cur_frame_idx])
        else:
            img = np.zeros(shape=shape, dtype=np.uint8)
        return img

    video_clip = VideoClip(make_frame=make_frame, duration=song_length)
    audio_clip = AudioFileClip(audio)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(filename, fps=fps)
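Both examples assume a progan object exposing z_length, get_cur_res() and generate(). A stand-in with that interface, useful only for exercising the video pipeline without a trained model (an illustrative assumption, not the project's ProGAN class):

import numpy as np


class DummyProgan:
    """Mimics the generator interface the examples above rely on."""

    def __init__(self, z_length=512, res=256):
        self.z_length = z_length
        self._res = res

    def get_cur_res(self):
        # Current output resolution of the progressively grown generator
        return self._res

    def generate(self, z):
        # Accept a single latent vector or a batch; return square RGB frames in [0, 1]
        z = np.asarray(z)
        if z.ndim == 1:
            return np.random.rand(self._res, self._res, 3).astype(np.float32)
        return np.random.rand(len(z), self._res, self._res, 3).astype(np.float32)


# Hypothetical smoke test; "song.wav" and "song.mp4" are placeholder paths.
# make_video("song.wav", "song.mp4", DummyProgan())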
Example #3
import os.path as osp

import cv2
import numpy as np
import matplotlib.pyplot as plt
import progressbar
from moviepy.editor import AudioFileClip, VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage

# VIDEOS_DIR and process_frame come from the surrounding project and are not shown here.


def visualize_and_eval(video_name, face_detector, ahegao_classifier=None, output_file=None):
    enable_ahegao_classification = ahegao_classifier is not None

    cv2.ocl.setUseOpenCL(False)
    cap = cv2.VideoCapture(osp.join(VIDEOS_DIR, video_name))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    max_faces_probs = 3
    if output_file is None:
        plt.ion()
    fig = plt.figure(figsize=(15, 8))
    ax0 = plt.subplot2grid((2, 2), (0, 1))  # number of faces detected
    ax1 = plt.subplot2grid((2, 2), (1, 1))  # showing emotions distribution or faces probs
    ax2 = plt.subplot2grid((2, 2), (0, 0), rowspan=2)  # showing image
    axarr = [ax0, ax1, ax2]
    plt.tight_layout()
    axarr[0].set_title('num faces detected')
    face_line, = axarr[0].plot([], [], 'r-')
    face_probs_lines = []
    if enable_ahegao_classification:
        axarr[1].stackplot([], [])
    else:
        for i in range(max_faces_probs):
            face_probs_line, = axarr[1].plot([], [], 'r-')
            face_probs_lines.append(face_probs_line)
        axarr[1].set_ylim(-0.05, 1.05)
        axarr[1].yaxis.grid(True)
    im = axarr[2].imshow(np.zeros((height, width)))
    axarr[2].grid(False)
    axarr[2].axis('off')

    i = 0
    face_data_x = []
    face_data_y = []
    emotion_data_x = []
    emotion_data_y = np.empty(0)
    face_probs_x = []
    face_probs_y = np.empty((0, 3))
    j = 0

    if output_file is None:
        def update_face_probs(face_probs_x, face_probs_y):
            for k, face_probs_line in enumerate(face_probs_lines):
                face_probs_line.set_xdata(face_probs_x)
                face_probs_line.set_ydata(face_probs_y[:, k])

        def update_face_line(face_data_x, face_data_y):
            face_line.set_xdata(face_data_x)
            face_line.set_ydata(face_data_y)
            # update x and ylim to show all points:
            axarr[0].set_xlim(min(face_data_x) - 0.5, max(face_data_x) + 0.5)
            axarr[0].set_ylim(min(face_data_y) - 0.5, max(face_data_y) + 0.5)

        should_stop = False
        while not should_stop:
            should_stop, emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j = process_frame(
                ahegao_classifier, axarr, cap, emotion_data_x, emotion_data_y, enable_ahegao_classification,
                face_data_x, face_data_y, face_detector, face_probs_x, face_probs_y, i, im, j, max_faces_probs,
                update_face_probs, update_face_line)
            plt.draw()
            plt.pause(0.0001)
    else:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps

        widgets = [progressbar.Percentage(), ' ', progressbar.Counter(), ' ', progressbar.Bar(), ' ',
                   progressbar.FileTransferSpeed()]
        pbar = progressbar.ProgressBar(widgets=widgets, max_value=frame_count).start()

        def update_face_probs(face_probs_x, face_probs_y):
            axarr[1].clear()
            for k, face_probs_line in enumerate(face_probs_lines):
                axarr[1].plot(face_probs_x, face_probs_y[:, k], 'r-')
            axarr[1].set_ylim(-0.05, 1.05)
            axarr[1].yaxis.grid(True)

        def update_face_line(face_data_x, face_data_y):
            axarr[0].clear()
            axarr[0].set_title('num faces detected')
            axarr[0].plot(face_data_x, face_data_y, 'r-')
            axarr[0].set_xlim(min(face_data_x) - 0.5, max(face_data_x) + 0.5)
            axarr[0].set_ylim(min(face_data_y) - 0.5, max(face_data_y) + 0.5)

        def make_frame(t):
            nonlocal emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j
            pbar.update(i)
            should_stop, emotion_data_x, emotion_data_y, face_probs_x, face_probs_y, i, j = process_frame(
                ahegao_classifier, axarr, cap, emotion_data_x, emotion_data_y, enable_ahegao_classification,
                face_data_x, face_data_y, face_detector, face_probs_x, face_probs_y, i, im, j, max_faces_probs,
                update_face_probs, update_face_line)
            return mplfig_to_npimage(fig)
        orig_audio = AudioFileClip(osp.join(VIDEOS_DIR, video_name))
        animation = VideoClip(make_frame, duration=duration)
        animation = animation.set_audio(orig_audio)  # set_audio returns a new clip
        animation.write_videofile(output_file, fps=fps)
        pbar.finish()

    cap.release()
    cv2.destroyAllWindows()
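The file-output branch above draws into a Matplotlib figure, converts it with mplfig_to_npimage and lets MoviePy pull frames through make_frame. The same pattern in isolation, as a minimal sketch with a placeholder animation instead of the project's process_frame:

import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage

fig, ax = plt.subplots(figsize=(6, 4))
line, = ax.plot([], [], 'r-')
ax.set_xlim(0, 2 * np.pi)
ax.set_ylim(-1.1, 1.1)


def make_frame(t):
    # Redraw the figure for time t and hand it to MoviePy as an RGB array
    x = np.linspace(0, 2 * np.pi, 200)
    line.set_data(x, np.sin(x + 2 * np.pi * t))
    return mplfig_to_npimage(fig)


VideoClip(make_frame, duration=3).write_videofile('sine.mp4', fps=24)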
Example #4
                                      width=256,
                                      height=256)
        else:
            raise NotImplementedError("Invalid Video Resize Mode")

    driving_video = [(frame / 255) for frame in source_video.iter_frames()]
    print()

    print("Generating Video")
    predictions = make_animation(source_image, driving_video, generator,
                                 kp_detector, **options)
    print()

    output_clip = VideoClip(make_frame, duration=source_duration)
    output_clip = output_clip.set_fps(source_fps)
    output_clip = output_clip.set_audio(source_audio)

    if args.image_resize == 'fill' and args.crop_output:
        print(f"Cropping output video to {unfill_width}x{unfill_height}")
        output_clip = movie_crop(output_clip,
                                 x_center=256 // 2,
                                 y_center=256 // 2,
                                 width=unfill_width,
                                 height=unfill_height)

    print("Saving Video...")
    output_clip.write_videofile(output_video_path,
                                logger=None,
                                verbose=False,
                                **codecs[args.codec])
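The codecs table unpacked into write_videofile is not part of this snippet. A hypothetical shape for it, assuming it only carries keyword arguments that MoviePy's write_videofile accepts (the codec choices below are placeholders, not the project's actual configuration):

# Illustrative only; write_videofile accepts codec and audio_codec keywords.
codecs = {
    'x264': {'codec': 'libx264', 'audio_codec': 'aac'},
    'vp9': {'codec': 'libvpx-vp9', 'audio_codec': 'libvorbis'},
}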