Example #1
def video_render(txt_file, image_file, sound_file, save_file):
    import re
    from moviepy.editor import (ImageClip, CompositeVideoClip,
                                CompositeAudioClip, TextClip, AudioFileClip,
                                concatenate_videoclips)
    from moviepy.config import change_settings

    # Point moviepy at the ImageMagick binary used for TextClip rendering.
    change_settings({"IMAGEMAGICK_BINARY": "/usr/local/bin/convert"})

    # Read the script, skipping blank lines.
    text = []
    with open(txt_file, 'r') as file:
        for line in file:
            if line != "\n":
                text.append(line.rstrip('\n'))

    # Half a second per word for each caption, with a 3-second minimum.
    durs = [max(len(re.findall(r'\w+', line)) / 2, 3) for line in text]
    total_duration = sum(durs)

    # If the audio is shorter than the video, overlap a second copy
    # (starting 1 s before the first ends) to cover the full duration.
    a_clip = AudioFileClip(sound_file)
    if a_clip.duration < total_duration:
        new_audioclip = CompositeAudioClip(
            [a_clip, a_clip.set_start(a_clip.duration - 1)]
        ).set_duration(total_duration + 3)
    else:
        new_audioclip = a_clip.set_duration(total_duration + 3)

    screen = (1920, 1080)
    clip_list = []
    for string, duration in zip(text, durs):
        try:
            txt_clip = (TextClip(string, fontsize=70, color='white',
                                 method='caption', size=screen)
                        .set_duration(duration)
                        .set_pos('center'))
        except UnicodeEncodeError:
            txt_clip = TextClip("Issue with text", fontsize=70,
                                color='white').set_duration(2)
        clip_list.append(txt_clip)

    # Captions start 3 seconds in, over a static image background.
    final_text_clip = concatenate_videoclips(clip_list,
                                             method="compose").set_start(3)

    v_clip = ImageClip(image_file).set_duration(total_duration + 3)
    video = CompositeVideoClip([v_clip, final_text_clip])
    video = video.set_audio(new_audioclip)
    video.write_videofile(save_file,
                          codec='libx264',
                          fps=10,
                          threads=4,
                          audio_codec='aac',
                          temp_audiofile='temp-audio.m4a',
                          remove_temp=True)
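A minimal sketch of how the function above might be called; the file names are hypothetical placeholders, and the ImageMagick path configured above assumes a typical Linux/Homebrew install at /usr/local/bin/convert.

# Hypothetical inputs: one caption per line in script.txt, a still
# background image, and a soundtrack; writes an H.264 MP4.
video_render(txt_file="script.txt",
             image_file="background.png",
             sound_file="soundtrack.mp3",
             save_file="rendered.mp4")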
Example #2
def visualize(model_cls, input_data):
    # Relies on module-level imports in the source file: os, numpy as np,
    # AudioFileClip and CompositeVideoClip from moviepy.editor, a Keras-style
    # `backend`, a `write_wav` helper, and the Figure/FigureClip classes
    # defined in Example #3 below.
    os.environ["FFMPEG_BINARY"] = "ffmpeg"

    model = model_cls()
    output = model.encode(input_data)
    # Flatten the encoder output to (timesteps, 128) activations and take
    # the global min/max range used to normalise each value into [0, 1].
    output = output.reshape(output.shape[0] * 512, 128)
    min_val = np.amin(output)
    max_val_normalized = np.amax(output) - min_val

    last_percentage = -1
    figures = []

    # (graph total duration / graph datapoint count) * (graph datapoint count / graph width)
    figure_snapshot_rate = 40
    tick_to_sample_ratio = 32.87890625  # This is still off sync with the audio, 2:53 becomes 2:58 for some reason
    frame_duration = (figure_snapshot_rate * tick_to_sample_ratio) / 44100
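    # 40 * 32.87890625 / 44100 ≈ 0.0298 s per video frame, i.e. ~33.5 fps.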
    for i in range(128):
        column = i % 16
        row = int(i / 16)
        figures.append(Figure(60, 60, row, column, frame_duration))

    print(f"Rendering output: {output.shape}")
    for index, entry in enumerate(output):
        should_snapshot = index % figure_snapshot_rate == 0

        for plot_index, plot in enumerate(figures):
            plot.push((entry[plot_index] - min_val) / max_val_normalized)

            if should_snapshot:
                plot.snapshot()

        # Report progress once per whole percent.
        percentage = int(index / len(output) * 100)
        if last_percentage != percentage:
            last_percentage = percentage
            print(f"Capturing figures: {percentage}%...")

    print(f"{len(figures[0].figures)} figure frames rendered")
    clips = [FigureClip(figure) for figure in figures]

    audio_filename = "vis/output.wav"
    output = model.predict_output(input_data).flatten()
    write_wav(audio_filename, output)

    del model
    backend.clear_session()

    audio = AudioFileClip(audio_filename)
    audio = audio.set_start(0)
    audio = audio.set_duration(
        min(audio.duration, frame_duration * len(figures[0].figures)))

    # 16x8 grid of 66 px cells (60 px figure + 6 px padding) plus margin.
    result = CompositeVideoClip(clips, size=(16 * 66 + 12, 8 * 66 + 12))
    result = result.set_audio(audio)
    result.write_videofile("vis/output.mp4", fps=1 / frame_duration)
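A sketch of one way to invoke visualize, assuming the ExperimentalModel class and get_dataset helper from Example #3 (any model exposing encode and predict_output with the same shapes would do):

# Hypothetical driver code; INPUT_COUNT comes from the surrounding module.
data = get_dataset(block_interval=10000, block_size=INPUT_COUNT,
                   file_count=1, output_size=0, shuffle=False)
input_data = data.files[0].reshape(len(data.files[0]), INPUT_COUNT, 1)
visualize(ExperimentalModel, input_data)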
Example #3
def main():
    # backend.set_floatx("float16")
    # backend.set_epsilon(1e-4)

    data = get_dataset(
        block_interval=10000,
        block_size=INPUT_COUNT,
        file_count=30,
        output_size=0,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()

    if "--train" in sys.argv:
        model.train(train_data, train_data, test_data, test_data)

    if "--plot" in sys.argv:
        plt.subplot(2, 2, 1)
        plt.plot(data.files[0][200])

        plt.subplot(2, 2, 2)
        plt.plot(
            model.predict_output(data.files[0][200].reshape(1, INPUT_COUNT,
                                                            1)).flatten())

        plt.subplot(2, 2, 3)
        plt.plot(data.files[0][210])

        plt.subplot(2, 2, 4)
        plt.plot(
            model.predict_output(data.files[0][210].reshape(1, INPUT_COUNT,
                                                            1)).flatten())

        plt.show()

    if "--out" in sys.argv:
        for i in range(min(len(data.files), 10)):
            inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
            output = model.predict_output(inp).flatten()
            data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
            print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")

    if "--convert" in sys.argv:
        file_data = get_dataset(
            block_interval=INPUT_COUNT,
            block_size=INPUT_COUNT,
            file_count=107,
            output_size=0,
            shuffle=False,
            just_files=True,
        )
        # Presumably the freshly loaded file_data is what should be reshaped
        # here, mirroring the (n, INPUT_COUNT, 1) layout of the other branches.
        inp = file_data.files.reshape(-1, INPUT_COUNT, 1)

    if "--vis" in sys.argv:
        os.environ["FFMPEG_BINARY"] = "ffmpeg"
        from moviepy.editor import (CompositeVideoClip, AudioFileClip,
                                    VideoClip)

        file = data.files[0]
        inp = file.reshape(len(file), INPUT_COUNT, 1)
        output = model.encode(inp)
        output = output.reshape(output.shape[0] * 512, 128)
        min_val = np.amin(output)
        max_val_normalized = np.amax(output) - min_val

        class Figure(object):
            def __init__(self, width, height, row, column, frame_duration):
                self.width = width
                self.height = height
                self.row = row
                self.column = column
                self.frame_duration = frame_duration
                self.current_highest = 0
                self.buffer = [0 for i in range(self.width)]
                self.figures = []

            def push(self, val):
                # Keep the peak value seen since the last snapshot.
                if val > self.buffer[-1]:
                    self.buffer[-1] = val

            def render(self, peaks):
                # Draw one vertical bar per buffered peak, bottom-up,
                # returning a white-on-black RGB frame.
                figure = np.zeros((self.width, self.height), int)
                for column, peak in enumerate(peaks):
                    for fill in range(int(round(peak * (self.height - 1)))):
                        figure[self.height - 1 - fill, column] = 255
                return np.stack((figure, ) * 3, axis=-1)

            def snapshot(self):
                # Freeze the current buffer as a frame, then scroll it left.
                self.figures.append(self.buffer)
                self.buffer = self.buffer[1:self.width] + [0]

        class FigureClip(VideoClip):
            def __init__(self, figure):
                super().__init__()
                self.figure = figure
                self.make_frame = self.make_into_frame
                self.start = 0
                self.end = figure.frame_duration * len(figure.figures)
                self.size = (figure.width, figure.height)

                # 16 columns, 8 rows, 6 px of padding around each figure.
                self.pos = lambda _: (66 * figure.column + 6,
                                      66 * figure.row + 6)

            def make_into_frame(self, time):
                index = int(time / self.figure.frame_duration)
                # Guard against running past the last snapshot; return a
                # blank RGB frame of the same shape render() produces.
                if index >= len(self.figure.figures):
                    return np.zeros(
                        (self.figure.height, self.figure.width, 3), int)
                return self.figure.render(self.figure.figures[index])

        last_percentage = -1
        figures = []

        # (graph total duration / graph datapoint count) * (graph datapoint count / graph width)
        figure_snapshot_rate = 40
        tick_to_sample_ratio = 32.87890625  # This is still off sync with the audio, 2:53 becomes 2:58 for some reason
        frame_duration = (figure_snapshot_rate * tick_to_sample_ratio) / 44100
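        # 40 * 32.87890625 / 44100 ≈ 0.0298 s per video frame, i.e. ~33.5 fps.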
        for i in range(128):
            column = i % 16
            row = int(i / 16)
            figures.append(Figure(60, 60, row, column, frame_duration))

        print(f"Rendering output: {output.shape}")
        for index, entry in enumerate(output):
            should_snapshot = index % figure_snapshot_rate == 0

            for plot_index, plot in enumerate(figures):
                plot.push((entry[plot_index] - min_val) / max_val_normalized)

                if should_snapshot:
                    plot.snapshot()

            # Report progress once per whole percent.
            percentage = int(index / len(output) * 100)
            if last_percentage != percentage:
                last_percentage = percentage
                print(f"Capturing figures: {percentage}%...")

        print(f"{len(figures[0].figures)} figure frames rendered")
        clips = [FigureClip(figure) for figure in figures]

        audio_filename = "vis/output.wav"
        output = model.predict_output(inp).flatten()
        data.write_wav(audio_filename, output)

        del model
        backend.clear_session()

        audio = AudioFileClip(audio_filename)
        audio = audio.set_start(0)
        audio = audio.set_duration(
            min(audio.duration, frame_duration * len(figures[0].figures)))

        result = CompositeVideoClip(clips, size=(16 * 66 + 12, 8 * 66 + 12))
        result = result.set_audio(audio)
        result.write_videofile("vis/output.mp4", fps=1 / frame_duration)
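The flag checks above make main() a small command-line tool. A sketch of the entry point and typical invocations, assuming the file is saved as main.py (hypothetical name):

# python main.py --train   # fit the model on the dataset
# python main.py --plot    # compare two inputs against their reconstructions
# python main.py --out     # write reconstructed WAVs for up to 10 files
# python main.py --vis     # render the 16x8 grid of latent activity to video
if __name__ == "__main__":
    main()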