Example #1
def main():
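    # Build 800-sample blocks (stride 200) from 107 files, train the
    # autoencoder, and write the reconstruction of the first file.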
    data = get_dataset(block_interval=200, block_size=800, file_count=107)
    model = AutoEncoder()
    model.load()
    model.train(data.train_data, data.test_data)
    for i in range(1):
        output = model.predict_output(data.files[i])
        data.write_wav(f"output-highfreq-{i}.wav", output)
def main():
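    # Load INPUT_COUNT-sample blocks from 30 files, optionally retrain with
    # --train, plot two blocks against their reconstructions, and write
    # reconstructed audio for up to ten source files.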
    # backend.set_floatx("float16")
    # backend.set_epsilon(1e-4)

    data = get_dataset(
        block_interval=max(min(INPUT_COUNT, 10000), 16),
        block_size=INPUT_COUNT,
        file_count=30,
        output_size=0,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()
    if "--train" in sys.argv:
        model.train(train_data, train_data, test_data, test_data)

    plt.subplot(2, 2, 1)
    plt.plot(data.files[0][200])

    plt.subplot(2, 2, 2)
    plt.plot(
        model.predict_output(data.files[0][200].reshape(1, INPUT_COUNT,
                                                        1)).flatten())

    plt.subplot(2, 2, 3)
    plt.plot(data.files[0][210])

    plt.subplot(2, 2, 4)
    plt.plot(
        model.predict_output(data.files[0][210].reshape(1, INPUT_COUNT,
                                                        1)).flatten())

    plt.show()

    for i in range(min(len(data.files), 10)):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = model.predict_output(inp).flatten()
        data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")
Example #3
def main():
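    # Run everything in float16 (epsilon raised to suit the reduced precision)
    # and resynthesise a single file block by block.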
    backend.set_floatx("float16")
    backend.set_epsilon(1e-4)

    data = get_dataset(
        block_interval=1,
        block_size=INPUT_COUNT,
        file_count=1,
        output_size=OUTPUT_COUNT,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()
    # model.train(train_data, train_data, test_data, test_data)

    for i in range(1):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = model.predict_output(inp).reshape(len(data.files[i]),
                                                   INPUT_COUNT)
        data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")
def main():
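    # Two-level experiment: train a first autoencoder on raw blocks, encode a
    # file with it, train a second autoencoder on sequences of those latent
    # frames, then decode the chain back to audio.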
    backend.set_floatx("float16")
    backend.set_epsilon(1e-4)

    just_files = False
    data = get_dataset(
        block_interval=50,
        block_size=INPUT_COUNT,
        file_count=1,
        output_size=0,
        shuffle=True,
        just_files=just_files,
    )
    if not just_files:
        train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
        test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    level1 = Level1Autoencoder()
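    # The first-level autoencoder compresses raw INPUT_COUNT-sample blocks
    # into 128-channel latent frames.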
    level1.train(train_data, train_data, test_data, test_data)

    # Prepare data by running it through our first level autoencoder
    data = level1.encode(data.files[0].reshape(len(data.files[0]), INPUT_COUNT, 1))

    plotdata = data.reshape(len(data), 128)[:1000]
    plt.subplot(2, 1, 1)
    plt.plot(plotdata)

    data = data[:int(len(data) / LEVEL_2_INPUT_COUNT) * LEVEL_2_INPUT_COUNT]
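    # Group the (now evenly divisible) frames into (n, LEVEL_2_INPUT_COUNT, 128)
    # sequences for the second-level model.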
    data = np.array(np.split(data, len(data) / LEVEL_2_INPUT_COUNT))
    data = data.reshape(len(data), LEVEL_2_INPUT_COUNT, 128)

    # Unload level 1 model
    del level1
    backend.clear_session()

    level2 = Level2Autoencoder()
    level2.train(data, data, data, data)

    output = level2.predict_output(data)
    print(output.shape)

    plotdata = output.reshape(output.shape[0] * output.shape[1], 128)[:1000]
    plt.subplot(2, 1, 2)
    plt.plot(plotdata)
    plt.show()

    print(output.shape)
    output = output.reshape(output.shape[0] * output.shape[1], 1, 128)
    print(output.shape)

    del level2
    backend.clear_session()

    level1 = Level1Autoencoder()
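    # Decode the level-2 reconstruction back to audio with the first-level model.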
    output = level1.decode(output).flatten()

    data = Dataset()
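    # A fresh Dataset instance provides write_wav and the source files used below.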
    data.write_wav(f"output-{NAME}-{MODEL_ID}-level2.wav", output)

    for i in range(min(len(data.files), 2)):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = level1.decode(level1.encode(inp)).reshape(len(data.files[i]), INPUT_COUNT)
        # output = level1.predict_output(inp).reshape(len(data.files[i]), INPUT_COUNT)
        data.write_wav(f"output-{NAME}-{MODEL_ID}-level1-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")
        plt.subplot(2, 2)
        plt.plot(inp.flatten()[2000:8000])
        plt.subplot(2, 2)
        plt.plot(output.flatten()[2000:8000])
        plt.show()
Example #5
def main():
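    # Command-line driven variant: --train retrains the model, --plot compares
    # two blocks with their reconstructions, --out writes reconstructed audio
    # for up to ten files, --convert loads whole source files, and --vis
    # renders a video of the 128 latent channels with the resynthesised audio
    # as its soundtrack.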
    # backend.set_floatx("float16")
    # backend.set_epsilon(1e-4)

    data = get_dataset(
        block_interval=10000,
        block_size=INPUT_COUNT,
        file_count=30,
        output_size=0,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()

    if "--train" in sys.argv:
        model.train(train_data, train_data, test_data, test_data)

    if "--plot" in sys.argv:
        plt.subplot(2, 2, 1)
        plt.plot(data.files[0][200])

        plt.subplot(2, 2, 2)
        plt.plot(
            model.predict_output(data.files[0][200].reshape(1, INPUT_COUNT,
                                                            1)).flatten())

        plt.subplot(2, 2, 3)
        plt.plot(data.files[0][210])

        plt.subplot(2, 2, 4)
        plt.plot(
            model.predict_output(data.files[0][210].reshape(1, INPUT_COUNT,
                                                            1)).flatten())

        plt.show()

    if "--out" in sys.argv:
        for i in range(min(len(data.files), 10)):
            inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
            output = model.predict_output(inp).flatten()
            data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
            print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")

    if "--convert" in sys.argv:
        file_data = get_dataset(
            block_interval=INPUT_COUNT,
            block_size=INPUT_COUNT,
            file_count=107,
            output_size=0,
            shuffle=False,
            just_files=True,
        )
        inp = data.files.reshape()

    if "--vis" in sys.argv:
        os.environ["FFMPEG_BINARY"] = "ffmpeg"
        from moviepy.editor import (CompositeVideoClip, AudioFileClip,
                                    VideoClip)

        file = data.files[0]
        inp = file.reshape(len(file), INPUT_COUNT, 1)
        output = model.encode(inp)
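        # The encoder is assumed to emit 512 latent time steps of 128 channels
        # per input block; flatten all blocks into one sequence of frames.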
        output = output.reshape(output.shape[0] * 512, 128)
        min_val = np.amin(output)
        max_val_normalized = np.amax(output) - min_val

        class Figure(object):
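            """Scrolling 60x60 bar graph for a single latent channel.

            push() keeps the peak normalised value for the current column,
            snapshot() stores the buffer as one frame and scrolls it left,
            and render() draws a stored frame as a white-on-black image.
            """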
            def __init__(self, width, height, row, column, frame_duration):
                self.width = width
                self.height = height
                self.row = row
                self.column = column
                self.frame_duration = frame_duration
                self.current_highest = 0
                self.buffer = [0 for i in range(self.width)]
                self.figures = []

            def push(self, val):
                if val > self.buffer[-1]:
                    self.buffer[-1] = val

            def render(self, peaks):
                figure = np.zeros((self.width, self.height), int)
                for column, peak in enumerate(peaks):
                    for fill in range(int(round(peak * (self.height - 1)))):
                        figure[self.height - 1 - fill, column] = 255
                return np.stack((figure, ) * 3, axis=-1)

            def snapshot(self):
                self.figures.append(self.buffer)
                self.buffer = self.buffer[1:self.width] + [0]

        class FigureClip(VideoClip):
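            """VideoClip that plays back one Figure's snapshots.

            Each frame lasts figure.frame_duration seconds and the clip is
            positioned on the 16x8 grid at 66 px per cell (60 px figure plus
            6 px padding).
            """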
            def __init__(self, figure):
                super().__init__()
                self.figure = figure
                self.make_frame = lambda time: self.make_into_frame(time)
                self.start = 0
                self.end = figure.frame_duration * len(figure.figures)
                self.size = (figure.width, figure.height)

                # 16 columns
                # 8 rows
                # padding of 6px
                self.pos = lambda _: (66 * figure.column + 6, 66 * figure.row +
                                      6)

            def make_into_frame(self, time):
                index = int(time / self.figure.frame_duration)
                if index >= len(self.figure.figures):
                    return np.zeros((self.figure.width, self.figure.height, 3), int)
                return self.figure.render(self.figure.figures[index])

        last_percentage = -1
        figures = []

        # (graph total duration / graph datapoint count) * (graph datapoint count / graph width)
        figure_snapshot_rate = 40
        tick_to_sample_ratio = 32.87890625  # This is still off sync with the audio, 2:53 becomes 2:58 for some reason
        frame_duration = (figure_snapshot_rate * tick_to_sample_ratio) / 44100
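        # With the values above this is 40 * 32.87890625 / 44100 ≈ 0.0298 s
        # per snapshot frame (~33.5 fps).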
        for i in range(128):
            column = i % 16
            row = int(i / 16)
            figures.append(Figure(60, 60, row, column, frame_duration))

        print(f"Rendering output: {output.shape}")
        for index, entry in enumerate(output):
            should_snapshot = index % figure_snapshot_rate == 0

            for plot_index, plot in enumerate(figures):
                plot.push((entry[plot_index] - min_val) / max_val_normalized)

                if should_snapshot:
                    plot.snapshot()

            percentage = int(index / len(output) * 100)
            if percentage != last_percentage:
                last_percentage = percentage
                print(f"Capturing figures: {percentage}%...")

        print(f"{len(figures[0].figures)} figure frames rendered")
        clips = [FigureClip(figure) for figure in figures]

        audio_filename = f"vis/output.wav"
        output = model.predict_output(inp).flatten()
        data.write_wav(audio_filename, output)
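        # The resynthesised track written above becomes the video's soundtrack.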

        del model
        backend.clear_session()

        audio = AudioFileClip(audio_filename)
        audio = audio.set_start(0)
        audio = audio.set_duration(
            min(audio.duration, frame_duration * len(figures[0].figures)))

        result = CompositeVideoClip(clips, size=(16 * 66 + 12, 8 * 66 + 12))
        result = result.set_audio(audio)
        result.write_videofile("vis/output.mp4", fps=1 / frame_duration)