def main():
    data = get_dataset(block_interval=200, block_size=800, file_count=107)
    model = AutoEncoder()
    model.load()
    model.train(data.train_data, data.test_data)
    for i in range(1):
        output = model.predict_output(data.files[i])
        data.write_wav(f"output-highfreq-{i}.wav", output)
def main():
    # backend.set_floatx("float16")
    # backend.set_epsilon(1e-4)
    data = get_dataset(
        block_interval=max(min(INPUT_COUNT, 10000), 16),
        block_size=INPUT_COUNT,
        file_count=30,
        output_size=0,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()
    if "--train" in sys.argv:
        model.train(train_data, train_data, test_data, test_data)

    plt.subplot(2, 2, 1)
    plt.plot(data.files[0][200])
    plt.subplot(2, 2, 2)
    plt.plot(
        model.predict_output(data.files[0][200].reshape(1, INPUT_COUNT, 1)).flatten())
    plt.subplot(2, 2, 3)
    plt.plot(data.files[0][210])
    plt.subplot(2, 2, 4)
    plt.plot(
        model.predict_output(data.files[0][210].reshape(1, INPUT_COUNT, 1)).flatten())
    plt.show()

    for i in range(min(len(data.files), 10)):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = model.predict_output(inp).flatten()
        data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")
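# A note on the reshape calls above: get_dataset() appears to return flat
# blocks of shape (num_blocks, INPUT_COUNT), and Keras-style Conv1D layers
# expect a trailing channel axis, i.e. (batch, steps, channels). A minimal
# sketch of the transformation, using hypothetical sizes for illustration:
import numpy as np

INPUT_COUNT = 1024                     # hypothetical value for illustration
blocks = np.zeros((500, INPUT_COUNT))  # stand-in for data.train_data

model_input = blocks.reshape(len(blocks), INPUT_COUNT, 1)
assert model_input.shape == (500, INPUT_COUNT, 1)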
def main():
    backend.set_floatx("float16")
    backend.set_epsilon(1e-4)
    data = get_dataset(
        block_interval=1,
        block_size=INPUT_COUNT,
        file_count=1,
        output_size=OUTPUT_COUNT,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()
    # model.train(train_data, train_data, test_data, test_data)

    for i in range(1):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = model.predict_output(inp).reshape(len(data.files[i]), INPUT_COUNT)
        data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")
def main():
    backend.set_floatx("float16")
    backend.set_epsilon(1e-4)
    just_files = False
    data = get_dataset(
        block_interval=50,
        block_size=INPUT_COUNT,
        file_count=1,
        output_size=0,
        shuffle=True,
        just_files=just_files,
    )
    # train_data/test_data only exist when the full dataset is loaded.
    if not just_files:
        train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
        test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    level1 = Level1Autoencoder()
    level1.train(train_data, train_data, test_data, test_data)

    # Prepare data by running it through our first level autoencoder
    data = level1.encode(data.files[0].reshape(len(data.files[0]), INPUT_COUNT, 1))
    plotdata = data.reshape(len(data), 128)[:1000]
    plt.subplot(2, 1, 1)
    plt.plot(plotdata)

    # Trim to an exact multiple of LEVEL_2_INPUT_COUNT, then group the level-1
    # codes into fixed-length sequences for the second level.
    data = data[:len(data) // LEVEL_2_INPUT_COUNT * LEVEL_2_INPUT_COUNT]
    data = np.array(np.split(data, len(data) // LEVEL_2_INPUT_COUNT))
    data = data.reshape(len(data), LEVEL_2_INPUT_COUNT, 128)

    # Unload level 1 model
    del level1
    backend.clear_session()

    level2 = Level2Autoencoder()
    level2.train(data, data, data, data)
    output = level2.predict_output(data)
    print(output.shape)
    plotdata = output.reshape(output.shape[0] * output.shape[1], 128)[:1000]
    plt.subplot(2, 1, 2)
    plt.plot(plotdata)
    plt.show()
    print(output.shape)
    output = output.reshape(output.shape[0] * output.shape[1], 1, 128)
    print(output.shape)

    # Unload level 2 and bring level 1 back to decode the round-tripped codes.
    # NOTE: this assumes Level1Autoencoder restores its trained weights on
    # construction; otherwise a load step is missing here.
    del level2
    backend.clear_session()
    level1 = Level1Autoencoder()
    output = level1.decode(output).flatten()
    data = Dataset()
    data.write_wav(f"output-{NAME}-{MODEL_ID}-level2.wav", output)

    for i in range(min(len(data.files), 2)):
        inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
        output = level1.decode(level1.encode(inp)).reshape(len(data.files[i]), INPUT_COUNT)
        # output = level1.predict_output(inp).reshape(len(data.files[i]), INPUT_COUNT)
        data.write_wav(f"output-{NAME}-{MODEL_ID}-level1-{i}.wav", output)
        print(f"output-{NAME}-{MODEL_ID}-level1-{i}.wav created")
        plt.subplot(2, 1, 1)
        plt.plot(inp.flatten()[2000:8000])
        plt.subplot(2, 1, 2)
        plt.plot(output.flatten()[2000:8000])
        plt.show()
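# The trim-and-split step above is the fiddly part of feeding level-1 codes
# into level 2: the code sequence is cut to an exact multiple of
# LEVEL_2_INPUT_COUNT, then split into equal-length chunks. A minimal sketch
# of the same pattern with hypothetical sizes:
import numpy as np

LEVEL_2_INPUT_COUNT = 16          # hypothetical chunk length
codes = np.zeros((1003, 1, 128))  # stand-in for level1.encode(...) output

usable = len(codes) // LEVEL_2_INPUT_COUNT * LEVEL_2_INPUT_COUNT
chunks = np.array(np.split(codes[:usable], usable // LEVEL_2_INPUT_COUNT))
chunks = chunks.reshape(len(chunks), LEVEL_2_INPUT_COUNT, 128)
assert chunks.shape == (62, LEVEL_2_INPUT_COUNT, 128)  # 1003 -> 62 full chunks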
def main():
    # backend.set_floatx("float16")
    # backend.set_epsilon(1e-4)
    data = get_dataset(
        block_interval=10000,
        block_size=INPUT_COUNT,
        file_count=30,
        output_size=0,
        shuffle=True,
    )
    train_data = data.train_data.reshape(len(data.train_data), INPUT_COUNT, 1)
    test_data = data.test_data.reshape(len(data.test_data), INPUT_COUNT, 1)

    model = ExperimentalModel()
    model.load()

    if "--train" in sys.argv:
        model.train(train_data, train_data, test_data, test_data)

    if "--plot" in sys.argv:
        plt.subplot(2, 2, 1)
        plt.plot(data.files[0][200])
        plt.subplot(2, 2, 2)
        plt.plot(
            model.predict_output(data.files[0][200].reshape(1, INPUT_COUNT, 1)).flatten())
        plt.subplot(2, 2, 3)
        plt.plot(data.files[0][210])
        plt.subplot(2, 2, 4)
        plt.plot(
            model.predict_output(data.files[0][210].reshape(1, INPUT_COUNT, 1)).flatten())
        plt.show()

    if "--out" in sys.argv:
        for i in range(min(len(data.files), 10)):
            inp = data.files[i].reshape(len(data.files[i]), INPUT_COUNT, 1)
            output = model.predict_output(inp).flatten()
            data.write_wav(f"output-{NAME}-{MODEL_ID}-{i}.wav", output)
            print(f"output-{NAME}-{MODEL_ID}-{i}.wav created")

    if "--convert" in sys.argv:
        # NOTE: this branch is unfinished; file_data is loaded but never used,
        # and the reshape target below was never specified.
        file_data = get_dataset(
            block_interval=INPUT_COUNT,
            block_size=INPUT_COUNT,
            file_count=107,
            output_size=0,
            shuffle=False,
            just_files=True,
        )
        inp = data.files.reshape()

    if "--vis" in sys.argv:
        os.environ["FFMPEG_BINARY"] = "ffmpeg"
        from moviepy.editor import CompositeVideoClip, AudioFileClip, VideoClip

        file = data.files[0]
        inp = file.reshape(len(file), INPUT_COUNT, 1)
        output = model.encode(inp)
        output = output.reshape(output.shape[0] * 512, 128)
        min_val = np.amin(output)
        max_val_normalized = np.amax(output) - min_val

        class Figure(object):
            def __init__(self, width, height, row, column, frame_duration):
                self.width = width
                self.height = height
                self.row = row
                self.column = column
                self.frame_duration = frame_duration
                self.current_highest = 0
                self.buffer = [0 for i in range(self.width)]
                self.figures = []

            def push(self, val):
                # Keep the peak value seen since the last snapshot.
                if val > self.buffer[-1]:
                    self.buffer[-1] = val

            def render(self, peaks):
                figure = np.zeros((self.height, self.width), int)
                for column, peak in enumerate(peaks):
                    for fill in range(int(round(peak * (self.height - 1)))):
                        figure[self.height - 1 - fill, column] = 255
                return np.stack((figure, ) * 3, axis=-1)

            def snapshot(self):
                self.figures.append(self.buffer)
                # Scroll one column to the left and open a fresh slot.
                self.buffer = self.buffer[1:self.width] + [0]

        class FigureClip(VideoClip):
            def __init__(self, figure):
                super().__init__()
                self.figure = figure
                self.make_frame = lambda time: self.make_into_frame(time)
                self.start = 0
                self.end = figure.frame_duration * len(figure.figures)
                self.size = (figure.width, figure.height)
                # 16 columns, 8 rows, padding of 6px
                self.pos = lambda _: (66 * figure.column + 6, 66 * figure.row + 6)

            def make_into_frame(self, time):
                index = int(time / self.figure.frame_duration)
                if index >= len(self.figure.figures):
                    return np.zeros((self.figure.height, self.figure.width, 3), int)
                return self.figure.render(self.figure.figures[index])

        last_percentage = -1
        figures = []
        # (graph total duration / graph datapoint count) * (graph datapoint count / graph width)
        figure_snapshot_rate = 40
        tick_to_sample_ratio = 32.87890625
        # This is still off sync with the audio, 2:53 becomes 2:58 for some reason
        frame_duration = (figure_snapshot_rate * tick_to_sample_ratio) / 44100
        for i in range(128):
            column = i % 16
            row = i // 16
            figures.append(Figure(60, 60, row, column, frame_duration))

        print(f"Rendering output: {output.shape}")
        for index, entry in enumerate(output):
            should_snapshot = index % figure_snapshot_rate == 0
            for plot_index, plot in enumerate(figures):
                plot.push((entry[plot_index] - min_val) / max_val_normalized)
                if should_snapshot:
                    plot.snapshot()
            percentage = int(index / len(output) * 100)
            if last_percentage != percentage:
                last_percentage = percentage
                print(f"Capturing figures: {percentage}%...")
        print(f"{len(figures[0].figures)} figure frames rendered")

        clips = [FigureClip(figure) for figure in figures]
        audio_filename = "vis/output.wav"
        output = model.predict_output(inp).flatten()
        data.write_wav(audio_filename, output)
        del model
        backend.clear_session()

        audio = AudioFileClip(audio_filename)
        audio = audio.set_start(0)
        audio = audio.set_duration(
            min(audio.duration, frame_duration * len(figures[0].figures)))
        result = CompositeVideoClip(clips, size=(16 * 66 + 12, 8 * 66 + 12))
        result = result.set_audio(audio)
        result.write_videofile("vis/output.mp4", fps=1 / frame_duration)
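# Sanity check of the frame timing used in the --vis branch, with the same
# constants as above. The drift noted there (2:53 of audio rendering as
# roughly 2:58 of video) is about 2.9%, which suggests tick_to_sample_ratio
# overshoots the true samples-per-tick by roughly that factor; this is an
# assumption, not a confirmed diagnosis.
figure_snapshot_rate = 40            # encoded ticks per captured frame
tick_to_sample_ratio = 32.87890625   # audio samples per encoded tick
sample_rate = 44100

frame_duration = (figure_snapshot_rate * tick_to_sample_ratio) / sample_rate
print(frame_duration)                # ~0.0298 s per frame, i.e. ~33.5 fps
print(178 / 173)                     # ~1.029, the observed video/audio ratio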