def predict_multitarget(model, input_filename, style_filename, output_filename):
    """Run the multi-target model on a melody audio conditioned on a style audio.

    The melody spectrogram is cut into slices, each slice is paired (on the
    channel axis) with one randomly chosen style slice, the pairs are fed
    through *model*, and the reconstructed audio is written to
    *output_filename*.
    """
    # Melody branch: audio -> magnitude spectrogram (dB) -> normalized slices.
    melody_audio = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    melody_mag, phase = forward_transform(melody_audio)
    melody_slices = slice_magnitude(amplitude_to_db(melody_mag), IMG_DIM[1])
    melody_slices = melody_slices * 2 - 1  # rescale [0, 1] -> [-1, 1]

    # Style branch: same pipeline, but only one randomly chosen slice is kept.
    style_audio = load_audio(style_filename, sr=DEFAULT_SAMPLING_RATE)
    style_mag, _ = forward_transform(style_audio)
    style_slices = slice_magnitude(amplitude_to_db(style_mag), IMG_DIM[1])
    style_slice = style_slices[np.random.choice(style_slices.shape[0]), :, :]
    style_slice = style_slice * 2 - 1
    # Broadcast the single style slice so there is one copy per melody slice.
    style_batch = np.repeat(np.expand_dims(style_slice, axis=0),
                            melody_slices.shape[0], axis=0)

    # Stack [melody, style] along the channel axis and run the model.
    model_input = np.concatenate([melody_slices, style_batch], axis=3)
    prediction = (model.predict(model_input) + 1) / 2  # back to [0, 1]

    # Rebuild the full spectrogram and invert using the melody's phase.
    out_mag = db_to_amplitude(join_magnitude_slices(prediction, phase.shape))
    write_audio(output_filename, inverse_transform(out_mag, phase))
def predict(model, input_filename, output_filename):
    """Transform the audio in *input_filename* with *model* and save the result.

    Pipeline: audio -> magnitude/phase -> dB -> normalized slices -> model ->
    de-normalized slices -> spectrogram -> audio (using the original phase).
    """
    audio = load_audio(input_filename, sr=DEFAULT_SAMPLING_RATE)
    mag, phase = forward_transform(audio)
    slices = slice_magnitude(amplitude_to_db(mag), IMG_DIM[1])
    slices = slices * 2 - 1  # rescale [0, 1] -> [-1, 1]
    prediction = (model.predict(slices) + 1) / 2  # back to [0, 1]
    restored_mag = db_to_amplitude(join_magnitude_slices(prediction, phase.shape))
    write_audio(output_filename, inverse_transform(restored_mag, phase))
def train(data, epochs, batch_size=1, gen_lr=5e-6, disc_lr=5e-7, epoch_offset=0):
    """Train the multi-target (origin -> any style) GAN.

    Args:
        data: dict with a 'training' DataGenerator exposing ``.origin`` (str),
            ``.target`` (list of style names), ``len()``, item access and
            ``on_epoch_end()``.
        epochs: number of epochs to run in this call.
        batch_size: informational only (batching is done by the DataGenerator);
            printed in the startup banner.
        gen_lr: Adam learning rate for the generator.
        disc_lr: Adam learning rate for the discriminator.
        epoch_offset: added to epoch numbers in logs/output dirs when resuming.

    Side effects: saves weights and ``history.csv`` under CHECKPOINT_DIR, and
    per-epoch spectrogram images / audio files under OUTPUT_PATH.
    """
    generator = Generator(input_shape=[None, None, 2])
    discriminator = Discriminator(input_shape=[None, None, 1])
    generator_optimizer = tf.keras.optimizers.Adam(gen_lr)
    discriminator_optimizer = tf.keras.optimizers.Adam(disc_lr)

    model_name = data['training'].origin + '_2_any'
    checkpoint_prefix = os.path.join(CHECKPOINT_DIR, model_name)
    if not os.path.isdir(checkpoint_prefix):
        os.makedirs(checkpoint_prefix)
    else:
        # Resume from previously saved weights when they exist.
        gen_path = os.path.join(checkpoint_prefix, 'generator.h5')
        if os.path.isfile(gen_path):
            generator.load_weights(gen_path, by_name=True)
            print('Generator weights restored from ' + checkpoint_prefix)
        disc_path = os.path.join(checkpoint_prefix, 'discriminator.h5')
        if os.path.isfile(disc_path):
            discriminator.load_weights(disc_path, by_name=True)
            print('Discriminator weights restored from ' + checkpoint_prefix)

    # Number of batches per epoch.
    epoch_size = len(data['training'])

    print()
    print("Started training with the following parameters: ")
    print("\tCheckpoints: \t", checkpoint_prefix)
    print("\tEpochs: \t", epochs)
    print("\tgen_lr: \t", gen_lr)
    print("\tdisc_lr: \t", disc_lr)
    print("\tBatchSize: \t", batch_size)
    print("\tnBatches: \t", epoch_size)
    print()

    # Precompute the validation input (origin audio) once.
    audio_input = load_audio(
        os.path.join(TEST_AUDIOS_PATH, data['training'].origin + '.wav'))
    mag_input, phase = forward_transform(audio_input)
    mag_input = amplitude_to_db(mag_input)
    test_input = slice_magnitude(mag_input, mag_input.shape[0])
    test_input = (test_input * 2) - 1

    # One (conditioned input, target) validation pair per style target.
    test_inputs = []
    test_targets = []
    for t in data['training'].target:
        audio_target = load_audio(os.path.join(TEST_AUDIOS_PATH, t + '.wav'))
        mag_target, _ = forward_transform(audio_target)
        mag_target = amplitude_to_db(mag_target)
        test_target = slice_magnitude(mag_target, mag_target.shape[0])
        test_target = (test_target * 2) - 1
        # Shuffle target slices so the style conditioning is not content-aligned
        # with the input slices.
        test_target_perm = test_target[
            np.random.permutation(test_target.shape[0]), :, :, :]
        test_inputs.append(np.concatenate([test_input, test_target_perm], axis=3))
        test_targets.append(test_target)

    # NOTE(review): the *_val_total accumulators below are never updated, so
    # every '*_val' column in history.csv is always zero. They are kept so the
    # CSV schema stays unchanged for downstream consumers.
    gen_mae_list, gen_mae_val_list = [], []
    gen_loss_list, gen_loss_val_list = [], []
    disc_loss_list, disc_loss_val_list = [], []

    for epoch in range(epochs):
        gen_mae_total, gen_mae_val_total = 0, 0
        gen_loss_total, gen_loss_val_total = 0, 0
        disc_loss_total, disc_loss_val_total = 0, 0
        print('Epoch {}/{}'.format((epoch + 1) + epoch_offset,
                                   epochs + epoch_offset))
        progbar = tf.keras.utils.Progbar(epoch_size)
        for i in range(epoch_size):
            # Get one batch from the DataGenerator.
            input_image, target = data['training'][i]
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                # Generate a fake image.
                gen_output = generator(input_image, training=True)
                # The discriminator only sees the melody channel of the input.
                disc_real_output = discriminator(
                    [input_image[:, :, :, 0:1], target], training=True)
                disc_generated_output = discriminator(
                    [input_image[:, :, :, 0:1], gen_output], training=True)
                # Compute the losses.
                gen_mae = l1_loss(target, gen_output)
                gen_loss = generator_loss(disc_generated_output, gen_mae)
                disc_loss = discriminator_loss(disc_real_output,
                                               disc_generated_output)
            # Compute and apply the gradients.
            generator_gradients = gen_tape.gradient(
                gen_loss, generator.trainable_variables)
            discriminator_gradients = disc_tape.gradient(
                disc_loss, discriminator.trainable_variables)
            generator_optimizer.apply_gradients(
                zip(generator_gradients, generator.trainable_variables))
            discriminator_optimizer.apply_gradients(
                zip(discriminator_gradients, discriminator.trainable_variables))
            # Update the running totals and the progress bar.
            gen_mae = gen_mae.numpy()
            gen_loss = gen_loss.numpy()
            disc_loss = disc_loss.numpy()
            gen_mae_total += gen_mae
            gen_loss_total += gen_loss
            disc_loss_total += disc_loss
            progbar.add(1, values=[
                ("gen_mae", gen_mae),
                ("gen_loss", gen_loss),
                ("disc_loss", disc_loss)
            ])

        # Log per-epoch averages and persist the full history every epoch.
        gen_mae_list.append(gen_mae_total / epoch_size)
        gen_mae_val_list.append(gen_mae_val_total / epoch_size)
        gen_loss_list.append(gen_loss_total / epoch_size)
        gen_loss_val_list.append(gen_loss_val_total / epoch_size)
        disc_loss_list.append(disc_loss_total / epoch_size)
        disc_loss_val_list.append(disc_loss_val_total / epoch_size)
        history = pd.DataFrame({
            'gen_mae': gen_mae_list,
            'gen_mae_val': gen_mae_val_list,
            'gen_loss': gen_loss_list,
            'gen_loss_val': gen_loss_val_list,
            'disc_loss': disc_loss_list,
            'disc_loss_val': disc_loss_val_list
        })
        write_csv(history, os.path.join(checkpoint_prefix, 'history.csv'))

        epoch_output = os.path.join(OUTPUT_PATH, model_name,
                                    str((epoch + 1) + epoch_offset).zfill(3))
        init_directory(epoch_output)
        # Generate audios and save spectrograms for the entire audios,
        # one pair per style target.
        for j in range(len(data['training'].target)):
            prediction = generator(test_inputs[j], training=False)
            prediction = (prediction + 1) / 2
            generate_images(
                prediction,
                (test_inputs[j] + 1) / 2,
                (test_targets[j] + 1) / 2,
                os.path.join(epoch_output,
                             'spectrogram_' + data['training'].target[j]))
            generate_audio(
                prediction, phase,
                os.path.join(epoch_output,
                             'audio_' + data['training'].target[j] + '.wav'))
        print('Epoch outputs saved in ' + epoch_output)

        # Save the weights.
        generator.save_weights(os.path.join(checkpoint_prefix, 'generator.h5'))
        discriminator.save_weights(
            os.path.join(checkpoint_prefix, 'discriminator.h5'))
        print('Weights saved in ' + checkpoint_prefix)

        # Callback at the end of the epoch for the DataGenerator
        # (typically reshuffles the data).
        data['training'].on_epoch_end()
def train(data, epochs, batch_size=1, lr=1e-3, epoch_offset=0):
    """Train the single-target generator (origin -> target) with an L1 loss.

    Args:
        data: dict with a 'training' DataGenerator exposing ``.origin`` (str),
            ``.target`` (str), ``len()``, item access and ``on_epoch_end()``.
        epochs: number of epochs to run in this call.
        batch_size: informational only (batching is done by the DataGenerator);
            printed in the startup banner.
        lr: Adam learning rate for the generator.
        epoch_offset: added to epoch numbers in logs/output dirs when resuming.

    Side effects: saves weights and ``history.csv`` under CHECKPOINT_DIR, and
    per-epoch spectrogram images / audio files under OUTPUT_PATH.
    """
    generator = Generator()
    generator_optimizer = tf.keras.optimizers.Adam(lr)

    model_name = (data['training'].origin + '_2_'
                  + data['training'].target + '_generator')
    checkpoint_prefix = os.path.join(CHECKPOINT_DIR, model_name)
    if not os.path.isdir(checkpoint_prefix):
        os.makedirs(checkpoint_prefix)
    else:
        # Resume from previously saved weights when they exist.
        gen_path = os.path.join(checkpoint_prefix, 'generator.h5')
        if os.path.isfile(gen_path):
            generator.load_weights(gen_path, by_name=True)
            print('Generator weights restored from ' + checkpoint_prefix)

    # Number of batches per epoch.
    epoch_size = len(data['training'])

    print()
    print("Started training with the following parameters: ")
    print("\tCheckpoints: \t", checkpoint_prefix)
    print("\tEpochs: \t", epochs)
    print("\tgen_lr: \t", lr)
    print("\tBatchSize: \t", batch_size)
    print("\tnBatches: \t", epoch_size)
    print()

    # Precompute the test input and target for validation.
    audio_input = load_audio(
        os.path.join(TEST_AUDIOS_PATH, data['training'].origin + '.wav'))
    mag_input, phase = forward_transform(audio_input)
    mag_input = amplitude_to_db(mag_input)
    test_input = slice_magnitude(mag_input, mag_input.shape[0])
    test_input = (test_input * 2) - 1

    audio_target = load_audio(
        os.path.join(TEST_AUDIOS_PATH, data['training'].target + '.wav'))
    mag_target, _ = forward_transform(audio_target)
    mag_target = amplitude_to_db(mag_target)
    test_target = slice_magnitude(mag_target, mag_target.shape[0])
    test_target = (test_target * 2) - 1

    # NOTE(review): gen_mae_val_total is never updated, so the 'gen_mae_val'
    # column in history.csv is always zero. Kept so the CSV schema stays
    # unchanged for downstream consumers.
    gen_mae_list, gen_mae_val_list = [], []

    for epoch in range(epochs):
        gen_mae_total, gen_mae_val_total = 0, 0
        print('Epoch {}/{}'.format((epoch + 1) + epoch_offset,
                                   epochs + epoch_offset))
        progbar = tf.keras.utils.Progbar(epoch_size)
        for i in range(epoch_size):
            input_image, target = data['training'][i]
            with tf.GradientTape() as gen_tape:
                # Generate a fake image.
                gen_output = generator(input_image, training=True)
                # Compute the loss.
                gen_mae = l1_loss(target, gen_output)  # Timbre transfer
                # gen_mae = l1_loss(input_image, gen_output)  # Autoencoder
            # Compute the gradients.
            generator_gradients = gen_tape.gradient(
                gen_mae, generator.trainable_variables)
            # Apply the gradients.
            generator_optimizer.apply_gradients(
                zip(generator_gradients, generator.trainable_variables))
            # Update the running total and the progress bar.
            gen_mae = gen_mae.numpy()
            gen_mae_total += gen_mae
            progbar.add(1, values=[("gen_mae", gen_mae)])

        # Log per-epoch averages and persist the full history every epoch.
        gen_mae_total /= epoch_size
        gen_mae_list.append(gen_mae_total)
        gen_mae_val_list.append(gen_mae_val_total)
        history = pd.DataFrame({
            'gen_mae': gen_mae_list,
            'gen_mae_val': gen_mae_val_list
        })
        write_csv(history, os.path.join(checkpoint_prefix, 'history.csv'))

        epoch_output = os.path.join(OUTPUT_PATH, model_name,
                                    str((epoch + 1) + epoch_offset).zfill(3))
        init_directory(epoch_output)
        # Generate audios and save spectrograms for the entire audios.
        prediction = generator(test_input, training=False)
        prediction = (prediction + 1) / 2
        generate_images(prediction,
                        (test_input + 1) / 2,
                        (test_target + 1) / 2,
                        os.path.join(epoch_output, 'spectrogram'))
        generate_audio(prediction, phase,
                       os.path.join(epoch_output, 'audio.wav'))
        print('Epoch outputs saved in ' + epoch_output)

        # Save the weights.
        generator.save_weights(os.path.join(checkpoint_prefix, 'generator.h5'))
        print('Weights saved in ' + checkpoint_prefix)

        # Callback at the end of the epoch for the DataGenerator
        # (typically reshuffles the data).
        data['training'].on_epoch_end()
load_audio, slice_magnitude)  # NOTE(review): tail of an import statement whose opening is outside this chunk


if __name__ == "__main__":
    # Feature-extraction script: for every <audios_path>/<instrument>/<file>
    # compute the dB magnitude spectrogram, slice it, and save each slice as
    # <features_path>/<instrument>/<name>_<i>.npy.
    ap = argparse.ArgumentParser()
    ap.add_argument('--audios_path', required=True)
    ap.add_argument('--features_path', required=True)
    args = ap.parse_args()
    assert os.path.isdir(args.audios_path), 'Audios not found'
    # One sub-directory per instrument; mirror that layout in features_path.
    for instrument in os.listdir(args.audios_path):
        print(instrument)
        audios_dir = os.path.join(args.audios_path, instrument)
        features_dir = os.path.join(args.features_path, instrument)
        init_directory(features_dir)
        for f in os.listdir(audios_dir):
            name, _ = os.path.splitext(f)
            audio = load_audio(os.path.join(audios_dir, f))
            # Phase is discarded: only the magnitude is stored as a feature.
            mag, _ = forward_transform(audio)
            mag = amplitude_to_db(mag)
            mag_sliced = slice_magnitude(mag, mag.shape[0])
            print(name, mag_sliced.shape[0])
            for i in range(mag_sliced.shape[0]):
                out_name = os.path.join(
                    features_dir, name + '_' + str(i).zfill(3) + '.npy')
                # Skip slices already on disk so the script is resumable.
                if (not os.path.isfile(out_name)):
                    np.save(out_name, mag_sliced[i, :, :, :])