def preprocess_for_training(train_A_dir, train_B_dir, cache_folder):
    """Preprocess two speaker corpora for voice-conversion training.

    Loads wavs from both directories, WORLD-encodes them (F0, spectral
    envelope, aperiodicity, coded MCEPs), computes log-F0 and MCEP
    normalization statistics, and caches the normalized features and the
    statistics under ``cache_folder``.

    Args:
        train_A_dir: directory containing speaker A's training wavs.
        train_B_dir: directory containing speaker B's training wavs.
        cache_folder: output directory for the cached .npz/.pickle files
            (created if missing).
    """
    num_mcep = 24          # MCEP coding dimension passed to the WORLD encoder
    sampling_rate = 16000
    frame_period = 5.0     # WORLD analysis frame period in ms
    # Removed unused local `n_frames = 128` — never referenced in this function.

    print("Starting to preprocess data.......")  # fixed typo: "prepocess"
    start_time = time.time()

    wavs_A = preprocess.load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
    wavs_B = preprocess.load_wavs(wav_dir=train_B_dir, sr=sampling_rate)

    f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = preprocess.world_encode_data(
        wave=wavs_A, fs=sampling_rate, frame_period=frame_period, coded_dim=num_mcep)
    f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = preprocess.world_encode_data(
        wave=wavs_B, fs=sampling_rate, frame_period=frame_period, coded_dim=num_mcep)

    log_f0s_mean_A, log_f0s_std_A = preprocess.logf0_statistics(f0s=f0s_A)
    log_f0s_mean_B, log_f0s_std_B = preprocess.logf0_statistics(f0s=f0s_B)

    print("Log Pitch A")
    print("Mean: {:.4f}, Std: {:.4f}".format(log_f0s_mean_A, log_f0s_std_A))
    print("Log Pitch B")
    print("Mean: {:.4f}, Std: {:.4f}".format(log_f0s_mean_B, log_f0s_std_B))

    # Transpose each utterance so normalization runs per coefficient.
    coded_sps_A_transposed = preprocess.transpose_in_list(lst=coded_sps_A)
    coded_sps_B_transposed = preprocess.transpose_in_list(lst=coded_sps_B)

    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = \
        preprocess.coded_sps_normalization_fit_transform(coded_sps=coded_sps_A_transposed)
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = \
        preprocess.coded_sps_normalization_fit_transform(coded_sps=coded_sps_B_transposed)

    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(cache_folder, exist_ok=True)

    np.savez(os.path.join(cache_folder, 'logf0s_normalization.npz'),
             mean_A=log_f0s_mean_A, std_A=log_f0s_std_A,
             mean_B=log_f0s_mean_B, std_B=log_f0s_std_B)
    np.savez(os.path.join(cache_folder, 'mcep_normalization.npz'),
             mean_A=coded_sps_A_mean, std_A=coded_sps_A_std,
             mean_B=coded_sps_B_mean, std_B=coded_sps_B_std)

    save_pickle(variable=coded_sps_A_norm,
                fileName=os.path.join(cache_folder, "coded_sps_A_norm.pickle"))
    save_pickle(variable=coded_sps_B_norm,
                fileName=os.path.join(cache_folder, "coded_sps_B_norm.pickle"))

    end_time = time.time()
    # Fixed typo ("finsihed") and report the actual cache location instead of
    # the hard-coded "../cache" the original message claimed.
    print("Preprocessing finished! Cached preprocessed data saved in {}".format(cache_folder))
    print("Time taken for preprocessing {:.4f} seconds".format(end_time - start_time))
def train(random_seed=0):
    """Train the SVSRNN singing-voice separation model on MIR-1K.

    Reads the train/valid wav file lists, converts the wavs to STFT
    spectrograms, then runs mini-batch training with periodic console
    logging, validation every 200 steps (appended to a CSV log), and
    checkpointing every 1000 steps.

    Args:
        random_seed: seed for numpy's RNG (controls batch sampling).
    """
    # NOTE(review): `train` is defined twice in this file; the later
    # definition shadows this one at import time — remove one of them.
    np.random.seed(random_seed)

    # Dataset locations. Automatic download is currently disabled:
    # mir1k_dir = download_mir1k(download_dir='download', data_dir='data')
    # wavs_dir = os.path.join(mir1k_dir, 'MIR-1K/UndividedWavfile')
    # wavs_dir = os.path.join(mir1k_dir, 'MIR-1K/Wavfile')
    # (Removed unused locals download_dir / data_dir, only referenced above.)
    mir1k_dir = 'data/MIR1K'
    train_path = os.path.join(mir1k_dir, 'train.txt')
    valid_path = os.path.join(mir1k_dir, 'valid.txt')

    # One wav path per line; `line` instead of the builtin-shadowing `file`.
    with open(train_path, 'r') as text_file:
        wav_filenames_train = [line.strip() for line in text_file.readlines()]
    with open(valid_path, 'r') as text_file:
        wav_filenames_valid = [line.strip() for line in text_file.readlines()]

    # Preprocess parameters
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4

    # Model / training hyperparameters
    learning_rate = 0.0001
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    batch_size = 64
    sample_frames = 10
    iterations = 50000
    tensorboard_directory = './graphs/svsrnn'
    log_directory = './log'
    train_log_filename = 'train_log.csv'
    clear_tensorboard = False
    model_directory = './model'
    model_filename = 'svsrnn.ckpt'

    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(log_directory, exist_ok=True)
    # Truncate any previous training log.
    open(os.path.join(log_directory, train_log_filename), 'w').close()

    # Load train wavs and turn them into spectrograms.
    wavs_mono_train, wavs_src1_train, wavs_src2_train = load_wavs(
        filenames=wav_filenames_train, sr=mir1k_sr)
    stfts_mono_train, stfts_src1_train, stfts_src2_train = wavs_to_specs(
        wavs_mono=wavs_mono_train, wavs_src1=wavs_src1_train,
        wavs_src2=wavs_src2_train, n_fft=n_fft, hop_length=hop_length)

    wavs_mono_valid, wavs_src1_valid, wavs_src2_valid = load_wavs(
        filenames=wav_filenames_valid, sr=mir1k_sr)
    stfts_mono_valid, stfts_src1_valid, stfts_src2_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid, wavs_src1=wavs_src1_valid,
        wavs_src2=wavs_src2_valid, n_fft=n_fft, hop_length=hop_length)

    # Initialize model; num_features is the number of rFFT bins.
    model = SVSRNN(num_features=n_fft // 2 + 1, num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)

    for i in range(iterations):  # dropped redundant parens around range()
        data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
            stfts_mono=stfts_mono_train, stfts_src1=stfts_src1_train,
            stfts_src2=stfts_src2_train, batch_size=batch_size,
            sample_frames=sample_frames)
        # sperate_magnitude_phase: (sic) project helper name — keep as-is.
        x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
        y1, _ = sperate_magnitude_phase(data=data_src1_batch)
        y2, _ = sperate_magnitude_phase(data=data_src2_batch)

        train_loss = model.train(x=x_mixed, y1=y1, y2=y2, learning_rate=learning_rate)

        if i % 10 == 0:
            print('Step: %d Train Loss: %f' % (i, train_loss))

        if i % 200 == 0:
            print('==============================================')
            data_mono_batch, data_src1_batch, data_src2_batch = sample_data_batch(
                stfts_mono=stfts_mono_valid, stfts_src1=stfts_src1_valid,
                stfts_src2=stfts_src2_valid, batch_size=batch_size,
                sample_frames=sample_frames)
            x_mixed, _ = sperate_magnitude_phase(data=data_mono_batch)
            y1, _ = sperate_magnitude_phase(data=data_src1_batch)
            y2, _ = sperate_magnitude_phase(data=data_src2_batch)
            y1_pred, y2_pred, validation_loss = model.validate(x=x_mixed, y1=y1, y2=y2)
            print('Step: %d Validation Loss: %f' % (i, validation_loss))
            print('==============================================')
            with open(os.path.join(log_directory, train_log_filename), 'a') as log_file:
                log_file.write('{},{},{}\n'.format(i, train_loss, validation_loss))

        if i % 1000 == 0:
            model.save(directory=model_directory, filename=model_filename)
def evaluate():
    """Run the trained SVSRNN over the MIR-1K test split and print BSS metrics.

    Loads the checkpointed model, separates every test mixture, rebuilds the
    predicted sources with the mixture phase, and reports GNSDR/GSIR/GSAR.
    """
    # Audio / STFT configuration.
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4

    # Model configuration (must match training).
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    tensorboard_directory = 'graphs/svsrnn'
    clear_tensorboard = False
    model_directory = 'model'
    model_filename = 'svsrnn.ckpt'
    model_filepath = os.path.join(model_directory, model_filename)

    # Test file list: one wav path per line.
    mir1k_dir = 'data/MIR1K'
    test_path = os.path.join(mir1k_dir, 'test.txt')
    with open(test_path, 'r') as listing:
        wav_filenames = [line.strip() for line in listing.readlines()]

    # NOTE(review): this directory is created but never written to in this
    # function — looks copied from generate_demo(); confirm before removing.
    output_directory = 'demo'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    wavs_mono, wavs_src1, wavs_src2 = load_wavs(filenames=wav_filenames, sr=mir1k_sr)
    stfts_mono, stfts_src1, stfts_src2 = wavs_to_specs(
        wavs_mono=wavs_mono, wavs_src1=wavs_src1, wavs_src2=wavs_src2,
        n_fft=n_fft, hop_length=hop_length)
    stfts_mono_full, stfts_src1_full, stfts_src2_full = prepare_data_full(
        stfts_mono=stfts_mono, stfts_src1=stfts_src1, stfts_src2=stfts_src2)

    model = SVSRNN(num_features=n_fft // 2 + 1, num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)
    model.load(filepath=model_filepath)

    wavs_src1_pred = []
    wavs_src2_pred = []
    for wav_filename, wav_mono, stft_mono_full in zip(wav_filenames, wavs_mono, stfts_mono_full):
        magnitude, phase = sperate_magnitude_phase(data=stft_mono_full)
        batch = np.array([magnitude])  # add a leading batch axis of size 1
        y1_pred, y2_pred = model.test(x=batch)

        # Rebuild complex STFTs using the mixture phase, then invert to audio.
        src1_stft = combine_magnitdue_phase(magnitudes=y1_pred[0], phases=phase).transpose()
        src2_stft = combine_magnitdue_phase(magnitudes=y2_pred[0], phases=phase).transpose()
        wavs_src1_pred.append(librosa.istft(src1_stft, hop_length=hop_length))
        wavs_src2_pred.append(librosa.istft(src2_stft, hop_length=hop_length))

    gnsdr, gsir, gsar = bss_eval_global(
        wavs_mono=wavs_mono, wavs_src1=wavs_src1, wavs_src2=wavs_src2,
        wavs_src1_pred=wavs_src1_pred, wavs_src2_pred=wavs_src2_pred)
    print('GNSDR:', gnsdr)
    print('GSIR:', gsir)
    print('GSAR:', gsar)
def generate_demo():
    """Separate the MIR-1K test files with the trained SVSRNN and write demo wavs.

    For each test file, writes three wavs into ``demo/``: the mono mixture
    plus the two predicted sources, each reconstructed from the predicted
    magnitude and the mixture's phase.
    """
    # Audio / STFT configuration.
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4

    # Model configuration (must match training).
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    tensorboard_directory = 'graphs/svsrnn'
    clear_tensorboard = False
    model_directory = 'model'
    model_filename = 'svsrnn.ckpt'
    model_filepath = os.path.join(model_directory, model_filename)

    # Test file list: one wav path per line.
    mir1k_dir = 'data/MIR1K'
    test_path = os.path.join(mir1k_dir, 'test.txt')
    with open(test_path, 'r') as text_file:
        wav_filenames = [line.strip() for line in text_file.readlines()]

    output_directory = 'demo'
    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(output_directory, exist_ok=True)

    wavs_mono, wavs_src1, wavs_src2 = load_wavs(filenames=wav_filenames, sr=mir1k_sr)
    stfts_mono, stfts_src1, stfts_src2 = wavs_to_specs(
        wavs_mono=wavs_mono, wavs_src1=wavs_src1, wavs_src2=wavs_src2,
        n_fft=n_fft, hop_length=hop_length)
    stfts_mono_full, stfts_src1_full, stfts_src2_full = prepare_data_full(
        stfts_mono=stfts_mono, stfts_src1=stfts_src1, stfts_src2=stfts_src2)

    model = SVSRNN(num_features=n_fft // 2 + 1, num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)
    model.load(filepath=model_filepath)

    for wav_filename, wav_mono, stft_mono_full in zip(wav_filenames, wavs_mono, stfts_mono_full):
        # Removed unused local `wav_filename_dir`; hoisted the stem, which the
        # original recomputed three times. split('.')[0] intentionally cuts at
        # the FIRST dot, preserving the original naming behavior.
        wav_filename_base = os.path.basename(wav_filename)
        stem = wav_filename_base.split('.')[0]
        wav_mono_filepath = os.path.join(output_directory, stem + '_mono.wav')
        wav_src1_hat_filepath = os.path.join(output_directory, stem + '_src1.wav')
        wav_src2_hat_filepath = os.path.join(output_directory, stem + '_src2.wav')

        print('Processing %s ...' % wav_filename_base)

        stft_mono_magnitude, stft_mono_phase = sperate_magnitude_phase(data=stft_mono_full)
        stft_mono_magnitude = np.array([stft_mono_magnitude])  # add batch axis
        y1_pred, y2_pred = model.test(x=stft_mono_magnitude)

        # ISTFT with the phase taken from the mono mixture.
        y1_stft_hat = combine_magnitdue_phase(magnitudes=y1_pred[0],
                                              phases=stft_mono_phase).transpose()
        y2_stft_hat = combine_magnitdue_phase(magnitudes=y2_pred[0],
                                              phases=stft_mono_phase).transpose()
        y1_hat = librosa.istft(y1_stft_hat, hop_length=hop_length)
        y2_hat = librosa.istft(y2_stft_hat, hop_length=hop_length)

        # NOTE(review): librosa.output.write_wav was removed in librosa 0.8+;
        # if upgrading librosa, switch to soundfile.write instead.
        librosa.output.write_wav(wav_mono_filepath, wav_mono, mir1k_sr)
        librosa.output.write_wav(wav_src1_hat_filepath, y1_hat, mir1k_sr)
        librosa.output.write_wav(wav_src2_hat_filepath, y2_hat, mir1k_sr)
def train(random_seed=0):
    """Train the SVSRNN separation model on the MIR-1K train/valid lists.

    NOTE(review): a function named ``train`` is also defined earlier in this
    file; this later definition shadows it when the module loads — one of
    the two duplicates should be removed.
    """
    np.random.seed(random_seed)

    # Download MIR1K dataset (download path is currently disabled).
    download_dir = 'download'
    data_dir = 'data'
    mir1k_dir = 'data/MIR1K'
    train_path = os.path.join(mir1k_dir, 'train.txt')
    valid_path = os.path.join(mir1k_dir, 'valid.txt')
    #mir1k_dir = download_mir1k(download_dir = download_dir, data_dir = data_dir)
    #wavs_dir = os.path.join(mir1k_dir, 'MIR-1K/UndividedWavfile')
    #wavs_dir = os.path.join(mir1k_dir, 'MIR-1K/Wavfile')

    def _read_list(path):
        # One wav path per line.
        with open(path, 'r') as fh:
            return [entry.strip() for entry in fh.readlines()]

    wav_filenames_train = _read_list(train_path)
    wav_filenames_valid = _read_list(valid_path)

    # Preprocess parameters.
    mir1k_sr = 16000
    n_fft = 1024
    hop_length = n_fft // 4

    # Model parameters.
    learning_rate = 0.0001
    num_rnn_layer = 3
    num_hidden_units = [256, 256, 256]
    batch_size = 64
    sample_frames = 10
    iterations = 50000
    tensorboard_directory = './graphs/svsrnn'
    log_directory = './log'
    train_log_filename = 'train_log.csv'
    clear_tensorboard = False
    model_directory = './model'
    model_filename = 'svsrnn.ckpt'

    if not os.path.exists(log_directory):
        os.makedirs(log_directory)
    # Start the CSV log from scratch.
    open(os.path.join(log_directory, train_log_filename), 'w').close()

    # Load the wavs and turn them into spectrograms.
    wavs_mono_train, wavs_src1_train, wavs_src2_train = load_wavs(
        filenames=wav_filenames_train, sr=mir1k_sr)
    stfts_mono_train, stfts_src1_train, stfts_src2_train = wavs_to_specs(
        wavs_mono=wavs_mono_train, wavs_src1=wavs_src1_train,
        wavs_src2=wavs_src2_train, n_fft=n_fft, hop_length=hop_length)

    wavs_mono_valid, wavs_src1_valid, wavs_src2_valid = load_wavs(
        filenames=wav_filenames_valid, sr=mir1k_sr)
    stfts_mono_valid, stfts_src1_valid, stfts_src2_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid, wavs_src1=wavs_src1_valid,
        wavs_src2=wavs_src2_valid, n_fft=n_fft, hop_length=hop_length)

    # Build the model; num_features equals the number of rFFT bins.
    model = SVSRNN(num_features=n_fft // 2 + 1, num_rnn_layer=num_rnn_layer,
                   num_hidden_units=num_hidden_units,
                   tensorboard_directory=tensorboard_directory,
                   clear_tensorboard=clear_tensorboard)

    # Training loop with periodic validation, logging and checkpointing.
    for step in range(iterations):
        mono_batch, src1_batch, src2_batch = sample_data_batch(
            stfts_mono=stfts_mono_train, stfts_src1=stfts_src1_train,
            stfts_src2=stfts_src2_train, batch_size=batch_size,
            sample_frames=sample_frames)
        x_mixed, _ = sperate_magnitude_phase(data=mono_batch)
        y1, _ = sperate_magnitude_phase(data=src1_batch)
        y2, _ = sperate_magnitude_phase(data=src2_batch)

        train_loss = model.train(x=x_mixed, y1=y1, y2=y2, learning_rate=learning_rate)

        if step % 10 == 0:
            print('Step: %d Train Loss: %f' % (step, train_loss))

        if step % 200 == 0:
            print('==============================================')
            mono_batch, src1_batch, src2_batch = sample_data_batch(
                stfts_mono=stfts_mono_valid, stfts_src1=stfts_src1_valid,
                stfts_src2=stfts_src2_valid, batch_size=batch_size,
                sample_frames=sample_frames)
            x_mixed, _ = sperate_magnitude_phase(data=mono_batch)
            y1, _ = sperate_magnitude_phase(data=src1_batch)
            y2, _ = sperate_magnitude_phase(data=src2_batch)
            y1_pred, y2_pred, validation_loss = model.validate(x=x_mixed, y1=y1, y2=y2)
            print('Step: %d Validation Loss: %f' % (step, validation_loss))
            print('==============================================')
            with open(os.path.join(log_directory, train_log_filename), 'a') as log_file:
                log_file.write('{},{},{}\n'.format(step, train_loss, validation_loss))

        if step % 1000 == 0:
            model.save(directory=model_directory, filename=model_filename)