def prediction(weights_path, name_model, audio_dir_prediction, dir_save_prediction,
               audio_input_prediction, audio_output_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft, hop_length_fft):
    """Load pretrained weights, denoise the noisy voice sound given as input,
    and save the denoised sound to disk."""
    # Load the model architecture from JSON and create the model
    json_file = open(weights_path + '/' + name_model + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # Load weights into the new model
    loaded_model.load_weights(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)
    # Rescale the predicted noise model back
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Subtract the noise model from the noisy speech
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct audio from the denoised spectrogram and the phase
    print(X_denoise.shape)
    print(m_pha_audio.shape)
    print(frame_length)
    print(hop_length_fft)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)

    # Number of frames
    nb_samples = audio_denoise_recons.shape[0]
    # Save all frames in one file
    denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length) * 10
    # Newer librosa releases removed librosa.output; use sf.write instead
    # librosa.output.write_wav(dir_save_prediction + audio_output_prediction, denoise_long[0, :], sample_rate)
    import soundfile as sf
    sf.write(dir_save_prediction + audio_output_prediction, denoise_long[0, :],
             sample_rate, 'PCM_16')
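# A minimal usage sketch for prediction() above, not a prescribed
# configuration. n_fft=255 and hop_length_fft=63 echo the squared-spectrogram
# settings used later in this file; every path, file name, and remaining
# number is an illustrative assumption.
if __name__ == '__main__':
    prediction(weights_path='weights',
               name_model='model_unet',                    # assumed model name
               audio_dir_prediction='demo_data/test/',
               dir_save_prediction='demo_data/save_predictions/',
               audio_input_prediction=['noisy_voice.wav'],  # list of input files
               audio_output_prediction='denoised_voice.wav',
               sample_rate=8000,                           # assumed sampling rate
               min_duration=1.0,                           # keep clips of >= 1 s
               frame_length=8064,                          # assumed frame size
               hop_length_frame=8064,                      # non-overlapping frames
               n_fft=255,                                  # 128 frequency bins
               hop_length_fft=63)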
def prediction(weights_dir, model_name, input_dir, output_dir, sample_rate,
               frame_length, hop_length_frame, n_fft, hop_length_fft):
    # Load the model architecture from JSON and its weights from disk
    json_file = open(weights_dir + model_name + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(weights_dir + model_name + '.h5')
    print("Loaded model from disk!")

    # librosa.output was removed in newer librosa; write audio with soundfile
    import soundfile as sf

    list_files = os.listdir(input_dir)
    for file in list_files:
        # First pass: statistical denoising with logmmse
        logmmse.logmmse_from_file(input_file=input_dir + file,
                                  output_file=output_dir + 'Remastered-' + file)
        audio_temp = dt.audio_files_to_file(input_dir, file, sample_rate)
        # logmmse.logmmse(data=audio_temp, sampling_rate=32000, output_file=output_dir + 'Remastered-' + file)
        # audio_out = nr.reduce_noise(audio_clip=audio_temp, noise_clip=audio_temp, n_fft=n_fft+1, win_length=n_fft+1,
        #                             hop_length=hop_length_fft)
        # librosa.output.write_wav(output_dir + 'Remastered-' + file, audio_out, sample_rate)
        audio_file = dt.audio_files_to_file(output_dir, 'Remastered-' + file,
                                            sample_rate)
        # fig, ax = plt.subplots(figsize=(12, 6))
        # plt.title('Audio')
        # plt.ylabel('Amplitude')
        # plt.xlabel('Time(s)')
        # ax.plot(audio_temp)
        # ax.plot(audio_file, alpha=0.5)
        # plt.show()

        audio_list = [audio_file]
        audio = dt.audio_list_to_numpy(audio_list, frame_length,
                                       hop_length_frame)

        # Dimensions of the squared spectrogram
        dim_square_spec = int(n_fft / 2) + 1
        # Amplitude and phase spectrograms of the sound
        m_amp_db_audio, m_pha_audio = dt.audio_numpy_to_matrix_spectrogram(
            audio, dim_square_spec, n_fft, hop_length_fft)

        # Second pass: scale, reshape, and predict the noise with the network
        x_in = dt.scaled_in(m_amp_db_audio)
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_pred = loaded_model.predict(x_in)
        inv_sca_x_pred = dt.inv_scaled_out(x_pred)
        # Subtract the predicted noise from the noisy speech
        x_denoise = m_amp_db_audio - inv_sca_x_pred[:, :, :, 0]

        # Reconstruct and save the denoised audio
        audio_denoise_recons = dt.matrix_spectrogram_to_numpy_audio(
            x_denoise, m_pha_audio, frame_length, hop_length_fft)
        nb_samples = audio_denoise_recons.shape[0]
        denoise_long = audio_denoise_recons.reshape(
            1, nb_samples * frame_length) * 10
        sf.write(output_dir + 'Final-' + file, denoise_long[0, :], sample_rate)

        # Also reconstruct and save the predicted noise on its own
        noise_recons = dt.matrix_spectrogram_to_numpy_audio(
            inv_sca_x_pred[:, :, :, 0], m_pha_audio, frame_length,
            hop_length_fft)
        nb_samples = noise_recons.shape[0]
        noise_long = noise_recons.reshape(1, nb_samples * frame_length)
        sf.write(output_dir + 'Noise-' + file, noise_long[0, :], sample_rate)
def prediction(weights_path, model, audio_input_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft,
               hop_length_fft):
    """Load pretrained weights into the given model, denoise the noisy voice
    sound given as input, and return the denoised audio with its sample rate."""
    loaded_model = model
    # Load weights into the model
    loaded_model.load_weights(weights_path + '/' + 'model_best.h5')
    print("Loaded model from disk")

    audio_dir_prediction = ""
    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)
    # Rescale the predicted noise model back
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Subtract the noise model from the noisy speech
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct audio from the denoised spectrogram and the phase
    print(X_denoise.shape)
    print(m_pha_audio.shape)
    print(frame_length)
    print(hop_length_fft)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)

    # Number of frames
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames and return the result instead of saving it
    denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length) * 10
    return [denoise_long[0, :], sample_rate]
def prediction(weights_path, name_model, audio_dir_prediction,
               audio_input_prediction, sample_rate, min_duration, frame_length,
               hop_length_frame, n_fft, hop_length_fft, mode):
    """Load a pretrained classifier, run it on the noisy sound given as input,
    and print the sound intensity and the predicted leak probability."""
    loaded_model = load_model(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_file_to_numpy(audio_dir_prediction,
                                str(audio_input_prediction), sample_rate,
                                frame_length, hop_length_frame, min_duration,
                                mode)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print("dim_square_spec:{}".format(dim_square_spec))

    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)

    # Broadcast each frame's predicted class score over the whole frame
    audio_class = audio
    for i in range(audio_class.shape[0]):
        audio_class[i, :] = audio_class[i, :] * 0 + X_pred[i]

    # Number of frames
    nb_samples = audio_class.shape[0]
    # Flatten all frames into one sequence
    audio_class_long = audio_class.reshape(1, nb_samples * frame_length)
    # librosa.output.write_wav(dir_save_prediction + audio_output_prediction, denoise_long[0, :], sample_rate)
    # sf.write(dir_save_prediction + audio_output_prediction, res, sample_rate)
    print("Sound intensity: {:.2f}".format(np.mean(np.abs(m_amp_db_audio))))
    print("Leak probability: {:.2f}".format(np.mean(audio_class_long)))
def training(path_save_spectrogram, weights_path, name_model,
             training_from_scratch, epochs, batch_size):
    # Load the noisy voice and clean voice spectrograms created by the
    # data_creation mode
    X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy")
    # The model predicts the noise, i.e. the difference between the two
    X_ou = X_in - X_ou
    # Check the distributions
    print(stats.describe(X_in.reshape(-1, 1)))
    print(stats.describe(X_ou.reshape(-1, 1)))
    # Scale between -1 and 1
    X_in = scaled_in(X_in)
    X_ou = scaled_ou(X_ou)
    # Check the shapes of the spectrograms
    print(X_in.shape)
    print(X_ou.shape)
    # Check the new distributions
    print(stats.describe(X_in.reshape(-1, 1)))
    print(stats.describe(X_ou.reshape(-1, 1)))
    # Reshape for training
    X_in = X_in[:, :, :]
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    X_ou = X_ou[:, :, :]
    X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1)
    X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou,
                                                        test_size=0.10,
                                                        random_state=42)

    if training_from_scratch:
        generator_nn = ConvAutoEncoder(weights_path=weights_path)
        # generator_nn.summary()
        generator_nn.fit(X_train, y_train, X_test, y_test)
        generator_nn.save_weights()
def encode(weights_path=args.weights_folder,
           name_model=args.name_model,
           audio_dir_prediction=args.audio_dir_prediction,
           dir_save_prediction=args.dir_save_prediction,
           audio_input_prediction=args.audio_input_prediction,
           audio_output_prediction=args.audio_output_prediction,
           sample_rate=args.sample_rate,
           min_duration=args.min_duration,
           frame_length=args.frame_length,
           hop_length_frame=args.hop_length_frame,
           n_fft=args.n_fft,
           hop_length_fft=args.hop_length_fft):
    # Load the pretrained autoencoder and its weights
    loaded_model = ConvAutoEncoder(weights_path=weights_path)
    loaded_model.load_weights()
    loaded_model.info()
    print("Loaded model from:", weights_path)

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)

    # Encode the spectrograms and save the latent representation to disk
    encoded = loaded_model.encode(X_in)
    # print(encoded)
    print('encoded.shape:', encoded.shape)
    np.save('aaa', encoded)
    print('encoded file:', audio_dir_prediction + str(audio_input_prediction))
    print('save to: aaa.npy')
def training(path_save_spectrogram, weights_path, name_model, training_from_scratch, epochs, batch_size): """ This function will read noisy voice and clean voice spectrograms created by data_creation mode, and train a Unet model on this dataset for epochs and batch_size specified. It saves best models to disk regularly If training_from_scratch is set to True it will train from scratch, if set to False, it will train from weights (name_model) provided in weights_path """ #load noisy voice & clean voice spectrograms created by data_creation mode X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy") X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy") #Model of noise to predict X_ou = X_in - X_ou #Check distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #to scale between -1 and 1 X_in = scaled_in(X_in) X_ou = scaled_ou(X_ou) #Check shape of spectrograms print(X_in.shape) print(X_ou.shape) #Check new distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #Reshape for training X_in = X_in[:, :, :] X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1) X_ou = X_ou[:, :, :] X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1) X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou, test_size=0.10, random_state=42) #If training from scratch if training_from_scratch: generator_nn = unet() #If training from pre-trained weights else: generator_nn = unet(pretrained_weights=weights_path + name_model + '.h5') #Save best models to disk during training checkpoint = ModelCheckpoint(weights_path + '/weigths_HUBER_N2C.h5', verbose=1, monitor='val_loss', save_best_only=True, mode='auto') generator_nn.summary() #Training history = generator_nn.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[checkpoint], verbose=1, validation_data=(X_test, y_test)) #Plot training and validation loss (log scale) loss = history.history['loss'] val_loss = history.history['val_loss'] epochs = range(1, len(loss) + 1) plt.plot(epochs, loss, label='Training loss') plt.plot(epochs, val_loss, label='Validation loss') plt.yscale('log') plt.title('Training and validation loss') plt.legend() plt.show()
def training(path_save_spectrogram, weights_path, name_model,
             training_from_scratch, epochs, batch_size, nf):
    """This function reads the noise and voice spectrograms created by the
    data_creation mode and trains a CNN classifier on this dataset for the
    specified epochs and batch_size. It regularly saves the best model to
    disk. If training_from_scratch is set to True it trains from scratch; if
    set to False, it trains from the weights (name_model) provided in
    weights_path."""
    # Load the spectrograms created by the data_creation mode
    # X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_in1 = np.load(path_save_spectrogram + 'noise_amp_db' + str(nf) + ".npy")
    # X_in2 = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_in2 = np.load(path_save_spectrogram + 'voice_amp_db' + str(nf) + ".npy")

    # Build the class labels: 0 for noise, 1 for voice
    r = 0
    X_in1 = X_in1[:, :, :]
    X_in2 = X_in2[:, :, :] * (1 - r) + X_in1 * r
    c = np.mean(np.abs(X_in2))
    print(c)
    negn = X_in1.shape[0]
    posn = X_in2.shape[0]
    Z_ou = np.array([0] * negn + [1] * posn)
    X_in = np.concatenate((X_in1, X_in2), axis=0)
    # Augment the data with scaled-down copies of both classes
    nscales = 5
    for i in range(1, nscales):
        X_in = np.concatenate(
            (X_in, X_in1 * (i / nscales), X_in2 * (i / nscales)), axis=0)
        Z_ou = np.concatenate((Z_ou, np.array([0] * negn + [1] * posn)),
                              axis=0)
    X_in = scaled_in(X_in)
    # Check the shape of the spectrograms
    print(X_in.shape)
    X_in = X_in[:, :, :]
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    print(X_in.shape)
    X_train, X_test, z_train, z_test = train_test_split(X_in, Z_ou,
                                                        test_size=0.10,
                                                        shuffle=True)

    # If training from scratch
    if training_from_scratch:
        nn = cnet()
    # If training from pre-trained weights
    else:
        nn = cnet(pretrained_weights=weights_path + name_model + '.h5')

    # Save the best model to disk during training
    checkpoint = ModelCheckpoint(weights_path + '/model_save.h5', verbose=1,
                                 monitor='val_loss', save_best_only=True,
                                 mode='auto')
    nn.summary()
    time.sleep(2)

    INIT_LR = 1e-5
    losses = {
        "disc_output": "MeanAbsoluteError",
    }
    lossWeights = {"disc_output": 1.0}
    # Note: newer Keras versions spell this learning_rate= and replace decay=
    # with a learning-rate schedule
    opt = Adam(lr=INIT_LR, decay=INIT_LR / epochs)
    nn.compile(optimizer=opt, loss=losses, loss_weights=lossWeights,
               metrics=["accuracy"])

    # Training
    validation_data = (X_test, {"disc_output": z_test})
    history = nn.fit(x=X_train, y={"disc_output": z_train}, epochs=epochs,
                     batch_size=batch_size, shuffle=True,
                     callbacks=[checkpoint], verbose=1,
                     validation_data=validation_data)

    # Plot training and validation loss (log scale)
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, label='Training class loss')
    plt.plot(epochs, val_loss, label='Validation class loss')
    plt.yscale('log')
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()
def training(path_save_spectrogram, weights_path, name_model, training_from_scratch, epochs, batch_size): """ This function will read noisy voice and clean voice spectrograms created by data_creation mode, and train a Unet model on this dataset for epochs and batch_size specified. It saves best models to disk regularly. If training_from_scratch is set to True it will train from scratch, if set to False, it will train from weights (name_model) provided in weights_path """ #load noisy voice & clean voice spectrograms created by data_creation mode X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy") X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy") #Model of noise to predict X_ou = X_in - X_ou #Check distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #to scale between -1 and 1 X_in = scaled_in(X_in) X_ou = scaled_ou(X_ou) #Check shape of spectrograms print(X_in.shape) print(X_ou.shape) #Check new distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #Reshape for training X_in = X_in[:, :, :] X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1) X_ou = X_ou[:, :, :] X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1) X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou, test_size=0.10, random_state=42) #If training from scratch if training_from_scratch: print("\nTraining from scratch\n.") generator_nn = unet() #If training from pre-trained weights else: pretrained_weights = "{}/{}.h5".format(weights_path, name_model) print("\nTraining from pre-trained weights: {}\n".format( pretrained_weights)) generator_nn = unet(pretrained_weights=pretrained_weights) # Save model each epoch, just in in case weights_name_each = "model_and_weights-{epoch:02d}.h5" checkpoint_each = ModelCheckpoint(weights_path + weights_name_each, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1) #Save best models to disk during training weights_name_best = "model_and_weights-{epoch:02d}-{val_loss:.2f}.h5" checkpoint_best = ModelCheckpoint(weights_path + weights_name_best, verbose=1, monitor='val_loss', save_weights_only=False, save_best_only=True, mode='auto') # TensorBoard callback log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = TensorBoard( log_dir=log_dir, histogram_freq=1, write_images=True, ) generator_nn.summary() #Training history = generator_nn.fit( X_train, y_train, epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[checkpoint_each, checkpoint_best, tensorboard_callback], verbose=1, validation_data=(X_test, y_test)) #Plot training and validation loss (log scale) loss = history.history['loss'] val_loss = history.history['val_loss'] epochs = range(1, len(loss) + 1) plt.plot(epochs, loss, label='Training loss') plt.plot(epochs, val_loss, label='Validation loss') plt.yscale('log') plt.title('Training and validation loss') plt.legend() plt.show()
audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                             sample_rate, frame_length, hop_length_frame,
                             min_duration)

# Choosing n_fft and hop_length_fft to have squared spectrograms
n_fft = 255
hop_length_fft = 63
dim_square_spec = int(n_fft / 2) + 1

# Create amplitude and phase spectrograms of the sounds
m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
    audio, dim_square_spec, n_fft, hop_length_fft)

# Global scaling to have a distribution between -1 and 1
X_in = scaled_in(m_amp_db_audio)
# Reshape for prediction
X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
# Prediction using the loaded network
X_pred = loaded_model.predict(X_in)
# Rescale the predicted noise model back
inv_sca_X_pred = inv_scaled_ou(X_pred)
# Subtract the noise model from the noisy speech
X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

# Reconstruct audio from the denoised spectrogram and the phase
audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
    X_denoise, m_pha_audio, frame_length, hop_length_fft)
# Number of frames
nb_samples = audio_denoise_recons.shape[0]
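# Why n_fft = 255 and hop_length_fft = 63 yield a squared spectrogram: the
# STFT returns n_fft // 2 + 1 = 128 frequency bins, and with an odd n_fft a
# centered STFT over one audio frame produces
# 1 + (frame_length - 1) // hop_length_fft time steps, which is also 128 when
# frame_length is 8064 (an assumed value, equal to hop_length_fft * 128).
# A quick sanity check of that arithmetic:
n_fft = 255
hop_length_fft = 63
frame_length = 8064  # assumed frame size in samples
assert n_fft // 2 + 1 == 128                            # frequency bins
assert 1 + (frame_length - 1) // hop_length_fft == 128  # time steps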
def training(data_noise_dir, data_voice_dir, spectrogram_dir, weights_dir,
             model_name, training_from_scratch, epochs, batch_size):
    # Train incrementally over ten spectrogram chunks; only the first chunk
    # starts from scratch, later chunks resume from the saved weights
    for i in range(10):
        if i == 0:
            training_from_scratch = True
        else:
            training_from_scratch = False
        # Load the noisy voice and clean voice spectrograms for this chunk
        x_in = np.load(
            spectrogram_dir + str(i) +
            f'{data_voice_dir}_{data_noise_dir}_noisy_voice_amp_db' + ".npy")
        x_out = np.load(spectrogram_dir + str(i) +
                        f'{data_voice_dir}_voice_amp_db' + ".npy")
        # The model predicts the noise, i.e. the difference between the two
        x_out = x_in - x_out
        # Check the distributions before and after scaling between -1 and 1
        print(stats.describe(x_in.reshape(-1, 1)))
        print(stats.describe(x_out.reshape(-1, 1)))
        x_in = scaled_in(x_in)
        x_out = scaled_out(x_out)
        print(x_in.shape)
        print(x_out.shape)
        print(stats.describe(x_in.reshape(-1, 1)))
        print(stats.describe(x_out.reshape(-1, 1)))
        # Reshape for training
        x_in = x_in[:, :, :]
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_out = x_out[:, :, :]
        x_out = x_out.reshape(x_out.shape[0], x_out.shape[1], x_out.shape[2], 1)
        x_train, x_test, y_train, y_test = train_test_split(x_in, x_out,
                                                            test_size=0.10,
                                                            random_state=42)

        if training_from_scratch:
            generator_nn = unet()
        else:
            generator_nn = unet(pretrained_weights=weights_dir + model_name +
                                '.h5')

        # Save the best model to disk during training
        checkpoint = ModelCheckpoint(weights_dir + model_name + '.h5',
                                     verbose=1, monitor='val_loss',
                                     save_best_only=True, mode='auto')
        generator_nn.summary()
        history = generator_nn.fit(x_train, y_train, epochs=epochs,
                                   batch_size=batch_size, shuffle=True,
                                   callbacks=[checkpoint], verbose=1,
                                   validation_data=(x_test, y_test))

        # Save the model architecture as JSON alongside the weights
        model_json = generator_nn.to_json()
        with open(f"{weights_dir + model_name}.json", "w") as json_file:
            json_file.write(model_json)