def prediction(weights_path, name_model, audio_dir_prediction, dir_save_prediction,
               audio_input_prediction, audio_output_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft, hop_length_fft):
    """Load pretrained weights, denoise the noisy voice sound given as input,
    and save the denoised sound to disk."""
    # Load the model architecture from JSON and create the model
    json_file = open(weights_path + '/' + name_model + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # Load weights into the new model
    loaded_model.load_weights(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)
    # Rescale the predicted noise model back
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Subtract the noise model from the noisy speech
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct audio from the denoised spectrogram and the phase
    print(X_denoise.shape)
    print(m_pha_audio.shape)
    print(frame_length)
    print(hop_length_fft)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)

    # Number of frames
    nb_samples = audio_denoise_recons.shape[0]
    # Save all frames in one file
    denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length) * 10
    # Newer librosa releases removed librosa.output; use sf.write instead
    # librosa.output.write_wav(dir_save_prediction + audio_output_prediction, denoise_long[0, :], sample_rate)
    import soundfile as sf
    sf.write(dir_save_prediction + audio_output_prediction, denoise_long[0, :],
             sample_rate, 'PCM_16')
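# A minimal usage sketch for prediction() above, not a prescribed
# configuration. n_fft=255 and hop_length_fft=63 echo the squared-spectrogram
# settings used later in this file; every path, file name, and remaining
# number is an illustrative assumption.
if __name__ == '__main__':
    prediction(weights_path='weights',
               name_model='model_unet',                    # assumed model name
               audio_dir_prediction='demo_data/test/',
               dir_save_prediction='demo_data/save_predictions/',
               audio_input_prediction=['noisy_voice.wav'],  # list of input files
               audio_output_prediction='denoised_voice.wav',
               sample_rate=8000,                           # assumed sampling rate
               min_duration=1.0,                           # keep clips of >= 1 s
               frame_length=8064,                          # assumed frame size
               hop_length_frame=8064,                      # non-overlapping frames
               n_fft=255,                                  # 128 frequency bins
               hop_length_fft=63)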
def prediction(weights_dir, model_name, input_dir, output_dir, sample_rate,
               frame_length, hop_length_frame, n_fft, hop_length_fft):
    # Load the model architecture from JSON and its weights from disk
    json_file = open(weights_dir + model_name + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(weights_dir + model_name + '.h5')
    print("Loaded model from disk!")

    # librosa.output was removed in newer librosa; write audio with soundfile
    import soundfile as sf

    list_files = os.listdir(input_dir)
    for file in list_files:
        # First pass: statistical denoising with logmmse
        logmmse.logmmse_from_file(input_file=input_dir + file,
                                  output_file=output_dir + 'Remastered-' + file)
        audio_temp = dt.audio_files_to_file(input_dir, file, sample_rate)
        # logmmse.logmmse(data=audio_temp, sampling_rate=32000, output_file=output_dir + 'Remastered-' + file)
        # audio_out = nr.reduce_noise(audio_clip=audio_temp, noise_clip=audio_temp, n_fft=n_fft+1, win_length=n_fft+1,
        #                             hop_length=hop_length_fft)
        # librosa.output.write_wav(output_dir + 'Remastered-' + file, audio_out, sample_rate)
        audio_file = dt.audio_files_to_file(output_dir, 'Remastered-' + file,
                                            sample_rate)
        # fig, ax = plt.subplots(figsize=(12, 6))
        # plt.title('Audio')
        # plt.ylabel('Amplitude')
        # plt.xlabel('Time(s)')
        # ax.plot(audio_temp)
        # ax.plot(audio_file, alpha=0.5)
        # plt.show()

        audio_list = [audio_file]
        audio = dt.audio_list_to_numpy(audio_list, frame_length,
                                       hop_length_frame)

        # Dimensions of the squared spectrogram
        dim_square_spec = int(n_fft / 2) + 1
        # Amplitude and phase spectrograms of the sound
        m_amp_db_audio, m_pha_audio = dt.audio_numpy_to_matrix_spectrogram(
            audio, dim_square_spec, n_fft, hop_length_fft)

        # Second pass: scale, reshape, and predict the noise with the network
        x_in = dt.scaled_in(m_amp_db_audio)
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_pred = loaded_model.predict(x_in)
        inv_sca_x_pred = dt.inv_scaled_out(x_pred)
        # Subtract the predicted noise from the noisy speech
        x_denoise = m_amp_db_audio - inv_sca_x_pred[:, :, :, 0]

        # Reconstruct and save the denoised audio
        audio_denoise_recons = dt.matrix_spectrogram_to_numpy_audio(
            x_denoise, m_pha_audio, frame_length, hop_length_fft)
        nb_samples = audio_denoise_recons.shape[0]
        denoise_long = audio_denoise_recons.reshape(
            1, nb_samples * frame_length) * 10
        sf.write(output_dir + 'Final-' + file, denoise_long[0, :], sample_rate)

        # Also reconstruct and save the predicted noise on its own
        noise_recons = dt.matrix_spectrogram_to_numpy_audio(
            inv_sca_x_pred[:, :, :, 0], m_pha_audio, frame_length,
            hop_length_fft)
        nb_samples = noise_recons.shape[0]
        noise_long = noise_recons.reshape(1, nb_samples * frame_length)
        sf.write(output_dir + 'Noise-' + file, noise_long[0, :], sample_rate)
def prediction(weights_path, model, audio_input_prediction, sample_rate,
               min_duration, frame_length, hop_length_frame, n_fft,
               hop_length_fft):
    """Load pretrained weights into the given model, denoise the noisy voice
    sound given as input, and return the denoised audio with its sample rate."""
    loaded_model = model
    # Load weights into the model
    loaded_model.load_weights(weights_path + '/' + 'model_best.h5')
    print("Loaded model from disk")

    audio_dir_prediction = ""
    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print(dim_square_spec)

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)
    # Rescale the predicted noise model back
    inv_sca_X_pred = inv_scaled_ou(X_pred)
    # Subtract the noise model from the noisy speech
    X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

    # Reconstruct audio from the denoised spectrogram and the phase
    print(X_denoise.shape)
    print(m_pha_audio.shape)
    print(frame_length)
    print(hop_length_fft)
    audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
        X_denoise, m_pha_audio, frame_length, hop_length_fft)

    # Number of frames
    nb_samples = audio_denoise_recons.shape[0]
    # Concatenate all frames and return the result instead of saving it
    denoise_long = audio_denoise_recons.reshape(1, nb_samples * frame_length) * 10
    return [denoise_long[0, :], sample_rate]
def prediction(weights_path, name_model, audio_dir_prediction,
               audio_input_prediction, sample_rate, min_duration, frame_length,
               hop_length_frame, n_fft, hop_length_fft, mode):
    """Load a pretrained classifier, run it on the noisy sound given as input,
    and print the sound intensity and the predicted leak probability."""
    loaded_model = load_model(weights_path + '/' + name_model + '.h5')
    print("Loaded model from disk")

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_file_to_numpy(audio_dir_prediction,
                                str(audio_input_prediction), sample_rate,
                                frame_length, hop_length_frame, min_duration,
                                mode)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1
    print("dim_square_spec:{}".format(dim_square_spec))

    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    # Prediction using the loaded network
    X_pred = loaded_model.predict(X_in)

    # Broadcast each frame's predicted class score over the whole frame
    audio_class = audio
    for i in range(audio_class.shape[0]):
        audio_class[i, :] = audio_class[i, :] * 0 + X_pred[i]

    # Number of frames
    nb_samples = audio_class.shape[0]
    # Flatten all frames into one sequence
    audio_class_long = audio_class.reshape(1, nb_samples * frame_length)
    # librosa.output.write_wav(dir_save_prediction + audio_output_prediction, denoise_long[0, :], sample_rate)
    # sf.write(dir_save_prediction + audio_output_prediction, res, sample_rate)
    print("Sound intensity: {:.2f}".format(np.mean(np.abs(m_amp_db_audio))))
    print("Leak probability: {:.2f}".format(np.mean(audio_class_long)))
def training(path_save_spectrogram, weights_path, name_model,
             training_from_scratch, epochs, batch_size):
    # Load the noisy voice and clean voice spectrograms created by the
    # data_creation mode
    X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy")
    # The model predicts the noise, i.e. the difference between the two
    X_ou = X_in - X_ou
    # Check the distributions
    print(stats.describe(X_in.reshape(-1, 1)))
    print(stats.describe(X_ou.reshape(-1, 1)))
    # Scale between -1 and 1
    X_in = scaled_in(X_in)
    X_ou = scaled_ou(X_ou)
    # Check the shapes of the spectrograms
    print(X_in.shape)
    print(X_ou.shape)
    # Check the new distributions
    print(stats.describe(X_in.reshape(-1, 1)))
    print(stats.describe(X_ou.reshape(-1, 1)))
    # Reshape for training
    X_in = X_in[:, :, :]
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    X_ou = X_ou[:, :, :]
    X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1)
    X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou,
                                                        test_size=0.10,
                                                        random_state=42)

    if training_from_scratch:
        generator_nn = ConvAutoEncoder(weights_path=weights_path)
        # generator_nn.summary()
        generator_nn.fit(X_train, y_train, X_test, y_test)
        generator_nn.save_weights()
def encode(weights_path=args.weights_folder,
           name_model=args.name_model,
           audio_dir_prediction=args.audio_dir_prediction,
           dir_save_prediction=args.dir_save_prediction,
           audio_input_prediction=args.audio_input_prediction,
           audio_output_prediction=args.audio_output_prediction,
           sample_rate=args.sample_rate,
           min_duration=args.min_duration,
           frame_length=args.frame_length,
           hop_length_frame=args.hop_length_frame,
           n_fft=args.n_fft,
           hop_length_fft=args.hop_length_fft):
    # Load the pretrained autoencoder and its weights
    loaded_model = ConvAutoEncoder(weights_path=weights_path)
    loaded_model.load_weights()
    loaded_model.info()
    print("Loaded model from:", weights_path)

    # Extract audio from the folder and convert it to a numpy array
    audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                                 sample_rate, frame_length, hop_length_frame,
                                 min_duration)

    # Dimensions of the squared spectrogram
    dim_square_spec = int(n_fft / 2) + 1

    # Create amplitude and phase spectrograms of the sounds
    m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
        audio, dim_square_spec, n_fft, hop_length_fft)

    # Global scaling to have a distribution between -1 and 1
    X_in = scaled_in(m_amp_db_audio)
    # Reshape for prediction
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)

    # Encode the spectrograms and save the latent representation to disk
    encoded = loaded_model.encode(X_in)
    # print(encoded)
    print('encoded.shape:', encoded.shape)
    np.save('aaa', encoded)
    print('encoded file:', audio_dir_prediction + str(audio_input_prediction))
    print('save to: aaa.npy')
def training(path_save_spectrogram, weights_path, name_model, training_from_scratch, epochs, batch_size): """ This function will read noisy voice and clean voice spectrograms created by data_creation mode, and train a Unet model on this dataset for epochs and batch_size specified. It saves best models to disk regularly If training_from_scratch is set to True it will train from scratch, if set to False, it will train from weights (name_model) provided in weights_path """ #load noisy voice & clean voice spectrograms created by data_creation mode X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy") X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy") #Model of noise to predict X_ou = X_in - X_ou #Check distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #to scale between -1 and 1 X_in = scaled_in(X_in) X_ou = scaled_ou(X_ou) #Check shape of spectrograms print(X_in.shape) print(X_ou.shape) #Check new distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #Reshape for training X_in = X_in[:, :, :] X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1) X_ou = X_ou[:, :, :] X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1) X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou, test_size=0.10, random_state=42) #If training from scratch if training_from_scratch: generator_nn = unet() #If training from pre-trained weights else: generator_nn = unet(pretrained_weights=weights_path + name_model + '.h5') #Save best models to disk during training checkpoint = ModelCheckpoint(weights_path + '/weigths_HUBER_N2C.h5', verbose=1, monitor='val_loss', save_best_only=True, mode='auto') generator_nn.summary() #Training history = generator_nn.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[checkpoint], verbose=1, validation_data=(X_test, y_test)) #Plot training and validation loss (log scale) loss = history.history['loss'] val_loss = history.history['val_loss'] epochs = range(1, len(loss) + 1) plt.plot(epochs, loss, label='Training loss') plt.plot(epochs, val_loss, label='Validation loss') plt.yscale('log') plt.title('Training and validation loss') plt.legend() plt.show()
def training(path_save_spectrogram, weights_path, name_model,
             training_from_scratch, epochs, batch_size, nf):
    """This function reads the noise and voice spectrograms created by the
    data_creation mode and trains a CNN classifier on this dataset for the
    specified epochs and batch_size. It regularly saves the best model to
    disk. If training_from_scratch is set to True it trains from scratch; if
    set to False, it trains from the weights (name_model) provided in
    weights_path."""
    # Load the spectrograms created by the data_creation mode
    # X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_in1 = np.load(path_save_spectrogram + 'noise_amp_db' + str(nf) + ".npy")
    # X_in2 = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy")
    X_in2 = np.load(path_save_spectrogram + 'voice_amp_db' + str(nf) + ".npy")

    # Build the class labels: 0 for noise, 1 for voice
    r = 0
    X_in1 = X_in1[:, :, :]
    X_in2 = X_in2[:, :, :] * (1 - r) + X_in1 * r
    c = np.mean(np.abs(X_in2))
    print(c)
    negn = X_in1.shape[0]
    posn = X_in2.shape[0]
    Z_ou = np.array([0] * negn + [1] * posn)
    X_in = np.concatenate((X_in1, X_in2), axis=0)
    # Augment the data with scaled-down copies of both classes
    nscales = 5
    for i in range(1, nscales):
        X_in = np.concatenate(
            (X_in, X_in1 * (i / nscales), X_in2 * (i / nscales)), axis=0)
        Z_ou = np.concatenate((Z_ou, np.array([0] * negn + [1] * posn)),
                              axis=0)
    X_in = scaled_in(X_in)
    # Check the shape of the spectrograms
    print(X_in.shape)
    X_in = X_in[:, :, :]
    X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
    print(X_in.shape)
    X_train, X_test, z_train, z_test = train_test_split(X_in, Z_ou,
                                                        test_size=0.10,
                                                        shuffle=True)

    # If training from scratch
    if training_from_scratch:
        nn = cnet()
    # If training from pre-trained weights
    else:
        nn = cnet(pretrained_weights=weights_path + name_model + '.h5')

    # Save the best model to disk during training
    checkpoint = ModelCheckpoint(weights_path + '/model_save.h5', verbose=1,
                                 monitor='val_loss', save_best_only=True,
                                 mode='auto')
    nn.summary()
    time.sleep(2)

    INIT_LR = 1e-5
    losses = {
        "disc_output": "MeanAbsoluteError",
    }
    lossWeights = {"disc_output": 1.0}
    # Note: newer Keras versions spell this learning_rate= and replace decay=
    # with a learning-rate schedule
    opt = Adam(lr=INIT_LR, decay=INIT_LR / epochs)
    nn.compile(optimizer=opt, loss=losses, loss_weights=lossWeights,
               metrics=["accuracy"])

    # Training
    validation_data = (X_test, {"disc_output": z_test})
    history = nn.fit(x=X_train, y={"disc_output": z_train}, epochs=epochs,
                     batch_size=batch_size, shuffle=True,
                     callbacks=[checkpoint], verbose=1,
                     validation_data=validation_data)

    # Plot training and validation loss (log scale)
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)
    plt.plot(epochs, loss, label='Training class loss')
    plt.plot(epochs, val_loss, label='Validation class loss')
    plt.yscale('log')
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()
def training(path_save_spectrogram, weights_path, name_model, training_from_scratch, epochs, batch_size): """ This function will read noisy voice and clean voice spectrograms created by data_creation mode, and train a Unet model on this dataset for epochs and batch_size specified. It saves best models to disk regularly. If training_from_scratch is set to True it will train from scratch, if set to False, it will train from weights (name_model) provided in weights_path """ #load noisy voice & clean voice spectrograms created by data_creation mode X_in = np.load(path_save_spectrogram + 'noisy_voice_amp_db' + ".npy") X_ou = np.load(path_save_spectrogram + 'voice_amp_db' + ".npy") #Model of noise to predict X_ou = X_in - X_ou #Check distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #to scale between -1 and 1 X_in = scaled_in(X_in) X_ou = scaled_ou(X_ou) #Check shape of spectrograms print(X_in.shape) print(X_ou.shape) #Check new distribution print(stats.describe(X_in.reshape(-1, 1))) print(stats.describe(X_ou.reshape(-1, 1))) #Reshape for training X_in = X_in[:, :, :] X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1) X_ou = X_ou[:, :, :] X_ou = X_ou.reshape(X_ou.shape[0], X_ou.shape[1], X_ou.shape[2], 1) X_train, X_test, y_train, y_test = train_test_split(X_in, X_ou, test_size=0.10, random_state=42) #If training from scratch if training_from_scratch: print("\nTraining from scratch\n.") generator_nn = unet() #If training from pre-trained weights else: pretrained_weights = "{}/{}.h5".format(weights_path, name_model) print("\nTraining from pre-trained weights: {}\n".format( pretrained_weights)) generator_nn = unet(pretrained_weights=pretrained_weights) # Save model each epoch, just in in case weights_name_each = "model_and_weights-{epoch:02d}.h5" checkpoint_each = ModelCheckpoint(weights_path + weights_name_each, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1) #Save best models to disk during training weights_name_best = "model_and_weights-{epoch:02d}-{val_loss:.2f}.h5" checkpoint_best = ModelCheckpoint(weights_path + weights_name_best, verbose=1, monitor='val_loss', save_weights_only=False, save_best_only=True, mode='auto') # TensorBoard callback log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = TensorBoard( log_dir=log_dir, histogram_freq=1, write_images=True, ) generator_nn.summary() #Training history = generator_nn.fit( X_train, y_train, epochs=epochs, batch_size=batch_size, shuffle=True, callbacks=[checkpoint_each, checkpoint_best, tensorboard_callback], verbose=1, validation_data=(X_test, y_test)) #Plot training and validation loss (log scale) loss = history.history['loss'] val_loss = history.history['val_loss'] epochs = range(1, len(loss) + 1) plt.plot(epochs, loss, label='Training loss') plt.plot(epochs, val_loss, label='Validation loss') plt.yscale('log') plt.title('Training and validation loss') plt.legend() plt.show()
audio = audio_files_to_numpy(audio_dir_prediction, audio_input_prediction,
                             sample_rate, frame_length, hop_length_frame,
                             min_duration)

# Choosing n_fft and hop_length_fft to have squared spectrograms
n_fft = 255
hop_length_fft = 63
dim_square_spec = int(n_fft / 2) + 1

# Create amplitude and phase spectrograms of the sounds
m_amp_db_audio, m_pha_audio = numpy_audio_to_matrix_spectrogram(
    audio, dim_square_spec, n_fft, hop_length_fft)

# Global scaling to have a distribution between -1 and 1
X_in = scaled_in(m_amp_db_audio)
# Reshape for prediction
X_in = X_in.reshape(X_in.shape[0], X_in.shape[1], X_in.shape[2], 1)
# Prediction using the loaded network
X_pred = loaded_model.predict(X_in)
# Rescale the predicted noise model back
inv_sca_X_pred = inv_scaled_ou(X_pred)
# Subtract the noise model from the noisy speech
X_denoise = m_amp_db_audio - inv_sca_X_pred[:, :, :, 0]

# Reconstruct audio from the denoised spectrogram and the phase
audio_denoise_recons = matrix_spectrogram_to_numpy_audio(
    X_denoise, m_pha_audio, frame_length, hop_length_fft)
# Number of frames
nb_samples = audio_denoise_recons.shape[0]
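# Why n_fft = 255 and hop_length_fft = 63 yield a squared spectrogram: the
# STFT returns n_fft // 2 + 1 = 128 frequency bins, and with an odd n_fft a
# centered STFT over one audio frame produces
# 1 + (frame_length - 1) // hop_length_fft time steps, which is also 128 when
# frame_length is 8064 (an assumed value, equal to hop_length_fft * 128).
# A quick sanity check of that arithmetic:
n_fft = 255
hop_length_fft = 63
frame_length = 8064  # assumed frame size in samples
assert n_fft // 2 + 1 == 128                            # frequency bins
assert 1 + (frame_length - 1) // hop_length_fft == 128  # time steps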
def training(data_noise_dir, data_voice_dir, spectrogram_dir, weights_dir,
             model_name, training_from_scratch, epochs, batch_size):
    # Train incrementally over ten spectrogram chunks; only the first chunk
    # starts from scratch, later chunks resume from the saved weights
    for i in range(10):
        if i == 0:
            training_from_scratch = True
        else:
            training_from_scratch = False
        # Load the noisy voice and clean voice spectrograms for this chunk
        x_in = np.load(
            spectrogram_dir + str(i) +
            f'{data_voice_dir}_{data_noise_dir}_noisy_voice_amp_db' + ".npy")
        x_out = np.load(spectrogram_dir + str(i) +
                        f'{data_voice_dir}_voice_amp_db' + ".npy")
        # The model predicts the noise, i.e. the difference between the two
        x_out = x_in - x_out
        # Check the distributions before and after scaling between -1 and 1
        print(stats.describe(x_in.reshape(-1, 1)))
        print(stats.describe(x_out.reshape(-1, 1)))
        x_in = scaled_in(x_in)
        x_out = scaled_out(x_out)
        print(x_in.shape)
        print(x_out.shape)
        print(stats.describe(x_in.reshape(-1, 1)))
        print(stats.describe(x_out.reshape(-1, 1)))
        # Reshape for training
        x_in = x_in[:, :, :]
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_out = x_out[:, :, :]
        x_out = x_out.reshape(x_out.shape[0], x_out.shape[1], x_out.shape[2], 1)
        x_train, x_test, y_train, y_test = train_test_split(x_in, x_out,
                                                            test_size=0.10,
                                                            random_state=42)

        if training_from_scratch:
            generator_nn = unet()
        else:
            generator_nn = unet(pretrained_weights=weights_dir + model_name +
                                '.h5')

        # Save the best model to disk during training
        checkpoint = ModelCheckpoint(weights_dir + model_name + '.h5',
                                     verbose=1, monitor='val_loss',
                                     save_best_only=True, mode='auto')
        generator_nn.summary()
        history = generator_nn.fit(x_train, y_train, epochs=epochs,
                                   batch_size=batch_size, shuffle=True,
                                   callbacks=[checkpoint], verbose=1,
                                   validation_data=(x_test, y_test))

        # Save the model architecture as JSON alongside the weights
        model_json = generator_nn.to_json()
        with open(f"{weights_dir + model_name}.json", "w") as json_file:
            json_file.write(model_json)