def reduce_noise(source_file, output_file=None, initial_noise=30,
                 window_size=0, noise_threshold=0.15):
    """Run ``logmmse_from_file`` on ``source_file``.

    Args:
        source_file: Path handed to ``logmmse_from_file`` as its input.
        output_file: Destination path. When falsy (``None`` or an empty
            string), ``source_file`` is used as the destination as well.
        initial_noise: Forwarded unchanged to ``logmmse_from_file``.
        window_size: Forwarded unchanged to ``logmmse_from_file``.
        noise_threshold: Forwarded unchanged to ``logmmse_from_file``.

    Returns:
        None. The value returned by ``logmmse_from_file`` is discarded.
    """
    # Fall back to the input path when no destination was supplied.
    destination = output_file if output_file else source_file
    logmmse_from_file(
        source_file,
        output_file=destination,
        initial_noise=initial_noise,
        window_size=window_size,
        noise_threshold=noise_threshold,
    )
def prediction(weights_dir, model_name, input_dir, output_dir, sample_rate,
               frame_length, hop_length_frame, n_fft, hop_length_fft):
    """Denoise every file listed in ``input_dir`` with a saved model.

    The model architecture is read from ``<model_name>.json`` and its weights
    from ``<model_name>.h5``, both located in ``weights_dir``. For each entry
    returned by ``os.listdir(input_dir)`` three files are written to
    ``output_dir``:

    * ``Remastered-<name>``: output of ``logmmse.logmmse_from_file``.
    * ``Final-<name>``: the remastered spectrogram minus the model
      prediction, converted back to audio and multiplied by 10.
    * ``Noise-<name>``: the model prediction converted back to audio.

    Args:
        weights_dir: Directory holding the model ``.json`` and ``.h5`` files.
        model_name: Base name (no extension) of the model files.
        input_dir: Directory whose entries are processed.
        output_dir: Directory receiving the three output files per input.
        sample_rate: Passed to ``dt.audio_files_to_file`` and used when
            writing the output WAV files.
        frame_length: Passed to ``dt.audio_list_to_numpy`` and used to
            flatten the reconstructed frames.
        hop_length_frame: Passed to ``dt.audio_list_to_numpy``.
        n_fft: FFT size; the spectrogram dimension is ``int(n_fft / 2) + 1``.
        hop_length_fft: Passed to the spectrogram conversion helpers.

    Returns:
        None.
    """
    # A context manager closes the JSON file even if read() raises.
    with open(os.path.join(weights_dir, model_name + '.json'), 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(os.path.join(weights_dir, model_name + '.h5'))
    print("Loaded model from disk!")

    # Loop-invariant: side length of the square spectrogram.
    dim_square_spec = int(n_fft / 2) + 1

    for file_name in os.listdir(input_dir):
        remastered_name = 'Remastered-' + file_name

        # First pass: logmmse writes the pre-cleaned file that is loaded next.
        # os.path.join lets the directory arguments work with or without a
        # trailing path separator.
        logmmse.logmmse_from_file(
            input_file=os.path.join(input_dir, file_name),
            output_file=os.path.join(output_dir, remastered_name))

        audio_file = dt.audio_files_to_file(output_dir, remastered_name,
                                            sample_rate)
        audio = dt.audio_list_to_numpy([audio_file], frame_length,
                                       hop_length_frame)

        m_amp_db_audio, m_pha_audio = dt.audio_numpy_to_matrix_spectrogram(
            audio, dim_square_spec, n_fft, hop_length_fft)

        # Scale the spectrograms and add a trailing channel axis of size 1
        # before calling the model.
        x_in = dt.scaled_in(m_amp_db_audio)
        x_in = x_in.reshape(x_in.shape[0], x_in.shape[1], x_in.shape[2], 1)
        x_pred = loaded_model.predict(x_in)
        inv_sca_x_pred = dt.inv_scaled_out(x_pred)
        predicted = inv_sca_x_pred[:, :, :, 0]

        # The prediction is subtracted from the input spectrogram, i.e. it is
        # treated as the noise component.
        x_denoise = m_amp_db_audio - predicted

        audio_denoise_recons = dt.matrix_spectrogram_to_numpy_audio(
            x_denoise, m_pha_audio, frame_length, hop_length_fft)
        nb_samples = audio_denoise_recons.shape[0]
        # Flatten the frames into one row; the factor 10 is a fixed gain.
        denoise_long = audio_denoise_recons.reshape(
            1, nb_samples * frame_length) * 10
        # NOTE(review): librosa.output was removed in librosa 0.8.0, so these
        # write_wav calls need an older librosa -- confirm the pinned version.
        librosa.output.write_wav(os.path.join(output_dir, 'Final-' + file_name),
                                 denoise_long[0, :], sample_rate)

        noise_recons = dt.matrix_spectrogram_to_numpy_audio(
            predicted, m_pha_audio, frame_length, hop_length_fft)
        nb_samples = noise_recons.shape[0]
        noise_long = noise_recons.reshape(1, nb_samples * frame_length)
        librosa.output.write_wav(os.path.join(output_dir, 'Noise-' + file_name),
                                 noise_long[0, :], sample_rate)
def get_command_recording():
    """Record from ``audio_stream`` into a temporary WAV file and denoise it.

    Reads ``int(sample_rate / frames_per_buffer * DURATION)`` buffers of
    ``frames_per_buffer`` frames from the module-level ``audio_stream``,
    writes them as mono 16-bit audio, then runs ``logmmse_from_file`` with
    the same path as input and output.

    Returns:
        Path of the temporary ``.wav`` file. The file is not deleted here;
        removing it is left to the caller.
    """
    import os  # local import: only needed to close the mkstemp descriptor

    fd, file_name = tempfile.mkstemp(".wav", prefix="input_command")
    # mkstemp returns an already-open OS-level descriptor. The wave module
    # reopens the file by path, so close the descriptor to avoid leaking it.
    os.close(fd)

    n_buffers = int(sample_rate / frames_per_buffer * DURATION)

    # The context manager closes the writer even if a stream read raises.
    with wave.open(file_name, "wb") as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
        wave_file.setframerate(sample_rate)
        for _ in range(n_buffers):
            wave_file.writeframes(audio_stream.read(frames_per_buffer))

    logmmse_from_file(file_name, file_name)
    return file_name
def reconstruct_audio(signal_filename, output_filename, low_cutoff, high_cutoff):
    """Rebuild a WAV file from a saved array and write a denoised copy.

    Steps:
        1. Load ``arr_0`` from ``signal_filename``, keep every second entry
           along axis 1 and drop index 0 along axis 2, then flatten the first
           two axes.
        2. Merge the rows with ``combine`` (defined elsewhere), passing a
           band-pass pre-filter that is 50 Hz wider on each side than the
           requested band.
        3. Band-pass the merged signal to ``(low_cutoff, high_cutoff)``.
        4. Scale so the largest sample maps to 32767, clip to the int16
           range and write ``output_filename``.
        5. Run ``logmmse_from_file`` on that file and write its return value
           to ``<output stem>-denoised.wav``.

    Args:
        signal_filename: Path loadable by ``np.load`` that exposes ``arr_0``.
        output_filename: Path of the WAV file to write.
        low_cutoff: Lower band edge in Hz.
        high_cutoff: Upper band edge in Hz.

    Returns:
        None.
    """
    import os  # local import: used only to split the output extension

    sample_rate = 2200  # sampling rate used for filter design and WAV output

    data = np.load(signal_filename)
    arr = data['arr_0'][:, ::2, 1:].astype(np.float64)
    arr = arr.reshape(-1, arr.shape[2])

    # Design the wide pre-filter once instead of on every call from combine.
    wide_sos = signal.butter(11, (low_cutoff - 50, high_cutoff + 50),
                             'bandpass', fs=sample_rate, output='sos')

    def prefilter(x):
        """Apply the wide band-pass filter to ``x``."""
        return signal.sosfilt(wide_sos, x)

    combined_signal = combine(arr, 4, prefilter)

    sos = signal.butter(11, (low_cutoff, high_cutoff), 'bandpass',
                        fs=sample_rate, output='sos')
    filtered = signal.sosfilt(sos, combined_signal)

    # Clip BEFORE casting: casting first lets out-of-range samples wrap
    # around in int16, which a later clip cannot undo.
    scaled = 32767 * filtered / np.max(filtered)
    wav_res = np.clip(scaled, -32767, 32767).astype('i2')
    wavfile.write(output_filename, sample_rate, wav_res)

    # splitext only looks at the final path component and copes with names
    # that have no extension.
    stem, _ = os.path.splitext(output_filename)
    denoised_filename = stem + '-denoised.wav'
    output = logmmse_from_file(output_filename)
    wavfile.write(denoised_filename, sample_rate, output)
def reducenoise():
    """Run ``logmmse_from_file`` from ``voicewave`` to ``voicerd``.

    Both paths are module-level names. The call uses fixed settings:
    ``initial_noise=60``, ``window_size=0`` and ``noise_threshold=0.15``.

    Returns:
        None. The value returned by ``logmmse_from_file`` is discarded.
    """
    settings = {
        "initial_noise": 60,
        "window_size": 0,
        "noise_threshold": 0.15,
    }
    logmmse_from_file(voicewave, output_file=voicerd, **settings)
# Evaluation driver: for every entry of `multipliers`, set the SNR from its
# first element and run EPOCHS rounds comparing the 'sys_model' prediction
# with the logmmse output.

# The sample WAV paths are resolved relative to this script's directory.
_script_dir = os.path.dirname(__file__)
_mixed_wav = os.path.join(_script_dir, '../sample_test/mixed.wav')
_logmmse_wav = os.path.join(_script_dir, '../sample_test/logmmse.wav')

for j in multipliers:
    def get_audio():
        """Return the result of ``randomizer.get_noisy_speech`` for 'dev'."""
        return randomizer.get_noisy_speech(_set='dev')

    set_snr(j[0])

    for i in range(EPOCHS):
        hm, bg, mfcc_feature = generate_sample(
            _n_filt=NFILT,
            _winlen=WINLEN,
            _winstep=WINSTEP,
            _winfunc=np.hamming,
            _generator=get_audio,
            to_write=True,
        )

        # Baseline: logmmse reads mixed.wav and writes logmmse.wav.
        # NOTE(review): mixed.wav is presumably produced by generate_sample
        # because of to_write=True -- confirm.
        logmmse_from_file(_mixed_wav, _logmmse_wav)

        sequence_feature = np.array(
            to_sequence_with_stride(mfcc_feature,
                                    left_pad=PAD_L,
                                    right_pad=PAD_R))

        # sys model
        prediction = predict(sequence_feature, 'sys_model')
        estimate = get_estimate(bg, prediction,
                                _winlen=WINLEN,
                                _winstep=WINSTEP,
                                _winfunc=np.hamming)
        write_wav('sys_model.wav', estimate.astype(np.float32))

        evaluate(estimated_sources=['sys_model.wav', 'logmmse.wav'])