def process():
    if request.method == 'POST':
        f = request.files['file']
        filename = f.filename  # assumed: the original referenced an undefined `filename`
        file_path = folder_path + filename
        f.save(file_path)
        audio_path = file_path

        # plot the original waveform
        x, sr = librosa.load(audio_path)
        plt.figure(figsize=(14, 5))
        librosa.display.waveplot(x, sr=sr)  # waveplot was removed in librosa 0.10; waveshow is the replacement
        fig1 = folder_path + 'original.png'
        plt.savefig(fig1)

        # denoise, using the clip itself as the noise profile
        rate, data = wavfile.read(audio_path)
        data = np.asarray(data, dtype=np.float16)
        try:
            reduced_noise = nr.reduce_noise(audio_clip=data, noise_clip=data)
            wavfile.write(folder_path + f'clean_{filename}', rate, reduced_noise)
        except Exception:
            # fall back to flattened, int16-normalized samples
            data = data.flatten() / 32768
            reduced_noise = nr.reduce_noise(audio_clip=data, noise_clip=data)
            wavfile.write(folder_path + f'clean_{filename}', rate * 2, reduced_noise)

        # plot the processed waveform
        plt.figure(figsize=(14, 5))
        librosa.display.waveplot(reduced_noise, sr=rate)
        fig2 = folder_path + 'processed.png'
        plt.savefig(fig2)
        return render_template('view.html', fig1=fig1, fig2=fig2, filename=filename)
def reduce_noise_2chnl(self, noise_sample_start, noise_sample_end):
    # convert the noise interval from seconds to sample indices
    noise_start = int(noise_sample_start * self._rate)
    noise_end = int(noise_sample_end * self._rate)
    # denoise each stereo channel separately, then restack as (samples, 2)
    return transpose(array([
        reduce_noise(self._data[:, 0], self._data[noise_start:noise_end, 0]),
        reduce_noise(self._data[:, 1], self._data[noise_start:noise_end, 1]),
    ]))
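# Usage sketch for reduce_noise_2chnl (assumptions: `transpose` and `array`
# come from numpy, `reduce_noise` from noisereduce 1.x, and the hypothetical
# owner class below exists only for illustration).
import soundfile as sf

class _StereoClip:
    def __init__(self, path):
        self._data, self._rate = sf.read(path)  # stereo data, shape (n, 2)

_StereoClip.reduce_noise_2chnl = reduce_noise_2chnl  # attach the method above

clip = _StereoClip('stereo_recording.wav')
cleaned = clip.reduce_noise_2chnl(0.0, 1.0)  # seconds 0-1 are pure noise
sf.write('stereo_clean.wav', cleaned, clip._rate)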
def run(self, audio_file_path, dargs=None):
    reduced_path = 'denoised_' + audio_file_path
    print(audio_file_path)
    self.rate, self.data = wavfile.read(audio_file_path)
    if dargs is not None:
        # assumed intent: `dargs` is a dict of keyword arguments for noisereduce
        # (the original passed a bare generator, which reduce_noise cannot use)
        self.reduced_noise = nr.reduce_noise(**dargs)
    else:
        # reduce_noise expects sample arrays, not a file path
        self.reduced_noise = nr.reduce_noise(audio_clip=self.data.astype(np.float32),
                                             noise_clip=self.data.astype(np.float32))
    # write at the file's own rate instead of the original hard-coded 44100
    write(reduced_path, self.rate, self.reduced_noise)
    return reduced_path
def get_spectrogram_feature(filepath, melspec, todb, is_train=True):
    y, sr = torchaudio.load(filepath)

    if not is_train:
        # denoise evaluation audio, using the clip itself as the noise profile
        y = y[0].numpy()
        noise = y[:]
        y = nr.reduce_noise(audio_clip=y, noise_clip=noise, verbose=False,
                            n_fft=512, win_length=512, hop_length=256)
        y = torch.FloatTensor(y)
        y = y.unsqueeze(0)

    if is_train:
        y = y[0].numpy()
        # augmentation: add Gaussian noise with 30% probability
        if np.random.rand() <= 0.3:
            noise = np.random.normal(scale=0.003, size=len(y)).astype(np.float32)
            y += noise
        # augmentation: apply noise reduction with 30% probability
        if np.random.rand() <= 0.3:
            noise = y[:]
            y = nr.reduce_noise(audio_clip=y, noise_clip=noise, verbose=False,
                                n_fft=512, win_length=512, hop_length=256)
        y = torch.FloatTensor(y)
        y = y.unsqueeze(0)

    mel = melspec(y)
    mel = mel.transpose(1, 2)
    mel = todb(mel)
    if is_train:
        # SpecAugment-style masking and warping
        mel = time_mask(freq_mask(time_warp(mel), num_masks=2), num_masks=2)
    feat = mel.squeeze(0)

    # alternative feature pipelines kept for reference:
    # sig, sr = librosa.load(filepath, sr=16000)
    # mfcc = librosa.feature.mfcc(y=sig, sr=sr, n_mfcc=40, n_fft=2048, n_mels=256, hop_length=128, fmax=8000)
    # (rate, width, sig) = wavio.readwav(filepath)
    # sig = sig.ravel()
    # stft = torch.stft(torch.FloatTensor(sig),
    #                   N_FFT,
    #                   hop_length=int(0.01*SAMPLE_RATE),
    #                   win_length=int(0.030*SAMPLE_RATE),
    #                   window=torch.hamming_window(int(0.030*SAMPLE_RATE)),
    #                   center=False,
    #                   normalized=True,
    #                   onesided=True)
    # stft = (stft[:,:,0].pow(2) + stft[:,:,1].pow(2)).pow(0.5)
    # amag = stft.numpy()
    # feat = torch.FloatTensor(amag)
    # feat = torch.FloatTensor(feat).transpose(0, 1)

    return feat
def cut_noise(data, channels=0):
    # locate a silent section to use as the noise profile
    index = search_silence(data)
    # select section of data that is noise
    # noisy_part = data[99200:115200]
    noisy_part = data[index[1]:index[2]]
    # perform noise reduction on each channel
    print(data.shape)
    reduced_noise_0 = nr.reduce_noise(audio_clip=data[:, 0], noise_clip=noisy_part[:, 0])
    reduced_noise_1 = nr.reduce_noise(audio_clip=data[:, 1], noise_clip=noisy_part[:, 1])
    return reduced_noise_0, reduced_noise_1
def process_nr(input_clip: np.ndarray, noise: np.ndarray) -> np.ndarray:
    # denoise channel-first stereo audio (shape: (2, n_samples)) channel by channel
    processed = np.zeros(input_clip.shape)
    processed[0, :] = nr.reduce_noise(
        audio_clip=np.asfortranarray(input_clip[0, :]),
        noise_clip=np.asfortranarray(noise[0, :]),
        verbose=False,
    )
    processed[1, :] = nr.reduce_noise(
        audio_clip=np.asfortranarray(input_clip[1, :]),
        noise_clip=np.asfortranarray(noise[1, :]),
        verbose=False,
    )
    return processed
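# Usage sketch for process_nr (assumption: a stereo file loaded by librosa
# with mono=False yields the channel-first (2, n_samples) layout the
# function expects).
import librosa
import numpy as np
import noisereduce as nr

stereo, sr = librosa.load('field_recording.wav', mono=False, sr=None)
noise_seg = stereo[:, :sr]  # treat the first second as the noise profile
clean = process_nr(stereo, noise_seg)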
def post(self):
    payload = request.get_json()
    # incoming samples are int16; scale to [-1, 1]
    audio = np.array(payload['audio']) / 32768
    noise = np.array(payload['noise']) / 32768
    sample_rate = payload['sample_rate']
    # url = request.args.get('url')
    # wav = requests.get(url).content
    # wav = request.data
    # audio, sample_rate = librosa.load(BytesIO(wav), sr=16000, res_type='kaiser_fast')
    audio = nr.reduce_noise(audio_clip=audio, noise_clip=noise, verbose=False)
    audio = fix_audio(audio)
    # audio_tensor = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    audio_tensor = librosa.power_to_db(
        librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=40),
        ref=np.max)
    batch = np.reshape(audio_tensor, (1, 40, 126, 1))
    out = self.model.predict(batch)
    index = keras.backend.argmax(out[0]).numpy()
    # pct = out * 100
    # return [out.tolist(), str(index)]
    # return str([out, index, pct])
    return self.labels[index]
def _filter_audio_nr(self, path_audio):
    """
    Applies a noise-reduction filter to the given audio file and returns
    the path to the filtered audio file.
    Source code: https://timsainburg.com/noise-reduction-python.html

    Args:
        path_audio: path of audio file
    """
    # create output path
    path_audio_filt = self._extend_filename(path_audio, 'filt', True)
    # load data and scale int16 samples to [-1, 1]
    rate, data = scipy.io.wavfile.read(path_audio)
    data = data / 32768
    # perform noise reduction, using the clip itself as the noise profile
    data_reduced_noise = nr.reduce_noise(audio_clip=data, noise_clip=data, verbose=False)
    # save filtered audio
    scipy.io.wavfile.write(path_audio_filt, rate, data_reduced_noise)
    return path_audio_filt
def audacity_noise_reduce(noise_file, audio_samples, verbose=False):
    '''
    Uses a sample file of noise to noise-reduce like Audacity does

    Inputs:
        noise_file: path to noise file
        audio_samples: samples to be noise-reduced
        verbose: whether or not to print graphs

    Returns:
        noise-reduced samples. for some reason makes a smaller spectrogram? #TODO
    '''
    noise_samples, sample_rate = load(
        noise_file,
        mono=False,  # don't automatically load as mono, so we can warn if we force to mono
        sr=22050,    # resample
        res_type='kaiser_best',
    )
    # perform noise reduction
    reduced_noise_samples = nr.reduce_noise(audio_clip=audio_samples,
                                            noise_clip=noise_samples,
                                            verbose=verbose)
    return reduced_noise_samples
def reduce_noise(input):
    """
    Removes noise in two steps:
    - apply a bandpass filter
    - subtract noise based on its spectral profile

    Args:
        input (list): audio signals

    Returns:
        list: audio signals with noise removed
    """
    output = []
    for y in input:
        # bandpass filter
        y_band = signal.lfilter(BANDPASS_FILTER, [1.0], y)
        # remove in-band noise, using the first 4000 samples as the noise profile
        y_r = noisereduce.reduce_noise(audio_clip=y_band,
                                       noise_clip=y_band[0:4000],
                                       verbose=False)
        output.append(y_r)
    return output
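# Usage sketch (assumption: BANDPASS_FILTER is an FIR filter designed
# elsewhere in the module; here it is built with scipy.signal.firwin for a
# 300-3400 Hz speech band at 16 kHz purely for illustration).
import numpy as np
from scipy import signal
import noisereduce

SR = 16000
BANDPASS_FILTER = signal.firwin(101, [300, 3400], pass_zero=False, fs=SR)

clips = [np.random.randn(SR * 2).astype(np.float32)]  # stand-in audio clips
cleaned = reduce_noise(clips)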
def remove_noise_function(file_name):
    # read audio
    audio = f'{file_name}.wav'
    path = os.fspath(audio)
    data, sr = librosa.load(path=path, duration=5.0)

    # remove noise: build a synthetic band-limited clip to use as the noise profile
    noise_len = 2  # seconds
    noise = band_limited_noise(min_freq=4000, max_freq=12000,
                               samples=len(data), samplerate=sr) * 10
    noise_clip = noise[:sr * noise_len]
    # perform noise reduction
    reduced_noise = nr.reduce_noise(audio_clip=data, noise_clip=noise_clip, verbose=True)

    # display audio
    print('after remove')
    t = ipd.Audio(reduced_noise, rate=sr)
    # note: librosa.output.write_wav requires librosa < 0.8; soundfile.write is the modern equivalent
    librosa.output.write_wav(f'{file_name}.wav', reduced_noise, sr)

    # change format from wav to flac
    wav_audio = AudioSegment.from_file(f'{file_name}.wav', format='wav')
    wav_audio.export(f'{file_name}.flac', format='flac')
def denoise_audio(
        signal: np.ndarray,
        rate: int,
        including_multipass: bool = True
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, int, int]:
    """
    Denoises given audio signal.

    :param signal: audio signal as int16 values
    :param rate: audio sample rate in samples per second
    :param including_multipass: also use multi-band spectral subtraction for
        advanced denoising. Caution: returned audio length may then be smaller.
    :returns: denoised audio signal as int16 values
    :returns: intervals of pure noise
    :returns: start of used noise interval
    :returns: end of used noise interval
    """
    intervals = get_noise_intervals(signal, rate)
    a, b = get_largest_noise_interval(intervals)
    noisy_part = signal[a:b]
    # perform noise reduction
    reduced_noise = nr.reduce_noise(audio_clip=signal.astype(np.float16),
                                    noise_clip=noisy_part.astype(np.float16),
                                    verbose=False).astype(np.int16)
    noise_signal = signal - reduced_noise
    # call multipass if needed
    if including_multipass:
        voice_leakage = multiband_substraction_denoise(noise_signal, rate, a, b)
        noise_signal = noise_signal[:voice_leakage.size] - voice_leakage
    return reduced_noise, noise_signal, intervals, a, b
def create_mfcc(src=SRC_DIR, dst='speech_data.json', n_mfcc=13, n_fft=2048,
                hop_length=512, pad=True, save_file=True):
    data = {'mappings': [], 'mfccs': [], 'labels': []}
    for idx, (dir_path, dir_names, filenames) in enumerate(os.walk(src)):
        if dir_path != src:  # skip the root directory itself ('is not' compared identity)
            label = os.path.basename(dir_path)
            print('Processing:', label)
            data['mappings'].append(label)
            for f in filenames:
                try:
                    signal, sample_rate = librosa.load(os.path.join(dir_path, f), sr=SAMPLE_RATE)
                    # denoise using the clip itself as the noise profile, then trim silence
                    signal_noise_reduced = nr.reduce_noise(audio_clip=signal,
                                                           noise_clip=signal,
                                                           verbose=False)
                    signal_trimmed, _ = librosa.effects.trim(signal_noise_reduced)
                    mfcc = librosa.feature.mfcc(signal_trimmed.T,
                                                sr=sample_rate,
                                                n_fft=n_fft,
                                                n_mfcc=n_mfcc,
                                                hop_length=hop_length)
                    data['mfccs'].append(mfcc.tolist())
                    data['labels'].append(idx - 1)
                except Exception:
                    print('File loading failed:', f)
    if save_file:
        with open(dst, 'w') as f:
            json.dump(data, f, indent=4)
    return data
def prepare_wav(wav_loc, hparams=None):
    """ load wav and convert to correct format """
    # get rate and data
    rate, data = load_wav(wav_loc)
    # convert data if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)
    if hparams is not None:
        # bandpass filter
        data = butter_bandpass_filter(data, hparams.butter_lowcut,
                                      hparams.butter_highcut, rate, order=5)
        # reduce noise
        if hparams.reduce_noise:
            data = nr.reduce_noise(audio_clip=data, noise_clip=data,
                                   **hparams.noise_reduce_kwargs)
    return rate, data
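# Usage sketch for prepare_wav (assumptions: load_wav, butter_bandpass_filter
# and int16_to_float32 are module helpers as referenced above; hparams is a
# plain namespace whose noise_reduce_kwargs is forwarded to noisereduce 1.x;
# the field values are illustrative).
from types import SimpleNamespace

hparams = SimpleNamespace(
    butter_lowcut=500,
    butter_highcut=8000,
    reduce_noise=True,
    noise_reduce_kwargs={'n_std_thresh': 1.5, 'prop_decrease': 0.8},
)
rate, clean = prepare_wav('song.wav', hparams=hparams)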
def process_audio_data(audio_file, datetime_audio):
    raw_audio, sample_rate = librosa.load(audio_file)
    # treat the first 25000 samples (~1.1 s at 22050 Hz) as the noise profile
    noisy_part = raw_audio[0:25000]
    nr_audio = nr.reduce_noise(audio_clip=raw_audio, noise_clip=noisy_part, verbose=False)
    return nr_audio, sample_rate, datetime_audio
def decode(in_file, out_file):
    """
    Takes a file prefix for a data/model file pair and decodes a wav file
    from them at the provided location.

    example:
        python encode.py decode <path of the .npz file intended for reconstructing>
                                <path of directory where to save the reconstructed audio file>
    """
    # load the model
    autoencoder = keras.models.load_model("autoencoder.model")

    # construct the decoder layers
    in_layer = keras.layers.Input(shape=(1, 441 // 8))
    decode = autoencoder.layers[-3](in_layer)
    decode = autoencoder.layers[-2](decode)
    decode = autoencoder.layers[-1](decode)
    decoder = keras.models.Model(in_layer, decode)

    # load the data
    ins = np.load(in_file + ".npz")
    encoded = ins['data']
    chans = ins['params'][0]
    samps = ins['params'][1]
    width = ins['params'][2]
    samp_rate = ins['params'][3]

    # run the decoder
    outputs = decoder.predict(encoded)

    # build a wav file
    out = np.concatenate(np.concatenate(outputs))

    # remove noise in the reconstructed file using thresholding and noise clipping
    noisy_part = out[out > 0.9]
    out = nr.reduce_noise(audio_clip=out, noise_clip=noisy_part)
    out = (((out * 2.0) - 1.0) * float(pow(2, 15))).astype(int)
    out = list(map(norm, out))
    dataToWave(out_file + ".wav", out, chans, samps, width, samp_rate)
def process_noise_data(CHUNK, data, noise):
    """
    Audio data processing.
    :param CHUNK: frame size in samples
    :param data: raw int16 audio bytes
    :param noise: list of raw int16 noise frames
    :return: denoised audio bytes
    """
    WIN_LENGTH = CHUNK // 2
    HOP_LENGTH = CHUNK // 4
    if noise:
        data = np.frombuffer(data, np.int16)
        data = int16_to_float32(data)
        nData = int16_to_float32(np.frombuffer(b''.join(noise), np.int16))
        data = nr.reduce_noise(audio_clip=data, noise_clip=nData, verbose=False,
                               n_std_thresh=1.5, prop_decrease=1,
                               win_length=WIN_LENGTH, n_fft=WIN_LENGTH,
                               hop_length=HOP_LENGTH, n_grad_freq=4)
        data = float32_to_int16(data)
        data = np.ndarray.tobytes(data)
    return data
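# Usage sketch for process_noise_data (assumptions: int16_to_float32 /
# float32_to_int16 are simple scale-by-32768 helpers as in the snippets
# above, defined here as stand-ins; `noise` is a list of raw int16 frames
# captured while the speaker was silent, e.g. from a PyAudio stream).
import numpy as np

def int16_to_float32(x):
    return (x / 32768).astype(np.float32)

def float32_to_int16(x):
    return (x * 32767).astype(np.int16)

CHUNK = 1024
silence = [np.random.randint(-300, 300, CHUNK, dtype=np.int16).tobytes()
           for _ in range(8)]                      # stand-in noise frames
frame = np.random.randint(-8000, 8000, CHUNK, dtype=np.int16).tobytes()
clean_frame = process_noise_data(CHUNK, frame, silence)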
def process_wav(wav, noisy=False):
    """
    Processes a wav sample into a constant-length, scaled, log-mel spectrogram.

    :param wav: The audio time series
    :param noisy: Used if the data is known to be noisy
    :return: np.array
    """
    # reshape to a constant length: slice if too long, pad if too short
    if auc.MAX_DATA_POINTS < len(wav):
        wav = wav[:auc.MAX_DATA_POINTS]
    else:
        wav = pad_wav(wav)
    if noisy:
        # use the leading segment of the clip as the noise profile
        noisy_part = wav[:auc.NOISY_DURATION]
        # noinspection PyTypeChecker
        wav = nr.reduce_noise(audio_clip=wav, noise_clip=noisy_part, verbose=False)
    # convert to log-mel spectrogram
    melspecgram = sg.wave_to_melspecgram(wav)
    # scale the spectrogram to be between -1 and 1
    scaled_melspecgram = sg.scale_melspecgram(melspecgram)
    return scaled_melspecgram
def remove_noise(self):
    y, sr = librosa.load("test.wav")
    noise_len = 2  # seconds
    # synthesize a band-limited noise clip to use as the noise profile
    noise = band_limited_noise(min_freq=2000, max_freq=12000,
                               samples=len(y), samplerate=sr) * 10
    noise_clip = noise[:sr * noise_len]
    noise_reduced = nr.reduce_noise(audio_clip=y, noise_clip=noise_clip,
                                    prop_decrease=1.0, verbose=False)
    sf.write('test.wav', noise_reduced, sr)
    self.predict_lbl['text'] = 'Đã remove noise'  # Vietnamese: "Noise removed"
def processAudio(file, path):
    audio, sr = librosa.load(file)  # raw audio file

    # trim long audio clips, add silence to short audio clips
    n = 616500  # corresponds to approx. 30 seconds; 1 sec ~ 20550 samples
    if audio.shape[0] >= 4 * n:  # ignore all files > 2 mins
        print(f'Skipped, clip was {audio.shape[0] / (2 * n)} mins long')
        return ()
    elif audio.shape[0] >= n:
        audio, _ = librosa.effects.trim(audio, top_db=20, frame_length=512, hop_length=64)
        audio = audio[:n - 1]

    # augment the audio with varying levels of noise reduction
    audio1 = nr.reduce_noise(audio, findNoise(audio), verbose=False)        # de-noised most
    audio2 = nr.reduce_noise(audio, findNoise(audio) / 1.5, verbose=False)  # de-noised less
    audio3 = nr.reduce_noise(audio, findNoise(audio) / 2, verbose=False)    # de-noised least

    # augment the audio by translating each sample w.r.t. the time axis
    audio = translate(audio1) + translate(audio2) + translate(audio3)

    # convert each audio clip into a mel spectrogram and save it
    n_mels = 257
    k = 0
    path = path.replace('.' + path.split('.')[-1], '')
    for clip in audio:
        mel = librosa.feature.melspectrogram(clip, sr=sr, n_fft=2048,
                                             hop_length=int(clip.shape[0] / 2000),
                                             n_mels=n_mels)
        mel = librosa.power_to_db(mel, ref=np.max)
        mel = minMaxNormalize(mel)
        mel = mel[0:258, 0:2000]  # shape is 257 x 2000
        image_path = path + f'-{k}.jpg'
        print(k)
        save_spectrogram(mel, image_path)  # save a black & white version of the spectrogram
        k += 1
    return ()
def noiseReduce(file_name):
    data, rate = sf.read('./heartbeat_data/' + file_name + '.wav')
    data = np.array(data)
    # denoise using the clip itself as the noise profile
    noise_reduced = nr.reduce_noise(audio_clip=data, noise_clip=data,
                                    prop_decrease=0.7, verbose=True)
    sf.write('./heartbeat_noseReduce_data/' + file_name + '_nr_v.wav', noise_reduced, rate)
def __call__(self, wav):
    y, sr = wav
    noise_reduced = nr.reduce_noise(audio_clip=y,
                                    noise_clip=self.noise,
                                    prop_decrease=0.9,
                                    verbose=False,
                                    n_std_thresh=self.threshold,
                                    use_tensorflow=False)
    return noise_reduced, sr
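# Usage sketch (assumption: __call__ above belongs to a dataset transform
# whose constructor stores a noise clip and an n_std_thresh value; the class
# and file names here are illustrative).
import librosa
import noisereduce as nr

class Denoise:
    def __init__(self, noise, threshold=1.5):
        self.noise = noise       # 1-D noise profile
        self.threshold = threshold

Denoise.__call__ = __call__      # attach the method defined above

noise_clip, _ = librosa.load('noise_only.wav', sr=22050)
transform = Denoise(noise_clip, threshold=1.5)
y, sr = librosa.load('speech.wav', sr=22050)
clean, sr = transform((y, sr))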
def prepare_wav(wav_loc, hparams, debug):
    """ load wav and convert to correct format """
    if debug:
        debug_data = {}
    else:
        debug_data = None

    # get rate and data
    data, _ = librosa.load(wav_loc, sr=hparams.sr)
    # convert data if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)

    # split into chunks to avoid memory issues
    len_chunk_minutes = 10
    len_chunk_sample = hparams.sr * 60 * len_chunk_minutes
    data_chunks = []
    for t in range(0, len(data), len_chunk_sample):
        start = t
        end = min(len(data), t + len_chunk_sample)
        data_chunks.append(data[start:end])
        # only keep one chunk for debug
        if debug:
            break

    # bandpass filter each chunk
    data_cleaned = []
    if hparams is not None:  # note: hparams.sr is already dereferenced above, so this check is redundant
        for data in data_chunks:
            if debug:
                debug_data['x'] = data
            data = butter_bandpass_filter(data, hparams.butter_lowcut,
                                          hparams.butter_highcut, hparams.sr,
                                          order=5)
            if debug:
                debug_data['x_filtered'] = data
            # reduce noise
            if hparams.reduce_noise:
                data = nr.reduce_noise(audio_clip=data, noise_clip=data,
                                       **hparams.noise_reduce_kwargs)
                if debug:
                    debug_data['x_rn'] = data
            data_cleaned.append(data)
    else:
        data_cleaned = data_chunks

    # concatenate chunks
    data = np.concatenate(data_cleaned)
    return data, debug_data
def process(files):
    data, sr = librosa.load(files[0])        # recording to clean
    noisypart, sr1 = librosa.load(files[1])  # separate noise-only recording
    datax = nr.reduce_noise(audio_clip=data, noise_clip=noisypart, verbose=False)
    # keep the 100-2500 Hz band typical of lung sounds
    lung = butter_bandpass_filter(datax, 100, 2500, sr)
    path = r"D:\lungSound.wav"
    write(path, sr, lung)
def loadWavFile(fileName, filePath, savePlot, maxAudioLength, reduceNoise=True):
    # read file
    # rate, data = wavfile.read(filePath)
    # print(filePath, rate, data.shape, "audio length", data.shape[0] / rate, data[0])
    data, rate = librosa.load(filePath, sr=None)
    # print(filePath, rate, data.shape, "librosa audio length", data.shape[0] / rate, data[0])

    if reduceNoise:
        # two passes: use the first and last 10000 samples as noise profiles
        noiseRemovedData = noisereduce.reduce_noise(audio_clip=data,
                                                    noise_clip=data[0:10000],
                                                    verbose=False)
        noiseRemovedData = noisereduce.reduce_noise(audio_clip=noiseRemovedData,
                                                    noise_clip=data[-10000:],
                                                    verbose=False)
        data = noiseRemovedData

    maxDataLength = int(maxAudioLength * rate)
    padding = []
    if data.shape[0] > maxDataLength:
        raise ValueError("Max audio length breached")
    else:
        paddingDataLength = maxDataLength - data.shape[0]
        padding = [0 for i in range(paddingDataLength)]

    # data is stereo sound. take left speaker only
    leftSpeakerSound = data  # data[:,0]
    # print("leftSpeakerSound.shape", leftSpeakerSound.shape)
    audioWithPadding = numpy.concatenate((leftSpeakerSound, padding))
    # print("audioWithPadding.shape", audioWithPadding.shape)

    if savePlot:
        fig, ax = plt.subplots()
        ax.plot(audioWithPadding)
        fig.suptitle(fileName)
        fig.savefig("./output_img/wav/" + fileName + "_wav.png")
        plt.close(fig)

    return audioWithPadding, rate
def main():
    # `id` is assumed to be defined at module scope; the list holds one
    # hand-labelled noise interval (samples 26353-28110) for recording 1
    for (i, start, end, (y, samplerate)) in [(1, 26353, 28110, load(_fullpath(id, 1)))]:
        noise_clip = y[start:end]
        # perform noise reduction
        reduced_noise = nr.reduce_noise(audio_clip=y, noise_clip=noise_clip, verbose=True)
        sf.write(_writepath(id, i), reduced_noise, samplerate=samplerate)
def noise_reduction(array):
    import tensorflow as tf
    # enable GPU memory growth so the TensorFlow STFT backend can be used
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    # wname = mktemp('.wav')
    # call.export(wname, format="wav")
    noisy_part = array
    reduced_noise = nr.reduce_noise(audio_clip=array.astype('float64'),
                                    noise_clip=noisy_part.astype('float64'),
                                    use_tensorflow=True,
                                    verbose=False)
    return reduced_noise
def reduce_noise(path):
    file = wavefile.load(path)
    samplerate = file[0]
    data = file[1][0]  # first channel
    # use the second 1-second window of the clip as the noise profile
    nr_data = nr.reduce_noise(audio_clip=np.array(data),
                              noise_clip=np.array(data[samplerate:2 * samplerate]),
                              verbose=False)
    # overwrite the original file; the original's list(np.float_(...)) round
    # trip is equivalent to a float64 cast
    sf.write(path, np.asarray(nr_data, dtype=np.float64), samplerate)
    return
def clean_file(self, fname, noise_sample="noise_sample.wav"):
    noise_rate, noise_clip = wavfile.read(noise_sample)
    noise_clip = noise_clip.astype(np.float32)
    audio_rate, audio_clip = wavfile.read(fname)
    audio_clip = audio_clip.astype(np.float32)
    processed_clip = nr.reduce_noise(audio_clip=audio_clip,
                                     noise_clip=noise_clip,
                                     verbose=False)
    # overwrite the input file with int16 samples
    wavfile.write(fname, audio_rate, np.asarray(processed_clip, dtype=np.int16))
def noise_reduce(audio_data, rate, win_length, amp_adjust):
    print("Analyzing the audio")
    import noisereduce as nr

    # compute the RMS of each non-overlapping window of win_length samples
    mean = 0
    noise_list = []
    mean_list = []
    for j in range(0, len(audio_data)):
        if j > 0 and j % win_length == 0:
            mean = math.sqrt(mean / win_length)
            mean_list.append(mean)
            mean = 0
            mean += audio_data[j] ** 2
        else:
            mean += audio_data[j] ** 2

    # find flat stretches where consecutive window RMS values stay within
    # +/- amp_adjust of each other; record them as (start, end) sample pairs
    k = 0
    for i in range(0, len(mean_list)):
        if 0 < i < len(mean_list) - 1:
            if mean_list[i] - amp_adjust < mean_list[i - 1] < mean_list[i] + amp_adjust:
                if mean_list[i] - amp_adjust < mean_list[i + 1] < mean_list[i] + amp_adjust:
                    if k == 0:
                        noise_list.append((i - 1) * win_length)  # region start
                        k += 1
                else:
                    if k > 0:
                        noise_list.append((i + 1) * win_length)  # region end
                        k = 0
                    else:
                        # two-window flat region: record start and end at once
                        k = 0
                        noise_list.append((i - 1) * win_length)
                        noise_list.append((i + 1) * win_length)
        elif i == len(mean_list) - 1:
            if mean_list[i] - amp_adjust < mean_list[i - 1] < mean_list[i] + amp_adjust:
                noise_list.append((i + 1) * win_length)

    # pick the longest flat stretch as the noise sample
    # (assumes at least one flat region was found)
    diff = 0
    for i in range(0, len(noise_list) - 1):
        if i % 2 == 0:
            if noise_list[i + 1] - noise_list[i] > diff:
                diff = noise_list[i + 1] - noise_list[i]
                noise_start = noise_list[i] + win_length
                noise_stop = noise_list[i + 1] - win_length
    noisy_part = audio_data[noise_start:noise_stop]

    # perform noise reduction
    nr_data = nr.reduce_noise(audio_clip=audio_data, noise_clip=noisy_part,
                              n_grad_freq=2, n_grad_time=16, n_fft=2048,
                              win_length=2048, hop_length=512,
                              n_std_thresh=1.5, prop_decrease=1.0)
    # scale to int16 (32767, not 32768, to avoid overflow at full scale)
    nr_data = np.int16(nr_data / np.max(np.abs(nr_data)) * 32767)
    write('test212.wav', rate, nr_data[512:len(nr_data) - 512])
    print("Analyzing Successfully Completed")
    print("Recognizing started")
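# Usage sketch for noise_reduce above (assumptions: a mono wav; math, numpy
# as np and scipy.io.wavfile's write are imported at module level; the
# win_length / amp_adjust values are illustrative and need tuning per recording).
from scipy.io import wavfile

rate, audio_data = wavfile.read('recording.wav')
noise_reduce(audio_data.astype('float64'), rate, win_length=2048, amp_adjust=0.01)
# the denoised result is written to 'test212.wav' by the function itself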