def remove_noise(self, signal, rate):
    signal = remove_clicks(signal, rate, window_size=2**10, margin=1.2)
    # Apply highpass filter to greatly reduce signal strength below 1500 Hz.
    self.sample_highpassed = highpass_filter(signal, rate, cut=1500)
    self.segmentator = select_best_segmentator(self.sample_highpassed, rate, detector='energy')
    no_silence_intervals = self.segmentator.get_number_of_silence_intervals()
    out = signal
    if no_silence_intervals == 0:
        return self.sample_highpassed
    elif no_silence_intervals == 1:
        # Perform spectral subtraction on sample (not high-passed!)
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = reduce_noise(signal, noise)  # Perform spectral subtraction
    else:
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = reduce_noise(signal, noise)  # Perform spectral subtraction
        noise = self.segmentator.get_next_silence(signal)  # Try again
        out = reduce_noise(out, noise)  # Perform spectral subtraction
    # Apply high-pass filter on spectral-subtracted sample
    out = highpass_filter(out, rate, 1500)
    return out
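# The helpers used above (remove_clicks, highpass_filter, reduce_noise,
# select_best_segmentator) are project-specific and not defined in this
# snippet. As a rough illustration only, a highpass_filter matching the call
# signature seen here could be built on a SciPy Butterworth filter; the
# filter order (5) and the use of zero-phase filtfilt are assumptions, not
# the project's actual implementation. butter/filtfilt are imported directly
# to avoid clashing with the `signal` parameter name.
import numpy as np
from scipy.signal import butter, filtfilt

def highpass_filter(signal, rate, cut, order=5):
    # SciPy expects the cutoff normalized to the Nyquist frequency.
    nyquist = rate / 2.0
    b, a = butter(order, cut / nyquist, btype='highpass')
    # filtfilt runs the filter forward and backward, so no phase shift.
    return filtfilt(b, a, np.asarray(signal, dtype=float))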
def formatAudBatch(self, aud_msg_array, name=""): # perform pre-processing on the audio input num_frames = len(aud_msg_array) input_data = np.reshape(aud_msg_array, (num_frames * len(aud_msg_array[0]))) # modify data core_data = input_data # mute the first 2 seconds of audio (where the NAO speaks) mute_time = 2 input_data[:np.argmax(input_data) + int(16000 * mute_time)] = 0 # get the indicies for the noise sample noise_sample_s, noise_sample_e = 16000 * (-1.5), -1 # perform spectral subtraction to reduce noise noise = core_data[int(noise_sample_s):noise_sample_e] filtered_input = reduce_noise(np.array(core_data), noise) # smooth signal b, a = signal.butter(3, 0.05) filtered_input = signal.lfilter(b, a, filtered_input) noise = filtered_input[int(noise_sample_s):noise_sample_e] # additional spectral subtraction to remove remaining noise filtered_input = reduce_noise(filtered_input, noise) # generate spectrogram S = librosa.feature.melspectrogram(y=filtered_input, sr=self.rate, n_mels=128, fmax=8000) S = librosa.power_to_db(S, ref=np.max) arr = [] # split the spectrogram into A_i. This generates an overlap between # frames with as set stride stride = S.shape[1] / float(num_frames) frame_len = aud_dtype["cmp_w"] #pad the entire spectrogram so that overlaps at either end do not fall out of bounds empty = np.zeros((S.shape[0], 3)) empty_end = np.zeros((S.shape[0], 8)) S = np.concatenate((empty, S, empty_end), axis=1) split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len), dtype=S.dtype) for i in range(0, num_frames): split_data[i] = S[:, int(math.floor(i * stride) ):int(math.floor(i * stride)) + frame_len] #normalize the output to be between 0 and 255 split_data -= split_data.min() split_data /= split_data.max() / 255.0 return np.reshape(split_data, (num_frames, -1))
def remove_noise(self, signal, rate):
    signal = remove_clicks(signal, rate, window_size=2**10, margin=1.2)
    # Apply highpass filter to greatly reduce signal strength below 500 Hz.
    self.sample_highpassed = highpass_filter(signal, rate, cut=500)
    self.segmentator = select_best_segmentator(self.sample_highpassed, rate, detector='energy')
    no_silence_intervals = self.segmentator.get_number_of_silence_intervals()
    out = signal
    if no_silence_intervals == 0:
        return self.sample_highpassed
    elif no_silence_intervals == 1:
        # Perform spectral subtraction on sample (not high-passed!)
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = reduce_noise(signal, noise)  # Perform spectral subtraction
    else:
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = reduce_noise(signal, noise)  # Perform spectral subtraction
        noise = self.segmentator.get_next_silence(signal)  # Try again
        out = reduce_noise(out, noise)  # Perform spectral subtraction
    # Apply high-pass filter on spectral-subtracted sample
    out = highpass_filter(out, rate, 500)
    return out
def remove_noise(self, signal, rate):
    # Apply highpass filter to greatly reduce signal strength below 1500 Hz.
    self.sample_highpassed = highpass_filter(signal, rate, 1500)
    self.segmentator = select_best_segmentator(self.sample_highpassed, rate)
    no_silence_intervals = self.segmentator.get_number_of_silence_intervals()
    out = signal

    # w = wavelets.Wavelets()
    # signal = w.denoise(signal)

    if no_silence_intervals == 0:
        raise ValueError('Could not find any silence intervals')
    elif no_silence_intervals == 1:
        # Perform spectral subtraction on sample (not high-passed!)
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = ns.reduce_noise(signal, noise, 0)  # Perform spectral subtraction
    else:
        noise = self.segmentator.get_next_silence(signal)  # Get silence period
        out = ns.reduce_noise(signal, noise, 0)  # Perform spectral subtraction
        noise = self.segmentator.get_next_silence(signal)  # Try again
        out = ns.reduce_noise(out, noise, 0)  # Perform spectral subtraction
    # Apply high-pass filter on spectral-subtracted sample
    out = highpass_filter(out, rate, 1500)
    return out
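# The segmentator returned by select_best_segmentator (with its
# get_number_of_silence_intervals / get_next_silence interface) is also
# external to these snippets. A toy energy-based silence detector in the same
# spirit is sketched below: frame the signal and threshold short-time energy.
# The frame size and threshold factor are arbitrary assumptions, and it
# returns (start, end) sample indices rather than the project's interval
# objects.
import numpy as np

def find_silence_intervals(sig, rate, frame_len=1024, factor=0.1):
    sig = np.asarray(sig, dtype=float)
    n = len(sig) // frame_len
    frames = sig[:n * frame_len].reshape(n, frame_len)
    energy = (frames ** 2).mean(axis=1)
    threshold = factor * energy.max()
    # Merge consecutive low-energy frames into (start, end) intervals.
    intervals, start = [], None
    for i, e in enumerate(energy):
        if e < threshold and start is None:
            start = i * frame_len
        elif e >= threshold and start is not None:
            intervals.append((start, i * frame_len))
            start = None
    if start is not None:
        intervals.append((start, n * frame_len))
    return intervals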
def generate_spectrogram(self, audio_data, source):
    """
    Generates a mel-spectrogram for the given audio data

    :param audio_data: ndarray that will be processed
    :param source: string indicating if the audio source is a 'kinect'
        sensor or 'nao' robot
    :return: generated mel-spectrogram
    """
    num_frames = int(float(len(audio_data)) / self._nao_rate * 10)
    if 'kinect' in source:
        noise = self._kinect_noise_sample[:len(audio_data)]
        spect_sample = self._kinect_spect_sample
        max_mean = self._max_kinect_spect_mean
    else:
        noise = self._nao_noise_sample
        spect_sample = self._nao_spect_sample
        max_mean = self._max_nao_spect_mean
    if noise is None:
        noise_sample_s, noise_sample_e = int(self._nao_rate * (-1.5)), -1
        noise = audio_data[noise_sample_s:noise_sample_e]

    # perform spectral subtraction to reduce noise
    filtered_input = reduce_noise(np.array(audio_data), noise)

    # smooth signal with a 3rd-order Butterworth low-pass filter
    b, a = signal.butter(3, [0.05])
    filtered_input = signal.lfilter(b, a, filtered_input)
    # noise = filtered_input[noise_sample_s:noise_sample_e]

    # additional spectral subtraction to remove remaining noise
    filtered_input = reduce_noise(filtered_input, noise)

    # attach spectrogram sample
    if self._generate_samples:
        # track the loudest frame as the stored spectrogram sample
        frame_size = len(filtered_input) // num_frames
        for i in range(num_frames):
            curr_start = i * frame_size
            mean = np.mean(filtered_input[curr_start:curr_start + frame_size])
            if mean > max_mean:
                max_mean = mean
                spect_sample = filtered_input[curr_start:curr_start + frame_size]
    else:
        # append the stored sample; the extra frame is dropped before returning
        filtered_input = np.append(filtered_input, spect_sample)
        num_frames += 1

    # generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=filtered_input, sr=self._nao_rate, n_mels=128, fmax=8000)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
    if 'kinect' in source:
        self.kinect_spectrogram = spectrogram
    else:
        self.nao_spectrogram = spectrogram

    # split the spectrogram into A_i. This generates an overlap between
    # frames with a set stride
    stride = spectrogram.shape[1] / float(num_frames)
    frame_len = aud_dtype["cmp_w"]

    # pad the entire spectrogram so that overlaps at either end do not fall out of bounds
    min_val = np.nanmin(spectrogram)
    empty = np.zeros((spectrogram.shape[0], 8))
    empty.fill(min_val)
    empty_end = np.zeros((spectrogram.shape[0], 8))
    empty_end.fill(min_val)
    spectrogram = np.concatenate((empty, spectrogram, empty_end), axis=1)
    split_data = np.zeros(shape=(num_frames, spectrogram.shape[0], frame_len), dtype=spectrogram.dtype)
    for i in range(0, num_frames):
        split_data[i] = spectrogram[:, int(math.floor(i * stride)):int(math.floor(i * stride)) + frame_len]

    # normalize the output to be between 0 and 255
    split_data -= split_data.min()
    split_data /= split_data.max() / 255.0

    if not self._generate_samples:
        split_data = split_data[:-1]
        num_frames -= 1
    else:
        if 'kinect' in source:
            self._kinect_spect_sample = spect_sample
            self._max_kinect_spect_mean = max_mean
        else:
            self._nao_spect_sample = spect_sample
            self._max_nao_spect_mean = max_mean

    return np.reshape(split_data, (num_frames, -1))
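# The A_i splitting step above cuts overlapping windows out of the
# spectrogram at a fractional stride. A standalone demonstration of just that
# step, with frame_len=8 standing in for aud_dtype["cmp_w"] (whose real value
# is not given in these snippets) and 8-column padding as in the function
# above:
import math
import numpy as np

spec = np.arange(128 * 40, dtype=float).reshape(128, 40)  # fake spectrogram
num_frames, frame_len = 10, 8
stride = spec.shape[1] / float(num_frames)                # 4.0 columns/frame
pad = np.full((spec.shape[0], frame_len), spec.min())
spec = np.concatenate((pad, spec, pad), axis=1)
# Each window is frame_len wide but starts only stride columns after the
# previous one, so adjacent windows overlap by frame_len - stride columns.
windows = np.stack([spec[:, int(math.floor(i * stride)):
                            int(math.floor(i * stride)) + frame_len]
                    for i in range(num_frames)])
print(windows.shape)  # (10, 128, 8): num_frames overlapping slices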
def formatAudBatch(self, aud_msg_array, name=""): # perform pre-processing on the audio input for x in range(len(aud_msg_array)): aud_msg_array[x] = self.formatAudMsg(aud_msg_array[x]) num_frames = len(aud_msg_array) input_data = np.reshape(aud_msg_array, (num_frames * len(aud_msg_array[0]))) # modify data core_data = input_data # mute the first 2 seconds of audio (where the NAO speaks) # mute_time = 2 # input_data[:np.argmax(input_data)+int(16000*mute_time)] = 0 # get the indicies for the noise sample noise_sample_s, noise_sample_e = 16000 * (-1.5), -1 # perform spectral subtraction to reduce noise noise = core_data[int(noise_sample_s):noise_sample_e] filtered_input = reduce_noise(np.array(core_data), noise) # smooth signal b, a = signal.butter(3, 0.05) filtered_input = signal.lfilter(b, a, filtered_input) # additional spectral subtraction to remove remaining noise noise = filtered_input[int(noise_sample_s):noise_sample_e] filtered_input = reduce_noise(filtered_input, noise) # generate spectrogram S = librosa.feature.melspectrogram(y=filtered_input, sr=self.rate, n_mels=128, fmax=8000) S = librosa.power_to_db(S, ref=np.max) arr = [] # if(False): # # if True then output spectrogram to png file (requires matplot.pyplot lib to be imported) # plt.figure(figsize=(10,4)) # # librosa.display.specshow(S,y_axis='mel', fmax=8000,x_axis='time') # plt.colorbar(format='%+2.0f dB') # plt.title('Mel-Spectrogram') # plt.tight_layout() # print("spectrogram ouput to file.") # # out_file = "assesment_out.png"#"spec_img/all_spec/"+name+".png" # plt.savefig(out_file) # self.counter += 1 # plt.clf() # split the spectrogram into A_i. This generates an overlap between # frames with as set stride stride = S.shape[1] / float(num_frames) frame_len = aud_dtype["cmp_w"] # pad the entire spectrogram so that overlaps at either end do not fall out of bounds min_val = np.nanmin(S) empty = np.zeros((S.shape[0], 3)) empty.fill(min_val) empty_end = np.zeros((S.shape[0], 8)) empty_end.fill(min_val) S = np.concatenate((empty, S, empty_end), axis=1) split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len), dtype=S.dtype) for i in range(0, num_frames): split_data[i] = S[:, int(math.floor(i * stride) ):int(math.floor(i * stride)) + frame_len] # normalize the output to be between 0 and 255 split_data -= split_data.min() split_data /= split_data.max() / 255.0 return np.reshape(split_data, (num_frames, -1))
def formatAudBatch(self, aud_msg_array, name=""): # perform pre-processing on the audio input for x in range(len(aud_msg_array)): aud_msg_array[x] = self.formatAudMsg(aud_msg_array[x]) num_frames = len(aud_msg_array) core_data = np.reshape(aud_msg_array, (num_frames * len(aud_msg_array[0]))) # modify data # core_data = input_data # core_data = input_data[:int(16000*1.2)] # np.save(NOISE_SAMPLE_PATH.replace('#', '3'), core_data) # dummy = np.load(NOISE_SAMPLE_PATH.replace('#', '3')) # core_data = np.append(core_data, dummy) # num_frames = num_frames + int(len(dummy)/len(aud_msg_array[0])) # get the indicies for the noise sample # noise_sample_s, noise_sample_e = 1, -1 #16000 * (-1.5), -1 # perform spectral subtraction to reduce noise noise = self.__noise_sample_1 # noise = core_data[int(noise_sample_s): int(noise_sample_e)] # np.save(NOISE_SAMPLE_PATH.replace('#', '1'), noise) filtered_input = reduce_noise(np.array(core_data), noise) # smooth signal b, a = signal.butter(3, 0.05) filtered_input = signal.lfilter(b, a, filtered_input) # additional spectral subtraction to remove remaining noise noise = self.__noise_sample_2 # noise = filtered_input[int(noise_sample_s): int(noise_sample_e)] # np.save(NOISE_SAMPLE_PATH.replace('#', '2'), noise) filtered_input = reduce_noise(filtered_input, noise) filtered_input = np.append(filtered_input, self.__noise_dummy) num_frames = num_frames + int(len(self.__noise_dummy)/len(aud_msg_array[0])) # generate spectrogram S = librosa.feature.melspectrogram(y=filtered_input, sr=self.rate, n_mels=128, fmax=8000) S = librosa.power_to_db(S, ref=np.max) # if(True): # # if True then output spectrogram to png file (requires matplot.pyplot lib to be imported) # plt.figure(figsize=(10, 4)) # # librosa.display.specshow(S,y_axis='mel', fmax=8000,x_axis='time') # plt.colorbar(format='%+2.0f dB') # plt.title('Mel-Spectrogram') # plt.tight_layout() # print("spectrogram ouput to file.") # # out_file = "debug/audio_{}.png".format(self.__chunk_counter) # plt.savefig(out_file) # self.counter += 1 # plt.clf() # split the spectrogram into A_i. This generates an overlap between # frames with as set stride stride = S.shape[1] / float(num_frames) frame_len = aud_dtype["cmp_w"] # pad the entire spectrogram so that overlaps at either end do not fall out of bounds min_val = np.nanmin(S) empty = np.zeros((S.shape[0], 3)) empty.fill(min_val) empty_end = np.zeros((S.shape[0], 8)) empty_end.fill(min_val) S = np.concatenate((empty, S, empty_end), axis=1) split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len), dtype=S.dtype) for i in range(0, num_frames): split_data[i] = S[:, int(math.floor(i * stride)):int(math.floor(i * stride)) + frame_len] # normalize the output to be between 0 and 255 split_data -= split_data.min() split_data /= split_data.max() / 255.0 return np.reshape(split_data, (num_frames, -1))[:-int(len(self.__noise_dummy) / len(aud_msg_array[0])) - 1]