Example #1
    def remove_noise(self, signal, rate):
        signal = remove_clicks(signal, rate, window_size=2**10, margin=1.2)
        # Apply highpass filter to greatly reduce signal strength below 1500 Hz.
        self.sample_highpassed = highpass_filter(signal, rate, cut=1500)

        self.segmentator = select_best_segmentator(self.sample_highpassed, rate, detector='energy')
        no_silence_intervals = self.segmentator.get_number_of_silence_intervals()

        out = signal

        if no_silence_intervals == 0:
            return self.sample_highpassed
        elif no_silence_intervals == 1:
            # Perform spectral subtraction on sample (not high-passed!)
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = reduce_noise(signal, noise)  # Perform spectral subtraction
        else:
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = reduce_noise(signal, noise)  # Perform spectral subtraction
            noise = self.segmentator.get_next_silence(signal)  # Try again
            out = reduce_noise(out, noise)  # Perform spectral subtraction

        # Apply high-pass filter on spectral-subtracted sample
        out = highpass_filter(out, rate, 1500)

        return out
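
A note on helpers: highpass_filter is called by every remove_noise example but never defined in this listing. A minimal sketch of one plausible implementation, assuming SciPy (the name and signature come from the calls above; the filter order is a guess):

import numpy as np
from scipy import signal as sp_signal

def highpass_filter(samples, rate, cut):
    # Normalize the cutoff to the Nyquist frequency and build a
    # 5th-order Butterworth high-pass filter (the order is an assumption).
    b, a = sp_signal.butter(5, cut / (rate / 2.0), btype='highpass')
    # filtfilt runs the filter forward and backward, so the output
    # is not shifted in time relative to the input.
    return sp_signal.filtfilt(b, a, np.asarray(samples, dtype=float))
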
Example #2
    def formatAudBatch(self, aud_msg_array, name=""):
        # perform pre-processing on the audio input

        num_frames = len(aud_msg_array)
        input_data = np.reshape(aud_msg_array,
                                (num_frames * len(aud_msg_array[0])))

        # modify data (note: core_data aliases input_data, so the muting
        # below affects both references)
        core_data = input_data

        # mute everything up to 2 seconds past the loudest sample
        # (the stretch where the NAO robot speaks); 16000 is the sample rate
        mute_time = 2
        input_data[:np.argmax(input_data) + int(16000 * mute_time)] = 0

        # get the indices for the noise sample
        noise_sample_s, noise_sample_e = 16000 * (-1.5), -1

        # perform spectral subtraction to reduce noise
        noise = core_data[int(noise_sample_s):noise_sample_e]
        filtered_input = reduce_noise(np.array(core_data), noise)

        # smooth signal
        b, a = signal.butter(3, 0.05)
        filtered_input = signal.lfilter(b, a, filtered_input)
        noise = filtered_input[int(noise_sample_s):noise_sample_e]

        # additional spectral subtraction to remove remaining noise
        filtered_input = reduce_noise(filtered_input, noise)

        # generate spectrogram
        S = librosa.feature.melspectrogram(y=filtered_input,
                                           sr=self.rate,
                                           n_mels=128,
                                           fmax=8000)
        S = librosa.power_to_db(S, ref=np.max)

        # split the spectrogram into A_i. This generates an overlap between
        # frames with a set stride
        stride = S.shape[1] / float(num_frames)
        frame_len = aud_dtype["cmp_w"]

        # pad the entire spectrogram so that overlaps at either end do not fall out of bounds
        empty = np.zeros((S.shape[0], 3))
        empty_end = np.zeros((S.shape[0], 8))
        S = np.concatenate((empty, S, empty_end), axis=1)

        split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len),
                              dtype=S.dtype)
        for i in range(0, num_frames):
            split_data[i] = S[:,
                              int(math.floor(i * stride)
                                  ):int(math.floor(i * stride)) + frame_len]

        # normalize the output to be between 0 and 255
        split_data -= split_data.min()
        split_data /= split_data.max() / 255.0

        return np.reshape(split_data, (num_frames, -1))
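
reduce_noise is likewise used throughout but not shown. A bare-bones spectral-subtraction sketch built on scipy.signal.stft/istft; this is an assumed implementation, not the one these projects actually ship:

import numpy as np
from scipy import signal as sp_signal

def reduce_noise(samples, noise, nfft=1024):
    # Average magnitude spectrum of the silence/noise sample.
    _, _, noise_stft = sp_signal.stft(noise, nperseg=nfft)
    noise_mag = np.abs(noise_stft).mean(axis=1, keepdims=True)
    # Subtract it from every frame of the signal, floor at zero,
    # and resynthesize with the original phase.
    _, _, spec = sp_signal.stft(samples, nperseg=nfft)
    mag = np.maximum(np.abs(spec) - noise_mag, 0.0)
    _, out = sp_signal.istft(mag * np.exp(1j * np.angle(spec)), nperseg=nfft)
    return out[:len(samples)]
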
Example #3
    def remove_noise(self, signal, rate):
        signal = remove_clicks(signal, rate, window_size=2**10, margin=1.2)
        # Apply highpass filter to greatly reduce signal strength below 500 Hz.
        self.sample_highpassed = highpass_filter(signal, rate, cut=500)
        self.segmentator = select_best_segmentator(self.sample_highpassed, rate, detector='energy')
        no_silence_intervals = self.segmentator.get_number_of_silence_intervals()

        out = signal

        if no_silence_intervals == 0:
            return self.sample_highpassed
        elif no_silence_intervals == 1:
            # Perform spectral subtraction on sample (not high-passed!)
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = reduce_noise(signal, noise)  # Perform spectral subtraction
        else:
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = reduce_noise(signal, noise)  # Perform spectral subtraction
            noise = self.segmentator.get_next_silence(signal)  # Try again
            out = reduce_noise(out, noise)  # Perform spectral subtraction

        # Apply high-pass filter on spectral-subtracted sample
        out = highpass_filter(out, rate, 500)

        return out
Example #4
    def remove_noise(self, signal, rate):
        # Apply highpass filter to greatly reduce signal strength below 1500 Hz.
        self.sample_highpassed = highpass_filter(signal, rate, 1500)
        
        self.segmentator = select_best_segmentator(self.sample_highpassed, rate)
        no_silence_intervals = self.segmentator.get_number_of_silence_intervals()
        
        out = signal
#        w = wavelets.Wavelets()
#        signal = w.denoise(signal)
        if no_silence_intervals == 0:
            raise ValueError('Could not find any silence intervals')
        elif no_silence_intervals == 1:
            # Perform spectral subtraction on sample (not high-passed!)
            noise = self.segmentator.get_next_silence(signal) # Get silence period
            out = ns.reduce_noise(signal, noise, 0) # Perform spectral subtraction
        else:
            noise = self.segmentator.get_next_silence(signal) # Get silence period
            out = ns.reduce_noise(signal, noise, 0) # Perform spectral subtraction
            noise = self.segmentator.get_next_silence(signal) # Try again
            out = ns.reduce_noise(out, noise, 0) # Perform spectral subtraction           
        
        # Apply high-pass filter on spectral-subtracted sample
        out = highpass_filter(out, rate, 1500)
        
        return out
Example #5
    def remove_noise(self, signal, rate):
        # Apply highpass filter to greatly reduce signal strength below 1500 Hz.
        self.sample_highpassed = highpass_filter(signal, rate, 1500)

        self.segmentator = select_best_segmentator(self.sample_highpassed, rate)
        no_silence_intervals = self.segmentator.get_number_of_silence_intervals()

        out = signal
        # w = wavelets.Wavelets()
        # signal = w.denoise(signal)
        if no_silence_intervals == 0:
            raise ValueError('Could not find any silence intervals')
        elif no_silence_intervals == 1:
            # Perform spectral subtraction on sample (not high-passed!)
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = ns.reduce_noise(signal, noise, 0)  # Perform spectral subtraction
        else:
            noise = self.segmentator.get_next_silence(signal)  # Get silence period
            out = ns.reduce_noise(signal, noise, 0)  # Perform spectral subtraction
            noise = self.segmentator.get_next_silence(signal)  # Try again
            out = ns.reduce_noise(out, noise, 0)  # Perform spectral subtraction

        # Apply high-pass filter on spectral-subtracted sample
        out = highpass_filter(out, rate, 1500)

        return out
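
The segmentator object is also external to the listing; the calls above need only get_number_of_silence_intervals() and get_next_silence(signal). A toy energy-threshold segmentator with that interface (the class name and the 0.1 threshold are hypothetical):

import numpy as np

class EnergySegmentator:
    def __init__(self, highpassed, rate, frame=1024):
        # Per-frame energy of the high-passed signal.
        energy = np.array([np.sum(highpassed[i:i + frame] ** 2)
                           for i in range(0, len(highpassed) - frame, frame)])
        quiet = energy < 0.1 * energy.mean()  # frames well below average energy
        # Collect (start, end) sample ranges of consecutive quiet frames.
        self._intervals, start = [], None
        for i, q in enumerate(quiet):
            if q and start is None:
                start = i * frame
            elif not q and start is not None:
                self._intervals.append((start, i * frame))
                start = None
        if start is not None:
            self._intervals.append((start, len(quiet) * frame))
        self._next = 0

    def get_number_of_silence_intervals(self):
        return len(self._intervals)

    def get_next_silence(self, signal):
        # Return the raw samples of the next silence interval, cycling.
        s, e = self._intervals[self._next]
        self._next = (self._next + 1) % len(self._intervals)
        return signal[s:e]
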
Example #6
    def generate_spectrogram(self, audio_data, source):
        """
        Generates a mel-spectrogram for the given audio data
        :param audio_data: ndarray that will be processed
        :param source: string indicating if the audio source is a 'kinect' sensor or 'nao' robot
        :return: generated mel-spectrogram
        """
        num_frames = int(float(len(audio_data)) / self._nao_rate * 10)
        if 'kinect' in source:
            noise = self._kinect_noise_sample[:len(audio_data)]
            spect_sample = self._kinect_spect_sample
            max_mean = self._max_kinect_spect_mean
        else:
            noise = self._nao_noise_sample
            spect_sample = self._nao_spect_sample
            max_mean = self._max_nao_spect_mean

        if noise is None:
            noise_sample_s, noise_sample_e = int(self._nao_rate * (-1.5)), -1
            noise = audio_data[noise_sample_s:noise_sample_e]

        # perform spectral subtraction to reduce noise
        filtered_input = reduce_noise(np.array(audio_data), noise)

        # smooth signal
        b, a = signal.butter(3, [0.05])
        filtered_input = signal.lfilter(b, a, filtered_input)
        # noise = filtered_input[noise_sample_s: noise_sample_e]

        # additional spectral subtraction to remove remaining noise
        filtered_input = reduce_noise(filtered_input, noise)

        # attach spectrogram sample
        if self._generate_samples:
            frame_size = len(filtered_input) // num_frames  # integer frame width
            for i in range(num_frames):
                curr_start = i * frame_size
                mean = np.mean(filtered_input[curr_start:curr_start +
                                              frame_size])
                if mean > max_mean:
                    max_mean = mean
                    spect_sample = filtered_input[curr_start:curr_start +
                                                  frame_size]
        else:
            filtered_input = np.append(filtered_input, spect_sample)
            num_frames += 1

        # generate spectrogram
        spectrogram = librosa.feature.melspectrogram(y=filtered_input,
                                                     sr=self._nao_rate,
                                                     n_mels=128,
                                                     fmax=8000)
        spectrogram = librosa.power_to_db(spectrogram, ref=np.max)
        if 'kinect' in source:
            self.kinect_spectrogram = spectrogram
        else:
            self.nao_spectrogram = spectrogram

        # split the spectrogram into A_i. This generates an overlap between
        # frames with a set stride
        stride = spectrogram.shape[1] / float(num_frames)
        frame_len = aud_dtype["cmp_w"]

        # pad the entire spectrogram so that overlaps at either end do not fall out of bounds
        min_val = np.nanmin(spectrogram)
        empty = np.zeros((spectrogram.shape[0], 8))
        empty.fill(min_val)
        empty_end = np.zeros((spectrogram.shape[0], 8))
        empty_end.fill(min_val)
        spectrogram = np.concatenate((empty, spectrogram, empty_end), axis=1)

        split_data = np.zeros(shape=(num_frames, spectrogram.shape[0],
                                     frame_len),
                              dtype=spectrogram.dtype)
        for i in range(0, num_frames):
            split_data[i] = spectrogram[:,
                                        int(math.floor(i * stride)
                                            ):int(math.floor(i * stride)) +
                                        frame_len]

        # normalize the output to be between 0 and 255
        split_data -= split_data.min()
        split_data /= split_data.max() / 255.0

        if not self._generate_samples:
            split_data = split_data[:-1]
            num_frames -= 1
        else:
            if 'kinect' in source:
                self._kinect_spect_sample = spect_sample
                self._max_kinect_spect_mean = max_mean
            else:
                self._nao_spect_sample = spect_sample
                self._max_nao_spect_mean = max_mean

        return np.reshape(split_data, (num_frames, -1))
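
The closing normalization step, which several of these examples share, maps the spectrogram into the 0-255 range expected downstream. In isolation, with a random stand-in array:

import numpy as np

spec = np.random.randn(128, 40).astype(np.float32)  # stand-in spectrogram
spec -= spec.min()                   # shift so the minimum becomes 0
spec /= spec.max() / 255.0           # scale so the maximum becomes 255
assert spec.min() == 0.0 and np.isclose(spec.max(), 255.0)
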
Example #7
    def formatAudBatch(self, aud_msg_array, name=""):
        # perform pre-processing on the audio input

        for x in range(len(aud_msg_array)):
            aud_msg_array[x] = self.formatAudMsg(aud_msg_array[x])

        num_frames = len(aud_msg_array)
        input_data = np.reshape(aud_msg_array,
                                (num_frames * len(aud_msg_array[0])))
        # modify data
        core_data = input_data

        # mute the first 2 seconds of audio (where the NAO speaks)
        # mute_time = 2
        # input_data[:np.argmax(input_data)+int(16000*mute_time)] = 0

        # get the indices for the noise sample
        noise_sample_s, noise_sample_e = 16000 * (-1.5), -1

        # perform spectral subtraction to reduce noise
        noise = core_data[int(noise_sample_s):noise_sample_e]
        filtered_input = reduce_noise(np.array(core_data), noise)

        # smooth signal
        b, a = signal.butter(3, 0.05)
        filtered_input = signal.lfilter(b, a, filtered_input)

        # additional spectral subtraction to remove remaining noise
        noise = filtered_input[int(noise_sample_s):noise_sample_e]
        filtered_input = reduce_noise(filtered_input, noise)

        # generate spectrogram
        S = librosa.feature.melspectrogram(y=filtered_input,
                                           sr=self.rate,
                                           n_mels=128,
                                           fmax=8000)
        S = librosa.power_to_db(S, ref=np.max)

        # if False:
        #     # if True then output spectrogram to png file (requires matplotlib.pyplot to be imported)
        #     plt.figure(figsize=(10, 4))
        #
        #     librosa.display.specshow(S, y_axis='mel', fmax=8000, x_axis='time')
        #     plt.colorbar(format='%+2.0f dB')
        #     plt.title('Mel-Spectrogram')
        #     plt.tight_layout()
        #     print("spectrogram output to file.")
        #
        #     out_file = "assesment_out.png"  # "spec_img/all_spec/"+name+".png"
        #     plt.savefig(out_file)
        #     self.counter += 1
        #     plt.clf()

        # split the spectrogram into A_i. This generates an overlap between
        # frames with a set stride
        stride = S.shape[1] / float(num_frames)
        frame_len = aud_dtype["cmp_w"]

        # pad the entire spectrogram so that overlaps at either end do not fall out of bounds
        min_val = np.nanmin(S)

        empty = np.zeros((S.shape[0], 3))
        empty.fill(min_val)
        empty_end = np.zeros((S.shape[0], 8))
        empty_end.fill(min_val)
        S = np.concatenate((empty, S, empty_end), axis=1)

        split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len),
                              dtype=S.dtype)
        for i in range(0, num_frames):
            split_data[i] = S[:,
                              int(math.floor(i * stride)
                                  ):int(math.floor(i * stride)) + frame_len]

        # normalize the output to be between 0 and 255
        split_data -= split_data.min()
        split_data /= split_data.max() / 255.0

        return np.reshape(split_data, (num_frames, -1))
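
The "smooth signal" step above is a 3rd-order Butterworth low-pass: SciPy's default btype is 'lowpass', and the 0.05 cutoff is relative to Nyquist, i.e. 0.05 * 8000 = 400 Hz at the 16 kHz rate these examples assume. A standalone check:

import numpy as np
from scipy import signal

rate = 16000                                  # sample rate assumed above
b, a = signal.butter(3, 0.05)                 # low-pass, cutoff 0.05 * Nyquist
tone = np.sin(2 * np.pi * 3000 * np.arange(rate) / rate)  # 3 kHz test tone
smoothed = signal.lfilter(b, a, tone)
print(np.abs(smoothed[1000:]).max())          # far below 1.0: the tone is attenuated
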
Example #8
    def formatAudBatch(self, aud_msg_array, name=""):
        # perform pre-processing on the audio input

        for x in range(len(aud_msg_array)):
            aud_msg_array[x] = self.formatAudMsg(aud_msg_array[x])

        num_frames = len(aud_msg_array)
        core_data = np.reshape(aud_msg_array, (num_frames * len(aud_msg_array[0])))
        # modify data
        # core_data = input_data
        # core_data = input_data[:int(16000*1.2)]

        # np.save(NOISE_SAMPLE_PATH.replace('#', '3'), core_data)
        # dummy = np.load(NOISE_SAMPLE_PATH.replace('#', '3'))
        # core_data = np.append(core_data, dummy)
        # num_frames = num_frames + int(len(dummy)/len(aud_msg_array[0]))

        # get the indicies for the noise sample
        # noise_sample_s, noise_sample_e = 1, -1 #16000 * (-1.5), -1

        # perform spectral subtraction to reduce noise
        noise = self.__noise_sample_1
        # noise = core_data[int(noise_sample_s): int(noise_sample_e)]
        # np.save(NOISE_SAMPLE_PATH.replace('#', '1'), noise)
        filtered_input = reduce_noise(np.array(core_data), noise)

        # smooth signal
        b, a = signal.butter(3, 0.05)
        filtered_input = signal.lfilter(b, a, filtered_input)

        # additional spectral subtraction to remove remaining noise
        noise = self.__noise_sample_2
        # noise = filtered_input[int(noise_sample_s): int(noise_sample_e)]
        # np.save(NOISE_SAMPLE_PATH.replace('#', '2'), noise)
        filtered_input = reduce_noise(filtered_input, noise)

        filtered_input = np.append(filtered_input, self.__noise_dummy)
        num_frames = num_frames + int(len(self.__noise_dummy)/len(aud_msg_array[0]))

        # generate spectrogram
        S = librosa.feature.melspectrogram(y=filtered_input, sr=self.rate, n_mels=128, fmax=8000)
        S = librosa.power_to_db(S, ref=np.max)

        # if True:
        #     # if True then output spectrogram to png file (requires matplotlib.pyplot to be imported)
        #     plt.figure(figsize=(10, 4))
        #
        #     librosa.display.specshow(S, y_axis='mel', fmax=8000, x_axis='time')
        #     plt.colorbar(format='%+2.0f dB')
        #     plt.title('Mel-Spectrogram')
        #     plt.tight_layout()
        #     print("spectrogram output to file.")
        #
        #     out_file = "debug/audio_{}.png".format(self.__chunk_counter)
        #     plt.savefig(out_file)
        #     self.counter += 1
        #     plt.clf()

        # split the spectrogram into A_i. This generates an overlap between
        # frames with a set stride
        stride = S.shape[1] / float(num_frames)
        frame_len = aud_dtype["cmp_w"]

        # pad the entire spectrogram so that overlaps at either end do not fall out of bounds
        min_val = np.nanmin(S)

        empty = np.zeros((S.shape[0], 3))
        empty.fill(min_val)
        empty_end = np.zeros((S.shape[0], 8))
        empty_end.fill(min_val)
        S = np.concatenate((empty, S, empty_end), axis=1)

        split_data = np.zeros(shape=(num_frames, S.shape[0], frame_len), dtype=S.dtype)
        for i in range(0, num_frames):
            split_data[i] = S[:,
                            int(math.floor(i * stride)):int(math.floor(i * stride)) + frame_len]

        # normalize the output to be between 0 and 255
        split_data -= split_data.min()
        split_data /= split_data.max() / 255.0

        return np.reshape(split_data, (num_frames, -1))[:-int(len(self.__noise_dummy) /
                                                              len(aud_msg_array[0])) - 1]
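
Finally, the overlapping-window split that every spectrogram example performs, reduced to its essentials; frame_len = 8 stands in for aud_dtype["cmp_w"], whose real value is not given in the listing:

import math
import numpy as np

S = np.arange(128 * 50, dtype=np.float32).reshape(128, 50)  # stand-in spectrogram
num_frames, frame_len = 10, 8
stride = S.shape[1] / float(num_frames)       # 5.0 spectrogram columns per frame
# Pad both ends so windows near the edges stay in bounds.
S = np.concatenate((np.zeros((128, 3)), S, np.zeros((128, 8))), axis=1)
split = np.stack([S[:, int(math.floor(i * stride)):int(math.floor(i * stride)) + frame_len]
                  for i in range(num_frames)])
print(split.shape)  # (10, 128, 8)
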