def obtain_fft_in_db(data, n_fft):
    """Return the normalised FFT magnitude of ``data`` in decibels, offset by 120.

    Args:
        data: Signal samples handed to ``fft``.
        n_fft: FFT size; the transform is evaluated with
            ``n = n_fft // 2 + 1`` points.

    Returns:
        The magnitude after ``librosa.util.normalize`` and
        ``amplitude_to_db``, with 120 added to every value.
    """
    # View the FFT of the entire signal; only the magnitude part is kept.
    spectrum = fft(data, n=n_fft // 2 + 1)
    magnitude, _ = librosa.core.magphase(spectrum)
    magnitude_db = amplitude_to_db(librosa.util.normalize(magnitude))
    magnitude_db += 120
    return magnitude_db
def save_spectrogram(data, output_file):
    """Draw ``data`` as a constant-Q spectrogram and save it to ``output_file``.

    Args:
        data: Amplitude spectrogram; converted to dB relative to its maximum.
        output_file: Destination passed to ``plt.savefig``.

    The plot uses the module-level ``TARGET_SAMPLE_RATE``, ``HOP_LENGTH`` and
    ``BINS_PER_OCTAVE`` and is drawn on the current matplotlib figure.
    """
    decibels = amplitude_to_db(data, ref=np.max)
    specshow(
        decibels,
        sr=TARGET_SAMPLE_RATE,
        x_axis='time',
        y_axis='cqt_note',
        hop_length=HOP_LENGTH,
        bins_per_octave=BINS_PER_OCTAVE,
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title("Constant-Q Transform")
    plt.tight_layout()
    plt.savefig(output_file)
def plot_spectrogram(file):
    """Load a recording and show its STFT spectrogram in decibels.

    Args:
        file: Path of the recording to plot. The argument used to be ignored
            in favour of the module-level ``SOUNDFILE``; it is now honoured,
            and ``SOUNDFILE`` is only used as a fallback when ``file`` is
            ``None``.

    The audio is loaded at the module-level ``RATE`` and analysed with a
    128-point FFT and a hop of ``FRAME`` samples.
    """
    path = SOUNDFILE if file is None else file
    audio, sr = lr.load(path, sr=RATE)

    spec = stft(audio, hop_length=FRAME, n_fft=2**7)
    spec_db = amplitude_to_db(np.abs(spec))

    fig, ax = plt.subplots(figsize=(9, 3))
    specshow(spec_db, sr=sr, x_axis='time', y_axis='hz',
             hop_length=FRAME, ax=ax, cmap='magma')
    fig.suptitle('Spectrogram of the Recording')
    ax.set_ylabel('Frequency in Hz')
    ax.set_xlabel('Time in min:s')
    plt.tight_layout()
    plt.show()
def generate_spectrogram(samples, rate, opt):
    """Render a log-frequency spectrogram of ``samples`` and return it as base64 PNG.

    Args:
        samples: Audio samples passed to ``lc.stft``.
        rate: Unused; kept for interface compatibility.
        opt: Title selector: 0 = plain, 1 = harmonic components,
            2 = percussive components. Any other value leaves the plot
            untitled.

    Returns:
        The PNG file content, base64-encoded and decoded to a ``str``.

    Side effects:
        Writes ``specplot_<timestamp>.png`` into the current working
        directory; the file is left in place, as before.
    """
    titles = {
        0: 'Spectrogram',
        1: 'Harmonic Components Spectrogram',
        2: 'Percussive Components Spectrogram',
    }

    fig = plt.figure(figsize=(10, 5))
    try:
        title = titles.get(opt)
        if title is not None:
            plt.title(title)
        ld.specshow(lc.amplitude_to_db(np.abs(lc.stft(samples)), ref=np.max),
                    y_axis='log', x_axis='time')
        filename = "specplot_" + str(datetime.datetime.now().timestamp()) + ".png"
        plt.savefig(filename)
    finally:
        # Each call opens a new figure; close it so repeated calls do not
        # accumulate open figures in pyplot's global registry.
        plt.close(fig)

    with open(filename, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')
def plot_log_power_specgram(self, sound_names, raw_sounds):
    """Plot the log-power spectrogram of each sound in a column of subplots.

    Args:
        sound_names: Iterable of names; each is title-cased for its subplot.
        raw_sounds: Iterable of audio sample arrays, paired with the names.

    Subplots are placed on a 10-row grid starting at position ``self.i``.
    Figure size, dpi and the suptitle placement/font come from ``self``.
    """
    plt.figure(figsize=self.figsize, dpi=self.dpi)
    for position, (name, samples) in enumerate(zip(sound_names, raw_sounds),
                                               start=self.i):
        plt.subplot(10, 1, position)
        power = np.abs(librosa.stft(samples)) ** 2
        # The removed librosa.logamplitude(..., ref_power=...) operated on
        # power; its replacement for a squared spectrogram is power_to_db.
        # amplitude_to_db would square the input again and double every
        # dB value.
        log_power = core.power_to_db(power, ref=np.max)
        display.specshow(log_power, x_axis='time', y_axis='log')
        plt.title(name.title())
    plt.suptitle("Figure 3: Log power spectrogram",
                 x=self.x, y=self.y, fontsize=self.fontsize)
    plt.show()
def build_X_y():
    """Build X and y, the input and output arrays for the CNN.

    If ``check_data()`` returns a truthy cached object, its ``data`` pair is
    returned directly. Otherwise every file listed in ``df['fname']`` is read
    from ``clean/``, converted to a dB-scaled mel spectrogram, lightly
    smoothed, optionally filtered at random, and labelled through
    ``work_status``. The result is stored on ``config.data`` and ``config``
    is pickled to ``config.p_path``.

    Returns:
        Tuple ``(X, y)``: ``X`` reshaped to add a trailing axis of size 1,
        ``y`` one-hot encoded with ``config.num_classes`` classes.
    """
    cached = check_data()
    if cached:
        # Return X, y from the pickle folder.
        return cached.data[0], cached.data[1]

    X = []
    y = []
    # Wrap the column itself (not an enumerate object) so tqdm can see the
    # length and display a total.
    for file in tqdm(df['fname']):
        _, signal = wavfile.read('clean/' + file)
        mel = melspectrogram(y=signal,
                             sr=config.sample_rate,
                             n_mels=config.n_mels,
                             n_fft=config.n_fft,
                             hop_length=config.hop_length,
                             window=config.window)
        # NOTE(review): melspectrogram output is passed to amplitude_to_db
        # unchanged; kept because downstream features depend on this scaling.
        S = amplitude_to_db(mel)

        # Reduce noise: pull the first two rows towards the global mean.
        # The second mean is taken after row 0 has been updated.
        S[0] = (2 * S.mean() + S[0]) / 3
        S[1] = (S.mean() + 2 * S[1]) / 3

        # Random augmentation: 1 -> median filter, 2 -> Wiener filter,
        # 0 and 3 -> leave the spectrogram untouched.
        random_int = random.randint(0, 3)
        if random_int == 1:
            S = medfilt2d(S)
        elif random_int == 2:
            S = wiener(S)

        X.append(S)
        y.append(work_status(file))

    X = np.array(X)
    print(X.shape)
    X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    y = to_categorical(np.array(y), num_classes=config.num_classes)

    config.data = (X, y)
    with open(config.p_path, 'wb') as handle:
        pickle.dump(config, handle, protocol=2)
    return X, y
def compute_melgram(audio_path, SR=12000, N_FFT=512, N_MELS=96, HOP_LEN=256, DURA=29.12):
    """Compute a dB-scaled mel spectrogram of the centre portion of a track.

    Default mel-spectrogram parameters:
        SR = 12000, N_FFT = 512, N_MELS = 96, HOP_LEN = 256,
        DURA = 29.12 (chosen to make it 1366 frames).

    Tracks shorter than ``DURA`` seconds are zero-padded at the end; longer
    tracks are cropped around their centre.

    Returns:
        The mel spectrogram in dB with two singleton axes prepended.
    """
    print('loading...', audio_path)
    src, sr = librosa.load(audio_path, sr=SR)  # load whole signal

    wanted = int(DURA * SR)
    missing = wanted - src.shape[0]
    if missing > 0:
        # Too short: append silence.
        padding = np.zeros((missing, ))
        src = np.hstack((src, padding))
    elif missing < 0:
        # Too long: keep the centred window.
        start = int(-missing / 2)
        src = src[start:start + wanted]

    # The mel spectrogram is handed to amplitude_to_db (ref=1.0) as-is.
    melgram = feature.melspectrogram(y=src,
                                     sr=SR,
                                     hop_length=HOP_LEN,
                                     n_fft=N_FFT,
                                     n_mels=N_MELS)
    melgram_db = core.amplitude_to_db(melgram, ref=1.0)
    return melgram_db[np.newaxis, np.newaxis, :]
def extract_segments(clip, filename, sets, label, label_name, frames):
    """Cut ``clip`` into half-overlapping windows and return their log-mel rows.

    Each window of ``512 * (frames - 1)`` samples is turned into a 60-band
    mel spectrogram, converted to dB and flattened into one row. Windows
    whose mean dB value is not above -70 are dropped.

    Args:
        clip: 1-D audio samples. Peak-normalised before windowing; a silent
            or empty clip is left unscaled instead of dividing by zero.
        filename, sets, label, label_name: Metadata copied into every row.
        frames: Frames per segment plus one; must be at least 2.

    Returns:
        A DataFrame with one row per kept window (metadata columns followed
        by ``logspec_b<band>_f<frame>`` columns). An empty DataFrame is
        returned when no window qualifies (previously ``pd.concat`` raised).

    Raises:
        ValueError: If ``frames`` yields a non-positive window size
            (``frames == 1`` previously looped forever).
    """
    FRAMES_PER_SEGMENT = frames - 1  # frames=41 -> ~950 ms
    WINDOW_SIZE = 512 * FRAMES_PER_SEGMENT  # 23 ms per frame
    STEP_SIZE = WINDOW_SIZE // 2  # frames=41 -> 512 * 20 = 10240
    BANDS = 60

    if WINDOW_SIZE <= 0:
        raise ValueError('frames must be at least 2, got {!r}'.format(frames))

    clip = np.asarray(clip)
    peak = np.max(np.abs(clip)) if clip.size else 0
    if peak > 0:
        clip = clip * (1 / peak)

    segments = []
    columns = None
    for begin in range(0, len(clip) - WINDOW_SIZE + 1, STEP_SIZE):
        end = begin + WINDOW_SIZE
        signal = clip[begin:end]

        melspec = melspectrogram(y=signal, sr=22050, n_fft=1024,
                                 hop_length=512, n_mels=BANDS)
        logspec = amplitude_to_db(melspec)
        logspec = logspec.T.flatten()[:, np.newaxis].T

        if columns is None:
            # Every window has the same shape, so build the names once.
            columns = ['logspec_b{}_f{}'.format(i % BANDS, i // BANDS)
                       for i in range(logspec.shape[1])]
        logspec = pd.DataFrame(data=logspec, dtype='float32',
                               index=[0], columns=columns)

        if np.mean(logspec.values) > -70.0:
            segment_meta = pd.DataFrame(
                {
                    'filename': filename,
                    'sets': sets,
                    'label': label,
                    'label_name': label_name,
                    's_begin': begin,
                    's_end': end
                },
                index=[0])
            segments.append(pd.concat((segment_meta, logspec), axis=1))

    if not segments:
        return pd.DataFrame()
    return pd.concat(segments, ignore_index=True)
def spectrograms_of_heartbeat_audio(audio, time, sfreq):
    """Plot the raw audio above its spectrogram and return the STFT.

    Args:
        audio: Audio samples.
        time: Time values plotted against ``audio`` in the upper panel.
        sfreq: Sampling frequency, used for the spectrogram's time axis.

    Returns:
        The complex STFT of ``audio`` (128-point FFT, hop of 16 samples).
    """
    # Prepare the STFT
    HOP_LENGTH = 2**4
    spec = stft(audio, hop_length=HOP_LENGTH, n_fft=2**7)

    # Convert into decibels. amplitude_to_db expects magnitudes; passing the
    # complex STFT makes librosa warn and discard the phase itself.
    spec_db = amplitude_to_db(abs(spec))

    # Compare the raw audio to the spectrogram of the audio
    fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
    axs[0].plot(time, audio)
    specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz',
             hop_length=HOP_LENGTH)
    plt.show()
    return spec
def __prepateInput(self, input_signal, sampling_rate):
    """Turn a raw signal into dB-magnitude and phase spectrogram inputs.

    The signal is resampled when ``sampling_rate`` differs from the
    configured input rate, then passed to ``spectrogram`` in complex mode
    using the configured window, overlap and FFT size.

    Returns:
        ``[freq, time, db_values, phase]`` where ``db_values`` is the
        transposed dB magnitude with a singleton axis inserted in the middle
        and ``phase`` is the angle of the complex spectrogram.
    """
    if sampling_rate != self.__INPUT_SAMPLING_RATE:
        input_signal = self.__resample(input_signal, sampling_rate)

    analysis_window = get_window(self.__WINDOW, self.__N_SAMPLES_WINDOW)
    freq, time, complex_spec = spectrogram(
        input_signal,
        fs=self.__INPUT_SAMPLING_RATE,
        window=analysis_window,
        noverlap=self.__N_SAMPLES_OVERLAP,
        nfft=self.__N_SAMPLES_WINDOW,
        return_onesided=True,
        scaling='spectrum',
        axis=-1,
        mode='complex')

    magnitude_db = amplitude_to_db(np.abs(complex_spec))
    db_values = np.transpose(magnitude_db)[:, np.newaxis, :]
    phase = np.angle(complex_spec)
    return [freq, time, db_values, phase]
def engineering_spectral_features(spec, times_spec):
    """Plot spectral centroid and bandwidth on top of a dB spectrogram.

    Args:
        spec: Spectrogram array; complex input is reduced to its magnitude.
        times_spec: Time value of every spectrogram frame, used as the
            x-coordinates of the overlay.
    """
    # Use the magnitude rather than the real part: the real part of an STFT
    # is signed, and the spectral features need non-negative energies.
    spec = abs(spec).astype("float32")

    # Calculate the spectral centroid and bandwidth for the spectrogram
    bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
    centroids = lr.feature.spectral_centroid(S=spec)[0]

    # Convert spectrogram to decibels for visualization
    spec_db = amplitude_to_db(spec)

    # Display these features on top of the spectrogram. specshow draws on
    # the current axes (the one just created); its return value is not
    # reused because it is not an Axes in every librosa version.
    fig, ax = plt.subplots(figsize=(10, 5))
    HOP_LENGTH = 2**4
    specshow(spec_db, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH)
    ax.plot(times_spec, centroids)
    ax.fill_between(times_spec,
                    centroids - bandwidths / 2,
                    centroids + bandwidths / 2,
                    alpha=.5)
    ax.set(ylim=[None, 6000])
    plt.show()
def plot_esc50_spectrograms():
    """Plot waveform and spectrogram for a random rooster, crow and hen clip.

    Reads ``esc50.csv`` from the ESC-50 ``meta`` folder, picks one random
    position shared by the three categories, and draws a 3x2 grid: waveform
    on the left, dB spectrogram on the right.
    """
    esc50dir = './dataset/ESC-50-master/'
    esc50audio = esc50dir + 'audio/'
    esc50meta = esc50dir + 'meta/'
    meta = pd.read_csv(esc50meta + 'esc50.csv')

    categories = ['rooster', 'crow', 'hen']
    names = ['a Rooster', 'a Crow', 'a Hen']
    clips = [list(meta[meta['category'] == category]['filename'])
             for category in categories]

    fig, axs = plt.subplots(3, 2, figsize=(10, 8), sharex=True)
    fig.suptitle('Comparison between different Classes')

    # Bound the shared index by the number of clips per class (it used to be
    # bounded by the CSV's column count). ``upper`` is a valid index whether
    # ``randint`` treats its upper bound as inclusive or exclusive.
    upper = min(len(class_clips) for class_clips in clips) - 1
    i = randint(0, upper)
    files = [class_clips[i] for class_clips in clips]

    for row, (fname, name) in enumerate(zip(files, names)):
        audio, sr = lr.load(esc50audio + fname, sr=RATE)
        t = np.arange(0, len(audio)) / sr

        axs[row][0].plot(t, audio)
        axs[row][0].set_title('Waveform of ' + name)
        axs[row][0].set_xlabel('Time in s')
        axs[row][0].set_ylabel('Amplitude')

        spec = stft(audio, hop_length=FRAME, n_fft=2**7)
        spec_db = amplitude_to_db(np.abs(spec))
        specshow(spec_db, sr=sr, x_axis='time', y_axis='hz',
                 hop_length=FRAME, ax=axs[row][1], cmap='magma')
        axs[row][1].set_title('Spectrogram of ' + name)
        axs[row][1].set_xlabel('Time in s')
        axs[row][1].set_ylabel('Frequency in Hz')

    plt.tight_layout()
    plt.show()
def extract_segments(clip, frames=41):
    """Return the log-mel features of the first window of ``clip``.

    The first ``512 * (frames - 1)`` samples are turned into a 60-band mel
    spectrogram, converted to dB and flattened into a single-row DataFrame.

    Args:
        clip: 1-D audio samples. Peak-normalised first; a silent or empty
            clip is left unscaled instead of dividing by zero.
        frames: Frames per segment plus one (default 41, ~950 ms).

    Returns:
        A one-row float32 DataFrame with ``logspec_b<band>_f<frame>``
        columns, or the integer ``0`` when the clip is shorter than one
        window.
    """
    FRAMES_PER_SEGMENT = frames - 1  # 41 frames ~= 950 ms
    WINDOW_SIZE = 512 * FRAMES_PER_SEGMENT  # 23 ms per frame
    BANDS = 60

    clip = np.asarray(clip)
    peak = np.max(np.abs(clip)) if clip.size else 0
    if peak > 0:
        clip = clip * (1 / peak)

    signal = clip[0:WINDOW_SIZE]
    if len(signal) != WINDOW_SIZE:
        # Too short for a full window: keep the historical sentinel value.
        return 0

    melspec = melspectrogram(y=signal, sr=22050, n_fft=1024,
                             hop_length=512, n_mels=BANDS)
    logspec = amplitude_to_db(melspec)
    logspec = logspec.T.flatten()[:, np.newaxis].T
    columns = ['logspec_b{}_f{}'.format(i % BANDS, i // BANDS)
               for i in range(logspec.shape[1])]
    return pd.DataFrame(data=logspec, dtype='float32',
                        index=[0], columns=columns)
## Engineering spectral features
import librosa as lr
from librosa.core import amplitude_to_db
from librosa.display import specshow

# Calculate the spectral centroid and bandwidth for the spectrogram.
# `spec`, `times_spec`, `HOP_LENGTH` and `plt` are expected to be defined
# earlier in the session.
bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
centroids = lr.feature.spectral_centroid(S=spec)[0]

# ----------------------------------------------------------------
# (The separator used to be a bare run of underscores, which Python parses
# as a name lookup and fails on with NameError.)

# Convert spectrogram to decibels for visualization
spec_db = amplitude_to_db(spec)

# Display these features on top of the spectrogram. specshow draws on the
# current axes (the one just created); its return value is not reused
# because it is not an Axes in every librosa version.
fig, ax = plt.subplots(figsize=(10, 5))
specshow(spec_db, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH)
ax.plot(times_spec, centroids)
ax.fill_between(times_spec,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=.5)
ax.set(ylim=[None, 6000])
plt.show()
# FFT
import numpy as np
import librosa as lr
from librosa.core import stft, amplitude_to_db
from librosa.display import specshow

# `audio`, `sfreq` and `times_spec` are expected to be defined earlier in
# the session.
HOP_LENGTH = 2**4
SIZE_WINDOW = 2**7
audio_spec = stft(audio, hop_length=HOP_LENGTH, n_fft=SIZE_WINDOW)

# Magnitude spectrogram: amplitude_to_db and the spectral features below
# both expect non-negative magnitudes, not the complex STFT.
spec = np.abs(audio_spec)
spec_db = amplitude_to_db(spec)
specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH)

# spectral centroid and bandwidth
bandwidths = lr.feature.spectral_bandwidth(S=spec)[0]
centroids = lr.feature.spectral_centroid(S=spec)[0]

# specshow returns the Axes in older librosa releases and the drawn mesh in
# newer ones; `.axes` resolves to the Axes in both cases.
ax = specshow(spec, sr=sfreq, x_axis='time', y_axis='hz',
              hop_length=HOP_LENGTH).axes
ax.plot(times_spec, centroids)
ax.fill_between(times_spec,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=0.5)

centroids_all = []
bandwidths_all = []
fourier = fft.fft(channel1) #print(fourier) """plt.figure() plt.plot(fourier, alpha=0.9, color='blue') plt.xlabel('k') plt.ylabel('Amplitude') plt.show() """ # CQT # On charge le fichier wav avec librosa x, sr = librosa.load( "test.wav", sr=44100, mono=True) # mono=True transforme l'audio en mono (à faire) cqt = librosa.cqt(x, sr=sr, bins_per_octave=36) log_cqt = librosa.amplitude_to_db(np.abs(cqt)) # Spectrogram FFT """ plt.figure(2, figsize=(8,6)) plt.subplot(211) Pxx, freqs, bins, im = plt.specgram(channel1, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma')) cbar=plt.colorbar(im) plt.xlabel('Time (s)') plt.ylabel('Frequency (Hz)') cbar.set_label('Intensity dB') plt.subplot(212) Pxx, freqs, bins, im = plt.specgram(channel2, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma')) cbar=plt.colorbar(im) plt.xlabel('Time (s)') plt.ylabel('Frequency (Hz)')
def PlotSpec(S):
    """Show spectrogram ``S`` in dB (relative to its peak) on a linear axis.

    The FFT size is taken as twice the number of rows of ``S``; the sample
    rate passed to ``specshow`` is the module-level ``SR`` scaled by that
    size over ``FFTSIZE``.
    """
    n_rows = S.shape[0]
    fftsize = n_rows * 2
    display_sr = SR * fftsize / FFTSIZE
    spec_db = amplitude_to_db(S, ref=np.max)
    specshow(spec_db, sr=display_sr, y_axis="linear")
# Notebook export: load a recording, display its spectrogram and run pitch
# tracking on it.
import librosa.core as lib
import numpy as np
from librosa.display import specshow
from librosa.core import amplitude_to_db
from librosa.feature import chroma_stft
import matplotlib.pyplot as plt

# In[6]:

# Load the audio samples and their sampling rate.
y, sr = lib.load('./data/13_LeadVox.wav')

# In[18]:

# STFT magnitude in dB relative to its maximum, shown with a time x-axis and
# a logarithmic frequency y-axis.
stft = lib.stft(y)
specshow(amplitude_to_db(np.abs(stft), ref=np.max), x_axis='time', y_axis='log')
plt.show()

# In[35]:

# Pitch tracking on the loaded signal.
pitches, magnitudes = lib.piptrack(y=y, sr=sr)

# In[49]:

# In[32]:

import librosa.onset

#odf = librosa.onset.onset_strength(y=y, sr=sr, hop_length=512)
def nlfc(data_orig_mod, freq, n, db_ref, start_freq, compression_ratio,
         compression_frequency, compression_nfft, compress=True):
    """Apply non-linear frequency compression above ``start_freq`` to an STFT.

    The input is split into a low-pass and a high-pass part. The high-pass
    part is taken to the time domain, resampled to ``compression_frequency``,
    re-analysed, its bins are remapped, and it is brought back to the
    original rate and STFT grid. Four diagnostic figures are drawn along the
    way.

    Args:
        data_orig_mod: Complex STFT to process.
        freq: Array of the centre frequencies of the STFT bins.
        n: Length in samples of the original time-domain signal.
        db_ref: ``None`` for Butterworth filters and linear-amplitude data;
            otherwise Chebyshev-II filters are used and the stacked magnitude
            is passed through ``db_to_amplitude`` with this reference.
        start_freq: Frequency at which compression starts.
        compression_ratio: Exponent of the mapping
            ``start_freq**(1 - r) * f**r`` for bins at or above the knee.
        compression_frequency: Sample rate used for the compressed branch.
        compression_nfft: FFT size that defines the resampled bin
            frequencies.
        compress: If true return the remapped high band only; otherwise
            return the sum of the untouched high-pass and low-pass parts.

    Returns:
        The resulting complex STFT.

    NOTE(review): ``fs``, ``center`` and ``n_fft`` are not parameters; they
    are read from the enclosing module scope.
    NOTE(review): the compressed branch is analysed with a fixed
    ``n_fft=512`` while bin frequencies use ``compression_nfft`` -- confirm
    these are meant to agree.
    """

    def _nearest_bin(bins, target):
        # Index of the entry of ``bins`` closest to ``target`` (first on ties).
        return int(np.argmin(np.abs(bins - target)))

    def _show(title, spectrum):
        # Draw one diagnostic spectrogram in a new figure.
        plt.figure(get_fig_nums() + 1)
        plt.title(title)
        specshow(amplitude_to_db(spectrum) if db_ref is None else spectrum,
                 x_axis='time',
                 y_axis='linear')
        plt.colorbar()

    order = 32
    if db_ref is None:
        ftype, rp, rs = 'butter', None, None
    else:
        ftype, rp, rs = 'cheby2', 0.1, 80

    # Lowpass filter
    _, h_lp = design_filter(order=order, cutoff=[start_freq], fs=fs,
                            freqs=freq, ftype=ftype, rp=rp, rs=rs)
    data_low_pass = apply_filter(data_orig_mod, h_lp)
    _show('low pass', data_low_pass)

    # Highpass filter
    _, h_hp = design_filter(order=order, cutoff=[start_freq],
                            btype='highpass', fs=fs, freqs=freq,
                            ftype=ftype, rp=rp, rs=rs)
    data_high_pass = apply_filter(data_orig_mod, h_hp)
    _show('high pass before', data_high_pass)

    # Convert the high band back to the time domain and resample it.
    data_high_pass_td = istft(data_high_pass, center=center, length=n)
    data_high_pass_td = librosa.core.resample(data_high_pass_td,
                                              orig_sr=fs,
                                              target_sr=compression_frequency)
    n2 = len(data_high_pass_td)

    # FFT with 172 Hz bins and 1.45ms rate
    resample_freqs = fft_frequencies(sr=compression_frequency,
                                     n_fft=compression_nfft)
    data_hp_padded = librosa.util.fix_length(
        data_high_pass_td, size=n2 + compression_nfft // 2)

    # Build the source-bin index for every output bin: bins below the knee
    # map to the nearest resampled bin, bins at or above it map to the bin
    # nearest their compressed frequency.
    knee = freq[_nearest_bin(freq, start_freq)]
    modulated_carrier_freqs = [
        _nearest_bin(resample_freqs, f) for f in freq[freq < knee]
    ]
    for f in freq[freq >= knee]:
        f_out = start_freq**(1 - compression_ratio) * f**(compression_ratio)
        modulated_carrier_freqs.append(_nearest_bin(resample_freqs, f_out))

    data_hp_resampled = stft(data_hp_padded, n_fft=512, center=center)
    # Builtin ``complex`` replaces the ``np.complex`` alias that newer NumPy
    # releases no longer provide. Assigning into a pre-sized buffer keeps
    # the shape check of the former per-frame copy loop.
    modulated_carrier = np.zeros(data_hp_resampled.shape, dtype=complex)
    modulated_carrier[:] = data_hp_resampled[modulated_carrier_freqs, :]
    modulated_carrier_td = istft(modulated_carrier, center=center, length=n2)

    # Resample time-domain signal back to the original rate.
    data_high_pass_td_new = librosa.core.resample(
        modulated_carrier_td, orig_sr=compression_frequency, target_sr=fs)

    # Pad the data since istft will drop any data in the last frame if
    # samples are less than n_fft.
    data_pad_new = librosa.util.fix_length(data_high_pass_td_new,
                                           size=n + n_fft // 2)
    data_high_pass_modulated = stft(data_pad_new, n_fft=n_fft, center=center)

    if compress:
        data_stacked = data_high_pass_modulated
    else:
        data_stacked = data_high_pass + data_low_pass

    if db_ref is not None:
        dmag, dphase = librosa.core.magphase(data_stacked)
        dmag = db_to_amplitude(dmag, ref=db_ref)
        data_stacked = dmag * dphase

    _show('high pass after', data_high_pass_modulated)
    _show('stacked', data_stacked)
    return data_stacked
def PlotTemplates(T):
    """Show the templates ``T`` (transposed) in dB on a linear frequency axis.

    The FFT size is taken as twice the number of columns of ``T``; the
    sample rate passed to ``specshow`` is the module-level ``SR`` scaled by
    that size over ``FFTSIZE``.
    """
    n_cols = T.shape[1]
    fftsize = n_cols * 2
    display_sr = SR * fftsize / FFTSIZE
    templates_db = amplitude_to_db(T.T)
    specshow(templates_db, sr=display_sr, y_axis="linear")
def eq(data_db, eq_freqs, audiogram, sr, n_fft, db_ref, data_amp, phase, plot=False):
    """Equalise a dB spectrogram with an audiogram-weighted filter bank.

    Steps: shift the data so its minimum is zero, halve it, weight it with
    the summed magnitude response of a multirate filter bank scaled by the
    audiogram, add the halved data back, shift so the maximum is 65, low-pass
    at 4500 Hz, floor everything below 30 to the minimum, convert back to
    amplitude and smooth with a Wiener filter.

    Args:
        data_db: Spectrogram (copied; the caller's array is not modified).
            Converted with ``amplitude_to_db`` first when ``db_ref`` is None.
        eq_freqs: Centre frequencies of the filter bank.
        audiogram: Indexable whose element ``[1]`` holds the dB values; they
            are scaled by 1/120 before use.
        sr: Sample rate.
        n_fft: FFT size; responses are evaluated on ``n_fft // 2 + 1`` points.
        db_ref: ``None`` selects Butterworth filtering and dB conversion of
            plotted data; otherwise Chebyshev-II is used.
        data_amp: Unused; kept for interface compatibility.
        phase: Unused; kept for interface compatibility.
        plot: Draw diagnostic figures when true.

    Returns:
        The processed amplitude spectrogram after Wiener smoothing.
    """

    def _show(title, values):
        # Draw one diagnostic spectrogram in a new figure.
        plt.figure(get_fig_nums() + 1)
        plt.title(title)
        specshow(amplitude_to_db(values) if db_ref is None else values,
                 x_axis='time',
                 y_axis='linear')
        plt.colorbar()

    data = deepcopy(data_db)
    if db_ref is None:
        data = amplitude_to_db(data, ref=np.max)

    # Shift so the minimum sits at zero, then halve everything.
    min_data = np.min(data)
    data += abs(min_data)
    data_halved = data / 2
    if plot:
        _show('data halved', data_halved)

    eq_freqs = np.array(eq_freqs)
    sample_rates = np.full(len(eq_freqs), sr)
    # NOTE(review): _multirate_fb is a private librosa helper.
    fb_sos, _ = librosa.filters._multirate_fb(eq_freqs,
                                              sample_rates,
                                              Q=25.0,
                                              passband_ripple=0.01,
                                              stopband_attenuation=80)

    scaled_audiogram = audiogram[1] / 120.

    fb = []
    if plot:
        plt.figure(get_fig_nums() + 1)
        plt.title('filterbank')
    for sos in fb_sos:
        freqs, fb_filter = scipy.signal.sosfreqz(np.array(sos),
                                                 n_fft // 2 + 1,
                                                 fs=sr)
        fb.append(fb_filter)
        if plot:
            plt.plot(freqs, np.abs(fb_filter))
    fb = np.array(fb)

    # Sum the magnitude responses of all bands. Builtin ``float`` replaces
    # the ``np.float`` alias that newer NumPy releases no longer provide.
    fb_smoothened = np.zeros((fb.shape[1], ), dtype=float)
    for filt in fb:
        fb_smoothened += abs(filt)

    filtered_data = apply_filter(data_halved, abs(fb_smoothened),
                                 scaled_audiogram)
    fb_mag = filtered_data
    fb_mag += data_halved
    if plot:
        _show('filtered', abs(fb_mag))

    # Shift so the maximum value becomes db_shift.
    db_shift = 65
    max_filtered_dt = np.max(fb_mag)
    fb_mag -= (max_filtered_dt - db_shift)
    if plot:
        _show('final output', abs(fb_mag))

    # Lowpass filter
    start_freq = 4500
    order = 32
    if db_ref is None:
        ftype, rp, rs = 'butter', None, None
    else:
        ftype, rp, rs = 'cheby2', 0.01, 80
    _, h_lp = design_filter(order=order,
                            cutoff=[start_freq],
                            fs=sr,
                            freqs=n_fft // 2 + 1,
                            ftype=ftype,
                            rp=rp,
                            rs=rs)
    data_low_pass = apply_filter(fb_mag, abs(h_lp))

    # Replace everything below 30 with the overall minimum.
    dlp_min = np.min(data_low_pass)
    background = data_low_pass < 30
    data_low_pass[background] = dlp_min
    if plot:
        _show('low pass', abs(data_low_pass))

    data_out = db_to_amplitude(data_low_pass, ref=1)
    return scipy.signal.wiener(data_out, mysize=[n_fft, 3], noise=0.01)
def fourier_transformation(audio: np.ndarray) -> np.ndarray:
    """Return the STFT magnitude of ``audio`` in decibels.

    The STFT uses a 128-point FFT (``2 ** 7``) with a hop of 16 samples
    (``2 ** 4``).
    """
    hop = 2 ** 4
    fft_size = 2 ** 7
    magnitude = np.abs(stft(audio, hop_length=hop, n_fft=fft_size))
    return amplitude_to_db(magnitude)  # convert into decibels
def process_sentence(data, fs, n_fft=512, center=True, plot=False):
    """Equalise a spoken sentence against a fixed audiogram.

    The signal is padded, transformed with an STFT, split into magnitude and
    phase, equalised in the dB domain by ``eq``, recombined with the saved
    phase, inverted and peak-normalised.

    Args:
        data: Time-domain samples.
        fs: Sample rate of ``data``.
        n_fft: STFT size (512 gives 25-35 ms frames for speech analysis).
        center: Passed to ``stft`` / ``istft``.
        plot: When true, draw diagnostics and write intermediate and final
            WAV files under ``constants.PP_DATA_DIR/audio``.

    Returns:
        Tuple ``(signal, audiogram)``: the normalised processed signal and
        the object returned by ``process_audiogram``.
    """

    def _write_debug_wav(name, signal):
        # Write one diagnostic WAV file.
        # NOTE(review): librosa.output was removed in newer librosa releases.
        librosa.output.write_wav(
            os.path.join(constants.PP_DATA_DIR, "audio", name),
            signal, fs, norm=True)

    n = len(data)

    # Pad the data since istft will drop any data in the last frame if
    # samples are less than n_fft.
    data_pad = librosa.util.fix_length(data, size=n + n_fft // 2)

    # Get the frequency distribution
    freq = fft_frequencies(sr=fs, n_fft=n_fft)

    # Get the equation and freq, db array from the audiogram provided.
    # NOTE(review): x_audiogram has 9 entries but y_audiogram has 10 --
    # confirm which list is wrong.
    x_audiogram = [125, 250, 500, 1000, 1500, 2000, 2400, 2800, 3000]
    y_audiogram = [10, 15, 0, -10, -30, -35, -40, -50, -60, -70]
    audiogram = process_audiogram(x_audiogram, y_audiogram, freq, plot)

    # Perform the stft, separate magnitude and save phase for later
    # (important).
    data_pad_stft = stft(data_pad, n_fft=n_fft, center=center)
    mag, phase = librosa.core.magphase(data_pad_stft)

    db_ref = np.max

    # Consider using frequencies of phonemes.
    eq_freqs = librosa.filters.mel_frequencies(n_mels=12,
                                               fmin=100.,
                                               fmax=5000.,
                                               htk=True)

    fb_filtered = amplitude_to_db(mag, ref=db_ref)
    data_proc_mag = eq(fb_filtered, eq_freqs, audiogram, fs, n_fft, db_ref,
                       mag, phase, plot)

    if plot:
        # The three alternative reconstructions are only needed for the
        # diagnostic files, so they are computed on demand rather than on
        # every call (Griffin-Lim in particular is expensive).
        _write_debug_wav('preprocessed_unfiltered_magnitude.wav',
                         istft(data_proc_mag, center=center, length=n))
        _write_debug_wav('preprocessed_unfiltered_magphase.wav',
                         istft(data_proc_mag * phase, center=center, length=n))
        _write_debug_wav('preprocessed_unfiltered_griffinlim.wav',
                         librosa.core.griffinlim(data_proc_mag))

    # Multiply new magnitude with saved phase to reconstruct the sentence,
    # then perform the inverse stft.
    data_proc = data_proc_mag * phase
    data_mod = istft(data_proc, center=center, length=n)

    # Denoising is currently a pass-through.
    denoised_signal = data_mod

    if plot:
        plt.figure(get_fig_nums() + 1)
        plt.title('final output time domain')
        plt.plot(denoised_signal)

    # Normalize
    denoised_signal = librosa.util.normalize(denoised_signal)

    if plot:
        _write_debug_wav('preprocessed_filtered.wav', denoised_signal)

    return denoised_signal, audiogram
from librosa.core import stft, amplitude_to_db
from librosa.display import specshow

# `glob`, `lr`, `np` and `times` (the time value of every spectrogram frame)
# are expected to be defined earlier in the session.
audio_files = glob("datasets/files/set_a/*.wav")

# Read in one audio file (index 3 of the listing)
audio, sfreq = lr.load(audio_files[3])

# calculate our STFT
HOP_LENGTH = 2**4
SIZE_WINDOW = 2**7
audio_spec = stft(audio, hop_length=HOP_LENGTH, n_fft=SIZE_WINDOW)

# convert into decibels (amplitude_to_db expects magnitudes, not the
# complex STFT)
magnitude = np.abs(audio_spec)
spec = amplitude_to_db(magnitude)

# Visualize
specshow(spec, sr=sfreq, x_axis="time", y_axis="hz", hop_length=HOP_LENGTH)

# calculate spectral features on the magnitude spectrogram; the absolute
# value of the dB spectrogram is not a valid energy distribution
bandwidths = lr.feature.spectral_bandwidth(S=magnitude, sr=sfreq)[0]
centroids = lr.feature.spectral_centroid(S=magnitude, sr=sfreq)[0]
bandwidth = bandwidths  # earlier (singular) name kept as an alias

# display these features on top of the spectrogram; specshow returns the
# Axes in older librosa releases and the drawn mesh in newer ones, and
# `.axes` resolves to the Axes in both cases
ax = specshow(spec, sr=sfreq, x_axis="time", y_axis="hz",
              hop_length=HOP_LENGTH).axes
ax.plot(times, centroids)
ax.fill_between(times,
                centroids - bandwidths / 2,
                centroids + bandwidths / 2,
                alpha=0.5)
# ran = randrange(1000) # Calculation for c in classes: file = df[df.label==c].iloc[20,0] sample_rate, signal = wavfile.read('clean/'+file) Y, freq = sp.calc_fft(signal, sample_rate) # FFT stft_signal = np.abs(stft(signal, n_fft=config.n_fft, hop_length=config.hop_length, window=config.window)) stft_signal = amplitude_to_db(stft_signal, ref=np.max) mel = melspectrogram(y=signal, sr=config.sample_rate, n_mels=config.n_mels, n_fft=config.n_fft, hop_length=config.hop_length, window=config.window) mel[0] = (2*mel.mean() + mel[0])/3 # Reducing Noise mel[1] = (mel.mean() + 2*mel[1])/3 # Reducing Noise mel_db = amplitude_to_db(mel, ref=np.max) # mel_db = power_to_db(mel) # mel_pow = medfilt2d(mel_pow) # mel_pow = wiener(mel_pow) # Store in dictionaries c = dict_status[c]