def test_lpc_multi(y_multi):
    y, sr = y_multi

    # Analyze the whole multichannel signal, then each channel separately
    Lall = librosa.lpc(y, order=6)
    L0 = librosa.lpc(y[0], order=6)
    L1 = librosa.lpc(y[1], order=6)

    # Multichannel analysis must agree with per-channel analysis
    assert np.allclose(Lall[0], L0)
    assert np.allclose(Lall[1], L1)
    assert not np.allclose(L0, L1)
def calculateRPCC(audio):
    sample_frequency = 16000
    frame_length = sample_frequency // 4  # 4000 samples = 250 ms at 16 kHz
    low_frequency = 20
    high_frequency = 7600
    num_mel_bins = 30
    # s_n, _ = librosa.load(path, sr=16000)

    # Calculate residual signal
    lpc_coefficients = librosa.lpc(audio, order=22)
    # The leading 0 delays the predictor by one sample so it only uses past values
    s_n_hat = scipy.signal.lfilter(np.hstack([[0], -1 * lpc_coefficients[1:]]), [1], audio)
    r_n = audio - s_n_hat

    # Hilbert transform: analytic signal
    analytical_signal = scipy.signal.hilbert(r_n)

    # Residual phase
    residual_phase = analytical_signal.real / np.abs(analytical_signal)

    # FFT and magnitude
    win_length = sample_frequency // 4
    hop_length = sample_frequency // 10
    n_fft = 4096  # QUESTION: Should this be changed?
    magnitude_spectrum = np.abs(
        librosa.stft(residual_phase, n_fft=n_fft, win_length=win_length,
                     hop_length=hop_length))

    # Warp to mel
    mel = librosa.filters.mel(sr=sample_frequency, n_fft=n_fft, n_mels=num_mel_bins,
                              fmin=low_frequency, fmax=high_frequency)
    mel_warped_signal = mel.dot(magnitude_spectrum)

    # Log signal
    log_spectrum = np.log(mel_warped_signal)

    # DCT of signal
    dct_spectrum = scipy.fftpack.dct(log_spectrum)
    return dct_spectrum
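# Hedged usage sketch (not part of the original source): calculateRPCC expects a
# 16 kHz mono float signal and relies on the numpy/scipy/librosa imports used
# above. White noise is a safe test input, since np.log would hit -inf on
# silent mel bands.
def _demo_rpcc():
    import numpy as np
    rng = np.random.default_rng(0)
    audio = rng.standard_normal(16000).astype(np.float32)  # 1 s of noise at 16 kHz
    rpcc = calculateRPCC(audio)
    print(rpcc.shape)  # (num_mel_bins, n_frames)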
def lpc_emotion_upload():
    entry = dict()
    wav_files = []
    SAMPLE_RATE = 44100
    b, _ = librosa.core.load('pickles/catalyst.wav', sr=SAMPLE_RATE)
    y, sr = librosa.load('pickles/catalyst.wav')

    lpc = librosa.lpc(y, order=5)
    for no in range(len(lpc)):
        entry['LIB_LPC{0}'.format(no)] = lpc[no]

    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    # Select out pitches with high energy
    pitches = pitches[magnitudes > np.median(magnitudes)]
    pit = librosa.pitch_tuning(pitches)
    entry['pitch'] = pit
    wav_files.append(entry)

    wav_df = pd.DataFrame(wav_files)
    lpc_clf = joblib.load('pickles/lpc_model.sav')
    bar = pd.DataFrame(lpc_clf.predict_proba(wav_df))
    bar.columns = lpc_clf.classes_
    bar_t = bar.T
    bar_t.columns = ['values']
    fig = go.Figure(data=[
        go.Pie(labels=lpc_clf.classes_, values=bar_t['values'], hole=.3),
    ])
    return lpc_clf.predict(wav_df), fig
def extract_feature(file_name):
    y, sr = librosa.load(file_name)
    LPC = librosa.lpc(y, order=16)
    print(LPC, file_name)
    # Return the coefficient vector itself as the feature
    result = LPC
    return result
def raw_data_processing(data_path):
    data = list()
    label = list()
    file_dir = os.listdir(path=data_path)
    for i in range(len(file_dir)):
        class_dir = os.path.join(data_path, file_dir[i])
        file_list = os.listdir(class_dir)
        for j in range(len(file_list)):
            file_name = os.path.join(class_dir, file_list[j])
            print("file_name: ", file_name)
            audio_file, sr = librosa.load(
                path=file_name, sr=int(mediainfo(file_name)['sample_rate']))
            try:
                data.append(librosa.lpc(audio_file, order=100))
                label.append(i)
            except Exception as e:
                print("Error occurred at", file_name, ":", e)
                continue
    data = np.asarray(data)
    label = np.asarray(label)
    return data, label
def get_wav_df(self):
    wav_files = []
    for wav in os.listdir(self.wav_dir):
        if wav.endswith('.wav'):
            entry = dict()
            entry['Session'] = wav
            fs, signal = swav.read(self.wav_dir + '/' + wav)
            y, sr = librosa.load(self.wav_dir + '/' + wav)

            lpc = librosa.lpc(y, order=5)
            for no in range(len(lpc)):
                entry['LIB_LPC{0}'.format(no)] = lpc[no]

            pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
            # Select out pitches with high energy
            pitches = pitches[magnitudes > np.median(magnitudes)]
            pit = librosa.pitch_tuning(pitches)
            entry['pitch'] = pit
            wav_files.append(entry)

    wav_df = pd.DataFrame(wav_files)
    return wav_df
def extract_LLD_from_audio(audio, fs):
    # MFCC
    mfcc = librosa.feature.mfcc(y=audio, sr=fs, n_fft=N_FFT,
                                hop_length=HOP_LENGTH, center=False).transpose()
    mfcc_hsf = extract_HSF(mfcc)
    # LPC
    lpc = librosa.lpc(audio, order=16)
    # Mel-spectrogram
    spect = librosa.feature.melspectrogram(y=audio, sr=fs, n_fft=N_FFT,
                                           hop_length=HOP_LENGTH, center=False)
    spect = librosa.power_to_db(spect, ref=np.max).transpose()
    spect_hsf = extract_HSF(spect)
    # Other features
    f0 = get_F_0(audio, fs)[0]
    hnr = get_HNR(audio, fs)
    return (np.asarray(mfcc), np.asarray(mfcc_hsf), np.asarray(lpc),
            np.asarray(spect), np.asarray(spect_hsf), np.asarray([f0, hnr]))
def get_lpc_feature(input_audio, sampling_rate, order=20, preemphasis=True,
                    includeDerivatives=True, win=np.hamming(160), inc=80):
    # audio, sr = librosa.load(input_audio, sr=sampling_rate)
    audio = input_audio

    # Pre-emphasis filter (zero is at 50 Hz)
    if preemphasis:
        audio = signal.lfilter([1, -np.exp(-2 * np.pi * 50 / sampling_rate)], 1, audio)

    # Get frames from input audio
    frame = get_frame_from_file(audio, win=win, inc=inc, sr=sampling_rate,
                                n_channels=1, duration=None)
    c = np.zeros((frame.shape[1], order))

    # Compute LPC coefficients per frame (drop the leading 1)
    for i in range(c.shape[0]):
        lpc_ftr = librosa.lpc(frame[:, i], order=order)
        c[i, :] = lpc_ftr[1:]
    nf = c.shape[0]

    # Append delta (derivative) features
    if includeDerivatives:
        vf = np.arange(4, -5, -1) / 60
        ww = np.zeros(4, dtype=int)
        # Pad with repeated first and last frames before delta filtering
        cx = np.vstack((c[ww, :], c, c[(nf - 1) * (ww + 1), :]))
        filtered_cx = signal.lfilter(vf, 1, np.transpose(cx).flatten())
        dc = np.reshape(filtered_cx, (nf + 8, order), order='F')
        dc = np.delete(dc, np.arange(0, 8), axis=0)
        c = np.hstack((c, dc))

    c = np.transpose(c)
    c = c.astype(np.single)
    return c
def lpc_coeffs(myData):
    MyFs = constants.fs
    MyCoeffs = 512
    Lpcs = librosa.lpc(myData, order=MyCoeffs)
    # freqz returns (frequencies, complex response) for the all-pole filter 1/A(z)
    MyLocs, MyResponse = freqz(1, Lpcs, worN=MyCoeffs, fs=MyFs)
    MyFreqs = 20 * np.log10(np.abs(MyResponse))  # magnitude response in dB
    return MyLocs, MyFreqs
def lpc_filtering(regions_lpc, audio_f, audio_noised, coef_number):
    lpc_coeffs = []
    error = []
    for start, end in regions_lpc:
        a = librosa.lpc(audio_noised[start:end], order=coef_number)
        lpc_coeffs.append(a)
        # Inverse filtering with A(z) yields the prediction error (residual)
        error.extend(scipy.signal.lfilter(a, [1], audio_f[start:end]))
    error = np.array(error)
    return error, lpc_coeffs
def test_lpc_simple():
    srand()

    n = 5000
    est_a = np.zeros((n, 6))
    truth_a = [1, 0.5, 0.4, 0.3, 0.2, 0.1]
    for i in range(n):
        # Generate an AR process with known coefficients and re-estimate them
        noise = np.random.randn(1000)
        filtered = scipy.signal.lfilter([1], truth_a, noise)
        est_a[i, :] = librosa.lpc(filtered, order=5)

    assert np.allclose(truth_a, np.mean(est_a, axis=0), rtol=0, atol=1e-3)
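# Minimal standalone sketch of the same idea (an illustration, not from the
# original test suite): drive a known all-pole filter with white noise and
# check that librosa.lpc approximately recovers the true coefficients.
def _demo_lpc_recovery():
    import numpy as np
    import scipy.signal
    import librosa
    rng = np.random.default_rng(0)
    truth_a = np.array([1.0, -0.9, 0.5])  # A(z) of a stable AR(2) process
    noise = rng.standard_normal(20000)
    y = scipy.signal.lfilter([1.0], truth_a, noise)
    est_a = librosa.lpc(y, order=2)
    print(truth_a, est_a)  # est_a should be close to truth_a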
def lp(y, order=16):
    """
    Uses librosa.lpc to estimate linear prediction coefficients via Burg's
    method, then uses scipy.signal.lfilter to compute the LP residual by
    inverse filtering.
    """
    a = librosa.lpc(y, order=order)
    # One-step prediction: the leading 0 delays the filter so only past samples are used
    y_hat = scipy.signal.lfilter(np.hstack([[0], -1 * a[1:]]), [1], y)
    return y - y_hat
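# Hedged usage sketch (added for illustration; assumes the module-level
# numpy/scipy/librosa imports used by lp() above): the residual of a strongly
# predictable signal should carry far less energy than the signal itself.
def _demo_lp():
    import numpy as np
    sr = 8000
    t = np.arange(sr) / sr
    y = np.sin(2 * np.pi * 220 * t).astype(np.float32)  # highly predictable tone
    residual = lp(y, order=8)
    print(np.sum(residual ** 2) / np.sum(y ** 2))  # expect a ratio well below 1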
def lpc(y, sr, args):
    # LPC coefficients of the input signal
    coefs = librosa.lpc(y, order=args.order)
    # White noise excitation
    wn = np.random.uniform(size=int(sr * args.sec))
    # All-pole synthesis: filter the noise through 1/A(z)
    y_out = sig.lfilter([1.0], coefs, wn)
    y_out = y_out / np.max(np.abs(y_out))  # peak-normalize
    return y_out
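# Hedged usage sketch (illustrative; the args object is mocked here with
# SimpleNamespace, and the sig/np/librosa aliases are assumed from the
# surrounding module): resynthesize 0.5 s of noise through the LPC envelope
# of a noisy tone.
def _demo_lpc_synthesis():
    import numpy as np
    from types import SimpleNamespace
    rng = np.random.default_rng(0)
    sr = 8000
    t = np.arange(sr) / sr
    y = (np.sin(2 * np.pi * 200 * t) + 0.1 * rng.standard_normal(sr)).astype(np.float32)
    y_out = lpc(y, sr, SimpleNamespace(order=12, sec=0.5))
    print(y_out.shape)  # (4000,): peak-normalized noise shaped by the LPC envelope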
def formant_signal(x, order):
    '''
    formant_signal(x, order) takes a wave amplitude array (x) and returns the
    LPC approximation (s_formant) based on a linear autoregressive model of
    the specified order. s_formant can be understood as the (sourceless)
    amplitude due to the vocal formants of the signal x.
    '''
    a = librosa.lpc(x, order=order)  # e.g. order=16
    b = np.hstack([[0], -1 * a[1:]])
    s_formant = sp.signal.lfilter(b, [1], x)
    return s_formant
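# Hedged usage sketch (illustrative only; assumes the sp/np/librosa aliases
# used by formant_signal above): the one-step LPC prediction of a vowel-like
# two-tone signal should track the original closely.
def _demo_formant_signal():
    import numpy as np
    sr = 16000
    t = np.arange(sr // 2) / sr
    x = (np.sin(2 * np.pi * 150 * t) + 0.3 * np.sin(2 * np.pi * 450 * t)).astype(np.float32)
    s_hat = formant_signal(x, 16)
    # Skip the filter's startup transient; correlation should be close to 1
    print(np.corrcoef(x[16:], s_hat[16:])[0, 1])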
def devideExcitationAndVocal(y=None):
    l = len(y)
    if l != 512:
        print(l)
    y = y * np.hanning(l)  # apply a Hanning window

    ar = librosa.lpc(y, order=12)
    # Vocal tract frequency response: H(z) = 1 / A(z)
    ff = np.fft.fft(ar, l) ** (-1)
    # One-step prediction; the residual approximates the excitation
    est_x = scipy.signal.lfilter(np.hstack([[0], -ar[1:]]), [1], y)
    ERR = np.fft.fft(y - est_x)
    G = np.mean((y - est_x) ** 2) ** 0.5
    # return np.log(np.abs(ff[:l//2])+1e-8), np.log(np.abs(ERR[:l//2])+1e-8)
    return np.abs(ff[:l // 2]) * G, np.abs(ERR[:l // 2])
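# Hedged usage sketch (illustrative): the function expects a 512-sample frame
# and returns half-spectrum magnitudes for the vocal tract and the excitation.
def _demo_divide():
    import numpy as np
    t = np.arange(512) / 16000
    frame = np.sin(2 * np.pi * 300 * t).astype(np.float32)
    vocal, excitation = devideExcitationAndVocal(frame)
    print(vocal.shape, excitation.shape)  # (256,) (256,)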
def findFormantes(datos):
    datos = np.asfortranarray(datos)
    A = librosa.lpc(datos, order=16)
    raices = np.roots(A)

    # Formants: keep roots in the upper half plane (rate is a module-level global)
    formantes = []
    for k in raices:
        if k.imag > 0:
            w = np.arctan2(k.imag, k.real)
            Fk = w * (rate / (2 * np.pi))
            # Bandwidth from the root's distance to the unit circle
            Bw = (-1 / 2) * (rate / (2 * np.pi)) * np.log((k.real ** 2 + k.imag ** 2) ** (1 / 2))
            if Fk > 90 and Bw < 450:
                formantes.append(Fk)
    return np.sort(formantes)
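# Hedged usage sketch (illustrative; findFormantes reads the sampling rate from
# the module-level global `rate`, which this demo sets): two damped resonances
# should be reported near 500 Hz and 1500 Hz.
def _demo_findFormantes():
    import numpy as np
    global rate
    rate = 16000
    rng = np.random.default_rng(0)
    t = np.arange(rate // 2) / rate
    x = np.sin(2 * np.pi * 500 * t) + np.sin(2 * np.pi * 1500 * t)
    x = (x * np.exp(-3 * t) + 1e-3 * rng.standard_normal(t.size)).astype(np.float32)
    print(findFormantes(x))  # expect values near 500 and 1500 Hz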
def feature_extraction(file_name):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        result = np.array([])
        # librosa.lpc returns a 1-D coefficient vector; use it directly rather
        # than averaging it down to a single scalar
        lpc = librosa.lpc(X, order=16)
        result = np.hstack((result, lpc))
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
        result = np.hstack((result, mfccs))
    return result
def extract_feature(file_name, mfcc, lpc, mel):
    X, sample_rate = librosa.load(file_name)
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13).T, axis=0)
        result = np.hstack((result, mfccs))
    if lpc:
        lpc_coefs = librosa.lpc(X, order=16)
        result = np.hstack((result, lpc_coefs))
    if mel:
        mel_spec = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel_spec))
    return result
def get_formants(x, lp_order, nr_formants):
    # Compute LP coefficients
    a = librosa.lpc(x, order=lp_order)
    # Get roots of A(z); keep the upper half plane
    rts = np.roots(a)
    rts = [r for r in rts if np.imag(r) >= 0]
    # Get angles
    angz = np.arctan2(np.imag(rts), np.real(rts))
    # Convert angles to formant frequencies (fs_targ: module-level sampling rate)
    formants = sorted(angz * (fs_targ / (2 * math.pi)))
    return formants[0:nr_formants]
def create_lpspectrum(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)
    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    plt.figure()
    plt.grid()
    plt.magnitude_spectrum(y_result, Fs=sampling_rate, color="blue")
    plt.title('LP Spectrum')
    plt.grid(color='grey', linestyle='--', linewidth=0.5)
    plt.savefig('static/images/lpspectrum-wav' + file + '-order' + str(order) + '.png')
    plt.close()
    return plt
def lpc(data, window=256, overlap=128, order=6):
    startindex = 0
    endindex = window
    features = []
    # Slide an overlapping window over the signal and fit LPC per frame
    while endindex <= len(data):
        y = data[startindex:endindex]
        coeffs = librosa.lpc(y, order=order)
        features.append(coeffs)
        startindex = endindex - overlap
        endindex = startindex + window
    return features
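# Hedged usage sketch for the frame-wise lpc() defined above (illustrative only):
def _demo_framewise_lpc():
    import numpy as np
    rng = np.random.default_rng(0)
    data = rng.standard_normal(8000).astype(np.float32)  # 1 s at 8 kHz
    features = lpc(data, window=256, overlap=128, order=6)
    print(len(features), features[0].shape)  # 61 frames, 7 coefficients each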
def lp_residual(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)
    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    output_file("lpresidual-wav" + file + "-order" + str(order) + ".html")
    p = figure(plot_width=550, plot_height=300,
               x_axis_label='Time', y_axis_label="Magnitude")
    p.title = Title(text='LP Residual')
    timefilter = list(range(len(audio)))
    p.line(timefilter, audio - y_result, line_width=2)
    show(p)
def create_lpresidual(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)
    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    plt.figure()
    plt.grid()
    plt.plot(audio - y_result)
    plt.xlabel("Time")
    plt.ylabel("Magnitude")
    plt.title('LP Residual')
    plt.grid(color='grey', linestyle='--', linewidth=0.5)
    plt.savefig('static/images/lpresidual-wav' + file + '-order' + str(order) + '.png')
    plt.close()
    return plt
def get_lpc_tonality(self):
    """Ratio of the zeroth to the fourth LPC coefficient.

    Metric for wheeze detection based on the observation by Oletic et al.
    that linear predictive coding (LPC) estimation error tends to fall off
    more rapidly (i.e., in fewer coefficients) for tonal sounds than for
    non-tonal sounds. More tonal sounds will have a higher value of this
    ratio.

    **Due to instability issues in librosa.lpc, this is currently not
    returned in self.get_features()**

    Oletic, Dinko, Bruno Arsenali, and Vedran Bilas. 2012. "Towards
    continuous wheeze detection body sensor node as a core of asthma
    monitoring system." In Wireless Mobile Communication and Healthcare,
    83:165-72. https://doi.org/10.1007/978-3-642-29734-2_23.
    """
    lpc_ = librosa.lpc(self.x, order=4)
    return np.abs(lpc_[0] / lpc_[4])
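# Hedged standalone illustration (not part of the original class): the same
# ratio computed on an arbitrary 1-D signal. No claim about typical values is
# made here; the docstring above already flags stability issues.
def _demo_lpc_tonality():
    import numpy as np
    import librosa
    rng = np.random.default_rng(0)
    x = rng.standard_normal(8000).astype(np.float32)
    lpc_ = librosa.lpc(x, order=4)
    print(np.abs(lpc_[0] / lpc_[4]))  # lpc_[0] is always 1.0, so this is 1/|a4|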
def formatSignal(file, frame_length=320, LPCsize=16):
    wave, _ = wavelib.load(file, sr=None)
    signalLen = wave.shape[0]
    sizeof = signalLen // frame_length
    # Pre-emphasis: wave[n] <- wave[n] - 0.85 * wave[n-1]
    wave[1:] = -0.85 * wave[:-1] + wave[1:]
    wave = wave[:sizeof * frame_length]
    wave = wave.reshape([sizeof, frame_length])

    # Each input row is two consecutive frames; the target is the second frame
    input_data = np.hstack((wave[:-1, :], wave[1:, :]))
    baseline = wave[1:, :]
    N, _ = baseline.shape

    LPC = np.zeros((N, LPCsize))
    for i in range(N):
        a = wavelib.lpc(baseline[i], order=LPCsize)
        LPC[i, :] = -1 * a[1:]
    Initial_state = input_data[:, frame_length - LPCsize:frame_length]
    return input_data, baseline, LPC, Initial_state
def formants(signal, width, step, fs, nb=4):
    """
    formants(signal, width, step, fs, nb=4)

    Compute the formants for all frames of a signal.

    Parameters
    ----------
    signal : ndarray
    width : float
        The width of a frame in ms.
    step : float
        The step between two frames in ms.
    fs : float
        The sampling frequency.
    nb : integer
        The number of formants considered for each frame.

    Returns
    -------
    formants : ndarray
        A 2-dim array with the formants of each frame.
    """
    frames = split(signal, width, step, fs)
    b, a = [1, -0.67], [1]
    roots = []
    for frame in frames:
        filtered_frame = sgl.lfilter(b, a, frame)  # pre-emphasis
        hamming_win = sgl.windows.hamming(filtered_frame.size)
        filtered_frame *= hamming_win  # apply Hamming window on the frame
        lpcs = lpc(filtered_frame, order=9)
        root = np.roots(lpcs)
        frame_res = root[root.imag > 0][:nb]
        if len(frame_res) < nb:
            frame_res = np.concatenate(
                (frame_res, [0] * (nb - len(frame_res))))
        frame_res = np.sort(frame_res)
        roots.append(frame_res)
    angles = np.angle(roots)
    freq = angles * (fs / (2 * np.pi))
    return np.sort(freq, axis=1)
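# Hedged sketch of the per-frame core of formants() (the split() helper is
# project-specific, so this operates on a single hand-built frame):
# pre-emphasize, window, fit an order-9 LPC, and read frequencies off pole angles.
def _demo_frame_formants():
    import numpy as np
    import scipy.signal as sgl
    import librosa
    fs = 16000
    t = np.arange(512) / fs
    frame = np.sin(2 * np.pi * 700 * t)
    frame = sgl.lfilter([1, -0.67], [1], frame)      # pre-emphasis
    frame = frame * sgl.windows.hamming(frame.size)  # Hamming window
    roots = np.roots(librosa.lpc(frame, order=9))
    freqs = np.angle(roots[roots.imag > 0]) * fs / (2 * np.pi)
    print(np.sort(freqs))  # one pole pair should land near 700 Hz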
def compute_lsfs(audio, expected_len=100, n_lsfs=20, sample_rate=16000, frame_size=512):
    """Scaled line spectral frequencies.

    Args:
        audio: Numpy ndarray or tensor. Shape [batch_size, audio_length] or
            [batch_size,].
        expected_len: Expected feature series length. This takes precedence
            over frame_size, following the DDSP authors' use of
            pad_or_trim_to_expected_length(), which imposes a length that may
            not match the one implied by parameters like frame_size.
        n_lsfs: Number of LSF values to return.
        sample_rate: Audio sample rate in Hz.

    Returns:
        Line spectral frequencies. Shape [batch_size, n_lsfs] or [n_lsfs,].
    """
    window_func = create_window(frame_size, name='hanning')
    LSFS = np.zeros((expected_len, n_lsfs))
    start_indexes = np.linspace(0, audio.size - frame_size - 1,
                                num=expected_len, endpoint=True).astype('int')
    for window_index in range(expected_len):
        # Windowing
        start_index = start_indexes[window_index]
        windowed_sig = np.multiply(
            audio[start_index:start_index + frame_size], window_func)
        # Analysis: LSF parameter extraction
        a = librosa.lpc(windowed_sig, order=n_lsfs)
        # Division by pi puts the LSFs in the 0-1 range
        lsfs = np.array(poly2lsf(a)) / np.pi
        LSFS[window_index] = lsfs
    return LSFS
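# Hedged usage sketch: compute_lsfs depends on create_window() from the
# surrounding project and poly2lsf() (available in the `spectrum` package);
# assuming both are importable, a 1 s clip at 16 kHz reduces to a fixed-size
# LSF matrix.
def _demo_compute_lsfs():
    import numpy as np
    rng = np.random.default_rng(0)
    audio = rng.standard_normal(16000).astype(np.float32)
    lsfs = compute_lsfs(audio, expected_len=100, n_lsfs=20)
    print(lsfs.shape)  # (100, 20); values scaled into the 0-1 range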
def compute_lpc_lsp(wav_file, label, feature_name, lsp_nums=[0, 1]):
    y, sr = librosa.load(wav_file)
    # rate, data = read(wav_file)
    plt.figure()
    snd = parselmouth.Sound(wav_file)
    spectrogram = snd.to_spectrogram()
    draw_spectrogram(spectrogram)
    plt.twinx()
    intensity = snd.to_intensity(time_step=0.025, minimum_pitch=75)
    time_stamps = intensity.xs()
    colors = ["r", "b"]
    for lsp_num in lsp_nums:
        lsp_values = []
        for i in range(len(time_stamps) - 1):
            # Audio between consecutive intensity time stamps
            curr_data = y[int(time_stamps[i] * sr):int(time_stamps[i + 1] * sr)]
            lpc_value = librosa.lpc(curr_data, order=8)
            lsp = lpc2lsp(lpc_value, otype=0)[lsp_num + 1]
            lsp_values.append(lsp / np.pi * sr)
        plt.ylabel("LSP frequency [Hz]")
        plt.plot(time_stamps[:-1], lsp_values, 'o', markersize=5, color="w")
        plt.plot(time_stamps[:-1], lsp_values, 'o', markersize=2,
                 color=colors[lsp_num],
                 label="LSPfrequency[{}]".format(lsp_num))
    plt.xlim([snd.xmin, snd.xmax])
    plt.ylim([0, 5000])
    plt.legend(loc="upper right")
    plt.savefig("/home/jialu/voice_quality_plots/v2/lspFrequency/" +
                label + "_" + feature_name + ".png")
    plt.close()
def lp_spectrum(file, order):
    file = str(file)
    audio_path = 'static/wav/audio' + file + '.wav'
    audio, sampling_rate = librosa.load(audio_path)
    lp_result = librosa.lpc(audio, order=order)
    y_result = scipy.signal.lfilter(np.hstack([[0], -1 * lp_result[1:]]), [1], audio)

    n = len(audio)
    T = 1 / sampling_rate
    yf = np.fft.fft(y_result)
    xf = np.linspace(0.0, 1.0 / (2.0 * T), n // 2)

    output_file("lpspectrum-wav" + file + "-order" + str(order) + ".html")
    p = figure(plot_width=550, plot_height=300,
               x_axis_label='Frequency', y_axis_label="Amplitude")
    p.title = Title(text='LP Spectrum')
    p.line(xf, 2.0 / n * np.abs(yf[:n // 2]), line_width=2)
    show(p)
def get_formants(x, Fs, nformants):
    # Reading from file is left to the caller, e.g.:
    # Fs, x = wv.read(file_path)

    # Get Hamming window.
    N = len(x)
    w = hamming(N)

    # Apply window and high-pass (pre-emphasis) filter.
    x1 = x * w
    x1 = lfilter([1], [1., 0.63], x1)

    # Get LPC; rule of thumb for the order: 2 + Fs/1000.
    ncoeff = 2 + (Fs / 1000)
    A = lpc(x1, order=int(ncoeff))

    # Get roots in the upper half plane.
    rts = np.roots(A)
    rts = [r for r in rts if np.imag(r) >= 0]

    # Get angles.
    angz = np.arctan2(np.imag(rts), np.real(rts))

    # Convert to frequencies in Hz and keep the lowest nformants.
    frqs = sorted(angz * (Fs / (2 * math.pi)))
    return frqs[:nformants]