def extract_LLD_from_audio(audio, fs):
    """Extract frame-level low-level descriptors (LLDs) from a mono signal.

    Parameters
    ----------
    audio : np.ndarray
        1-D waveform (float samples, as loaded by librosa -- TODO confirm).
    fs : int
        Sampling rate of `audio` in Hz.

    Returns
    -------
    tuple of np.ndarray
        (mfcc, mfcc_hsf, lpc, mel_spectrogram_db, spect_hsf, [f0, hnr]);
        the frame-wise matrices are transposed to (frames, coefficients).
    """
    # MFCC. Keyword arguments (y=, sr=) are required by librosa >= 0.10,
    # where the positional forms were removed; this also matches the
    # melspectrogram call below.
    mfcc = librosa.feature.mfcc(y=audio, sr=fs, n_fft=N_FFT,
                                hop_length=HOP_LENGTH, center=False).transpose()
    mfcc_hsf = extract_HSF(mfcc)

    # LPC of order 16 ("order" became keyword-only in librosa >= 0.10).
    lpc = librosa.lpc(audio, order=16)

    # Mel-spectrogram in dB, referenced to its own maximum.
    spect = librosa.feature.melspectrogram(y=audio, sr=fs, n_fft=N_FFT,
                                           hop_length=HOP_LENGTH, center=False)
    spect = librosa.power_to_db(spect, ref=np.max).transpose()
    spect_hsf = extract_HSF(spect)

    # Utterance-level scalars: fundamental frequency and harmonics-to-noise
    # ratio (helpers defined elsewhere in this project).
    f0 = get_F_0(audio, fs)[0]
    hnr = get_HNR(audio, fs)

    return (np.asarray(mfcc), np.asarray(mfcc_hsf), np.asarray(lpc),
            np.asarray(spect), np.asarray(spect_hsf), np.asarray([f0, hnr]))
def sa_featurize(audiofile):
    """Compute Signal_Analysis voice features for a single audio file.

    Feature definitions follow the package docs:
    https://brookemosby.github.io/Signal_Analysis/Signal_Analysis.features.html#module-Signal_Analysis.features.signal

    Returns a (features, labels) pair of parallel lists.
    """
    waveform, rate = librosa.core.load(audiofile)
    seconds = len(waveform) / rate
    print(seconds)

    # Scalar descriptors, computed in the same order as before.
    f0_val = get_F_0(waveform, rate)[0]
    hnr_val = get_HNR(waveform, rate)
    jitter_dict = get_Jitter(waveform, rate)
    pulses_per_sec = len(get_Pulses(waveform, rate)) / seconds

    features = [f0_val, hnr_val, pulses_per_sec] + list(jitter_dict.values())
    labels = (['FundamentalFrequency', 'HarmonicstoNoiseRatio', 'PulsesPerSec']
              + list(jitter_dict))

    print(dict(zip(labels, features)))
    return features, labels
def test_get_HNR():
    """Check get_HNR argument validation and accuracy against Praat values."""
    # Each invalid call must raise ValueError with the exact message.
    invalid_cases = [
        (dict(min_pitch=0), "min_pitch has to be greater than zero."),
        (dict(silence_threshold=3), "silence_threshold isn't in [ 0, 1 ]."),
    ]
    for kwargs, message in invalid_cases:
        with pytest.raises(Exception) as excinfo:
            sig.get_HNR(sig1, r1, **kwargs)
        assert excinfo.typename == 'ValueError'
        assert excinfo.value.args[0] == message

    # Reference HNR values produced by Praat with the default parameters.
    reference = [(sig1, r1, 13.102), (sig2, r2, 9.660), (sig3, r3, 17.940),
                 (sig4, r4, 16.254), (np.zeros(500), 500, 0)]
    for wave, rate, true_val in reference:
        est_val = sig.get_HNR(wave, rate)
        assert abs(est_val - true_val) < .3, 'HNR not accurate'
def pitch_based_features(signal, rate, win_length, hop_length):
    """Return the harmonics-to-noise ratio (HNR) of `signal`.

    `win_length` and `hop_length` are accepted for interface parity with
    the other feature extractors but are not used by this computation.
    """
    return sig.get_HNR(signal, rate)
def get_data(inputwave):
    # Extract per-file acoustic features from one wav file, append their
    # summary statistics to module-level accumulator lists, and return the
    # feature matrix built from everything accumulated so far.
    #
    # NOTE(review): this function mutates module-level lists (mfcc_data,
    # mfccstd, ..., hnrmin) defined elsewhere in the file; each call grows
    # every list by one entry, so the returned matrix has one row per call
    # made so far.
    fs, signal = wav.read(inputwave)  # integer PCM samples + native rate
    y, sr = lb.load(inputwave)  # float samples (librosa default resampling -- TODO confirm rate)
    # 12 cepstral coefficients, 20 ms frames with 10 ms stride, 40 mel filters.
    mfcc = speechpy.feature.mfcc(signal,
                                 sampling_frequency=fs,
                                 frame_length=0.020,
                                 frame_stride=0.01,
                                 num_filters=40,
                                 fft_length=512,
                                 low_frequency=0,
                                 high_frequency=None,
                                 num_cepstral=12)
    zcr = lb.feature.zero_crossing_rate(y)
    f0 = freq_from_autocorr(signal, fs)
    # NOTE(review): lb.feature.rmse was renamed to lb.feature.rms in
    # librosa >= 0.7 and removed later; this call requires an older librosa.
    rms = lb.feature.rmse(y=y)
    HNR = SA.get_HNR(signal, fs, time_step=0, min_pitch=75,
                     silence_threshold=0.1, periods_per_window=4.5)
    ################################################################################################
    print(inputwave)
    # Per-coefficient statistics over frames (axis=0 is the frame axis for
    # speechpy's (frames, coeffs) MFCC layout).
    mfcc_data.append(mfcc.mean(axis=0))
    mfccstd.append(np.std(mfcc, axis=0))
    mfccskew.append(stats.skew(mfcc, axis=0))
    mfcckurt.append(stats.kurtosis(mfcc, axis=0))
    mfccmax.append(np.max(mfcc, axis=0))
    mfccmin.append(np.min(mfcc, axis=0))
    # librosa features are (1, frames), hence the statistics along axis=1.
    zcrmean.append(zcr.mean(axis=1))
    zcrstd.append(np.std(zcr, axis=1))
    zcrskew.append(stats.skew(zcr, axis=1))
    zcrkurt.append(stats.kurtosis(zcr, axis=1))
    zcrmax.append(np.max(zcr, axis=1))
    zcrmin.append(np.min(zcr, axis=1))
    #f0mean.append(f0.mean(axis=0))
    #f0std.append(f0.std(axis=0))
    #f0skew.append(stats.skew(f0, axis=0))
    #f0kurt.append(stats.kurtosis(f0, axis=0))
    #f0max.append(np.max(f0, axis=0))
    #f0min.append(np.min(f0, axis=0))
    rmsmean.append(rms.mean(axis=1))
    rmsstd.append(np.std(rms, axis=1))
    rmsskew.append(stats.skew(rms, axis=1))
    rmskurt.append(stats.kurtosis(rms, axis=1))
    rmsmax.append(np.max(rms, axis=1))
    rmsmin.append(np.min(rms, axis=1))
    # NOTE(review): these calls assume SA.get_HNR returns an array; a plain
    # float has no .mean/.std attribute -- verify the installed version.
    hnrmean.append(HNR.mean(axis=0))
    hnrstd.append(HNR.std(axis=0))
    hnrskew.append(stats.skew(HNR, axis=0))
    hnrkurt.append(stats.kurtosis(HNR, axis=0))
    hnrmax.append(np.max(HNR, axis=0))
    hnrmin.append(np.min(HNR, axis=0))
    #######################################################################################################
    # Convert every accumulator list to a 2-D array (rows = files so far).
    mfcc_array = np.asarray(mfcc_data)
    mfccstd_array = np.asarray(mfccstd)
    mfccskew_array = np.asarray(mfccskew)
    mfcckurt_array = np.asarray(mfcckurt)
    mfccmax_array = np.asarray(mfccmax)
    mfccmin_array = np.asarray(mfccmin)
    zcrmean_array = np.asarray(zcrmean)
    zcrstd_array = np.asarray(zcrstd)
    zcrskew_array = np.asarray(zcrskew)
    zcrkurt_array = np.asarray(zcrkurt)
    zcrmax_array = np.asarray(zcrmax)
    zcrmin_array = np.asarray(zcrmin)
    rmsmean_array = np.asarray(rmsmean)
    rmsstd_array = np.asarray(rmsstd)
    rmsskew_array = np.asarray(rmsskew)
    rmskurt_array = np.asarray(rmskurt)
    rmsmax_array = np.asarray(rmsmax)
    rmsmin_array = np.asarray(rmsmin)
    hnrmean_array = np.asarray(hnrmean)
    hnrstd_array = np.asarray(hnrstd)
    hnrskew_array = np.asarray(hnrskew)
    hnrkurt_array = np.asarray(hnrkurt)
    hnrmax_array = np.asarray(hnrmax)
    hnrmin_array = np.asarray(hnrmin)
    # HNR statistics are scalars per file; reshape to column vectors so
    # they concatenate with the 2-D feature arrays below.
    hnrmean_rearray = np.reshape(hnrmean_array, (-1, 1))
    hnrstd_rearray = np.reshape(hnrstd_array, (-1, 1))
    hnrskew_rearray = np.reshape(hnrskew_array, (-1, 1))
    hnrkurt_rearray = np.reshape(hnrkurt_array, (-1, 1))
    hnrmax_rearray = np.reshape(hnrmax_array, (-1, 1))
    hnrmin_rearray = np.reshape(hnrmin_array, (-1, 1))
    # One wide row per file: all feature statistics side by side.
    finaldata = np.concatenate(
        (mfcc_array, mfccstd_array, mfccskew_array, mfcckurt_array,
         mfccmax_array, mfccmin_array, zcrmean_array, zcrstd_array,
         zcrskew_array, zcrkurt_array, zcrmax_array, zcrmin_array,
         rmsmean_array, rmsstd_array, rmsskew_array, rmskurt_array,
         rmsmax_array, rmsmin_array, hnrmean_rearray, hnrstd_rearray,
         hnrskew_rearray, hnrkurt_rearray, hnrmax_rearray, hnrmin_rearray),
        axis=1)
    return finaldata
def get_hnr(y, sr):
    """Compute the frame-wise HNR of `y`: one value per 10 ms frame.

    The signal is cut into consecutive non-overlapping frames of
    sr / 100 samples; any trailing partial frame is discarded.
    """
    frame_len = int(sr / 100)
    n_frames = int(y.shape[0] / frame_len)
    values = []
    for k in range(n_frames):
        segment = y[k * frame_len:(k + 1) * frame_len]
        values.append(get_HNR(segment, sr))
    return np.array(values)
mfcc_dir_name = '\\mfcc\\' + speaker_directory if not os.path.isdir(corename + mfcc_dir_name): os.mkdir(corename + mfcc_dir_name) specgram_dir_name = '\\specgrams\\' + speaker_directory if not os.path.isdir(corename + specgram_dir_name): os.mkdir(corename + specgram_dir_name) # iterate through all recordings in a given directory for count, wavname in enumerate(sorted(os.listdir(corename + recordings_core + '\\' + speaker_directory))): print('--- {} --- {} ---'.format(wavname, sentences.iloc[count]['mod'].upper())) # load wave rate, signal = read(corename + recordings_core + '\\' + speaker_directory + '\\' + wavname) # get f0, Harmonic-to-Noise Ratio (HNR) and jitter values recF0mean = get_F_0(signal, rate) hnr = get_HNR(signal, rate) jttr = get_Jitter(signal, rate)['local'] # librosa:: preprocessing (conversion to float) signal = signal / float(2 ** 15) # librosa:: generate specgram and save to relevant dir D = librosa.stft(signal) figure() specshow(librosa.amplitude_to_db(librosa.magphase(D)[0], ref=np.max)) axis('off') savefig(corename + specgram_dir_name + '\\' + wavname[-10:-4] + '.png', dpi=200) close() # librosa:: calculate MFCC's (n_mfcc=20) and save *.npy file to relevant dir recMFCC = mfcc(signal, rate, n_mfcc=20, hop_length=winshift,