def compute_librosa_features(self, audio_data, feat_name):
    """
    Compute one named feature with the matching librosa-style extractor.

    :param audio_data: signal, forwarded to the extractor as ``y``
    :param feat_name: one of 'zero_crossing_rate', 'rmse', 'mfcc',
        'spectral_centroid', 'spectral_rolloff', 'spectral_bandwidth'
    :return: the selected extractor's result, or None when ``feat_name`` is
        not one of the names above
    """
    # Entries are lazy so only the requested extractor is ever evaluated.
    extractors = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rmse(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    compute = extractors.get(feat_name)
    if compute is None:
        return None
    return compute()
def findTimbral(wave):
    """
    Summarise an audio signal as a flat dict of timbral statistics.

    The result holds mean/variance pairs for the spectral centroid, spectral
    rolloff, onset-strength "flux", zero-crossing rate and each of 10 MFCC
    rows, plus a single ``low_energy`` value: 8 + 20 + 1 = 29 entries.

    :param wave: audio signal, passed to every extractor as ``y``
    :return: dict mapping statistic name to value
    """
    # NOTE(review): assumes `feature` is librosa.feature and `onset_strength`
    # is librosa's. The signal is passed by keyword because librosa >= 0.10
    # rejects positional audio arguments.
    timbral_feature = {}

    centroid = feature.spectral_centroid(y=wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(y=wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    flux = onset_strength(y=wave, lag=1)  # used here as spectral flux
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(y=wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    # NOTE(review): unlike every other variance here this one uses ddof=0.
    # Kept as-is so existing feature values do not shift; confirm the intent.
    timbral_feature['var_zcr'] = np.var(zero_crossing)

    mfcc_rows = feature.mfcc(y=wave, n_mfcc=10)
    for index, coef in enumerate(mfcc_rows, start=1):
        timbral_feature['mu_mfcc' + str(index)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(index)] = np.var(coef, ddof=1)

    timbral_feature['low_energy'] = feature_low_energy(wave)
    return timbral_feature
def compute_librosa_features(self, audio_data, feat_name):
    """
    Log the requested feature name, then compute it with the matching
    librosa-style extractor.

    :param audio_data: signal, forwarded to the extractor as ``y``
    :param feat_name: one of 'zero_crossing_rate', 'rmse', 'mfcc',
        'spectral_centroid', 'spectral_rolloff', 'spectral_bandwidth'
    :return: the selected extractor's result, or None when ``feat_name`` is
        not one of the names above
    """
    logging.info('=> Computing {}'.format(feat_name))

    # Entries are lazy so only the requested extractor is ever evaluated.
    # The 'rmse' key is served by the `rms` function.
    extractors = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rms(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    compute = extractors.get(feat_name)
    if compute is None:
        return None
    return compute()
def compute_spec_centroid(data):
    """
    Compute the spectral centroid of a signal using the extractor's defaults.

    :param data: audio signal, forwarded as ``y``
    :return: the first row of the ``feature.spectral_centroid`` result
    """
    # NOTE(review): assumes `feature` is librosa.feature. The signal is passed
    # by keyword because librosa >= 0.10 rejects positional audio arguments.
    return feature.spectral_centroid(y=data)[0]
def get_files_centroid(tracks):
    """
    Load each track and compute its spectral centroid.

    :param tracks: iterable of audio file paths accepted by ``librosa.load``
    :return: dict keyed by the first value returned by
        ``extract_track_name(track)``, holding that track's centroid array;
        tracks that map to the same key overwrite each other
    """
    output_tracks = {}
    for track in tracks:
        y, sr = librosa.load(track)
        # Keyword arguments: librosa >= 0.10 rejects positional ``y``/``sr``.
        centroid = spectral_centroid(y=y, sr=sr)
        # Only the first element of the pair is used as the dict key.
        nth_track, _ = extract_track_name(track)
        output_tracks[nth_track] = centroid
    return output_tracks
def test_spectral_centroid(self):
    """
    Check the local ``spectral_centroid`` wrapper against a direct
    ``rosaft.spectral_centroid`` call on the same signal.
    """
    expected = rosaft.spectral_centroid(
        y=self.sig, sr=self.fs, S=None, n_fft=nfft, hop_length=stepsize)
    observed = spectral_centroid(self.args)
    # The largest element-wise deviation must stay below the tolerance.
    worst_error = np.abs(expected - observed).max()
    self.assertTrue(worst_error < tol)
def spectral_centroid(args):
    """
    Compute the spectral centroid from the PSD described by ``args``.

    :param args: argument container understood by ``get_psd`` and
        ``unroll_args``; must provide 'fs', 'nfft' and 'noverlap'
    :return: result of ``rosaft.spectral_centroid`` on the precomputed PSD
    """
    spectrum = get_psd(args)
    sample_rate, fft_size, overlap = unroll_args(
        args, ['fs', 'nfft', 'noverlap'])
    # The hop is the part of each FFT window not shared with the next one.
    return rosaft.spectral_centroid(
        y=None,
        sr=sample_rate,
        S=spectrum,
        n_fft=fft_size,
        hop_length=fft_size - overlap,
    )
def get_spectral_features(x, fs, fmin=[], fmax=[], nfft=2048, do_plot=False,
                          logscale=1):
    """
    Compute the mean spectral centroid, mean spectral rolloff and the
    spectrum peaks of a signal, optionally plotting them.

    :param x: input signal
    :param fs: sampling frequency
    :param fmin: lower frequency bound; also forwarded to
        ``find_spectrum_peaks``
    :param fmax: upper frequency bound; also forwarded to
        ``find_spectrum_peaks``
    :param nfft: FFT size
    :param do_plot: when truthy, draw the PSD, centroid and peaks
    :param logscale: when truthy, use a log-scaled x-axis in the plot
    :return: (spect_centroid, spect_rolloff, peaks_freq, pxx_db, freqs)
    """
    # The [] defaults are never mutated; they only act as a falsy "not given"
    # marker. NOTE(review): a bound of 0 is also falsy and is treated as
    # "not given" — confirm that is intended.
    # Keyword arguments: librosa >= 0.10 rejects positional ``y``/``sr``.
    librosa_kwargs = {'y': x, 'sr': fs, 'n_fft': nfft}
    if fmin and fmax:
        # One frequency value per FFT bin, spread evenly over [fmin, fmax].
        librosa_kwargs['freq'] = np.linspace(fmin, fmax, 1 + int(nfft / 2))
    spect_centroid = np.mean(feature.spectral_centroid(**librosa_kwargs))
    spect_rolloff = np.mean(feature.spectral_rolloff(**librosa_kwargs))

    peaks_freq, peak_amps, pxx_db, freqs = find_spectrum_peaks(
        x, fs, fmin, fmax, nfft)

    if do_plot:
        colors = sns.color_palette(n_colors=3)
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.plot(freqs, pxx_db, color=colors[0])
        ax.axvline(spect_centroid, color=colors[2])
        ax.scatter(peaks_freq, peak_amps, color=colors[1])
        ax.autoscale(axis="x", tight=True)
        ax.set(xlabel='Frequency (Hz)', ylabel='Gain (dB)',
               title='Spectral Features')
        if logscale:
            ax.set_xscale('log')
        ax.grid(True, which="both", ls="-")
        plt.legend(['Pxx (dB)', 'Spectral Centroid', 'Spectral Peaks'])

    return spect_centroid, spect_rolloff, peaks_freq, pxx_db, freqs
def get_spectral_centroid(self, outside_series=None, outside_sr=None):
    """
    Compute the spectral centroid of the selected series.

    :param outside_series: forwarded to ``self.select_series``
    :param outside_sr: forwarded to ``self.select_sr``
    :return: the first row of the ``spectral_centroid`` result
    """
    y = self.select_series(outside_series)
    sr = self.select_sr(outside_sr)
    # NOTE(review): assumes `spectral_centroid` is librosa's. The signal is
    # passed by keyword because librosa >= 0.10 rejects positional audio.
    return spectral_centroid(y=y, sr=sr)[0]
def feature_engineer(self, audio_data):
    """
    Extract frame-level features and collapse them into one labelled row.

    Zero-crossing rate, MFCC (13 coefficients), spectral centroid, spectral
    rolloff (90 %) and spectral bandwidth are stacked along axis 0, averaged
    (mean) along axis 1, and wrapped in a data frame.

    :param audio_data: the input signal samples
    :return: a pandas DataFrame with columns ``self.COL`` plus a 'label'
        column set to ``self.label``
    """
    frame_features = (
        zero_crossing_rate(y=audio_data, hop_length=self.FRAME),
        mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        spectral_centroid(y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        spectral_rolloff(y=audio_data, sr=self.RATE, hop_length=self.FRAME,
                         roll_percent=0.90),
        spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    )
    stacked = np.concatenate(frame_features, axis=0)

    # Mean along axis 1, then transpose so each feature becomes a column.
    averaged = np.mean(stacked, axis=1, keepdims=True).transpose()

    features_df = pd.DataFrame(data=averaged, columns=self.COL, index=None)
    features_df['label'] = self.label
    return features_df
def extract_feature(self, audio_data):
    """
    Extract a flat feature vector from audio data.

    The vector holds, in order: mean zero-crossing rate, mean short-time
    energy, mean of the positive short-time-energy differences, mean
    short-time zero-crossing rate, mean spectral centroid, bandwidth,
    rolloff (90 %) and flatness, followed by the per-coefficient means of
    13 MFCCs.

    :param audio_data: audio signal
    :return: tuple ``(features, self.label)``
    """
    # Integer division: a float hop length (``FRAME / 2`` on Python 3) is not
    # a valid hop for the frame-based extractors.
    hop = self.FRAME // 2
    # Window length used by the short-time helpers: 20 ms worth of samples.
    window_len = int(20 * 0.001 * self.RATE)

    zcr = lrf.zero_crossing_rate(audio_data, frame_length=self.FRAME,
                                 hop_length=hop)
    feature_zcr = np.mean(zcr)

    ste = audio_utils.AudioUtils.ste(audio_data, 'hamming', window_len)
    feature_ste = np.mean(ste)

    # Only rising energy steps contribute.
    # NOTE(review): if no step is positive this is the mean of an empty array
    # (nan) — confirm callers tolerate that.
    ste_acc = np.diff(ste)
    feature_steacc = np.mean(ste_acc[ste_acc > 0])

    stzcr = audio_utils.AudioUtils.stzcr(audio_data, 'hamming', window_len)
    feature_stezcr = np.mean(stzcr)

    mfcc_frames = lrf.mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    feature_mfcc = np.mean(mfcc_frames, axis=1)

    centroid = lrf.spectral_centroid(y=audio_data, sr=self.RATE,
                                     hop_length=hop)
    feature_spectral_centroid = np.mean(centroid)

    bandwidth = lrf.spectral_bandwidth(y=audio_data, sr=self.RATE,
                                       hop_length=hop)
    feature_spectral_bandwidth = np.mean(bandwidth)

    rolloff = lrf.spectral_rolloff(y=audio_data, sr=self.RATE,
                                   hop_length=hop, roll_percent=0.90)
    feature_spectral_rolloff = np.mean(rolloff)

    flatness = lrf.spectral_flatness(y=audio_data, hop_length=hop)
    feature_spectral_flatness = np.mean(flatness)

    features = np.append([
        feature_zcr, feature_ste, feature_steacc, feature_stezcr,
        feature_spectral_centroid, feature_spectral_bandwidth,
        feature_spectral_rolloff, feature_spectral_flatness
    ], feature_mfcc)
    return features, self.label
def predict(file):
    """
    Classify one audio file with the stored model and describe the result.

    The label encoder classes, model architecture (JSON) and weights are read
    from ``outdir``; the audio file is resolved relative to the current
    working directory.

    :param file: audio file path, relative to ``os.getcwd()``
    :return: dict with 'file_path', 'classes' (class label -> probability
        formatted with 32 decimals) and 'position' (average spectral centroid
        as 'frequency', duration as 'length')
    """
    label_encoder = LabelEncoder()
    label_encoder.classes_ = np.load(os.path.join(outdir, encoder_filename))

    # The context manager closes the handle even if reading fails.
    with open(os.path.join(outdir, model_filename), "r") as model_json_handle:
        model = model_from_json(model_json_handle.read())
    model.load_weights(os.path.join(outdir, model_weights_filename))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer='adam')

    file_path = os.path.join(os.getcwd(), file)
    y, sr = librosa.load(file_path, res_type='kaiser_fast')

    # A batch of one sample; keep only that sample's probability vector.
    prediction_feature = np.array([get_mfcc(y, sr)])
    predicted_proba = model.predict_proba(prediction_feature)[0]

    frequency = np.average(spectral_centroid(y=y, sr=sr))
    length = librosa.get_duration(y=y, sr=sr)

    result = {
        'file_path': file_path,
        'classes': {},
        'position': {
            'frequency': frequency,
            'length': length
        }
    }
    for class_index, proba in enumerate(predicted_proba):
        category = label_encoder.inverse_transform(np.array([class_index]))
        result['classes'][category[0]] = format(proba, '.32f')
    return result
def reduce_noise_power(y, sr):
    """
    Attenuate the signal below and above bands derived from its median
    spectral centroid.

    A low-shelf filter (-30 dB) is placed at 0.1x the rounded median centroid
    and a high-shelf filter (-12 dB) at 1.5x of it.

    :param y: audio signal
    :param sr: sampling rate
    :return: the signal after the effects chain has been applied
    """
    median_centroid = round(np.median(spectral_centroid(y=y, sr=sr)))
    threshold_h = median_centroid * 1.5
    threshold_l = median_centroid * 0.1

    chain = AudioEffectsChain()
    chain = chain.lowshelf(gain=-30.0, frequency=threshold_l, slope=0.8)
    chain = chain.highshelf(gain=-12.0, frequency=threshold_h, slope=0.5)
    return chain(y)
def feature_extractor(y, sr):
    """
    Build a per-frame feature matrix from an audio signal.

    RMS, spectral centroid, spectral bandwidth, spectral rolloff,
    zero-crossing rate, chroma and MFCC are stacked row-wise in that order
    and the stack is transposed before being returned. Progress messages are
    printed on entry, after the librosa import and on exit.

    :param y: audio signal
    :param sr: sampling rate
    :return: the transposed stack of all feature rows
    """
    print('вошли в процедyрy feature_extractor')
    from librosa import feature as f
    print('либрозy как f загрyзили')

    rows = [
        f.rms(y=y)[0],
        f.spectral_centroid(y=y, sr=sr),
        f.spectral_bandwidth(y=y, sr=sr),
        f.spectral_rolloff(y=y, sr=sr),
        f.zero_crossing_rate(y),
        f.chroma_stft(y=y, sr=sr),
    ]
    # MFCC is computed after chroma but stacked last, as before.
    rows.append(f.mfcc(y=y, sr=sr))
    output = np.vstack(rows).T

    print('feature_extractor закончил работy')
    return output
def get_mir(audio_path):
    """
    Build a stacked feature array from madmom and librosa-style extractors.

    Three madmom flux features are computed from a spectrogram of the file,
    and six further features from a Signal loaded with ``start=0, stop=30``.
    All nine are stacked along axis 1 and a leading axis is added.

    :param audio_path: path of the audio file
    :return: the stacked features indexed with ``[na, :, :]``
    """
    hop_length = 200
    spectrogram = madmom.audio.spectrogram.Spectrogram(
        audio_path, frame_size=2048, hop_size=hop_length, fft_size=4096)
    # only take 30s snippets to align data
    audio = madmom.audio.signal.Signal(audio_path, dtype=float, start=0,
                                       stop=30)

    # madmom features (MFCC is left out: it is 2-D and would not stack)
    madmom_features = [
        spectral_flux(spectrogram),
        superflux(spectrogram),
        complex_flux(spectrogram),
    ]

    # librosa features: each result has its leading axis squeezed away
    raw_librosa = [
        spectral_centroid(audio, hop_length=hop_length),
        spectral_bandwidth(audio, hop_length=hop_length),
        spectral_flatness(audio, hop_length=hop_length),
        spectral_rolloff(audio, hop_length=hop_length),
        rmse(audio, hop_length=hop_length),
        zero_crossing_rate(audio, hop_length=hop_length),
    ]
    librosa_features = [np.squeeze(item, axis=0) for item in raw_librosa]

    return np.stack(madmom_features + librosa_features, axis=1)[na, :, :]
def _calc_feat(self, window, feat_name):
    """
    Compute one named feature for a window of audio and pool it over frames.

    :param window: audio samples, passed to the extractor as ``y``
    :param feat_name: 'mfcc', 'chroma_stft', 'melspectrogram',
        'spectral_centroid', 'spectral_rolloff', 'tonnetz' or
        'zero_crossing_rate'
    :return: the feature after pooling according to ``self.feature_pool``
        ('sum', 'max', 'mean', 'flatten' or 'none') and, when
        ``self.l2_norm`` is set and the result has more than one row,
        division by its norm
    :raises AssertionError: on an unknown ``feat_name`` or
        ``self.feature_pool``
    """
    # Validation uses explicit raises rather than ``assert False``: assert
    # statements are stripped under ``python -O``, which would let an invalid
    # name fall through with ``feat = None``. The exception type is unchanged.

    # calculate feature
    if feat_name == 'mfcc':
        feat = FT.mfcc(y=window, sr=self.sr, n_mfcc=_N_MFCC)
    elif feat_name == 'chroma_stft':
        feat = FT.chroma_stft(y=window, sr=self.sr)
    elif feat_name == 'melspectrogram':
        feat = FT.melspectrogram(y=window, sr=self.sr, n_mels=128,
                                 n_fft=1024, hop_length=512)
        feat = L.power_to_db(feat)
    elif feat_name == 'spectral_centroid':
        feat = FT.spectral_centroid(y=window, sr=self.sr)
    elif feat_name == 'spectral_rolloff':
        feat = FT.spectral_rolloff(y=window, sr=self.sr)
    elif feat_name == 'tonnetz':
        feat = FT.tonnetz(y=window, sr=self.sr)
    elif feat_name == 'zero_crossing_rate':
        feat = FT.zero_crossing_rate(y=window)
    else:
        raise AssertionError('Invalid feature')

    # pool feature from multiple frames
    if self.feature_pool == 'sum':
        feat = feat.sum(axis=1)
    elif self.feature_pool == 'max':
        feat = feat.max(axis=1)
    elif self.feature_pool == 'mean':
        feat = feat.mean(axis=1)
    elif self.feature_pool == 'flatten':
        feat = feat.flatten()
    elif self.feature_pool != 'none':
        raise AssertionError('Invalid feature pooling scheme')

    # normalize features
    if self.l2_norm and feat.shape[0] > 1:
        feat /= np.linalg.norm(feat)
    return feat
def feature_engineer(self, audio_data):
    """
    Extract frame-level features, average them, and log how long each step
    takes.

    Zero-crossing rate, RMSE, MFCC (13 coefficients), spectral centroid,
    spectral rolloff (90 %) and spectral bandwidth are stacked along axis 0
    and averaged (mean) along axis 1.

    :param audio_data: the input signal samples
    :return: tuple ``(mean_feat, self.label)`` where ``mean_feat`` is the
        transposed mean with one entry per stacked feature row
    """
    def timed(message, compute):
        # Log `message`, run `compute()`, log the elapsed time, return result.
        logging.info(message)
        start = timeit.default_timer()
        value = compute()
        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))
        return value

    zcr_feat = timed(
        'Computing zero_crossing_rate...',
        lambda: zero_crossing_rate(y=audio_data, hop_length=self.FRAME))
    rmse_feat = timed(
        'Computing rmse...',
        lambda: rmse(y=audio_data, hop_length=self.FRAME))
    mfcc_feat = timed(
        'Computing mfcc...',
        lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13))
    spectral_centroid_feat = timed(
        'Computing spectral centroid...',
        lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME))
    spectral_rolloff_feat = timed(
        'Computing spectral rolloff...',
        lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                 hop_length=self.FRAME, roll_percent=0.90))
    spectral_bandwidth_feat = timed(
        'Computing spectral bandwidth...',
        lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME))

    # The concatenation itself is not timed, only the averaging.
    concat_feat = np.concatenate((
        zcr_feat,
        rmse_feat,
        mfcc_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
    ), axis=0)

    mean_feat = timed(
        'Averaging...',
        lambda: np.mean(concat_feat, axis=1, keepdims=True).transpose())
    return mean_feat, self.label
def get_spectral_features(x, fs, fmin=[], fmax=[], nfft=2048, do_plot=False,
                          logscale=True):
    """
    Compute some spectral features using the
    `librosa <https://librosa.github.io/librosa/index.html>`_ library :

    * Spectrum centroid
    * Spectrum rolloff
    * Peaks in the power spectral density

    Parameters
    ----------
    x : array
        Input array. Must be 1D.
    fs : float
        Sampling frequency (Hz)
    fmin : float
        Minimum frequency (Hz)
    fmax : float
        Maximum frequency (Hz)
    nfft : int
        Number of points for the FFT - Default: 2048
    do_plot : bool
        If True, plot the spectral features - Default: False
    logscale : bool
        If True, use a log-scale for the x-axis - Default: True

    Returns
    -------
    spect_centroid : float
        Spectrum centroid. See :func:`librosa.feature.spectral_centroid`
    spect_rolloff : float
        Spectrum rolloff. See :func:`librosa.feature.spectral_rolloff`
    peaks_freq : array
        Peaks in the spectrum
    pxx_db : array
        Power Spectral Density (PSD), in dB
    freqs : array
        Frequency associated with the PSD

    Raises
    ------
    ValueError
        If ``x`` has more than one dimension.
    ImportError
        If librosa is not available.
    """
    x = np.array(x)
    if x.ndim > 1:
        raise ValueError('Input x must be 1D')
    if not HAS_LIBROSA:
        raise ImportError('Librosa is not installed/available')

    # The [] defaults are never mutated; they only act as a falsy "not given"
    # marker. NOTE(review): a bound of 0 is also falsy and is treated as
    # "not given" — confirm that is intended.
    # Keyword arguments: librosa >= 0.10 rejects positional ``y``/``sr``.
    librosa_kwargs = {'y': x, 'sr': fs, 'n_fft': nfft}
    if fmin and fmax:
        # One frequency value per FFT bin, spread evenly over [fmin, fmax].
        librosa_kwargs['freq'] = np.linspace(fmin, fmax, 1 + int(nfft / 2))
    spect_centroid = np.mean(feature.spectral_centroid(**librosa_kwargs))
    spect_rolloff = np.mean(feature.spectral_rolloff(**librosa_kwargs))

    peaks_freq, peak_amps, pxx_db, freqs = find_spectrum_peaks(
        x, fs, fmin, fmax, nfft)

    if do_plot:
        colors = sns.color_palette(n_colors=3)
        f = plt.figure()
        ax = f.add_subplot(111)
        ax.plot(freqs, pxx_db, color=colors[0])
        ax.axvline(spect_centroid, color=colors[2])
        ax.scatter(peaks_freq, peak_amps, color=colors[1])
        ax.autoscale(axis="x", tight=True)
        ax.set(xlabel='Frequency (Hz)', ylabel='Gain (dB)',
               title='Spectral Features')
        if logscale:
            ax.set_xscale('log')
        ax.grid(True, which="both", ls="-")
        plt.legend(['Pxx (dB)', 'Spectral Centroid', 'Spectral Peaks'])

    return spect_centroid, spect_rolloff, peaks_freq, pxx_db, freqs
def get_feature_from_librosa(wave_name, window):
    """
    Read a WAV file and stack a set of frame-level features.

    Chroma, MFCC (the first of 13 rows dropped) with first and second deltas,
    zero-crossing rate, RMSE, spectral centroid, bandwidth, contrast, rolloff
    and polynomial coefficients are computed with an FFT/window size of
    ``window`` and a hop of ``window // 2`` (MFCC only takes the hop).

    :param wave_name: path of the WAV file, read with ``wav.read``
    :param window: analysis window / FFT size in samples
    :return: all features stacked into one array.
        NOTE(review): the original trailing comment said one row is one
        frame, but the final transpose appears to put features on rows and
        frames on columns — confirm against the caller.
    """
    (rate, sig) = wav.read(wave_name)

    # Integer division: ``window / 2`` is a float on Python 3, which is not a
    # valid hop length. The audio is passed by keyword because librosa >= 0.10
    # rejects positional audio arguments.
    hop = window // 2
    spec_args = {'y': sig, 'sr': rate, 'n_fft': window, 'hop_length': hop}

    chroma_stft_feat = feature.chroma_stft(**spec_args)

    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    mfcc_feat = mfcc_feat[1:, :]  # drop the first coefficient row
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)

    zero_crossing_rate_feat = feature.zero_crossing_rate(
        y=sig, frame_length=window, hop_length=hop)

    # Magnitude spectrogram, used for the RMSE feature only.
    S = librosa.magphase(
        librosa.stft(sig, hop_length=hop, win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)

    centroid_feat = feature.spectral_centroid(**spec_args)
    bandwith_feat = feature.spectral_bandwidth(**spec_args)
    contrast_feat = feature.spectral_contrast(**spec_args)
    # roll-off frequency
    rolloff_feat = feature.spectral_rolloff(**spec_args)
    # coefficients of a polynomial fitted to the spectrogram columns
    poly_feat = feature.poly_features(**spec_args)

    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T, centroid_feat.T,
         bandwith_feat.T, contrast_feat.T, rolloff_feat.T, poly_feat.T))
    return feat.T
def main(aud):
    """
    Classify one audio recording with a Gaussian naive Bayes model and look
    up the matching description row.

    The recording is cut into windows, spectral-centroid and chroma values
    are turned into a test data frame, the classifier is fitted on the
    module-level ``new_dataset``, and the first prediction is used to select
    rows from a description CSV.

    NOTE(review): this block was reconstructed from a flattened source, so
    the nesting of the loops below is a best guess — confirm it.

    :param aud: audio identifier; converted with ``str`` and passed to
        ``load_audio``
    :return: tuple ``(y_pred[0], check)`` — the first predicted species and
        the rows of the description CSV whose 'check' column equals it
    """
    waves = {}
    sg, mask, data, audio_mask, sample_rate = load_audio(str(aud))
    waves['audio'] = data[audio_mask]
    length = len(data[audio_mask])
    # NOTE(review): `windo` is never used afterwards.
    w = myfunc()
    windo = w(length)
    windows = {}
    wave = waves['audio']
    species = 'gens_specie'
    windows[species] = []
    # NOTE(review): consecutive windows start one sample apart (`i`, not
    # `i * 6144`), so they overlap almost entirely — confirm this is intended.
    for i in range(0, int(len(wave) / 6.144000e+03)):
        windows[species].append(wave[i:int(i + 6.144000e+03)])
    # creating df for test audio
    new_dataset_test = pd.DataFrame()
    for species in windows.keys():
        # NOTE(review): `len(windows)` is the number of species keys (1 here),
        # not the number of windows, so only the first window is featurised.
        for i in range(0, len(windows)):
            # The key is '<genus>_<species>'.
            data_point = {
                'species': species.split('_')[1],
                'genus': species.split('_')[0]
            }
            #print(type(data_point))
            spec_centroid = feature.spectral_centroid(windows[species][i])[0]
            #print(windows_fixed[species][i])
            chroma = feature.chroma_stft(windows[species][i], sample_rate)
            # One centroid value and 12 chroma values for each of 13 frames.
            for j in range(0, 13):
                data_point['spec_centr_' + str(j)] = spec_centroid[j]
                for k in range(0, 12):
                    data_point['chromogram_' + str(k) + "_" + str(j)] = chroma[k, j]
            # NOTE(review): DataFrame.append was removed in pandas 2.0.
            new_dataset_test = new_dataset_test.append(data_point, ignore_index=True)
    # classification of test audio
    # NOTE(review): `new_dataset` is not defined in this function; presumably
    # a module-level training data frame.
    features = list(new_dataset.columns)
    features.remove('species')
    features.remove('genus')
    X = new_dataset[features].values
    y = new_dataset['species'].values
    X_test = new_dataset_test[features].values
    y_test = new_dataset_test['species'].values
    NB = naive_bayes.GaussianNB()
    SSS = sklearn.model_selection.StratifiedShuffleSplit(n_splits=5, test_size=0.2)
    # The model is refitted on every split; only the prediction from the last
    # split survives the loop. The validation parts are never evaluated.
    for train_index, val_index in SSS.split(X, y):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        NB.fit(X_train, y_train)
        y_pred = NB.predict(X_test)
    check = pd.DataFrame()
    # NOTE(review): hard-coded absolute path.
    df = pd.read_csv("/home/megha/Desktop/Audio_website/templates/descr.csv", delimiter=';')
    check = df.loc[df['check'] == y_pred[0]]
    #print(check['Description'])
    #accs.append(sklearn.metrics.accuracy_score(y_pred=y_pred, y_true=y_val))
    return y_pred[0], check
def extract_features(soundwave,sampling_rate,sound_name="test",feature_list=[]):
    """
    Compute the requested librosa features and concatenate them.

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog; only used in the progress
        message
    :param feature_list: names of the features to compute; an empty list
        selects all of them. Unknown names are ignored.
    :return: np.array of all selected features, concatenated in the fixed
        order of the table below
    """
    print("Computing features for ",sound_name)

    # (extractor name, whether it also takes the sampling rate). The trailing
    # comment is the feature length noted in the original source.
    known_features = (
        ("chroma_stft", True),          # 12
        ("chroma_cqt", True),           # 12
        ("chroma_cens", True),          # 12
        ("melspectrogram", True),       # 128
        ("mfcc", True),                 # 20
        ("rmse", False),                # 1
        ("spectral_centroid", True),    # 1
        ("spectral_bandwidth", True),   # 1
        ("spectral_contrast", True),    # 7
        ("spectral_flatness", False),   # 1
        ("spectral_rolloff", True),     # 1
        ("poly_features", True),        # 2
        ("tonnetz", True),              # 6
        ("zero_crossing_rate", False),  # 1
    )

    if len(feature_list) == 0:
        feature_list = [name for name, _ in known_features]

    features = []
    for name, takes_rate in known_features:
        if name not in feature_list:
            continue
        extractor = getattr(feat, name)
        if takes_rate:
            features.append(extractor(soundwave, sampling_rate))
        else:
            features.append(extractor(soundwave))
    return np.concatenate(features)
# NOTE(review): `train_data` is created empty and not filled within this
# excerpt; the loop body may continue beyond the visible source.
train_data = np.array([])
for chunk in train_data_reader:
    #print(chunk)
    chunk1 = np.array(chunk)
    for thing in chunk1:
        # NOTE(review): `counter` is neither defined nor incremented here.
        print(counter)
        thing1 = np.array(thing)
        #print(thing1)
        # Every extractor below receives `thing1[:-1]`, i.e. the sample
        # without its last element (presumably the label — TODO confirm).
        # Each result is averaged along its second axis and appended to
        # `row`, so `row` grows into one flat feature vector per sample.
        row = np.array([])
        cstft = np.mean(lf.chroma_stft(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cstft))
        cqt = np.mean(lf.chroma_cqt(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cqt))
        sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, sens))
        spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
        tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, tonnetz))
        rmse = np.mean(lf.rmse(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rmse))
        contrast = np.mean(lf.spectral_contrast(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, contrast))
# Build data_training.csv: one row of averaged librosa features per audio
# file found under data_training/<label>/, with the label as last column.
header = ['filename', 'chroma_stft', 'rmse', 'spectral_centroid',
          'spectral_bandwidth', 'zero_crossing_rate']
header += [f'mfcc{i}' for i in range(1, 21)]
header.append('label')

sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()

# The file is opened once for the header and all rows instead of being
# reopened in append mode for every audio file.
with open('data_training.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in sukus:
        for filename in os.listdir(f'data_training/{g}'):
            songname = f'data_training/{g}/{filename}'
            y, sr = librosa.load(songname, mono=True, duration=30)
            # Audio is passed by keyword: librosa >= 0.10 rejects positional
            # audio arguments.
            chroma_stft = fitur.chroma_stft(y=y, sr=sr)
            spec_cent = fitur.spectral_centroid(y=y, sr=sr)
            spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
            rmse = fitur.rmse(y=y)
            zcr = fitur.zero_crossing_rate(y=y)
            mfcc = fitur.mfcc(y=y, sr=sr)
            # The row is built as a list rather than a space-joined string
            # that is split again, so a filename containing spaces stays in
            # one column. Values keep the original f-string formatting.
            row = [
                filename,
                f'{np.mean(chroma_stft)}',
                f'{np.mean(rmse)}',
                f'{np.mean(spec_cent)}',
                f'{np.mean(spec_bw)}',
                f'{np.mean(zcr)}',
            ]
            row.extend(f'{np.mean(e)}' for e in mfcc)
            row.append(g)
            writer.writerow(row)
print(crop_feat.shape) print(crop_feat) crop_feat = np.pad(crop_feat, (0, maxlen - len(crop_feat)), mode='constant') print(crop_feat) return crop_feat features = [] feat = mfcc(y, sr, nfilt=10, winstep=0.02) for i in range(0, feat.shape[0] - 10, 5): print(i) x = crop_feature(feat, i, nb_step=10) print(x.shape) features.append(x) print("shape {}".format(librosa.feature.rms(y).shape)) centroid = feature.spectral_centroid(y, sr) print(centroid.shape) bandwidth = feature.spectral_bandwidth(y, sr) print(bandwidth.shape) print(feature.spectral_rolloff(y, sr).shape) print(feature.rms(y).shape) name = np.full(bandwidth.shape, "haha") max = np.concatenate((name.T, centroid.T, bandwidth.T, data.T), axis=1) # # test = np.zeros((41, 10)) # print(test.shape) # test =test[0 : 11] # print(test.shape)
def feature_extraction_all(signal, sr, n_mfcc, buffer_len,
                           normalization_values):
    """
    Feature extraction interface.

    For a non-empty signal the feature vector holds, in order: the signal
    length, then the per-row mean and standard deviation over frames of MFCC,
    RMS, spectral centroid, rolloff, bandwidth, contrast and flatness. When
    ``normalization_values`` has more than one entry, every value is passed
    through ``normalize`` with the matching normalization entry.

    :param signal: audio samples
    :param sr: sampling rate
    :param n_mfcc: number of MFCC coefficients to compute. NOTE(review): the
        normalization branch always looks up 10 MFCC columns — confirm
        callers use n_mfcc=10 there.
    :param buffer_len: FFT size of the STFT; its hop is ``buffer_len / 4``
    :param normalization_values: normalization values of the dataset, indexed
        by column name ('duration', 'rms_mean', 'mfcc_mean_1', ...)
    :return: numpy array of features; empty when ``signal`` is empty
    """
    features = []
    signal = np.array(signal)

    if signal.size != 0:
        hop = int(buffer_len / 4)
        # Only the magnitude is needed; the phase is discarded.
        S, _ = librosa.magphase(
            librosa.stft(y=signal, n_fft=buffer_len, hop_length=hop))

        # (column prefix, per-frame values, number of numbered columns or
        # None when the feature has a single unnumbered column)
        frame_features = [
            ('mfcc',
             feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=int(512 * 2),
                          hop_length=int(128 * 2)),
             10),
            ('rms',
             feature.rms(S=S, frame_length=buffer_len, hop_length=hop),
             None),
            ('spectral_centroid', feature.spectral_centroid(S=S, sr=sr), None),
            ('spectral_rolloff', feature.spectral_rolloff(S=S, sr=sr), None),
            ('spectral_bandwidth', feature.spectral_bandwidth(S=S, sr=sr),
             None),
            ('spectral_contrast', feature.spectral_contrast(S=S, sr=sr), 7),
            ('spectral_flatness', feature.spectral_flatness(S=S), None),
        ]

        use_normalization = len(normalization_values) > 1

        # Duration
        if use_normalization:
            features.append(
                normalize(len(signal), normalization_values['duration']))
        else:
            features.append(len(signal))

        for prefix, values, n_columns in frame_features:
            stats = (('mean', np.mean(values, axis=1)),
                     ('std', np.std(values, axis=1)))
            for stat_name, stat in stats:
                if not use_normalization:
                    features.extend(stat)
                    continue
                column = '{}_{}'.format(prefix, stat_name)
                if n_columns is None:
                    key = column
                else:
                    key = ['{}_{}'.format(column, k)
                           for k in range(1, n_columns + 1)]
                features.extend(normalize(stat, normalization_values[key]))

    # Always hand back an array, also for an empty signal.
    return np.array(features)
def featurize(self):
    """
    Load the audio at ``self.path`` and fill ``self.raw``, ``self.vec`` and
    ``self.mat`` with features extracted via librosa.

    The signal is loaded mono at ``self.RATE`` and truncated to
    ``self.TRUNCLENGTH`` samples. Zero-crossing rate, RMSE, spectral
    centroid, spectral rolloff (90 %), spectral bandwidth and MFCC are
    stacked into an (18, 426) matrix:

    * ``self.raw`` — the truncated signal, reshaped to one row
    * ``self.vec`` — the matrix averaged (mean) along axis 1, as one row
    * ``self.mat`` — the matrix reshaped to (1, 18, 426)

    If the loaded signal is shorter than ``self.TRUNCLENGTH`` nothing is
    computed and none of the three attributes is assigned.

    :return: ``self``
    :raises AssertionError: if the stacked matrix is not (18, 426)
    """
    start = timeit.default_timer()

    logging.debug('Loading Librosa raw audio vector...')
    raw, _ = librosa.load(self.path, sr=self.RATE, mono=True)
    raw = raw[:self.TRUNCLENGTH]

    if len(raw) < self.TRUNCLENGTH:
        logging.info(f"Not featurizing {self.path} because raw vector is "
                     f"too short. `None` will be returned for all data "
                     f"formats.")
        return self

    logging.debug('Computing Zero Crossing Rate...')
    zcr_feat = zero_crossing_rate(y=raw, hop_length=self.FRAME)

    logging.debug('Computing RMSE ...')
    rmse_feat = rmse(y=raw, hop_length=self.FRAME)

    logging.debug('Computing MFCC...')
    mfcc_feat = mfcc(y=raw, sr=self.RATE, n_mfcc=self.N_MFCC)

    logging.debug('Computing spectral centroid...')
    spectral_centroid_feat = spectral_centroid(y=raw, sr=self.RATE,
                                               hop_length=self.FRAME)

    logging.debug('Computing spectral roll-off ...')
    spectral_rolloff_feat = spectral_rolloff(y=raw, sr=self.RATE,
                                             hop_length=self.FRAME,
                                             roll_percent=0.90)

    logging.debug('Computing spectral bandwidth...')
    spectral_bandwidth_feat = spectral_bandwidth(y=raw, sr=self.RATE,
                                                 hop_length=self.FRAME)

    logging.debug('Concatenate all features...')
    mat = np.concatenate((
        zcr_feat,
        rmse_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
        mfcc_feat,
    ), axis=0)
    logging.debug(f'Mat shape: {mat.shape}')

    logging.debug('Create self.raw...')
    self.raw = raw.reshape(1, -1)

    logging.debug('Create self.vec by averaging mat along time dim...')
    self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)
    logging.debug(f'Vec shape: {self.vec.shape}')

    logging.debug('Create self.mat...')
    # Explicit raise instead of ``assert``: assert statements are stripped
    # under ``python -O``. The exception type is unchanged; the message now
    # states the dims in the order that is actually checked.
    if mat.shape != (18, 426):
        raise AssertionError('Matrix dims do not match (18, 426)')
    self.mat = mat.reshape(1, 18, 426)

    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))
    return self
def centroid(wave_form, sample_rate, hop_length):
    """
    Compute the spectral centroid of a waveform and return it transposed.

    :param wave_form: audio signal, forwarded as ``y``
    :param sample_rate: sampling rate, forwarded as ``sr``
    :param hop_length: hop length, forwarded unchanged
    :return: the transposed ``feature.spectral_centroid`` result
    """
    per_frame = feature.spectral_centroid(
        y=wave_form,
        sr=sample_rate,
        hop_length=hop_length,
    )
    return per_frame.T