def get_mean_percussive_ratio_dbs(feat_files, margin=3.0):
    """Compute the mean percussive-to-total energy ratio (in dB) per file.

    :param feat_files: iterable of paths to .npz feature files containing
        a 'linspec_mag' array (linear spectrogram magnitude)
    :param margin: HPSS separation margin forwarded to hpss()
    :return: tuple of (np.array of per-file mean ratios in dB, list of the
        indices of the files that loaded successfully)
    """
    mean_percussive_ratios_db = []
    idxs = []
    # enumerate() replaces the hand-rolled idx counter that had to be
    # incremented in both the success and the failure branch.
    for idx, feat_file in enumerate(tqdm.tqdm(feat_files)):
        try:
            features = np.load(feat_file)
        except Exception as e:
            print('Skipping {}. {}'.format(feat_file, e))
            continue
        idxs.append(idx)
        # NOTE(review): the phase term reuses the magnitude array
        # ('linspec_mag'); a true complex spectrogram would need the stored
        # phase instead. Preserved as-is — confirm upstream intent.
        D = features['linspec_mag'] * np.exp(1.j * features['linspec_mag'])
        H, P = hpss(D, margin=margin)
        Pm, Pp = magphase(P)
        S, phase = magphase(D)
        P_rms = rmse(S=Pm)
        S_rms = rmse(S=S)
        percussive_ratio = P_rms / S_rms
        mean_percussive_ratio_db = amplitude_to_db(
            np.array([np.mean(percussive_ratio)]))[0]
        mean_percussive_ratios_db.append(mean_percussive_ratio_db)
    return np.array(mean_percussive_ratios_db), idxs
def compute_librosa_features(self, audio_data, feat_name):
    """ Compute feature using librosa methods

    :param audio_data: signal
    :param feat_name: feature to compute
    :return: np array
    """
    # # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
    # chroma_cens_feat = chroma_cens(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
    logging.info('Computing {}...'.format(feat_name))
    # Lazily-evaluated dispatch table: only the requested extractor runs.
    extractors = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rmse(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    extractor = extractors.get(feat_name)
    # Unknown names fall through to None, matching the original if/elif chain.
    return extractor() if extractor is not None else None
def compute_librosa_features(self, audio_data, feat_name):
    """ Compute feature using librosa methods

    :param audio_data: signal
    :param feat_name: feature to compute
    :return: np array
    """
    # if rmse_feat.shape == (1, 427):
    #     rmse_feat = np.concatenate((rmse_feat, np.zeros((1, 4))), axis=1)
    # Shared keyword bundles keep each guard clause short.
    frame_kwargs = dict(y=audio_data, hop_length=self.FRAME)
    spectral_kwargs = dict(y=audio_data, sr=self.RATE, hop_length=self.FRAME)
    if feat_name == 'zero_crossing_rate':
        return zero_crossing_rate(**frame_kwargs)
    if feat_name == 'rmse':
        return rmse(**frame_kwargs)
    if feat_name == 'mfcc':
        return mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    if feat_name == 'spectral_centroid':
        return spectral_centroid(**spectral_kwargs)
    if feat_name == 'spectral_rolloff':
        return spectral_rolloff(roll_percent=0.90, **spectral_kwargs)
    if feat_name == 'spectral_bandwidth':
        return spectral_bandwidth(**spectral_kwargs)
def initialize_bpf(filename, filepath, only_show=False, rewrite=False):
    """Analyse a track and write its beat-point file (dat/bpf/<filename>.bpf).

    Converts the input to mono wav, beat-tracks it twice (tight and loose),
    caches plot data under dat/plt/, shows the plot in a child process and,
    unless only_show is set, filters loose beats by onset/RMS strength and
    writes tempo + beat locations + per-beat prominence to the .bpf file.

    :param filename: base name (no extension) used for all dat/* paths
    :param filepath: source path handed to the mono-wav converter
    :param only_show: if True, only display the cached plot data
    :param rewrite: if True, regenerate the cached .plt data even if present
    :return: path of the .bpf file
    """
    # NOTE(review): nesting reconstructed from a single-line source; confirm
    # the extent of the `if not only_show:` block against the original.
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))
    # Tight tracking yields the main (dominant) beat points.
    tempo, beats = beat_track(y=y, tightness=100)
    # Loose tracking (low tightness = weaker snapping, more chaotic output)
    # yields many candidate beat points to filter below.
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # RMS energy envelope
    tempo = normalize_tempo(tempo)
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    # Onset strength sampled at every loose beat frame.
    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)
    new_frames_list = []
    if not os.path.exists("dat/plt/%s.plt" % filename) or rewrite:
        print("No plt found, initializing...")
        plt_file = open("dat/plt/%s.plt" % filename, mode="w")
        plt_file.write(
            repr((filename, rms_envelope.T.tolist(), onset_all_beat,
                  frame_all_beat, MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)))
        plt_file.close()
    plt_file = open("dat/plt/%s.plt" % filename, mode="r")
    # HACK: eval() on cached file contents — only tolerable because this
    # file is written by the branch above; never point it at untrusted data.
    plt_file_content = eval(plt_file.read())
    plt_process = Process(target=plt_show, args=plt_file_content)
    plt_process.start()
    if not only_show:
        # Keep a loose beat when it is strong enough in onset strength OR in
        # RMS energy relative to the track-wide statistics.
        for beat in beats1:
            if onset_envelope[beat] > AVERAGE_ONSET / ONSET_DETECT_RATIO \
                    or rms_envelope.T[beat] > MAX_RMS / RMS_RATIO:
                new_frames_list.append(beat)
        print("{MAX_ONSET}:%f" % onset_envelope.max())
        new_beats_frame = np.array(new_frames_list)
        mainbeatlocation = frames_to_time(beats)
        beatlocation = frames_to_time(new_beats_frame).tolist()
        beatmain = []
        # Distance from each kept beat to the nearest main beat measures how
        # "primary" that beat is.
        for beat in beatlocation:
            p = abs(mainbeatlocation - beat)
            # print("%f: %f" % (beat, p.min()))
            beatmain.append(p.min())
        file = open("dat/bpf/%s.bpf" % filename, mode="w")
        file.write(
            repr([tempo, beatlocation, beatmain, mainbeatlocation.tolist()]))
        file.close()
    # Remove the intermediate mono wav once analysis is done.
    if (os.path.exists("dat/%s.wav" % filename)):
        os.remove("dat/%s.wav" % filename)
    return "dat/bpf/%s.bpf" % filename
def __init__(self, name, y, sr, per_order, text):
    """Store the raw audio plus per-frame librosa features for one clip."""
    # librosa feature extractors return a (1, n_frames) array; keep row 0.
    rmse_row = rmse(y)[0]
    bandwidth_row = spectral_bandwidth(y, sr=sr)[0]
    zcr_row = zero_crossing_rate(y)[0]
    self.name = name
    self.audio_timeseries = y
    self.sr = sr
    self.per_order = per_order
    self.text = text
    self.rmse_ = rmse_row
    self.spectral_bandwidth_ = bandwidth_row
    self.zero_crossing_rate_ = zcr_row
    # Assigned later by the labelling step.
    self.label = None
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature.

    Each signal is cut into frames, features are computed for each frame
    and averaged with np.mean.  The averaged vector becomes a one-row
    data frame with named columns plus the instance label.

    :param audio_data: the input signal samples with frequency 44.1 kHz
    :return: a one-row pandas DataFrame (one column per feature + 'label')
    """
    zcr_feat = zero_crossing_rate(y=audio_data, hop_length=self.FRAME)
    rmse_feat = rmse(y=audio_data, hop_length=self.FRAME)
    mfcc_feat = mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    spectral_centroid_feat = spectral_centroid(y=audio_data, sr=self.RATE,
                                               hop_length=self.FRAME)
    spectral_rolloff_feat = spectral_rolloff(y=audio_data, sr=self.RATE,
                                             hop_length=self.FRAME,
                                             roll_percent=0.90)
    spectral_bandwidth_feat = spectral_bandwidth(y=audio_data, sr=self.RATE,
                                                 hop_length=self.FRAME)
    # chroma_cens is deliberately excluded (disabled upstream).
    concat_feat = np.concatenate(
        (
            zcr_feat,
            rmse_feat,
            mfcc_feat,
            spectral_centroid_feat,
            spectral_rolloff_feat,
            spectral_bandwidth_feat), axis=0)
    # Fix: the aggregate is the mean, not the median — the old local name
    # 'median_feat' (and the docstring's "[median]") were misleading.
    mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()
    features_df = pd.DataFrame(data=mean_feat, columns=self.COL, index=None)
    features_df['label'] = self.label
    return features_df
def features(rawsnd, num):
    """Return MFCC+delta features for one sound file (energy in slot 0)."""
    import librosa
    import librosa.feature as lib_feat
    signal, sr = librosa.load(rawsnd, sr=16000)
    # 25 ms analysis window with a 10 ms hop, in samples.
    win = int(sr * 0.025)
    hop = int(sr * 0.010)
    feats = lib_feat.mfcc(y=signal, sr=sr, n_mfcc=num,
                          n_fft=win, hop_length=hop)
    # Replace MFCC 0 with the per-frame RMS energy envelope.
    feats[0] = lib_feat.rmse(y=signal, hop_length=hop, n_fft=win)
    stacked = np.vstack([feats, librosa.feature.delta(feats)])
    stacked /= np.max(np.abs(stacked), axis=0)
    return stacked.T
def get_mir(audio_path): hop_length = 200 # Spectral Flux/Flatness, MFCCs, SDCs spectrogram = madmom.audio.spectrogram.Spectrogram(audio_path, frame_size=2048, hop_size=hop_length, fft_size=4096) # only take 30s snippets to align data audio = madmom.audio.signal.Signal(audio_path, dtype=float, start=0, stop=30) all_features = [] #print(spectrogram.shape) #print(audio.shape) #print('signal sampling rate: {}'.format(audio.sample_rate)) # madmom features all_features.extend([ spectral_flux(spectrogram), superflux(spectrogram), complex_flux(spectrogram) ]) #, MFCC(spectrogram)]) # mfcc still wrong shape as it is a 2 array # librosa features libr_features = [ spectral_centroid(audio, hop_length=hop_length), spectral_bandwidth(audio, hop_length=hop_length), spectral_flatness(audio, hop_length=hop_length), spectral_rolloff(audio, hop_length=hop_length), rmse(audio, hop_length=hop_length), zero_crossing_rate(audio, hop_length=hop_length) ] #, mfcc(audio)]) for libr in libr_features: all_features.append(np.squeeze(libr, axis=0)) # for feature in all_features: # print(feature.shape) X = np.stack(all_features, axis=1)[na, :, :] return X
def plt_show_solo(filename, filepath):
    """Load a track, compute its onset/RMS statistics and display them."""
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    load_started = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - load_started))
    # Low tightness lets the tracker report loosely-snapped beat frames.
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # RMS energy envelope
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    # Onset strength at each loose beat frame, plus the frames themselves.
    onset_all_beat = [onset_envelope[beat] for beat in beats1]
    frame_all_beat = [beat for beat in beats1]
    AVERAGE_ONSET = np.mean(onset_all_beat)
    plt_show(filename, rms_envelope.T, onset_all_beat, frame_all_beat,
             MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)
def features(rawsnd, num):
    """Compute num amount of audio features of a sound

    Args:
        rawsnd : array with string paths to .wav files
        num : numbers of mfccs to compute
    Returns:
        Return a num x max_stepsize*32 feature vector
    """
    import librosa
    import librosa.feature as lib_feat
    audio, rate = librosa.load(rawsnd, sr=16000)
    fft_len = int(rate * 0.025)   # 25 ms window
    step = int(rate * 0.010)      # 10 ms hop
    coeffs = lib_feat.mfcc(y=audio, sr=rate, n_mfcc=num,
                           n_fft=fft_len, hop_length=step)
    # The first coefficient is swapped for each frame's RMS energy.
    coeffs[0] = lib_feat.rmse(y=audio, hop_length=step, n_fft=fft_len)
    full = np.vstack([coeffs, librosa.feature.delta(coeffs)])
    full /= np.max(np.abs(full), axis=0)
    return full.T
def feature_engineer(self, audio_data):
    """
    Extract features using librosa.feature.

    Each signal is cut into frames, features are computed for each frame
    and averaged with np.mean.

    :param audio_data: the input signal samples with frequency 44.1 kHz
    :return: tuple (mean feature row vector, label)
    """
    def timed(label, extractor):
        # Log each extractor's wall time; the extraction itself is unchanged.
        logging.info('Computing {}...'.format(label))
        start = timeit.default_timer()
        result = extractor()
        stop = timeit.default_timer()
        logging.info('Time taken: {0}'.format(stop - start))
        return result

    zcr_feat = timed('zero_crossing_rate',
                     lambda: zero_crossing_rate(y=audio_data,
                                                hop_length=self.FRAME))
    rmse_feat = timed('rmse',
                      lambda: rmse(y=audio_data, hop_length=self.FRAME))
    mfcc_feat = timed('mfcc',
                      lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13))
    spectral_centroid_feat = timed(
        'spectral centroid',
        lambda: spectral_centroid(y=audio_data, sr=self.RATE,
                                  hop_length=self.FRAME))
    spectral_rolloff_feat = timed(
        'spectral rolloff',
        lambda: spectral_rolloff(y=audio_data, sr=self.RATE,
                                 hop_length=self.FRAME, roll_percent=0.90))
    spectral_bandwidth_feat = timed(
        'spectral bandwidth',
        lambda: spectral_bandwidth(y=audio_data, sr=self.RATE,
                                   hop_length=self.FRAME))
    # chroma_cens stays disabled (librosa tonnetz TypeError upstream):
    # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
    concat_feat = np.concatenate((zcr_feat,
                                  rmse_feat,
                                  mfcc_feat,
                                  spectral_centroid_feat,
                                  spectral_rolloff_feat,
                                  spectral_bandwidth_feat
                                  ), axis=0)
    logging.info('Averaging...')
    start = timeit.default_timer()
    mean_feat = np.mean(concat_feat, axis=1, keepdims=True).transpose()
    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))
    return mean_feat, self.label
# Build the CSV header: filename, five summary features, 20 MFCCs, label.
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

# Write the header once; rows are appended per training file below.
with open('data_training.csv', 'w', newline='') as csv_file:
    csv.writer(csv_file).writerow(header)

sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()
for g in sukus:
    for filename in os.listdir(f'data_training/{g}'):
        # Fix: the song path and the CSV row must interpolate the current
        # file name; the previous literal placeholder was broken.
        songname = f'data_training/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = fitur.chroma_stft(y=y, sr=sr)
        spec_cent = fitur.spectral_centroid(y=y, sr=sr)
        spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
        # Renamed to avoid shadowing the feature-function names.
        rmse_feat = fitur.rmse(y)
        zcr = fitur.zero_crossing_rate(y)
        mfcc_feat = fitur.mfcc(y=y, sr=sr)
        to_append = (f'{filename} {np.mean(chroma_stft)} {np.mean(rmse_feat)}'
                     f' {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(zcr)}')
        for e in mfcc_feat:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        # Fix: use a context manager so the handle is closed every iteration.
        with open('data_training.csv', 'a', newline='') as csv_file:
            csv.writer(csv_file).writerow(to_append.split())
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: names of features to compute; None or an empty list
        means all supported features
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave
    """
    print("Computing features for ", sound_name)
    # Fix: the original used a mutable default argument (feature_list=[]).
    # None preserves the same "empty means everything" behaviour safely.
    if not feature_list:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens",
                        "melspectrogram", "mfcc", "rmse", "spectral_centroid",
                        "spectral_bandwidth", "spectral_contrast",
                        "spectral_flatness", "spectral_rolloff",
                        "poly_features", "tonnetz", "zero_crossing_rate"]

    # (name, rows, extractor) in the fixed concatenation order; 'rows' only
    # documents how many feature rows each extractor contributes.
    extractors = (
        ("chroma_stft", 12, lambda: feat.chroma_stft(soundwave, sampling_rate)),
        ("chroma_cqt", 12, lambda: feat.chroma_cqt(soundwave, sampling_rate)),
        ("chroma_cens", 12, lambda: feat.chroma_cens(soundwave, sampling_rate)),
        ("melspectrogram", 128, lambda: feat.melspectrogram(soundwave, sampling_rate)),
        ("mfcc", 20, lambda: feat.mfcc(soundwave, sampling_rate)),
        ("rmse", 1, lambda: feat.rmse(soundwave)),
        ("spectral_centroid", 1, lambda: feat.spectral_centroid(soundwave, sampling_rate)),
        ("spectral_bandwidth", 1, lambda: feat.spectral_bandwidth(soundwave, sampling_rate)),
        ("spectral_contrast", 7, lambda: feat.spectral_contrast(soundwave, sampling_rate)),
        ("spectral_flatness", 1, lambda: feat.spectral_flatness(soundwave)),
        ("spectral_rolloff", 1, lambda: feat.spectral_rolloff(soundwave, sampling_rate)),
        ("poly_features", 2, lambda: feat.poly_features(soundwave, sampling_rate)),
        ("tonnetz", 6, lambda: feat.tonnetz(soundwave, sampling_rate)),
        ("zero_crossing_rate", 1, lambda: feat.zero_crossing_rate(soundwave)),
    )
    features = [make() for name, rows, make in extractors
                if name in feature_list]
    return np.concatenate(features)
def rmse_spec(raw):
    """Calculates the root-mean-square energy for each sample in data."""
    energies = []
    for spectrogram in raw:
        energies.append(rmse(S=spectrogram))
    return np.array(energies)
def get_feature_from_librosa(wave_name, window):
    """Extract a frame-level feature matrix from a wav file.

    Features per frame: 12 chroma, 12 MFCCs (coefficient 0 dropped) plus
    deltas and delta-deltas, zero-crossing rate, RMSE, spectral centroid,
    bandwidth, contrast, roll-off and 2 polynomial coefficients.

    :param wave_name: path to the wav file
    :param window: analysis window length in samples (hop is window // 2)
    :return: numpy array of stacked features (transposed at the end; confirm
        the intended frame/feature orientation against callers)
    """
    # Fix: use floor division — on Python 3, window / 2 is a float, which
    # librosa rejects as a hop length. Identical result on integer Python 2.
    hop = window // 2
    (rate, sig) = wav.read(wave_name)
    chroma_stft_feat = feature.chroma_stft(sig, rate, n_fft=window,
                                           hop_length=hop)
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    mfcc_feat = mfcc_feat[1:, :]  # drop MFCC 0 (overall energy)
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig,
                                                         frame_length=window,
                                                         hop_length=hop)
    # RMSE from the magnitude spectrogram of a Hann-windowed STFT.
    S = librosa.magphase(
        librosa.stft(sig, hop_length=hop, win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    centroid_feat = feature.spectral_centroid(sig, rate, n_fft=window,
                                              hop_length=hop)
    bandwith_feat = feature.spectral_bandwidth(sig, rate, n_fft=window,
                                               hop_length=hop)
    contrast_feat = feature.spectral_contrast(sig, rate, n_fft=window,
                                              hop_length=hop)
    # Spectral roll-off frequency per frame.
    rolloff_feat = feature.spectral_rolloff(sig, rate, n_fft=window,
                                            hop_length=hop)
    # Coefficients of an n-th order polynomial fit to each spectrogram column.
    poly_feat = feature.poly_features(sig, rate, n_fft=window,
                                      hop_length=hop)
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T,
         centroid_feat.T, bandwith_feat.T, contrast_feat.T, rolloff_feat.T,
         poly_feat.T))
    feat = feat.T
    return feat
def featurize(self):
    """
    Extract features using librosa.feature.

    Converts the raw audio (loaded from self.path and truncated to
    TRUNCLENGTH samples) into a stack of frame-level features — zero
    crossing rate, RMSE, spectral centroid/roll-off/bandwidth and MFCCs —
    and exposes three representations on self:

    * self.raw -- the truncated raw signal, shape (1, TRUNCLENGTH)
    * self.vec -- per-feature means over time, one row
    * self.mat -- the full feature matrix, shape (1, 18, 426)

    :return: self; when the loaded audio is shorter than TRUNCLENGTH the
        method returns early and raw/vec/mat are left untouched
    """
    start = timeit.default_timer()
    logging.debug('Loading Librosa raw audio vector...')
    raw, _ = librosa.load(self.path, sr=self.RATE, mono=True)
    raw = raw[:self.TRUNCLENGTH]
    if len(raw) < self.TRUNCLENGTH:
        logging.info(f"Not featurizing {self.path} because raw vector is "
                     f"too short. `None` will be returned for all data "
                     f"formats.")
        return self
    logging.debug('Computing Zero Crossing Rate...')
    zcr_feat = zero_crossing_rate(y=raw, hop_length=self.FRAME)
    logging.debug('Computing RMSE ...')
    rmse_feat = rmse(y=raw, hop_length=self.FRAME)
    logging.debug('Computing MFCC...')
    mfcc_feat = mfcc(y=raw, sr=self.RATE, n_mfcc=self.N_MFCC)
    logging.debug('Computing spectral centroid...')
    spectral_centroid_feat = spectral_centroid(y=raw, sr=self.RATE,
                                               hop_length=self.FRAME)
    logging.debug('Computing spectral roll-off ...')
    spectral_rolloff_feat = spectral_rolloff(y=raw, sr=self.RATE,
                                             hop_length=self.FRAME,
                                             roll_percent=0.90)
    logging.debug('Computing spectral bandwidth...')
    spectral_bandwidth_feat = spectral_bandwidth(y=raw, sr=self.RATE,
                                                 hop_length=self.FRAME)
    logging.debug('Concatenate all features...')
    mat = np.concatenate((
        zcr_feat,
        rmse_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
        spectral_bandwidth_feat,
        mfcc_feat,
    ), axis=0)
    logging.debug(f'Mat shape: {mat.shape}')
    logging.debug(f'Create self.raw...')
    self.raw = raw.reshape(1, -1)
    logging.debug(f'Create self.vec by averaging mat along time dim...')
    self.vec = np.mean(mat, axis=1, keepdims=True).reshape(1, -1)
    logging.debug(f'Vec shape: {self.vec.shape}')
    logging.debug(f'Create self.mat...')
    # Fix: the failure message previously reported the expected dims
    # swapped as (426, 18); the assertion checks for (18, 426).
    assert mat.shape == (18, 426), 'Matrix dims do not match (18, 426)'
    self.mat = mat.reshape(
        1,
        18,
        426,
    )
    stop = timeit.default_timer()
    logging.info('Time taken: {0}'.format(stop - start))
    return self
row = np.concatenate((row, cqt)) sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0) row = np.concatenate((row, sens)) spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0) row = np.concatenate((row, spcent)) flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0) row = np.concatenate((row, flatness)) rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0) row = np.concatenate((row, rolloff)) mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0) row = np.concatenate((row, mspec)) mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0) row = np.concatenate((row, mfcc)) tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0) row = np.concatenate((row, tonnetz)) rmse = np.mean(lf.rmse(thing1[:-1]).T, axis=0) row = np.concatenate((row, rmse)) contrast = np.mean(lf.spectral_contrast(thing1[:-1]).T, axis=0) row = np.concatenate((row, contrast)) tempo = np.mean(lf.tempogram(thing[:-1], win_length=88).T, axis=0) row = np.concatenate((row, tempo)) row = np.append(row, thing1[-1]) #print(len(row)) train_data = np.append(train_data, row) counter += 1 columns = ["feat_" + str(i) for i in range(299)] columns.append("class") df_train2 = pd.DataFrame(columns=columns)