def test_spectral_flatness(self):
    """Check spectral_flatness(args) against librosa computed directly on the raw signal."""
    expected = rosaft.spectral_flatness(y=self.sig, S=None, n_fft=nfft, hop_length=stepsize)
    computed = spectral_flatness(self.args)
    # Compare element-wise; the largest absolute deviation must stay under the tolerance.
    max_abs_err = np.abs(expected - computed).max()
    self.assertTrue(max_abs_err < tol)
def spectral_flatness(args):
    """Compute frame-wise spectral flatness from a precomputed power spectrogram.

    Pulls the PSD and the STFT parameters out of `args`, derives the hop size
    from the frame overlap, and delegates to librosa's implementation.
    """
    nfft, noverlap = unroll_args(args, ['nfft', 'noverlap'])
    psd = get_psd(args)
    # Hop size is the frame advance: frame length minus the overlapped samples.
    step = nfft - noverlap
    return rosaft.spectral_flatness(y=None, S=psd, n_fft=nfft, hop_length=step)
def extract_feature(self, audio_data):
    """
    Extract summary features from audio data.

    :param audio_data: 1-D array of audio samples (mono)
    :return: tuple of (feature vector as np.ndarray, self.label)
    """
    # librosa requires an integer hop_length; `self.FRAME / 2` yields a float
    # under Python 3 and raises a ParameterError, so use floor division.
    half_frame = self.FRAME // 2

    zcr = lrf.zero_crossing_rate(audio_data, frame_length=self.FRAME, hop_length=half_frame)
    feature_zcr = np.mean(zcr)

    # Short-time energy over 20 ms hamming windows.
    win_len = int(20 * 0.001 * self.RATE)
    ste = audio_utils.AudioUtils.ste(audio_data, 'hamming', win_len)
    feature_ste = np.mean(ste)

    # Mean of the positive energy increments; guard against an empty selection
    # (monotonically non-increasing energy) which would otherwise produce NaN.
    ste_acc = np.diff(ste)
    positive_acc = ste_acc[ste_acc > 0]
    feature_steacc = np.mean(positive_acc) if positive_acc.size else 0.0

    stzcr = audio_utils.AudioUtils.stzcr(audio_data, 'hamming', win_len)
    feature_stezcr = np.mean(stzcr)

    mfcc = lrf.mfcc(y=audio_data, sr=self.RATE, n_mfcc=13)
    feature_mfcc = np.mean(mfcc, axis=1)

    spectral_centroid = lrf.spectral_centroid(y=audio_data, sr=self.RATE, hop_length=half_frame)
    feature_spectral_centroid = np.mean(spectral_centroid)

    spectral_bandwidth = lrf.spectral_bandwidth(y=audio_data, sr=self.RATE, hop_length=half_frame)
    feature_spectral_bandwidth = np.mean(spectral_bandwidth)

    spectral_rolloff = lrf.spectral_rolloff(y=audio_data, sr=self.RATE,
                                            hop_length=half_frame, roll_percent=0.90)
    feature_spectral_rolloff = np.mean(spectral_rolloff)

    spectral_flatness = lrf.spectral_flatness(y=audio_data, hop_length=half_frame)
    feature_spectral_flatness = np.mean(spectral_flatness)

    # Scalar features first, then the 13 MFCC means appended at the end.
    features = np.append([
        feature_zcr, feature_ste, feature_steacc, feature_stezcr,
        feature_spectral_centroid, feature_spectral_bandwidth,
        feature_spectral_rolloff, feature_spectral_flatness
    ], feature_mfcc)
    return features, self.label
def get_mir(audio_path):
    """Build a (1, frames, features) matrix of MIR features for one audio file.

    madmom supplies the flux-style features computed on a fixed spectrogram;
    librosa supplies the per-frame spectral descriptors computed on the raw
    signal (truncated to the first 30 s to align the data).
    """
    hop_length = 200

    # Spectral Flux/Flatness, MFCCs, SDCs
    spectrogram = madmom.audio.spectrogram.Spectrogram(
        audio_path, frame_size=2048, hop_size=hop_length, fft_size=4096)
    # only take 30s snippets to align data
    audio = madmom.audio.signal.Signal(audio_path, dtype=float, start=0, stop=30)

    # madmom features (MFCC omitted: it is 2-D and does not stack with these).
    all_features = [
        spectral_flux(spectrogram),
        superflux(spectrogram),
        complex_flux(spectrogram),
    ]

    # librosa features come back with a leading singleton axis; squeeze it off.
    librosa_extractors = (
        spectral_centroid,
        spectral_bandwidth,
        spectral_flatness,
        spectral_rolloff,
        rmse,
        zero_crossing_rate,
    )
    for extractor in librosa_extractors:
        result = extractor(audio, hop_length=hop_length)
        all_features.append(np.squeeze(result, axis=0))

    # Stack to (frames, features) and add a leading batch axis.
    X = np.stack(all_features, axis=1)[na, :, :]
    return X
def feature_extraction_all(signal, sr, n_mfcc, buffer_len, normalization_values):
    """
    Feature extraction interface.

    Features are extracted from the incoming audio signal when an onset is
    detected. When normalization statistics are supplied, every feature is
    normalized with them; otherwise the raw values are emitted in the same
    order.

    :param signal: audio samples for one onset segment
    :param sr: sample rate of the signal
    :param n_mfcc: number of MFCC coefficients to compute
    :param buffer_len: STFT frame length in samples (hop is buffer_len / 4)
    :param normalization_values: normalization values of the dataset
    :return: 1-D np.ndarray of features (empty if the signal is empty)
    """
    features = []
    signal = np.array(signal)
    if signal.size != 0:
        # Magnitude spectrogram shared by all the spectral descriptors below.
        S, phase = librosa.magphase(
            librosa.stft(y=signal, n_fft=buffer_len,
                         hop_length=int(buffer_len / 4)))

        # Mel Frequency cepstral coefficients
        # NOTE(review): MFCC uses its own fixed STFT sizes (1024/256), not
        # buffer_len — presumably intentional; confirm against training setup.
        mfcc = feature.mfcc(y=signal,
                            sr=sr,
                            n_mfcc=n_mfcc,
                            n_fft=int(512 * 2),
                            hop_length=int(128 * 2))
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)

        # RMS
        rms = feature.rms(S=S,
                          frame_length=buffer_len,
                          hop_length=int(buffer_len / 4))
        rms_mean = np.mean(rms, axis=1)
        rms_std = np.std(rms, axis=1)

        # Spectral Centroid
        spectral_centroid = feature.spectral_centroid(S=S, sr=sr)
        spectral_centroid_mean = np.mean(spectral_centroid, axis=1)
        spectral_centroid_std = np.std(spectral_centroid, axis=1)

        # Rolloff
        spectral_rolloff = feature.spectral_rolloff(S=S, sr=sr)
        spectral_rolloff_mean = np.mean(spectral_rolloff, axis=1)
        spectral_rolloff_std = np.std(spectral_rolloff, axis=1)

        # Bandwidth
        spectral_bandwidth = feature.spectral_bandwidth(S=S, sr=sr)
        spectral_bandwidth_mean = np.mean(spectral_bandwidth, axis=1)
        spectral_bandwidth_std = np.std(spectral_bandwidth, axis=1)

        # Contrast
        spectral_contrast = feature.spectral_contrast(S=S, sr=sr)
        spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
        spectral_contrast_std = np.std(spectral_contrast, axis=1)

        # Flatness
        spectral_flatness = feature.spectral_flatness(S=S)
        spectral_flatness_mean = np.mean(spectral_flatness, axis=1)
        spectral_flatness_std = np.std(spectral_flatness, axis=1)

        if len(normalization_values) > 1:
            # Normalized path. The keyed lookups below fix the expected
            # schema: 10 MFCC columns (assumes n_mfcc == 10 — TODO confirm)
            # and 7 spectral-contrast bands (librosa's default).
            # Duration
            features.append(
                normalize(len(signal), normalization_values['duration']))
            features.extend(
                normalize(
                    mfcc_mean,
                    normalization_values[[
                        'mfcc_mean_1', 'mfcc_mean_2', 'mfcc_mean_3',
                        'mfcc_mean_4', 'mfcc_mean_5', 'mfcc_mean_6',
                        'mfcc_mean_7', 'mfcc_mean_8', 'mfcc_mean_9',
                        'mfcc_mean_10'
                    ]]))
            features.extend(
                normalize(
                    mfcc_std,
                    normalization_values[[
                        'mfcc_std_1', 'mfcc_std_2', 'mfcc_std_3',
                        'mfcc_std_4', 'mfcc_std_5', 'mfcc_std_6',
                        'mfcc_std_7', 'mfcc_std_8', 'mfcc_std_9',
                        'mfcc_std_10'
                    ]]))
            features.extend(
                normalize(rms_mean, normalization_values['rms_mean']))
            features.extend(normalize(rms_std, normalization_values['rms_std']))
            features.extend(
                normalize(spectral_centroid_mean,
                          normalization_values['spectral_centroid_mean']))
            features.extend(
                normalize(spectral_centroid_std,
                          normalization_values['spectral_centroid_std']))
            features.extend(
                normalize(spectral_rolloff_mean,
                          normalization_values['spectral_rolloff_mean']))
            features.extend(
                normalize(spectral_rolloff_std,
                          normalization_values['spectral_rolloff_std']))
            features.extend(
                normalize(spectral_bandwidth_mean,
                          normalization_values['spectral_bandwidth_mean']))
            features.extend(
                normalize(spectral_bandwidth_std,
                          normalization_values['spectral_bandwidth_std']))
            features.extend(
                normalize(
                    spectral_contrast_mean,
                    normalization_values[[
                        'spectral_contrast_mean_1', 'spectral_contrast_mean_2',
                        'spectral_contrast_mean_3', 'spectral_contrast_mean_4',
                        'spectral_contrast_mean_5', 'spectral_contrast_mean_6',
                        'spectral_contrast_mean_7'
                    ]]))
            features.extend(
                normalize(
                    spectral_contrast_std,
                    normalization_values[[
                        'spectral_contrast_std_1', 'spectral_contrast_std_2',
                        'spectral_contrast_std_3', 'spectral_contrast_std_4',
                        'spectral_contrast_std_5', 'spectral_contrast_std_6',
                        'spectral_contrast_std_7'
                    ]]))
            features.extend(
                normalize(spectral_flatness_mean,
                          normalization_values['spectral_flatness_mean']))
            features.extend(
                normalize(spectral_flatness_std,
                          normalization_values['spectral_flatness_std']))
        else:
            # Un-normalized path: raw values, same feature order as above.
            features.append(len(signal))
            features.extend(mfcc_mean)
            features.extend(mfcc_std)
            features.extend(rms_mean)
            features.extend(rms_std)
            features.extend(spectral_centroid_mean)
            features.extend(spectral_centroid_std)
            features.extend(spectral_rolloff_mean)
            features.extend(spectral_rolloff_std)
            features.extend(spectral_bandwidth_mean)
            features.extend(spectral_bandwidth_std)
            features.extend(spectral_contrast_mean)
            features.extend(spectral_contrast_std)
            features.extend(spectral_flatness_mean)
            features.extend(spectral_flatness_std)

    # An empty signal falls through to here and returns an empty array.
    features = np.array(features)

    return features
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    Extracts features with help of librosa.

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog
    :param feature_list: list of features to compute; None or empty means all
    :return: np.array of all features for the soundwave, stacked row-wise
    """
    print("Computing features for ", sound_name)

    # Canonical extraction order with per-feature row counts. A dispatch
    # table replaces the original 14-branch if-chain; the mutable-default
    # argument (feature_list=[]) is also fixed by defaulting to None.
    extractors = [
        ("chroma_stft", 12, lambda: feat.chroma_stft(soundwave, sampling_rate)),
        ("chroma_cqt", 12, lambda: feat.chroma_cqt(soundwave, sampling_rate)),
        ("chroma_cens", 12, lambda: feat.chroma_cens(soundwave, sampling_rate)),
        ("melspectrogram", 128, lambda: feat.melspectrogram(soundwave, sampling_rate)),
        ("mfcc", 20, lambda: feat.mfcc(soundwave, sampling_rate)),
        ("rmse", 1, lambda: feat.rmse(soundwave)),
        ("spectral_centroid", 1, lambda: feat.spectral_centroid(soundwave, sampling_rate)),
        ("spectral_bandwidth", 1, lambda: feat.spectral_bandwidth(soundwave, sampling_rate)),
        ("spectral_contrast", 7, lambda: feat.spectral_contrast(soundwave, sampling_rate)),
        ("spectral_flatness", 1, lambda: feat.spectral_flatness(soundwave)),
        ("spectral_rolloff", 1, lambda: feat.spectral_rolloff(soundwave, sampling_rate)),
        ("poly_features", 2, lambda: feat.poly_features(soundwave, sampling_rate)),
        ("tonnetz", 6, lambda: feat.tonnetz(soundwave, sampling_rate)),
        ("zero_crossing_rate", 1, lambda: feat.zero_crossing_rate(soundwave)),
    ]

    # None (new default) and [] (old default) both mean "compute everything".
    if not feature_list:
        feature_list = [name for name, _, _ in extractors]

    # Extraction always follows the canonical table order, matching the
    # original if-chain regardless of the order of names in feature_list.
    features = [compute() for name, _, compute in extractors
                if name in feature_list]

    return np.concatenate(features)
# Build one feature row per item in the chunk: each librosa feature is
# computed on the item minus its final element (presumably the label —
# TODO confirm against the chunk layout), then frame-averaged and
# concatenated in a fixed order.
chunk1 = np.array(chunk)
for thing in chunk1:
    print(counter)
    thing1 = np.array(thing)
    samples = thing1[:-1]  # drop trailing element before feature extraction

    row = np.array([])
    cstft = np.mean(lf.chroma_stft(samples).T, axis=0)
    row = np.concatenate((row, cstft))
    cqt = np.mean(lf.chroma_cqt(samples).T, axis=0)
    row = np.concatenate((row, cqt))
    sens = np.mean(lf.chroma_cens(samples).T, axis=0)
    row = np.concatenate((row, sens))
    spcent = np.mean(lf.spectral_centroid(samples).T, axis=0)
    row = np.concatenate((row, spcent))
    flatness = np.mean(lf.spectral_flatness(samples).T, axis=0)
    row = np.concatenate((row, flatness))
    rolloff = np.mean(lf.spectral_rolloff(samples).T, axis=0)
    row = np.concatenate((row, rolloff))
    mspec = np.mean(lf.melspectrogram(samples).T, axis=0)
    row = np.concatenate((row, mspec))
    mfcc = np.mean(lf.mfcc(samples, n_mfcc=30).T, axis=0)
    row = np.concatenate((row, mfcc))
    tonnetz = np.mean(lf.tonnetz(samples).T, axis=0)
    row = np.concatenate((row, tonnetz))
    rmse = np.mean(lf.rmse(samples).T, axis=0)
    row = np.concatenate((row, rmse))
    contrast = np.mean(lf.spectral_contrast(samples).T, axis=0)
    row = np.concatenate((row, contrast))
    # Fixed inconsistency: this line used raw `thing` instead of the
    # converted `thing1` used by every other feature above.
    tempo = np.mean(lf.tempogram(samples, win_length=88).T, axis=0)
    row = np.concatenate((row, tempo))
def compute_average_spectral_flatness(y):
    """Return the spectral flatness of `y` averaged over all frames."""
    flatness_per_frame = spectral_flatness(y=y)
    return np.mean(flatness_per_frame)
def flatness(wave_form, sample_rate, hop_length):
    """Frame-wise spectral flatness of `wave_form`, transposed to (frames, 1).

    `sample_rate` is accepted for a uniform extractor signature but is not
    used — spectral flatness is frequency-scale independent.
    """
    per_frame = feature.spectral_flatness(y=wave_form, hop_length=hop_length)
    return per_frame.T