def get_chromagram(y, sr, chroma):
    """
    Compute a 12-bin chromagram of ``y`` with the method named in ``chroma``.

    Parameters
    ----------
    y :
        audio signal, forwarded unchanged to the selected extractor
    sr :
        not used by this function; the sampling rate that is actually
        applied is ``params["sr"]`` as decoded from ``chroma``
    chroma : str
        descriptor passed to ``get_parameters_chroma``, which must yield
        the keys ``"chroma"``, ``"sr"``, ``"fr"`` and ``"off"``
        (documented layout: chroma-samplerate-framesize-overlap)

    Returns
    -------
    The output of the selected extractor, or ``None`` when the decoded
    method is not one of ``'nnls'``, ``'cqt'``, ``'cens'``, ``'stft'``.
    """
    params = get_parameters_chroma(chroma)
    method = params["chroma"]

    if method == 'nnls':
        return get_nnls(y, params["sr"], params["fr"], params["off"])

    if method not in ('cqt', 'cens', 'stft'):
        # Unknown method: nothing is computed.
        return None

    # The three librosa-style extractors all use a Blackman-Harris window
    # of the frame size and no normalisation.
    common = {
        "y": y,
        "sr": params["sr"],
        "hop_length": params["off"],
        "norm": None,
        "window": get_window('blackmanharris', params["fr"]),
        "n_chroma": 12,
    }

    if method == 'stft':
        return chroma_stft(**common)

    # Constant-Q based variants additionally share these settings.
    # threshold=0.0 (disabled)
    common.update(C=None, fmin=110, bins_per_octave=36)

    if method == 'cqt':
        # One octave fewer at the 5525 Hz sampling rate.
        octaves = 4 if params["sr"] == 5525 else 5
        return chroma_cqt(n_octaves=octaves, **common)

    return chroma_cens(n_octaves=5, **common)
def chroma_cens(args):
    """Return ``rosaft.chroma_cens`` features for the signal described by *args*.

    The signal comes from ``get_sig(args)``; the sampling rate, FFT size and
    overlap are read from *args* under the keys ``'fs'``, ``'nfft'`` and
    ``'noverlap'``. The hop length is the FFT size minus the overlap.
    """
    signal = get_sig(args)
    sample_rate, window_len, overlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    return rosaft.chroma_cens(
        y=signal,
        sr=sample_rate,
        hop_length=window_len - overlap,
    )
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog (only used in the progress message)
    :param feature_list: names of the features to compute; ``None`` or an
        empty sequence selects every supported feature
    :return: np.array with the selected feature arrays concatenated along
        axis 0, always in the fixed order of the table below (the order of
        ``feature_list`` itself does not matter)
    :raises ValueError: if ``feature_list`` names no supported feature
    """
    print("Computing features for ", sound_name)

    # (name, extractor) in output order. Extractors are lazy so that only the
    # requested features are computed. Keyword arguments are used because
    # they work across librosa versions, including those that reject
    # positional arguments. Trailing comments give the feature length noted
    # by the original author.
    extractors = (
        ("chroma_stft", lambda: feat.chroma_stft(y=soundwave, sr=sampling_rate)),  # 12
        ("chroma_cqt", lambda: feat.chroma_cqt(y=soundwave, sr=sampling_rate)),  # 12
        ("chroma_cens", lambda: feat.chroma_cens(y=soundwave, sr=sampling_rate)),  # 12
        ("melspectrogram", lambda: feat.melspectrogram(y=soundwave, sr=sampling_rate)),  # 128
        ("mfcc", lambda: feat.mfcc(y=soundwave, sr=sampling_rate)),  # 20
        # NOTE(review): newer librosa releases call this `rms`; confirm
        # against the librosa version this project pins.
        ("rmse", lambda: feat.rmse(y=soundwave)),  # 1
        ("spectral_centroid", lambda: feat.spectral_centroid(y=soundwave, sr=sampling_rate)),  # 1
        ("spectral_bandwidth", lambda: feat.spectral_bandwidth(y=soundwave, sr=sampling_rate)),  # 1
        ("spectral_contrast", lambda: feat.spectral_contrast(y=soundwave, sr=sampling_rate)),  # 7
        ("spectral_flatness", lambda: feat.spectral_flatness(y=soundwave)),  # 1
        ("spectral_rolloff", lambda: feat.spectral_rolloff(y=soundwave, sr=sampling_rate)),  # 1
        ("poly_features", lambda: feat.poly_features(y=soundwave, sr=sampling_rate)),  # 2
        ("tonnetz", lambda: feat.tonnetz(y=soundwave, sr=sampling_rate)),  # 6
        ("zero_crossing_rate", lambda: feat.zero_crossing_rate(y=soundwave)),  # 1
    )

    # len() rather than truthiness so array-like selections keep working.
    use_all = feature_list is None or len(feature_list) == 0

    features = [
        compute()
        for name, compute in extractors
        if use_all or name in feature_list
    ]
    if not features:
        raise ValueError(
            "none of the requested features are supported: %r" % (feature_list,)
        )
    return np.concatenate(features)
counter = 0
train_data = np.array([])
for chunk in train_data_reader:
    chunk1 = np.array(chunk)
    for thing in chunk1:
        print(counter)
        thing1 = np.array(thing)

        # Every extractor is fed the sample without its final element.
        # NOTE(review): presumably the last element is a label - confirm
        # against the reader that produces train_data_reader.
        signal = thing1[:-1]

        # Each feature is averaged over axis 0 of its transposed matrix,
        # giving one value per feature row.
        cstft = np.mean(lf.chroma_stft(signal).T, axis=0)
        cqt = np.mean(lf.chroma_cqt(signal).T, axis=0)
        sens = np.mean(lf.chroma_cens(signal).T, axis=0)
        spcent = np.mean(lf.spectral_centroid(signal).T, axis=0)
        flatness = np.mean(lf.spectral_flatness(signal).T, axis=0)
        rolloff = np.mean(lf.spectral_rolloff(signal).T, axis=0)
        mspec = np.mean(lf.melspectrogram(signal).T, axis=0)
        mfcc = np.mean(lf.mfcc(signal, n_mfcc=30).T, axis=0)
        tonnetz = np.mean(lf.tonnetz(signal).T, axis=0)
        rmse = np.mean(lf.rmse(signal).T, axis=0)

        # Single flat feature row, in the order the features were computed.
        row = np.concatenate((
            np.array([]),
            cstft,
            cqt,
            sens,
            spcent,
            flatness,
            rolloff,
            mspec,
            mfcc,
            tonnetz,
            rmse,
        ))
def plotstft_chroma(audiopath, binsize=2**10, plotpath=None, colormap="jet", pitch_vector=None):
    """Plot a log-frequency spectrogram of an audio file, then its CENS chromagram.

    The file is loaded with ``librosa.core.load(sr=16000, mono=True,
    duration=14)``. For the spectrogram the signal is decimated by 2 first;
    the chromagram is computed from a fresh, undecimated load.

    :param audiopath: path of the audio file to load
    :param binsize: frame size passed to ``stft``; also used to offset the
        time-axis tick labels
    :param plotpath: if truthy, the spectrogram is saved there as PDF,
        otherwise it is shown. The chromagram is saved as PDF to
        ``"chromagram-" + plotpath`` unless ``plotpath`` is ``None``, in
        which case it is shown as well.
    :param colormap: matplotlib colormap name for the spectrogram
    :param pitch_vector: optional sequence drawn in red over the spectrogram
    :return: None
    """
    samples, samplerate = librosa.core.load(audiopath, sr=16000, mono=True, duration=14)

    # Decimate to avoid useless junk, ONLY FOR THE EXPERIMENT 417 FIGURE!!
    print('Se vuoi decimare (esperimento 417) qui dentro a plotstft_chroma lo puoi fare togliendo un commento qui sotto')
    #samples = SS.decimate(samples, 4)
    #samplerate = samplerate / 4
    #pitch_vector = pitch_vector / 2  # the pitch values must be scaled the same way

    print('Decimo per dimezzare altezza img')
    samples = SS.decimate(samples, 2)
    samplerate = samplerate / 2
    #pitch_vector = pitch_vector / 2  # the pitch values must be scaled the same way

    s = stft(samples, binsize)
    sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel

    timebins, freqbins = np.shape(ims)

    plt.figure(figsize=(10, 3))
    # The colour range starts 70 above the minimum of the dB image.
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap,
               interpolation="none", clim=(np.min(ims) + 70, np.max(ims)))
    #plt.colorbar()

    plt.xlabel("Time [s]", fontsize=24)
    plt.ylabel("Frequency [Hz]", fontsize=24)
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])
    plt.tick_params(axis='both', which='major', labelsize=20)

    # Four time ticks, labelled in seconds derived from the bin positions.
    xlocs = np.float32(np.linspace(0, timebins - 1, 4))
    plt.xticks(xlocs, [
        "%.0f" % t
        for t in ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate
    ])
    # Six frequency ticks, labelled with the values returned by logscale_spec.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 6)))
    plt.yticks(ylocs, ["%.0f" % freq[i] for i in ylocs])

    # Identity test: `!= None` is elementwise on NumPy arrays and makes the
    # `if` ambiguous.
    if pitch_vector is not None:
        plt.plot(pitch_vector, 'r')

    if plotpath:
        plt.savefig(plotpath, format='pdf', bbox_inches="tight")
    else:
        plt.show()

    plt.clf()

    ### CHROMA
    # Reload to undo the processing above.
    samples, samplerate = librosa.core.load(
        audiopath, sr=16000, mono=True, duration=14)
    chroma = lbf.chroma_cens(y=samples, sr=samplerate, hop_length=128)

    plt.figure(figsize=(10, 3))
    lbd.specshow(chroma, y_axis='chroma', x_axis='time', cmap='gray')
    plt.xlabel("Time [s]", fontsize=24)
    plt.ylabel("Pitch Class", fontsize=24, color='w')
    plt.tick_params(axis='y', which='major', labelsize=20, labelcolor='w',
                    length=6, width=3)
    plt.tick_params(axis='x', which='major', labelsize=20)

    # Without a plot path there is nothing to build a file name from, so the
    # chromagram is shown instead of raising TypeError on `str + None`.
    # NOTE: the prefix is prepended to the whole path string, so plotpath is
    # expected to be a bare file name.
    if plotpath is not None:
        plt.savefig("chromagram-" + plotpath, format='pdf', bbox_inches="tight")
    else:
        plt.show()