def compute_chroma(data):
    """Compute a chromagram of an audio signal.

    :param data: audio time series, forwarded to ``feature.chroma_stft``
    :return: chromagram array as returned by ``feature.chroma_stft``
    """
    # Renamed from ``chr`` — the original shadowed the ``chr`` builtin.
    chroma = feature.chroma_stft(data)
    return chroma
def chromagram(audio, fs, params):
    """Compute a log-compressed, per-frame normalized STFT chromagram.

    :param audio: audio time series
    :param fs: sampling frequency in Hz
    :param params: dict with keys ``n_fft``, ``hop_length``, ``win_length``,
        ``window``, ``center`` (forwarded to :func:`chroma_stft`) plus
        ``gamma`` (log-compression gain), ``norm`` (norm order) and
        ``epsilon`` (silence threshold)
    :raises ParamInputError: if the analysis window is longer than the FFT size
    :return: chroma matrix of shape (n_chroma, n_frames)
    """
    if params["n_fft"] < params["win_length"]:
        raise ParamInputError(
            "Window too large! For {} FFT samples and {}Hz sampling frequency, "
            "maximum window size is {:.2f}ms".format(
                params["n_fft"], fs, params["n_fft"] * 1000 / fs))
    # Local renamed from ``chromagram`` — it shadowed this function's own name.
    chroma = chroma_stft(
        audio, fs,
        norm=None,
        n_fft=params["n_fft"],
        hop_length=params["hop_length"],
        win_length=params["win_length"],
        window=params["window"],
        center=params["center"],
    )
    # Logarithmic compression controlled by gamma.
    chroma = np.log10(1 + params["gamma"] * chroma)
    # Per-frame normalization; near-silent frames are replaced by a uniform
    # vector. Generalized: the bin count was hard-coded to 12 — derive it from
    # the matrix so non-default n_chroma settings also work. The uniform
    # fallback vector is loop-invariant, so compute it once.
    n_bins = chroma.shape[0]
    uniform = np.ones(n_bins) / np.linalg.norm(np.ones(n_bins), params["norm"])
    for column in chroma.T:
        col_norm = np.linalg.norm(column, params["norm"])
        if col_norm < params["epsilon"]:
            column[...] = uniform
        else:
            column[...] = column / col_norm
    return chroma
def get_chroma_frequencies(self, outside_series=None, outside_sr=None):
    """Return ``(hop_length, chromagram)`` for the selected audio series.

    Falls back to the object's own series/sample-rate when the ``outside_*``
    overrides are not given (resolution is delegated to ``select_series`` /
    ``select_sr``).
    """
    hop = 512
    series = self.select_series(outside_series)
    sample_rate = self.select_sr(outside_sr)
    chroma = chroma_stft(series, sr=sample_rate, hop_length=hop)
    return hop, chroma
def chroma_stft(args):
    """Compute a chromagram from the precomputed PSD carried in ``args``.

    The hop size is reconstructed from the FFT size and overlap so the frame
    timing matches the PSD's analysis parameters.
    """
    spectrum = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    hop = nfft - noverlap
    # y=None: librosa uses the supplied spectrogram S instead of raw audio.
    return rosaft.chroma_stft(y=None, sr=fs, S=spectrum, n_fft=nfft,
                              hop_length=hop)
def STFT(self, audio):
    '''Compute a chromagram of the audio signal.

    NOTE(review): despite the name, this returns ``chroma_stft`` output (an
    STFT-based chromagram), not a raw STFT matrix.

    audio -> audio signal as a numpy array; downmixed to mono before analysis.
    '''
    audio = self._make_mono(audio)
    # NOTE(review): ``n_chroma`` and ``hop_len`` are not defined in this
    # method — presumably module-level globals; verify they exist at runtime.
    stft = chroma_stft(audio, sr=self.sr, n_chroma=n_chroma, hop_length=hop_len)
    return stft
def create_features():
    # Extract a feature matrix from every .wav file listed in the module-level
    # ``file_names``, using the extraction method selected by the module-level
    # ``audio_processing_choice`` ("chroma" | "cqt" | "mfcc" | "fbank" | other
    # -> logfbank). Also relies on globals: source_location, reduced_dim,
    # frame_time_len, frame_len, wvf (scipy.io.wavfile).
    # Returns: list with one per-file feature array, ('# frames' x '# features').
    print("\nConverting LM data to features...")
    features_list = []
    for i, (file_name) in enumerate(file_names):
        print("File " + str(i+1) + "...")
        full_file_name = source_location + "\\" + \
            file_name
        rate, data = wvf.read(full_file_name)
        if audio_processing_choice == "chroma":
            # Flatten to 1-D, keep every other sample (presumably one channel
            # of interleaved stereo — TODO confirm), convert to float array.
            data = np.asarray(
                [float(datum) for datum in
                 data.flatten()[0::2]])
            # Transposed to ('# frames' x 12 chroma features).
            features = chroma_stft(y=data, sr=rate).T
            # Tile each chroma column 3x to give 36 features (width expected
            # by the downstream CNNv3 model, per the original comments).
            features = np.repeat(features, 3, axis=1)
        elif audio_processing_choice == "cqt":
            # Same flatten/decimate/float preprocessing as the chroma branch.
            data = np.asarray(
                [float(datum) for datum in
                 data.flatten()[0::2]])
            features = chroma_cqt(y=data, sr=rate,
                                  n_chroma=reduced_dim).T
        elif audio_processing_choice == "mfcc":
            # python_speech_features-style API: raw samples in, frames out.
            features = mfcc(signal=data, samplerate=rate,
                            winlen=frame_time_len,
                            winstep=frame_time_len,
                            numcep=reduced_dim,
                            nfilt=reduced_dim*2,
                            nfft= frame_len)
        elif audio_processing_choice == "fbank":
            # fbank returns (features, frame_energies); keep only the features.
            features = fbank(signal=data, samplerate=rate,
                             winlen=frame_time_len,
                             winstep=frame_time_len,
                             nfilt=reduced_dim,
                             nfft=frame_len)[0]
        else:
            # Default/fallback extractor: log mel filterbank energies.
            features = logfbank(signal=data,
                                samplerate=rate,
                                winlen=frame_time_len,
                                winstep=frame_time_len,
                                nfilt=reduced_dim,
                                nfft=frame_len)
        features_list.append(features)
    return features_list
def get_chromagram(y, sr, chroma):
    """
    Compute a single 12-bin chromagram using the method encoded in *chroma*.

    Parameters
    ----------
    y : np.ndarray
        audio time series (the original docstring wrongly described it as a
        scalar)
    sr : number > 0 [scalar]
        target sampling rate
    chroma : str
        spec string "chroma-samplerate-framesize-overlap", parsed by
        ``get_parameters_chroma``

    Returns
    -------
    np.ndarray or None
        the chromagram, or None when the chroma type is unrecognized
        (the original docstring wrongly promised a "list of chromagrams")
    """
    params = get_parameters_chroma(chroma)
    # Keep the parsed method name in its own variable instead of shadowing
    # the *chroma* argument, as the original did.
    method = params["chroma"]
    doce_bins_tuned_chroma = None
    if method == 'nnls':
        doce_bins_tuned_chroma = get_nnls(y, params["sr"], params["fr"],
                                          params["off"])
    elif method == 'cqt':
        win = get_window('blackmanharris', params["fr"])
        doce_bins_tuned_chroma = chroma_cqt(
            y=y, sr=params["sr"], C=None, hop_length=params["off"], norm=None,
            window=win, fmin=110, n_chroma=12,
            # At the low 5525 Hz rate only 4 octaves fit under Nyquist.
            n_octaves=4 if params["chroma"] == "cqt" and params["sr"] == 5525 else 5,
            bins_per_octave=36)
    elif method == 'cens':
        win = get_window('blackmanharris', params["fr"])
        doce_bins_tuned_chroma = chroma_cens(
            y=y, sr=params["sr"], C=None, hop_length=params["off"], norm=None,
            window=win, fmin=110, n_chroma=12, n_octaves=5, bins_per_octave=36)
    elif method == 'stft':
        win = get_window('blackmanharris', params["fr"])
        doce_bins_tuned_chroma = chroma_stft(
            y=y, sr=params["sr"], hop_length=params["off"], norm=None,
            window=win, n_chroma=12)
    return doce_bins_tuned_chroma
def feature_extractor (y, sr):
    """Extract frame-wise audio features and stack them as (frames, features).

    The print strings are Russian debug traces ("entered feature_extractor",
    "librosa loaded as f", "feature_extractor finished work") and are left
    untouched because they are runtime output.
    """
    print('вошли в процедyрy feature_extractor')
    from librosa import feature as f
    print('либрозy как f загрyзили')
    rmse = f.rms(y=y)[0]  # f.rmse(y=y) in older librosa releases
    spec_cent = f.spectral_centroid (y = y, sr = sr)
    spec_bw = f.spectral_bandwidth (y = y, sr = sr)
    rolloff = f.spectral_rolloff (y = y, sr = sr)
    zcr = f.zero_crossing_rate (y)
    mfcc = f.mfcc(y = y, sr = sr)  # mel cepstral coefficients
    chroma = f.chroma_stft(y=y, sr=sr)
    # vstack works because every feature above is frame-aligned (same default
    # hop length), then transpose to one row per frame.
    output = np.vstack([rmse, spec_cent, spec_bw, rolloff, zcr, chroma, mfcc]).T
    print('feature_extractor закончил работy')
    return (output)
def extract_features(self):
    """Extract a fixed-length audio descriptor for ``self.waveform``.

    Computes MFCC, chroma, mel-spectrogram, spectral-contrast and tonnetz
    features, averages each across time frames, and concatenates the means
    into a single 1-D vector.

    :return: 1-D np.ndarray of per-feature time-averaged values
    """
    # STFT magnitude, reused by the spectrogram-based features below.
    stft = np.abs(librosa.stft(self.waveform))
    # 40 MFCCs averaged over frames.
    mfccs = mfcc(y=self.waveform, sr=self.sample_rate, n_mfcc=40).mean(axis=1)
    # Chromagram from the precomputed STFT magnitude.
    chroma = chroma_stft(S=stft, sr=self.sample_rate).mean(axis=1)
    # Pass the audio as a keyword argument: librosa >= 0.10 made the audio
    # parameter keyword-only, and this matches the other calls in this method.
    mel = melspectrogram(y=self.waveform, sr=self.sample_rate).mean(axis=1)
    contrast = spectral_contrast(S=stft, sr=self.sample_rate).mean(axis=1)
    # Tonnetz is computed on the harmonic component only.
    harmonic = librosa.effects.harmonic(self.waveform)
    tonn = tonnetz(y=harmonic, sr=self.sample_rate).mean(axis=1)
    return np.concatenate([mfccs, chroma, mel, contrast, tonn], axis=0)
def get_beat_sync_chroma(audio):
    """
    Get a beat synchronous chroma
    :param audio: The path to the audio file
    :return: A beat synchronous chroma, shape (12, n_segments)
    """
    y, sr = core.load(audio, sr=44100)
    tempo, framed_dbn = self_tempo_estimation(y, sr)
    # BUG FIX: np.append returns a NEW array — the original discarded the
    # result, so the final segment (last beat -> end of audio) was silently
    # dropped. Assign it back so that segment is included.
    framed_dbn = np.append(framed_dbn, np.array(len(y)/sr))
    # One averaged chroma vector per inter-beat segment, computed from the
    # power spectrogram of that segment.
    chromas = []
    for i in range(1, len(framed_dbn)):
        stft = abs(core.stft(y[int(framed_dbn[i-1]*sr):int(framed_dbn[i]*sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft**2), axis=1)
        chromas.append(chroma)
    chromas = np.array(chromas).transpose()
    return chromas
def plot_chroma(file, title):
    """Display the chromagram of *file* with *title* as the plot heading."""
    # kaiser_fast resampling keeps loading quick.
    signal, rate = librosa.load(file, sr=42000, res_type='kaiser_fast')
    # Power spectrogram with a long (4096-sample) FFT window.
    power_spec = np.abs(librosa.stft(signal, n_fft=4096)) ** 2
    chroma = chroma_stft(S=power_spec, sr=rate)
    plt.figure(figsize=(18, 4))
    librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
    plt.colorbar()
    plt.title(title + ' Chromagram')
    plt.tight_layout()
    plt.show()
def extract_coefficents(self, type, hasLabel=True):
    """Compute mean mid-section chroma vectors for every .au file under
    data/<type> and persist them through a ``General_Out`` writer.

    (The ``type`` parameter name shadows the builtin but is kept for
    interface compatibility.)
    """
    writer = General_Out(type, 'raw', 'chroma')
    base_dir = os.path.join(os.getcwd(), 'data/' + type)
    for dirpath, _dirnames, filenames in os.walk(base_dir):
        for fname in filenames:
            if not fname.endswith(".au"):
                continue
            # sr=None keeps each file's native sample rate.
            audio, sr = load(os.path.join(dirpath, fname), sr=None)
            frames = np.transpose(chroma_stft(audio, sr))
            n_frames = frames.shape[0]
            # Average over the middle 80% of frames (drop 10% at each end).
            mean_vec = np.mean(
                frames[int(n_frames * 1 / 10):int(n_frames * 9 / 10)], axis=0)
            print(fname, mean_vec)
            writer.add(fname, mean_vec)
    writer.write()
def _calc_feat(self, window, feat_name):
    """Compute one named librosa feature on *window*, pool it across frames,
    and optionally L2-normalize it.

    :param window: 1-D audio samples
    :param feat_name: one of 'mfcc', 'chroma_stft', 'melspectrogram',
        'spectral_centroid', 'spectral_rolloff', 'tonnetz',
        'zero_crossing_rate'
    :raises ValueError: on an unknown feature name or pooling scheme
        (the original used ``assert False``, which disappears under
        ``python -O``)
    :return: pooled (and possibly normalized) feature array
    """
    # calculate feature
    if feat_name == 'mfcc':
        feat = FT.mfcc(y=window, sr=self.sr, n_mfcc=_N_MFCC)
    elif feat_name == 'chroma_stft':
        feat = FT.chroma_stft(y=window, sr=self.sr)
    elif feat_name == 'melspectrogram':
        feat = FT.melspectrogram(y=window, sr=self.sr, n_mels=128,
                                 n_fft=1024, hop_length=512)
        feat = L.power_to_db(feat)
    elif feat_name == 'spectral_centroid':
        feat = FT.spectral_centroid(y=window, sr=self.sr)
    elif feat_name == 'spectral_rolloff':
        feat = FT.spectral_rolloff(y=window, sr=self.sr)
    elif feat_name == 'tonnetz':
        feat = FT.tonnetz(y=window, sr=self.sr)
    elif feat_name == 'zero_crossing_rate':
        feat = FT.zero_crossing_rate(y=window)
    else:
        raise ValueError('Invalid feature: {!r}'.format(feat_name))

    # pool feature from multiple frames into a single vector (or keep as-is)
    if self.feature_pool == 'sum':
        feat = feat.sum(axis=1)
    elif self.feature_pool == 'max':
        feat = feat.max(axis=1)
    elif self.feature_pool == 'mean':
        feat = feat.mean(axis=1)
    elif self.feature_pool == 'flatten':
        feat = feat.flatten()
    elif self.feature_pool == 'none':
        pass
    else:
        raise ValueError(
            'Invalid feature pooling scheme: {!r}'.format(self.feature_pool))

    # normalize multi-element features only (a scalar would divide by itself)
    if self.l2_norm and feat.shape[0] > 1:
        feat /= np.linalg.norm(feat)
    return feat
def get_dbeat_sync_chroma(audio):
    """
    Get a downbeat synchronous chroma
    :param audio: The path to the audio file
    :return: tuple (chromas, semitones, downbeats, tempo) — per-downbeat
        chroma matrix (12 x n), per-downbeat semitone matrix, downbeat
        times, and the estimated tempo
    """
    y, sr = core.load(audio, sr=44100)
    tempo, beats = self_tempo_estimation(y, sr)
    # NOTE(review): np.append returns a new array, so this line is a no-op —
    # and `beats` is overwritten two lines below anyway. Looks like dead
    # code; confirm before removing.
    np.append(beats, np.array(len(y)/sr))
    # madmom-style pipeline: beat activations -> downbeat tracking.
    act = beatrnn()(audio)
    beats = downbeattrack(beats_per_bar=[4, 4], fps=100)(act)
    # Keep only rows flagged as downbeats (column 1 == 1); column 0 is time.
    downbeats = beats[beats[:, 1] == 1][:][:, 0]
    framed_dbn = np.concatenate([np.array([0]), downbeats ])
    # Calculate chroma and semitone spectra, averaged per downbeat segment.
    semitones = []
    chromas = []
    for i in range(1, len(framed_dbn)):
        stft = abs(core.stft(y[int(framed_dbn[i-1]*sr):int(framed_dbn[i]*sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft**2), axis=1)
        semitone = np.mean(hz_to_pitch(stft, sr=sr), axis=1)
        chromas.append(chroma)
        semitones.append(semitone)
    chromas = np.array(chromas).transpose()
    semitones = np.array(semitones).transpose()
    # Plot waveform with downbeat markers, the chroma matrix, and semitones.
    time = np.arange(len(y)) / sr
    fig, ax = plt.subplots(3, 1)
    ax[0].plot(time, y)
    ax[0].vlines(framed_dbn, -1, 1, colors='r', linestyles='dashdot')
    ax[0].set_xlim(framed_dbn[0], framed_dbn[-1])
    plt.sca(ax[1])
    plt.pcolor(framed_dbn, np.arange(13), chromas)
    plt.yticks(np.arange(13)+0.5, ["C", "C#", "D", "D#", "E", "F", "F#", "G",
                                   "G#", "A", "A#", "B"])
    plt.ylim(0, 12)
    plt.sca(ax[2])
    plt.pcolor(semitones)
    print(tempo)
    return chromas, semitones, downbeats, tempo
def get_beat_sync_chroma_and_spectrum(audio, sr=None, bpm=None):
    """
    Returns the beat_sync_chroma and the beat_sync_spectrums
    :param audio: Path to the song, or numpy array
    :param sr: Sample rate in case the audio param is a numpy array
        (the original docstring named this ``rate``)
    :param bpm: Precalculated bpm
    :return: (beat_sync_chroma, beat_sync_spec)
    """
    if not isinstance(audio, np.ndarray):
        # Path input: load via essentia at a fixed 44.1 kHz.
        # NOTE(review): when `audio` IS an ndarray and the caller omits `sr`,
        # the `sr / 2` band edge below would fail on None — verify callers
        # always pass sr for array input.
        sr = 44100
        y = std.MonoLoader(filename=audio, samplerate=44100)()
    else:
        y = audio
    eql_y = std.EqualLoudness()(y)
    tempo, framed_dbn = self_tempo_estimation(y, sr, tempo=bpm)
    # NOTE(review): the end-of-audio timestamp is appended only when the beat
    # count is a multiple of 4 — presumably bar-completion logic; confirm.
    if framed_dbn.shape[0] % 4 == 0:
        framed_dbn = np.append(framed_dbn, np.array(len(y)/sr))
    # Three coarse frequency bands (Hz): lows, mids, highs up to Nyquist.
    band1 = (0, 220)
    band2 = (220, 1760)
    band3 = (1760, sr / 2)
    band1list = []
    band2list = []
    band3list = []
    chromas = []
    for i in range(1, len(framed_dbn)):
        # Per beat segment: RMS magnitude of the equal-loudness signal in
        # each band...
        fft_eq = abs(np.fft.fft(eql_y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        freqs = np.fft.fftfreq(len(fft_eq), 1 / sr)
        band1list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band1[0], freqs < band1[1]))]**2))))
        band2list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band2[0], freqs < band2[1]))]**2))))
        band3list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band3[0], freqs < band3[1]))]**2))))
        # ...and a time-averaged chroma vector from the raw signal's power
        # spectrogram.
        stft = abs(core.stft(y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft ** 2), axis=1)
        chromas.append(chroma)
    chromas = np.array(chromas).transpose()
    band1list = np.array(band1list).transpose()
    band2list = np.array(band2list).transpose()
    band3list = np.array(band3list).transpose()
    return (chromas, np.vstack([band1list, band2list, band3list]))
'''This program is used to plot chromagrams'''
import matplotlib.pyplot as plt
import librosa.display as ld
import scipy.io.wavfile as sc
import librosa.feature as lf
import librosa as lb

# Audio file to visualize.
file_name = 'Training_Data_Set/rock/rock.00001.wav'
plt.figure(1)
rate, data = sc.read(file_name)
# First plot: the raw waveform. (plt.figure(1) is called twice — the second
# call just re-selects the same figure.)
plt.figure(1)
plt.plot(data)
plt.show()
# Second plot: the chromagram of the same signal.
M = lf.chroma_stft(data, sr=rate, n_fft=4096, hop_length=512)
ld.specshow(M, x_axis='frames', y_axis='chroma')
plt.colorbar()
plt.title('rock_1')
plt.show()
def main(aud):
    """Window a test recording, extract features, classify with GaussianNB.

    NOTE(review): reconstructed from collapsed source — the nesting of the
    j/k loops below was ambiguous; ``chroma[k, j]`` implies the k-loop sits
    inside the j-loop. Verify against the original file.
    Relies on module-level names: load_audio, myfunc, feature, pd, np,
    new_dataset (the training DataFrame), naive_bayes, sklearn.
    Returns: (predicted_label, matching description rows from descr.csv).
    """
    waves = {}
    sg, mask, data, audio_mask, sample_rate = load_audio(str(aud))
    waves['audio'] = data[audio_mask]
    length = len(data[audio_mask])
    w = myfunc()
    windo = w(length)  # NOTE(review): `windo` is never used afterwards
    windows = {}
    wave = waves['audio']
    species = 'gens_specie'
    windows[species] = []
    # Slice the signal into 6144-sample windows.
    # NOTE(review): `i` advances by 1, not by 6144, so consecutive windows
    # overlap almost entirely — confirm this is intended.
    for i in range(0, int(len(wave) / 6.144000e+03)):
        windows[species].append(wave[i:int(i + 6.144000e+03)])
    # creating df for test audio: one row of named features per window
    new_dataset_test = pd.DataFrame()
    for species in windows.keys():
        # NOTE(review): `len(windows)` counts dict keys (always 1 here);
        # `len(windows[species])` was probably intended.
        for i in range(0, len(windows)):
            data_point = {
                'species': species.split('_')[1],
                'genus': species.split('_')[0]
            }
            spec_centroid = feature.spectral_centroid(windows[species][i])[0]
            chroma = feature.chroma_stft(windows[species][i], sample_rate)
            for j in range(0, 13):
                data_point['spec_centr_' + str(j)] = spec_centroid[j]
                for k in range(0, 12):
                    data_point['chromogram_' + str(k) + "_" + str(j)] = chroma[k, j]
            new_dataset_test = new_dataset_test.append(data_point,
                                                       ignore_index=True)
    # classification of test audio: train on the global `new_dataset`
    features = list(new_dataset.columns)
    features.remove('species')
    features.remove('genus')
    X = new_dataset[features].values
    y = new_dataset['species'].values
    X_test = new_dataset_test[features].values
    y_test = new_dataset_test['species'].values
    NB = naive_bayes.GaussianNB()
    SSS = sklearn.model_selection.StratifiedShuffleSplit(n_splits=5,
                                                         test_size=0.2)
    # NOTE(review): the model is re-fit on each split — only the last split's
    # fit survives to the prediction below.
    for train_index, val_index in SSS.split(X, y):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        NB.fit(X_train, y_train)
    y_pred = NB.predict(X_test)
    # Look up the human-readable description for the predicted class.
    check = pd.DataFrame()
    df = pd.read_csv("/home/megha/Desktop/Audio_website/templates/descr.csv",
                     delimiter=';')
    check = df.loc[df['check'] == y_pred[0]]
    return y_pred[0], check
import IPython.display import matplotlib.style as ms #y, sr = librosa.load('MoanaAudio2016.wav') y, sr = librosa.load('SecondHalfOfMoana.wav', offset=30.0, duration=5.0) from librosa.feature import chroma_stft block_gen = sf.blocks('SecondHalfOfMoana.wav', blocksize=1024) print(block_gen) samplerate = sf.info('SecondHalfOfMoana.wav').samplerate chromas = [] for bl in block_gen: # downmix frame to mono (averaging out the channel dimension) y = np.mean(bl, axis=1) # compute chroma feature chromas.append(chroma_stft(y, sr=sr)) print("This is Y:%d \n", y) ms.use('seaborn-muted') #%matplotlib inline #Load the example track y, sr = librosa.load(librosa.util.example_audio_file()) # How about something more advanced? Let's decompose a spectrogram with NMF, and then resynthesize an individual component D = librosa.stft(y) # Separate the magnitude and phase S, phase = librosa.magphase(D) # Decompose by nmf components, activations = librosa.decompose.decompose(S, n_components=8,
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa

    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param sound_name: type of sound, i.e. dog
    :param feature_list: list of features to compute; None or an empty list
        selects every supported feature
    :return: np.array of all features for the soundwave, stacked row-wise
    """
    print("Computing features for ",sound_name)
    # Mutable-default fix: the original declared ``feature_list=[]``. Use None
    # as the sentinel; an explicit empty list still means "all features", so
    # existing callers see identical behavior.
    if not feature_list:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens", "melspectrogram",
                        "mfcc", "rmse", "spectral_centroid", "spectral_bandwidth",
                        "spectral_contrast", "spectral_flatness", "spectral_rolloff",
                        "poly_features", "tonnetz", "zero_crossing_rate"]

    features = []
    # Row counts contributed by each feature are noted on the branches.
    if "chroma_stft" in feature_list:  # 12 rows
        features.append(feat.chroma_stft(soundwave, sampling_rate))
    if "chroma_cqt" in feature_list:  # 12 rows
        features.append(feat.chroma_cqt(soundwave, sampling_rate))
    if "chroma_cens" in feature_list:  # 12 rows
        features.append(feat.chroma_cens(soundwave, sampling_rate))
    if "melspectrogram" in feature_list:  # 128 rows
        features.append(feat.melspectrogram(soundwave, sampling_rate))
    if "mfcc" in feature_list:  # 20 rows
        features.append(feat.mfcc(soundwave, sampling_rate))
    if "rmse" in feature_list:  # 1 row
        features.append(feat.rmse(soundwave))
    if "spectral_centroid" in feature_list:  # 1 row
        features.append(feat.spectral_centroid(soundwave, sampling_rate))
    if "spectral_bandwidth" in feature_list:  # 1 row
        features.append(feat.spectral_bandwidth(soundwave, sampling_rate))
    if "spectral_contrast" in feature_list:  # 7 rows
        features.append(feat.spectral_contrast(soundwave, sampling_rate))
    if "spectral_flatness" in feature_list:  # 1 row
        features.append(feat.spectral_flatness(soundwave))
    if "spectral_rolloff" in feature_list:  # 1 row
        features.append(feat.spectral_rolloff(soundwave, sampling_rate))
    if "poly_features" in feature_list:  # 2 rows
        features.append(feat.poly_features(soundwave, sampling_rate))
    if "tonnetz" in feature_list:  # 6 rows
        features.append(feat.tonnetz(soundwave, sampling_rate))
    if "zero_crossing_rate" in feature_list:  # 1 row
        features.append(feat.zero_crossing_rate(soundwave))

    return np.concatenate(features)
adir = os.path.join(audio_dir, genre) ldir = os.path.join(label_dir, genre) file_names = [".".join(f.split(".")[:-1]) for f in os.listdir(adir)] file_names = sorted(file_names, key=last_5chars) acc = [] count = 0 print("Running genre", genre, "...") for f in file_names: with open(os.path.join(ldir, f + label_ext)) as label_file: t = int(label_file.readline()) if t < 0: continue count += 1 data, sr = load(os.path.join(adir, f + audio_ext), sr=None) chroma = chroma_stft(y=data, sr=sr, n_fft=4096, base_c=False) chroma = np.mean(chroma, axis=1) chroma = np.log(1 + g * chroma) prob = np.apply_along_axis(pearsonr, 1, template, chroma)[:, 0] weight = np.tile(chroma, 4) y = np.argmax(prob * weight) % 24 acc.append(mirex_evaluate(y, t)) print(f + "\t" + str(y)) table.add_row([ genre, acc.count(1), acc.count(0.5), acc.count(0.3),
mean_filt2 = np.ones(w // 2 // d + 1) / (w // 2 // d + 1) overall_acc = [] sym2num = np.vectorize(inv_key_map.get) num2sym = np.vectorize(key_map.get, otypes=[np.str]) evaluate_vec = np.vectorize(mirex_evaluate, otypes=[float]) for f in file_names: label = np.loadtxt(os.path.join(data_dir, ref_prefix + f + '.txt'), dtype='str') t = sym2num(label[:, 1]) data, sr = load(os.path.join(data_dir, f + '.wav'), sr=None) hop_size = int(sr / d) window_size = hop_size * 2 chroma_a = chroma_stft(y=data, sr=sr, hop_length=hop_size, n_fft=window_size, base_c=False) chroma_a = np.apply_along_axis(fftconvolve, 1, chroma_a, mean_filt, 'same') if chroma_a.shape[1] > len(label) * d: chroma_a = chroma_a[:, :len(label) * d] elif chroma_a.shape[1] < len(label) * d: chroma_a = np.column_stack((chroma_a, np.zeros((12, len(label) * d - chroma_a.shape[1])))) # chroma_a = decimate(chroma_a[:, int(d/2):], d, axis=1) chroma_a = chroma_a.reshape(12, len(label), d).mean(axis=2) chroma_a = np.log(1 + g * chroma_a) chroma_a = np.apply_along_axis(fftconvolve, 1, chroma_a, mean_filt2, 'same') prob = np.zeros((ks_template.shape[0], chroma_a.shape[1])) for n in range(chroma_a.shape[1]):
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt

# Bhairav Block Wise Reading: stream each recording in fixed-size blocks
# (2,646,000 samples = 60 s at 44.1 kHz), compute one chromagram per block,
# and save each as a PNG.
## Bhairav 1
block_gen = sf.blocks('data/Hindustani/wav/Bhairav/bhairav1.wav', blocksize=2646000)
rate = sf.info("data/Hindustani/wav/Bhairav/bhairav1.wav").samplerate
info = sf.info("data/Hindustani/wav/Bhairav/bhairav1.wav")
print(info)
chromas = []
for bl in block_gen:
    # Downmix each multi-channel block to mono, then compute its chromagram.
    y = np.mean(bl, axis=1)
    chromas.append(chroma_stft(y, sr=rate))
# NOTE(review): bare expression — result discarded (notebook leftover?).
len(chromas)
for j, chroma in enumerate(chromas):
    specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
    plt.title(f"Chromagram of Bhairav1_{j}")
    plt.savefig(f"data/chroma_files/bhairav-chromas/bhairav1/bhairav1_{j}.png")

## Bhairav 2 (same pipeline; block processing presumably continues in the
## part of the file not shown here)
block_gen = sf.blocks('data/Hindustani/wav/Bhairav/bhairav2.wav', blocksize=2646000)
rate = sf.info("data/Hindustani/wav/Bhairav/bhairav2.wav").samplerate
info = sf.info("data/Hindustani/wav/Bhairav/bhairav2.wav")
print(info)
# Key-detection fragment. Relies on names defined earlier in the original
# file: inv_key_map, key_map, mirex_evaluate, file_names, data_dir, d (frames
# per second), meanFilt (smoothing kernel), g (log-compression gain), load.
sym2num = np.vectorize(inv_key_map.get)
# NOTE(review): np.str is removed in NumPy >= 1.24 — this line needs `str`
# on modern NumPy.
num2sym = np.vectorize(key_map.get, otypes=[np.str])
evaluateVec = np.vectorize(mirex_evaluate, otypes=[float])
for f in file_names:
    # Reference annotations: one (time, key-symbol) row per labeled segment.
    label = np.loadtxt(os.path.join(data_dir, f + '.txt'), dtype='str')
    t = sym2num(label[:, 1])
    data, sr = load(os.path.join(data_dir, f + '.wav'), sr=None)
    # d analysis frames per second; 50%-overlapping windows.
    hopSize = int(sr / d)
    windowSize = hopSize * 2
    chromaVec = chroma_stft(y=data, sr=sr, hop_length=hopSize,
                            n_fft=windowSize, base_c=False)
    # Temporal smoothing of each chroma bin.
    chromaVec = np.apply_along_axis(fftconvolve, 1, chromaVec, meanFilt, 'same')
    # Trim or zero-pad so the frame count is exactly len(label) * d.
    if chromaVec.shape[1] > len(label) * d:
        chromaVec = chromaVec[:, :len(label) * d]
    elif chromaVec.shape[1] < len(label) * d:
        chromaVec = np.column_stack(
            (chromaVec, np.zeros(
                (12, len(label) * d - chromaVec.shape[1]))))
    # Average the d frames inside each labeled segment, then log-compress.
    chromaVec = chromaVec.reshape(12, len(label), d).mean(axis=2)
    chromaVec = np.log(1 + g * chromaVec)
import os
import numpy as np
import soundfile as sf
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt

# Compute and save per-minute chromagrams for every Bhup raga recording.
bhup_files = os.listdir("data/Hindustani/wav/Bhup")
print(bhup_files)

# One output directory per recording (numbered from 1).
for idx, _name in enumerate(bhup_files):
    os.system(f"mkdir data/chroma_files/bhup-chromas/bhup{idx+1}")

chroma_dict = {}
for rec in range(len(bhup_files)):
    wav_path = f"data/Hindustani/wav/Bhup/bhup{rec+1}.wav"
    rate = sf.info(wav_path).samplerate
    # Stream the file in 60-second blocks (blocksize is in samples).
    block_gen = sf.blocks(wav_path, blocksize=rate * 60)
    key = f"bhup{rec+1}"
    chroma_dict[key] = []
    for block in block_gen:
        mono = np.mean(block, axis=1)  # downmix to mono
        chroma_dict[key].append(chroma_stft(mono, sr=rate))
    # Save one chromagram image per block.
    for k, chroma in enumerate(chroma_dict[key]):
        specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
        plt.title(f"Chromagram of Bhup{rec+1}_{k+1}")
        plt.savefig(
            f"data/chroma_files/bhup-chromas/bhup{rec+1}/bhup{rec+1}_{k+1}.png")
feature=dict() #this contains feature vecotrs in repective genres feature.fromkeys(geners) for gener in geners: # This iterates in various directories over each genre for music in Train_files[gener]: final=np.empty((1,12),float)#this iterates over each files in a given genre name=working_dir + '\\' + gener + '\\' + music rate,data=sc.read(name) #print rate,data if (len(data.shape) == 2): # resovling files with 2 channels. data = data[:,0] C_DFT=lf.chroma_stft(y=data,sr=rate,n_fft=4096,hop_length=2048) '''select=range(0,C_DFT.shape[1],6) for i in range(len(select)-1): C_DFT_temp=C_DFT[:,select[i]:select[i+1]] col=np.mean(C_DFT_temp,axis=1) bins=np.array(range(1,13,1)) col2=np.reshape(col,(1,len(col))) cen=util.centorid(col,bins) var=util.spread(col,bins,cen) max=np.argmax(col) min=np.argmin(col) if(i!=len(select)-2): C_DFT_temp=C_DFT[:,select[i+1]:select[i+2]]
# Training-data feature-extraction fragment. Relies on names defined earlier
# in the original file: train_data_1, train_data_reader (a chunked reader),
# lf (librosa.feature). The per-row assembly continues past this excerpt.
train_data_1.head()
print(np.array(train_data_1)[0])
train_data = np.array([])
counter = 0
# NOTE(review): `train_data` is initialized twice and `counter` is printed
# but never incremented in this excerpt — both look like leftovers; verify
# against the full original file.
train_data = np.array([])
for chunk in train_data_reader:
    chunk1 = np.array(chunk)
    for thing in chunk1:
        print(counter)
        thing1 = np.array(thing)
        # Build one concatenated feature row per sample. thing1[:-1] drops
        # the last element — presumably a trailing label column; confirm.
        # Each feature matrix is transposed and time-averaged to a vector.
        row = np.array([])
        cstft = np.mean(lf.chroma_stft(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cstft))
        cqt = np.mean(lf.chroma_cqt(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cqt))
        sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, sens))
        spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
# Python 2 fragment: builds a per-frame chroma training set (one labeled row
# per STFT frame) and streams it to a CSV writer. Relies on earlier
# definitions: geners, working_dir, Train_files, sc (scipy.io.wavfile),
# lf (librosa.feature), plt, tds_write (csv writer), tds (its file).
# NOTE(review): dict.fromkeys returns a NEW dict — this call discards its
# result and is a no-op.
Train_files.fromkeys(geners)
for x in geners:  # map each genre to its list of training files
    data = os.listdir(working_dir + '\\' + x)
    Train_files[x] = data
fig = plt.figure(1)  # figure created for plotting later
feature = dict()  # intended to hold feature vectors per genre
feature.fromkeys(geners)  # NOTE(review): also a no-op (result discarded)
for gener in geners:
    for music in Train_files[
            gener]:
        name = working_dir + '\\' + gener + '\\' + music
        rate, data = sc.read(name)
        print rate, data
        if (len(data.shape) == 2):  # stereo file: keep the first channel only
            data = data[:, 0]
        C_DFT = lf.chroma_stft(y=data, sr=rate)
        C_DF = C_DFT.T
        # One genre label per frame, prepended as the first column.
        label = [[gener] * C_DFT.shape[1]]
        label = np.array(label)
        print label.shape, C_DF.shape
        out = np.concatenate((label.T, C_DF), axis=1)
        print out.shape
        tds_write.writerows(out)
# NOTE(review): placement reconstructed from collapsed source — the close is
# assumed to happen once, after all files are written; verify.
tds.close()
import os
import numpy as np
import soundfile as sf
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt

# Compute and save per-minute chromagrams for every Des raga recording.
des_files = os.listdir("data/Hindustani/wav/Des")
print(des_files)
# One output directory per recording (numbered from 1).
for h,i in enumerate(des_files):
    os.system(f"mkdir data/chroma_files/des-chromas/des{h+1}")
chroma_dict = {}
for j in range(len(des_files)):
    rate = sf.info(f"data/Hindustani/wav/Des/des{j+1}.wav").samplerate
    # Stream the file in 60-second blocks (blocksize is in samples).
    block_gen = sf.blocks(f"data/Hindustani/wav/Des/des{j+1}.wav", blocksize=rate*60)
    chroma_dict[f"des{j+1}"] = []
    for bl in block_gen:
        y = np.mean(bl, axis=1)  # downmix to mono
        chroma_dict[f"des{j+1}"].append(chroma_stft(y, sr=rate))
    # Save one chromagram image per block.
    for k, chroma in enumerate(chroma_dict[f"des{j+1}"]):
        specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
        plt.title(f"Chromagram of Des{j+1}_{k+1}")
        plt.savefig(f"data/chroma_files/des-chromas/des{j+1}/des{j+1}_{k+1}.png")
def get_feature_from_librosa(wave_name, window):
    """Extract a stacked frame-wise feature matrix from a wav file.

    NOTE(review): the ``print x``-style commented code and bare ``window / 2``
    suggest Python 2; under Python 3, ``window / 2`` yields a float, which
    librosa's integer hop/window parameters reject — use ``window // 2`` when
    porting.

    :param wave_name: path to the wav file
    :param window: analysis window length in samples (hop is window / 2)
    :return: stacked feature matrix (transposed at the end; see final note)
    """
    #print wave_name
    (rate, sig) = wav.read(wave_name)
    chroma_stft_feat = feature.chroma_stft(sig, rate, n_fft=window, hop_length=window / 2)
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=window / 2)
    mfcc_feat = mfcc_feat[1:, :]  # drop the 0th MFCC coefficient
    d_mfcc_feat = feature.delta(mfcc_feat)  # first-order deltas
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)  # second-order deltas
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig, frame_length=window, hop_length=window / 2)
    # Magnitude spectrogram (magphase returns (magnitude, phase)).
    S = librosa.magphase(
        librosa.stft(sig, hop_length=window / 2, win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    centroid_feat = feature.spectral_centroid(sig, rate, n_fft=window, hop_length=window / 2)
    bandwith_feat = feature.spectral_bandwidth(sig, rate, n_fft=window, hop_length=window / 2)
    contrast_feat = feature.spectral_contrast(sig, rate, n_fft=window, hop_length=window / 2)
    # Spectral roll-off frequency (translated from the original Chinese comment).
    rolloff_feat = feature.spectral_rolloff(sig, rate, n_fft=window, hop_length=window / 2)
    # Fit nth-order polynomial coefficients to the spectrogram columns
    # (translated from the original Chinese comment).
    poly_feat = feature.poly_features(sig, rate, n_fft=window, hop_length=window / 2)
    # Stack all frame-aligned features side by side, then transpose.
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T,
         centroid_feat.T, bandwith_feat.T, contrast_feat.T, rolloff_feat.T,
         poly_feat.T))
    feat = feat.T
    # Original (Chinese) comment said "each row is one frame's features", but
    # NOTE(review): after the transpose above, each COLUMN is a frame — verify
    # which orientation callers expect.
    return feat
songname = '.'.join(songname) song.export(songname, format = "wav") else: songname = '.'.join(songname) print 'Start reading file' # read file src, samplerate = load(songname) dur = get_duration(y=src, sr=samplerate) # set time stime = time() # get chromagram print 'get chromagram' chromagram = chroma_stft(y = src, sr = samplerate, hop_length = 512 * 8) printDt(stime, time()) # count correlation print 'count correlation' correlation = np.corrcoef( np.cov(np.transpose(chromagram))) corsize = correlation.shape[0] printDt(stime, time()) thumbnailSize = int(22 / dur * corsize) startSec = int(5 / dur * corsize)
# Build data_training.csv: one row per 30-second training clip, containing
# the time-averaged chroma, rmse, spectral centroid/bandwidth, zero-crossing
# rate and 20 mean MFCCs, plus the group label. Relies on earlier imports:
# csv, os, librosa, np, fitur (librosa.feature).
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()
# Write the header row (file is closed by the with-block).
file = open('data_training.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()
for g in sukus:
    for filename in os.listdir(f'data_training/{g}'):
        # NOTE(review): the literal "(unknown)" here and in `to_append` below
        # looks like a redacted placeholder for {filename} — confirm against
        # the original source before running.
        songname = f'data_training/{g}/(unknown)'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = fitur.chroma_stft(y=y, sr=sr)
        spec_cent = fitur.spectral_centroid(y=y, sr=sr)
        spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
        rmse = fitur.rmse(y)
        zcr = fitur.zero_crossing_rate(y)
        mfcc = fitur.mfcc(y=y, sr=sr)
        # Collapse each feature matrix to its global mean, space-separated.
        to_append = f'(unknown) {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        # Re-open per clip in append mode and emit one CSV row.
        file = open('data_training.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())