def extract_features(data_dir):
    """Extract mid-term audio features for every sub-directory of data_dir.

    Walks ``data_dir`` recursively and runs
    ``aF.directory_feature_extraction`` on each sub-directory (mid-term
    window/step of 1, short-term window 0.1 and step 0.05).  The per-folder
    results are stacked row-wise and written, together with an
    ``audio_path`` column, to ``./features.csv``.

    Folders whose extraction raises are reported and skipped; folders that
    yield no features are skipped silently.  When nothing could be
    extracted at all, a message is printed and no file is written.

    Args:
        data_dir: root directory whose sub-directories are scanned.

    Returns:
        None.
    """
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    feature_blocks = []
    audio_paths = []
    feature_names = None

    for root, dirs, _ in os.walk(data_dir):
        for d in dirs:
            folder = os.path.join(root, d)
            print("PATH: " + folder)
            try:
                f, found_files, fn = aF.directory_feature_extraction(
                    folder, m_win, m_step, s_win, s_step)
            except Exception as err:
                # Best effort: one unreadable folder must not abort the run,
                # but the failure should be visible.
                print("Skipping {0}: {1}".format(folder, err))
                continue

            f = np.asarray(f)
            if f.size == 0:
                # Nothing extracted here; an empty array cannot be stacked
                # with the 2-D blocks of other folders.
                continue
            # A 1-D result is treated as a single feature row.
            feature_blocks.append(np.atleast_2d(f))
            audio_paths.extend(found_files)
            if feature_names is None and len(fn) > 0:
                # Copy so the list returned by the library is not mutated.
                feature_names = list(fn)

    if not feature_blocks:
        print("No features extracted from " + str(data_dir))
        return

    feature_matrix = np.concatenate(feature_blocks, axis=0)

    columns = list(feature_names or [])
    if feature_matrix.shape[1] != len(columns):
        # The original code always added these two names; they are only
        # needed when the matrix has columns the name list does not cover.
        columns += ['beat', 'beat_conf']

    frame = pd.DataFrame(feature_matrix, columns=columns)
    frame['audio_path'] = audio_paths
    save_path = "./features.csv"
    frame.to_csv(save_path)
def multiple_directory_feature_extraction(path_list, mid_window, mid_step,
                                          short_window, short_step,
                                          compute_beat=False):
    """
    Extract features for several folders, one class per folder.

    This version is used to get the whole info of features, class_names,
    feature_names and file_names.

    ARGUMENTS:
        - path_list:     iterable of folder paths, one per class
        - mid_window, mid_step, short_window, short_step, compute_beat:
                         forwarded unchanged to
                         aF.directory_feature_extraction
    RETURNS:
        - features:      list with one feature array per folder that
                         produced at least one row
        - class_names:   last path component of each such folder
        - feature_names: names returned for the most recent folder that
                         produced features ([] when none did)
        - file_names:    list with one file list per such folder
    """
    features = []
    class_names = []
    file_names = []
    # Defined up front so an empty path_list returns cleanly instead of
    # raising UnboundLocalError at the return statement.
    feature_names = []

    for d in path_list:
        f, fn, names = aF.directory_feature_extraction(
            d, mid_window, mid_step, short_window, short_step,
            compute_beat=compute_beat)
        if f.shape[0] == 0:
            # no audio file has been found in the provided folder
            continue

        features.append(f)
        file_names.append(fn)
        # Only folders that produced features may set the names, so a
        # trailing folder without audio cannot overwrite them.
        feature_names = names
        print(feature_names)

        # The class name is the folder's own name; tolerate one trailing
        # path separator.
        parts = d.split(os.sep)
        class_names.append(parts[-2] if d[-1] == os.sep else parts[-1])

    return features, class_names, feature_names, file_names
def exp4():
    """Scatter-plot two mid-term features for the classical and metal sets.

    Extracts features from the two music folders below
    ``AfeExp.data_folder``, keeps the ``spectral_centroid_mean`` and
    ``energy_entropy_mean`` columns and opens an offline plotly scatter
    plot with one trace per class.
    """
    print('pyAudioAnalysis example 4')
    class_names = ['classical', 'metal']
    dirs = [
        '{0}music/classical'.format(AfeExp.data_folder),
        '{0}music/metal'.format(AfeExp.data_folder)
    ]
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05

    # one feature matrix per directory (class)
    features = []
    for folder in dirs:
        matrix, _, fn = aMF.directory_feature_extraction(
            folder, m_win, m_step, s_win, s_step)
        features.append(matrix)
    print(features[0].shape, features[1].shape)

    # column positions of the two features that are plotted
    centroid_col = fn.index('spectral_centroid_mean')
    entropy_col = fn.index('energy_entropy_mean')

    plots = []
    for label, matrix in zip(class_names, features):
        points = np.array([matrix[:, centroid_col], matrix[:, entropy_col]])
        plots.append(
            go.Scatter(x=points[0, :], y=points[1, :], name=label,
                       mode='markers'))

    mylayout = go.Layout(xaxis=dict(title="spectral_centroid_mean"),
                         yaxis=dict(title="energy_entropy_mean"))
    # In a notebook, plotly.offline.iplot(go.Figure(data=plots,
    # layout=mylayout)) can be used instead.
    plotly.offline.plot({'data': plots, 'layout': mylayout}, auto_open=True)
def _store_folder_features(folder_path, window_args, pyaudio_feats):
    """Extract pyAudioAnalysis features for one folder, store them in
    ``pyaudio_feats`` keyed by song base name, and return the feature names.
    """
    pyaudio_feat, song_files, names = \
        MidTermFeatures.directory_feature_extraction(
            folder_path, *window_args, False)
    for index, s in enumerate(song_files):
        s_dict_name = ntpath.basename(s)
        if pyaudio_feat.ndim == 1:
            # special case of indexing when there is only one song
            pyaudio_feats[s_dict_name] = pyaudio_feat
        else:
            pyaudio_feats[s_dict_name] = pyaudio_feat[index]
    return names


def extract_features():
    """Collect pyAudioAnalysis and librosa features for a music folder tree.

    Walks the directory named by ``path`` bottom-up.  ``path``,
    ``progress`` and ``tkobj2`` are not defined in this function and are
    read from the enclosing scope.

    Returns:
        A tuple ``(m3u_paths, librosa_feats, pyaudio_feats, feat_names)``:
        - m3u_paths: mp3 file name -> full path of that file
        - librosa_feats: mp3 file name -> ``[tempo, chroma_num]``
        - pyaudio_feats: song base name -> pyAudioAnalysis feature row
        - feat_names: feature names returned by pyAudioAnalysis; ``None``
          when no folder was visited

    Songs are keyed by file name only, so files with the same name in
    different folders overwrite each other.
    """
    # initialize feature data structures
    m3u_paths = {}
    librosa_feats = {}
    pyaudio_feats = {}
    feat_names = None

    # variables for pyAudioAnalysis feature extraction:
    # (mid-term window, mid-term step, short-term window, short-term step)
    window_args = (1, 1, 0.05, 0.05)

    for root, dirs, files in os.walk(path, topdown=False):
        # extract features from the root directory chosen
        names = _store_folder_features(root, window_args, pyaudio_feats)
        # A folder that yields no names must not wipe names already found.
        if feat_names is None or len(names) > 0:
            feat_names = names

        # update the progress bar
        progress['value'] = 30
        tkobj2.update()

        # extract pyaudioanalysis features from each subfolder
        # NOTE(review): os.walk also yields every sub-directory as `root`,
        # so this looks like it repeats the extraction for most folders.
        # Kept to preserve behaviour (e.g. for symlinked directories) --
        # confirm before removing.
        for folder in dirs:
            names = _store_folder_features(
                os.path.join(root, folder), window_args, pyaudio_feats)
            if len(names) > 0:
                feat_names = names

        # go through all mp3 files to extract librosa features
        for song in files:
            song_path = os.path.join(root, song)
            if not song_path.endswith(".mp3"):
                continue

            # get the tempo of the song; `sr` is passed by keyword because
            # recent librosa releases no longer accept it positionally
            waveform, samp_rate = librosa.load(song_path)
            tempo, beat_frames = librosa.beat.beat_track(
                y=waveform, sr=samp_rate)

            # get the chroma number of the song
            y_harmonic, _ = librosa.effects.hpss(waveform)
            chromagram = librosa.feature.chroma_cqt(
                y=y_harmonic, sr=samp_rate)
            beat_chroma = librosa.util.sync(
                chromagram, beat_frames, aggregate=np.median)

            # calculate the diff of beat chroma to convert information
            # into a single float
            chroma_df = pd.DataFrame(beat_chroma)
            diff_values = chroma_df.diff()
            diff_mean = diff_values.mean(axis=0, skipna=True)
            chroma_num = sum(diff_mean) / len(diff_mean)

            # save the librosa features and the full path of each mp3 song
            librosa_feats[song] = [tempo, chroma_num]
            m3u_paths[song] = song_path

    # update the progress bar
    progress['value'] = 75
    tkobj2.update()

    return (m3u_paths, librosa_feats, pyaudio_feats, feat_names)
def visualizeFeaturesFolder(folder, dimReductionMethod, priorKnowledge="none"):
    '''
    This function generates a chordial visualization for the recordings of
    the provided path.

    ARGUMENTS:
        - folder:             path of the folder that contains the WAV files
                              to be processed
        - dimReductionMethod: method used to reduce the dimension of the
                              initial feature space before computing the
                              similarity. "pca" selects the PCA branch; any
                              other value selects the LDA-then-PCA branch.
        - priorKnowledge:     if this is set equal to "artist", the LDA
                              labels are remapped to the index of each
                              file's category name (the part of the file
                              name before " --- "). Only read in the
                              non-"pca" branch.

    RETURNS:
        None. Prints an error and returns early when the feature
        extraction yields no rows. Otherwise shows a matplotlib scatter
        plot and calls chordialDiagram three times ("visualization",
        "visualizationInitial", "visualizationGroup").
    '''
    if dimReductionMethod == "pca":
        allMtFeatures, wavFilesList, _ = aF.directory_feature_extraction(
            folder, 30.0, 30.0, 0.050, 0.050, compute_beat=True)
        if allMtFeatures.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        # category name = text before " --- " in the extension-less base
        # name (presumably the artist -- see priorKnowledge)
        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        # display name = base name without the '.wav' substring
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]

        (F, MEAN, STD) = aT.normalize_features([allMtFeatures])
        F = np.concatenate(F)

        # check that the new PCA dimension is at most equal to the number
        # of samples
        K1 = 2
        K2 = 10
        if K1 > F.shape[0]:
            K1 = F.shape[0]
        if K2 > F.shape[0]:
            K2 = F.shape[0]
        pca1 = sklearn.decomposition.PCA(n_components=K1)
        pca1.fit(F)
        pca2 = sklearn.decomposition.PCA(n_components=K2)
        pca2.fit(F)

        # finalDims feeds the scatter plot, finalDims2 the similarity
        # matrices further down
        finalDims = pca1.transform(F)
        finalDims2 = pca2.transform(F)
    else:
        # long-term statistics cannot be applied in this context (LDA
        # needs mid-term features)
        allMtFeatures, Ys, wavFilesList = aF.directory_feature_extraction_no_avg(
            folder, 20.0, 5.0, 0.040, 0.040
        )
        if allMtFeatures.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        namesCategoryToVisualize = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wavFilesList
        ]
        namesToVisualize = [
            ntpath.basename(w).replace('.wav', '') for w in wavFilesList
        ]

        # By default LDA is trained on the labels returned by the
        # extraction (presumably one label per source file -- TODO confirm)
        ldaLabels = Ys
        if priorKnowledge == "artist":
            uNamesCategoryToVisualize = list(set(namesCategoryToVisualize))
            YsNew = np.zeros(Ys.shape)
            # for each unique artist name:
            for i, uname in enumerate(
                    uNamesCategoryToVisualize):
                # positions of the files that carry this category name
                indicesUCategories = [
                    j for j, x in enumerate(namesCategoryToVisualize)
                    if x == uname
                ]
                # rows labelled with one of those positions get the
                # category index as their new label
                for j in indicesUCategories:
                    indices = np.nonzero(Ys == j)
                    YsNew[indices] = i
            ldaLabels = YsNew

        (F, MEAN, STD) = aT.normalize_features([allMtFeatures])
        F = np.array(F[0])

        clf = sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
            n_components=10)
        clf.fit(F, ldaLabels)
        reducedDims = clf.transform(F)

        # second stage: project the LDA output down to two dimensions
        pca = sklearn.decomposition.PCA(n_components=2)
        pca.fit(reducedDims)
        reducedDims = pca.transform(reducedDims)

        # TODO: CHECK THIS ... SHOULD LDA USED IN SEMI-SUPERVISED ONLY????

        # uLabels must have as many labels as the number of wavFilesList
        # elements
        uLabels = np.sort(
            np.unique((Ys))
        )
        # NOTE(review): reducedDimsAvg is allocated but not read again in
        # this function.
        reducedDimsAvg = np.zeros((uLabels.shape[0], reducedDims.shape[1]))
        finalDims = np.zeros((uLabels.shape[0], 2))
        # one averaged 2-D point per distinct label value
        for i, u in enumerate(uLabels):
            indices = [j for j, x in enumerate(Ys) if x == u]
            f = reducedDims[indices, :]
            finalDims[i, :] = f.mean(axis=0)
        finalDims2 = reducedDims

    # scatter plot: one red star per row of finalDims, labelled with the
    # base name of the file at the same position
    for i in range(finalDims.shape[0]):
        plt.text(finalDims[i, 0],
                 finalDims[i, 1],
                 ntpath.basename(wavFilesList[i].replace('.wav', '')),
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontsize=10)
        plt.plot(finalDims[i, 0], finalDims[i, 1], '*r')
    plt.xlim([1.2 * finalDims[:, 0].min(), 1.2 * finalDims[:, 0].max()])
    plt.ylim([1.2 * finalDims[:, 1].min(), 1.2 * finalDims[:, 1].max()])
    plt.show()

    # similarity = 1 - cosine distance in the reduced space; the diagonal
    # (self-similarity) is zeroed
    SM = 1.0 - distance.squareform(distance.pdist(finalDims2, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0

    chordialDiagram("visualization", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)

    # same similarity, computed on the normalized features F
    SM = 1.0 - distance.squareform(distance.pdist(F, 'cosine'))
    for i in range(SM.shape[0]):
        SM[i, i] = 0.0
    chordialDiagram("visualizationInitial", SM, 0.50, namesToVisualize,
                    namesCategoryToVisualize)

    # plot super-categories (i.e. artistname)
    # NOTE(review): `sort` is not defined in this function; presumably it
    # comes from a star import elsewhere in the file -- verify.
    uNamesCategoryToVisualize = sort(list(set(namesCategoryToVisualize)))
    finalDimsGroup = np.zeros(
        (len(uNamesCategoryToVisualize), finalDims2.shape[1]))
    # average the reduced vectors of all rows that share a category name
    for i, uname in enumerate(uNamesCategoryToVisualize):
        indices = [
            j for j, x in enumerate(namesCategoryToVisualize) if x == uname
        ]
        f = finalDims2[indices, :]
        finalDimsGroup[i, :] = f.mean(axis=0)

    SMgroup = 1.0 - distance.squareform(
        distance.pdist(finalDimsGroup, 'cosine'))
    for i in range(SMgroup.shape[0]):
        SMgroup[i, i] = 0.0
    chordialDiagram("visualizationGroup", SMgroup, 0.50,
                    uNamesCategoryToVisualize, uNamesCategoryToVisualize)
"""! @brief Example 13 @details pyAudioAnalysis feature extraction for classes organized in folders and feature histogram representation (per feature and class). 3-class classification task: animals vs speech vs music segments @author Theodoros Giannakopoulos {[email protected]} """ from pyAudioAnalysis import MidTermFeatures as aF import os.path import utilities as ut if __name__ == '__main__': dirs = ["../data/general/animals", "../data/general/speech", "../data/general/music"] class_names = [os.path.basename(d) for d in dirs] m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05 features = [] for d in dirs: # get feature matrix for each directory (class) f, files, fn = aF.directory_feature_extraction(d, m_win, m_step, s_win, s_step) features.append(f) ut.plot_feature_histograms(features, fn, class_names)
# Train a one-vs-rest LinearSVC on the mid-term features of the audio
# folder, pickle the fitted model to "<output>.sav" and print a
# classification report for the test split.
parse = parse_arguments()

# Validate the command-line paths.  Exceptions must be exception instances:
# raising a plain string is itself a TypeError in Python 3.
if parse.audio is None:
    raise ValueError('Input directory is Empty')
if not os.path.isdir(parse.audio):
    raise NotADirectoryError('Input path is not a directory')
if parse.groundtruth is None:
    raise ValueError('Ground truth directory is Empty')
# The original re-tested parse.audio here, so the ground-truth path was
# never actually validated.
if not os.path.isdir(parse.groundtruth):
    raise NotADirectoryError('Ground truth path is not a directory')

files, labels = read_data(parse.audio, parse.groundtruth)

# Turn the per-file label collections into a binary indicator matrix.
one_hot = MultiLabelBinarizer()
labels = one_hot.fit_transform(labels)
class_names = [str(c) for c in one_hot.classes_]

mid_window, mid_step, short_window, short_step = 1, 1, 0.1, 0.1
f, fn, feature_names = mF.directory_feature_extraction(
    parse.audio, mid_window, mid_step, short_window, short_step)

X_train, y_train, X_test, y_test = split_data(f, labels, fn)

print("LinearSVc Classifier")
classifier = OneVsRestClassifier(LinearSVC(max_iter=10000), n_jobs=-1)
classifier.fit(X_train, y_train)

# Context manager so the model file is flushed and closed deterministically.
with open(parse.output + ".sav", 'wb') as model_file:
    pickle.dump(classifier, model_file)

test_yhat = classifier.predict(X_test)
print("Testing SVM Classification Report {0} %".format(
    classification_report(y_test, test_yhat, target_names=class_names)))
def visualizeFeaturesFolder(folder, dimReductionMethod, priorKnowledge="none"):
    '''
    This function generates a content visualization for the recordings of
    the provided path.

    ARGUMENTS:
        - folder:             path of the folder that contains the WAV files
                              to be processed
        - dimReductionMethod: method used to reduce the dimension of the
                              initial feature space before computing the
                              similarity. "pca" selects the PCA branch; any
                              other value selects the LDA-then-PCA branch.
        - priorKnowledge:     if this is set equal to "artist", the LDA
                              labels are remapped to the index of each
                              file's category name (the part of the file
                              name before " --- "). Only read in the
                              non-"pca" branch.

    RETURNS:
        None. Prints an error and returns early when the feature
        extraction yields no rows. Otherwise shows a matplotlib scatter
        plot and a plotly heatmap of the category-to-category similarity.
    '''
    if dimReductionMethod == "pca":
        all_mt_feat, wav_files, _ = aF.directory_feature_extraction(
            folder, 30.0, 30.0, 0.050, 0.050, compute_beat=True)
        if all_mt_feat.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        # category name = text before " --- " in the extension-less base
        # name (presumably the artist -- see priorKnowledge)
        names_category_toviz = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wav_files
        ]
        # display name = base name without the '.wav' substring
        # NOTE(review): names_to_viz is not read again in this function.
        names_to_viz = [
            ntpath.basename(w).replace('.wav', '') for w in wav_files
        ]

        # standardize the features before the projections
        scaler = StandardScaler()
        F = scaler.fit_transform(all_mt_feat)

        # check that the new PCA dimension is at most equal
        # to the number of samples
        K1 = 2
        K2 = 10
        if K1 > F.shape[0]:
            K1 = F.shape[0]
        if K2 > F.shape[0]:
            K2 = F.shape[0]
        pca1 = sklearn.decomposition.PCA(n_components=K1)
        pca1.fit(F)
        pca2 = sklearn.decomposition.PCA(n_components=K2)
        pca2.fit(F)

        # finalDims feeds the scatter plot, finalDims2 the group
        # similarity further down
        finalDims = pca1.transform(F)
        finalDims2 = pca2.transform(F)
    else:
        # long-term statistics cannot be applied in this context
        # (LDA needs mid-term features)
        all_mt_feat, Ys, wav_files = aF.\
            directory_feature_extraction_no_avg(folder, 20.0, 5.0, 0.040,
                                                0.040)
        if all_mt_feat.shape[0] == 0:
            print("Error: No data found! Check input folder")
            return

        names_category_toviz = [
            ntpath.basename(w).replace('.wav', '').split(" --- ")[0]
            for w in wav_files
        ]
        names_to_viz = [
            ntpath.basename(w).replace('.wav', '') for w in wav_files
        ]

        # By default LDA is trained on the labels returned by the
        # extraction (presumably one label per source file -- TODO confirm)
        ldaLabels = Ys
        if priorKnowledge == "artist":
            unames_category_toviz = list(set(names_category_toviz))
            YsNew = np.zeros(Ys.shape)
            for i, uname in enumerate(unames_category_toviz):
                # positions of the files that carry this category name
                indicesUCategories = [
                    j for j, x in enumerate(names_category_toviz)
                    if x == uname
                ]
                # rows labelled with one of those positions get the
                # category index as their new label
                for j in indicesUCategories:
                    indices = np.nonzero(Ys == j)
                    YsNew[indices] = i
            ldaLabels = YsNew

        scaler = StandardScaler()
        F = scaler.fit_transform(all_mt_feat)

        clf = sklearn.discriminant_analysis.\
            LinearDiscriminantAnalysis(n_components=10)
        clf.fit(F, ldaLabels)
        reducedDims = clf.transform(F)

        # second stage: project the LDA output down to two dimensions
        pca = sklearn.decomposition.PCA(n_components=2)
        pca.fit(reducedDims)
        reducedDims = pca.transform(reducedDims)

        # TODO: CHECK THIS ... SHOULD LDA USED IN SEMI-SUPERVISED ONLY????

        # uLabels must have as many labels as the number of wav_files elements
        uLabels = np.sort(np.unique((Ys)))
        # NOTE(review): reducedDimsAvg is allocated but not read again in
        # this function.
        reducedDimsAvg = np.zeros((uLabels.shape[0], reducedDims.shape[1]))
        finalDims = np.zeros((uLabels.shape[0], 2))
        # one averaged 2-D point per distinct label value
        for i, u in enumerate(uLabels):
            indices = [j for j, x in enumerate(Ys) if x == u]
            f = reducedDims[indices, :]
            finalDims[i, :] = f.mean(axis=0)
        finalDims2 = reducedDims

    # scatter plot: one red star per row of finalDims, labelled with the
    # base name of the file at the same position
    for i in range(finalDims.shape[0]):
        plt.text(finalDims[i, 0],
                 finalDims[i, 1],
                 ntpath.basename(wav_files[i].replace('.wav', '')),
                 horizontalalignment='center',
                 verticalalignment='center',
                 fontsize=10)
        plt.plot(finalDims[i, 0], finalDims[i, 1], '*r')
    plt.xlim([1.2 * finalDims[:, 0].min(), 1.2 * finalDims[:, 0].max()])
    plt.ylim([1.2 * finalDims[:, 1].min(), 1.2 * finalDims[:, 1].max()])
    plt.show()

    # NOTE(review): SM is computed here but not read again in this
    # function.
    SM = 1.0 - distance.squareform(distance.pdist(F, 'cosine'))

    # plot super-categories (i.e. artistname)
    # NOTE(review): `sort` is not defined in this function; presumably it
    # comes from a star import elsewhere in the file -- verify.
    unames_category_toviz = sort(list(set(names_category_toviz)))
    finalDimsGroup = np.zeros(
        (len(unames_category_toviz), finalDims2.shape[1]))
    # average the reduced vectors of all rows that share a category name
    for i, uname in enumerate(unames_category_toviz):
        indices = [j for j, x in enumerate(names_category_toviz) if x == uname]
        f = finalDims2[indices, :]
        finalDimsGroup[i, :] = f.mean(axis=0)

    # similarity between categories = 1 - cosine distance of their means
    SMgroup = 1.0 - distance.squareform(
        distance.pdist(finalDimsGroup, 'cosine'))
    data = SMgroup

    # heatmap with the category names on both axes, x labels on top
    fig = px.imshow(data,
                    labels=dict(x="", y="", color="Category similarity"),
                    x=unames_category_toviz,
                    y=unames_category_toviz)
    fig.update_xaxes(side="top")
    fig.show()
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image
from pyAudioAnalysis import MidTermFeatures as mtf
import numpy as np
import pickle
import os.path

# Load the standardized mid-term features from the local cache file when it
# exists; otherwise extract them from the 'audio' folder and create the
# cache.
# NOTE: pickle.load must only be used on files this script wrote itself.
if os.path.isfile('data.pkl'):
    with open('data.pkl', 'rb') as f:
        mid_term_features_2 = pickle.load(f)
        wav_file_list2 = pickle.load(f)
else:
    # Extract BEFORE opening the cache for writing: opening first would
    # leave an empty data.pkl behind if extraction failed, and the next run
    # would then crash in pickle.load.
    mid_term_features, wav_file_list2, mid_feature_names = \
        mtf.directory_feature_extraction('audio', 1, 1, 0.2, 0.2)
    # keep the first 128 feature columns only
    mid_term_features = mid_term_features[:, 0:128]

    # per-column standardization
    m = mid_term_features.mean(axis=0)
    s = np.std(mid_term_features, axis=0)
    # A constant column has zero standard deviation; divide by 1 instead so
    # it becomes 0 rather than NaN/inf.
    s = np.where(s == 0, 1.0, s)
    mid_term_features_2 = (mid_term_features - m) / s

    with open('data.pkl', 'wb') as f:
        pickle.dump(mid_term_features_2, f)
        pickle.dump(wav_file_list2, f)

x = torch.tensor(mid_term_features_2, dtype=torch.float32)

# training hyper-parameters
num_epochs = 200
batch_size = 128
learning_rate = 1e-3

dataset = TensorDataset(x)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
The pyAudioAnalysis has two functions in order to extract a bunch of useful features from a wav file. ''' from pyAudioAnalysis import MidTermFeatures as mF import numpy as np import pandas as pd import os basepath_train_cough = 'C:/Users/Guillem/Desktop/HACKATHON 2020/Unlabeled audio/TRAIN/Cough/' basepath_train_nocough = 'C:/Users/Guillem/Desktop/HACKATHON 2020/Unlabeled audio/TRAIN/No_Cough/' [mid_term_features_cough, wav_file_list_cough, mid_feature_names] = mF.directory_feature_extraction(basepath_train_cough, 0.1, 0.1, 0.01, 0.01, compute_beat=False) [mid_term_features_nocough, wav_file_list_nocough, mid_feature_names] = mF.directory_feature_extraction(basepath_train_nocough, 0.1, 0.1, 0.01, 0.01, compute_beat=False) label_nocough = np.zeros(np.shape(mid_term_features_nocough)[0]) label_cough = np.ones(np.shape(mid_term_features_cough)[0]) features = np.concatenate( (mid_term_features_cough,
def exp5():
    """Train an RBF SVM on two audio features and plot its decision surface.

    Extracts mid-term features from the classical and metal folders below
    ``AfeExp.data_folder``, keeps ``spectral_centroid_mean`` and
    ``energy_entropy_mean``, fits ``sks.SVC(kernel='rbf', C=20)`` on them
    and opens an offline plotly figure showing both classes on top of a
    heatmap of the classifier output over a regular grid.
    """
    print('pyAudioAnalysis example 5')
    dirs = [
        '{0}music/classical'.format(AfeExp.data_folder),
        '{0}music/metal'.format(AfeExp.data_folder)
    ]
    class_names = ['classical', 'metal']
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05

    # get feature matrix for each directory (class)
    features = []
    for d in dirs:
        matrix, _, fn = aMF.directory_feature_extraction(
            d, m_win, m_step, s_win, s_step)
        features.append(matrix)
    print(features[0].shape, features[1].shape)

    # Look the two feature columns up once and select them for both
    # classes; each result has shape (2, number of rows of that class).
    cols = [fn.index('spectral_centroid_mean'),
            fn.index('energy_entropy_mean')]
    f1, f2 = (np.array([m[:, c] for c in cols]) for m in features)
    print('f1 type:{0}; shape:{1}; value:{2};'.format(
        type(f1), f1.shape, f1))
    print('f2 type:{0}; shape:{1}; value:{2};'.format(
        type(f2), f2.shape, f2))

    # labels: 0 for the first class, 1 for the second
    y = np.concatenate((np.zeros(f1.shape[1]), np.ones(f2.shape[1])))
    X = np.concatenate((f1.T, f2.T), axis=0)
    print('y: {0}; {1};'.format(y.shape, y))
    print('X: {0}; {1};'.format(X.shape, X))

    # train the svm classifier
    cl = sks.SVC(kernel='rbf', C=20)
    cl.fit(X, y)

    p1 = go.Scatter(x=f1[0, :], y=f1[1, :], name=class_names[0],
                    marker=dict(size=10, color='rgba(255, 182, 193, .9)'),
                    mode='markers')
    p2 = go.Scatter(x=f2[0, :], y=f2[1, :], name=class_names[1],
                    marker=dict(size=10, color='rgba(100, 100, 220, .9)'),
                    mode='markers')

    # apply the trained model on the points of a grid
    x_ = np.arange(X[:, 0].min(), X[:, 0].max(), 0.002)
    y_ = np.arange(X[:, 1].min(), X[:, 1].max(), 0.002)
    xx, yy = np.meshgrid(x_, y_)
    X_t = np.c_[xx.ravel(), yy.ravel()]
    print('X_t: {0};'.format(X_t.shape))
    # Reuse X_t instead of rebuilding the same grid matrix for predict().
    Z = cl.predict(X_t).reshape(xx.shape) / 2

    # and visualize the grid on the same plot (decision surfaces)
    cs = go.Heatmap(x=x_, y=y_, z=Z, showscale=False,
                    colorscale=[[0, 'rgba(255, 182, 193, .3)'],
                                [1, 'rgba(100, 100, 220, .3)']])
    # Built once; the original constructed the identical layout twice.
    mylayout = go.Layout(xaxis=dict(title="spectral_centroid_mean"),
                         yaxis=dict(title="energy_entropy_mean"))
    # In a notebook, plotly.offline.iplot(go.Figure(data=[p1, p2, cs],
    # layout=mylayout)) can be used instead.
    plotly.offline.plot({'data': [p1, p2, cs], 'layout': mylayout},
                        auto_open=True)
for audio in os.listdir(directory): wav_file_path = os.path.join(directory, audio) with sf.SoundFile(wav_file_path) as f: duration = (len(f) / f.samplerate) if (duration < 1): os.remove(wav_file_path) remove_len = len(os.listdir(directory)) print('In total, {} audios have been removed due to short duration'.format(original_len-remove_len)) check_duration(pos_path) check_duration(neg_path) [mid_term_features_pos, wav_file_list_pos, mid_feature_names] = mF.directory_feature_extraction(pos_path, 0.5,0.5, 0.05, 0.05, compute_beat=False) [mid_term_features_neg, wav_file_list_neg, mid_feature_names] = mF.directory_feature_extraction(neg_path, 0.5,0.5, 0.05, 0.05, compute_beat=False) filenames_pos = [] for file in wav_file_list_pos: filenames_pos.append(file.split('/')[-1].split('\\')[-1].split('.')[0]) filenames_neg = [] for file in wav_file_list_neg: filenames_neg.append(file.split('/')[-1].split('\\')[-1].split('.')[0]) df_pos = pd.DataFrame(mid_term_features_pos, columns = mid_feature_names) df_pos['filename'] = filenames_pos df_pos['label'] = np.ones(len(df_pos)) df_neg = pd.DataFrame(mid_term_features_neg, columns = mid_feature_names)