def plotfeats(dict_train_features, mask=arange(10) + 8):
    """Plot the features listed in ``mask``: first a per-class histogram figure
    for each feature, then a per-feature figure of stem plots (one subplot per
    class).

    :param dict_train_features: per-class feature matrices, indexed 0..n_classes-1,
        each of shape (n_files, n_features)
    :param mask: iterable of feature column indices to plot
    """
    for feat in mask:
        plt.figure(figsize=(10, 10))
        for i in arange(len(classes())):
            # sns.distplot was deprecated in seaborn 0.11 and removed in 0.14;
            # histplot(kde=True, stat='density') is the documented replacement.
            sns.histplot(dict_train_features[i][:, feat],
                         kde=True,
                         stat='density',
                         label='Histogram for {} of feature {}'.format(
                             classes()[i], featuresnames()[feat]))
        plt.legend()
        plt.grid(True)
    for d, c in enumerate(mask):
        plt.figure(figsize=(10, 10))
        for i in arange(len(classes())):
            plt.subplot(len(classes()), 1, i + 1)
            plt.ylim(0, 1)
            feature = dict_train_features[i].transpose()[c]
            # use_line_collection was removed in matplotlib 3.8 (it is now
            # always enabled), so it must not be passed any more.
            plt.stem(feature, markerfmt='C3.', basefmt='C9-')
            if i == 0:
                plt.title(featuresnames()[c] + ' coefficients')
            plt.ylabel(classes()[i])
            plt.grid(True)
    plt.show()
def getpredictions(X_train_normalized, y_train, X_test_mc_normalized):
    """Train a one-vs-one SVM for every pair of classes, print cross-validated
    scores, and classify the multiclass test set by majority vote.

    :param X_train_normalized: per-class list of normalized train matrices
    :param y_train: per-class list of label vectors (class index per row)
    :param X_test_mc_normalized: normalized multiclass test matrix
    :return: 1-D array of majority-vote predicted class indices
    """
    scores = ['accuracy', 'precision_micro', 'recall_macro', 'f1_macro', 'roc_auc']
    overall_scores = {s: [] for s in scores}
    couples = list(itertools.combinations(classes(), 2))
    num_couples = len(couples)
    num_test_files = X_test_mc_normalized.shape[0]
    scores_per_couple = np.zeros((num_couples, len(scores)))
    y_test_predicted_mc = []
    # loop-invariant: same SVM configuration for every class pair
    SVM_parameters = {
        'C': 1,
        'kernel': 'rbf',
    }
    for n in np.arange(num_couples):
        class_0, class_1 = couples[n]
        index_0 = classes().index(class_0)
        index_1 = classes().index(class_1)
        clf = sklearn.svm.SVC(**SVM_parameters, probability=True)
        # binary training set for this pair of classes
        y = np.concatenate((y_train[index_0], y_train[index_1]))
        X = np.concatenate((X_train_normalized[index_0],
                            X_train_normalized[index_1]), axis=0)
        clf.fit(X, y)
        print(class_0, "/", class_1, "\nCross validated scores for kfolds = ",
              kfold(), ": \n")
        for s in scores:
            score_array = np.around(cross_val_score(clf, X, y, cv=kfold(),
                                                    scoring=s),
                                    decimals=4) * 100
            score = np.around(np.average(score_array), decimals=2)
            print(s, "\t", score_array, "\t\taverage:", score)
            scores_per_couple[n, scores.index(s)] = score
        print("\n")
        # predict the whole multiclass test set with this binary classifier
        y_predicted = clf.predict(X_test_mc_normalized).reshape(-1, 1)
        y_test_predicted_mc = np.append(y_test_predicted_mc, y_predicted)
    for s in scores:
        final_score = np.average(scores_per_couple.transpose()[scores.index(s)])
        overall_scores[s] = np.around(final_score, decimals=2)
    print("Average of crossvalidated scores considering all binary cases")
    print(overall_scores)
    # one column per couple -> one row per test file
    y_test_predicted_mc = y_test_predicted_mc.reshape(
        num_couples, num_test_files).transpose()
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented replacement (same dtype).
    y_test_predicted_mc = np.array(y_test_predicted_mc, dtype=int)
    y_test_predicted_mv = np.zeros((y_test_predicted_mc.shape[0],))
    for i, e in enumerate(y_test_predicted_mc):
        # majority vote across the per-couple predictions
        y_test_predicted_mv[i] = np.bincount(e).argmax()
    return y_test_predicted_mv
def plotfeats_per_class(dict_train_features):
    """For every class, draw one figure with a stem subplot per feature.

    :param dict_train_features: per-class feature matrices, indexed 0..n_classes-1
    """
    for c in arange(len(classes())):
        plt.figure(figsize=(10, 10))
        for i in arange(len(featuresnames())):
            plt.subplot(len(featuresnames()), 1, i + 1)
            plt.ylim(0, 1)
            feature = dict_train_features[c].transpose()[i]
            # use_line_collection was removed in matplotlib 3.8 (it is now
            # always enabled), so it must not be passed any more.
            plt.stem(feature)
            if i == 0:
                plt.title(classes()[c] + ' coefficients')
            plt.ylabel(featuresnames()[i])
            plt.grid(True)
    plt.show()
def get_metrics(dict_test_features):
    """Train and evaluate a binary SVM for every pair of classes on the test
    features, then aggregate the per-pair metrics.

    :param dict_test_features: dict mapping class name -> test feature matrix
    """
    metrics_matrix = []
    couples = list(itertools.combinations(classes(), 2))
    for class_0, class_1 in couples:
        X_train_0 = dataloader.dict_train_features(class_0)
        X_train_1 = dataloader.dict_train_features(class_1)
        X_train = np.concatenate((X_train_0, X_train_1), axis=0)
        X_train = X_train[:, dataloader.columns_selected()]
        y_train_0 = np.zeros((X_train_0.shape[0],))
        y_train_1 = np.ones((X_train_1.shape[0],))
        y_train = np.concatenate((y_train_0, y_train_1), axis=0)
        X_test_0 = dict_test_features[class_0]
        X_test_1 = dict_test_features[class_1]
        X_test = np.concatenate((X_test_0, X_test_1), axis=0)
        X_test = X_test[:, dataloader.columns_selected()]
        y_test_0 = np.zeros((X_test_0.shape[0],))
        y_test_1 = np.ones((X_test_1.shape[0],))
        y_test = np.concatenate((y_test_0, y_test_1), axis=0)
        # min-max normalize both sets with the *training* bounds only
        feat_max = np.max(X_train, axis=0)
        feat_min = np.min(X_train, axis=0)
        X_train_normalized = (X_train - feat_min) / (feat_max - feat_min)
        X_test_normalized = (X_test - feat_min) / (feat_max - feat_min)
        SVM_parameters = {
            'C': 1,
            'kernel': 'rbf',
        }
        clf = sklearn.svm.SVC(**SVM_parameters)
        clf.fit(X_train_normalized, y_train)
        y_test_predicted = clf.predict(X_test_normalized)
        print("{} // {}".format(class_0, class_1))
        metrics_couple = compute_metrics(y_test, y_test_predicted)
        metrics_matrix = np.append(metrics_matrix, metrics_couple)
    # FIX: one row of 4 metrics per class *pair*, so reshape by C(n, 2) rather
    # than by the number of classes n; the original (n, 4) only worked because
    # C(3, 2) == 3.  Also drops the unused locals n and m.
    metrics_matrix = np.reshape(metrics_matrix, (len(couples), 4))
    compute_overall_metrics(metrics_matrix)
def savedata(dict_train_features, featurelst, feat_max, feat_min):
    """Persist the training artifacts as pickle dumps in the working directory.

    Writes one ``dict_train_features_<class>.dat`` file per class, the
    normalization bounds, and the feature-selection results.

    :return: True on completion
    """
    for class_name in user_interface.classes():
        dict_train_features[class_name].dump(
            'dict_train_features_' + class_name + '.dat')
    # stack the two bound vectors so one file holds [max, min]
    bounds = np.array([feat_max, feat_min])
    bounds.dump('feat_max_min.dat')
    featurelst['featurematrix'].dump('features_selected.dat')
    featurelst['selectedcolumns'].dump('columns_selected.dat')
    return True
def test():  # begin compute and select features
    """Run the test stage: obtain per-class test features, normalize them with
    the stored training bounds, classify with the one-vs-one SVMs and print
    metrics plus the multiclass confusion matrix.

    :return: True on completion
    """
    path = pathlib.Path(__file__).parent.absolute()
    classes = user_interface.classes()
    if user_interface.generate_datasets():
        # NOTE(review): when generate_datasets() is true the features are
        # loaded via dataloader rather than recomputed — confirm flag semantics.
        dict_test_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
        for c in classes:
            dict_test_features[c] = dataloader.dict_test_feats(c)
    else:
        dict_test_features = testloop.getdicttestfeatures(
            path)  # test features
    X_test = [dict_test_features[c] for c in user_interface.classes()]
    columns_selected = dataloader.columns_selected(
    )  # positions of selected features
    X_test_selected = [
        X_test[i][:, columns_selected]
        for i in np.arange(len(user_interface.classes()))
    ]  # selection
    y_test = [
        np.ones(X_test[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]  # keys: label c for every row of class c
    y_test_mc = np.concatenate((y_test[0], y_test[1], y_test[2]), axis=0)
    # min-max normalize the selected test columns with the TRAINING bounds
    X_test_normalized = [
        ((X_test_selected[c] - dataloader.featmin()[columns_selected]) /
         (dataloader.featmax()[columns_selected] -
          dataloader.featmin()[columns_selected]))
        for c in np.arange(len(user_interface.classes()))
    ]  # normalized matrix
    X_train_normalized_loaded = [
        (dataloader.dict_train_features(c) - dataloader.featmin()) /
        (dataloader.featmax() - dataloader.featmin())
        for c in user_interface.classes()
    ]  # train features
    X_train_normalized_loaded_selected = [
        X_train_normalized_loaded[i][:, columns_selected]
        for i in np.arange(len(user_interface.classes()))
    ]  # selection
    X_test_mc_normalized = np.concatenate(
        (X_test_normalized[0], X_test_normalized[1], X_test_normalized[2]),
        axis=0)
    y_train_selected = [
        np.ones(X_train_normalized_loaded_selected[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]  # end compute and select features
    y_test_predicted_mv = supportvectormachines.getpredictions(
        X_train_normalized_loaded_selected, y_train_selected,
        X_test_mc_normalized)  # SVM
    print('\n\nMetrics:')
    metrics.get_metrics(dict_test_features)
    print('\n\nConfusion matrix:')
    confusionmatrix.compute_cm_multiclass(
        y_test_mc, y_test_predicted_mv)  # print confusion matrix
    return True
def plotfeature(feat, i=-1):
    """Stem-plot a single feature vector; when ``i`` is given, label the axes
    with the i-th feature name.

    :param feat: 1-D sequence of feature values to plot
    :param i: feature index for labelling, or -1 for no labels
    """
    plt.figure(figsize=(18, 4))
    plt.subplot(1, 1, 1)
    # use_line_collection was removed in matplotlib 3.8 (it is now always
    # enabled), so it must not be passed any more.
    plt.stem(feat)
    if i != -1:
        plt.ylabel(featuresnames()[i] + ' coefficients')
        # NOTE(review): classes() is indexed with the *feature* index i here —
        # verify this cross-indexing is intentional.
        plt.title(featuresnames()[i] + ' coefficients {}'.format(classes()[i]))
    plt.grid(True)
    plt.show()
def train():
    """Run the training stage: obtain per-class train features, min-max
    normalize them, run feature selection, plot, and persist all artifacts
    for the test stage.

    :return: True on completion
    """
    path = pathlib.Path(__file__).parent.absolute()
    classes = user_interface.classes()
    if user_interface.generate_datasets():
        # NOTE(review): when generate_datasets() is true the split is rebuilt
        # and features reloaded via dataloader — confirm flag semantics.
        main_traintest.traintest(path)
        dict_train_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
        for c in classes:
            dict_train_features[c] = dataloader.dict_train_features(c)
    else:
        dict_train_features = trainingloop.getdicttrainfeatures(
            path)  # compute train features
    X_train = [dict_train_features[c] for c in classes]
    y_train = [
        np.ones(X_train[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]  # keys: label c for every row of class c
    # per-feature bounds over ALL classes, reused later to normalize test data
    feat_max = np.max(np.concatenate((X_train[0], X_train[1], X_train[2]),
                                     axis=0),
                      axis=0)
    feat_min = np.min(np.concatenate((X_train[0], X_train[1], X_train[2]),
                                     axis=0),
                      axis=0)
    X_train_normalized = [(X_train[c] - feat_min) / (feat_max - feat_min)
                          for c in np.arange(len(user_interface.classes()))
                          ]  # normalized matrix
    X_train_mc_normalized = np.concatenate(
        (X_train_normalized[0], X_train_normalized[1], X_train_normalized[2]),
        axis=0)
    y_train_mc = np.concatenate((y_train[0], y_train[1], y_train[2]), axis=0)
    featurelst = featureselection.getfeaturelist(
        X_train_mc_normalized, y_train_mc)  # feature selection
    if user_interface.do_plot():
        plotfeatures.plotfeats(X_train_normalized,
                               mask=np.arange(10) + 7)  # plot train features
    else:
        plotselected.plotsel(X_train_normalized, featurelst['selectedcolumns'])
    savetraindata.savedata(dict_train_features, featurelst, feat_max,
                           feat_min)  # save data
    print('All features')
    print(user_interface.featuresnames())
    print_feature_sel.print_features(featurelst)
    return True
def traintest(path):
    """Compute all train features, split them into train/test sets, regroup
    the rows per class, and persist the two per-class dictionaries.

    :param path: project root used to locate the audio databases
    :return: (dict_train_feats, dict_test_feats), class name -> feature matrix
    """
    dict_features = trainingloop.getdicttrainfeatures(
        path)  # compute train features
    classes = user_interface.classes()
    test_size = user_interface.test_size()
    X = [dict_features[c] for c in classes]
    y = [np.ones(X[i].shape[0], ) * i for i in np.arange(len(classes))]  # keys
    y_mc = np.concatenate(y, axis=0)
    X_mc = np.concatenate(X, axis=0)
    X_train, X_test, y_train, y_test = train_test_split(X_mc,
                                                        y_mc,
                                                        test_size=test_size)
    dict_train_feats = {}
    dict_test_feats = {}
    for c, name in enumerate(classes):
        # Boolean-mask selection replaces the original element-by-element copy
        # loops.  Labels are exactly 0..len(classes)-1, so the original
        # `np.mod(y, 3) == c` test (hard-coded to 3 classes) reduces to plain
        # equality, which also generalizes to any number of classes.
        dict_train_feats[name] = X_train[y_train == c]
        dict_test_feats[name] = X_test[y_test == c]
    savetraindata.save_datasets(dict_train_feats, dict_test_feats)
    return dict_train_feats, dict_test_feats
def getdicttrainfeatures(path):
    """Build the training feature matrix for every class.

    Scans ``<path>/environment/databases/train/<class>`` for .wav files and
    extracts one feature row per file.

    :param path: project root containing the databases folder
    :return: dict mapping class name -> (n_files, n_features) matrix
    """
    dict_train_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
    fullpath = str(path) + '/environment/databases/train/{}'
    # one feature matrix per class
    for class_name in user_interface.classes():
        n_features = len(user_interface.featuresnames())
        train_root = fullpath.format(class_name)
        wav_names = [
            name for name in os.listdir(train_root) if name.endswith('.wav')
        ]
        train_features = np.zeros((len(wav_names), n_features))
        # one row per audio file of this class
        for row, name in enumerate(wav_names):
            audio, fs = librosa.load(os.path.join(train_root, name), sr=None)
            train_features[row, :] = features.getfeatures(audio)
        dict_train_features[class_name] = train_features
    return dict_train_features
import plotfeatures


def plotsel(features, colums_selected):
    """Plot only the selected feature columns (thin wrapper over plotfeats).

    :param features: per-class list of (normalized) feature matrices
    :param colums_selected: column indices of the selected features
    """
    plotfeatures.plotfeats(features, colums_selected)


if __name__ == "__main__":  # executable from terminal
    from environment.modules import dataloader
    from environment.modules.analysislab.user_interface import classes
    dict_tr = [(dataloader.dict_train_features(c) - dataloader.featmin()) /
               (dataloader.featmax() - dataloader.featmin())
               for c in classes()]
    # FIX: was dataloader.colums_selected() (missing 'n'); every other module
    # in this project calls dataloader.columns_selected(), so the script
    # branch raised AttributeError.
    plotsel(dict_tr, dataloader.columns_selected())
print('\n\nSelected matrix:') print([featuresnames()[i] for i in featurelst['selectedcolumns']]) print(featurelst['featurematrix']) print('\nfeature scores:') print(featuresnames()) print(featurelst['scores']) print('\nselected features') print([featuresnames()[i] for i in featurelst['selectedcolumns']]) if __name__ == '__main__': from environment.modules import featureselection, dataloader from environment.modules.analysislab.user_interface import classes import numpy as np X_train_normalized = [ (dataloader.dict_train_features(c) - dataloader.featmin()) / (dataloader.featmax() - dataloader.featmin()) for c in classes() ] y_train = [ np.ones(X_train_normalized[i].shape[0], ) * i for i in np.arange(len(classes())) ] X_train_mc_normalized = np.concatenate( (X_train_normalized[0], X_train_normalized[1], X_train_normalized[2]), axis=0) y_train_mc = np.concatenate((y_train[0], y_train[1], y_train[2]), axis=0) featurelst = featureselection.getfeaturelist(X_train_mc_normalized, y_train_mc) print_features(featurelst)
def save_datasets(dict_train_feats, dict_test_feats):
    """Pickle-dump the per-class train and test feature matrices.

    Writes ``dict_train_features_<class>.dat`` and then
    ``dict_test_feats_<class>.dat`` for every class.

    :return: True on completion
    """
    for class_name in user_interface.classes():
        train_file = 'dict_train_features_' + class_name + '.dat'
        dict_train_feats[class_name].dump(train_file)
    for class_name in user_interface.classes():
        test_file = 'dict_test_feats_' + class_name + '.dat'
        dict_test_feats[class_name].dump(test_file)
    return True