Example no. 1
def plotfeats(dict_train_features, mask=arange(10) + 8):
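    """For every feature index in `mask`, plot the overlaid per-class value
    distributions, then a stem subplot of the raw values for each class."""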
    for feat_index, feat in enumerate(mask):

        plt.figure(figsize=(10, 10))
        for i in arange(len(classes())):
            sns.distplot(dict_train_features[i][:, feat],
                         label='Histogram for {} of feature {}'.format(
                             classes()[i],
                             featuresnames()[feat]))

        plt.legend()
        plt.grid(True)

    for d, c in enumerate(mask):
        plt.figure(figsize=(10, 10))
        for i in arange(len(classes())):
            plt.subplot(len(classes()), 1, i + 1)
            plt.ylim(0, 1)
            feature = dict_train_features[i].transpose()[c]
            plt.stem(feature,
                     markerfmt='C3.',
                     basefmt='C9-',
                     use_line_collection=True)
            if i == 0:
                plt.title(featuresnames()[c] + ' coefficients')
            plt.ylabel(classes()[i])
            plt.grid(True)
    plt.show()
Example no. 2
def getpredictions(X_train_normalized, y_train, X_test_mc_normalized):
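    """One-vs-one multiclass SVM: fit an RBF SVC for every pair of classes,
    print its cross-validated scores, predict on the whole test set and
    combine the pairwise predictions by majority vote."""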

    scores = ['accuracy', 'precision_micro', 'recall_macro', 'f1_macro', 'roc_auc']
    overall_scores = {'accuracy': [], 'precision_micro': [], 'recall_macro': [], 'f1_macro': [], 'roc_auc': []}

    couples = list(itertools.combinations(classes(), 2))
    num_couples = len(couples)
    num_test_files = X_test_mc_normalized.shape[0]

    scores_per_couple = np.zeros((num_couples, len(scores)))
    y_test_predicted_mc = []

    for n in np.arange(num_couples):
        class_0 = couples[n][0]
        index_0 = classes().index(class_0)
        class_1 = couples[n][1]
        index_1 = classes().index(class_1)

        SVM_parameters = {
            'C': 1,
            'kernel': 'rbf',
        }

        clf = sklearn.svm.SVC(**SVM_parameters, probability=True)
        X_train_0 = X_train_normalized[index_0]
        X_train_1 = X_train_normalized[index_1]
        y_train_0 = y_train[index_0]
        y_train_1 = y_train[index_1]
        y = np.concatenate((y_train_0, y_train_1))
        X = np.concatenate((X_train_0, X_train_1), axis=0)

        clf.fit(X, y)
        print(class_0, "/", class_1, "\nCross-validated scores for kfolds =", kfold(), ": \n")

        for s in scores:
            score_array = np.around(cross_val_score(clf, X, y, cv=kfold(), scoring=s), decimals=4) * 100
            score = np.around(np.average(score_array), decimals=2)
            print(s,"\t", score_array,"\t\taverage:", score)
            scores_per_couple[n, scores.index(s)] = score
        print("\n")
        y_predicted = clf.predict(X_test_mc_normalized).reshape(-1, 1)
        y_test_predicted_mc = np.append(y_test_predicted_mc, y_predicted)

    for s in scores:
        final_score = np.average(scores_per_couple.transpose()[scores.index(s)])
        overall_scores[s] = np.around(final_score, decimals = 2)

    print("Average of crossvalidated scores considering all binary cases")
    print(overall_scores)

    y_test_predicted_mc = y_test_predicted_mc.reshape(num_couples, num_test_files).transpose()
    y_test_predicted_mc = np.array(y_test_predicted_mc, dtype=int)  # np.int is removed in recent NumPy
    y_test_predicted_mv = np.zeros((y_test_predicted_mc.shape[0],))

    for i, e in enumerate(y_test_predicted_mc):
        y_test_predicted_mv[i] = np.bincount(e).argmax()

    return y_test_predicted_mv
Example no. 3
def plotfeats_per_class(dict_train_features):
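    """For each class, draw one figure containing a stem subplot per feature."""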
    for c in arange(len(classes())):
        plt.figure(figsize=(10, 10))
        for i in arange(len(featuresnames())):
            plt.subplot(len(featuresnames()), 1, i + 1)
            plt.ylim(0, 1)
            feature = dict_train_features[c].transpose()[i]
            plt.stem(feature, use_line_collection=True)
            if i == 0:
                plt.title(classes()[c] + ' coefficients')
            plt.ylabel(featuresnames()[i])
            plt.grid(True)
        plt.show()
Example no. 4
def get_metrics(dict_test_features):
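    """Fit a binary RBF SVM for every pair of classes on the stored train
    features, evaluate it on the corresponding test features and print the
    per-pair and overall metrics."""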
    metrics_matrix = []
    n = len(classes())  # number of classes; with the 3 classes used here this also equals the number of class pairs
    m = 4  # metrics per class pair (see the reshape below)
    for subset in itertools.combinations(classes(), 2):
        class_0 = subset[0]
        class_1 = subset[1]


        X_train_0 = dataloader.dict_train_features(class_0)
        X_train_1 = dataloader.dict_train_features(class_1)
        X_train = np.concatenate((X_train_0, X_train_1), axis=0)
        X_train = X_train[:, dataloader.columns_selected()]

        y_train_0 = np.zeros((X_train_0.shape[0],))
        y_train_1 = np.ones((X_train_1.shape[0],))
        y_train = np.concatenate((y_train_0, y_train_1), axis=0)

        X_test_0 = dict_test_features[class_0]
        X_test_1 = dict_test_features[class_1]
        X_test = np.concatenate((X_test_0, X_test_1), axis=0)
        X_test = X_test[:, dataloader.columns_selected()]

        y_test_0 = np.zeros((X_test_0.shape[0],))
        y_test_1 = np.ones((X_test_1.shape[0],))
        y_test = np.concatenate((y_test_0, y_test_1), axis=0)

        feat_max = np.max(X_train, axis=0)
        feat_min = np.min(X_train, axis=0)
        X_train_normalized = (X_train - feat_min) / (feat_max - feat_min)
        X_test_normalized = (X_test - feat_min) / (feat_max - feat_min)

        SVM_parameters = {
            'C': 1,
            'kernel': 'rbf',
        }

        clf = sklearn.svm.SVC(**SVM_parameters)

        clf.fit(X_train_normalized, y_train)
        y_test_predicted = clf.predict(X_test_normalized)

        print("{} // {}".format(class_0, class_1))
        metrics_couple = compute_metrics(y_test, y_test_predicted)
        metrics_matrix = np.append(metrics_matrix, metrics_couple)

    metrics_matrix = np.reshape(metrics_matrix, (n, m))

    compute_overall_metrics(metrics_matrix)
def savedata(dict_train_features, featurelst, feat_max, feat_min):
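    """Dump the per-class train features, the normalization bounds and the
    feature-selection results to .dat files."""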
    for c in user_interface.classes():
        dict_train_features[c].dump('dict_train_features_' + c + '.dat')
    np.array([feat_max, feat_min]).dump('feat_max_min.dat')
    featurelst['featurematrix'].dump('features_selected.dat')
    featurelst['selectedcolumns'].dump('columns_selected.dat')
    return True
Example no. 6
def test():
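    """Load (or recompute) the test features, select and normalize them with
    the stored training statistics, classify them with the one-vs-one SVM and
    print the metrics and confusion matrix."""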
    # begin compute and select features
    path = pathlib.Path(__file__).parent.absolute()
    classes = user_interface.classes()
    if user_interface.generate_datasets():
        dict_test_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
        for c in classes:
            dict_test_features[c] = dataloader.dict_test_feats(c)
    else:
        dict_test_features = testloop.getdicttestfeatures(
            path)  # test features
    X_test = [dict_test_features[c] for c in user_interface.classes()]
    columns_selected = dataloader.columns_selected(
    )  # positions of selected features
    X_test_selected = [
        X_test[i][:, columns_selected]
        for i in np.arange(len(user_interface.classes()))
    ]  # selection
    y_test = [
        np.ones(X_test[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]  # keys
    y_test_mc = np.concatenate((y_test[0], y_test[1], y_test[2]), axis=0)
    X_test_normalized = [
        ((X_test_selected[c] - dataloader.featmin()[columns_selected]) /
         (dataloader.featmax()[columns_selected] -
          dataloader.featmin()[columns_selected]))
        for c in np.arange(len(user_interface.classes()))
    ]  # normalized matrix
    X_train_normalized_loaded = [
        (dataloader.dict_train_features(c) - dataloader.featmin()) /
        (dataloader.featmax() - dataloader.featmin())
        for c in user_interface.classes()
    ]  # train features
    X_train_normalized_loaded_selected = [
        X_train_normalized_loaded[i][:, columns_selected]
        for i in np.arange(len(user_interface.classes()))
    ]  # selection
    X_test_mc_normalized = np.concatenate(
        (X_test_normalized[0], X_test_normalized[1], X_test_normalized[2]),
        axis=0)
    y_train_selected = [
        np.ones(X_train_normalized_loaded_selected[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]
    # end compute and select features
    y_test_predicted_mv = supportvectormachines.getpredictions(
        X_train_normalized_loaded_selected, y_train_selected,
        X_test_mc_normalized)  # SVM
    print('\n\nMetrics:')
    metrics.get_metrics(dict_test_features)
    print('\n\nConfusion matrix:')
    confusionmatrix.compute_cm_multiclass(
        y_test_mc, y_test_predicted_mv)  # print confusion matrix
    return True
Example no. 7
def plotfeature(feat, i=-1):
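    """Stem-plot a single feature vector; if an index `i` is given, label the
    plot with the corresponding feature and class names."""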
    plt.figure(figsize=(18, 4))
    plt.subplot(1, 1, 1)
    plt.stem(feat, use_line_collection=True)
    if i != -1:
        plt.ylabel(featuresnames()[i] + ' coefficients')
        plt.title(featuresnames()[i] + ' coefficients {}'.format(classes()[i]))
    plt.grid(True)
    plt.show()
Example no. 8
def train():
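    """Compute (or load) the per-class train features, min-max normalize them,
    run feature selection, plot the features and save the results to disk."""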
    path = pathlib.Path(__file__).parent.absolute()
    classes = user_interface.classes()
    if user_interface.generate_datasets():
        main_traintest.traintest(path)
        dict_train_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
        for c in classes:
            dict_train_features[c] = dataloader.dict_train_features(c)
    else:
        dict_train_features = trainingloop.getdicttrainfeatures(
            path)  # compute train features
    X_train = [dict_train_features[c] for c in classes]
    y_train = [
        np.ones(X_train[i].shape[0], ) * i
        for i in np.arange(len(user_interface.classes()))
    ]  # keys
    feat_max = np.max(np.concatenate((X_train[0], X_train[1], X_train[2]),
                                     axis=0),
                      axis=0)
    feat_min = np.min(np.concatenate((X_train[0], X_train[1], X_train[2]),
                                     axis=0),
                      axis=0)
    X_train_normalized = [(X_train[c] - feat_min) / (feat_max - feat_min)
                          for c in np.arange(len(user_interface.classes()))
                          ]  # normalized matrix
    X_train_mc_normalized = np.concatenate(
        (X_train_normalized[0], X_train_normalized[1], X_train_normalized[2]),
        axis=0)
    y_train_mc = np.concatenate((y_train[0], y_train[1], y_train[2]), axis=0)

    featurelst = featureselection.getfeaturelist(
        X_train_mc_normalized, y_train_mc)  # feature selection
    if user_interface.do_plot():
        plotfeatures.plotfeats(X_train_normalized,
                               mask=np.arange(10) + 7)  # plot train features
    else:
        plotselected.plotsel(X_train_normalized, featurelst['selectedcolumns'])
    savetraindata.savedata(dict_train_features, featurelst, feat_max,
                           feat_min)  # save data
    print('All features')
    print(user_interface.featuresnames())
    print_feature_sel.print_features(featurelst)
    return True
def traintest(path):
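    """Compute the full feature set, split it into train and test partitions
    with train_test_split, rebuild per-class feature dictionaries and save
    both datasets."""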
    dict_features = trainingloop.getdicttrainfeatures(
        path)  # compute train features
    classes = user_interface.classes()
    test_size = user_interface.test_size()

    X = [dict_features[c] for c in classes]
    y = [np.ones(X[i].shape[0], ) * i for i in np.arange(len(classes))]  # keys
    y_mc = np.concatenate((y[0], y[1], y[2]), axis=0)
    X_mc = np.concatenate((X[0], X[1], X[2]), axis=0)

    X_train, X_test, y_train, y_test = train_test_split(X_mc,
                                                        y_mc,
                                                        test_size=test_size)

    dict_train_feats = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
    dict_test_feats = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
    n_features = len(user_interface.featuresnames())

    for c in np.arange(len(classes)):
        condition = np.mod(y_train, 3) == c
        n_train = len(y_train[condition])

        train_feats = np.zeros((n_train, n_features))
        k = 0

        for i in np.arange(len(y_train)):
            if y_train[i] == c:
                train_feats[k, :] = X_train[i, :]
                k = k + 1
        dict_train_feats[classes[c]] = train_feats

    for c in np.arange(len(classes)):
        condition = np.mod(y_test, 3) == c
        n_test = len(y_test[condition])

        test_feats = np.zeros((n_test, n_features))
        k = 0
        for i in np.arange(len(y_test)):
            if y_test[i] == c:
                test_feats[k, :] = X_test[i, :]
                k = k + 1
        dict_test_feats[classes[c]] = test_feats

    savetraindata.save_datasets(dict_train_feats, dict_test_feats)

    return dict_train_feats, dict_test_feats
Example no. 10
def getdicttrainfeatures(path):
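    """Build, for every class, the feature matrix of all training .wav files
    found under environment/databases/train/{class}."""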
    dict_train_features = {'NoFX': [], 'Distortion': [], 'Tremolo': []}
    fullpath = str(path) + '/environment/databases/train/{}'

    for c in user_interface.classes():  # loops over classes
        n_features = len(user_interface.featuresnames())
        train_root = fullpath.format(c)
        class_train_files = [
            f for f in os.listdir(train_root) if f.endswith('.wav')
        ]
        n_train = len(class_train_files)
        train_features = np.zeros((n_train, n_features))

        for index, f in enumerate(
                class_train_files):  # loops over all the files of the class
            audio, fs = librosa.load(os.path.join(train_root, f), sr=None)
            train_features[index, :] = features.getfeatures(audio)

        dict_train_features[c] = train_features
    return dict_train_features
Example no. 11
import plotfeatures


def plotsel(features, columns_selected):
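    """Plot only the selected feature columns by delegating to plotfeatures.plotfeats."""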
    plotfeatures.plotfeats(features, columns_selected)


if __name__ == "__main__":  # executable from terminal
    from environment.modules import dataloader
    from environment.modules.analysislab.user_interface import classes

    dict_tr = [(dataloader.dict_train_features(c) - dataloader.featmin()) /
               (dataloader.featmax() - dataloader.featmin())
               for c in classes()]
    plotsel(dict_tr, dataloader.columns_selected())
Example no. 12
def print_features(featurelst):
    print('\n\nSelected matrix:')
    print([featuresnames()[i] for i in featurelst['selectedcolumns']])
    print(featurelst['featurematrix'])
    print('\nfeature scores:')
    print(featuresnames())
    print(featurelst['scores'])
    print('\nselected features')
    print([featuresnames()[i] for i in featurelst['selectedcolumns']])


if __name__ == '__main__':
    from environment.modules import featureselection, dataloader
    from environment.modules.analysislab.user_interface import classes
    import numpy as np

    X_train_normalized = [
        (dataloader.dict_train_features(c) - dataloader.featmin()) /
        (dataloader.featmax() - dataloader.featmin()) for c in classes()
    ]
    y_train = [
        np.ones(X_train_normalized[i].shape[0], ) * i
        for i in np.arange(len(classes()))
    ]
    X_train_mc_normalized = np.concatenate(
        (X_train_normalized[0], X_train_normalized[1], X_train_normalized[2]),
        axis=0)
    y_train_mc = np.concatenate((y_train[0], y_train[1], y_train[2]), axis=0)
    featurelst = featureselection.getfeaturelist(X_train_mc_normalized,
                                                 y_train_mc)
    print_features(featurelst)
def save_datasets(dict_train_feats, dict_test_feats):
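    """Dump the per-class train and test feature dictionaries to .dat files."""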
    for c in user_interface.classes():
        dict_train_feats[c].dump('dict_train_features_' + c + '.dat')
    for c in user_interface.classes():
        dict_test_feats[c].dump('dict_test_feats_' + c + '.dat')
    return True