Example #1
0
def plot_feature_importances(clf,
                             feature_names,
                             clf_name='',
                             top_n=10,
                             figsize=(8, 8),
                             T=None,
                             save_fig=True):
    '''
    Plot the top_n feature importances of a fitted classifier as a
    horizontal bar chart, optionally saving the figure.

    Parameters
    ----------
    clf : fitted classifier exposing ``feature_importances_``
    feature_names : sequence of names aligned with the importances,
        e.g. ``train_x.columns`` where train_x is a pandas DataFrame
    clf_name : str used in the plot title and the saved file name
    top_n : int, number of most important features to display
    figsize : tuple passed to the pandas barh plot
    T : optional value appended to the saved file name (None = no suffix)
    save_fig : when False, close the figure instead of saving it

    Returns
    -------
    pandas.DataFrame indexed by feature name with an 'importance'
    column, restricted to the top_n features, sorted ascending.
    '''
    title = "Feature Importances of " + clf_name

    feat_imp = pd.DataFrame({'importance': clf.feature_importances_})
    feat_imp['feature'] = feature_names

    # Keep only the top_n most important features...
    feat_imp.sort_values(by='importance', ascending=False, inplace=True)
    feat_imp = feat_imp.iloc[:top_n]

    # ...then re-sort ascending so the biggest bar lands at the top
    # of the horizontal bar chart.
    feat_imp.sort_values(by='importance', inplace=True)
    feat_imp = feat_imp.set_index('feature', drop=True)
    feat_imp.plot.barh(title=title, figsize=figsize)
    plt.xlabel('Feature Importance Score')

    if save_fig:  # idiomatic truthiness instead of `== True`
        # `is None` is the correct identity check (was `== None`);
        # folding the two save calls removes the duplicated branch.
        suffix = '' if T is None else str(T)
        utils.savePlots('dt', plt, name=clf_name + suffix)
    else:
        plt.close()
        print("not saving the plot of " + clf_name)
    #plt.show()

    return feat_imp
Example #2
0
def _printScores(test_y, y_pred):
    # Print macro-averaged accuracy/precision/recall for one prediction
    # vector (extracted: this report was copy-pasted for both models).
    print("Accuracy: " + str(round(metrics.accuracy_score(test_y, y_pred),3)) +
        ", Precision: " + str(round(metrics.precision_score(test_y, y_pred, average='macro'),3)) +
        ", Recall: " + str(round(metrics.recall_score(test_y, y_pred, average='macro'),3))
        )


def getPerformanceMeasures(dataDict, prefix="", T=5, kernel='linear'):
    '''
    Train AdaBoost-with-SVM and a plain SVM on every dataset in dataDict,
    print their scores and save a confusion-matrix plot for each model.

    dataDict : mapping of dataset name -> [train_x, train_y, test_x, test_y]
    prefix   : str prepended to every saved plot's file name
    T        : number of AdaBoost estimators passed to buildAdaSVM
    kernel   : SVM kernel used by both models
    '''
    for key in dataDict:
        print(">> Dataset: " + key.upper())
        [train_x, train_y, test_x, test_y] = dataDict[key]

        #Train AdaBoost with SVM
        y_ada_svm = buildAdaSVM(train_x, train_y, test_x, T=T, kernel=kernel)
        print(">> AdaSVM: \n")
        _printScores(test_y, y_ada_svm)
        utils.plotCM(metrics.confusion_matrix(test_y, y_ada_svm), title= "Adaboost with SVM: " + key.capitalize())
        utils.savePlots(modelName, plt, prefix + "ada_" + key, "png")


        #Train plain SVM
        y_svm = buildSVM(train_x, train_y, test_x, kernel=kernel)
        print(">> SVM: \n")
        _printScores(test_y, y_svm)
        utils.plotCM(metrics.confusion_matrix(test_y, y_svm), title= "SVM: " + key.capitalize())
        utils.savePlots(modelName, plt, prefix + "svm_" + key, "png")
Example #3
0
def plotKernelComparison(dataDict, prefix=""):
    '''
    For each dataset in dataDict, plot AdaBoost-SVM accuracy across the
    module-level list of kernels, then save one combined comparison figure.

    dataDict : mapping of dataset name -> [train_x, train_y, test_x, test_y]
    prefix   : str prepended to the saved plot's file name
    '''
    plt.clf()

    for dataset_name, split in dataDict.items():
        train_x, train_y, test_x, test_y = split

        kernel_accuracies = compareKernels(train_x, train_y, test_x, test_y)
        plt.plot(kernels,
                 kernel_accuracies,
                 color=colors[dataset_name],
                 label=dataset_name.capitalize())

    plt.xlabel('SVM Kernel')
    plt.ylabel('Accuracy Score')
    plt.title('Comparison of SVM kernels in AdaBoost')
    plt.legend()
    utils.savePlots(modelName, plt, prefix + 'kernel_comparison', "png")
Example #4
0
def plotAlphaComparison(dataDict, prefix=""):
    '''
    For each dataset in dataDict, plot MLP accuracy as a function of the
    regularization parameter alpha, then save one combined figure.

    dataDict : mapping of dataset name -> [train_x, train_y, test_x, test_y]
    prefix   : str prepended to the saved plot's file name; defaults to ""
               so existing callers keep producing 'alpha_comparison'
               (added for consistency with plotKernelComparison /
               plotTComparison, which already take a prefix)
    '''
    plt.clf()

    for key in dataDict:
        print(">> Dataset: " + key.upper())
        [train_x, train_y, test_x, test_y] = dataDict[key]

        accuracies = compareAlpha(train_x, train_y, test_x, test_y)
        plt.plot(accuracies, color=colors[key], label=key.capitalize())

    plt.xlabel('Value of alpha (a)')
    plt.ylabel('Accuracy Score')
    plt.title('MLP')
    plt.legend()
    utils.savePlots(modelName, plt, prefix + 'alpha_comparison', "png")
Example #5
0
def plotTComparison(dataDict, prefix=""):
    '''
    For every SVM kernel, plot AdaBoost accuracy as a function of the number
    of estimators T (module-level list) for each dataset, saving one figure
    per kernel.

    dataDict : mapping of dataset name -> [train_x, train_y, test_x, test_y]
    prefix   : str prepended to each saved plot's file name
    '''
    for kernel in kernels:
        # BUG FIX: clear the figure at the start of EVERY kernel iteration.
        # Previously plt.clf() ran only once before the loop, so each saved
        # figure after the first still carried the earlier kernels' curves
        # and duplicate legend entries.
        plt.clf()
        print(kernel.upper())
        for key in dataDict:
            print(">> Dataset: " + key.upper())

            [train_x, train_y, test_x, test_y] = dataDict[key]

            accuracies = compareTs(train_x, train_y, test_x, test_y, kernel)
            plt.plot(T, accuracies, color=colors[key], label=key.capitalize())

        plt.xlabel('Number of estimators (T)')
        plt.ylabel('Accuracy Score')
        plt.title('AdaBoost with SVM')
        plt.legend()
        utils.savePlots(modelName, plt, prefix + kernel + '_t_comparison', "png")
Example #6
0
# Flat script: for each dataset, fit a decision tree and a degree-3
# polynomial SVM on the same split, then save the SVM confusion matrix.
plot_colors = ['red', 'blue', 'green']  # per-dataset line colours (only used by the commented-out accuracy plot below)
line_labels = ['beach', 'finger', 'rest']  # dataset names, indexed by `count`
count = 0  # manual index into line_labels, bumped once per dataset
for data in total_data:
    # NOTE(review): assumes data_separation returns a 4-way train/test split — confirm against its definition
    train_x, train_y, test_x, test_y = data_separation(data)
    #iteration_num = [1, 5, 10, 50, 100, 500, 1000]
    #test_err_list, polar_y_ada = error_list_ada(iteration_num, accuracy=True)
    clf_tree = tree.DecisionTreeClassifier()
    # Tree predictions are computed but never evaluated or plotted below.
    y_pred = clf_tree.fit(train_x, train_y).predict(test_x)

    # Degree-3 polynomial SVM; coef0=0.015625 is 2**-6 — presumably a tuned
    # hyperparameter, origin not shown here.
    svm_clf = SVC(gamma='auto', kernel='poly', degree=3, coef0=0.015625)
    y_svm = svm_clf.fit(train_x, train_y).predict(test_x)

    graph = confusion_matrix(test_y, y_svm)
    utils.plotCM(graph, title="SVM - Polynomial: " + line_labels[count])
    utils.savePlots("dt", plt, "CM_SVM_ARMP_" + line_labels[count], "png")
    #plt.plot(iteration_num, test_err_list, color = plot_colors[count], label=line_labels[count])
    count += 1

# NOTE(review): these labels/title describe the commented-out AdaBoost
# accuracy plot above, not the confusion matrices actually drawn.
plt.legend()
plt.xlabel('Number of estimators (T)')
plt.ylabel('Accuracy Score')
plt.title('AdaBoost with Decision Stump')
#graph = confusion_matrix(test_y, y_gnb)
'''
svm_clf = SVC(gamma='auto')
y_svm = svm_clf.fit(train_x,train_y).predict(test_x)

#gmm = GaussianMixture(n_components=20)
#y_gmm = gmm.fit(train_x,train_y).predict(test_x)