def plot_feature_importances(clf, feature_names, clf_name='', top_n=10, figsize=(8, 8), T=None, save_fig=True):
    """Plot the top_n feature importances of a fitted classifier as a horizontal bar chart.

    Parameters
    ----------
    clf : fitted classifier exposing ``feature_importances_``
    feature_names : iterable of column names (e.g. ``train_x.columns`` of a pandas DataFrame)
    clf_name : str, name used in the plot title and saved-file name
    top_n : int, number of most-important features to show
    figsize : tuple, figure size passed to pandas' barh plot
    T : optional value (e.g. number of estimators) appended to the saved-file name
    save_fig : bool, save the figure via ``utils.savePlots`` when True, otherwise close it

    Returns
    -------
    pandas.DataFrame with the top_n features, indexed by feature name.
    """
    title = "Feature Importances of " + clf_name
    feat_imp = pd.DataFrame({'importance': clf.feature_importances_})
    feat_imp['feature'] = feature_names
    # Keep only the top_n most important features.
    feat_imp.sort_values(by='importance', ascending=False, inplace=True)
    feat_imp = feat_imp.iloc[:top_n]
    # Re-sort ascending so barh draws the most important feature at the top.
    feat_imp.sort_values(by='importance', inplace=True)
    feat_imp = feat_imp.set_index('feature', drop=True)
    feat_imp.plot.barh(title=title, figsize=figsize)
    plt.xlabel('Feature Importance Score')
    if save_fig:
        # Append T to the file name only when the caller supplied one.
        name = clf_name if T is None else clf_name + str(T)
        utils.savePlots('dt', plt, name=name)
    else:
        plt.close()
        print("not saving the plot of " + clf_name)
    #plt.show()
    return feat_imp
def getPerformanceMeasures(dataDict, prefix="", T=5, kernel='linear'):
    """For every dataset in dataDict, train AdaBoost-with-SVM and a plain SVM,
    print accuracy/precision/recall, and save a confusion-matrix plot for each.

    Parameters
    ----------
    dataDict : dict mapping dataset name -> [train_x, train_y, test_x, test_y]
    prefix : str prepended to saved plot file names
    T : int, number of AdaBoost estimators passed to ``buildAdaSVM``
    kernel : str, SVM kernel used by both models
    """
    def _print_scores(model_label, test_y, y_pred):
        # Print the three macro-averaged scores in the file's established format.
        print(">> " + model_label + ": \n")
        print("Accuracy: " + str(round(metrics.accuracy_score(test_y, y_pred), 3))
              + ", Precision: " + str(round(metrics.precision_score(test_y, y_pred, average='macro'), 3))
              + ", Recall: " + str(round(metrics.recall_score(test_y, y_pred, average='macro'), 3)))

    for key in dataDict:
        print(">> Dataset: " + key.upper())
        [train_x, train_y, test_x, test_y] = dataDict[key]
        # Train AdaBoost with SVM base learners.
        y_ada_svm = buildAdaSVM(train_x, train_y, test_x, T=T, kernel=kernel)
        _print_scores("AdaSVM", test_y, y_ada_svm)
        utils.plotCM(metrics.confusion_matrix(test_y, y_ada_svm),
                     title="Adaboost with SVM: " + key.capitalize())
        utils.savePlots(modelName, plt, prefix + "ada_" + key, "png")
        # Train a plain SVM baseline for comparison.
        y_svm = buildSVM(train_x, train_y, test_x, kernel=kernel)
        _print_scores("SVM", test_y, y_svm)
        utils.plotCM(metrics.confusion_matrix(test_y, y_svm),
                     title="SVM: " + key.capitalize())
        utils.savePlots(modelName, plt, prefix + "svm_" + key, "png")
def plotKernelComparison(dataDict, prefix=""):
    """Draw one accuracy-vs-kernel curve per dataset and save the combined figure.

    dataDict maps dataset name -> [train_x, train_y, test_x, test_y]; prefix is
    prepended to the saved file name.
    """
    plt.clf()
    for name, split in dataDict.items():
        train_x, train_y, test_x, test_y = split
        scores = compareKernels(train_x, train_y, test_x, test_y,)
        plt.plot(kernels, scores, color=colors[name], label=name.capitalize())
    plt.xlabel('SVM Kernel')
    plt.ylabel('Accuracy Score')
    plt.title('Comparison of SVM kernels in AdaBoost')
    plt.legend()
    utils.savePlots(modelName, plt, prefix + 'kernel_comparison', "png")
def plotAlphaComparison(dataDict, prefix=""):
    """Draw one accuracy-vs-alpha curve per dataset (MLP) and save the figure.

    Parameters
    ----------
    dataDict : dict mapping dataset name -> [train_x, train_y, test_x, test_y]
    prefix : str prepended to the saved file name; defaults to "" so existing
             callers keep the original 'alpha_comparison' name (added for
             consistency with the other plot* helpers in this file)
    """
    plt.clf()
    for key in dataDict:
        print(">> Dataset: " + key.upper())
        [train_x, train_y, test_x, test_y] = dataDict[key]
        accuracies = compareAlpha(train_x, train_y, test_x, test_y)
        plt.plot(accuracies, color=colors[key], label=key.capitalize())
    plt.xlabel('Value of alpha (a)')
    plt.ylabel('Accuracy Score')
    plt.title('MLP')
    plt.legend()
    utils.savePlots(modelName, plt, prefix + 'alpha_comparison', "png")
def plotTComparison(dataDict, prefix=""):
    """For each SVM kernel, plot accuracy vs. number of AdaBoost estimators (T)
    for every dataset and save one figure per kernel.

    Parameters
    ----------
    dataDict : dict mapping dataset name -> [train_x, train_y, test_x, test_y]
    prefix : str prepended to each saved file name
    """
    for kernel in kernels:
        print(kernel.upper())
        # BUG FIX: clear the figure per kernel. Previously plt.clf() ran once
        # before this loop, so every kernel after the first was drawn on top of
        # the earlier kernels' lines and accumulated duplicate legend entries.
        plt.clf()
        for key in dataDict:
            print(">> Dataset: " + key.upper())
            [train_x, train_y, test_x, test_y] = dataDict[key]
            accuracies = compareTs(train_x, train_y, test_x, test_y, kernel)
            plt.plot(T, accuracies, color=colors[key], label=key.capitalize())
        plt.xlabel('Number of estimators (T)')
        plt.ylabel('Accuracy Score')
        plt.title('AdaBoost with SVM')
        plt.legend()
        utils.savePlots(modelName, plt, prefix + kernel + '_t_comparison', "png")
plot_colors = ['red', 'blue', 'green'] line_labels = ['beach', 'finger', 'rest'] count = 0 for data in total_data: train_x, train_y, test_x, test_y = data_separation(data) #iteration_num = [1, 5, 10, 50, 100, 500, 1000] #test_err_list, polar_y_ada = error_list_ada(iteration_num, accuracy=True) clf_tree = tree.DecisionTreeClassifier() y_pred = clf_tree.fit(train_x, train_y).predict(test_x) svm_clf = SVC(gamma='auto', kernel='poly', degree=3, coef0=0.015625) y_svm = svm_clf.fit(train_x, train_y).predict(test_x) graph = confusion_matrix(test_y, y_svm) utils.plotCM(graph, title="SVM - Polynomial: " + line_labels[count]) utils.savePlots("dt", plt, "CM_SVM_ARMP_" + line_labels[count], "png") #plt.plot(iteration_num, test_err_list, color = plot_colors[count], label=line_labels[count]) count += 1 plt.legend() plt.xlabel('Number of estimators (T)') plt.ylabel('Accuracy Score') plt.title('AdaBoost with Decision Stump') #graph = confusion_matrix(test_y, y_gnb) ''' svm_clf = SVC(gamma='auto') y_svm = svm_clf.fit(train_x,train_y).predict(test_x) #gmm = GaussianMixture(n_components=20) #y_gmm = gmm.fit(train_x,train_y).predict(test_x)