def plot_auc_barchart(classifiers, tests):
    attitudes = ["proactivo", "reactivo", "agresivo", "provoto"]
    results = {}
    dfs = {}
    # Load one feature DataFrame per (feature combination, classifier) pair.
    for classifier in classifiers:
        classifier_name = type(classifier).__name__
        for feature_names in tests:
            feature = "+".join(
                [feature_name.upper() for feature_name in feature_names] +
                [classifier_name])
            dfs[feature] = dat.getFeaturesDataFrame(*feature_names)

    for i, attitude in enumerate(attitudes):
        results[attitude] = {}
        for classifier in classifiers:
            classifier_name = type(classifier).__name__
            for feature_names in tests:
                feature = "+".join(
                    [feature_name.upper() for feature_name in feature_names] +
                    [classifier_name])
                df = dfs[feature]
                # Oversample labels with fewer than 10 examples so every class
                # is represented when the multiclass AUC is computed.
                sorted_train_labels = sorted(list(set(df.loc[:, attitude])))
                for label in sorted_train_labels:
                    t = df[(df[attitude] == label)]
                    if t.shape[0] < 10:
                        df = df.append([t] * 10)
                print("========================={}==========================="
                      .format(attitude.upper()))
                # Keep only the columns produced by the requested extractors.
                data = df.loc[:, [
                    c for c in df.columns for feature_name in feature_names
                    if "_" + feature_name.lower() in c
                ]]
                target = df.loc[:, attitude]
                results[attitude][feature] = tc.compute_auc_multiclass(
                    data, target, classifier)

    # One bar trace per attitude, grouped by feature+classifier combination.
    attitudes = ["provoto", "agresivo", "reactivo", "proactivo"]
    dist = []
    for attitude in attitudes:
        dist.append(
            go.Bar(x=list(results[attitude].keys()),
                   y=[results[attitude][r] for r in results[attitude].keys()],
                   name=attitude))
    fig = go.Figure(data=dist,
                    layout=go.Layout(title="Average ROC AUC per attitude"))
    plotly.offline.plot(
        fig,
        filename='images/{}-average-roc-auc-per-attitude.html'.format(
            classifier_name))
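# A minimal usage sketch for plot_auc_barchart. The classifier list and the
# bow/big/w2v feature combinations mirror the __main__ blocks used elsewhere
# in these scripts; wrapping the call in its own __main__ guard here is an
# assumption, not part of the original module.
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB

import config as conf

if __name__ == "__main__":
    classifiers = [
        BernoulliNB(),
        RandomForestClassifier(n_estimators=300, n_jobs=-1,
                               random_state=conf.seed),
    ]
    tests = [["bow"], ["big"], ["w2v"], ["bow", "big"], ["bow", "w2v"],
             ["big", "w2v"], ["big", "w2v", "bow"]]
    plot_auc_barchart(classifiers, tests)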
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import os
import __init__
import matplotlib.pyplot as plt
import numpy as np
import load_dataframe as dat
import config as conf

feature_names = ["big", "w2v", "bow"]
df = dat.getFeaturesDataFrame(*feature_names)
classifier = RandomForestClassifier(n_estimators=100, n_jobs=-1,
                                    random_state=conf.seed)
attitudes = ["proactivo", "reactivo", "agresivo", "provoto"]
for i, attitude in enumerate(attitudes):
    # Keep only the columns produced by the requested extractors.
    data = df.loc[:, [
        c for c in df.columns for feature_name in feature_names
        if "_" + feature_name.lower() in c
    ]]
    target = df.loc[:, attitude]
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, target, test_size=.5, random_state=0)
    model = classifier.fit(train_data, train_labels)
    # Mean feature importances and their standard deviation across the trees.
    importances = model.feature_importances_
    std = np.std([tree.feature_importances_ for tree in model.estimators_],
                 axis=0)
    indices = np.argsort(importances)[::-1]
    max_features = 25
    plt.figure()
    title = '{}+RandomForest feature importance for attitude {}'.format(
        "+".join([feature_name.upper() for feature_name in feature_names]),
"Attitude '{}'".format(attitude), save=conf.images_dir, plot=axarr[i]) plt.setp([a.get_yticklabels() for a in axarr[1:]], visible=False) f.subplots_adjust(hspace=0.3) plt.suptitle('{}+{} for all attributes'.format( "+".join([feature_name.upper() for feature_name in feature_names]), classifier_name), y=1) f.text(0.5, 0.01, 'False Positive Rate', ha='center') f.text(0.09, 0.5, 'True Positive Rate', va='center', rotation='vertical') plt.savefig(os.path.join( conf.images_dir, '{}+{} for all attributes.png'.format( "+".join([feature_name.upper() for feature_name in feature_names]), classifier_name)), bbox_inches='tight') # plt.show() if (__name__ == "__main__"): classifiers = [ BernoulliNB() # ,RandomForestClassifier(n_estimators=300,n_jobs=-1, random_state=conf.seed) ] tests = [["bow"], ["big"], ["w2v"], ["bow", "big"], ["bow", "w2v"], ["big", "w2v"], ["big", "w2v", "bow"]] for classifier in classifiers: for test in tests: AUC(classifier, data.getFeaturesDataFrame(*test), test) # AUC(BernoulliNB(), bow.ExtractW2V) # AUC(BernoulliNB(), bow.ExtractBOW, w2v.ExtractW2V, big.ExtractBIG)
def plot_confusion_matrix(classifiers, tests):
    for classifier in classifiers:
        for test in tests:
            confusion_matrix(classifier, dat.getFeaturesDataFrame(*test), test)
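# plot_confusion_matrix delegates to a confusion_matrix(classifier, df,
# feature_names) helper defined elsewhere in the repository (not sklearn's
# function of the same name). A minimal sketch of what such a helper could
# look like, reusing the column selection and train/test split seen in the
# other scripts here; the name confusion_matrix_sketch and the heatmap
# details are assumptions, not the project's code.
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix as sk_confusion_matrix
from sklearn.model_selection import train_test_split


def confusion_matrix_sketch(classifier, df, feature_names,
                            attitudes=("proactivo", "reactivo",
                                       "agresivo", "provoto")):
    for attitude in attitudes:
        # Select the feature columns matching the requested extractors.
        data = df.loc[:, [
            c for c in df.columns for feature_name in feature_names
            if "_" + feature_name.lower() in c
        ]]
        target = df.loc[:, attitude]
        train_data, test_data, train_labels, test_labels = train_test_split(
            data, target, test_size=.5, random_state=0)
        model = classifier.fit(train_data, train_labels)
        cm = sk_confusion_matrix(test_labels, model.predict(test_data))
        # One heatmap per attitude.
        plt.figure()
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title("Confusion matrix for attitude '{}'".format(attitude))
        plt.colorbar()
        plt.xlabel('Predicted label')
        plt.ylabel('True label')
        plt.show()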
# Plot ROC curves for the multiclass problem
# Compute macro-average ROC curve and ROC area
# First aggregate all false positive rates
all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
# Then interpolate all ROC curves at these points
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
# Finally average it and compute AUC
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
return roc_auc["macro"]


if __name__ == "__main__":
    classifiers = [
        BernoulliNB(),
        RandomForestClassifier(n_estimators=300, n_jobs=-1,
                               random_state=conf.seed)
    ]
    tests = [["bow"], ["big"], ["w2v"], ["bow", "big"], ["bow", "w2v"],
             ["big", "w2v"], ["big", "w2v", "bow"]]
    for classifier in classifiers:
        for test in tests:
            get_accuracy(classifier, dat.getFeaturesDataFrame(*test), test)
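# The macro-average block above is the tail of a function whose head is not
# shown (plot_auc_barchart refers to it as tc.compute_auc_multiclass). A
# minimal sketch of the per-class fpr/tpr computation that the fragment relies
# on, assuming a one-vs-rest binarization of the labels and more than two
# label values per attitude; the function name, the split parameters and the
# use of OneVsRestClassifier are assumptions, only the macro-average step at
# the end is taken from the fragment.
import numpy as np
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize


def compute_auc_multiclass_sketch(data, target, classifier):
    classes = sorted(set(target))
    n_classes = len(classes)
    y = label_binarize(target, classes=classes)
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, y, test_size=.5, random_state=0)
    model = OneVsRestClassifier(classifier).fit(train_data, train_labels)
    y_score = model.predict_proba(test_data)

    # Per-class ROC curves; the macro-average step shown above follows.
    fpr, tpr, roc_auc = {}, {}, {}
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Macro-average: aggregate the fpr grid, interpolate every curve on it,
    # average the tpr values and compute the resulting AUC.
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    mean_tpr /= n_classes
    fpr["macro"], tpr["macro"] = all_fpr, mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    return roc_auc["macro"]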