def plot_auc_barchart(classifiers, tests):
    """Compute the average multiclass ROC AUC per attitude for every
    classifier / feature-set combination and plot grouped bar charts.

    Parameters:
        classifiers: scikit-learn-style classifier instances.
        tests: list of feature-name lists (e.g. [["bow"], ["bow", "w2v"]]);
            each one is forwarded to dat.getFeaturesDataFrame.

    Side effects: writes an offline plotly HTML bar chart to
    images/<classifier>-average-roc-auc-per-attitude.html (rebuilt on every
    outer-loop iteration — see NOTE(review) below).
    """
    attitudes = ["proactivo", "reactivo", "agresivo", "provoto"]
    results = {}  # attitude -> {feature-combo key -> mean AUC}
    dfs = {}      # feature-combo key -> features dataframe
    # Pre-load one dataframe per "FEAT1+FEAT2+ClassifierName" key.  The
    # classifier name is part of the key even though the dataframe itself
    # depends only on the feature names.
    for classifier in classifiers:
        classifier_name = type(classifier).__name__
        for feature_names in tests:
            feature = "+".join(
                [feature_name.upper()
                 for feature_name in feature_names] + [classifier_name])
            dfs[feature] = dat.getFeaturesDataFrame(*feature_names)
    for i, attitude in enumerate(attitudes):
        results[attitude] = {}
        for classifier in classifiers:
            classifier_name = type(classifier).__name__
            for feature_names in tests:
                feature = "+".join(
                    [feature_name.upper()
                     for feature_name in feature_names] + [classifier_name])
                df = dfs[feature]
                sorted_train_labels = sorted(list(set(df.loc[:, attitude])))
                # Crude oversampling: any label with fewer than 10 rows has
                # its rows duplicated 10x so no class is vanishingly rare.
                for label in sorted_train_labels:
                    t = df[(df[attitude] == label)]
                    if t.shape[0] < 10:
                        # NOTE(review): DataFrame.append is deprecated and
                        # removed in pandas 2.0 — should become pd.concat.
                        df = df.append([t] * 10)
                print("========================={}===========================".
                      format(attitude.upper()))
                # Keep only the columns emitted by the requested feature
                # extractors (their names contain "_<feature>").
                data = df.loc[:, [
                    c for c in df.columns for feature_name in feature_names
                    if "_" + feature_name.lower() in c
                ]]
                target = df.loc[:, attitude]
                results[attitude][feature] = tc.compute_auc_multiclass(
                    data, target, classifier)

        # NOTE(review): rebinding `attitudes` here does not affect the outer
        # enumerate() loop (it captured the original list); it only changes
        # the bar ordering below.  More importantly, on the first outer
        # iteration `results` contains only "proactivo", so
        # results["provoto"] raises KeyError — this whole plotting section
        # almost certainly belongs OUTSIDE the `for i, attitude` loop.
        attitudes = ["provoto", "agresivo", "reactivo", "proactivo"]
        dist = []
        for attitude in attitudes:
            dist.append(
                go.Bar(
                    x=list(results[attitude].keys()),
                    y=[results[attitude][r] for r in results[attitude].keys()],
                    name=attitude))

        fig = go.Figure(data=dist,
                        layout=go.Layout(title="Average ROC AUC per attitude"))
        plotly.offline.plot(
            fig,
            filename='images/{}-average-roc-auc-per-attitude.html'.format(
                classifier_name))
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import os
import __init__
import matplotlib.pyplot as plt
import numpy as np
import load_dataframe as dat
import config as conf
# --- Module-level feature-importance script ---
# Loads the combined BIG+W2V+BOW feature matrix and fits one random forest
# per attitude, presumably to inspect per-feature importances.
feature_names = ["big", "w2v", "bow"]
df = dat.getFeaturesDataFrame(*feature_names)
classifier = RandomForestClassifier(n_estimators=100,
                                    n_jobs=-1,
                                    random_state=conf.seed)
attitudes = ["proactivo", "reactivo", "agresivo", "provoto"]
for i, attitude in enumerate(attitudes):
    # Keep only the columns emitted by the requested feature extractors
    # (their names contain "_<feature>", e.g. "_bow").
    data = df.loc[:, [
        c for c in df.columns for feature_name in feature_names
        if "_" + feature_name.lower() in c
    ]]
    target = df.loc[:, attitude]
    train_data, test_data, train_labels, test_labels = train_test_split(
        data, target, test_size=.5, random_state=0)
    model = classifier.fit(train_data, train_labels)
    importances = model.feature_importances_
    # Std-dev of importances across the forest's trees (for error bars).
    std = np.std([tree.feature_importances_ for tree in model.estimators_],
                 axis=0)
    # Feature indices sorted by decreasing importance.
    indices = np.argsort(importances)[::-1]
    max_features = 25
    plt.figure()
    # NOTE(review): the `.format(...)` call below looks like a bad merge of
    # two scripts — it passes `save=` and `plot=` keyword arguments that
    # str.format silently ignores, and `axarr` is never defined, so this
    # raises NameError at runtime.  Needs reconstruction from the original
    # plotting helper.
    title = '{}+RandomForest feature importance for attitude {}'.format(
        "+".join([feature_name.upper() for feature_name in feature_names]),
                        "Attitude '{}'".format(attitude),
                        save=conf.images_dir,
                        plot=axarr[i])
    plt.setp([a.get_yticklabels() for a in axarr[1:]], visible=False)
    # NOTE(review): `f` (a shared subplot figure) and `classifier_name` are
    # not defined anywhere in this script either — the lines below belong to
    # a different (ROC-plotting) script and will raise NameError.
    f.subplots_adjust(hspace=0.3)
    plt.suptitle('{}+{} for all attributes'.format(
        "+".join([feature_name.upper() for feature_name in feature_names]),
        classifier_name),
                 y=1)
    f.text(0.5, 0.01, 'False Positive Rate', ha='center')
    f.text(0.09, 0.5, 'True Positive Rate', va='center', rotation='vertical')
    plt.savefig(os.path.join(
        conf.images_dir, '{}+{} for all attributes.png'.format(
            "+".join([feature_name.upper() for feature_name in feature_names]),
            classifier_name)),
                bbox_inches='tight')
    # plt.show()


if __name__ == "__main__":
    # Classifiers to evaluate; the random forest is left commented out,
    # presumably to keep runs fast.
    classifiers = [
        BernoulliNB()
        # ,RandomForestClassifier(n_estimators=300,n_jobs=-1, random_state=conf.seed)
    ]
    # Every feature-set combination of bag-of-words, bigrams and word2vec.
    tests = [["bow"], ["big"], ["w2v"], ["bow", "big"], ["bow", "w2v"],
             ["big", "w2v"], ["big", "w2v", "bow"]]
    for classifier in classifiers:
        for test in tests:
            # BUG FIX: the dataframe loader is imported as `dat`
            # (`import load_dataframe as dat`); `data.getFeaturesDataFrame`
            # referenced an undefined name and raised NameError.
            AUC(classifier, dat.getFeaturesDataFrame(*test), test)
            # AUC(BernoulliNB(), bow.ExtractW2V)
            # AUC(BernoulliNB(), bow.ExtractBOW, w2v.ExtractW2V, big.ExtractBIG)
def plot_confusion_matrix(classifiers, tests):
    """Plot a confusion matrix for every classifier / feature-set pair.

    Delegates to `confusion_matrix` — presumably a project-level helper
    defined elsewhere in this file (sklearn's function of the same name
    takes (y_true, y_pred), not a classifier and a dataframe) — TODO confirm.
    """
    for classifier in classifiers:
        for test in tests:
            confusion_matrix(classifier, dat.getFeaturesDataFrame(*test), test)
	# NOTE(review): everything below this line is tab-indented while the
	# loop above uses spaces, so Python 3 rejects this file with a
	# TabError.  These lines are the tail of a macro-averaged ROC-AUC
	# helper whose `def` header was lost in a bad merge — `fpr`, `tpr`,
	# `n_classes`, `roc_auc` and `auc` are all undefined in this scope.
	# Restore the missing function header or delete the fragment.
		# Plot ROC curves for the multiclass problem

		# Compute macro-average ROC curve and ROC area

		# First aggregate all false positive rates
	all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

	# Then interpolate all ROC curves at these points
	mean_tpr = np.zeros_like(all_fpr)
	for i in range(n_classes):
		mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])

	# Finally average it and compute AUC
	mean_tpr /= n_classes

	fpr["macro"] = all_fpr
	tpr["macro"] = mean_tpr
	roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
	return roc_auc["macro"]

if __name__ == "__main__":
    # Measure accuracy for every classifier / feature-set combination.
    classifiers = [
        BernoulliNB(),
        RandomForestClassifier(n_estimators=300, n_jobs=-1,
                               random_state=conf.seed),
    ]
    tests = [["bow"], ["big"], ["w2v"], ["bow", "big"], ["bow", "w2v"],
             ["big", "w2v"], ["big", "w2v", "bow"]]
    for classifier in classifiers:
        for test in tests:
            frame = dat.getFeaturesDataFrame(*test)
            get_accuracy(classifier, frame, test)