def trainFinalClassifier(db, random_state=0):
    """Fit an extra-trees classifier on a dumped training set and persist it.

    The training data is read with ``joblib.load`` from ``db + ".dump"``.
    Every column except the last is used as the feature matrix and the last
    column as the target.  The fitted model is written with ``joblib.dump``
    to ``"clfs<random_state>/" + db``; that directory is created when it does
    not exist yet.

    Parameters
    ----------
    db : str
        Base name of the data set; used both for the input dump file and for
        the name of the saved classifier.
    random_state : optional
        Seed handed to ``ExtraTreesClassifier``.  It is also formatted into
        the output directory name, so each seed gets its own directory.
    """
    forest = ExtraTreesClassifier(
        n_estimators=100,
        random_state=random_state,
        verbose=100,
        n_jobs=-1
    )

    print("Loading training set...")
    samples = joblib.load(db + ".dump")

    print("Fitting...")
    features, labels = samples[:, 0:-1], samples[:, -1]
    forest.fit(features, labels)
    # The training data is no longer needed; drop every reference to it
    # before the model is serialised.
    del features, labels, samples

    print("Saving...")
    out_dir = "clfs{}/".format(random_state)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # Fitting was run verbosely; silence the estimator before it is stored so
    # the reloaded model does not log on every call.
    forest.verbose = 0
    joblib.dump(forest, out_dir + db)
def _roc_precision_classifier(db, random_state, usecols):
    """Return the classifier cached at ``clfs/<db>``, training and caching it if absent."""
    model_path = "clfs/" + db
    if os.path.exists(model_path):
        print("Loading {}...".format(db))
        return joblib.load(model_path)

    # NOTE(review): the forest seed is fixed at 0 and the cache key is only
    # ``db``; neither depends on the caller's random_state or usecols, so a
    # cached model is reused even when those differ -- confirm this is intended.
    clf = ExtraTreesClassifier(n_estimators=100, random_state=0, n_jobs=-1)
    print("Loading training set...")
    loaded = loadClassifiedDB(db + ".train.csv", random_state=random_state, usecols=usecols)
    print("Fitting...")
    # All columns but the last are features; the last column is the label.
    clf.fit(loaded[:, 0:-1], loaded[:, -1])
    del loaded
    print("Saving...")
    if not os.path.exists("clfs/"):
        os.mkdir("clfs")
    # Make sure the persisted model is quiet when it is reloaded.
    clf.verbose = 0
    joblib.dump(clf, model_path)
    return clf


def _roc_precision_plot(x, y, thresholds, title, xlabel, ylabel):
    """Show one curve and annotate its points with the matching thresholds."""
    plt.plot(x, y)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # zip stops at the shortest input, so only points that have a threshold
    # are annotated (same range as indexing by thresholds.size).
    for threshold, px, py in zip(thresholds, x, y):
        plt.annotate(str(threshold), xy=(px, py), xytext=(10, 10),
                     textcoords='offset points',
                     arrowprops=dict(facecolor='black', shrink=0.025))
    plt.show()


def roc_precision(db, usecols=None, test="unnamed", random_state=0, show_plots=False):
    """Compute ROC and precision/recall curves for ``db`` and save them as .mat files.

    A classifier is loaded from ``clfs/<db>`` or, when that file is missing,
    trained on ``<db>.train.csv`` and cached there.  It then scores the test
    set ``<db>.csv`` and the predicted probability of label ``1`` is used as
    the score for both curves.

    Two files are written with ``sio.savemat``:

    * ``MAT_PATH + test + '.roc.' + db + '.mat'`` with ``fpr``, ``tpr``,
      ``thresholds``;
    * ``MAT_PATH + test + '.precall.' + db + '.mat'`` with ``precision``,
      ``recall``, ``thresholds``.

    ``MAT_PATH`` is concatenated directly with the file name, so it is
    expected to end with a path separator.  It is created if missing.

    Parameters
    ----------
    db : str
        Base name of the data set (CSV files and cached classifier).
    usecols : optional
        Forwarded unchanged to ``loadClassifiedDB`` for both loads.
    test : str
        Prefix for the names of the generated .mat files.
    random_state : optional
        Normalised with ``check_random_state`` and forwarded to
        ``loadClassifiedDB`` for both the training and the test load.
    show_plots : bool
        When true, each curve is also displayed with matplotlib, every
        point annotated with its threshold.

    Raises
    ------
    ValueError
        If the classifier's ``classes_`` does not contain the label ``1``.
    """
    if not os.path.exists(MAT_PATH):
        # makedirs also creates missing parent directories.
        os.makedirs(MAT_PATH)

    random_state = check_random_state(random_state)

    clf = _roc_precision_classifier(db, random_state, usecols)
    classes = clf.classes_

    # Fail early, before the test set is loaded and scored, when there is no
    # probability column for the positive label to evaluate.
    positive = classes == 1
    if not positive.any():
        raise ValueError(
            "classifier for {} has no class labelled 1 (classes_: {})".format(db, classes))

    print("Loading test set...")
    loaded = loadClassifiedDB(db + ".csv", random_state=random_state, usecols=usecols)
    # NOTE(review): if loaded is a NumPy array, y_true is a view that keeps
    # the underlying buffer alive after `del loaded` -- confirm if memory matters.
    y_true = loaded[:, -1]

    print("Predict proba...")
    y_score = clf.predict_proba(loaded[:, 0:-1])
    # Neither the test matrix nor the model is needed beyond this point.
    del loaded, clf
    # The boolean column mask yields an (n, 1) array; flatten it to the 1-D
    # score vector the metric functions document.
    y_score = y_score[:, positive].ravel()

    print("ROC...")
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    sio.savemat(MAT_PATH + test + '.roc.' + db + '.mat', {'fpr':fpr, 'tpr':tpr, 'thresholds':thresholds})

    if show_plots:
        _roc_precision_plot(fpr, tpr, thresholds,
                            "ROC curve",
                            "False Positive Rate",
                            "True Positive Rate")

    print("Precision/Recall...")
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    sio.savemat(MAT_PATH + test + '.precall.' + db + '.mat', {'precision':precision, 'recall':recall, 'thresholds':thresholds})

    if show_plots:
        _roc_precision_plot(recall, precision, thresholds,
                            "Precision/Recall",
                            "Recall (TP / (TP+FN))",
                            "Precision (TP / (TP + FP))")