Example 1
# Sweep the number of trees in a random forest and plot train/validation
# accuracy and AUC.
def numb_trees(class_, method, y_train, y_test):
    trees = range(1, 50)
    #trees = [1,3,5, 10, 15, 20, 50, 100, 150]
    #trees = np.linspace(1,n,n)
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []
    for i in trees:
        if method == 'random_forest':
            y_pred_train, y_pred_val = class_.random_forest(n_estimators=i)[:2]
            xlabel = 'Number of trees'
            title = 'Random forest - number of trees'
            name_fig = 'RF_numbTrees'

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    #all_accuracies = cross_val_score(estimator=class_.gradient_boosting(n_estimators=i))
    make_plot(trees, train_accuracy, val_accuracy, train_auc, val_auc, xlabel,
              title, name_fig)
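
All of these sweep helpers hand their results to a `make_plot` function that is not shown on this page. A minimal sketch of a compatible implementation, assuming matplotlib and assuming `name_fig` is used as the output filename (both are guesses, not the original code):

import matplotlib.pyplot as plt

def make_plot(x, train_accuracy, val_accuracy, train_auc, val_auc,
              xlabel, title, name_fig):
    # Hypothetical reconstruction: plot the four score curves against the
    # swept hyperparameter and save the figure under name_fig.
    fig, ax = plt.subplots()
    ax.plot(x, train_accuracy, label='train accuracy')
    ax.plot(x, val_accuracy, label='validation accuracy')
    ax.plot(x, train_auc, label='train AUC')
    ax.plot(x, val_auc, label='validation AUC')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('score')
    ax.set_title(title)
    ax.legend()
    fig.savefig(name_fig + '.png')  # assumed output convention
    plt.close(fig)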
Example 2
# Sweep the maximum tree depth for a random forest or a single decision tree.
def tree_depth(class_, method, y_train, y_test):
    depth = range(1, 30)
    #depth = [2,3,5,7,10,13,15,18, 20,30]
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []

    for i in depth:
        if method == 'random_forest':
            y_pred_train, y_pred_val = class_.random_forest(max_depth=i)[:2]
            xlabel = 'max depth'
            title = 'Random forest - tree depth'
            name_fig = 'RF_TreeDepth'
        elif method == 'decision_tree':
            y_pred_train, y_pred_val = class_.decision_tree(max_depth=i)[:2]
            xlabel = 'max depth'
            title = 'Decision tree - tree depth'
            name_fig = 'DT_TreeDepth'

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    make_plot(depth, train_accuracy, val_accuracy, train_auc, val_auc, xlabel,
              title, name_fig)
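
Every sweep relies on the same `Analyse` interface: `accuracy()` returns a `(train, val)` pair, and `roc()` returns a tuple whose last two entries are the train and validation AUC (hence the `[4:]` and `[-2:]` slices used throughout). A minimal sketch of a compatible class, assuming binary labels and sklearn metrics; the project's real class presumably does more:

from sklearn.metrics import accuracy_score, auc, roc_curve

class Analyse:
    # Hypothetical minimal version of the Analyse class used on this page.
    def __init__(self, y_train, y_val, y_pred_train, y_pred_val):
        self.y_train, self.y_val = y_train, y_val
        self.y_pred_train, self.y_pred_val = y_pred_train, y_pred_val

    def accuracy(self):
        # (train accuracy, validation accuracy)
        return (accuracy_score(self.y_train, self.y_pred_train),
                accuracy_score(self.y_val, self.y_pred_val))

    def roc(self):
        # (fpr_train, tpr_train, fpr_val, tpr_val, auc_train, auc_val),
        # matching the roc()[4:] and roc()[-2:] slices above.
        fpr_tr, tpr_tr, _ = roc_curve(self.y_train, self.y_pred_train)
        fpr_va, tpr_va, _ = roc_curve(self.y_val, self.y_pred_val)
        return (fpr_tr, tpr_tr, fpr_va, tpr_va,
                auc(fpr_tr, tpr_tr), auc(fpr_va, tpr_va))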
Example 3
import numpy as np


# Sweep the maximum number of solver iterations for a neural network
# or logistic regression.
def max_iterations(class_, method, y_train, y_test, max_it):

    iterations = np.linspace(1, max_it, dtype=int)
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []
    for i in iterations:
        if method == 'neural_network':
            y_pred_train, y_pred_val = class_.neural_network(max_iter=i)[:2]
            xlabel = 'max iterations'
            title = 'Neural network - max iterations'
            name_fig = 'NN_max_iterations'
        elif method == 'logistic_regression':
            y_pred_train, y_pred_val = class_.logistic_regression(
                max_iter=i)[:2]
            xlabel = 'max iterations'
            title = 'Logistic regression - max iterations'
            name_fig = 'LR_max_iterations'

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    make_plot(iterations, train_accuracy, val_accuracy, train_auc, val_auc,
              xlabel, title, name_fig)
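
One detail worth knowing about this grid: `np.linspace(1, max_it, dtype=int)` emits 50 points by default and truncates them to integers, so a small `max_it` produces many duplicate iteration counts. A short illustration, plus one way to deduplicate (an alternative, not what the code above does):

import numpy as np

its = np.linspace(1, 10, dtype=int)
print(len(its), len(np.unique(its)))  # 50 points, only 10 distinct values

# An explicit num= plus np.unique keeps the integer grid clean
its = np.unique(np.linspace(1, 200, num=20, dtype=int))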
Example 4
# Sweep min_samples_leaf for a decision tree or random forest.
def min_samples_leaf(class_, method, y_train, y_test):
    samples_leaf = [0.1, 0.25, 0.5, 1, 2, 3]
    #samples_leaf = np.linspace(0.1,0.05*n,n)
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []
    for i in samples_leaf:
        if method == 'decision_tree':
            y_pred_train, y_pred_val = class_.decision_tree(
                min_samples_leaf=i)[:2]
            xlabel = 'min samples leaf'
            title = 'Decision tree - min samples leaf'
            name_fig = 'DT_min_samples_leaf'
        elif method == 'random_forest':
            y_pred_train, y_pred_val = class_.random_forest(
                min_samples_leaf=i)[:2]
            xlabel = 'min samples leaf'
            title = 'Random forest - min samples leaf'
            name_fig = 'RF_min_samples_leaf'

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    make_plot(samples_leaf, train_accuracy, val_accuracy, train_auc, val_auc,
              xlabel, title, name_fig)
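
Note that the grid mixes two regimes: sklearn treats a float `min_samples_leaf` in (0, 0.5] as a fraction of the training samples and an int as an absolute count. A standalone illustration with a plain sklearn tree (not the project's wrapper class):

from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)

# Float: fraction of samples, here ceil(0.1 * 200) = 20 samples per leaf
frac_tree = DecisionTreeClassifier(min_samples_leaf=0.1).fit(X, y)

# Int: absolute minimum number of samples per leaf
count_tree = DecisionTreeClassifier(min_samples_leaf=2).fit(X, y)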
Example 5
# Sweep the learning rate for gradient boosting.
def learning_rate(class_, method, y_train, y_test):
    rates = [0.0001, 0.001, 0.01, 0.1, 1.0]
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []
    for i in rates:
        # [:2] keeps only the train/val predictions, matching the other sweeps
        y_pred_train, y_pred_val = class_.gradient_boosting(
            learning_rate=i)[:2]
        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    xlabel = 'Learning rate'
    title = 'Gradient boosting - learning rate'
    name_fig = 'GB_learningrate'
    make_plot(rates, train_accuracy, val_accuracy, train_auc, val_auc, xlabel,
              title, name_fig)
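
The hand-written grid above is log-spaced, which is the usual way to explore learning rates; `np.logspace` generates the same grid programmatically (a convenience, not part of the original code):

import numpy as np

rates = np.logspace(-4, 0, num=5)  # [0.0001, 0.001, 0.01, 0.1, 1.0]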
Example 6
# Sweep the width of the network's single hidden layer.
def hidden_layers(class_, y_train, y_test):
    layers = [1, 4, 10, 20, 50, 100, 200, 250]
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []

    for i in layers:
        # A scalar hidden_layer_sizes yields one hidden layer of i neurons
        y_pred_train, y_pred_val = class_.neural_network(
            hidden_layer_sizes=i)[:2]

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)
    xlabel = 'hidden layer size'
    title = 'Neural network - hidden layer size'
    name_fig = 'NN_hiddenlayers'

    make_plot(layers, train_accuracy, val_accuracy, train_auc, val_auc, xlabel,
              title, name_fig)
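
MLPClassifier's `hidden_layer_sizes` takes one entry per hidden layer; a scalar, as passed above, gives a single hidden layer of that width, so the sweep varies layer width rather than network depth. To sweep depth instead one would pass tuples, e.g.:

from sklearn.neural_network import MLPClassifier

# Width sweep (what the function above effectively does): one layer of 50 neurons
wide = MLPClassifier(hidden_layer_sizes=(50,), max_iter=500)

# Depth sweep: three hidden layers of 50 neurons each
deep = MLPClassifier(hidden_layer_sizes=(50,) * 3, max_iter=500)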
Example 7
# Sweep min_samples_split for a decision tree, random forest, or AdaBoost.
def min_samples_split(class_, method, y_train, y_test):
    samples_split = [0.1, 0.5, 2, 4, 6]
    #samples_split = np.linspace(0.1,0.05*n,n)
    train_accuracy = []
    val_accuracy = []
    train_auc = []
    val_auc = []
    for i in samples_split:
        if method == 'decision_tree':
            y_pred_train, y_pred_val = class_.decision_tree(
                min_samples_split=i)[:2]
            xlabel = 'min samples split'
            title = 'Decision tree - min samples split'
            name_fig = 'DT_min_samples_split'
        elif method == 'random_forest':
            y_pred_train, y_pred_val = class_.random_forest(
                min_samples_split=i)[:2]
            xlabel = 'min samples split'
            title = 'Random forest - min samples split'
            name_fig = 'RF_min_samples_split'
        elif method == 'adaboost':
            y_pred_train, y_pred_val = class_.adaboost(min_samples_split=i)[:2]
            xlabel = 'min samples split'
            title = 'AdaBoost - min samples split'
            name_fig = 'AB_min_samples_split'

        anal = Analyse(y_train, y_test, y_pred_train, y_pred_val)
        accuracy_train, accuracy_val = anal.accuracy()
        auc_train, auc_val = anal.roc()[4:]
        train_accuracy.append(accuracy_train)
        val_accuracy.append(accuracy_val)
        train_auc.append(auc_train)
        val_auc.append(auc_val)

    make_plot(samples_split, train_accuracy, val_accuracy, train_auc, val_auc,
              xlabel, title, name_fig)
Example 8
anal_RF = Analyse(y_train, y_test, RF_tuned[0], RF_tuned[1])
anal_DT = Analyse(y_train, y_test, DT_tuned[0], DT_tuned[1])
anal_AB = Analyse(y_train, y_test, AB_tuned[0], AB_tuned[1])
anal_NN = Analyse(y_train, y_test, NN_tuned[0], NN_tuned[1])

## Getting the most important predictors for each method and making a bar plot

DT = anal_DT.importance_predictors(DT_tuned[-1], feature_names, 'Feature importance - Decision tree', 'feature_importance_IP_DT')
RF = anal_RF.importance_predictors(RF_tuned[-1], feature_names, 'Feature importance - Random forest', 'feature_importance_IP_RF')
AB = anal_AB.importance_predictors(AB_tuned[-1], feature_names, 'Feature importance - AdaBoost', 'feature_importance_IP_AB')

## Printing out accuracy and AUC

print("Accuracy scores:")
print("AdaBoost:", "train:", anal_AB.accuracy()[0], "val:", anal_AB.accuracy()[1])
print("Random forest:", "train:", anal_RF.accuracy()[0], "val:", anal_RF.accuracy()[1])
print("decision tree:", "train:", anal_DT.accuracy()[0], "val:", anal_DT.accuracy()[1])
print("neural network:", "train:", anal_NN.accuracy()[0], "val:", anal_NN.accuracy()[1])
#print("logistic regression:", "train:", anal_LR.accuracy()[0], "val:", anal_LR.accuracy()[1])

print("AUC scores:")
print("Adaboost:", "train:", anal_AB.roc()[-2], "val:", anal_AB.roc()[-1])
print("Random forest:", "train:", anal_RF.roc()[-2], "val:", anal_RF.roc()[-1])
print("decision tree:", "train:", anal_DT.roc()[-2], "val:", anal_DT.roc()[-1])
print("neural network:", "train:", anal_NN.roc()[-2], "val:", anal_NN.roc()[-1])
#print("logistic regression:", "train:", anal_LR.roc()[-2], "val:", anal_LR.roc()[-1])



## Plotting the ROC curve. The training curves were dropped because they cluttered the plot; they can be re-enabled.
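
A minimal version of that plot, assuming the six-element `roc()` layout sketched earlier, i.e. `(fpr_train, tpr_train, fpr_val, tpr_val, auc_train, auc_val)`; the training curves are left commented out, as the note above suggests:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for label, anal in [('AdaBoost', anal_AB), ('Random forest', anal_RF),
                    ('Decision tree', anal_DT), ('Neural network', anal_NN)]:
    fpr_tr, tpr_tr, fpr_va, tpr_va, auc_tr, auc_va = anal.roc()
    ax.plot(fpr_va, tpr_va, label='%s (AUC = %.3f)' % (label, auc_va))
    #ax.plot(fpr_tr, tpr_tr, linestyle='--')  # training curve, can be re-enabled

ax.plot([0, 1], [0, 1], 'k--', label='chance')  # diagonal reference line
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.set_title('ROC curves - validation set')
ax.legend()
fig.savefig('ROC_curves.png')  # assumed filename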