def train_neural_net_with_loan_data(path, with_plots):
    """Train-and-save, or merely instantiate, four MLP classifiers (loan data).

    Training data is read with ``load_data`` from ``<path>data/loan/train/``.
    All four networks share ``solver='sgd'``, ``validation_fraction=0.0`` and
    ``random_state=1`` and have two hidden layers, the second with 5 units;
    only the first hidden layer's width varies (20, 50, 100, 500).

    Args:
        path: Prefix that is string-concatenated with ``data/...`` and
            ``model/...`` (presumably ends with a path separator; this is
            not checked here).
        with_plots: Mode flag, compared against the *string* ``"False"``.
            When equal, the networks are built with ``alpha=1e-1``, handed to
            ``train_and_time`` and the results are passed to ``save_model``
            under ``<path>model/loan``.  For any other value a banner is
            printed and the networks are only constructed, with
            ``alpha=1e-3``.

    Returns:
        None.
    """
    data_set = 'loan'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # (first hidden layer width, name under which the trained model is saved)
    variants = (
        (20, 'neural_net_model_1'),
        (50, 'neural_net_model_2'),
        (100, 'neural_net_model_3'),
        (500, 'neural_net_model_4'),
    )

    def build_network(first_layer_width, alpha):
        # Only the regularisation strength and first layer width vary.
        return MLPClassifier(solver='sgd',
                             validation_fraction=0.0,
                             alpha=alpha,
                             hidden_layer_sizes=(first_layer_width, 5),
                             random_state=1)

    if with_plots == "False":
        # Run every network through train_and_time first, then save them all.
        trained = [
            train_and_time(build_network(width, 1e-1), x_train, y_train)
            for width, _ in variants
        ]
        model_dir = path + 'model/' + data_set
        for network, (_, file_name) in zip(trained, variants):
            save_model(network, model_dir, file_name)
        return

    print('Training Neural Network...')
    # The estimators are constructed but not used any further in this
    # function as it stands.
    networks = [build_network(width, 1e-3) for width, _ in variants]
Exemple #2
0
def train_svm(path, with_plots):
    """Train and save a linear-kernel and a polynomial-kernel SVM (cardio data).

    Training data is read with ``load_data`` from ``<path>data/cardio/train/``.
    Both classifiers are built as ``svm.SVC(cache_size=400,
    probability=True)``, handed to ``train_and_time`` and the results are
    passed to ``save_model`` under ``<path>model/cardio`` as ``svm_model_1``
    (linear) and ``svm_model_2`` (poly).

    Args:
        path: Prefix that is string-concatenated with ``data/...`` and
            ``model/...`` (presumably ends with a path separator; this is
            not checked here).
        with_plots: Mode flag.  Training happens when the flag is falsy *or*
            equal to the string ``"False"``; any other value only prints
            that no learning curves exist for the SVM.

    Returns:
        None.
    """
    data_set = 'cardio'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # The sibling trainers in this file compare the flag with the string
    # "False".  A bare ``not with_plots`` treats that non-empty string as
    # truthy and would silently skip training, so both spellings of
    # "no plots" are accepted here.
    plots_disabled = (not with_plots) or with_plots == "False"

    if not plots_disabled:
        print('No learning curves for SVM...')
        return

    print('Training SVM...')
    # (kernel, name under which the trained model is saved)
    variants = (
        ('linear', 'svm_model_1'),
        ('poly', 'svm_model_2'),
    )
    # Same order as before: train both classifiers, then save both.
    trained = [
        train_and_time(
            svm.SVC(kernel=kernel, cache_size=400, probability=True),
            x_train, y_train)
        for kernel, _ in variants
    ]
    model_dir = path + 'model/' + data_set
    for classifier, (_, file_name) in zip(trained, variants):
        save_model(classifier, model_dir, file_name)
Exemple #3
0
def train_k_NN(path, with_plots):
    """Train-and-save k-nearest-neighbour models, or plot their learning curves.

    Training data is read with ``load_data`` from ``<path>data/cardio/train/``.
    Four ``KNeighborsClassifier`` variants are used, with ``n_neighbors`` of
    25, 150, 225 and 300.

    Args:
        path: Prefix that is string-concatenated with ``data/...``,
            ``model/...`` and ``plot/...`` (presumably ends with a path
            separator; this is not checked here).
        with_plots: Mode flag, compared against the *string* ``"False"``.
            When equal, each classifier is handed to ``train_and_time`` and
            the results are passed to ``save_model`` under
            ``<path>model/cardio``.  For any other value the unfitted
            classifiers are given to ``multiple_learning_curves_plot`` and
            the decorated figure is saved via ``save_figure`` as
            ``kNN_learning_curves.png`` under ``<path>plot/cardio``.

    Returns:
        None.
    """
    data_set = 'cardio'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # (n_neighbors, saved model name, legend label)
    variants = (
        (25, 'kNN_model_1', 'k = 25'),
        (150, 'kNN_model_2', 'k = 150'),
        (225, 'kNN_model_3', 'k = 225'),
        (300, 'kNN_model_4', 'k = 300'),
    )

    if with_plots == "False":
        # Run every classifier through train_and_time, then save them all.
        trained = [
            train_and_time(KNeighborsClassifier(n_neighbors=k), x_train,
                           y_train)
            for k, _, _ in variants
        ]
        model_dir = path + 'model/' + data_set
        for classifier, (_, file_name, _) in zip(trained, variants):
            save_model(classifier, model_dir, file_name)
        return

    print('Training kNN...')

    classifiers = [KNeighborsClassifier(n_neighbors=k) for k, _, _ in variants]
    legend_labels = [label for _, _, label in variants]
    curves = multiple_learning_curves_plot(
        classifiers, x_train, y_train, ["r", "y", "b", "m"], legend_labels)

    # Decorate the returned plotting object before writing it to disk.
    curves.title("k Nearest Neighbor \n Learning Curves")
    curves.xlabel("Training examples")
    curves.ylabel("F1 Score")
    curves.grid()
    curves.legend(loc="best")

    save_figure(curves, path + "plot/" + data_set, 'kNN_learning_curves.png')
def train_boosted_dtc(path, with_plots):
    """Train-and-save AdaBoost-wrapped decision trees, or plot learning curves.

    Training data is read with ``load_data`` from ``<path>data/cardio/train/``.
    Five ``AdaBoostClassifier`` variants are used, each wrapping a
    ``tree.DecisionTreeClassifier`` with ``max_depth`` of 5, 10, 15, 20, or
    with no ``max_depth`` argument at all.

    Args:
        path: Prefix that is string-concatenated with ``data/...``,
            ``model/...`` and ``plot/...`` (presumably ends with a path
            separator; this is not checked here).
        with_plots: Mode flag, compared against the *string* ``"False"``.
            When equal, each ensemble is handed to ``train_and_time`` and
            the results are passed to ``save_model`` under
            ``<path>model/cardio``.  For any other value the unfitted
            ensembles are given to ``multiple_learning_curves_plot`` and the
            decorated figure is saved via ``save_figure`` as
            ``boosted_dtc_md_learning_curves.png`` under
            ``<path>plot/cardio``.

    Returns:
        None.
    """
    data_set = 'cardio'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # (max_depth or None for "argument omitted", saved model name, label)
    variants = (
        (5, 'boosted_dtc_model_nodes_1', "MD = 5"),
        (10, 'boosted_dtc_model_nodes_2', "MD = 10"),
        (15, 'boosted_dtc_model_nodes_3', "MD = 15"),
        (20, 'boosted_dtc_model_nodes_4', "MD = 20"),
        (None, 'boosted_dtc_none', "MD = None"),
    )

    def build_ensemble(depth):
        # The last variant deliberately calls the tree constructor with no
        # arguments, exactly as the depth-limited ones pass max_depth.
        if depth is None:
            base = tree.DecisionTreeClassifier()
        else:
            base = tree.DecisionTreeClassifier(max_depth=depth)
        return AdaBoostClassifier(base)

    if with_plots == "False":
        # Run every ensemble through train_and_time, then save them all.
        trained = [
            train_and_time(build_ensemble(depth), x_train, y_train)
            for depth, _, _ in variants
        ]
        model_dir = path + "model/" + data_set
        for ensemble, (_, file_name, _) in zip(trained, variants):
            save_model(ensemble, model_dir, file_name)
        return

    print('Training boosted dtc...')
    ensembles = [build_ensemble(depth) for depth, _, _ in variants]
    legend_labels = [label for _, _, label in variants]
    curves = multiple_learning_curves_plot(
        ensembles, x_train, y_train, ["r", "y", "g", "m", "b"], legend_labels)

    # Decorate the returned plotting object before writing it to disk.
    curves.title("Boosted Decision Tree With Max Depth (MD) \n Pruning Learning Curves")
    curves.xlabel("Training examples")
    curves.ylabel("F1 Score")
    curves.grid()
    curves.legend(loc="best")
    save_figure(curves, path + "plot/" + data_set, 'boosted_dtc_md_learning_curves.png')
Exemple #5
0
def train_dtc_loan(path, with_plots):
    """Train-and-save decision trees on the loan data, or plot learning curves.

    A banner is printed first, then training data is read with ``load_data``
    from ``<path>data/loan/train/``.

    Args:
        path: Prefix that is string-concatenated with ``data/...``,
            ``model/...`` and ``plot/...`` (presumably ends with a path
            separator; this is not checked here).
        with_plots: Mode flag, compared against the *string* ``"False"``.
            When equal, two families of ``tree.DecisionTreeClassifier`` are
            handed to ``train_and_time`` and the results passed to
            ``save_model`` under ``<path>model/loan``: first the
            depth-limited trees (``max_depth`` 4, 8, 15, 30 and one with no
            arguments), then the leaf-limited trees (``max_leaf_nodes`` 5,
            20, 100, 300).  For any other value the unfitted depth-limited
            trees are given to ``multiple_learning_curves_plot`` and the
            decorated figure is saved via ``save_figure`` as
            ``dtc_md_learning_curve.png`` under ``<path>plot/loan``.

    Returns:
        None.
    """
    data_set = "loan"
    print("Training Decision Tree Classifier...")

    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # (max_depth or None for "argument omitted", saved model name, label)
    depth_variants = (
        (4, 'dtc_model_depth_1', "MD = 4"),
        (8, 'dtc_model_depth_2', "MD = 8"),
        (15, 'dtc_model_depth_3', "MD = 15"),
        (30, 'dtc_model_depth_4', "MD = 30"),
        (None, 'dtc_none', "MD = None"),
    )

    def depth_limited_tree(depth):
        # The unbounded variant is built with a no-argument constructor call.
        if depth is None:
            return tree.DecisionTreeClassifier()
        return tree.DecisionTreeClassifier(max_depth=depth)

    if with_plots == "False":
        model_dir = path + "model/" + data_set

        # Family 1: depth-limited trees -- train all, then save all.
        trained = [
            train_and_time(depth_limited_tree(depth), x_train, y_train)
            for depth, _, _ in depth_variants
        ]
        for fitted_tree, (_, file_name, _) in zip(trained, depth_variants):
            save_model(fitted_tree, model_dir, file_name)

        # Family 2: leaf-limited trees -- train all, then save all.
        # (max_leaf_nodes, saved model name)
        leaf_variants = (
            (5, 'dtc_model_leaf_nodes_1'),
            (20, 'dtc_model_leaf_nodes_2'),
            (100, 'dtc_model_leaf_nodes_3'),
            (300, 'dtc_model_leaf_nodes_4'),
        )
        trained = [
            train_and_time(
                tree.DecisionTreeClassifier(max_leaf_nodes=leaf_limit),
                x_train, y_train)
            for leaf_limit, _ in leaf_variants
        ]
        for fitted_tree, (_, file_name) in zip(trained, leaf_variants):
            save_model(fitted_tree, model_dir, file_name)
        return

    # NOTE: an analogous max-leaf-nodes (MLN) learning-curve plot over
    # max_leaf_nodes = 5, 20, 100, 300 and an unrestricted tree, saved as
    # 'dtc_mln_learning_curve.png', used to live here as commented-out code
    # and is currently disabled.  Only the max-depth plot is produced.
    trees = [depth_limited_tree(depth) for depth, _, _ in depth_variants]
    legend_labels = [label for _, _, label in depth_variants]
    curves = multiple_learning_curves_plot(
        trees, x_train, y_train, ["r", "y", "g", "m", "b"], legend_labels)

    # Decorate the returned plotting object before writing it to disk.
    curves.title("Decision Tree Learning Curves \n With Max Depth (MD)")
    curves.xlabel("Training examples")
    curves.ylabel("F1 Score")
    curves.grid()
    curves.legend(loc="best")
    save_figure(curves, path + "plot/" + data_set,
                'dtc_md_learning_curve.png')
Exemple #6
0
def train_neural_net_with_loan_data(path, with_plots):
    """Train-and-save MLP classifiers on the loan data, or plot learning curves.

    Training data is read with ``load_data`` from ``<path>data/loan/train/``.
    All four networks share ``solver='sgd'``, ``validation_fraction=0.0`` and
    ``random_state=1`` and have two hidden layers, the second with 5 units;
    only the first hidden layer's width varies (20, 50, 100, 500).

    Args:
        path: Prefix that is string-concatenated with ``data/...``,
            ``model/...`` and ``plot/...`` (presumably ends with a path
            separator; this is not checked here).
        with_plots: Mode flag, compared against the *string* ``"False"``.
            When equal, the networks are built with ``alpha=1e-1``, handed to
            ``train_and_time`` and the results are passed to ``save_model``
            under ``<path>model/loan``.  For any other value the networks are
            built with ``alpha=1e-3``, given unfitted to
            ``multiple_learning_curves_plot``, and the decorated figure is
            saved via ``save_figure`` as ``neural_net_learning_curves.png``
            under ``<path>plot/loan``; ``done`` is printed afterwards.

    Returns:
        None.
    """
    data_set = 'loan'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    # (first hidden layer width, saved model name, legend label)
    variants = (
        (20, 'neural_net_model_1', 'HLS = 20 x 5'),
        (50, 'neural_net_model_2', 'HLS = 50 x 5'),
        (100, 'neural_net_model_3', 'HLS = 100 x 5'),
        (500, 'neural_net_model_4', 'HLS = 500 x 5'),
    )

    def build_network(first_layer_width, alpha):
        # Only the regularisation strength and first layer width vary.
        return MLPClassifier(solver='sgd',
                             validation_fraction=0.0,
                             alpha=alpha,
                             hidden_layer_sizes=(first_layer_width, 5),
                             random_state=1)

    if with_plots == "False":
        # Run every network through train_and_time first, then save them all.
        trained = [
            train_and_time(build_network(width, 1e-1), x_train, y_train)
            for width, _, _ in variants
        ]
        model_dir = path + 'model/' + data_set
        for network, (_, file_name, _) in zip(trained, variants):
            save_model(network, model_dir, file_name)
        return

    print('Training Neural Network...')
    networks = [build_network(width, 1e-3) for width, _, _ in variants]
    legend_labels = [label for _, _, label in variants]

    curves = multiple_learning_curves_plot(
        networks, x_train, y_train, ["r", "y", "b", "m"], legend_labels)

    # Decorate the returned plotting object before writing it to disk.
    curves.title(
        "Neural Network with Varying Hidden Layer Size (HLS) \n Learning Curves"
    )
    curves.xlabel("Training examples")
    curves.ylabel("F1 Score")
    curves.grid()
    curves.legend(loc="best")

    save_figure(curves, path + "plot/" + data_set,
                'neural_net_learning_curves.png')
    print("done")