Example #1
import numpy as np

from App.Pre_processing.data_generation import create_toy_data
# kNN, binary_cross_entropy, average_metric and plot are assumed to be
# provided elsewhere in the project; their module paths are not shown here.


def knn_train(dimensions, train_count, test_count, iterations=10):

    # For evaluation
    BCE_list_train = []
    BCE_list_test = []

    for i in range(iterations):
        # Generate a fresh toy train/test split
        train_x_data, train_y_data = create_toy_data(dimensions,
                                                     train_count,
                                                     test_count,
                                                     add_outliers=True,
                                                     training=True)
        test_x_data, test_y_data = create_toy_data(dimensions,
                                                   train_count,
                                                   test_count,
                                                   add_outliers=True,
                                                   training=False)

        # Feature engineering (none here: the raw features are used as-is)
        X_train = train_x_data
        X_test = test_x_data

        # Model
        knn = kNN()

        # Training (Learning)
        knn.fit(X_train, train_y_data)

        # Predict on the training set
        y_hat_train = knn.predict(X_train)

        # Predict on the test set
        y_hat_test = knn.predict(X_test)

        # Evaluating
        BCE_value_train = binary_cross_entropy(train_y_data, y_hat_train)
        BCE_value_test = binary_cross_entropy(test_y_data, y_hat_test)

        # Appending
        BCE_list_train.append(BCE_value_train)
        BCE_list_test.append(BCE_value_test)

        # Plotting
        if dimensions == 2 and i == 0:
            x1_test, x2_test = np.meshgrid(np.linspace(-5, 15, 100),
                                           np.linspace(-5, 15, 100))
            X_test_plot = np.array([x1_test, x2_test]).reshape(2, -1).T

            # Predict over the plotting grid with the fitted kNN model
            y_hat_plot = knn.predict(X_test_plot)
            plot(train_x_data, train_y_data, test_x_data, test_y_data, x1_test,
                 x2_test, y_hat_plot, "./Results/knn_result")

    # Average BCE across iterations (the metric is BCE, not MSE)
    (average_BCE_train, BCE_std_train) = average_metric(BCE_list_train)
    (average_BCE_test, BCE_std_test) = average_metric(BCE_list_test)

    return (average_BCE_train, BCE_std_train), (average_BCE_test, BCE_std_test)
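
A minimal driver sketch for the function above (illustrative only: the argument values are assumptions, and knn_train must be importable from this module):

if __name__ == "__main__":
    # Hypothetical call; dimensions/train_count/test_count are example values
    (bce_train, std_train), (bce_test, std_test) = knn_train(
        dimensions=2, train_count=100, test_count=50, iterations=10)
    print("train BCE: {:.4f} +/- {:.4f}".format(bce_train, std_train))
    print("test  BCE: {:.4f} +/- {:.4f}".format(bce_test, std_test))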
Example #2
import numpy as np

from App.Pre_processing.data_generation import create_toy_data

if __name__ == "__main__":

    # Generate ten train/test splits: the first five clean,
    # the last five with outliers added
    for i in range(10):
        if i < 5:
            train_x_data, train_y_data = create_toy_data()
            test_x_data, test_y_data = create_toy_data(training=False)
        else:
            train_x_data, train_y_data = create_toy_data(add_outliers=True)
            test_x_data, test_y_data = create_toy_data(add_outliers=True,
                                                       training=False)

        np.savetxt("./Data/x_{}.csv".format(i), train_x_data)
        np.savetxt("./Data/y_{}.csv".format(i), train_y_data)
        np.savetxt("./Data/test_x_{}.csv".format(i), test_x_data)
        np.savetxt("./Data/test_y_{}.csv".format(i), test_y_data)
Example #3

import numpy as np

from App.Pre_processing.data_generation import create_toy_data
# Decision_tree, binary_cross_entropy, average_metric and plot are assumed
# to be provided elsewhere in the project.


def decision_tree_train(dimensions, train_count, test_count, iterations=10):
    # Set Hyperparameters
    max_depth = 5
    min_size = 10
    
    # For evaluation
    BCE_list_train = []
    BCE_list_test = []
    Terminal_count_list = []

    for i in range(iterations):
        # Generate a fresh toy train/test split
        train_x_data, train_y_data = create_toy_data(dimensions,
                                                     train_count,
                                                     test_count,
                                                     add_outliers=True,
                                                     training=True)
        test_x_data, test_y_data = create_toy_data(dimensions,
                                                   train_count,
                                                   test_count,
                                                   add_outliers=True,
                                                   training=False)
        
        # Feature engineering (none here: the raw features are used as-is)
        X_train = train_x_data
        X_test = test_x_data
        
        # Model
        decision_tree = Decision_tree()

        # Combine features and targets; build_tree expects the label in the last column
        train_y_data_tree = train_y_data[:, None]
        Tree_input = np.hstack((X_train, train_y_data_tree))

        # Training (Learning)
        tree, terminal_count = decision_tree.build_tree(
            Tree_input, max_depth, min_size, np.shape(Tree_input)[1] - 1)
        
        # Predict on the training set
        y_hat_train = decision_tree.predicts(tree, X_train)

        # Predict on the test set
        y_hat_test = decision_tree.predicts(tree, X_test)
        
        # Evaluating
        BCE_value_train = binary_cross_entropy(train_y_data, y_hat_train)
        BCE_value_test = binary_cross_entropy(test_y_data, y_hat_test)
        
        # Appending
        BCE_list_train.append(BCE_value_train)
        BCE_list_test.append(BCE_value_test)
        Terminal_count_list.append(terminal_count)
        
        # Plotting
        if dimensions == 2 and i == 0:
            x1_test, x2_test = np.meshgrid(np.linspace(-5, 15, 100),
                                           np.linspace(-5, 15, 100))
            X_test_plot = np.array([x1_test, x2_test]).reshape(2, -1).T

            # Predict over the grid to draw the decision-tree boundary
            y_hat_plot = decision_tree.predicts(tree, X_test_plot)
            plot(train_x_data, train_y_data, test_x_data, test_y_data,
                 x1_test, x2_test, y_hat_plot, "./Results/tree_result")
            

    # Average BCE across iterations (the metric is BCE, not MSE)
    (average_BCE_train, BCE_std_train) = average_metric(BCE_list_train)
    (average_BCE_test, BCE_std_test) = average_metric(BCE_list_test)
    average_terminal_count = np.average(Terminal_count_list)

    return ((average_BCE_train, BCE_std_train),
            (average_BCE_test, BCE_std_test),
            average_terminal_count)
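
As with the kNN example, a minimal driver sketch (the argument values are assumptions, not values from the source):

if __name__ == "__main__":
    # Hypothetical call to the training loop above
    train_stats, test_stats, avg_leaves = decision_tree_train(
        dimensions=2, train_count=100, test_count=50, iterations=10)
    print("train BCE (mean, std):", train_stats)
    print("test BCE (mean, std):", test_stats)
    print("average number of terminal (leaf) nodes:", avg_leaves)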