def knn_train(dimensions, train_count, test_count, iterations=10):
    # For evaluation
    BCE_list_train = []
    BCE_list_test = []

    for i in range(iterations):
        # Data generation
        train_x_data, train_y_data = create_toy_data(dimensions, train_count, test_count,
                                                     add_outliers=True, training=True)
        test_x_data, test_y_data = create_toy_data(dimensions, train_count, test_count,
                                                   add_outliers=True, training=False)

        # Feature engineering
        X_train = train_x_data
        X_test = test_x_data

        # Model
        knn = kNN()

        # Training (learning)
        knn.fit(X_train, train_y_data)

        # Predicting on the training set
        y_hat_train = knn.predict(X_train)

        # Predicting on the test set
        y_hat_test = knn.predict(X_test)

        # Evaluating
        BCE_value_train = binary_cross_entropy(train_y_data, y_hat_train)
        BCE_value_test = binary_cross_entropy(test_y_data, y_hat_test)

        # Appending
        BCE_list_train.append(BCE_value_train)
        BCE_list_test.append(BCE_value_test)

        # Plotting the decision boundary for the first 2-D run
        if dimensions == 2 and i == 0:
            x1_test, x2_test = np.meshgrid(np.linspace(-5, 15, 100), np.linspace(-5, 15, 100))
            X_test_plot = np.array([x1_test, x2_test]).reshape(2, -1).T

            # kNN predictions over the grid
            y_hat_plot = knn.predict(X_test_plot)
            plot(train_x_data, train_y_data, test_x_data, test_y_data,
                 x1_test, x2_test, y_hat_plot, "./Results/knn_result")

    # Average BCE over all iterations
    (average_BCE_train, BCE_std_train) = average_metric(BCE_list_train)
    (average_BCE_test, BCE_std_test) = average_metric(BCE_list_test)

    return (average_BCE_train, BCE_std_train), (average_BCE_test, BCE_std_test)
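# The kNN class and the metric helpers used above are imported from elsewhere
# in the project and are not shown here. The following is a minimal sketch of
# what they might look like, under the ASSUMPTIONS that labels are 0/1 arrays,
# predictions are class-1 probabilities, and k defaults to 5; the project's
# actual implementations may differ.
import numpy as np

def binary_cross_entropy(y, y_hat, eps=1e-12):
    # Clip predictions away from 0 and 1 so the logarithms stay finite.
    y_hat = np.clip(y_hat, eps, 1 - eps)
    return -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

def average_metric(values):
    # Mean and standard deviation of per-iteration metric values.
    return np.average(values), np.std(values)

class kNN:
    def __init__(self, k=5):
        self.k = k

    def fit(self, X, y):
        # Lazy learner: just memorize the training data.
        self.X = X
        self.y = y

    def predict(self, X):
        # Predicted probability = fraction of positive labels among the
        # k nearest training points (Euclidean distance).
        dists = np.linalg.norm(X[:, None, :] - self.X[None, :, :], axis=2)
        nearest = np.argsort(dists, axis=1)[:, :self.k]
        return self.y[nearest].mean(axis=1)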
import numpy as np

from App.Pre_processing.data_generation import create_toy_data

if __name__ == "__main__":
    for i in range(10):
        # The first five datasets are clean; the last five include outliers.
        if i < 5:
            train_x_data, train_y_data = create_toy_data()
            test_x_data, test_y_data = create_toy_data(training=False)
        else:
            train_x_data, train_y_data = create_toy_data(add_outliers=True)
            test_x_data, test_y_data = create_toy_data(add_outliers=True, training=False)

        # Note: np.savetxt writes space-delimited text by default,
        # despite the .csv extension.
        np.savetxt("./Data/x_{}.csv".format(i), train_x_data)
        np.savetxt("./Data/y_{}.csv".format(i), train_y_data)
        np.savetxt("./Data/test_x_{}.csv".format(i), test_x_data)
        np.savetxt("./Data/test_y_{}.csv".format(i), test_y_data)
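# Assuming the file layout produced above, the saved datasets can be reloaded
# with np.loadtxt, whose default delimiter (whitespace) matches np.savetxt's
# default output. A minimal sketch for the first dataset:
import numpy as np

train_x_data = np.loadtxt("./Data/x_0.csv")
train_y_data = np.loadtxt("./Data/y_0.csv")
test_x_data = np.loadtxt("./Data/test_x_0.csv")
test_y_data = np.loadtxt("./Data/test_y_0.csv")
print(train_x_data.shape, train_y_data.shape)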
def decision_tree_train(dimensions, train_count, test_count, iterations=10):
    # Hyperparameters
    max_depth = 5
    min_size = 10

    # For evaluation
    BCE_list_train = []
    BCE_list_test = []
    Terminal_count_list = []

    for i in range(iterations):
        # Data generation
        train_x_data, train_y_data = create_toy_data(dimensions, train_count, test_count,
                                                     add_outliers=True, training=True)
        test_x_data, test_y_data = create_toy_data(dimensions, train_count, test_count,
                                                   add_outliers=True, training=False)

        # Feature engineering
        X_train = train_x_data
        X_test = test_x_data

        # Model
        decision_tree = Decision_tree()

        # Combine the features and targets into a single array
        train_y_data_tree = train_y_data[:, None]
        Tree_input = np.hstack((X_train, train_y_data_tree))

        # Training (learning); the last column of Tree_input is the target
        tree, terminal_count = decision_tree.build_tree(Tree_input, max_depth, min_size,
                                                        np.shape(Tree_input)[1] - 1)

        # Predicting on the training set
        y_hat_train = decision_tree.predicts(tree, X_train)

        # Predicting on the test set
        y_hat_test = decision_tree.predicts(tree, X_test)

        # Evaluating
        BCE_value_train = binary_cross_entropy(train_y_data, y_hat_train)
        BCE_value_test = binary_cross_entropy(test_y_data, y_hat_test)

        # Appending
        BCE_list_train.append(BCE_value_train)
        BCE_list_test.append(BCE_value_test)
        Terminal_count_list.append(terminal_count)

        # Plotting the decision boundary for the first 2-D run
        if dimensions == 2 and i == 0:
            x1_test, x2_test = np.meshgrid(np.linspace(-5, 15, 100), np.linspace(-5, 15, 100))
            X_test_plot = np.array([x1_test, x2_test]).reshape(2, -1).T

            # decision-tree predictions over the grid
            y_hat_plot = decision_tree.predicts(tree, X_test_plot)
            plot(train_x_data, train_y_data, test_x_data, test_y_data,
                 x1_test, x2_test, y_hat_plot, "./Results/tree_result")

    # Average BCE over all iterations
    (average_BCE_train, BCE_std_train) = average_metric(BCE_list_train)
    (average_BCE_test, BCE_std_test) = average_metric(BCE_list_test)
    average_terminal_count = np.average(Terminal_count_list)

    return (average_BCE_train, BCE_std_train), (average_BCE_test, BCE_std_test), average_terminal_count
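# A hypothetical driver (not part of the original files) showing how the two
# training routines above might be invoked; the argument values here are
# ASSUMPTIONS chosen for illustration, not values taken from the project.
if __name__ == "__main__":
    knn_bce_train, knn_bce_test = knn_train(dimensions=2, train_count=100, test_count=50)
    print("kNN  BCE train {:.4f} (+/- {:.4f}), test {:.4f} (+/- {:.4f})".format(
        *knn_bce_train, *knn_bce_test))

    tree_bce_train, tree_bce_test, leaves = decision_tree_train(
        dimensions=2, train_count=100, test_count=50)
    print("Tree BCE train {:.4f} (+/- {:.4f}), test {:.4f} (+/- {:.4f}), "
          "avg terminal nodes {:.1f}".format(*tree_bce_train, *tree_bce_test, leaves))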