def learn_with_purity(purity):
    """Train a decision tree at the given purity threshold and return its
    predictions on the test split.

    Relies on module-level globals: ``x_train``/``y_train`` (training data),
    ``x_test`` (evaluation inputs), and ``n_attr`` (attribute count used as
    the depth cap) — presumably defined earlier in this file; TODO confirm.

    :param purity: purity cutoff forwarded to ``DecisionTree``
    :return: predicted labels for ``x_test``
    """
    dtree = DecisionTree(x_train, y_train, max_depth=n_attr, purity=purity)
    dtree.fit()
    return dtree.predict(x_test)
def train(x, y):
    """Fit the shared `tree` on (x, y); optionally prune against the full
    training split when the current config enables pruning."""
    tree.train(x, y)
    if enable_prune[0]:
        # prune on columns 0..5 (features) vs column 6 (label)
        tree.prune(train_data[:, 0:6], train_data[:, 6], min_gain=0.001)


# Grid-search the tree hyperparameters, maximizing F1 on a validation split.
trainer = LabelTrainer(
    data=train_data[:, 0:6],
    data_labels=train_data[:, 6],
    labels=(0, 1),
    parameters={
        "max_depth": (6, 7, 8, 9),
        "min_samples_split": (2, 10, 20),
        "min_samples_leaf": (1, 5, 10),
        "prune": [False, True],
    },
    config_function=configure,
    train_function=train,
    predict_function=tree.predict,
)
trainer.auto_train(maximize="f1", validation_split_times=1, print_info=True)

# Re-apply the best configuration and retrain on the full data.
trainer.config_model()
trainer.train_model()

# Render the fitted tree to a PNG via graphviz dot.
tree_visualizer = DecisionTreeVisualizer(tree=tree)
dot_data = tree_visualizer.visualize()
tree_visualizer.print()
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png("output/visualize.png")

# Predict the held-out test set and persist the labels.
labels = tree.predict(test_data[:, 0:6])
utils.save_predict(labels, test_out_path)
# %%
import numpy as np

from preprocess import get_train_data, get_test_data
from dtree import DecisionTree

x_train, y_train = get_train_data()
x_test, y_test = get_test_data()

# Baseline: a depth-1 tree (decision stump) evaluated on the test split.
decision_tree = DecisionTree(x_train, y_train, max_depth=1)
decision_tree.fit()
decision_tree.traverse()
y_hat = decision_tree.predict(x_test)
print("accuracy: ", decision_tree.accuracy(x_test, y_test))


# %%
def get_stats(y_true=None, y_pred=None):
    """Return (TP, FP, TN, FN) confusion counts for binary labels {0, 1}.

    Generalized to accept explicit arrays; with no arguments it keeps the
    original behavior of comparing the module-level ``y_test`` / ``y_hat``.

    :param y_true: ground-truth labels (defaults to ``y_test``)
    :param y_pred: predicted labels (defaults to ``y_hat``)
    :return: tuple of counts (TP, FP, TN, FN)
    """
    y_true = y_test if y_true is None else np.asarray(y_true)
    y_pred = y_hat if y_pred is None else np.asarray(y_pred)
    TP = np.sum(np.logical_and(y_true == 1, y_pred == 1))
    FP = np.sum(np.logical_and(y_true == 0, y_pred == 1))
    TN = np.sum(np.logical_and(y_true == 0, y_pred == 0))
    FN = np.sum(np.logical_and(y_true == 1, y_pred == 0))
    return TP, FP, TN, FN


def specificity(y_true=None, y_pred=None):
    """True-negative rate: TN / (TN + FP). Raises ZeroDivisionError if no
    negatives are present in ``y_true``."""
    TP, FP, TN, FN = get_stats(y_true, y_pred)
    return TN / (TN + FP)


def sensitivity(y_true=None, y_pred=None):
    """True-positive rate (recall): TP / (TP + FN). Raises ZeroDivisionError
    if no positives are present in ``y_true``."""
    TP, FP, TN, FN = get_stats(y_true, y_pred)
    return TP / (TP + FN)