def learn_with_purity(purity):
    """Fit a decision tree with the given purity setting and predict the test set.

    The tree is built from ``x_train``/``y_train`` taken from the enclosing
    scope, with ``max_depth`` set to ``n_attr``.

    Args:
        purity: Value forwarded unchanged as the ``purity`` argument of
            ``DecisionTree``.

    Returns:
        The result of ``DecisionTree.predict`` applied to ``x_test``.
    """
    model = DecisionTree(x_train, y_train, max_depth=n_attr, purity=purity)
    model.fit()
    return model.predict(x_test)
# Example no. 2
# 0
# File: test1.py  Project: iffiX/Ion
    def train(x, y):
        """Train the shared ``tree`` on ``(x, y)`` and prune it when enabled.

        Pruning runs only when ``enable_prune[0]`` is truthy. It uses the
        first six columns of ``train_data`` as inputs and column 6 as labels
        (not the ``x``/``y`` passed in), with ``min_gain=0.001``.
        """
        tree.train(x, y)
        if not enable_prune[0]:
            return
        prune_inputs = train_data[:, 0:6]
        prune_labels = train_data[:, 6]
        tree.prune(prune_inputs, prune_labels, min_gain=0.001)

    # Build the trainer on the first six columns of train_data (inputs) and
    # column 6 (labels), wiring in the configure/train/predict callbacks.
    # NOTE(review): "parameters" looks like a hyper-parameter grid that the
    # trainer hands to `configure` — confirm against LabelTrainer.
    trainer = LabelTrainer(data=train_data[:, 0:6],
                           data_labels=train_data[:, 6],
                           labels=(0, 1),
                           parameters={
                               "max_depth": (6, 7, 8, 9),
                               "min_samples_split": (2, 10, 20),
                               "min_samples_leaf": (1, 5, 10),
                               "prune": [False, True]
                           },
                           config_function=configure,
                           train_function=train,
                           predict_function=tree.predict)
    # NOTE(review): presumably searches the parameter combinations and keeps
    # the one with the best F1 score — confirm in LabelTrainer.auto_train.
    trainer.auto_train(maximize="f1",
                       validation_split_times=1,
                       print_info=True)
    # NOTE(review): presumably applies the selected configuration and then
    # retrains the model with it — confirm in LabelTrainer.
    trainer.config_model()
    trainer.train_model()
    # Render the tree: visualize() output is passed to pydotplus as DOT data
    # and written out as a PNG.
    tree_visualizer = DecisionTreeVisualizer(tree=tree)
    dot_data = tree_visualizer.visualize()
    tree_visualizer.print()
    graph = pydotplus.graph_from_dot_data(dot_data)
    # NOTE(review): relative path — assumes an "output" directory exists in
    # the working directory; verify before running.
    graph.write_png("output/visualize.png")

    # Predict on the first six columns of test_data and save the labels to
    # test_out_path.
    labels = tree.predict(test_data[:, 0:6])
    utils.save_predict(labels, test_out_path)
# Example no. 3
# 0
# %%
import numpy as np
from preprocess import get_train_data, get_test_data
from dtree import DecisionTree

# Load the train and test splits from the project's preprocess module.
x_train, y_train = get_train_data()
x_test, y_test = get_test_data()
# Build and fit a tree limited to max_depth=1.
decision_tree = DecisionTree(x_train, y_train, max_depth=1)
decision_tree.fit()
# NOTE(review): presumably walks/prints the fitted tree — confirm in dtree.
decision_tree.traverse()
# Test-set predictions; kept at module level because get_stats() reads y_hat.
y_hat = decision_tree.predict(x_test)
print("accuracy: ", decision_tree.accuracy(x_test, y_test))


# %%
def get_stats(y_true=None, y_pred=None):
    """Count the binary confusion-matrix cells for labels coded as 0 and 1.

    Args:
        y_true: Ground-truth labels. When omitted, the module-level
            ``y_test`` is used.
        y_pred: Predicted labels. When omitted, the module-level ``y_hat``
            is used.

    Returns:
        A tuple ``(TP, FP, TN, FN)`` of counts.

    Raises:
        ValueError: If the two label arrays do not have the same shape.
    """
    # The globals are looked up only when no argument is supplied, so the
    # function can be called with explicit arrays outside the script.
    actual = np.asarray(y_test if y_true is None else y_true)
    predicted = np.asarray(y_hat if y_pred is None else y_pred)

    # Without this check an (n, 1) vs (n,) pair would silently broadcast to
    # an n-by-n grid and produce meaningless counts.
    if actual.shape != predicted.shape:
        raise ValueError(
            f"label shapes differ: {actual.shape} vs {predicted.shape}"
        )

    TP = np.sum(np.logical_and(actual == 1, predicted == 1))
    FP = np.sum(np.logical_and(actual == 0, predicted == 1))
    TN = np.sum(np.logical_and(actual == 0, predicted == 0))
    FN = np.sum(np.logical_and(actual == 1, predicted == 0))
    return TP, FP, TN, FN


def specificity():
    """Return the true-negative rate, ``TN / (TN + FP)``.

    The counts come from ``get_stats()``. The denominator is not guarded
    against zero.
    """
    _, false_pos, true_neg, _ = get_stats()
    negatives = true_neg + false_pos
    return true_neg / negatives


def sensitivity():
    """Return the true-positive rate, ``TP / (TP + FN)``.

    The counts come from ``get_stats()``. The denominator is not guarded
    against zero.
    """
    true_pos, _, _, false_neg = get_stats()
    positives = true_pos + false_neg
    return true_pos / positives