コード例 #1
0
def learn_depths():
    """Fit one decision tree per depth limit and tabulate its accuracies.

    Reads the module-level ``x_train``/``y_train``/``x_test``/``y_test`` data
    and ``n_attr``. For every ``max_depth`` in ``range(n_attr)`` a
    ``DecisionTree`` is fitted on the training data and scored on both the
    training and the test data.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'depth'``,
        ``'Train accuracy'`` and ``'Test accuracy'``, one row per tree.
    """

    def _fit_and_score(limit):
        # Train a single tree and report (train accuracy, test accuracy).
        model = DecisionTree(x_train, y_train, max_depth=limit)
        model.fit()
        return model.accuracy(x_train, y_train), model.accuracy(x_test, y_test)

    acc_on_train = np.zeros(n_attr)
    acc_on_test = np.zeros(n_attr)
    for limit in range(n_attr):
        acc_on_train[limit], acc_on_test[limit] = _fit_and_score(limit)

    # NOTE(review): the trees use max_depth 0..n_attr-1 while the 'depth'
    # column is labelled 1..n_attr -- confirm this matches DecisionTree's
    # max_depth convention.
    columns = {
        'depth': range(1, n_attr + 1),
        'Train accuracy': acc_on_train,
        'Test accuracy': acc_on_test,
    }
    # The table can be persisted by the caller, e.g. to 'res/acc.csv'.
    return pd.DataFrame(columns)
コード例 #2
0
def k_fold_cross_validation(x, y, k, shf=False):
    """Score decision trees of every depth limit with k-fold cross-validation.

    Sample ``i`` is held out in fold ``i % k``; all other samples form that
    fold's training set. For each fold a ``DecisionTree`` is fitted for every
    ``max_depth`` in ``range(n_attr)`` (``n_attr`` is a module-level value)
    and scored on the held-out samples with ``DecisionTree.accuracy``.

    Args:
        x: Feature rows, one per sample.
        y: Labels aligned with ``x``.
        k: Number of folds.
        shf: If true, shuffle the samples (features and labels together)
            before assigning them to folds.

    Returns:
        ``numpy.ndarray`` of shape ``(k, n_attr)``; entry ``[fold, depth]``
        is the validation accuracy of the tree with ``max_depth=depth``.
        Training accuracy is not recorded.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if shf:
        # Shuffle one shared index permutation instead of stacking x and y
        # into a single array: stacking coerces features and labels to a
        # common dtype, so shuffled runs saw different dtypes than
        # unshuffled ones.
        order = list(range(len(x)))
        shuffle(order)
        order = np.array(order, dtype=np.intp)
        x, y = x[order], y[order]

    val_acc = np.zeros((k, n_attr))
    sample_idx = np.arange(len(x))
    for d in range(k):
        print(d, "th fold...")
        # Boolean mask selecting this fold's held-out samples.
        held_out = sample_idx % k == d
        fold_x_train, fold_y_train = x[~held_out], y[~held_out]
        fold_x_val, fold_y_val = x[held_out], y[held_out]
        for depth in range(n_attr):
            dtree = DecisionTree(fold_x_train, fold_y_train, max_depth=depth)
            dtree.fit()
            val_acc[d, depth] = dtree.accuracy(fold_x_val, fold_y_val)
    return val_acc
コード例 #3
0
# %%
import numpy as np
from preprocess import get_train_data, get_test_data
from dtree import DecisionTree

# Load the data through the project's preprocess helpers; these names stay at
# module level because the code below reads them.
x_train, y_train = get_train_data()
x_test, y_test = get_test_data()
# Build and fit a tree limited to max_depth=1 on the training data.
decision_tree = DecisionTree(x_train, y_train, max_depth=1)
decision_tree.fit()
# NOTE(review): traverse() presumably walks/prints the fitted tree -- confirm
# against dtree.DecisionTree.
decision_tree.traverse()
# Test-set predictions; get_stats() below reads y_hat together with y_test.
y_hat = decision_tree.predict(x_test)
print("accuracy: ", decision_tree.accuracy(x_test, y_test))


# %%
def get_stats(y_true=None, y_pred=None):
    """Count the confusion-matrix cells for binary labels coded as 0/1.

    Args:
        y_true: Ground-truth labels. Defaults to the module-level ``y_test``.
        y_pred: Predicted labels. Defaults to the module-level ``y_hat``.

    Returns:
        Tuple ``(TP, FP, TN, FN)`` of counts.
    """
    # Fall back to the script's globals so existing zero-argument calls keep
    # working. Flattening lets plain lists work and stops an (n, 1) column
    # from broadcasting against an (n,) vector into an (n, n) comparison.
    actual = np.asarray(y_test if y_true is None else y_true).ravel()
    predicted = np.asarray(y_hat if y_pred is None else y_pred).ravel()

    TP = np.sum(np.logical_and(actual == 1, predicted == 1))
    FP = np.sum(np.logical_and(actual == 0, predicted == 1))
    TN = np.sum(np.logical_and(actual == 0, predicted == 0))
    FN = np.sum(np.logical_and(actual == 1, predicted == 0))
    return TP, FP, TN, FN


def specificity(y_true=None, y_pred=None):
    """Return the true-negative rate ``TN / (TN + FP)``.

    Args:
        y_true: Ground-truth labels; defaults as in ``get_stats``.
        y_pred: Predicted labels; defaults as in ``get_stats``.

    Returns:
        The specificity, or NaN when there are no actual negatives.
    """
    _, FP, TN, _ = get_stats(y_true, y_pred)
    negatives = TN + FP
    # Avoid dividing by zero when the labels contain no negatives.
    return TN / negatives if negatives else float("nan")


def sensitivity(y_true=None, y_pred=None):
    """Return the true-positive rate ``TP / (TP + FN)``.

    Args:
        y_true: Ground-truth labels; defaults as in ``get_stats``.
        y_pred: Predicted labels; defaults as in ``get_stats``.

    Returns:
        The sensitivity, or NaN when there are no actual positives.
    """
    TP, _, _, FN = get_stats(y_true, y_pred)
    positives = TP + FN
    # Avoid dividing by zero when the labels contain no positives.
    return TP / positives if positives else float("nan")