Esempio n. 1
0

def getAccuracy(pred_y, actu_y):
    """Return the fraction of positions at which two label sequences agree.

    Args:
        pred_y: Predicted labels (a sized, iterable sequence).
        actu_y: Actual labels; must have the same length as ``pred_y``.

    Returns:
        The number of equal pairs divided by the number of labels, as a
        float between 0.0 and 1.0.

    Raises:
        ValueError: If the two sequences differ in length, or are empty
            (accuracy is undefined without any labels).
    """
    total = len(pred_y)
    if total != len(actu_y):
        # Raising a bare string is itself a TypeError in Python 3, so a real
        # exception type is required here.
        raise ValueError(
            "pred_y and actu_y must have the same length "
            "(got {} and {})".format(total, len(actu_y))
        )
    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("cannot compute accuracy of empty label sequences")

    # Count with plain ints so the result is a builtin float even when the
    # inputs are array-like.
    correct = sum(1 for pred, actual in zip(pred_y, actu_y) if pred == actual)
    return correct / total


# Build the project's Tree with a depth limit of DEPTH and time its training
# on (X, y); st1/en1 bracket the train() call.
st1 = time()
t = Tree(max_depth=DEPTH)
t.train(X, y)
en1 = time()

# Call the tree's own printTree() for inspection.
t.printTree()

# Print the labels, then time prediction (st1p/en1p). Prediction runs on the
# same X the tree was trained on, so the accuracy below is training accuracy.
print(y)
st1p = time()
y_pred = t.predict(X)
en1p = time()
#print(y_pred)

# getAccuracy is declared as (pred_y, actu_y) but receives (y, y_pred) here;
# this is harmless because it only counts element-wise equality.
print(getAccuracy(y, y_pred))

# Second classifier with the entropy criterion and the same depth limit,
# presumably scikit-learn's DecisionTreeClassifier (confirm against the
# file's imports). st2 marks the start of its fit timing.
clf = DecisionTreeClassifier(criterion='entropy', max_depth=DEPTH)
st2 = time()
clf.fit(X, y)
Esempio n. 2
0
# Labels come from the `target` attribute of `iris`; presumably the
# scikit-learn iris dataset -- confirm where `iris` is loaded.
y = iris.target


def getAccuracy(pred_y, actu_y):
    """Return the fraction of positions at which two label sequences agree.

    Args:
        pred_y: Predicted labels (a sized, iterable sequence).
        actu_y: Actual labels; must have the same length as ``pred_y``.

    Returns:
        The number of equal pairs divided by the number of labels, as a
        float between 0.0 and 1.0.

    Raises:
        ValueError: If the two sequences differ in length, or are empty
            (accuracy is undefined without any labels).
    """
    total = len(pred_y)
    if total != len(actu_y):
        # Raising a bare string is itself a TypeError in Python 3, so a real
        # exception type is required here.
        raise ValueError(
            "pred_y and actu_y must have the same length "
            "(got {} and {})".format(total, len(actu_y))
        )
    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("cannot compute accuracy of empty label sequences")

    # Count with plain ints so the result is a builtin float even when the
    # inputs are array-like.
    correct = sum(1 for pred, actual in zip(pred_y, actu_y) if pred == actual)
    return correct / total


# Build a PFSRT-enabled tree (omega=1.5), fit it once on (X, y), show it and
# run the first PFSRT update.
t = Tree(max_depth=DEPTH, PFSRT=True, omega=1.5)
t.train(X, y)
t.printTree()
t.updatePFSRT()

# Retrain NR_TREES times; train() is called without arguments, so the tree
# reuses the data it already holds. After each PFSRT update, record the
# round number together with the tree's current accuracy.
acc = []
for round_no in range(NR_TREES):
    t.train()
    t.updatePFSRT()
    acc.append((round_no, t._cur_accuracy))

# Report the rounds from lowest to highest accuracy.
acc.sort(key=lambda pair: pair[1])
for tree_no, tree_acc in acc:
    print("Tree {} acc:".format(tree_no), tree_acc)
Esempio n. 3
0
def _print_fit_metrics(y, y_pred, acc):
    """Print the confusion matrix, F-score and accuracy of one prediction."""
    print(Get_ConfusionMatrix(y, y_pred))
    print("F-Score: ", Get_F_Score(y, y_pred))
    print("Accuracy: ", acc)


def _print_cv_accuracy(tree, X, y, folds):
    """Cross-validate ``tree`` and print the mean score and its std dev."""
    # cross_validate presumably returns one score per fold (the result is
    # summed and passed to stdev) -- confirm against its definition.
    cv_arr = cross_validate(tree, X, y, cv=folds)
    # The label is derived from ``folds`` so it can never disagree with the
    # number of folds that were actually run.
    print("Accuracy after {}-fold CV:".format(folds),
          float(sum(cv_arr)) / max(len(cv_arr), 1), "(", stdev(cv_arr), ")")


def _plot_roc_if_binary(tree, X, y, model_name, data_label):
    """Draw a ROC curve for ``tree`` when ``y`` has exactly two classes."""
    if len(np.unique(y)) == 2:
        Generate_ROC_Curve(y,
                           tree.getClassProb(X),
                           model_name,
                           label_text=data_label)


def do_experiments(X, y, depth, nr_rand_trees, data_label):
    """Train, time and report three ``Tree`` variants on the same data.

    The variants are a plain tree, the best of up to ``nr_rand_trees`` trees
    built with ``random_feat=True``, and a tree built with ``lookahead=True``.
    For each one the function prints the confusion matrix, F-score, accuracy,
    train/test timings and a cross-validated accuracy, and draws a ROC curve
    when ``y`` has exactly two distinct values. Accuracies other than the
    cross-validated ones are measured on the training data itself, because
    every tree predicts on the same ``X`` it was trained on.

    Args:
        X: Feature data passed to ``Tree.train`` / ``Tree.predict``.
        y: Labels matching ``X``.
        depth: ``max_depth`` used for every tree.
        nr_rand_trees: Maximum number of random-feature trees to train. The
            search stops early once one reaches the plain tree's accuracy.
        data_label: Passed as ``label_text`` to the plotting helpers.

    Raises:
        ValueError: If ``nr_rand_trees`` is smaller than 1, since no random
            tree could then be selected.
    """
    if nr_rand_trees < 1:
        raise ValueError("nr_rand_trees must be at least 1")

    # BASIC TREE
    print("\n------ BASIC TREE ------\n")
    print("Depth: ", depth)
    t = Tree(max_depth=depth)

    st1 = time()
    t.train(X, y)
    en1 = time()
    st1p = time()
    y_pred = t.predict(X)
    en1p = time()

    basic_acc = accuracy_score(y, y_pred)
    _print_fit_metrics(y, y_pred, basic_acc)
    print("Time to train:", en1 - st1)
    print("Time to test:", en1p - st1p)

    _print_cv_accuracy(t, X, y, 10)
    _plot_roc_if_binary(t, X, y, "ID3", data_label)
    # BASIC TREE END

    # RANDOM TREES
    print("\n------ RANDOM TREE ------\n")
    print("Depth: ", depth)
    print("Nr trees: ", nr_rand_trees)

    t2_max = None
    acc_max = 0
    iterations_taken = nr_rand_trees
    acc_list = []  # accuracy of every random tree, in training order
    max_accs = []  # best accuracy seen so far after each tree

    st2 = time()
    for i in range(nr_rand_trees):
        # Build a fresh tree each round: retraining a single shared instance
        # would make ``t2_max`` track the most recent tree, not the best one.
        t2 = Tree(max_depth=depth, random_feat=True)
        t2.train(X, y)
        acc = accuracy_score(y, t2.predict(X))
        acc_list.append(acc)
        # ``t2_max is None`` guarantees a tree is kept even if acc is 0.
        if t2_max is None or acc > acc_max:
            acc_max = acc
            t2_max = t2
        max_accs.append(acc_max)
        if acc >= basic_acc:
            # Good enough: this tree matches or beats the basic tree.
            iterations_taken = i + 1
            break
    en2 = time()

    st2p = time()
    y2_pred = t2_max.predict(X)
    en2p = time()

    _print_fit_metrics(y, y2_pred, acc_max)
    print("Time to train:", en2 - st2)
    print("Iterations taken:", iterations_taken)
    print("Time to test:", en2p - st2p)

    _print_cv_accuracy(t2_max, X, y, 10)

    random_decision_tree_accuracy(acc_list, label_text=data_label)
    accuracyRiseForRandomTrees(max_accs, label_text=data_label)

    _plot_roc_if_binary(t2_max, X, y, "Random Forest", data_label)
    # RANDOM TREES END

    # LOOKAHEAD TREE
    print("\n------ LOOKAHEAD TREE ------\n")
    print("Depth: ", depth)
    t3 = Tree(max_depth=depth, lookahead=True)

    st3 = time()
    t3.train(X, y)
    en3 = time()
    st3p = time()
    y3_pred = t3.predict(X)
    en3p = time()

    # Kept separate from ``basic_acc`` so the basic tree's score is not
    # silently overwritten.
    lookahead_acc = accuracy_score(y, y3_pred)
    _print_fit_metrics(y, y3_pred, lookahead_acc)
    print("Time to train:", en3 - st3)
    print("Time to test:", en3p - st3p)

    # The lookahead tree is cross-validated with 5 folds, not 10.
    _print_cv_accuracy(t3, X, y, 5)
    _plot_roc_if_binary(t3, X, y, "Lookahead DT", data_label)