Exemple #1
0
def feature_bagging(attr_list, reviews_list, test_data, number_of_trees):
    """Train a forest of decision trees and print its majority-vote error.

    For each of the ``number_of_trees`` trees, ``attr_list`` is shuffled in
    place, the matrix is rebuilt with ``return_mat`` and a fresh
    ``tc.decision_tree`` is grown with ``MakeTree``.  Every entry of
    ``test_data`` is then classified by majority vote over the forest and
    the fraction of misclassified entries is printed.

    Args:
        attr_list: Mutable sequence of attributes.  It is shuffled in place
            before each tree is built, so the caller's ordering is not
            preserved.
        reviews_list: Mapping of training reviews; its keys are passed to
            ``tc.decision_tree`` and the mapping itself to ``return_mat``.
        test_data: Mapping whose values are indexable: index 0 holds the
            actual label and index 1 the input handed to ``predict``.
        number_of_trees: Number of trees to train.

    Returns:
        None.  The tree counter and the final error rate go to stdout.
    """
    forest = []
    for i in range(number_of_trees):
        print(i)  # progress indicator: index of the tree being built
        rn.shuffle(attr_list)
        tree = tc.decision_tree(list(reviews_list.keys()), None, None)
        mat = return_mat(attr_list, reviews_list)
        tree.MakeTree(mat, attr_list, [])
        forest.append(tree)

    misclassified = 0
    for key in test_data:
        sample = test_data[key]
        actual_label = sample[0]

        # Count the trees voting for the positive class (a prediction of 1).
        positive_votes = sum(
            1 for tree in forest if predict(tree, sample[1]) == 1)

        # Strict majority of non-positive votes yields -1; ties resolve to 1.
        if (len(forest) - positive_votes) > positive_votes:
            prediction = -1
        else:
            prediction = 1

        if actual_label != prediction:
            misclassified += 1

    # An empty test set would otherwise raise ZeroDivisionError after all the
    # training work has been done; report zero errors out of zero samples.
    total = len(test_data)
    error_rate = misclassified / total if total else 0.0
    print("error: ", error_rate)
Exemple #2
0
def feature_bagging(attr_list, reviews_list, test_data, train_set,
                    number_of_trees):
    """Grow ``number_of_trees`` decision trees and print train/test errors.

    Each tree is grown after shuffling ``attr_list`` in place.  Once the
    forest is complete, the number of trees and the values returned by
    ``return_dataset_error`` for ``reviews_list`` and ``test_data`` are
    printed.

    Args:
        attr_list: Mutable sequence of attributes; shuffled in place before
            every tree is built.
        reviews_list: Mapping of training reviews; its keys seed each
            ``tc.decision_tree`` and it is also scored as the train set.
        test_data: Data set scored as the test set.
        train_set: Not used by this function.
        number_of_trees: Number of trees to grow.

    Returns:
        None.  All results are written to stdout.
    """

    def grow_tree():
        # Same per-tree sequence every time: shuffle, create, build matrix,
        # then fit.
        rn.shuffle(attr_list)
        tree = tc.decision_tree(list(reviews_list.keys()), None, None)
        matrix = return_mat(attr_list, reviews_list)
        tree.MakeTree(matrix, attr_list, [])
        return tree

    forest = [grow_tree() for _ in range(number_of_trees)]

    print("Number Of tree: ", number_of_trees)
    train_error = return_dataset_error(reviews_list, forest)
    print("train error: ", train_error)
    test_error = return_dataset_error(test_data, forest)
    print("test error: ", test_error)
Exemple #3
0
def DecisionTree(attr_list, reviews_list):
    """Build a decision tree, pickle it, print it and check it.

    The tree is grown with ``MakeTree`` on the matrix produced by
    ``return_mat``; the elapsed build time (from ``timeit.default_timer``)
    is printed.  The fitted tree is pickled to ``./model.pickle``, printed
    with ``PrintTree`` and passed to ``CheckModel`` together with the train
    and test data set paths.

    Args:
        attr_list: Attributes passed to ``return_mat`` and ``MakeTree``.
        reviews_list: Mapping of training reviews; its keys are passed to
            ``tc.decision_tree`` and the mapping itself to ``return_mat``.

    Returns:
        None.
    """
    print("start making tree")
    mat = return_mat(attr_list, reviews_list)
    var = tc.decision_tree(list(reviews_list.keys()), None, None)

    start = timeit.default_timer()
    var.MakeTree(mat, attr_list, [])
    print(timeit.default_timer() - start)

    # Open the output only once the tree exists, and inside ``with``: a
    # failure while building or pickling no longer leaks the handle, and a
    # failed build no longer truncates an existing model file.
    with open('./model.pickle', 'wb') as model_file:
        pickle.dump(var, model_file)

    PrintTree(var)

    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")
Exemple #4
0
def EarlyStoppingDT(mat, attr_list, reviews_list, depth, percentage_review,
                    ratio):
    """Build a decision tree with early-stopping settings and check it.

    ``MakeTree`` is called with the extra keyword arguments
    ``reviews_list``, ``depth``, ``percentage_review`` and ``ratio``.  The
    fitted tree is pickled to ``./model_earlystopping.pickle``, printed with
    ``PrintTree`` and passed to ``CheckModel`` together with the train and
    test data set paths.

    Args:
        mat: Pre-built matrix handed to ``MakeTree``.
        attr_list: Attributes handed to ``MakeTree``.
        reviews_list: Mapping of training reviews; its keys are passed to
            ``tc.decision_tree`` and the mapping itself is forwarded to
            ``MakeTree``.
        depth: Forwarded to ``MakeTree`` as ``depth``.
        percentage_review: Forwarded to ``MakeTree`` as
            ``percentage_review``.
        ratio: Forwarded to ``MakeTree`` as ``ratio``.

    Returns:
        None.
    """
    print("start making tree")
    var = tc.decision_tree(list(reviews_list.keys()), None, None)
    var.MakeTree(mat,
                 attr_list,
                 [],
                 reviews_list=reviews_list,
                 depth=depth,
                 percentage_review=percentage_review,
                 ratio=ratio)

    # Open the output only once the tree exists, and inside ``with``: a
    # failure while building or pickling no longer leaks the handle, and a
    # failed build no longer truncates an existing model file.
    with open('./model_earlystopping.pickle', 'wb') as model_file:
        pickle.dump(var, model_file)

    PrintTree(var)

    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")
Exemple #5
0
def noise_add(noise_percentage, attr_list, reviews_list):
    """Relabel a share of the reviews, then build and check a tree.

    Roughly ``noise_percentage`` percent of the entries of ``reviews_list``
    are relabelled in place: an entry whose label (index 0) is ``>= 7``
    gets label ``-1``, any other entry gets label ``1``.  A decision tree
    is then built on the modified data, pickled to ``./noise_model.pickle``,
    printed level by level and passed to ``CheckModel`` together with the
    train and test data set paths.

    Args:
        noise_percentage: Percentage of reviews to relabel.
        attr_list: Attributes passed to ``return_mat`` and ``MakeTree``.
        reviews_list: Mapping of reviews, modified in place.  Entries are
            looked up by the integer drawn from ``rn.randint`` -- assumes
            the keys are ``0 .. len(reviews_list) - 1``; TODO confirm
            against the caller.

    Returns:
        None.
    """
    total_noise = (noise_percentage / 100) * len(reviews_list)

    # NOTE(review): indices are drawn with replacement, so the same review
    # can be relabelled more than once and the number of distinct noisy
    # reviews can be lower than requested.
    while total_noise > 0:
        r = rn.randint(0, len(reviews_list) - 1)
        if reviews_list[r][0] >= 7:
            reviews_list[r] = (-1, reviews_list[r][1])
        else:
            reviews_list[r] = (1, reviews_list[r][1])
        total_noise = total_noise - 1

    mat = return_mat(attr_list, reviews_list)

    level_tree = {}
    print("start making tree")
    var = tc.decision_tree(list(reviews_list.keys()), None, None)
    var.MakeTree(mat, attr_list, [])

    # Open the output only once the tree exists, and inside ``with``: a
    # failure while building or pickling no longer leaks the handle, and a
    # failed build no longer truncates an existing model file.
    with open('./noise_model.pickle', 'wb') as model_file:
        pickle.dump(var, model_file)

    if var:
        depth = 0
        # printTree fills level_tree, which is then read per level below.
        var.printTree(depth, level_tree)

        for level in sorted(level_tree.keys()):
            print(level, " : ", end=" ")
            for value in level_tree[level]:
                print(value.attr, "\t", value.label, end="\t")

            print("\n")

    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")