def feature_bagging(attr_list, reviews_list, test_data, number_of_trees):
    # Train number_of_trees decision trees, each on a reshuffled attribute list,
    # then evaluate the forest on test_data with a majority vote.
    forest = []
    for i in range(number_of_trees):
        print(i)
        rn.shuffle(attr_list)
        var = tc.decision_tree(list(reviews_list.keys()), None, None)
        mat = return_mat(attr_list, reviews_list)
        prev = []
        var.MakeTree(mat, attr_list, prev)
        forest.append(var)

    error = 0
    for data in test_data:
        actual_label = test_data[data][0]
        # Count how many trees vote for the positive class.
        pred = 0
        for tree in forest:
            if predict(tree, test_data[data][1]) == 1:
                pred += 1
        # Majority vote; ties go to the positive class.
        prediction = -1 if (len(forest) - pred) > pred else 1
        if actual_label != prediction:
            error += 1
    print("error: ", error / len(test_data))
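# predict(tree, features) used above is defined elsewhere in this project. Purely as an
# illustration (not the project's actual implementation), a per-tree prediction for a
# binary decision tree over bag-of-words features could look like the sketch below. It
# assumes each node exposes `attr` (the word it splits on) and `label` (None on internal
# nodes, +1/-1 on leaves), plus hypothetical `left`/`right` children for the splitting
# word being absent/present in the review; all of those field names are assumptions.
def predict_sketch(tree, features):
    node = tree
    while node is not None and node.label is None:
        # Follow the branch matching whether the splitting word occurs in the review.
        node = node.right if node.attr in features else node.left
    return node.label if node is not None else 1  # default to the positive class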
def feature_bagging(attr_list, reviews_list, test_data, train_set, number_of_trees):
    # Note: this redefinition shadows the feature_bagging above; it reports both
    # train and test error through return_dataset_error instead of inlining the vote.
    forest = []
    for i in range(number_of_trees):
        rn.shuffle(attr_list)
        var = tc.decision_tree(list(reviews_list.keys()), None, None)
        mat = return_mat(attr_list, reviews_list)
        prev = []
        var.MakeTree(mat, attr_list, prev)
        forest.append(var)
    print("Number of trees: ", number_of_trees)
    print("train error: ", return_dataset_error(reviews_list, forest))
    print("test error: ", return_dataset_error(test_data, forest))
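# return_dataset_error(dataset, forest) is also defined elsewhere. Judging from the
# inline evaluation in the first feature_bagging above, it presumably computes the
# majority-vote error rate of the forest over a dataset mapping id -> (label, features).
# The sketch below mirrors that inline logic (ties go to the positive class); treat it
# as an assumption about the helper, not the project's actual code.
def return_dataset_error_sketch(dataset, forest):
    errors = 0
    for key in dataset:
        actual_label, features = dataset[key][0], dataset[key][1]
        positive_votes = sum(1 for tree in forest if predict(tree, features) == 1)
        prediction = 1 if positive_votes >= (len(forest) - positive_votes) else -1
        if prediction != actual_label:
            errors += 1
    return errors / len(dataset)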
def DecisionTree(attr_list, reviews_list):
    # Build a single decision tree on the full training data, time its construction,
    # pickle the model, print it, and report train/test accuracy.
    print("start making tree")
    mat = return_mat(attr_list, reviews_list)
    var = tc.decision_tree(list(reviews_list.keys()), None, None)
    start = timeit.default_timer()
    prev = []
    var.MakeTree(mat, attr_list, prev)
    print(timeit.default_timer() - start)
    with open('./model.pickle', 'wb') as m:
        pickle.dump(var, m)
    PrintTree(var)
    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")
def EarlyStoppingDT(mat, attr_list, reviews_list, depth, percentage_review, ratio):
    # Same as DecisionTree, but the early-stopping parameters
    # (depth, percentage_review, ratio) are forwarded to MakeTree.
    print("start making tree")
    var = tc.decision_tree(list(reviews_list.keys()), None, None)
    prev = []
    var.MakeTree(mat, attr_list, prev, reviews_list=reviews_list, depth=depth,
                 percentage_review=percentage_review, ratio=ratio)
    with open('./model_earlystopping.pickle', 'wb') as m:
        pickle.dump(var, m)
    PrintTree(var)
    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")
def noise_add(noise_percentage, attr_list, reviews_list):
    # Flip the labels of roughly noise_percentage% of the reviews, then train and
    # inspect a tree on the noisy data. Indices are drawn with replacement, so the
    # same review can be picked (and re-flipped) more than once.
    total_noise = (noise_percentage / 100) * len(reviews_list)
    while total_noise > 0:
        r = rn.randint(0, len(reviews_list) - 1)
        if reviews_list[r][0] >= 7:
            # A stored label of 7 or higher counts as positive here; it is replaced
            # with -1, anything else with +1.
            reviews_list[r] = (-1, reviews_list[r][1])
        else:
            reviews_list[r] = (1, reviews_list[r][1])
        total_noise -= 1

    mat = return_mat(attr_list, reviews_list)
    level_tree = {}
    print("start making tree")
    var = tc.decision_tree(list(reviews_list.keys()), None, None)
    prev = []
    var.MakeTree(mat, attr_list, prev)
    with open('./noise_model.pickle', 'wb') as m:
        pickle.dump(var, m)

    if var:
        # Print the tree level by level: each node's splitting attribute and label.
        depth = 0
        var.printTree(depth, level_tree)
        for level in sorted(level_tree.keys()):
            print(level, " : ", end=" ")
            for value in level_tree[level]:
                print(value.attr, "\t", value.label, end="\t")
            print("\n")
    CheckModel(var, "./train/train_dataset.txt")
    CheckModel(var, "./test/test_dataset.txt")
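# A hedged sketch of how these functions might be driven end to end. The loader names
# below (load_attributes, load_reviews) and the attributes path are hypothetical
# stand-ins for whatever this project uses to build attr_list and the
# id -> (label, features) dictionaries, so the example is left commented out.
#
# if __name__ == "__main__":
#     attr_list = load_attributes("./train/attributes.txt")     # hypothetical loader
#     reviews_list = load_reviews("./train/train_dataset.txt")  # hypothetical loader
#     test_data = load_reviews("./test/test_dataset.txt")       # hypothetical loader
#     DecisionTree(attr_list, reviews_list)                     # single-tree baseline
#     feature_bagging(attr_list, reviews_list, test_data, reviews_list, 10)
#     noise_add(5, attr_list, reviews_list)                     # inject 5% label noise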