def RandomForest(data, labels, values, M, attr_num, T):
    """Train T ID3 trees, each on a random sample of rows and attributes.

    For every tree, M row positions are drawn independently with
    ``randint(0, len(data) - 1)`` (so a row may be picked more than once),
    then ``attr_num`` distinct attribute positions are drawn from
    ``range(len(values))``.  Columns of the sampled data and entries of
    ``values`` that were not drawn are removed with ``np.delete`` before the
    tree is built by ``DecisionTree.ID3``.

    Args:
        data: Indexable collection of rows; becomes a 2-D array after
            sampling (columns are deleted along axis 1).
        labels: Indexable collection of labels, indexed in step with ``data``.
        values: Per-attribute descriptors, one entry per column of ``data``.
        M: Number of rows drawn for each tree.
        attr_num: Number of distinct attributes kept for each tree.
        T: Number of trees to train.

    Returns:
        A list with the T objects returned by ``DecisionTree.ID3``.

    Raises:
        ValueError: If ``attr_num`` is larger than ``len(values)``.  The
            original rejection-sampling loop would never terminate in that
            case because there are not enough distinct positions to draw.
    """
    num_attrs = len(values)
    if attr_num > num_attrs:
        raise ValueError(
            "attr_num (%s) cannot exceed the number of attributes (%s)"
            % (attr_num, num_attrs)
        )

    trees = []
    for _tree in range(T):
        # Row sample: M independent draws, repeats allowed.
        # NOTE(review): `randint` is not imported in the visible part of this
        # file; whether its upper bound is inclusive depends on which
        # `randint` is in scope -- confirm.
        row_picks = [randint(0, len(data) - 1) for _draw in range(M)]
        sampled_data = np.array([data[i] for i in row_picks])
        labels_subset = np.array([labels[i] for i in row_picks])

        # Attribute sample: keep drawing until attr_num distinct positions
        # have been seen.  Only membership matters later, so a set suffices.
        kept = set()
        while len(kept) < attr_num:
            kept.add(randint(0, num_attrs - 1))

        # Drop every attribute that was not drawn, from both the data columns
        # and the matching entries of `values`.
        dropped = [i for i in range(num_attrs) if i not in kept]
        data_subset = np.delete(sampled_data, dropped, 1)
        values_subset = np.delete(values, dropped, 0)

        # The last argument is the number of remaining columns; the 0 before
        # it is presumably the starting depth -- verify against ID3.
        tree = DecisionTree.ID3(
            data_subset,
            labels_subset,
            values_subset,
            "information_gain",
            0,
            data_subset.shape[1],
        )
        trees.append(tree)
    return trees
def Bagging(data, labels, values, M, T):
    """Train T ID3 trees, each on M randomly drawn rows of the data.

    Each round draws M row positions independently with
    ``randint(0, len(data) - 1)`` (repeats are possible), gathers the
    matching rows and labels into arrays, and builds one tree with
    ``DecisionTree.ID3`` using all attributes in ``values``.

    Args:
        data: Row collection; must expose ``shape`` because
            ``data.shape[1]`` is passed as the last ID3 argument.
        labels: Labels indexed in step with ``data``.
        values: Attribute descriptors, forwarded to ID3 unchanged.
        M: Number of rows drawn per tree.
        T: Number of trees to train.

    Returns:
        A list with the T objects returned by ``DecisionTree.ID3``.
    """
    ensemble = []
    for _round in range(T):
        # Draw all row positions for this round first, then gather.
        picks = [randint(0, len(data) - 1) for _draw in range(M)]
        boot_data = np.array([data[i] for i in picks])
        boot_labels = np.array([labels[i] for i in picks])

        # The 0 is presumably the starting depth -- verify against ID3.
        ensemble.append(
            DecisionTree.ID3(
                boot_data,
                boot_labels,
                values,
                "information_gain",
                0,
                data.shape[1],
            )
        )
    return ensemble
# Jadie Adams
import DecisionTree
import numpy as np
import sys
# DataUtils is imported from the parent directory, so the path must be
# extended before the import below.
sys.path.append("..")
import DataUtils

if __name__ == "__main__":
    # Train ID3 trees on the car data for every heuristic and for maximum
    # depths 1 through 6, printing the accuracies and writing them to
    # CarResults.csv.  The `with` block guarantees the results file is closed
    # even if loading, training or testing raises.
    with open("CarResults.csv", "w+") as output:
        output.write("heuristic, depth, train accuracy, test accuracy \n")
        train_data, train_labels, test_data, test_labels = DataUtils.getData('../Data/car')
        # One [number, array of strings] pair per attribute; presumably the
        # column position and the values that column can take -- confirm
        # against DecisionTree.ID3.
        train_values = [
            [0, np.array(['vhigh', 'high', 'med', 'low'])],
            [1, np.array(['vhigh', 'high', 'med', 'low'])],
            [2, np.array(['2', '3', '4', '5more'])],
            [3, np.array(['2', '4', 'more'])],
            [4, np.array(['small', 'med', 'big'])],
            [5, np.array(['low', 'med', 'high'])],
        ]
        heuristics = ["information_gain", "gini_index", "majority_error"]
        for heuristic in heuristics:
            print(heuristic)
            for max_depth in range(1, 7):
                tree = DecisionTree.ID3(train_data, train_labels, train_values, heuristic, 0, max_depth)
                train_accuracy = DecisionTree.testTree(tree, train_data, train_labels)
                test_accuracy = DecisionTree.testTree(tree, test_data, test_labels)
                # print() joins its arguments with single spaces, matching
                # the original hand-built string.
                print(max_depth, train_accuracy, test_accuracy)
                row = [heuristic, str(max_depth), str(train_accuracy), str(test_accuracy)]
                output.write(",".join(row) + '\n')