예제 #1
0
def RandomForest(data, labels, values, M, attr_num, T):
	"""Train T ID3 trees, each on a random row sample and a random attribute subset.

	For every tree, M rows are drawn with replacement from data/labels, then
	attr_num distinct attribute positions are drawn; every other column is
	removed from the sampled rows and the matching entries are removed from
	values before DecisionTree.ID3 is called with "information_gain".

	Args:
		data: indexable collection of rows; data[i] is one example.
		labels: indexable collection parallel to data.
		values: per-attribute entries; entry k is treated as belonging to
			column k of data (the same positions are dropped from both).
		M: number of rows drawn (with replacement) for each tree.
		attr_num: number of distinct attributes each tree may use.
		T: number of trees to build.

	Returns:
		A list with the T objects returned by DecisionTree.ID3.

	Raises:
		ValueError: if trees are requested (T > 0) and attr_num exceeds
			len(values), since that many distinct attributes do not exist.
	"""
	num_attrs = len(values)
	# Without this guard the rejection sampler below would spin forever:
	# it can never collect more distinct positions than there are attributes.
	if T > 0 and attr_num > num_attrs:
		raise ValueError(
			"attr_num (%d) exceeds the number of attributes (%d)" % (attr_num, num_attrs))
	trees = []
	for _tree in range(T):
		# get data subset: M draws with replacement
		# NOTE(review): assumes randint's upper bound is inclusive, as with
		# random.randint -- confirm which randint this module imports.
		data_subset = []
		labels_subset = []
		for _ in range(M):
			index = randint(0, len(data)-1)
			data_subset.append(data[index])
			labels_subset.append(labels[index])
		data_subset = np.array(data_subset)
		labels_subset = np.array(labels_subset)
		# get attr subset: keep drawing until attr_num distinct positions are held
		kept = set()
		while len(kept) < attr_num:
			kept.add(randint(0, num_attrs-1))
		# everything that was not drawn is removed from both data and values
		rm_indices = [k for k in range(num_attrs) if k not in kept]
		data_subset = np.delete(data_subset, rm_indices, 1)
		values_subset = np.delete(values, rm_indices, 0)
		# get model; the last argument is the number of remaining columns
		tree = DecisionTree.ID3(data_subset, labels_subset, values_subset, "information_gain", 0, data_subset.shape[1])
		trees.append(tree)
	return trees
예제 #2
0
def Bagging(data, labels, values, M, T):
	"""Build T ID3 trees, each trained on M rows drawn with replacement.

	Every round draws M row positions with randint, gathers the matching
	entries of data and labels into numpy arrays, and passes them together
	with the unmodified values to DecisionTree.ID3 using "information_gain".
	The last ID3 argument is data.shape[1], so data must expose .shape.

	Returns the list of objects produced by DecisionTree.ID3, one per round.
	"""
	ensemble = []
	for _round in range(T):
		# one randint call per drawn row, in draw order
		picks = [randint(0, len(data)-1) for _ in range(M)]
		sample_rows = np.array([data[i] for i in picks])
		sample_labels = np.array([labels[i] for i in picks])
		# fit one tree on this sample and keep it
		ensemble.append(
			DecisionTree.ID3(sample_rows, sample_labels, values, "information_gain", 0, data.shape[1]))
	return ensemble
예제 #3
0
파일: RunCar.py 프로젝트: jadie1/ML-Lib
# Jadie Adams
import DecisionTree
import numpy as np
import sys
sys.path.append("..")
import DataUtils

if __name__ == "__main__":
	# Train ID3 trees on the car data for every heuristic and every depth
	# limit from 1 to 6, printing the train/test accuracy of each tree and
	# writing the same numbers to CarResults.csv.
	# The with-block guarantees the CSV is flushed and closed even if data
	# loading or training raises part-way through the sweep.
	with open("CarResults.csv", "w+") as output:
		output.write("heuristic, depth, train accuracy, test accuracy \n")
		train_data, train_labels, test_data, test_labels = DataUtils.getData('../Data/car')
		# One entry per attribute: [column index, array of category strings]
		# (presumably the values that attribute can take -- confirm against DecisionTree.ID3).
		train_values = [[0,np.array(['vhigh', 'high', 'med', 'low'])],
			[1,np.array(['vhigh', 'high', 'med', 'low'])],
			[2,np.array(['2', '3', '4', '5more'])],
			[3,np.array(['2', '4', 'more'])],
			[4,np.array(['small', 'med', 'big'])],
			[5,np.array(['low', 'med', 'high'])]]
		heuristics = ["information_gain", "gini_index", "majority_error"]
		for heuristic in heuristics:
			print(heuristic)
			for max_depth in range(1,7):
				tree = DecisionTree.ID3(train_data, train_labels, train_values, heuristic, 0, max_depth)
				train_accuracy = DecisionTree.testTree(tree, train_data, train_labels)
				test_accuracy = DecisionTree.testTree(tree, test_data, test_labels)
				# the same three numbers go to the console (space separated)
				# and, prefixed by the heuristic name, to the CSV (comma separated)
				fields = [str(max_depth), str(train_accuracy), str(test_accuracy)]
				print(" ".join(fields))
				output.write(",".join([heuristic] + fields) + '\n')