def testSimpleCase(self):
    """Fit a tree on simpleData and verify every training example is predicted exactly."""
    tree = DecisionTree()
    tree.fit(simpleData)
    # Each row's last element is its label; the fitted tree must reproduce it.
    for example in simpleData:
        self.assertEqual(example[-1], tree.predict(example))
    tree.print()
def test():
    """Smoke-test DecisionTree: fit, predict, save to disk, reload, re-predict.

    Fix: the original used Python 2 `print res` statements, which are a
    syntax error under Python 3 and inconsistent with the rest of the file,
    which calls the print() function. Behavior is otherwise unchanged.
    """
    X = [[1, 2, 0, 1, 0],
         [0, 1, 1, 0, 1],
         [1, 0, 0, 0, 1],
         [2, 1, 1, 0, 1],
         [1, 1, 0, 1, 1]]
    y = ['yes', 'yes', 'no', 'no', 'no']

    decision_tree = DecisionTree(mode='C4.5')
    decision_tree.fit(X, y)
    res = decision_tree.predict(X)
    print(res)

    # Round-trip the model through disk and confirm the reloaded tree
    # still produces predictions.
    model_name = 'test.dt'
    decision_tree.saveModel(model_name)
    new_tree = DecisionTree()
    new_tree.loadModel(model_name)
    print(new_tree.predict(X))
# zip categorical and non-categorical data together
train_data = np.concatenate((cat_data, non_cat_data), axis=1)
train_label = data[:, -1].astype(int)

# NOTE(review): the validation rows (first 200) are NOT excluded from
# train_data below (`[:, :]` / `[:]` keep every row), so validation
# overlaps training — presumably intentional here, but confirm; the other
# script in this file excludes its validation slice.
validation_data = train_data[:200, :]
validation_label = train_label[:200]
train_data = train_data[:, :]
train_label = train_label[:]

test_data = np.concatenate((cat_data_test, non_cat_data_test), axis=1)

# decision tree: train, then report validation accuracy
tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)
score = 0
for idx, prediction in enumerate(res):
    if prediction == validation_label[idx]:
        score += 1
score /= len(res)
print(score)

# random forest
forest = RandomForest(100, 5, train_data.shape[0], 6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)
score = 0
# NOTE(review): this chunk starts MID-FUNCTION — the `def` line of the data
# reader is outside this view (`fin` is opened there, `return data` below
# closes the function). The remainder is top-level driver code: load the
# connect-4 dataset, then run 5-fold cross-validation of DecisionTree and
# print the mean macro-F1. Code left byte-identical; not reformatted because
# the missing function header cannot be reconstructed safely from here.
label2id = {'loss': 0, 'draw': 1, 'win': 2} p2f = {'b': 0, 'x': 1, 'o': 2} for line in fin: line = line.strip().split(',') label = label2id[line[-1]] feature = np.array([p2f[p] for p in line[:-1]]) data.append((feature, label)) fin.close() return data data = read('connect-4.data') x = np.array([d[0] for d in data]) y = np.array([d[1] for d in data]) #y = label_binarize(y, classes=list(range(3))) kf = KFold(5, True) all_f1 = [] for train_index, test_index in kf.split(x): x_train, y_train, x_test, y_test = x[train_index], y[train_index], x[ test_index], y[test_index] #x_train, x_test, y_train, y_test = train_test_split(x, y) #print('training') #model = OneVsRestClassifier(SVC(kernel='rbf')) model = DecisionTree() model.fit(x_train, y_train) #print('testing') y_pred = model.predict(x_test) all_f1.append(f1_score(y_test, y_pred, average='macro')) print(sum(all_f1) / 5)
# zip categorical and non-categorical data together
train_data = np.concatenate((cat_data, non_cat_data), axis=1)
train_label = data[:, -1].astype(int)

# Hold out the first 6000 rows for validation; train on the remainder.
validation_data = train_data[:6000, :]
validation_label = train_label[:6000]
train_data = train_data[6000:, :]
train_label = train_label[6000:]

test_data = np.concatenate((cat_data_test, non_cat_data_test), axis=1)

# Plot validation accuracy as a function of tree depth.
# Fix: the original reused `i` for both the depth loop and the inner
# scoring loop, shadowing the depth index; renamed for clarity
# (behavior was unchanged since `for` rebinds `i` each iteration).
accuracy = []
for depth in range(40):
    tree = DecisionTree(depth, 105)
    tree.train(train_data, train_label)
    res = tree.predict(validation_data)
    score = 0
    for j in range(len(res)):
        if res[j] == validation_label[j]:
            score += 1
    score /= len(res)
    accuracy.append(score)

plt.plot(accuracy)
plt.xlabel('depth')
plt.ylabel('accuracy')
plt.title('Accuracy vs. Decision Tree Depth')
plt.savefig('p6.png')
plt.show()
# Alternative tree-construction strategies (kept for experimentation):
#decisionTree.LSID3(3)
#decisionTree.LSID3PathSample(2)
#decisionTree.LSID3MC(1, 0.1)
#decisionTree.BLSID3(1)
#decisionTree.BLSID3PathSample(1)
#decisionTree.LSID3Sequenced(2)
#decisionTree.IIDT(10, 0.5)

# Report tree statistics and test-set accuracy before and after pruning.
print("****Tree Data BEFORE Pruning****")
print("Tree Size - Number of Nodes:", decisionTree.size())
print("Number of Leafs:", decisionTree.getNumLeafs())
print("Tree Depth:", decisionTree.getTreeDepth())

# Fix: files were opened and closed manually; use context managers so the
# handles are released even if readlines() raises.
with open('SampleSets/test_set.csv') as testSetFile:
    testSetData = testSetFile.readlines()
print("Prediction:", str(decisionTree.predict(testSetData) * 100) + "%")
#decisionTreePlot = DecisionTreePlot()
#decisionTreePlot.createDecisionTreePlot(decisionTree)

print("****Tree Data AFTER Pruning****")
with open('SampleSets/validation_set.csv') as validationSetFile:
    validationSetData = validationSetFile.readlines()
decisionTree.prune(validationSetData)
print("Tree Size - Number of Nodes:", decisionTree.size())
print("Number of Leafs:", decisionTree.getNumLeafs())
print("Tree Depth:", decisionTree.getTreeDepth())
print("Prediction:", str(decisionTree.predict(testSetData) * 100) + "%")
#decisionTreePlot = DecisionTreePlot()
#decisionTreePlot.createDecisionTreePlot(decisionTree)
def testComplexCase(self):
    """Fit on complexData and check the learned rules on held-out feature vectors."""
    tree = DecisionTree()
    tree.fit(complexData)

    # The fitted tree must reproduce every training label exactly.
    for example in complexData:
        self.assertEqual(example[-1], tree.predict(example))

    # (expected label, feature vector) pairs, one group per learned rule.
    cases = [
        # Overcast = YES
        (Label.YES, [Outlook.Overcast, Temperature.Hot, Humidity.High, Wind.Weak]),
        (Label.YES, [Outlook.Overcast, Temperature.Cool, Humidity.Normal, Wind.Strong]),
        # Sunny + Normal = YES
        (Label.YES, [Outlook.Sunny, Temperature.Cool, Humidity.Normal, Wind.Strong]),
        (Label.YES, [Outlook.Sunny, Temperature.Hot, Humidity.Normal, Wind.Weak]),
        # Sunny + High = NO
        (Label.NO, [Outlook.Sunny, Temperature.Cool, Humidity.High, Wind.Strong]),
        (Label.NO, [Outlook.Sunny, Temperature.Hot, Humidity.High, Wind.Weak]),
        # Rain + Weak = YES
        (Label.YES, [Outlook.Rain, Temperature.Cool, Humidity.Normal, Wind.Weak]),
        (Label.YES, [Outlook.Rain, Temperature.Hot, Humidity.High, Wind.Weak]),
        # Rain + Strong = NO
        (Label.NO, [Outlook.Rain, Temperature.Cool, Humidity.Normal, Wind.Strong]),
        (Label.NO, [Outlook.Rain, Temperature.Hot, Humidity.High, Wind.Strong]),
    ]
    for expected, features in cases:
        self.assertEqual(expected, tree.predict(features))

    tree.print()