Ejemplo n.º 1
0
    def testSimpleCase(self):
        tree = DecisionTree()
        tree.fit(simpleData)

        for datum in simpleData:
            self.assertEqual(datum[-1], tree.predict(datum))

        tree.print()
Ejemplo n.º 2
0
def test():
	X = [[1, 2, 0, 1, 0],
		 [0, 1, 1, 0, 1],
		 [1, 0, 0, 0, 1],
		 [2, 1, 1, 0, 1],
		 [1, 1, 0, 1, 1]]
	y = ['yes','yes','no','no','no']

	decision_tree = DecisionTree(mode = 'C4.5')

	decision_tree.fit(X,y)

	res = decision_tree.predict(X)
	print res
	
	model_name = 'test.dt'
	decision_tree.saveModel(model_name)
	
	new_tree = DecisionTree()
	new_tree.loadModel(model_name)
	print new_tree.predict(X)
Ejemplo n.º 3
0
# zip categorical and non-categorical data together

train_data = np.concatenate((cat_data, non_cat_data), axis=1)
train_label = data[:, -1].astype(int)
validation_data = train_data[:200, :]
validation_label = train_label[:200]
train_data = train_data[:, :]
train_label = train_label[:]
test_data = np.concatenate((cat_data_test, non_cat_data_test), axis=1)


# decision tree
tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)


# random forest

forest = RandomForest(100,5,train_data.shape[0],6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)

score = 0
Ejemplo n.º 4
0
    label2id = {'loss': 0, 'draw': 1, 'win': 2}
    p2f = {'b': 0, 'x': 1, 'o': 2}
    for line in fin:
        line = line.strip().split(',')
        label = label2id[line[-1]]
        feature = np.array([p2f[p] for p in line[:-1]])
        data.append((feature, label))
    fin.close()
    return data


data = read('connect-4.data')
x = np.array([d[0] for d in data])
y = np.array([d[1] for d in data])
#y = label_binarize(y, classes=list(range(3)))
kf = KFold(5, True)
all_f1 = []
for train_index, test_index in kf.split(x):
    x_train, y_train, x_test, y_test = x[train_index], y[train_index], x[
        test_index], y[test_index]

    #x_train, x_test, y_train, y_test = train_test_split(x, y)
    #print('training')
    #model = OneVsRestClassifier(SVC(kernel='rbf'))
    model = DecisionTree()
    model.fit(x_train, y_train)
    #print('testing')
    y_pred = model.predict(x_test)
    all_f1.append(f1_score(y_test, y_pred, average='macro'))
print(sum(all_f1) / 5)
Ejemplo n.º 5
0
# zip categorical and non-categorical data together
train_data = np.concatenate((cat_data, non_cat_data), axis=1)
train_label = data[:, -1].astype(int)
validation_data = train_data[:6000, :]
validation_label = train_label[:6000]
train_data = train_data[6000:,:]
train_label = train_label[6000:]
test_data = np.concatenate((cat_data_test,non_cat_data_test), axis=1)

# plot accuracy
accuracy = []
for i in range(40):
    tree = DecisionTree(i,105)
    tree.train(train_data,train_label)
    res = tree.predict(validation_data)
    score = 0
    for i in range(len(res)):
        if res[i] == validation_label[i]:
            score += 1
    score /= len(res)
    accuracy.append(score)

plt.plot(accuracy)
plt.xlabel('depth')
plt.ylabel('accuracy')
plt.title('Accuracy vs. Decision Tree Depth')
plt.savefig('p6.png')
plt.show()

    #decisionTree.LSID3(3)
    #decisionTree.LSID3PathSample(2)
    #decisionTree.LSID3MC(1, 0.1)
    #decisionTree.BLSID3(1)
    #decisionTree.BLSID3PathSample(1)
    #decisionTree.LSID3Sequenced(2)
    #decisionTree.IIDT(10, 0.5)

    print("****Tree Data BEFORE Pruning****")
    print("Tree Size - Number of Nodes:", decisionTree.size())
    print("Number of Leafs:", decisionTree.getNumLeafs())
    print("Tree Depth:", decisionTree.getTreeDepth())
    testSetFile = open('SampleSets/test_set.csv')
    testSetData = testSetFile.readlines()
    testSetFile.close()
    print("Prediction:", str(decisionTree.predict(testSetData) * 100) + "%")
    #decisionTreePlot = DecisionTreePlot()
    #decisionTreePlot.createDecisionTreePlot(decisionTree)

    print("****Tree Data AFTER Pruning****")
    validationSetFile = open('SampleSets/validation_set.csv')
    validationSetData = validationSetFile.readlines()
    validationSetFile.close()
    decisionTree.prune(validationSetData)
    print("Tree Size - Number of Nodes:", decisionTree.size())
    print("Number of Leafs:", decisionTree.getNumLeafs())
    print("Tree Depth:", decisionTree.getTreeDepth())
    print("Prediction:", str(decisionTree.predict(testSetData) * 100) + "%")
    #decisionTreePlot = DecisionTreePlot()
    #decisionTreePlot.createDecisionTreePlot(decisionTree)
Ejemplo n.º 7
0
    def testComplexCase(self):
        tree = DecisionTree()
        tree.fit(complexData)

        for datum in complexData:
            self.assertEqual(datum[-1], tree.predict(datum))

        # Overcast = YES
        self.assertEqual(
            Label.YES,
            tree.predict(
                [Outlook.Overcast, Temperature.Hot, Humidity.High, Wind.Weak]))
        self.assertEqual(
            Label.YES,
            tree.predict([
                Outlook.Overcast, Temperature.Cool, Humidity.Normal,
                Wind.Strong
            ]))

        # Sunny + Normal = YES
        self.assertEqual(
            Label.YES,
            tree.predict([
                Outlook.Sunny, Temperature.Cool, Humidity.Normal, Wind.Strong
            ]))
        self.assertEqual(
            Label.YES,
            tree.predict(
                [Outlook.Sunny, Temperature.Hot, Humidity.Normal, Wind.Weak]))

        # Sunny + High = NO
        self.assertEqual(
            Label.NO,
            tree.predict(
                [Outlook.Sunny, Temperature.Cool, Humidity.High, Wind.Strong]))
        self.assertEqual(
            Label.NO,
            tree.predict(
                [Outlook.Sunny, Temperature.Hot, Humidity.High, Wind.Weak]))

        # Rain + Weak = Yes
        self.assertEqual(
            Label.YES,
            tree.predict(
                [Outlook.Rain, Temperature.Cool, Humidity.Normal, Wind.Weak]))
        self.assertEqual(
            Label.YES,
            tree.predict(
                [Outlook.Rain, Temperature.Hot, Humidity.High, Wind.Weak]))

        # Rain + Strong = No
        self.assertEqual(
            Label.NO,
            tree.predict(
                [Outlook.Rain, Temperature.Cool, Humidity.Normal,
                 Wind.Strong]))
        self.assertEqual(
            Label.NO,
            tree.predict(
                [Outlook.Rain, Temperature.Hot, Humidity.High, Wind.Strong]))

        tree.print()