Example no. 1
import tree

def testClass():
    myDat, labels = tree.createDataSet()
    # pass a copy: createTree consumes the label list in place (see Example no. 4)
    myTree = tree.createTree(myDat, labels[:])

    # persisting the decision tree
    tree.storeTree(myTree, 'myTree.train')

    myTree2 = tree.grabTree('myTree.train')
    testVec = [1, 0]
    print "Test ",testVec," result: ", tree.classify(myTree2, labels, testVec)
    testVec = [1, 1]
    print "Test ",testVec," result: ", tree.classify(myTree2, labels, testVec)
Example no. 2
def classifyTest():
    import tree as t
    import treePlotter as tp

    dataSet, labels = t.createDataSet()
    myTree = t.createTree(dataSet, labels.copy())
    print(myTree)
    print(labels)

    print(t.classify(myTree, labels, [1, 0]))
    print(t.classify(myTree, labels, [1, 1]))

    tp.createPlot(myTree)
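classify walks the nested dictionary recursively: at each decision node it looks up the feature named by the node's key, follows the branch matching the test vector's value for that feature, and stops when it reaches a class label. A minimal sketch, assuming the MLiA-style tree layout produced above (not necessarily the module's exact code):

def classify(inputTree, featLabels, testVec):
    # the single key at this node names the feature to test
    firstStr = list(inputTree.keys())[0]
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    # follow the branch matching the test vector's value for this feature
    valueOfFeat = secondDict[testVec[featIndex]]
    if isinstance(valueOfFeat, dict):
        # decision node: keep descending
        return classify(valueOfFeat, featLabels, testVec)
    # leaf node: return the class label
    return valueOfFeat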
Example no. 3
#!/usr/bin/python
#encoding:utf-8
import tree
import plottree
dataSet, labels = tree.createDataSet()
print tree.createTree(dataSet, labels)
plottree.createPlot()
Example no. 4
# -*- coding: utf-8 -*-
import tree
import copy
dataset, label = tree.createDataSet()
print(label)
# Simply writing labels = label would not work here, because both names would point to the same list in memory
labels = copy.deepcopy(label)
myTree = tree.createTree(dataset, labels)
# print(myTree)
print(label)
testResult = tree.classify(myTree, label, [1, 1])
print(testResult)
tree.storeTree(myTree, r"F:\NatureRecognition/tree.txt")
tt = tree.grabTree(r"F:\NatureRecognition/tree.txt")
print(tt)
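The deepcopy above matters because assigning labels = label would only bind a second name to the same list, so createTree's in-place deletions would also empty label. A small illustration of the difference, using hypothetical label values unrelated to the tree module:

import copy

label = ['no surfacing', 'flippers']
alias = label                # same list object: mutations through alias are visible via label
deep = copy.deepcopy(label)  # independent copy: safe to mutate separately

alias.pop()
print(label)  # ['no surfacing']              -- the alias mutation shows up here
print(deep)   # ['no surfacing', 'flippers']  -- the copy is untouched

For a flat list of strings a shallow copy (label[:] or label.copy()) is already sufficient; deepcopy only becomes necessary when the list contains nested mutable objects.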
Example no. 5
import tree

# Compute the Shannon entropy of the data set
#myDat, labels = tree.createDataSet()
#print(myDat)
#print(tree.calcShannonEnt(myDat))
#myDat[0][-1] = 'maybe'
#print(myDat)
#print(tree.calcShannonEnt(myDat))

print("*****************************************************************")
# Test the splitDataSet() function on the simple sample data above
#print(tree.splitDataSet(myDat, 0, 1))
#print(tree.splitDataSet(myDat, 0, 0))

print("*****************************************************************")
#myDat, labels = tree.createDataSet()
#print(myDat)
#print(tree.chooseBestFeatureToSplit(myDat))

print("*****************************************************************")
# The variable myTree holds nested dictionaries that encode the structure of the tree.
# The first key from the left, 'no surfacing', is the name of the first feature used to split the data set,
# and its value is another dictionary.
# The second-level keys are the branches produced by splitting on 'no surfacing',
# and their values are the children of the 'no surfacing' node.
# Each value is either a class label or another dictionary: a class label means the child is a leaf node,
# while a dictionary means the child is a decision node. Repeating this pattern forms the whole tree;
# in this section's example the tree has 3 leaf nodes and 2 decision nodes.
myDat, labels = tree.createDataSet()
myTree = tree.createTree(myDat, labels)
print(myTree)
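For this data set the printed tree typically has the shape below, matching the comment's description of 2 decision nodes and 3 leaves (shown for illustration only; the exact output depends on the tree module and data):

# illustration only: the nested-dict shape described in the comment above
exampleTree = {'no surfacing': {0: 'no',
                                1: {'flippers': {0: 'no',
                                                 1: 'yes'}}}}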

Example no. 6
#-*- coding:utf-8 -*-
import tree
mydat,label = tree.createDataSet()
#mydat
#tree.calcShannonEnt(mydat)  # get the entropy of the data set

#reload(tree)
#tree.splitDataSet(mydat,0,1)  # get the list of samples whose feature 0 equals 1
#tree.splitDataSet(mydat,0,0)

#tree.chooseBestFeatureToSplit(mydat)  # get the index of the best feature to split on

mytree = tree.createTree(mydat, label)  # build the decision tree
print(mytree)
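The commented splitDataSet calls above refer to splitting rows on a single feature. A minimal sketch of the usual MLiA-style behavior (an assumption; the tree module's own code may differ): keep the rows whose feature at index axis equals value, with that feature column removed from the result.

def splitDataSet(dataSet, axis, value):
    # keep rows matching `value` at column `axis`, dropping that column
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            retDataSet.append(featVec[:axis] + featVec[axis + 1:])
    return retDataSet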

Example no. 7
# -*- coding: utf-8 -*-
"""
Created on Wed May 23 09:56:25 2018

@author: 705family
"""

import os

os.chdir("C:/Users/705family/Desktop/decisiontree")
import tree

data, y = tree.createDataSet()
myTree = tree.createTree(data, y)

# In[] test Part 2
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

data = pd.read_excel('西瓜数据集.xlsx')  # '西瓜数据集' = the watermelon data set
data = data.iloc[:, 1:-1]

clf = LabelEncoder()
label = [[] for i in range(np.size(data, 1))]
for i in range(np.size(data, 1)):
    data.iloc[:, i] = clf.fit_transform(data.iloc[:, i])
    label[i][:] = list(clf.classes_)
y = data.iloc[:, -1]
data = np.array(data)
data = data.tolist()
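LabelEncoder maps each distinct string in a column to an integer code and keeps the original categories in classes_, which is what the loop above relies on. A self-contained illustration on toy values (hypothetical data, since the Excel file is not included here):

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
codes = le.fit_transform(['green', 'dark', 'green', 'light'])
print(codes)        # [1 0 1 2] -- one integer code per sample
print(le.classes_)  # ['dark' 'green' 'light'] -- categories sorted, indexable by code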
Example no. 8
import tree

def testEntr():
    myDat, labels = tree.createDataSet()
    print tree.calcShannonEnt(myDat)
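calcShannonEnt measures how mixed the class labels in a data set are: H = -sum(p_i * log2(p_i)), where p_i is the fraction of samples belonging to class i. A minimal sketch assuming the MLiA convention that the class label is the last element of each row (the actual tree module may differ):

from math import log

def calcShannonEnt(dataSet):
    # count how many samples fall into each class (last column of every row)
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # H = -sum(p_i * log2(p_i)) over all classes
    shannonEnt = 0.0
    numEntries = len(dataSet)
    for count in labelCounts.values():
        prob = count / float(numEntries)
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt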