Example #1
import copy

import trees
from treePlotter import createPlot, getNumLeafs, getTreeDepth


def main():
	# createPlot()
	dataSet, labels = trees.createDataSet()
	labelsTmp = copy.deepcopy(labels)  # createTree mutates its labels argument, so work on a copy
	mytree = trees.createTree(dataSet, labelsTmp)
	print(mytree)
	print(dataSet)
	print(labels)

	print(getNumLeafs(mytree))
	print(getTreeDepth(mytree))
	# createPlot(mytree)
	print(trees.classify(mytree, labels, [1, 0]))


if __name__ == '__main__':
	main()
Example #2
import sys

import trees as tr
import treePlotter as tp

print(sys.getdefaultencoding())


# load the Chinese-labeled lenses data set (UTF-8 text, tab-separated)
fr = open('lensesCN.txt', encoding='utf-8')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
# Chinese feature labels matching lensesCN.txt (English equivalents used below)
# lensesLabels = ["年龄组", "规定", "闪光", "泪液扫除率"]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses,lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

dataSet, labels = tr.createDataSet()

shannonEnt = tr.calcShannonEnt(dataSet)

print(shannonEnt)

print(tp.retrieveTree(1))

myTree = tp.retrieveTree(0)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)

print(numLeafs)
print(treeDepth)

Example #3
import matplotlib.pyplot as plt

import trees


def plotTree(myTree, parentPt, nodeTxt):
    numLeafs = getNumLeafs(myTree)  # width of this subtree, in leaves
    firstStr = list(myTree.keys())[0]  # feature label at the current decision node
    cntrPt = (plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW, plotTree.yOff)
    plotMidText(cntrPt, parentPt, nodeTxt)  # label the edge coming from the parent node
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
    for key in secondDict.keys():
        if type(secondDict[key]).__name__ == 'dict':  # if the child is a dict, it is another decision node
            plotTree(secondDict[key], cntrPt, str(key))  # recurse into the subtree
        else:  # otherwise it is a leaf node; plot it
            plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
# if you do get a dictionary, you know it's a tree, and the first element is another dict

def createPlot(inTree):
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)    #no ticks
    # createPlot.ax1 = plt.subplot(111, frameon=False)  # ticks shown, for demo purposes
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5,1.0), '')
    plt.show()
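# plotMidText and plotNode are used above but not defined in this excerpt. As an
# illustration only, here is a minimal sketch of what plotMidText typically does
# in this Matplotlib-annotation layout (an assumption about the missing helper,
# not necessarily the author's exact code):
def plotMidTextSketch(cntrPt, parentPt, txtString):
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]  # midpoint of the edge, x
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]  # midpoint of the edge, y
    createPlot.ax1.text(xMid, yMid, txtString)  # write the branch value on the edge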

# collect data
myDat, labels = trees.createDataSet()
mytree = trees.createTree(myDat, labels)

# visualize the decision tree
createPlot(mytree)
import trees

myDat, labels = trees.createDataSet()

print("------ shannon ------")
print(myDat)
print(trees.calcShannonEnt(myDat))

# print("------ shannon after changed ------")
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

print("------ split data set ------")
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))

print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(myDat))

print("------ create tree ------")
tree = trees.createTree(myDat, labels)
print(tree)

print("------ test tree classify ------")
print(trees.classify(tree, ['no surfacing', 'flippers'], [1, 0]))
Example #5
# -*- coding:utf-8 -*-
import trees

myData, myLabels = trees.createDataSet()
testLabels = myLabels.copy()  # keep an untouched copy; createTree mutates its labels argument
print('myData is', myData)

# compute the Shannon entropy of the data set
# myShannonEnt = trees.calcShannonEnt(myData)
# print('myShannonEnt is', myShannonEnt)

# test the data-set splitting function
# mySplitDat = trees.splitDataSet(myData, 1, 0)
# print('mySplitDat is', mySplitDat)

# myBestData = trees.chooseBestFeatureToSplit(myData)
# print('myBestData is', myBestData)

myTree = trees.createTree(myData, myLabels)
print('myTree is', myTree)


# test classification with the trained tree
print('testLabels is', testLabels)
testResult = trees.classify(myTree, testLabels, [1, 1])
print('testResult is', testResult)

# trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')  # requires classifierStorage.txt (run storeTree above first)
print('fromFileTree is', fromFileTree)
Example #6
import trees

if __name__ == '__main__':
    myDat, labels = trees.createDataSet()
    # split the data set; no sample has value 5 for feature 0, so this returns an empty list
    retDataSet = trees.splitDataSet(myDat, 0, 5)
    print(retDataSet)
    bestFeature = trees.chooseBestFeatureToSplit(myDat)
    print("best feature: %d" % bestFeature)
Example #7
# -*- coding: utf-8 -*-

# find the best feature to split on (the one with the largest information gain)
import trees
ds, ls = trees.createDataSet()
trees.chooseBestFeatureToSplit(ds)
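# A minimal sketch of the information-gain criterion that chooseBestFeatureToSplit
# maximizes, assuming splitDataSet and calcShannonEnt behave as in trees.py
# (an illustration, not the module's own code):
def information_gain(dataSet, axis):
    baseEnt = trees.calcShannonEnt(dataSet)  # entropy before the split
    newEnt = 0.0
    for value in set(row[axis] for row in dataSet):
        subset = trees.splitDataSet(dataSet, axis, value)
        newEnt += len(subset) / float(len(dataSet)) * trees.calcShannonEnt(subset)
    return baseEnt - newEnt  # the best feature is the axis with the largest gain

print(information_gain(ds, 0))  # gain from splitting the sample data on feature 0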

# build the decision tree
import trees
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# plot the tree
import treePlotter
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# classify a sample with the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# serialize and deserialize the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')
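# storeTree and grabTree are not shown in this snippet; a minimal pickle-based
# sketch of what such helpers usually look like (an assumption, not necessarily
# the exact implementation in trees.py):
import pickle

def store_tree_sketch(inputTree, filename):
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)  # serialize the nested-dict tree to disk

def grab_tree_sketch(filename):
    with open(filename, 'rb') as fr:
        return pickle.load(fr)  # read the nested-dict tree back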
'''
Decision tree test script
'''
import trees

'''
dataSet, labels = trees.createDataSet()
print(dataSet)
print(labels)
shannonEnt = trees.calcShannonEnt(dataSet)
print(shannonEnt)
'''
dataSet, labels = trees.createDataSet()
tree = trees.createTree(dataSet, labels)

d, l = trees.createDataSet()  # fresh labels: createTree consumed the first list
result = trees.classify(tree, l, [1, 0])

print(result)





Example #9
import trees


def testCreateDataSet():
    myData, labels = trees.createDataSet()
    # print(myData)
    # print(labels)

    return myData, labels
Example #10
import trees as tr

"""
Function: split the data set on a given feature

Parameters:
    dataSet - the data set to split
    axis - index of the feature to split on
    value - the feature value to keep
Returns:
    retDataSet - the matching rows with the axis feature removed

Modify:
    2020-04-11
"""


def splitDataSet(dataSet, axis, value):
    retDataSet = []  # list of rows to return
    for featVec in dataSet:  # iterate over the data set
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]  # everything before the axis feature
            reducedFeatVec.extend(featVec[axis + 1:])  # ...plus everything after it
            retDataSet.append(reducedFeatVec)  # keep the matching row, minus that feature
    return retDataSet  # the split data set
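# An equivalent list-comprehension form of the same split, shown only as a
# stylistic alternative (splitDataSetLC is an illustrative name, not part of trees.py):
def splitDataSetLC(dataSet, axis, value):
    return [featVec[:axis] + featVec[axis + 1:]
            for featVec in dataSet if featVec[axis] == value]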


if __name__ == '__main__':
    dataSet, features = tr.createDataSet()
    print(splitDataSet(dataSet, 0, 1))
    print(splitDataSet(dataSet, 0, 0))
Example #11
def main():
    import trees
    myDat, labels = trees.createDataSet()
    myTree = trees.createTree(myDat, labels)
    print(myTree)


if __name__ == '__main__':
    main()
Example #12
# author: zhumenger
import trees

myDat, labels = trees.createDataSet()
print(myDat)
print(labels)
print(trees.calcShannonEnt(myDat))  # the higher the entropy, the more mixed the class labels

myDat[0][-1] = 'maybe'  # add a third class label; the entropy increases
print(trees.calcShannonEnt(myDat))
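# A minimal sketch of the entropy formula itself, assuming (as in createDataSet)
# that the class label is the last element of each row; an illustration, not the
# calcShannonEnt implementation:
from collections import Counter
from math import log2

def shannon_entropy(dataSet):
    counts = Counter(row[-1] for row in dataSet)  # frequency of each class label
    total = len(dataSet)
    return -sum((c / total) * log2(c / total) for c in counts.values())

print(shannon_entropy(myDat))  # should match calcShannonEnt on the same data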

# test splitDataSet()
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))

trees.chooseBestFeatureToSplit(myDat)

# find the best way to split the data
print(trees.chooseBestFeatureToSplit(myDat))  # splitting on feature 0 works best here

# Listing 3-4: build the tree
print(trees.createTree(myDat, labels))
Example #13
import trees as tr
import treePlotter as at  # assumed alias: retrieveTree below comes from treePlotter

"""
Function: classify a test vector with a built decision tree

Parameters:
    inputTree - the decision tree already built
    featLabels - list of feature labels (maps node names to feature indices)
    testVec - test vector, ordered to match featLabels
Returns:
    classLabel - the predicted class
Modify:
    2020-04-11
"""


def classify(inputTree, featLabels, testVec):
    firstStr = next(iter(inputTree))  # root node: the feature tested first
    secondDict = inputTree[firstStr]  # subtree dictionary under that node
    featIndex = featLabels.index(firstStr)  # index of that feature in the test vector
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':  # still a decision node: recurse
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:  # leaf node: take its class label
                classLabel = secondDict[key]
    return classLabel
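# Note: if testVec[featIndex] matches none of the keys, classLabel is never bound
# and classify raises NameError. A defensive variant, shown only as a sketch
# (classifySafe is an illustrative name, not part of the original code):
def classifySafe(inputTree, featLabels, testVec, default=None):
    firstStr = next(iter(inputTree))
    featIndex = featLabels.index(firstStr)
    subtree = inputTree[firstStr].get(testVec[featIndex], default)  # unseen value -> default
    if isinstance(subtree, dict):
        return classifySafe(subtree, featLabels, testVec, default)
    return subtree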


if __name__ == '__main__':
    dataSet, labels = tr.createDataSet()
    print(labels)
    myTree = at.retrieveTree(0)
    print(myTree)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
Example #14
# -*- coding: utf-8 -*-
"""
Created on Wed May 23 11:35:31 2018

@author: lijie
"""
import trees
import treePlotter


def classify(inputTree, featLabels, testVec):
    firstSide = list(inputTree.keys())
    firstStr = firstSide[0]
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                classLabel = secondDict[key]
    return classLabel


if __name__ == '__main__':
    # test: classify [1, 0] with the example tree
    tree = treePlotter.retrieveTree(0)
    dataset, labels = trees.createDataSet()
    a = classify(tree, labels, [1, 0])
    print(a)