def test_treeNums(self):
    """Create the sample data set, grow a decision tree from it, and
    print the tree together with its leaf count and depth."""
    sample_data, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (sample_data,))
    decision_tree = trees.createTree(sample_data, feature_names)
    print("\n tree == %s" % (decision_tree,))
    leaf_count = treePlotter.getNumLeafs(decision_tree)
    tree_depth = treePlotter.getTreeDepth(decision_tree)
    print("\n leafs == %s depth == %s " % (leaf_count, tree_depth))
def createPlot(self, inTree):
    """Render *inTree* via treePlotter and return the matplotlib figure.

    Mirrors treePlotter.createPlot but hands the figure object back to
    the caller instead of only drawing it.
    """
    fig = plt.figure()
    # Hide both axes and the frame: the tree is drawn with annotations only.
    treePlotter.createPlot.ax1 = plt.subplot(111, frameon=False, xticks=[], yticks=[])
    # plotTree keeps its layout state in function attributes: total width
    # (leaf count), total depth, and the current x/y drawing offsets.
    leaf_total = float(treePlotter.getNumLeafs(inTree))
    depth_total = float(treePlotter.getTreeDepth(inTree))
    treePlotter.plotTree.totalW = leaf_total
    treePlotter.plotTree.totalD = depth_total
    treePlotter.plotTree.x0ff = -0.5 / leaf_total
    treePlotter.plotTree.y0ff = 1.0
    treePlotter.plotTree(inTree, (0.5, 1.0), '')
    return fig
# Python 2 transcript (print statements). NOTE(review): continues an earlier
# section — shannonEnt, myDat, labels, splitDataSet, chooseBestFeatureToSplit,
# createTree, createDataSet, classify and tp are defined/imported above this
# chunk; confirm against the full file.
print shannonEnt
#split dataset
print '-------------- split dataset --------------------'
print splitDataSet(myDat,0,1)
print splitDataSet(myDat,0,0)
#get best feature
print '-------------- best feature --------------------'
print 'best feature:' ,chooseBestFeatureToSplit(myDat)
print 'createTree ', createTree(myDat,labels)
#plot trees
print '-------------- plot-trees --------------------'
myTree = tp.retrieveTree(0)
print 'myTree ', myTree
print 'labels', labels
print 'numLeafs ', tp.getNumLeafs(myTree)
print 'treeDepth ', tp.getTreeDepth(myTree)
#tp.createPlot(myTree)
#update dict and plot again
#myTree['no surfacing'][3] = 'maybe'
#tp.createPlot(myTree)
#classify
print '-------------- classify --------------------'
myDat, labels = createDataSet()
print 'labels', labels
myTree = tp.retrieveTree(0)
print 'myTree ', myTree
print '[1,0]: ', classify(myTree, labels, [1,0])
print '[1,1]: ', classify(myTree, labels, [1,1])
# 测试创建树 reload(trees) myDat, labels = trees.createDataSet() myTree = trees.createTree(myDat, labels) myTree # 测试matplotlib import treePlotter treePlotter.createPlot() # 测试获取叶子数量及树深度的函数 reload(treePlotter) treePlotter.retrieveTree(1) myTree = treePlotter.retrieveTree(0) treePlotter.getNumLeafs(myTree) treePlotter.getTreeDepth(myTree) # 绘制树 reload(treePlotter) myTree=treePlotter.retrieveTree(0) treePlotter.createPlot(myTree) # 变更字典,重新绘制 myTree['no surfacing'][3]='maybe' myTree treePlotter.createPlot(myTree) # 测试分类函数 myDat, labels = trees.createDataSet() labels
import treePlotter as tp print tp.retrieveTree(0) print tp.retrieveTree(1) myTree = tp.retrieveTree(0) print tp.getNumLeafs(myTree) print tp.getTreeDepth(myTree) # tp.createPlot(myTree) tp.createPlot(tp.retrieveTree(1))
@author: weixw """
import myTrees as mt
import treePlotter as tp
# Smoke test: build a decision tree, persist it, reload it, plot it,
# and classify two samples.
dataSet, feature_names = mt.createDataSet()
# copy(): allocates new storage and copies the list's values into it.
# createTree mutates the label list it is given, so an untouched copy is
# kept for classification. NOTE(review): feature_names1 is never used
# below — presumably classify should receive it instead of the mutated
# feature_names; confirm against myTrees.createTree.
feature_names1 = feature_names.copy()
myTree = mt.createTree(dataSet, feature_names)
# Persist the tree to disk.
mt.storeTree(myTree, 'myTree.txt')
# Load the tree back from disk.
myTree = mt.grabTree('myTree.txt')
print(u"决策树结构:%s" % myTree)
# Draw the decision tree.
print(u"绘制决策树:")
tp.createPlot(myTree)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)
print(u"叶子节点数目:%d" % numLeafs)
print(u"树深度:%d" % treeDepth)
# Classification test on the simple 3-column sample data.
labelResult = mt.classify(myTree, feature_names, [1, 1])
print(u"[1,1] 测试结果为:%s" % labelResult)
labelResult = mt.classify(myTree, feature_names, [1, 0])
print(u"[1,0] 测试结果为:%s" % labelResult)
# Python 2 transcript (print statements). NOTE(review): continues an earlier
# section — shannonEnt, myDat, labels, splitDataSet, chooseBestFeatureToSplit,
# createTree, createDataSet, classify and tp are defined/imported above this
# chunk; confirm against the full file.
print shannonEnt
#split dataset
print '-------------- split dataset --------------------'
print splitDataSet(myDat, 0, 1)
print splitDataSet(myDat, 0, 0)
#get best feature
print '-------------- best feature --------------------'
print 'best feature:', chooseBestFeatureToSplit(myDat)
print 'createTree ', createTree(myDat, labels)
#plot trees
print '-------------- plot-trees --------------------'
myTree = tp.retrieveTree(0)
print 'myTree ', myTree
print 'labels', labels
print 'numLeafs ', tp.getNumLeafs(myTree)
print 'treeDepth ', tp.getTreeDepth(myTree)
#tp.createPlot(myTree)
#update dict and plot again
#myTree['no surfacing'][3] = 'maybe'
#tp.createPlot(myTree)
#classify
print '-------------- classify --------------------'
myDat, labels = createDataSet()
print 'labels', labels
myTree = tp.retrieveTree(0)
print 'myTree ', myTree
print '[1,0]: ', classify(myTree, labels, [1, 0])
print '[1,1]: ', classify(myTree, labels, [1, 1])
#lensesLabels = ["年龄组" , "规定", "闪光", "泪液扫除率"] lensesLabels = ['age' , 'prescript', 'astigmatic', 'tearRate'] lensesTree = tr.createTree(lenses,lensesLabels) print(lensesTree) tp.createPlot(lensesTree) dataSet, labels = tr.createDataSet() shannonEnt = tr.calcShannonEnt(dataSet) print(shannonEnt) print(tp.retrieveTree(1)) myTree = tp.retrieveTree(0) numLeafs = tp.getNumLeafs(myTree) treeDepth = tp.getTreeDepth(myTree) print(numLeafs) print(treeDepth) myTree = tp.retrieveTree(0) tp.createPlot(myTree) myTree['no surfacing'][3] = 'maybe' tp.createPlot(myTree) myDat,labels = tr.createDataSet() print(labels) myTree = tp.retrieveTree(0)
}}},  # NOTE(review): closes the first canned tree; its opening lies above this chunk
    # Second canned tree: adds an extra 'head' split nested under flippers == 0.
    {'no surfacing': {
        0: 'no',
        1: {'flippers': {
            0: {'head': {0: 'no', 1: 'yes'}},
            1: 'no'}}}}]
    # Return the i-th canned tree (used by the plotting self-test below).
    return listOfTrees[i]
    # createPlot(thisTree)


if __name__ == "__main__":
    # Self-test: fetch canned tree 0, report its depth and leaf count, draw it.
    import treePlotter
    myTree = treePlotter.retrieveTree(0)
    print("\ntreePlotter.getTreeDepth:\n", treePlotter.getTreeDepth(myTree))
    print("\ntreePlotter.getNumLeafs:\n", treePlotter.getNumLeafs(myTree))
    print("\nmyTree:\n", myTree)
    createPlot(myTree)
1) # [[1, 'yes'], [1, 'yes'], [0, 'no']]
# NOTE(review): the line above is the tail of a DT.splitDataSet(myDat, 0, 1)
# call whose beginning lies outside this chunk.
splittedDat = DT.splitDataSet(myDat, 0, 0) # [[1, 'no'], [1, 'no']]
bestFeature = DT.chooseBestFeatureToSplit(myDat) # 0
myTree = DT.createTree( myDat, labels ) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}

import treePlotter as TP
# TP.createPlot()
myTree = TP.retrieveTree( 0) #{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
n = TP.getNumLeafs(myTree) # 3
d = TP.getTreeDepth(myTree) # 2
TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
myTree = TP.retrieveTree( 0) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
class1 = DT.classify(myTree, labels, [1, 0]) # no
class2 = DT.classify(myTree, labels, [1, 1]) # yes

# storing the tree in pickled form
DT.storeTree(myTree, 'data/classifierStorage.txt')
grabedTree = DT.grabTree( 'data/classifierStorage.txt'
# NOTE(review): this call is truncated here; its closing parenthesis lies
# outside this chunk.
# NOTE(review): the first five statements below look like the tail of the
# book's createPlot() body (fig, plotNode, decisionNode, leafNode come from
# above this chunk) — confirm indentation against the full file.
fig.clf()  # clear the figure before drawing
createPlot.ax1 = plt.subplot(111, frameon=False)  # draw on a borderless subplot
plotNode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionNode)
plotNode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafNode)
plt.show()
# Tree-plotting annotation demo.
createPlot()
# Count leaves and levels.
import treePlotter
myTree = treePlotter.retrieveTree(0)  # load the canned tree structure
print('叶子节点数:', treePlotter.getNumLeafs(myTree))  # leaf count -> x-axis extent
print('树的层数:', treePlotter.getTreeDepth(myTree))  # depth -> y-axis extent
# Draw the complete decision-tree model.
print('绘制完整的决策树模型')
treePlotter.createPlot(myTree)
### Build a decision tree from data and use it for prediction.
import W_tree
import treePlotter
myDat, labels = W_tree.createDataSet()
myTree = treePlotter.retrieveTree(0)
print('[1,0]的分类结果是', W_tree.classify(myTree, labels, [1, 0]))  # expected: no
print('[1,1]的分类结果是', W_tree.classify(myTree, labels, [1, 1]))  # expected: yes
# Quick check of treePlotter's canned trees: fetch tree 0 and print its
# leaf count and depth (tree 1 is fetched only to exercise retrieveTree).
import treePlotter

treePlotter.retrieveTree(1)  # result intentionally discarded
sample_tree = treePlotter.retrieveTree(0)
print(treePlotter.getNumLeafs(sample_tree))
print(treePlotter.getTreeDepth(sample_tree))
__author__ = 'Wei Yin'

# Demo: pull canned tree 0 out of treePlotter, probe its size, then render
# it. retrieveTree(1) and the two measurements are evaluated but their
# results are discarded (REPL-transcript style).
import treePlotter

treePlotter.retrieveTree(1)
demo_tree = treePlotter.retrieveTree(0)
treePlotter.getNumLeafs(demo_tree)
treePlotter.getTreeDepth(demo_tree)
treePlotter.createPlot(demo_tree)
import trees import treePlotter myDat, labels = trees.createDataSet() print myDat print trees.calcShannonEnt(myDat) print trees.splitDataSet(myDat, 0, 1) print trees.splitDataSet(myDat, 0, 0) print trees.splitDataSet(myDat, 1, 1) print trees.chooseBestFeatureToSplit(myDat) print trees.createTree(myDat, labels) treePlotter.createPlot() print 'createPlot over' print treePlotter.retrieveTree(1) myTree = treePlotter.retrieveTree(0) print treePlotter.getNumLeafs(myTree) print treePlotter.getTreeDepth(myTree)
# Build a decision tree from the sample data, report its shape, and draw it.
import treePlotter
import tree

samples, feature_names = tree.createDataSet()
built_tree = tree.createTree(samples, feature_names)
print('myTree = ', built_tree)
print('treePlotter.getNumLeafs(myTree) = ', treePlotter.getNumLeafs(built_tree))
print('treePlotter.getTreeDepth(myTree) = ', treePlotter.getTreeDepth(built_tree))
treePlotter.createPlot(built_tree)
# Python 2 chapter-3 walkthrough. NOTE(review): `myDat` and the module
# imports come from earlier in this script (not shown) — confirm against
# the full file.
trees.splitDataSet(myDat, 0, 1)
trees.splitDataSet(myDat, 0, 0)
print "选择最好的数据集划分方式:", trees.chooseBestFeatureToSplit(myDat)
#3.1.3 Recursively building the decision tree
myDat, labels = trees.createDataSet()
myTree = trees.createTree(myDat, labels)
print "myTree:", myTree
#3.2.1 Matplotlib annotations
treePlotter.createPlot()
#3.2.2 Constructing the annotation tree
treePlotter.retrieveTree(1)
myTree = treePlotter.retrieveTree(0)
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree)
print "获取树的层数:", treePlotter.getTreeDepth(myTree)
treePlotter.createPlot(myTree)
# Add a third branch at the root, then redraw.
myTree['no surfacing'][3] = 'maybe'
print "myTree:", myTree
treePlotter.createPlot(myTree)
#3.3.1 Testing: classify with the decision tree
myDat, labels = trees.createDataSet()
print "labels:", labels
myTree = treePlotter.retrieveTree(0)
print "myTree:", myTree
print "分类1:", trees.classify(myTree, labels, [1, 0])
print "分类2:", trees.classify(myTree, labels, [1, 1])
#3.3.2 Storing the decision tree
import treePlotter if __name__ == '__main__': print treePlotter.retrieveTree(1) myTree = treePlotter.retrieveTree(1) print treePlotter.getNumLeafs(myTree) print treePlotter.getTreeDepth(myTree)
# Python 2 ad-hoc tests for the tree module (imported above this chunk)
# using inline copies of the 5-sample fish data set.
# All labels identical -> presumably zero entropy; confirm in tree.py.
print tree.calcShannonEnt([[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'yes'],
                           [0, 1, 'yes'], [0, 1, 'yes']])
# Presumably keeps rows where feature 0 == 1 with that column stripped —
# confirm against tree.splitDataSet.
print tree.splitDataSet(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    0, 1)
print tree.chooseBestFeatureToSplit([[1, 1, 'yes'], [1, 1, 'yes'],
                                     [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']])
print tree.createTree(
    [[1, 1, 'yes'], [1, 1, 'yes'], [1, 0, 'no'], [0, 1, 'no'], [0, 1, 'no']],
    ['No Surfacing?', 'Flippers?'])
# Hand-built tree: measure and draw it.
t = {'No Surfacing?': {0: 'no', 1: {'Flippers?': {0: 'no', 1: 'yes'}}}}
print treePlotter.getNumLeafs(t)
print treePlotter.getTreeDepth(t)
treePlotter.createPlot(t)
# Classify the sample [1, 0] against the same hand-built tree.
print tree.classify(
    {'No Surfacing?': {
        0: 'no',
        1: {
            'Flippers?': {
                0: 'no',
                1: 'yes'
            }
        }
    }},
    ['No Surfacing?', 'Flippers?'],
    [1, 0])