# NOTE(review): the next line is the tail of a function whose beginning lies outside this view; its code is kept exactly as found (only the comment text was translated).
firstLabelIndex = labels.index(firstLabel) # index of the attribute secondDict = tree[firstLabel] value = testData[firstLabelIndex] # attribute value if type(secondDict[value]).__name__ == "dict": # recurse if this is still a subtree classLabel = classifyDecisionTree(secondDict[value], labels, testData) else: classLabel = secondDict[value] return classLabel


if __name__ == '__main__':
    # Load the samples together with their attribute labels.
    samples, attributeNames = loadDataSet("lenses.txt")

    # Train on a copy of the labels: createTree() is noted to alter the label
    # list it receives, and the untouched list is still needed for prediction.
    trainingLabels = attributeNames[:]
    decisionTree = createTree(samples, trainingLabels)
    print(decisionTree)

    # Draw the tree.
    TreePlot.createPlot(decisionTree)

    # Predict the class of a single sample.
    query = ["young", "hyper", "yes", "normal"]
    print(classifyDecisionTree(decisionTree, attributeNames, query))
def grabTree(filename):
    """Read back a pickled object from ``filename``.

    Args:
        filename: path of a file holding one pickled object.

    Returns:
        The object stored in the file.

    Raises:
        OSError: if the file cannot be opened.
        pickle.UnpicklingError: if the content is not a valid pickle.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that this program (or another trusted source) wrote.
    # The context manager closes the handle even when loading fails.
    with open(filename, "rb") as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    # Build the sample data set.
    dataSet, labels = createDataSet()
    # Copy taken before training and used for classification afterwards;
    # presumably createTree() modifies the list it is given -- confirm.
    labelsCopy = labels[:]
    # Learn the decision tree.
    tree = createTree(dataSet, labels)
    print(tree)
    # Plot the decision tree.
    TreePlot.createPlot(tree)
    # Serialize the tree structure to disk.
    storeTree(tree, "object.txt")
    # Read the tree structure back from the file.
    myTree = grabTree("object.txt")
    print(myTree)
    print(classifyDecisionTree(myTree, labelsCopy, [1, 1]))
classCount[vote]=0 classCount[vote]+=1 sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True) return sortedClassCount[0][0] def createTree(dataSet,labels): classList=[example[-1] for example in dataSet] # 类别:男或女 if classList.count(classList[0])==len(classList): return classList[0] if len(dataSet[0])==1: return majorityCnt(classList) bestFeat=chooseBestFeatureToSplit(dataSet) #选择最优特征 bestFeatLabel=labels[bestFeat] myTree={bestFeatLabel:{}} #分类结果以字典形式保存 del(labels[bestFeat]) featValues=[example[bestFeat] for example in dataSet] uniqueVals=set(featValues) for value in uniqueVals: subLabels=labels[:] myTree[bestFeatLabel][value]=createTree(splitDataSet\ (dataSet,bestFeat,value),subLabels) return myTree if __name__=='__main__': dataSet, labels=createDataSet1() # 创造示列数据 mytree=createTree(dataSet, labels) # 输出决策树模型结果 print mytree TreePlot.createPlot(mytree)
__author__ = 'wanghao'
""" the glasses examples """
import ID3Tree
import TreePlot

# Read the tab-separated records; the context manager closes the file even
# if reading fails (the handle used to be left open).
with open('./lenses.txt') as fr:
    DataList = fr.readlines()

# One list of fields per input line.
DataSet = [data.strip().split('\t') for data in DataList]
# Single-argument print calls emit the same text on Python 2 and Python 3;
# the double space matches what the old two-item print statement produced.
print("The dateSet is  %s" % (DataSet,))

Labels = ['age', 'prescript', 'astigmatic', 'tearRate']
LenseTree = ID3Tree.createTree(DataSet, Labels)
print("the result ID3 Tree is  %s" % (LenseTree,))
TreePlot.createPlot(LenseTree)
# Single-argument print calls are used throughout so the script emits the same
# text on Python 2 and Python 3 (the print statement is a SyntaxError on 3).
# The double space in the messages matches what the old two-item print
# statement produced.
print("---------------------------------------\n")

# Number of leaf nodes of the tree.
LeafNodeNum = ID3Tree.getNumLeafs(myTree)
print("This ID3 Tree leaf node num is  %s" % (LeafNodeNum,))
print("---------------------------------------\n")

# Depth of the tree.
MaxDepth = ID3Tree.getTreeDepth(myTree)
print("This ID3 tree max depth is  %s" % (MaxDepth,))
print("---------------------------------------\n")

# Plot the tree.
TreePlot.createPlot(myTree)
print("---------------------------------------\n")
import os

from TestTrees import calcShannonEnt
from TestTrees import chooseBestFeatureToSplit
from TestTrees import createTree
import TestTrees
import TreePlot


def createDataSet():
    """Return a five-row toy data set and the names of its two features.

    Returns:
        A ``(dataSet, labels)`` tuple. Each row of ``dataSet`` holds two 0/1
        feature values followed by a ``'yes'``/``'no'`` class; ``labels``
        names the two feature columns.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


# os.path.join yields the same 'matpilb\lenses.txt' path on Windows, works on
# other platforms too, and avoids the invalid '\l' escape of the old literal.
# The context manager closes the file once the records are read.
with open(os.path.join('matpilb', 'lenses.txt')) as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['ages', 'prescript', 'astigmatic', 'tearRate']
print(lenses)
lensesTree = TestTrees.createTree(lenses, lensesLabels)
TreePlot.createPlot(lensesTree)
# NOTE(review): this line starts in the middle of a statement. It appears to be the tail of CreateTree
# (the signature and the beginning of the body are outside this view) followed by the script entry
# point, which builds a tree with CreateTree and hands it to TreePlot.createPlot. The code is left
# exactly as found because the original nesting cannot be recovered from this fragment alone.
# NOTE(review): the last recursive call passes `labels_lianxu`, while the two earlier recursive calls
# pass `label_lianxu` -- confirm which name is intended.
'>=' + str(splite))] = CreateTree( son_data, label_lisan, label_lianxu) elif j[feature] < splite and i == 1: son_data.append(j) if len(son_data) == 0: return leaf(data) else: Tree[labels_word[feature]][str( '<' + str(splite))] = CreateTree( son_data, label_lisan, label_lianxu) else: for j in data: if j[feature] == label_value[feature][i]: son_data.append(j) if len(son_data) == 0: return leaf(data) else: Tree[labels_word[feature]][ label_value[feature][i]] = CreateTree( son_data, label_lisan, labels_lianxu) return Tree if __name__ == '__main__': Tree = CreateTree(dataSet, labels_lisan, labels_lianxu) TreePlot.createPlot(Tree)