def demo(): print '... demo' myDat, featNames = createDataSet() print myDat shannonEnt = decisionTree.calcShannonEnt(myDat) print '当前数据集的熵是: ', shannonEnt print '... 测试拆分数据集' print decisionTree.splitDataSet(myDat, 0, 1) print '... 测试最佳特征选择' bestFeature = decisionTree.chooseBestFeatureToSplit(myDat) print '最好的分类特征是 %s' % bestFeature print '... 测试决策树的生成' myTree = decisionTree.createTree(myDat, featNames) print '生成的决策树是: \n', myTree print '... 测试SortedCount' classList = ['a', 'b', 'b', 'c', 'e'] print decisionTree.majorityCnt(classList) # print '... 测试绘制树节点' # plotDecisionTree.createPlot() print '... 测试绘制决策树' myTree = plotDecisionTree.retrieveTree(0) leafNums = decisionTree.getNumLeafs(myTree) treeDepth = decisionTree.getTreeDepth(myTree) print myTree print '叶子数量:%d, 树高度:%d' % (leafNums, treeDepth) # plotDecisionTree.createPlot(myTree) print '... 预测' featNames = ['no surfacing', 'flippers', 'fish'] print decisionTree.classify(myTree, featNames, [1, 1]) print '... 测试和读取决策树存储' decisionTree.storeTree(myTree, 'classfierStorage.txt') print decisionTree.grabTree('classfierStorage.txt') print '... 测试中文情况的决策树读取和存储' cnTree = plotDecisionTree.retrieveTree(2) print '存储前的决策树: \n', cnTree # 对于中文来说,正常的print只能打印出utf-8编码格式 # 但是可以递归的打印字典 就可以输出中文 decisionTree.storeTree(cnTree, 'cnTree.txt') print decisionTree.grabTree('cnTree.txt')
def randomForest(dataSet, labels, num_trees=2, max_depth=None, by='random', max_features='log2'):
    """Build a list of ``num_trees`` trees, each grown on its own sub-sample.

    For every tree, ``subSample(dataSet)`` is drawn first and then handed to
    ``createTree`` together with a shallow copy of ``labels`` (so each call
    receives its own list). ``max_depth``, ``by`` and ``max_features`` are
    forwarded to ``createTree`` unchanged.

    Returns:
        The trees in the order they were built; an empty list when
        ``num_trees`` is 0.
    """
    return [
        createTree(
            subSample(dataSet),
            labels[:],
            max_depth=max_depth,
            by=by,
            max_features=max_features
        )
        for _ in range(num_trees)
    ]
from decisionTree import createDataSet
from decisionTree import createTree
import matplotlib.pyplot as plt
dataSet, labels = createDataSet()
# Decision tree
decisionTree = createTree(dataSet, labels)
# Decision-node style: sawtooth box
decisionNode = dict(boxstyle="sawtooth", fc="0.8")
# Leaf-node style
leafNode = dict(boxstyle="round4", fc="0.8")
# Arrow style
arrow_args = dict(arrowstyle="<-")
def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Annotate ``parentPt`` with ``nodeTxt`` placed at ``centerPt``.

    Both points are given in axes-fraction coordinates. ``nodeType`` is the
    bbox style dict for the text box (e.g. ``decisionNode`` or ``leafNode``),
    and the arrow uses the module-level ``arrow_args``. Draws on
    ``createPlot.ax1``, so ``createPlot`` must have set that attribute first.
    """
    createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords="axes fraction",xytext=centerPt,\
        textcoords="axes fraction", va="center", ha="center", bbox=nodeType, arrowprops=arrow_args)
def createPlot():
    """Show a demo figure containing one decision node and one leaf node."""
    fig = plt.figure(1, facecolor="white")
    fig.clf()
    # The axes are stored as an attribute on the function so plotNode can reach them.
    createPlot.ax1 = plt.subplot(111, frameon=False)
    plotNode("decisionNode", (0.5, 0.1), (0.1, 0.5), decisionNode)
    plotNode('leafNode', (0.8, 0.1), (0.3, 0.8), leafNode)
    plt.show()
def getNumLeafs(myTree):
    numLeafs = 0
    for key in myTree:
# coding:utf-8 # 对隐形眼镜的分类 import decisionTree import plotDecisionTree import trees fr = open('lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] fr.close() print type(lenses) for i in range(len(lenses)): print lenses[i] print '... end lenses' lensesFeatName = ['age', 'prescript', 'astigmatic', 'tearRate', 'lenses type'] lensesTree = decisionTree.createTree(lenses, lensesFeatName) print lensesTree plotDecisionTree.createPlot(lensesTree)
#!/usr/bin/env python
import decisionTree as dt


def createDataset():
    """Return the toy training set as a ``(dataset, labels)`` tuple.

    ``dataset`` is a list of five two-element feature rows and ``labels`` is
    the list of the corresponding ``'yes'``/``'no'`` class names, in the same
    order.
    """
    records = [
        ([1, 1], 'yes'),
        ([1, 1], 'yes'),
        ([1, 0], 'no'),
        ([0, 1], 'no'),
        ([0, 1], 'no'),
    ]
    dataset = [features for features, _ in records]
    labels = [answer for _, answer in records]
    return dataset, labels


if __name__ == '__main__':
    dataset, labels = createDataset()

    labelName = ["no surface", "flipper"]
    tree = dt.createTree(dataset, labels, labelName)
    print(tree)

    # A fresh name list is built for classify rather than reusing the one
    # given to createTree (presumably because createTree may modify its
    # argument; verify against decisionTree).
    labelName = ["no surface", "flipper"]
    label = dt.classify(tree, labelName, [0, 0])
    print(label)