def testLenseDataset():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Each line of ``lenses.txt`` (looked up in the current working directory)
    is stripped and split on tabs to form one record.  The records and the
    four feature names are handed to ``createTree`` and the resulting tree is
    passed to ``createPlot``.
    """
    from trees import createTree

    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    createPlot(lensesTree)
def lenses():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Reads tab-separated records from ``lenses.txt`` in the current working
    directory, builds a tree with ``trees.createTree`` and draws it with
    ``createPlot``.  Returns ``None``.
    """
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        records = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(records, lensesLabels)
    createPlot(lensesTree)
def testClassify(self):
    """Check that the tree built from the fixture data classifies two samples."""
    myDat, labels = TreesTestCase.createDataSet()
    # Snapshot the feature names before building the tree.
    # NOTE(review): createTree may remove entries from the list it is given;
    # if it does, classifying with that same list fails -- confirm against
    # trees.createTree.  The copy is harmless if it does not.
    featLabels = list(labels)
    tree = trees.createTree(myDat, labels)
    c = trees.classify(tree, featLabels, [1, 0])
    self.assertEqual(c, 'no')
    c = trees.classify(tree, featLabels, [1, 1])
    self.assertEqual(c, 'yes')
def create_tree():
    """Build and return a decision tree from the training data.

    The records come from ``rawdata.get_train_data()`` and the attribute
    names from ``rawdata.get_attr_value()``; both are passed to
    ``trees.createTree`` and its result is returned.
    """
    lenses = rawdata.get_train_data()
    lenses_lables = rawdata.get_attr_value()
    # The unused ``lenses_two`` copy of the data was dropped.
    return trees.createTree(lenses, lenses_lables)
def test_createPlot(self):
    """Plot the sample tree after adding an extra ``"maybe"`` branch to its root."""
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (samples))
    built = trees.createTree(samples, feature_names)
    # Add one more label under the root feature.
    root_branches = built['no surfacing']
    root_branches[3] = "maybe"
    treePlotter.createPlot(built)
def eyesTree():
    """Build a decision tree from ``lenses.txt`` and plot it with ``treePlotter``."""
    # Split every line on tabs; the context manager closes the file afterwards.
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic1', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    treePlotter.createPlot(lensesTree)
def execute():
    """Use a decision tree to address the problem 'which lens?'.

    Reads tab-separated records from ``lenses.txt``, builds the tree with
    ``trees.createTree``, prints it and plots it with ``tp.createPlot``.
    """
    # Read the data set; the context manager closes the file even on error.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # NOTE(review): 'prescirpt' looks like a typo of 'prescript'.  It is left
    # unchanged because it is runtime data passed to createTree.
    lensesLabels = ['age', 'prescirpt', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)  # build the decision tree
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(lensesTree)
    tp.createPlot(lensesTree)
def lenses():
    """Build a decision tree from ``./data/lenses.txt``, print it and plot it.

    The tree is passed to ``createPlot`` together with the name
    ``'lenses.jpg'``.
    """
    filename = './data/lenses.txt'
    # The context manager closes the file even if parsing raises.
    with open(filename) as fr:
        records = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = tree.createTree(records, lensesLabels)
    print(lensesTree)
    createPlot(lensesTree, 'lenses.jpg')
def test_treeNums(self):
    """Print the leaf count and depth that ``treePlotter`` reports for the sample tree."""
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (samples))
    built = trees.createTree(samples, feature_names)
    print("\n tree == %s" % (built))
    leaf_count = treePlotter.getNumLeafs(built)
    tree_depth = treePlotter.getTreeDepth(built)
    print("\n leafs == %s depth == %s " % (leaf_count, tree_depth))
def main():
    """Build, print and plot a decision tree from ``../file/Ch03/lenses.txt``."""
    # The context manager closes the file even if parsing raises.
    with open("../file/Ch03/lenses.txt") as fr:
        records = [line.strip().split('\t') for line in fr]
    print(records)
    feature_names = ['age', 'prescript', 'astigmatic', 'tearRate']
    lens_tree = trees.createTree(records, feature_names)
    print(lens_tree)
    treePlotter.createPlot(lens_tree)
def main():
    """Build a decision tree from ``lenses.txt`` and plot it."""
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    # The bare ``lensesTree`` expression statement had no effect and was removed.
    treePlotter.createPlot(lensesTree)
def test():
    """Build the lenses tree and print its classification of one sample record."""
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Snapshot the feature names before building the tree.
    # NOTE(review): createTree may remove entries from the list it is given;
    # if it does, classifying with that same list fails -- confirm against
    # trees.createTree.  The copy is harmless if it does not.
    featLabels = list(lensesLabels)
    lensesTree = trees.createTree(lenses, lensesLabels)
    result = trees.classify(lensesTree, featLabels,
                            ['young', 'myope', 'no', 'normal'])
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(result)
def test_store_load(self):
    """Store the sample tree to ``./mytree.txt``, load it back and print both."""
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (samples))
    built = trees.createTree(samples, feature_names)
    print("\n tree == %s" % (built))
    target = "./mytree.txt"
    trees.storeTree(built, target)
    reloaded = trees.grabTree(target)
    print("\n newTree == %s" % (reloaded))
def main():
    """Build the sample tree, plot it, store it, load it back and print it."""
    import trees
    import treePlotter

    samples, feature_names = trees.createDataSet()
    built = trees.createTree(samples, feature_names)
    treePlotter.createPlot(built, 'test.png')

    # Store the tree, then read it back from the same storage name.
    storage_name = 'classifierStorage'
    trees.storeTree(built, storage_name)
    restored = trees.grabTree(storage_name)
    print(restored)
def tests():
    """Exercise entropy, tree construction and classification on the sample data.

    Prints the data set, its Shannon entropy, the built tree with the label
    list, and the classification of the four 0/1 feature combinations.
    """
    dataSet, labels = trees.createDataSet()
    # Call-form prints below work under both Python 2 and Python 3.
    print(dataSet)
    print(trees.calcShannonEnt(dataSet))
    # Snapshot the feature names before building the tree.
    # NOTE(review): createTree may remove entries from the list it is given;
    # if it does, classifying with that same list fails -- confirm against
    # trees.createTree.  The copy is harmless if it does not.
    featLabels = list(labels)
    myTree = trees.createTree(dataSet, labels)
    # Same text as the Python 2 statement ``print myTree, labels``.
    print("%s %s" % (myTree, labels))
    for testVec in ([1, 0], [1, 1], [0, 0], [0, 1]):
        print(trees.classify(myTree, featLabels, testVec))
def main():
    '''
    Use a decision tree to predict the type of contact lenses a patient needs.
    The lens types are hard, soft, and not suitable for contact lenses.
    '''
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [linst.strip().split('\t') for linst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(lensesTree)
    treePlotter.createPlot(lensesTree)
def main():
    """Build the sample tree and print it, its inputs, its size and one classification."""
    dataSet, labels = trees.createDataSet()
    # The tree is built from a deep copy, so ``labels`` itself is never
    # handed to createTree.
    mytree = trees.createTree(dataSet, copy.deepcopy(labels))
    for item in (mytree, dataSet, labels):
        print(item)
    print(getNumLeafs(mytree))
    print(getTreeDepth(mytree))
    print(trees.classify(mytree, labels, [1, 0]))
def main():
    """Build a decision tree from ``lenses.txt``, print it and plot it."""
    # Read the file into a two-dimensional list (one list of fields per line).
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # Feature names.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Build the tree.
    lensesTree = trees.createTree(lenses, lensesLabels)
    print(lensesTree)
    # Draw it.
    treePlotter.createPlot(lensesTree)
def classContactLens():
    """
    Use a decision tree to predict contact lens type.

    The data file is read in binary mode; every line is stripped, decoded as
    UTF-8 and split on tabs before the tree is built and printed.
    """
    with open(r'./data/lenses.txt', 'rb') as fr:
        # bytes -> str, one record per line
        lenses = [raw.strip().decode('utf-8').split('\t')
                  for raw in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    print("lensesTree is :%s" % (lensesTree))
def job_tree():
    '''
    Rebuild the decision tree used for prediction.

    Reads tab-separated records from ``data/job_test.csv`` (UTF-8), builds a
    tree with the labels from ``get_labels2()``, prints it as JSON and stores
    it in ``data/tree.txt`` via ``trees.storeTree``.

    :return: None
    '''
    # The context manager closes the file even if parsing raises.
    with open(r'data/job_test.csv', encoding='UTF-8') as fr:
        listWm = [inst.strip().split('\t') for inst in fr]
    labels = get_labels2()
    Trees = trees.createTree(listWm, labels)
    print("决策树:")
    print(json.dumps(Trees, ensure_ascii=False))
    # Save the tree.
    fileName = r'data/tree.txt'
    trees.storeTree(Trees, fileName)
def gain_results(foldnum):
    """Train one decision tree per training fold and score it on a test fold.

    Fold names are listed from a hard-coded sample-data directory.  The first
    ``foldnum`` entries are used as test folds and the last ``foldnum``
    entries as training folds.  The tree built from the i-th training fold is
    evaluated on the i-th test fold.

    Returns:
        A ``(test_labs, accuracies, correct_ratio)`` tuple with one entry per
        test fold: the true labels (last column of every test row), the
        labels produced by ``trees.classify``, and the fraction of rows where
        the two agree.
    """
    # Attribute names shared by every fold.
    lenses_labels = rawdata.get_attr_value()
    dirs = os.listdir('D:/PyCharm/decision_tree/dataDir/sample_data1')
    tests = dirs[:foldnum]    # test folds
    trains = dirs[-foldnum:]  # training folds

    # One tree per training fold.
    decision_trees = [
        trees.createTree(rawdata.get_train_data(train_name), lenses_labels)
        for train_name in trains
    ]

    # Predicted labels for each test fold; the class column is cut off each row.
    accuracies = []
    for fold_idx, test_name in enumerate(tests):
        fold_tree = decision_trees[fold_idx]
        fold_rows = rawdata.get_test_data(test_name)
        accuracies.append([
            trees.classify(fold_tree, lenses_labels, row[:-1])
            for row in fold_rows
        ])

    # True labels for each test fold (the folds are loaded a second time here).
    test_labs = [
        [row[-1] for row in rawdata.get_test_data(test_name)]
        for test_name in tests
    ]

    # Fraction of correct predictions per fold.
    correct_ratio = []
    for truths, predicted in zip(test_labs, accuracies):
        hits = 0.0
        for pos, truth in enumerate(truths):
            if truth == predicted[pos]:
                hits += 1
        correct_ratio.append(hits / (len(truths)))

    return test_labs, accuracies, correct_ratio
def main():
    """
    Function:   main entry point

    Builds a decision tree from ``lenses.txt``, prints it and plots it.

    Args:       none

    Returns:    none
    """
    # Read the file contents; the context manager closes the file even if
    # parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # Feature names.
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Build the tree.
    lensesTree = trees.createTree(lenses, lensesLabels)
    # Print the tree.
    print(lensesTree)
    # Draw the tree.
    treePlotter.createPlot(lensesTree)
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 17 16:00:41 2014

@author: gq
"""
import trees

# Load the sample data set and show it.
myDat, labels = trees.createDataSet()
print(myDat)

# Build the decision tree from the sample data and show it.
# (Single-argument call-form prints give the same output under Python 2.)
print(trees.createTree(myDat, labels))
import trees

if __name__ == '__main__':
    # Build the decision tree for the sample data set and show it.
    # (Single-argument call-form print gives the same output under Python 2.)
    myDat, labels = trees.createDataSet()
    print(trees.createTree(myDat, labels))
# -*- coding:utf-8 -*- import trees myData,myLabels = trees.createDataSet() testLabels = myLabels.copy() print ('myData is ' , myData) #计算无序数据集的香农熵 #myShannonEnt = trees.calcShannonEnt(myData) #print ('myShannonEnt is ' , myShannonEnt ) ###测试划分数据集函数 #mySplitDat = trees.splitDataSet(myData, 1, 0) #print ('mySplitDat is ' , mySplitDat ) #myBestData = trees.chooseBestFeatureToSplit(myData) #print ('myBestData is ' , myBestData ) myTree = trees.createTree(myData, myLabels) print ('myTree is ' ,myTree) #测试训练集 print ('testLabels is ' ,testLabels) testResult = trees.classify(myTree, testLabels, [1,1]) print ('testResult is ' ,testResult) #trees.storeTree(myTree, 'classifierStorage.txt') fromFileTree = trees.grabTree('classifierStorage.txt') print ('fromFileTree is' , fromFileTree)
# Walk through the decision-tree helpers step by step on the sample data set.
myData, labels = trees.createDataSet()
print(myData)
print(trees.calcShannonEnt(myData))
print('---------------------------------')
# Split on feature 0 and feature 1 for both of the values 1 and 0.
print(trees.splitDataSet(myData, 0, 1))
print(trees.splitDataSet(myData, 0, 0))
print(trees.splitDataSet(myData, 1, 1))
print(trees.splitDataSet(myData, 1, 0))
print('---------------------------------')
# Reload the data and report the index of the best feature to split on.
myData, labels = trees.createDataSet()
print(myData)
print('第', trees.chooseBestFeatureToSplit(myData), '个特征是最好的用于划分数据集的特征')
print('---------------------------------')
# Reload the data again and build the tree.
myData, labels = trees.createDataSet()
myTree = trees.createTree(myData, labels)
print('myTree=', myTree)
print('---------------------------------')
# createPlot()
print('---------------------------------')
# Inspect the trees returned by retrieveTree.
print(retrieveTree(1))
myTree = retrieveTree(0)
print(myTree)
print(getNumLeafs(myTree))
print(getTreeDepth(myTree))
print('---------------------------------')
myTree = retrieveTree(0)
createPlot(myTree)
# NOTE(review): this handle is not used or closed in the visible part of the
# script -- confirm how it is consumed further down.
f = open( 'D:/paper/kNN-master/机器学习实战(中文版+英文版+源代码)/机器学习实战源代码/machinelearninginaction/Ch03/lenses.txt' )
#-*- coding:utf-8 -*-
# Script: build a decision tree from lenses.txt, print it and plot it.
import trees
import treePlotter

# One record per line, fields separated by tabs.  The context manager closes
# the file even if parsing raises.  ``fr`` stays bound (closed) afterwards.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLables)
# Single-argument call form prints the same under Python 2 and Python 3.
print(lensesTree)
treePlotter.createPlot(lensesTree)
test_set.append(cars[rand_index][0:-1]) test_label.append(cars[rand_index][-1]) del(cars[rand_index]) #print test_set #print #print test_label #exit(0) print cars_labels #m,n = shape(cars) #print m,n #m,n = shape(test_set) #print m,n cars_tree = trees.createTree(cars, cars_labels) #print cars_tree m,n = shape(test_set) #print cars_labels #exit(0) #print cars_labels2 #exit(0) err_count = 0 for i in range(m): ret = trees.classify(cars_tree, cars_labels2, test_set[i]) if ret != test_label[i]: err_count +=1 print "err=", err_count
import trees
import treePlotter

if __name__ == '__main__':
    # Read the tab-separated lens records, one per line.
    with open('lenses.txt') as fin:
        lenses = [inst.strip().split('\t') for inst in fin]

    # Build the tree from the records and feature names, then draw it.
    labels = ['age', 'prescript', 'astigmatic', 'tear-rate']
    decisionTree = trees.createTree(lenses, labels)
    treePlotter.createPlot(decisionTree)
# Script: walk through entropy, splitting, feature choice, tree building and
# classification on the sample data set, printing each result.
import trees
myDat, lables = trees.createDataSet()
print("------ shannon ------")
print(myDat)
print(trees.calcShannonEnt(myDat))
# print("------ shannon after changed ------")
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))
print("------ split data set ------")
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))
print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(myDat))
print("------ create tree ------")
tree = trees.createTree(myDat, lables)
print(tree)
print("------ test tree classify ------")
# classify is given a literal feature-name list rather than ``lables``.
# NOTE(review): presumably because createTree modifies ``lables`` -- confirm.
print(trees.classify(tree, ['no surfacing', 'flippers'], [1, 0]))
plotTree.totalD = float(getTreeDepth(inTree)) plotTree.xOff = -.5/plotTree.totalW; plotTree.yOff = 1.0; plotTree(inTree, (.5, 1.0), '') plt.show() if __name__ == '__main__': # createPlot() # print retrieveTree(1) # myTree = retrieveTree(0) # print getNumLeafs(myTree) # print getTreeDepth(myTree) # myTree['no surfacing'][3] = 'maybe' # print myTree import trees file_name = 'lenses.txt' raw = open(file_name) lenses_data = [] for line in raw: lenses_data.append(line.rstrip().split('\t')) lenses_label = ['age', 'prescript', 'astigmatic', 'tearRate'] ## this is to make the decision tree lenses_tree = trees.createTree(lenses_data, lenses_label) # print lenses_tree createPlot(lenses_tree) ### create the decision tree
# Script: build a decision tree from lenses.txt, print it and plot it.
import trees
myDat, labels = trees.createDataSet()
import treePlotter

# The context manager closes the file even if parsing raises.
# Call-form prints give the same output under Python 2 and Python 3.
with open('lenses.txt') as fr:
    # Show the file object itself while it is still open, as before.
    print(fr)
    lenses = [inst.strip().split('\t') for inst in fr]
print(lenses)
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
# _*_ coding:utf-8 _*_
# Script: build a decision tree from lenses.txt and print the data and the tree.
import trees

# One record per line, fields separated by tabs.  The context manager closes
# the file even if parsing raises.
with open('lenses.txt') as fr:
    listsss = [inst.strip().split('\t') for inst in fr]
lenseslabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTeee = trees.createTree(listsss, lenseslabels)
print(listsss)
print(lensesTeee)
# Script: print the sample data, its labels, its entropy, one split and the tree.
import trees
myDat, labels = trees.createDataSet()
print(myDat)
print(labels)
print(trees.calcShannonEnt(myDat))
# Rows selected by splitting on feature 0 with value 0.
print(trees.splitDataSet(myDat, 0, 0))
print(trees.createTree(myDat, labels))
# This test goes with Python3
import trees
import treePlotter

if __name__ == '__main__':
    # Build the tree for the sample data set and draw it.
    dataSet, labels = trees.createDataSet()
    decisionTree = trees.createTree(dataSet, labels)
    treePlotter.createPlot(decisionTree)
plotNode(firstStr, cntrPt, parentPt, decisionNode) secondDict = myTree[firstStr] plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD for key in secondDict.keys(): if type(secondDict[key]).__name__=='dict':#test to see if the nodes are dictonaires, if not they are leaf nodes plotTree(secondDict[key],cntrPt,str(key)) #recursion else: #it's a leaf node print the leaf node plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode) plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key)) plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD #if you do get a dictonary you know it's a tree, and the first element will be another dict def createPlot(inTree): fig = plt.figure(1, facecolor='white') fig.clf() axprops = dict(xticks=[], yticks=[]) createPlot.ax1 = plt.subplot(111, frameon=False, **axprops) #no ticks #createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses plotTree.totalW = float(getNumLeafs(inTree)) plotTree.totalD = float(getTreeDepth(inTree)) plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0; plotTree(inTree, (0.5,1.0), '') plt.show() # collect data myDat, labels = trees.createDataSet() mytree = trees.createTree(myDat, labels) #visualize decision tree createPlot(mytree)
# Script fragment: report entropy and best feature, build and plot the tree,
# then classify one test vector.  ``fr``, ``dataset`` and ``labels`` are
# defined before this point.
fr.close()
print dataset
print '\n'
print '数据集类的香农熵:'
print trees.calcShannonEnt(dataset)
print '\n'
bestFeatureColumn = trees.chooseBestFeatureToSplit(dataset)
print '数据集最佳分类的属性是:'
print labels[bestFeatureColumn]
print '\n'
print '决策树:'
Tree = trees.createTree(dataset, labels)
print Tree
# Indexing ``keys()`` requires Python 2, where it returns a list.
firstFeature = Tree.keys()[0]
print firstFeature
firstFeatureValues = Tree[firstFeature].keys()
print firstFeatureValues
print '\n'
treePlotter.createPlot(Tree)
testVec = ['pre', 'myope', 'yes', 'normal']
print '测试数据'
print testVec
# NOTE(review): presumably createTree removed 'tearRate' from ``labels``, and
# this puts it back so classify can look the feature up -- confirm.
labels.append('tearRate')
print '匹配过程:'
result = trees.classify(Tree, labels, testVec)
## clac ShannonEnt # a = trees.calcShannonEnt(myData) # print(a) ## split dataSet # a = trees.splitDataSet(myData, 0, 0) # print(a) ## choose best Feature to split # a = trees.chooseBestFeatureToSplit(myData) # print(a, '\n', myData) ## built tree with dict type subLabels = labels[:] tree = trees.createTree(myData, subLabels) # print(tree) ### 2. Plot tree # treePlotter.createPlot() ## get leafs num of tree # b = treePlotter.getNumLeafs(tree) # print(b) ## get depth of tree # c = treePlotter.getTreeDepth(tree) # print(c) # treePlotter.createPlot(tree) # tree['no surfacing'][2] = 'maybe'
# -*- coding: UTF-8 -*-
'''
this module is an example of classifying stealth by decision tree
@author: Liu Weijie
'''
import trees
import drawATree


# get data
def getData(filename):
    """Read tab-separated records from *filename*.

    Returns a ``(dataList, labelList)`` tuple.  ``dataList`` holds one list
    of fields per line of the file, and ``labelList`` is the fixed list of
    the four feature names.
    """
    # The context manager closes the file even if parsing raises.
    with open(filename) as fr:
        dataList = [line.strip().split('\t') for line in fr]
    labelList = ['age', 'prescript', 'astigmatic', 'tearRate']
    return dataList, labelList


if __name__ == '__main__':
    dataSet, labelList = getData('lenses.txt')
    # Call-form prints give the same output under Python 2 and Python 3.
    print(dataSet)
    # Snapshot the feature names before building the tree.
    # NOTE(review): createTree may remove entries from the list it is given;
    # if it does, classifying with that same list fails -- confirm against
    # trees.createTree.  The copy is harmless if it does not.
    featLabels = list(labelList)
    myTree = trees.createTree(dataSet, labelList)
    drawATree.drawTree(myTree)
    data1 = ['pre','myope','no','reduced']
    # Same text as the Python 2 statement ``print 'this is ', <result>``.
    print('%s %s' % ('this is ', trees.classifyByTree(myTree, featLabels, data1)))
# Script: build a decision tree from lenses.txt and print it.
import treePlotter
import trees

# One record per line, fields separated by tabs.  The context manager closes
# the file even if parsing raises.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lenseTree = trees.createTree(lenses, lensesLabels)
# Single-argument call form prints the same under Python 2 and Python 3.
print(lenseTree)
import trees myData, labels = trees.createDataSet2() #print myData,labels #print trees.calcShannonEnt(myData) #print trees.chooseBestFeatureToSplit(myData) myTree = trees.createTree(myData, labels) print myTree import treePlotter treePlotter.createPlot(myTree) ''' import treePlotter #treePlotter.createPlot() #myTree = treePlotter.retrieveTree(1) myTree = treePlotter.retrieveTree(0) print myTree #print treePlotter.getNumLeafs(myTree) #print treePlotter.getTreeDepth(myTree) #treePlotter.createPlot(myTree) myData, labels = trees.createDataSet() print labels print trees.classify(myTree, labels, [1,0])
def test_createTree(self):
    """Build a tree from the sample data set and print both for inspection."""
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (samples))
    built = trees.createTree(samples, feature_names)
    print("\n tree == %s" % (built))
# Script: build the decision tree for the sample data set and plot it.
import treePlotter
import trees
# a1 is the data set and a2 the label list returned by createDataSet.
a1, a2 = trees.createDataSet()
# b1 is the tree built from them.
b1 = trees.createTree(a1, a2)
treePlotter.createPlot(b1)
# Script: build decision trees from two CSV files and classify sample inputs.
import trees
import pandas as pd

# --- data/1.csv ---
df = pd.read_csv('data/1.csv')
# All column names (including the last one) are passed to createTree.
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)
# Re-read the column names and drop the last one before classifying.
col = df.columns.tolist()[:-1]
input1 = ['a3', 'b1', 'c2', 'd1'] # N
input2 = ['a3', 'b3', 'c1', 'd1'] # N
input3 = ['a2', 'b1', 'c1', 'd2'] # Y
input4 = ['a1', 'b1', 'c1', 'd1'] # Y
print(trees.classify(tree, col, input1))
print(trees.classify(tree, col, input2))
print(trees.classify(tree, col, input3))
print(trees.classify(tree, col, input4))

# --- data/2.csv ---
df = pd.read_csv('data/2.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)
col = df.columns.tolist()[:-1]
# NOTE(review): these inputs are not classified in the visible part of the
# script -- confirm how they are used further down.
input1 = ['undergraduate', 'man', 'cet6', 'a1', 'b1'] # N
input2 = ['undergraduate', 'man', 'cet4', 'a1', 'b1'] # Y
input3 = ['postgraduate', 'man', 'no', 'a1', 'b2'] # Y
input4 = ['undergraduate', 'man', 'no', 'a1', 'b3'] # Y
input5 = ['undergraduate', 'man', 'no', 'a3', 'b3'] # Y
import trees as tree
import create_data_from_two_streams as get_data
import plotting_trees as ptree

# Build a tree from the data returned by get_data and report its depth.
# (Single-argument call-form print gives the same output under Python 2.)
myDat, labels = get_data.get_data()
myTree = tree.createTree(myDat, labels)
print(ptree.getTreeDepth(myTree))
def testID3(filename):
    """Load records and labels from *filename*, build a tree and plot it."""
    records, label_names = trees.file2strlist(filename)
    treePlotter.createPlot(trees.createTree(records, label_names))
# Columns: name, industry, profession, sex, photography, road trips,
# SNS power user, github, bypasses the GFW, reads a lot, sci-fi fan,
# broad interests, brags, class.
# The training rows below carry no name column.
def getTrainingDatas():
    """Return the hand-written training set and its feature names.

    Returns:
        A ``(dataSet, labels)`` tuple.  Every row of ``dataSet`` holds twelve
        feature values followed by the class string, and ``labels`` names
        the twelve features in the same order.
    """
    dataSet = [
        ["it", "gm", "man", 1, 1, 0, 0, 0, 0, 0, 1, 0, "liver"],
        ["it", "engineer", "man", 0, 1, 0, 0, 0, 0, 0, 0, 0, "empty"],
        ["it", "sale", "man", 0, 1, 0, 0, 0, 0, 0, 0, 1, "liver"],
        ["it", "founder", "man", 0, 1, 1, 0, 0, 0, 0, 1, 0, "boss"],
        ["it", "phd", "man", 1, 0, 0, 0, 0, 1, 0, 1, 0, "liver, fake hacker"],
        ["it", "engineer", "man", 0, 1, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 0, 0, 1, 0, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
    ]
    labels = ["industry", "profession", "sex", "camera", "drive tour", "SNS",
              "github", "over GFW", "reader", "Science fiction fan", "hobby",
              "brag"]
    return dataSet, labels


if __name__ == "__main__":
    # Both modes need the data set and labels, so load them once.
    dataSet, labels = getTrainingDatas()
    if len(sys.argv) > 1:
        # classify test, tortoise (first element is the name)
        classmate = ["tortoise", "it", "engineer", "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        tree = trees.grabTree("cm_tree.txt")
        # Single-argument call form prints the same under Python 2 and 3.
        print("{} is \"{}\"".format(classmate[0], trees.classify(tree, labels, classmate[1:])))
    else:
        # training: createTree gets a copy of the labels
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
[1, 1, 1, 'no'], [1, 1, 1, 'no'], [1, 1, 1, 'no'], [1, 0, 0, 'no'], [0, 1, 1, 'no'], [0, 1, 1, 'no'], [0, 1, 0, 'no'] ] labels = ['glasses','man','170'] return dataSet, labels dataSet, labels = misc.loadLensesData("test") print entropy.calcShannonEnt(dataSet) # print trees.chooseBestFeatureToSplit(dataSet) myTree = trees.createTree(dataSet,labels) print myTree import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import treePlotter treePlotter.createPlot(myTree) # misc.storeTree(myTree, 'lenses_tree.txt') # tree2 = misc.grabTree('classifierStorage.txt') # treePlotter.createPlot(tree2)
import sys reload(sys) #http://www.pythoner.com/200.html #运行时没问题, 但是编译有有错误提示, 可以忽略。 sys.setdefaultencoding('utf8') print(sys.getdefaultencoding()) # fr = open('lensesCN.txt') lenses = [unicode(inst, 'utf-8').strip().strip().split('\t') for inst in fr.readlines()] #lensesLabels = ["年龄组" , "规定", "闪光", "泪液扫除率"] lensesLabels = ['age' , 'prescript', 'astigmatic', 'tearRate'] lensesTree = tr.createTree(lenses,lensesLabels) print(lensesTree) tp.createPlot(lensesTree) dataSet, labels = tr.createDataSet() shannonEnt = tr.calcShannonEnt(dataSet) print(shannonEnt) print(tp.retrieveTree(1)) myTree = tp.retrieveTree(0) numLeafs = tp.getNumLeafs(myTree) treeDepth = tp.getTreeDepth(myTree)
# Script: compute the entropy of the sample data, build its tree and open a plot.
from imp import reload
import trees
myDat, labels = trees.createDataSet()
# The entropy value is computed but not stored or printed.
trees.calcShannonEnt(myDat)
myTree = trees.createTree(myDat, labels)
import treePlotter
# NOTE(review): createPlot is called without ``myTree`` -- confirm whether
# the tree was meant to be passed.
treePlotter.createPlot()
def testCreateTree(self):
    """Check that the tree built from the fixture data has the expected nested-dict shape."""
    expected = {
        'no surfacing': {
            0: 'no',
            1: {'flippers': {0: 'no', 1: 'yes'}},
        },
    }
    samples, feature_names = TreesTestCase.createDataSet()
    built = trees.createTree(samples, feature_names)
    self.assertEqual(expected, built)