def testClassify(self):
    """Check that a tree built from the sample data classifies known vectors.

    The tree is trained on the data set returned by
    ``TreesTestCase.createDataSet`` and then queried with two feature
    vectors whose expected classes are ``'no'`` and ``'yes'``.
    """
    myDat, labels = TreesTestCase.createDataSet()
    # Train on a copy of the label list so that ``labels`` is guaranteed to
    # be complete when it is handed to classify() below.
    # NOTE(review): createTree presumably removes entries from the list it
    # receives (other callers copy it first) -- confirm against trees.py.
    tree = trees.createTree(myDat, labels[:])
    for feature_vector, expected in (([1, 0], 'no'), ([1, 1], 'yes')):
        self.assertEqual(trees.classify(tree, labels, feature_vector), expected)
def test_classify(self):
    """Exercise ``trees.classify`` against a fixed two-level decision tree.

    Each case supplies the feature-name list, the item to classify and the
    expected class.  The second and third cases list the feature names in
    the opposite order from the first, so the lookup of a feature's column
    by name is covered as well.
    """
    def build_tree():
        # A fresh dict for every case, so no case can see another's tree.
        return {
            'no surfacing': {
                0: 'no',
                1: {'flippers': {0: 'no', 1: 'yes'}},
            }
        }

    cases = (
        # (feat_names, item, expected)
        (['no surfacing', 'flippers'], [1, 0], 'no'),  # training data
        (['flippers', 'no surfacing'], [0, 1], 'no'),  # same sample, names swapped
        (['flippers', 'no surfacing'], [0, 0], 'no'),  # not in the training data
    )
    for feat_names, item, expected in cases:
        self.assertEqual(expected,
                         trees.classify(build_tree(), feat_names, item))
def main():
    """Build a tree from the sample data, print its statistics, classify [1, 0].

    Prints, in order: the tree, the data set, the feature labels, the leaf
    count, the tree depth and the class predicted for ``[1, 0]``.
    """
    # createPlot()
    samples, feature_names = trees.createDataSet()
    # createTree works on a deep copy; the untouched ``feature_names`` list
    # is what classify() receives at the end.
    decision_tree = trees.createTree(samples, copy.deepcopy(feature_names))
    for shown in (decision_tree, samples, feature_names):
        print(shown)
    print(getNumLeafs(decision_tree))
    print(getTreeDepth(decision_tree))
    # createPlot(decision_tree)
    print(trees.classify(decision_tree, feature_names, [1, 0]))
def tests(): dataSet, labels = trees.createDataSet() print dataSet print trees.calcShannonEnt(dataSet) myTree = trees.createTree(dataSet, labels) print myTree, labels print trees.classify(myTree, labels, [1, 0]) print trees.classify(myTree, labels, [1, 1]) print trees.classify(myTree, labels, [0, 0]) print trees.classify(myTree, labels, [0, 1])
def test():
    """Train a tree on ``lenses.txt`` and print the class of one sample.

    ``lenses.txt`` is read as tab-separated rows.  The sample classified is
    ``['young', 'myope', 'no', 'normal']``.
    """
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Train on a copy so ``lensesLabels`` is still complete for classify().
    # NOTE(review): createTree presumably removes entries from the list it
    # receives (other callers copy it first) -- confirm against trees.py.
    lensesTree = trees.createTree(lenses, lensesLabels[:])
    # treePlotter.createPlot(lensesTree)
    result = trees.classify(lensesTree, lensesLabels,
                            ['young', 'myope', 'no', 'normal'])
    print(result)
def ack():
    """Handler: classify the comma-separated values typed by the user.

    Reads the text held by ``varTestOne``, splits it on commas (after
    removing leading/trailing commas), classifies the resulting list with
    ``myTree`` and shows the input together with the result in a message box.
    """
    user_text = varTestOne.get()  # user input
    varTestOne.set(user_text)
    fields = user_text.strip(',').split(',')
    print(fields)
    labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    result = trees.classify(myTree, labels, fields)
    report = "您的输入是:" + user_text + "\n结果是:" + result
    tkinter.messagebox.showinfo(title="判断结果", message=report)
def test_trees_classify(self):
    """Check the predictions of a tree trained on the five-row sample matrix.

    All four combinations of the two binary features are classified; only
    ``[1, 1]`` is expected to yield ``'yes'``.
    """
    matrix = [[1, 1, 'yes'],
              [1, 1, 'yes'],
              [1, 0, 'no'],
              [0, 1, 'no'],
              [0, 1, 'no']]
    labels = ['no surfacing', 'flippers']
    tree = trees.create_tree(matrix, labels)
    expectations = (
        ([1, 1], 'yes'),
        ([1, 0], 'no'),
        ([0, 1], 'no'),
        ([0, 0], 'no'),
    )
    for sample, expected in expectations:
        # assertEqual instead of failUnless(a == b): failUnless is a
        # deprecated alias (removed in Python 3.12), and assertEqual reports
        # both values when the check fails.
        self.assertEqual(trees.classify(sample, tree, labels), expected)
def gain_results(foldnum):
    """Train one decision tree per training file and score it on a test file.

    The sample directory listing is split into ``foldnum`` test files (taken
    from the front) and ``foldnum`` training files (taken from the back).
    The i-th tree is evaluated on the i-th test file.

    :param foldnum: number of entries taken from each end of the listing.
    :return: tuple ``(test_labs, accuracies, correct_ratio)`` where, per
        test file, ``test_labs`` holds the true labels (last column of each
        row), ``accuracies`` holds the predicted labels and
        ``correct_ratio`` holds the fraction of matching predictions.
    :raises ZeroDivisionError: if a test file yields no rows.
    """
    # Attribute (feature) names
    lenses_labels = rawdata.get_attr_value()
    dirs = os.listdir('D:/PyCharm/decision_tree/dataDir/sample_data1')
    tests = dirs[:foldnum]    # test-set files
    trains = dirs[-foldnum:]  # training-set files

    # Every tree is trained on its own copy of the label list, so the list
    # used for classification below is the same for every fold.
    # NOTE(review): createTree presumably removes entries from the list it
    # receives (other callers copy it first) -- confirm against trees.py.
    decision_trees = [
        trees.createTree(rawdata.get_train_data(train_file), list(lenses_labels))
        for train_file in trains
    ]

    test_labs = []
    accuracies = []
    correct_ratio = []
    for decs_tree, test_file in zip(decision_trees, tests):
        # Load each test file once; predictions and true labels both come
        # from the same rows.
        test_data = rawdata.get_test_data(test_file)
        predicted = [trees.classify(decs_tree, lenses_labels, row[:-1])
                     for row in test_data]
        actual = [row[-1] for row in test_data]
        hits = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)

        accuracies.append(predicted)
        test_labs.append(actual)
        # float() keeps this a true division under Python 2 as well.
        correct_ratio.append(float(hits) / len(actual))
    return test_labs, accuracies, correct_ratio
# Walk-through of the trees / treePlotter helpers: choose the best split,
# classify with a stored sample tree, round-trip the tree through a file,
# then build and plot a tree for the contact-lens data.

# print(trees.splitDataSet(mydata, 0, 1))
index = trees.chooseBestFeatureToSplit(mydata)
# print(index)

# mytree = trees.createTree(mydata, features)
# print(mytree)

import treePlotter

# mytree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(mytree)
# mytree['no surfacing'][3] = 'maybe'
# treePlotter.createPlot(mytree)

mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree, features, [0, 0]))
print(trees.classify(mytree, features, [1, 1]))

trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)

# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lense = [inst.strip().split('\t') for inst in fr]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense, lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
import trees
import treePlotter

# Train a decision tree on the tab-separated contact-lens data and print
# the predicted class for two samples.

# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
# createTree gets a copy; ``lensesLabels`` itself is used for classify().
lensesTree = trees.createTree(lenses, lensesLabels[:])
print(trees.classify(lensesTree, lensesLabels,
                     ['young', 'hyper', 'no', 'normal']))
print(trees.classify(lensesTree, lensesLabels,
                     ['presbyopic', 'myope', 'no', 'normal']))
'''
Decision tree test script
'''
import trees

# Earlier experiment, kept for reference:
# dataSet, lables = trees.createDataSet()
# print(dataSet)
# print(lables)
# shannonEnt = trees.calcShannonEnt(dataSet)
# print(shannonEnt)

# Build the tree from one copy of the sample data ...
dataSet, labels = trees.createDataSet()
tree = trees.createTree(dataSet, labels)

# ... and classify with the label list of a second, freshly created copy.
d, l = trees.createDataSet()
result = trees.classify(tree, l, [1, 0])
print(result)
import trees
import treePlotter

# Build a decision tree
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot a tree
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify with a decision tree
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize a decision tree
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Contact-lens data set
# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    ds = [example.strip().split("\t") for example in fr]
# 'astigmatic' was previously misspelled 'antigmatic'.
ls = ['age', 'prescript', 'astigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
import trees
import treePlotter

# Classify two samples with the stored example tree, reload a tree from
# disk, then build and plot a tree for the contact-lens data.

myDat, labels = trees.createDataSet()
print(labels)
myTree = treePlotter.retrieveTree(0)
print(myTree)
print(trees.classify(myTree, labels, [1, 0]))
print(trees.classify(myTree, labels, [1, 1]))

# trees.storeTree(myTree, 'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))

# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
# Plot the example tree (before and after adding a third branch), classify
# two samples, reload a stored tree and train on the contact-lens data.
print(numLeafs)
print(treeDepth)

myTree = tp.retrieveTree(0)
tp.createPlot(myTree)
myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)

myDat, labels = tr.createDataSet()
print(labels)
myTree = tp.retrieveTree(0)
print(myTree)
print(tr.classify(myTree, labels, [1, 0]))
print(tr.classify(myTree, labels, [1, 1]))

# Restore the tree and print it.
restoreTree = tr.grabTree('classifierStorage.txt')
print(restoreTree)

# The context manager closes lenses.txt once its rows have been parsed.
# A single strip() is enough; the second call was a no-op.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses, lensesLabels)
print(lensesTree)
# Build, inspect, plot and apply a decision tree, then round-trip it
# through a file on disk.
print('决策树:')
# Train on a copy so ``labels`` still lists every feature when it is passed
# to classify() below; no label has to be re-appended afterwards.
# NOTE(review): createTree presumably removes entries from the list it
# receives -- confirm against trees.py.
Tree = trees.createTree(dataset, labels[:])
print(Tree)

# next(iter(...)) / list(...) work on Python 2 and 3 alike.
firstFeature = next(iter(Tree))
print(firstFeature)
firstFeatureValues = list(Tree[firstFeature])
print(firstFeatureValues)
print('\n')

treePlotter.createPlot(Tree)

testVec = ['pre', 'myope', 'yes', 'normal']
print('测试数据')
print(testVec)

print('匹配过程:')
result = trees.classify(Tree, labels, testVec)
print('匹配结果:')
print(result)
print('\n')

# Store the tree on disk
print('将树存放磁盘...')
trees.storeTree(Tree, 'myTree.txt')
print('\n')

# Read the tree back from disk
print('再从磁盘中读取树:')
print(trees.grabTree('myTree.txt'))
# -*- coding:utf-8 -*-
import trees

# Build a tree from the sample data, classify one sample with a saved copy
# of the label list, then load a previously stored tree from disk.

myData, myLabels = trees.createDataSet()
# Keep a separate copy of the labels for classification.
testLabels = myLabels.copy()
print('myData is ', myData)

# Shannon entropy of the unsorted data set
# myShannonEnt = trees.calcShannonEnt(myData)
# print('myShannonEnt is ', myShannonEnt)

# Data-set splitting helpers
# mySplitDat = trees.splitDataSet(myData, 1, 0)
# print('mySplitDat is ', mySplitDat)
# myBestData = trees.chooseBestFeatureToSplit(myData)
# print('myBestData is ', myBestData)

myTree = trees.createTree(myData, myLabels)
print('myTree is ', myTree)

# Classify a sample from the training set
print('testLabels is ', testLabels)
testResult = trees.classify(myTree, testLabels, [1, 1])
print('testResult is ', testResult)

# trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')
print('fromFileTree is', fromFileTree)
import trees
import tree_plotter

# Print the stored example tree and the sample labels, then classify two
# feature vectors and print each predicted label.
tree = tree_plotter.retrieve_tree(0)
print(tree)

dataset, labels = trees.create_dataset()
print(labels)

for feature_vector in ([1, 0], [1, 1]):
    label = trees.classify(tree, labels, feature_vector)
    print(label)
print cars_labels #m,n = shape(cars) #print m,n #m,n = shape(test_set) #print m,n cars_tree = trees.createTree(cars, cars_labels) #print cars_tree m,n = shape(test_set) #print cars_labels #exit(0) #print cars_labels2 #exit(0) err_count = 0 for i in range(m): ret = trees.classify(cars_tree, cars_labels2, test_set[i]) if ret != test_label[i]: err_count +=1 print "err=", err_count print "sum=", m #treePlotter.createPlot(cars_tree)
import trees

# Step through the decision-tree helpers on the sample data set: entropy,
# splitting, best-feature choice, tree construction and classification.
myDat, lables = trees.createDataSet()

print("------ shannon ------")
print(myDat)
print(trees.calcShannonEnt(myDat))

# print("------ shannon after changed ------")
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

print("------ split data set ------")
for feature_value in (1, 0):
    print(trees.splitDataSet(myDat, 0, feature_value))

print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(myDat))

print("------ create tree ------")
tree = trees.createTree(myDat, lables)
print(tree)

print("------ test tree classify ------")
# A literal label list is used here rather than ``lables``.
print(trees.classify(tree, ['no surfacing', 'flippers'], [1, 0]))
#3.2.2 构造注解树 treePlotter.retrieveTree(1) myTree = treePlotter.retrieveTree(0) print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree) print "获取树的层数:", treePlotter.getTreeDepth(myTree) treePlotter.createPlot(myTree) myTree['no surfacing'][3] = 'maybe' print "myTree:", myTree treePlotter.createPlot(myTree) #3.3.1 测试算法:使用决策树执行分类 myDat, labels = trees.createDataSet() print "labels:", labels myTree = treePlotter.retrieveTree(0) print "myTree:", myTree print "分类1:", trees.classify(myTree, labels, [1, 0]) print "分类2:", trees.classify(myTree, labels, [1, 1]) #3.3.2 决策树的存储 trees.storeTree(myTree, homedir + 'classifierStorage.txt') print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt') print ":", print ":", #3.4 示例:使用决策树预测隐形眼镜类型 fr = open(homedir + 'lenses.txt') print 'fr:', fr lenses = [inst.strip().split('\t') for inst in fr.readlines()] print 'lenses:', lenses lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate'] print 'lensesLabels:', lensesLabels
# Earlier experiments, kept for reference:
# myTree = trees.createTree(myDat, labels)
# print(myTree)
# treePlotter.createPlot()
# mytree = treePlotter.retrieveTree(0)
# numLeafs = treePlotter.getNumLeafs(mytree)
# print(numLeafs)
# treeDepth = treePlotter.getTreeDepth(mytree)
# print(treeDepth)
# myTree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(myTree)
# print(myTree)
# myTree['no surfacing'][3] = 'maybe'
# print(myTree)
# treePlotter.createPlot(myTree)

# Classify one sample with the stored example tree.
myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
res = trees.classify(myTree, labels, [1, 1])
print(res)
print(myTree)

# Round-trip the tree through a file.
trees.storeTree(myTree, 'classifierStorage.txt')
newTree = trees.grabTree('classifierStorage.txt')
print(newTree)

# Build and plot a tree for the contact-lens data.
# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
# print(mydat)

# Earlier experiments, kept for reference:
# cc = trees.calcShannonEnt(mydat)
# print(cc)
# aa = trees.splitDataSet(mydat, 0, 1)
# print(aa)
# bb = trees.splitDataSet(mydat, 1, 0)
# print(bb)
# kk = trees.chooseBestFeatureToSplit(mydat)
# print(kk)

# mytree = trees.createTree(mydat, labels)
# print(mytree)

import treePlotter

# treePlotter.createPlot()
# dd = treePlotter.retrieveTree(1)
# print(dd)

# Classify one sample with the stored example tree and print the result.
myTree = treePlotter.retrieveTree(0)
# print(myTree)
# a = treePlotter.getNumLeafs(myTree)
# b = treePlotter.getTreeDepth(myTree)
# print(a, b)
# treePlotter.createPlot(myTree)
aa = trees.classify(myTree, labels, [1, 1])
print(aa)
# ``reload`` must be imported before its first use below.
from importlib import reload

# Plot the tree
reload(treePlotter)
myTree = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree)

# Modify the dict and plot again
myTree['no surfacing'][3] = 'maybe'
myTree  # bare name: only echoes a value in an interactive session
treePlotter.createPlot(myTree)

# Test the classify function
myDat, labels = trees.createDataSet()
labels
myTree = treePlotter.retrieveTree(0)
myTree
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

reload(trees)

# Test storing the decision tree with pickle
trees.storeTree(myTree, 'classifierStorage.txt')
trees.grabTree('classifierStorage.txt')

# Load the contact-lens data
# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
treePlotter.createPlot(lensesTree)
# lenses_two = lenses[:] # lenses_labels_two = lenses_labels[:] lenses = rawdata.get_train_data(trains[i]) decision_tree = trees.createTree(lenses, lenses_labels) # print treePlotter.createPlot(decision_tree) # 循环打印决策树 decision_trees.append(decision_tree) # print len(decision_trees) # 5 # print treePlotter.createPlot(decision_trees) for m in range(len(tests)): accu = [] decs_tree = decision_trees[m] test_data = rawdata.get_test_data(tests[m]) # print decs_tree # 决策树 # print test_data # 被测数据 for y in range(len(test_data)): result = trees.classify(decs_tree, lenses_labels, test_data[y][:-1]) # print '结果:%s' % result # print '循环次数:%d' % y accu.append(result) # print '数组长度:%d' % len(accu) # print '*****' accuracies.append(accu) # print len(accuracies) # print accuracies[-1][-4] print len(accuracies) print '华丽的分割线' test_labs = [] for p in range(len(tests)): test_lab = []
# Earlier experiments, kept for reference:
# trees.splitDataSet(myDat, 0, 1)
# trees.splitDataSet(myDat, 0, 0)
# trees.splitDataSet(myDat, 0, 0)
# myDat = [[1, 'yes'], [1, 'yes'], [1, 'no'], [0, 'no'], [0, 'no']]
# myDat = [[1, 1, 'yes'], [1, 1, 'yes'], [0, 1, 'no'], [1, 0, 'no'], [1, 0, 'no']]
# print(trees.chooseBestFeatureToSplit(myDat))
# print(myDat)
# print(trees.createTree(myDat, labels))
# treePlotter.createPlot()

# Fetch the stored example tree, print it and classify one sample (the
# classification result is not used).
myTree = treePlotter.retrieveTree(0)
# print(myTree)
# numLeaf = treePlotter.getNumLeafs(myTree)
# print(numLeaf)
# depth = treePlotter.getTreeDepth(myTree)
# print(depth)
# treePlotter.createPlot(myTree)
print(myTree)
trees.classify(myTree, labels, [0, 1])

# list.extend flattens: result is [1, 2, 3, 4, 5, 6]
# a = [1, 2, 3]
# b = [4, 5, 6]
# a.extend(b)
# print(a)

# list.append nests: result is [1, 2, 3, [4, 5, 6]]
# a = [1, 2, 3]
# b = [4, 5, 6]
# a.append(b)
# print(a)
def test_classify_simple(self):
    """Classify two vectors with the stored example tree and check the classes.

    The labels come from ``trees.load_simple_data``; the tree comes from
    ``tree_plot.retrieve_tree(0)``.
    """
    _, labels = trees.load_simple_data()  # only the labels are needed
    my_tree = tree_plot.retrieve_tree(0)
    for feature_vector, expected in (([1, 0], 'no'), ([1, 1], 'yes')):
        self.assertEqual(expected,
                         trees.classify(my_tree, labels, feature_vector))
import trees
import treePlotter

# Classify one sample with the stored example tree and save that tree to
# 'classifierStorage.txt'.
myData, labels = trees.createDataSet()
# print(myData)
print(labels)

# print(trees.calcShannonEnt(myData))
# retDataSet = trees.splitDataSet(myData, 1, 0)
# print(retDataSet)
# print(trees.chooseBestFeatureToSplit(myData))
# myTree = trees.createTree(myData, labels)
# print(myTree)
# treePlotter.createPlot()
# print(treePlotter.retrieveTree(1))

myTree2 = treePlotter.retrieveTree(0)
print(myTree2)

classLabel = trees.classify(myTree2, labels, [1, 1])
print("classLabel:", classLabel)

trees.storeTree(myTree2, 'classifierStorage.txt')
# print(treePlotter.getNumLeafs(myTree2))
# print(treePlotter.getTreeDepth(myTree2))
# treePlotter.createPlot(myTree2)
import trees
import pandas as pd

# --- data/1.csv: train on every row, then classify four samples ---------
df = pd.read_csv('data/1.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

# Feature names only: the last column is dropped for classification.
col = df.columns.tolist()[:-1]
input1 = ['a3', 'b1', 'c2', 'd1']  # N
input2 = ['a3', 'b3', 'c1', 'd1']  # N
input3 = ['a2', 'b1', 'c1', 'd2']  # Y
input4 = ['a1', 'b1', 'c1', 'd1']  # Y
for sample in (input1, input2, input3, input4):
    print(trees.classify(tree, col, sample))

# --- data/2.csv: train and prepare five samples -------------------------
df = pd.read_csv('data/2.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

col = df.columns.tolist()[:-1]
input1 = ['undergraduate', 'man', 'cet6', 'a1', 'b1']  # N
input2 = ['undergraduate', 'man', 'cet4', 'a1', 'b1']  # Y
input3 = ['postgraduate', 'man', 'no', 'a1', 'b2']  # Y
input4 = ['undergraduate', 'man', 'no', 'a1', 'b3']  # Y
input5 = ['undergraduate', 'man', 'no', 'a3', 'b3']  # Y
# Column legend: name, industry, profession, sex, photography,
# self-drive travel, SNS enthusiast, github, bypasses the GFW, avid reader,
# sci-fi fan, wide interests, bragging, class
def getTrainingDatas():
    """Return the hand-written training rows and their feature names.

    :return: ``(dataSet, labels)``.  Every row of ``dataSet`` holds twelve
        feature values followed by the class string; ``labels`` names the
        twelve features in column order.  Fresh lists are built on each call.
    """
    labels = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby",
        "brag",
    ]
    dataSet = [
        ["it", "gm", "man", 1, 1, 0, 0, 0, 0, 0, 1, 0, "liver"],
        ["it", "engineer", "man", 0, 1, 0, 0, 0, 0, 0, 0, 0, "empty"],
        ["it", "sale", "man", 0, 1, 0, 0, 0, 0, 0, 0, 1, "liver"],
        ["it", "founder", "man", 0, 1, 1, 0, 0, 0, 0, 1, 0, "boss"],
        ["it", "phd", "man", 1, 0, 0, 0, 0, 1, 0, 1, 0, "liver, fake hacker"],
        ["it", "engineer", "man", 0, 1, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 0, 0, 1, 0, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
    ]
    return dataSet, labels


if __name__ == "__main__":
    # Both modes start from the same training data and labels.
    dataSet, labels = getTrainingDatas()
    if len(sys.argv) > 1:
        # Classify mode (any extra command-line argument): load the stored
        # tree and classify the sample classmate "tortoise".
        classmate = ["tortoise", "it", "engineer", "man",
                     1, 0, 0, 0, 0, 1, 0, 1, 1]
        tree = trees.grabTree("cm_tree.txt")
        # classmate[0] is the name; the remaining entries are the features.
        verdict = trees.classify(tree, labels, classmate[1:])
        print("{} is \"{}\"".format(classmate[0], verdict))
    else:
        # Training mode: build the tree from a copy of the labels, store it
        # and plot it.
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
import trees
import matplotlib.pyplot as plt
import treePlotter

# Classify the sample [1, 0] with the stored example tree and print the
# predicted class.
d, l = trees.createDataSet()
# print(l)
# print(d)
# print(trees.createTree(d, l))

mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree, l, [1, 0]))
# print(treePlotter.getTreeDepth(mytree))
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import trees

if __name__ == '__main__':
    # Load the data set and labels, compare the two best-feature selectors,
    # build a tree and classify one test vector.
    data = trees.createDataSet1()
    # print data
    dataSet, lables = data[0], data[1]
    print(dataSet)

    feature = trees.chooseBestFeatureToSplit(dataSet)
    feature1 = trees.chooseBestFeatureToSplit1(dataSet)
    print(feature)
    print(feature1)

    mytree = trees.createTree(dataSet, lables)
    print(mytree)
    print(trees.splitDataSet(dataSet, 0, 1))

    # Classification uses its own literal label list.
    featLabels = ['outlook', 'temperature', 'humidity', 'windy']
    testVec = [0, 1, 0, 0]
    print(trees.classify(mytree, featLabels, testVec))
# coding: utf-8
import trees
import treePlotter

# Classify the sample [1, 0] with the stored example tree and print the
# predicted class.
myData, labels = trees.createDataSet()

# Earlier experiments, kept for reference:
# print(trees.calcShannonEntropy(myData))
# print(trees.splitDataSet(myData, 0, 1))
# print(trees.chooseBestFeatureToSplit(myData))
# myTree = trees.createTree(myData, labels)
# print(myTree)

myTree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(myTree)
print(trees.classify(myTree, labels, [1, 0]))
import tree_plotter import random fr = open('car.data') lenses = [line.strip().split(',') for line in fr] labels = ['buying', 'maint', 'doors', 'doors', 'persons', 'safety'] fr.close() random.shuffle(lenses) train = lenses[:1000] test = lenses[1000:] lenses_tree = trees.create_tree(train, labels) #print lenses_tree #tree_plotter.create_plot(lenses_tree) err = 0 total = 0 call = 0 for vec in test: real = vec[-1] test_vec = vec[:-1] ret = trees.classify(lenses_tree, labels, test_vec) total += 1 if ret == '-': continue call += 1 if real != ret: err += 1 print '总体,召回,错误数,召回率,错误率' print total, call, err, call * 1.0 / total, err * 1.0 / call
# treePlotter.createPlot()

## Number of leaves in the tree
# b = treePlotter.getNumLeafs(tree)
# print(b)

## Depth of the tree
# c = treePlotter.getTreeDepth(tree)
# print(c)
# treePlotter.createPlot(tree)
# tree['no surfacing'][2] = 'maybe'
# print(tree)

### 3. Test the classifier
a = trees.classify(tree, labels, [1, 1])
# print(a)

### 4. Store the classifier
# trees.storeTree(tree, 'classifierStorage.txt')
# b = trees.grabTree('classifierStorage.txt')
# print(b)

### 5. Example: use a decision tree as a contact-lens classifier
# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
import treePlotter as TP # TP.createPlot() myTree = TP.retrieveTree( 0) #{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} n = TP.getNumLeafs(myTree) # 3 d = TP.getTreeDepth(myTree) # 2 TP.createPlot(myTree) # classify myDat, labels = DT.createDataSet() myTree = TP.retrieveTree( 0) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} class1 = DT.classify(myTree, labels, [1, 0]) # no class2 = DT.classify(myTree, labels, [1, 1]) # yes # storing the tree pickeld form DT.storeTree(myTree, 'data/classifierStorage.txt') grabedTree = DT.grabTree( 'data/classifierStorage.txt' ) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} # lens tree fr = open('data/lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = DT.createTree(lenses, lensesLabels) """ output:
import treePlotter


def test():
    """Print a fixed greeting."""
    print("hello world")


if __name__ == '__main__':
    # Training and storing the tree is disabled; a previously stored tree
    # is loaded instead.
    # train_data, labels = trees.createDataSet()
    # my_trees = trees.createTree(train_data, labels)
    # print(my_trees)
    # trees.storeTree(my_trees, 'classifiermelon.txt')
    melon_tree = trees.grabTree('classifiermelon.txt')
    print(melon_tree)

    melon_labels = ['color', 'root', 'sound', 'texture', 'navel', 'touch']
    melon_feature = [1, 1, 1, 1, 1, 1]
    prediction = trees.classify(melon_tree, melon_labels, melon_feature)
    print("the predicted result is:", prediction)
    treePlotter.createPlot(melon_tree)

    # print(treePlotter.getNumLeafs(my_trees), treePlotter.getTreeDepth(my_trees))
    # ent = trees.calcShannonEnt(train_data)
    # feature1 = trees.splitDataSet(train_data, 0, 0)
    # feature2 = trees.splitDataSet(train_data, 0, 1)
    # best_feature = trees.chooseBestFeatureToSplit(train_data)
    # print(ent)
    # print(feature1, feature2)
    # print(best_feature)
# Information gain, best split feature, tree construction and
# classification for the house data, followed by the contact-lens example.
print('--信息增益')
ig = shannon - hxy
print(ig)

print('--找到最佳分类特征')
feature = trees.chooseBestFeatureToSplit(dateset)
print(labels[feature])

print('--创建决策树')
# createTree gets a copy; ``labels`` itself is used for classify() below.
labelsCopy = labels[:]
tree = trees.createTree(dateset, labelsCopy)
print(tree)

# print('--画图')
# treePlotter.createPlot(tree)

print('--用决策树测试数据')
# mytree = treePlotter.retrieveTree(0)
testdata = [4, 4, 1, 'cha']
label = trees.classify(tree, labels, testdata)
print(label)

print('--保存树')
trees.storeTree(tree, 'houseTree')

print('--测试隐形眼镜类型')
# The context manager closes lenses.txt once its rows have been parsed.
with open('lenses.txt') as fr:
    # for line in fr.readlines():
    #     print line
    #     row = line.strip().split('\t')
    #     print row
    lenses = [inst.strip().split('\t') for inst in fr]
len_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
len_tree = trees.createTree(lenses, len_labels)
print(len_tree)
print(len_labels)
treePlotter.createPlot(len_tree)