def test_store_load(self):
    """Build a tree from trees.createDataSet(), store it, reload it, print each stage.

    The tree is written to ``./mytree.txt`` with ``trees.storeTree`` and read
    back with ``trees.grabTree``; nothing is asserted, the values are printed.
    """
    samples, feature_names = trees.createDataSet()
    print("\n dataSet == %s" % (samples))

    built = trees.createTree(samples, feature_names)
    print("\n tree == %s" % (built))

    # Round-trip through the on-disk representation.
    path = "./mytree.txt"
    trees.storeTree(built, path)
    reloaded = trees.grabTree(path)
    print("\n newTree == %s" % (reloaded))
def main():
    """Create a tree, plot it to 'test.png', then store, reload and print it."""
    import trees
    import treePlotter

    samples, feature_names = trees.createDataSet()
    tree = trees.createTree(samples, feature_names)
    # Alternative source for the tree: treePlotter.retrieveTree(1)
    treePlotter.createPlot(tree, 'test.png')

    # Persist the tree, then read it back from the same file.
    storage = 'classifierStorage'
    trees.storeTree(tree, storage)
    tree = trees.grabTree(storage)
    print(tree)
# -*- coding: utf-8 -*-
import json

import JobTree
import trees
import keras

# Store JobTree.Trees on disk, then reload it and dump it as JSON.
fileName = r'tree.txt'
trees.storeTree(JobTree.Trees, fileName)

# An earlier variant read 'job_tree.txt' and passed encoding="cp936" to
# json.dumps; it is kept here only as a note.
reloaded = trees.grabTree(fileName)
print(json.dumps(reloaded, ensure_ascii=False))
print("1代表熟练掌握,2代表精通,3代表熟悉,4代表了解")
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree) print "获取树的层数:", treePlotter.getTreeDepth(myTree) treePlotter.createPlot(myTree) myTree['no surfacing'][3] = 'maybe' print "myTree:", myTree treePlotter.createPlot(myTree) #3.3.1 测试算法:使用决策树执行分类 myDat, labels = trees.createDataSet() print "labels:", labels myTree = treePlotter.retrieveTree(0) print "myTree:", myTree print "分类1:", trees.classify(myTree, labels, [1, 0]) print "分类2:", trees.classify(myTree, labels, [1, 1]) #3.3.2 决策树的存储 trees.storeTree(myTree, homedir + 'classifierStorage.txt') print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt') print ":", print ":", #3.4 示例:使用决策树预测隐形眼镜类型 fr = open(homedir + 'lenses.txt') print 'fr:', fr lenses = [inst.strip().split('\t') for inst in fr.readlines()] print 'lenses:', lenses lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate'] print 'lensesLabels:', lensesLabels lensesTree = trees.createTree(lenses, lensesLabels) treePlotter.createPlot(lensesTree)
# Script fragment: `trees`, `mydata` and `features` are defined before this point.
# print(trees.splitDataSet(mydata,0,1))
index = trees.chooseBestFeatureToSplit(mydata)
# print(index)

# Disabled: build the tree from the raw data and print it.
# mytree = trees.createTree(mydata,features)
# print(mytree)

import treePlotter

# Disabled: plot the canned tree, add a 'maybe' branch, plot again.
# mytree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(mytree)
# mytree['no surfacing'][3] = 'maybe'
# treePlotter.createPlot(mytree)

# Classify two feature vectors with the canned tree.
mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree, features, [0, 0]))
print(trees.classify(mytree, features, [1, 1]))

# Store the tree and read it back.
trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)

# Build and plot a tree from the tab-separated lenses file; `with` closes
# the handle, which the original left open.
with open('lenses.txt') as fr:
    lense = [inst.strip().split('\t') for inst in fr]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense, lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
import trees
import treePlotter

# Build a tree from the sample data set (result is discarded).
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot a tree
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify a feature vector with the decision tree
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Contact-lens data set test; `with` closes the file handle that the
# original left open.
with open('lenses.txt') as fr:
    ds = [example.strip().split("\t") for example in fr]
# NOTE(review): 'antigmatic' looks like a typo for 'astigmatic'; it is kept
# because the label becomes a key in the generated tree — confirm before renaming.
ls = ['age', 'prescript', 'antigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
treePlotter.createPlot(mt)
# Script fragment: `trees`, `treePlotter`, `dataset` and `labels` are defined
# before this point.  Every print takes a single argument, so the
# parenthesised form prints the same text on Python 2 and Python 3.
print('决策树:')
Tree = trees.createTree(dataset, labels)
print(Tree)
# list(...) keeps indexing valid on Python 3, where keys() is a view.
firstFeature = list(Tree.keys())[0]
print(firstFeature)
firstFeatureValues = list(Tree[firstFeature].keys())
print(firstFeatureValues)
print('\n')
treePlotter.createPlot(Tree)

testVec = ['pre', 'myope', 'yes', 'normal']
print('测试数据')
print(testVec)
# NOTE(review): presumably createTree removed 'tearRate' from `labels` and it
# is re-added here so classify can look it up — confirm against trees.createTree.
labels.append('tearRate')
print('匹配过程:')
result = trees.classify(Tree, labels, testVec)
print('匹配结果:')
print(result)
print('\n')

# Store the tree on disk
print('将树存放磁盘...')
trees.storeTree(Tree, 'myTree.txt')
print('\n')

# Read the tree back from disk
print('再从磁盘中读取树:')
print(trees.grabTree('myTree.txt'))
# Disabled experiments kept for reference:
# print(myDat)
# print(trees.calcShannonEnt(myDat))
# Three arguments: data set, feature to split on, feature value;
# finds the rows whose feature equals that value.
# print(trees.splitDataSet(myDat,0,1))
# Choose the single best feature to split on.
# print(trees.chooseBestFeatureToSplit(myDat))
# Tree structure as a dict.
# print(trees.createTree(myDat,labels))
# Draw the tree structure.
# treePlotter.createPlot()
# Number of leaves and depth of the tree.
# print(treePlotter.getNumleafs(trees.createTree(myDat,labels)))
# print(treePlotter.getTreeDepth(trees.createTree(myDat,labels)))
#
# treePlotter.createPlot(trees.createTree(myDat,labels))

# Test the classifier on a hand-written tree.
myTree = {
    'no surfacing': {
        0: 'no',
        1: {'flippers': {0: 'no', 1: 'yes'}},
    },
}
# print(trees.classify(myTree,labels,[1,1]))

# Test storing and reloading the decision tree model.
trees.storeTree(myTree, 'testClassify.txt')
_reloaded = trees.grabTree('testClassify.txt')
print(_reloaded)
# Script fragment: `trees` and `treePlotter` are imported before this point.
myDat, labels = trees.createDataSet()
print(myDat)
# print(trees.calcShannonEnt(myDat))
# data_set = trees.splitDataSet(myDat, 0, 1)
# print(data_set)
print(trees.chooseBestFeatureToSplit(myDat))
myTree = trees.createTree(myDat, labels)
print(myTree)
# treePlotter.createPlot()

# Some code was lost when the repository was rolled back.
trees.storeTree(myTree, 'classifierStorage.npy')
tree = trees.grabTree('classifierStorage.npy')
print(tree)
# print(tree)

# `with` closes the lenses file once the rows are read; the original left
# the handle open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
# Columns: age, prescript (prescription), astigmatic, tearRate (tear production)
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
# NOTE(review): `labels` was already passed to createTree above; if that call
# mutates the list, this classify sees the modified labels — confirm.
print(trees.classify(myTree, labels, [1, 0]))
# -*- coding=utf-8 -*-
"""
make_lenses_tree

Date: 16/2/27
Company: Copyright (c) 2016 Ninlgde co.,Ltd. All right reserved.
"""
import trees
import treePlotter as tp

__author__ = "Ninlgde"

if __name__ == "__main__":
    # Read the tab-separated lenses rows; `with` closes the handle that the
    # original left open.
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lensesTree = trees.createTree(lenses, lensesLabels)

    # Store the tree, reload it, and plot the reloaded copy.
    trees.storeTree(lensesTree, "lenses_tree.bin")
    tree = trees.grabTree("lenses_tree.bin")
    tp.createPlot(tree)
import trees
import treePlotter

# Directory holding the chapter's data files.  A raw string yields the same
# characters as the original literals without relying on unrecognised escape
# sequences such as "\m" or "\C".
_CH03_DIR = r'F:\python库包\机器学习实战源代码\machinelearninginaction\Ch03'
_STORAGE_PATH = _CH03_DIR + '\\classifierStorage.txt'
_LENSES_PATH = _CH03_DIR + '\\lenses.txt'

# Store the classifier on disk so that it persists, then load it back.
myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree, _STORAGE_PATH)
trees.grabTree(_STORAGE_PATH)

# `with` closes the lenses file; the original never closed it.
with open(_LENSES_PATH) as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)  # build the decision tree
print(lensesTree)
treePlotter.createPlot(lensesTree)  # plot it
# Column legend: name industry profession sex photography road-trips SNS-savvy
# github bypasses-GFW reads-often sci-fi-fan wide-interests brags class
# (the training rows below omit the leading "name" column).
def getTrainingDatas():
    """Return the hard-coded training rows and their feature labels.

    Returns:
        A ``(dataSet, labels)`` tuple.  Each row of ``dataSet`` holds twelve
        feature values followed by the class string; ``labels`` names the
        twelve features in column order.  New lists are built on every call.
    """
    dataSet = [
        ["it", "gm", "man", 1, 1, 0, 0, 0, 0, 0, 1, 0, "liver"],
        ["it", "engineer", "man", 0, 1, 0, 0, 0, 0, 0, 0, 0, "empty"],
        ["it", "sale", "man", 0, 1, 0, 0, 0, 0, 0, 0, 1, "liver"],
        ["it", "founder", "man", 0, 1, 1, 0, 0, 0, 0, 1, 0, "boss"],
        ["it", "phd", "man", 1, 0, 0, 0, 0, 1, 0, 1, 0, "liver, fake hacker"],
        ["it", "engineer", "man", 0, 1, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 0, 0, 1, 0, 1, 0, "fake hacker"],
        ["it", "engineer", "man", 0, 0, 0, 1, 1, 1, 1, 1, 0, "fake hacker"],
    ]
    labels = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby", "brag",
    ]
    return dataSet, labels


if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Any extra command-line argument switches to classifying one
        # test record ("tortoise") with the previously stored tree.
        classmate = ["tortoise", "it", "engineer", "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        _, labels = getTrainingDatas()
        tree = trees.grabTree("cm_tree.txt")
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("{} is \"{}\"".format(
            classmate[0], trees.classify(tree, labels, classmate[1:])))
    else:
        # Training: build the tree, store it, and plot it.
        dataSet, labels = getTrainingDatas()
        # A copy of `labels` is passed — presumably createTree modifies its
        # label list; confirm against trees.createTree.
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
# -*- coding:utf-8 -*-
import trees

myData, myLabels = trees.createDataSet()
print('myData is ', myData)
# Keep an independent copy of the labels for classification later on.
testLabels = list(myLabels)

# Disabled experiments:
# Shannon entropy of the unordered data set
# myShannonEnt = trees.calcShannonEnt(myData)
# print ('myShannonEnt is ' , myShannonEnt )
# Test of the data-set splitting function
# mySplitDat = trees.splitDataSet(myData, 1, 0)
# print ('mySplitDat is ' , mySplitDat )
# myBestData = trees.chooseBestFeatureToSplit(myData)
# print ('myBestData is ' , myBestData )

myTree = trees.createTree(myData, myLabels)
print('myTree is ', myTree)

# Classify a sample with the tree just built.
print('testLabels is ', testLabels)
testResult = trees.classify(myTree, testLabels, [1, 1])
print('testResult is ', testResult)

# The store step is disabled, so the file read below must already exist.
# trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')
print('fromFileTree is', fromFileTree)
# Script fragment: `tp` and `tr` are module aliases imported before this point.
myTree = tp.retrieveTree(0)
tp.createPlot(myTree)
# Add an extra branch and plot the modified tree.
myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)

myDat, labels = tr.createDataSet()
print(labels)
myTree = tp.retrieveTree(0)
print(myTree)
print(tr.classify(myTree, labels, [1, 0]))
print(tr.classify(myTree, labels, [1, 1]))

# Restore the tree from disk and print it.
restoreTree = tr.grabTree('classifierStorage.txt')
print(restoreTree)

#
# `with` closes the lenses file, which the original left open; a single
# strip() is enough because strip() is idempotent.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses, lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

#
# NOTE(review): the code that consumes this handle lies beyond the visible
# fragment, so the open call is left as-is; make sure it is closed there.
fr = open('lensesCN.txt')
import math
import operator
import matplotlib.pyplot as plt
import pickle
import trees
import treeplotter

# Store the canned tree, load it back, and write its text form to a file.
myDat, labels = trees.createDataSet()
myTree = treeplotter.retrieveTree(0)

trees.storeTree(myTree, 'classifierstorage.txt')
ans = trees.grabTree('classifierstorage.txt')

with open('output.out', 'w') as f:
    rendered = str(ans)
    f.write(rendered)
import trees
import treePlotter

# Every print below takes a single argument, so the parenthesised form
# prints the same text on Python 2 and Python 3.
myDat, labels = trees.createDataSet()
print(labels)
myTree = treePlotter.retrieveTree(0)
print(myTree)
print(trees.classify(myTree, labels, [1, 0]))
print(trees.classify(myTree, labels, [1, 1]))

# The store step is disabled, so the file read below must already exist.
# trees.storeTree(myTree, 'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))

# `with` closes the lenses file; the original never closed it.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
# _*_ coding:utf-8 _*_
import trees

# Disabled experiment: split the data set on feature 0 == 1.
# mydat,labels=trees.createDataSet()
# result=trees.splitDataSet(mydat,0,1)
# print (result)

# Disabled experiment: pick the best feature to split on.
# mydat,labels=trees.createDataSet()
# print (trees.chooseBestFeatureToSplit(mydat))

# Disabled experiment: build and print the tree.
# mydat,labels=trees.createDataSet()
# mytree=trees.createTree(mydat,labels)
# print (mytree)

# Store the canned tree, then load it back and print it.
mydat, labels = trees.createDataSet()
mytree = trees.retrieveTree(0)
trees.storeTree(mytree, 'classStorage.txt')
_reloaded = trees.grabTree('classStorage.txt')
print(_reloaded)
0: 'no', 1: 'yes' } }, 1: 'no' } } } }] return listOfTrees[i] if __name__ == '__main__': import trees import treePlotter dataSet, labels = trees.createDataSet() myTree = trees.createTree(dataSet, labels) print(myTree) treePlotter.createPlot(myTree) fr = open('lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = trees.createTree(lenses, lensesLabels) print(lensesTree) treePlotter.createPlot(lensesTree) trees.storeTree(lensesTree, 'test.txt') trees.grabTree('test.txt')
n = TP.getNumLeafs(myTree) # 3 d = TP.getTreeDepth(myTree) # 2 TP.createPlot(myTree) # classify myDat, labels = DT.createDataSet() myTree = TP.retrieveTree( 0) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} class1 = DT.classify(myTree, labels, [1, 0]) # no class2 = DT.classify(myTree, labels, [1, 1]) # yes # storing the tree pickeld form DT.storeTree(myTree, 'data/classifierStorage.txt') grabedTree = DT.grabTree( 'data/classifierStorage.txt' ) # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}} # lens tree fr = open('data/lenses.txt') lenses = [inst.strip().split('\t') for inst in fr.readlines()] lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate'] lensesTree = DT.createTree(lenses, lensesLabels) """ output: {'tearRate': {'reduced': 'no lenses', 'normal': {'astigmatic': {'yes': {'prescript': {'myope': 'hard', 'hyper': {'age': {'young': 'hard', 'presbyopic': 'no lenses', 'pre': 'no lenses'}}}}, 'no': {'age': {'young': 'soft',
# -*- coding: utf-8 -*-
import treeplot
import trees

# The original stored to 'classifierStorage' but read 'classifierStorage.txt',
# so the tree just written was never the one loaded; one path is used for both.
_STORAGE_PATH = 'classifierStorage.txt'

# NOTE(review): `myTree` is not defined anywhere in the visible code —
# confirm where it is supposed to come from.
trees.storeTree(myTree, _STORAGE_PATH)
# Single-argument print(...) behaves the same on Python 2 and 3.
print(trees.grabTree(_STORAGE_PATH))
# Notebook-style fragment: `trees`, `myData` and `labels` are defined before
# this point; bare expressions are there to display their value.
labels

# Split the data set on a given column and value.
trees.splitDataSet(myData, 0, 1)
trees.splitDataSet(myData, 0, 0)

# Get the best way to split the data set (computed from entropy).
trees.chooseBestFeatureToSplit(myData)

# Build the decision tree.
myTree = trees.createTree(myData, labels)
myTree

# Check the tree on test vectors.
# NOTE(review): `creatDataSet` may be a typo for `createDataSet` — confirm
# against the `trees` module this file imports.
myData, labels = trees.creatDataSet()
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

# Store the decision tree.
trees.storeTree(myTree, 'classifierStore.txt')

# Load the decision tree stored in the file.
trees.grabTree('classifierStore.txt')

# Build the contact-lens tree; `with` closes the file the original left open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
import trees
import treePlotter


def test():
    """Print a fixed greeting."""
    # Parenthesised form matches the print(...) calls used in the rest of
    # this file and is valid on Python 3.
    print("hello world")


if __name__ == '__main__':
    # Disabled: train a tree and store it.
    # train_data, labels = trees.createDataSet()
    # my_trees = trees.createTree(train_data, labels)
    # print(my_trees)
    # trees.storeTree(my_trees, 'classifiermelon.txt')

    # Load the stored tree, classify one feature vector, and plot the tree.
    melon_tree = trees.grabTree('classifiermelon.txt')
    print(melon_tree)
    melon_labels = ['color', 'root', 'sound', 'texture', 'navel', 'touch']
    melon_feature = [1, 1, 1, 1, 1, 1]
    print("the predicted result is:",
          trees.classify(melon_tree, melon_labels, melon_feature))
    treePlotter.createPlot(melon_tree)

    # Disabled diagnostics:
    # print(treePlotter.getNumLeafs(my_trees), treePlotter.getTreeDepth(my_trees))
    # ent = trees.calcShannonEnt(train_data)
    # feature1 = trees.splitDataSet(train_data, 0, 0)
    # feature2 = trees.splitDataSet(train_data, 0, 1)
    # best_feature = trees.chooseBestFeatureToSplit(train_data)
    # print(ent)
    # print(feature1, feature2)
# main
# Script fragment: `createTree`, `classify`, `tP` and `tr_f` come from
# earlier in the file.

# Read the lenses data and build the tree; `with` closes the file handle
# that the original left open.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = createTree(lenses, lensesLabels)
print(lensesTree)

# plot tree
tP.createPlot(lensesTree)

# Classify a new sample.  The label list is rebuilt first — presumably
# because createTree modifies the list it is given; confirm.
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
testVec = ['young', 'hyper', 'yes', 'normal']
result = classify(lensesTree, lensesLabels, testVec)
print(result)

# Store the built tree and load it back.
tr_f.storeTree(lensesTree, 'ClassfyTree_lenses.txt')
load_tree = tr_f.grabTree('ClassfyTree_lenses.txt')
print(load_tree)

# Disabled: classify the original data set.
# lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
# classify(lensesTree, lensesLabels, lenses[0][:-1])
#
# preds = []
# for i in range(len(lenses)):
#     pred = classify(lensesTree, lensesLabels, lenses[i][:-1])
#     preds.append(pred)
# print(preds)