Ejemplo n.º 1
0
 def test_store_load(self):
     """Round-trip a freshly built decision tree through storeTree/grabTree.

     The tree is built from the data returned by ``trees.createDataSet()``,
     written to ``./mytree.txt``, read back, and compared with the tree that
     was stored. Intermediate values are printed for inspection.
     """
     dataSet, labels = trees.createDataSet()
     # One-element tuples keep the %-formatting correct whatever the value type.
     print("\n dataSet == %s" % (dataSet,))
     tree = trees.createTree(dataSet, labels)
     print("\n tree == %s" % (tree,))
     fileName = "./mytree.txt"
     trees.storeTree(tree, fileName)
     newTree = trees.grabTree(fileName)
     print("\n newTree == %s" % (newTree,))
     # Without this check the test could never fail on a broken round trip.
     assert newTree == tree
Ejemplo n.º 2
0
def main():
    """Build, plot, persist and reload the sample decision tree.

    The tree is created from ``trees.createDataSet()``, passed to
    ``treePlotter.createPlot`` together with ``'test.png'``, stored under
    ``'classifierStorage'`` and finally read back and printed.
    """
    import trees
    import treePlotter

    storage_name = 'classifierStorage'

    samples, feature_names = trees.createDataSet()
    built_tree = trees.createTree(samples, feature_names)
    # Alternative source for the tree: treePlotter.retrieveTree(1)
    treePlotter.createPlot(built_tree, 'test.png')

    trees.storeTree(built_tree, storage_name)
    reloaded_tree = trees.grabTree(storage_name)
    print(reloaded_tree)
Ejemplo n.º 3
0
# -*- coding: utf-8 -*-
import JobTree
import trees
import keras
# Persist the tree exposed as JobTree.Trees, then read it back and print it as JSON.
fileName = r'tree.txt'
trees.storeTree(JobTree.Trees, fileName)
# Earlier variant, kept for reference (read 'job_tree.txt' with encoding="cp936"):
# import json
# print(json.dumps(trees.grabTree('job_tree.txt'), encoding="cp936", ensure_ascii=False))
import json
# ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX escapes.
print(json.dumps(trees.grabTree(fileName), ensure_ascii=False))
# Legend printed for the reader: 1 = proficient, 2 = expert, 3 = familiar, 4 = basic understanding.
print("1代表熟练掌握,2代表精通,3代表熟悉,4代表了解")

# NOTE(review): myTree and homedir are defined before the visible part of this
# script. The print statements below use Python 2 syntax.

# Report the leaf count and the depth of the current tree
# (printed labels: "number of leaf nodes", "number of tree levels").
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree)
print "获取树的层数:", treePlotter.getTreeDepth(myTree)
treePlotter.createPlot(myTree)
# Add an extra branch (key 3) under 'no surfacing' and plot again.
myTree['no surfacing'][3] = 'maybe'
print "myTree:", myTree
treePlotter.createPlot(myTree)

# 3.3.1 Testing the algorithm: classifying with the decision tree
myDat, labels = trees.createDataSet()
print "labels:", labels
myTree = treePlotter.retrieveTree(0)
print "myTree:", myTree
# Printed labels: "classification 1" / "classification 2".
print "分类1:", trees.classify(myTree, labels, [1, 0])
print "分类2:", trees.classify(myTree, labels, [1, 1])

# 3.3.2 Storing the decision tree
trees.storeTree(myTree, homedir + 'classifierStorage.txt')
# Printed label: "decision tree loaded".
print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt')
# The trailing comma makes Python 2 print ':' without a newline, twice.
print ":",
print ":",

#3.4 示例:使用决策树预测隐形眼镜类型
fr = open(homedir + 'lenses.txt')
print 'fr:', fr
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
print 'lenses:', lenses
lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate']
print 'lensesLabels:', lensesLabels
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
Ejemplo n.º 5
0
# NOTE(review): mydata and features are defined before the visible part of
# this script.
#print(trees.splitDataSet(mydata,0,1))

# Ask the trees module which feature it would split on first.
index = trees.chooseBestFeatureToSplit(mydata)
#print(index)
# The bare triple-quoted strings below hold disabled code; they are evaluated
# and discarded, so they have no effect at run time.
'''
mytree = trees.createTree(mydata,features)
print(mytree)
'''
import treePlotter
'''
mytree = treePlotter.retrieveTree(0)
treePlotter.createPlot(mytree)
mytree['no surfacing'][3] = 'maybe'
treePlotter.createPlot(mytree)
'''

# Classify two feature vectors with the tree returned by retrieveTree(0).
mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree,features,[0,0]))
print(trees.classify(mytree,features,[1,1]))

# Write the tree to disk, read it back and show the reloaded copy.
trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)


# Build and plot a tree from the tab-separated records in lenses.txt.
# The ``with`` block closes the file once the rows are parsed (the original
# left the handle open).
with open('lenses.txt') as fr:
    lense = [inst.strip().split('\t') for inst in fr]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense,lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
Ejemplo n.º 6
0
# Build a tree from the sample data set; the result is not kept.
import trees
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot the tree
import treePlotter
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify with the decision tree (the return value is discarded)
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Contact-lens data set test
import trees
import treePlotter
# Parse inside ``with`` so the file is closed after reading (it was left open).
with open('lenses.txt') as fr:
    ds = [example.strip().split("\t") for example in fr]
# Third label was misspelled 'antigmatic'; it becomes a key of the tree.
ls = ['age', 'prescript', 'astigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
treePlotter.createPlot(mt)
Ejemplo n.º 7
0
# NOTE(review): dataset and labels are defined before the visible part of this
# script. The print statements use Python 2 syntax.

# Build the tree and show it (printed heading: "decision tree:").
print '决策树:'
Tree = trees.createTree(dataset, labels)
print Tree
# Indexing keys() directly requires it to return a list (Python 2 behaviour).
firstFeature = Tree.keys()[0]
print firstFeature
firstFeatureValues = Tree[firstFeature].keys()
print firstFeatureValues
print '\n'

treePlotter.createPlot(Tree)

# Classify one hand-written record (printed heading: "test data").
testVec = ['pre', 'myope', 'yes', 'normal']
print '测试数据'
print testVec
# NOTE(review): presumably puts back a label that createTree removed from the
# list — confirm against trees.createTree.
labels.append('tearRate')
# Printed headings: "matching process:" / "matching result:".
print '匹配过程:'
result = trees.classify(Tree, labels, testVec)
print '匹配结果:'
print result
print '\n'

# Store the tree on disk (printed message: "storing the tree on disk...")
print '将树存放磁盘...'
trees.storeTree(Tree, 'myTree.txt')
print '\n'

# Read the tree back from disk (printed heading: "reading the tree back from disk:")
print '再从磁盘中读取树:'
print trees.grabTree('myTree.txt')
Ejemplo n.º 8
0
# print(myDat)
# print(trees.calcShannonEnt(myDat))

# 三个参数 数据集 要划分的特征 特征值
# 在数据集中找特征等于特征值的项
# print(trees.splitDataSet(myDat,0,1))

# 选择最适合分类的一个特征
# print(trees.chooseBestFeatureToSplit(myDat))

# 树结构 字典
# print(trees.createTree(myDat,labels))

# 画出树结构
# treePlotter.createPlot()

# 树的节点数和深度
# print(treePlotter.getNumleafs(trees.createTree(myDat,labels)))
# print(treePlotter.getTreeDepth(trees.createTree(myDat,labels)))

#
# treePlotter.createPlot(trees.createTree(myDat,labels))

# Classifier test fixture: a hand-written two-level tree.
myTree = {
    'no surfacing': {
        0: 'no',
        1: {'flippers': {0: 'no', 1: 'yes'}},
    },
}
# print(trees.classify(myTree,labels,[1,1]))

# Exercise storing and reloading the decision-tree model.
storagePath = 'testClassify.txt'
trees.storeTree(myTree, storagePath)
reloadedTree = trees.grabTree(storagePath)
print(reloadedTree)
Ejemplo n.º 9
0
# Build the sample tree, round-trip it through disk, then repeat the tree
# building on the contact-lens data and classify one sample vector.
myDat, labels = trees.createDataSet()
print(myDat)

# print(trees.calcShannonEnt(myDat))
# data_set = trees.splitDataSet(myDat, 0, 1)
# print(data_set)

print(trees.chooseBestFeatureToSplit(myDat))

# createTree gets its own copy of the label list so the classify call at the
# end still sees every label even if createTree modifies its argument.
myTree = trees.createTree(myDat, list(labels))
print(myTree)

# treePlotter.createPlot()

# Some code was lost here when the source was rolled back.
trees.storeTree(myTree, 'classifierStorage.npy')
tree = trees.grabTree('classifierStorage.npy')
print(tree)

# print(tree)

# Parse inside ``with`` so the file is closed after reading (it was left open).
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
# Columns: age, prescript (symptom), astigmatic (astigmatic or not),
# tearRate (amount of tears)
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)

treePlotter.createPlot(lensesTree)

print(trees.classify(myTree, labels, [1, 0]))
Ejemplo n.º 10
0
# -*- coding=utf-8 -*-
"""
make_lenses_tree
Date: 16/2/27
Company: Copyright (c) 2016 Ninlgde co.,Ltd. All right reserved.
"""

import trees
import treePlotter as tp

__author__ = "Ninlgde"

if __name__ == "__main__":
    # Read the tab-separated lens records; ``with`` closes the file once the
    # rows are parsed (the original never closed it).
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split("\t") for inst in fr]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lensesTree = trees.createTree(lenses, lensesLabels)
    # Plot the copy reloaded from disk rather than the in-memory tree.
    trees.storeTree(lensesTree, "lenses_tree.bin")
    tree = trees.grabTree("lenses_tree.bin")
    tp.createPlot(tree)
Ejemplo n.º 11
0
import trees
import treePlotter

# Store the classifier on disk so that it persists between runs.
# Raw strings keep the Windows backslashes literal. The original non-raw
# literals only worked because sequences such as "\m", "\C" and "\l" are not
# recognised escapes, which newer Python versions warn about.
ch03Dir = r'F:\python库包\机器学习实战源代码\machinelearninginaction\Ch03'
storagePath = ch03Dir + '\\classifierStorage.txt'

myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
trees.storeTree(myTree, storagePath)
trees.grabTree(storagePath)

# Parse inside ``with`` so the file is closed after reading (it was left open).
with open(ch03Dir + '\\lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)  # build the decision tree
print(lensesTree)
treePlotter.createPlot(lensesTree)  # draw it
Ejemplo n.º 12
0
# name   industry   profession    sex    photography   road trips   SNS power user   github   bypasses the GFW   frequent reader   sci-fi fan   wide-ranging interests   bragging   class

def getTrainingDatas():
    """Return the hand-labelled training samples and their feature names.

    Returns:
        A ``(dataSet, labels)`` pair. ``dataSet`` is a list of rows, each a
        list of twelve feature values (industry, profession, sex, then nine
        0/1 flags) followed by the class name. ``labels`` lists the twelve
        feature names in column order. Fresh lists are built on every call.
    """
    feature_names = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby", "brag",
    ]

    # Every sample shares the same industry and sex, so only the varying
    # parts are tabulated: (profession, nine 0/1 flags, class name).
    industry, sex = "it", "man"
    varying_parts = (
        ("gm",       (1, 1, 0, 0, 0, 0, 0, 1, 0), "liver"),
        ("engineer", (0, 1, 0, 0, 0, 0, 0, 0, 0), "empty"),
        ("sale",     (0, 1, 0, 0, 0, 0, 0, 0, 1), "liver"),
        ("founder",  (0, 1, 1, 0, 0, 0, 0, 1, 0), "boss"),
        ("phd",      (1, 0, 0, 0, 0, 1, 0, 1, 0), "liver, fake hacker"),
        ("engineer", (0, 1, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 0, 0, 1, 0, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
    )
    samples = [
        [industry, profession, sex] + list(flags) + [category]
        for profession, flags, category in varying_parts
    ]
    return samples, feature_names

if __name__ == "__main__":
    if len(sys.argv) > 1:
        # classify test, tortoise
        classmate = ["tortoise", "it", "engineer",       "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        dataSet, labels = getTrainingDatas()
        tree = trees.grabTree("cm_tree.txt")
        print "{} is \"{}\"".format(classmate[0], trees.classify(tree, labels, classmate[1:]))
    else:
        # training
        dataSet, labels = getTrainingDatas()
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
Ejemplo n.º 13
0
# -*- coding:utf-8 -*-
import trees

myData,myLabels = trees.createDataSet()
# Keep an independent copy of the labels for the classify call further down.
# NOTE(review): presumably needed because createTree modifies the list it is
# given — confirm against trees.createTree.
testLabels = myLabels.copy()
print ('myData is ' , myData)

# Compute the Shannon entropy of the unordered data set
#myShannonEnt = trees.calcShannonEnt(myData)
#print ('myShannonEnt is ' , myShannonEnt )

### Test the data-set splitting function
#mySplitDat = trees.splitDataSet(myData, 1, 0)
#print ('mySplitDat is ' , mySplitDat )

#myBestData = trees.chooseBestFeatureToSplit(myData)
#print ('myBestData is ' , myBestData )

myTree = trees.createTree(myData, myLabels)
print ('myTree is ' ,myTree)


# Test against the training set
print ('testLabels is ' ,testLabels)
testResult = trees.classify(myTree, testLabels, [1,1])
print ('testResult is ' ,testResult)

# The store call is disabled, so grabTree reads whatever
# 'classifierStorage.txt' already exists on disk.
#trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')
print ('fromFileTree is' , fromFileTree)
Ejemplo n.º 14
0
# Plot the tree returned by retrieveTree(0), then add an extra branch (key 3)
# under 'no surfacing' and plot it again.
myTree = tp.retrieveTree(0)
tp.createPlot(myTree)

myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)

# Classify two feature vectors with a fresh copy of the tree.
myDat,labels = tr.createDataSet()
print(labels)
myTree = tp.retrieveTree(0)
print(myTree)
print(tr.classify(myTree, labels, [1,0]))
print(tr.classify(myTree, labels, [1,1]))


# Restore the tree from disk and print it.
restoreTree = tr.grabTree('classifierStorage.txt')
print(restoreTree)


# Contact-lens data: parse inside ``with`` so the file is closed after
# reading (it was left open). A single strip() replaces the original
# back-to-back strip().strip(), which had the same result.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age' , 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses,lensesLabels)
print(lensesTree)

tp.createPlot(lensesTree)


#
fr = open('lensesCN.txt')
Ejemplo n.º 15
0
import math
import operator
import matplotlib.pyplot as plt
import pickle
import trees
import treeplotter

# Round-trip the tree from retrieveTree(0) through disk.
storagePath = 'classifierstorage.txt'
myDat, labels = trees.createDataSet()
myTree = treeplotter.retrieveTree(0)
trees.storeTree(myTree, storagePath)
ans = trees.grabTree(storagePath)

# Record the textual form of the reloaded tree.
with open('output.out', 'w') as outFile:
    outFile.write(str(ans))
import trees
import treePlotter

myDat, labels = trees.createDataSet()
print labels
myTree = treePlotter.retrieveTree(0)
print myTree
print trees.classify(myTree, labels, [1, 0])
print trees.classify(myTree, labels, [1, 1])

# trees.storeTree(myTree, 'classifierStorage.txt')
print trees.grabTree('classifierStorage.txt')

fr = open('lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print lensesTree
treePlotter.createPlot(lensesTree)
Ejemplo n.º 17
0
# _*_ coding:utf-8 _*_
import trees
# Earlier experiments, kept for reference. They used to sit in bare
# triple-quoted strings, which are evaluated and discarded at run time.
#
#   mydat,labels=trees.createDataSet()
#   result=trees.splitDataSet(mydat,0,1)
#   print  (result)
#
#   mydat,labels=trees.createDataSet()
#   print (trees.chooseBestFeatureToSplit(mydat))
#
#   mydat,labels=trees.createDataSet()
#   mytree=trees.createTree(mydat,labels)
#   print (mytree)

# Store the tree returned by retrieveTree(0), read it back and show it.
storage_path = 'classStorage.txt'
mydat, labels = trees.createDataSet()
mytree = trees.retrieveTree(0)
trees.storeTree(mytree, storage_path)
reloaded = trees.grabTree(storage_path)
print(reloaded)
Ejemplo n.º 18
0
                            0: 'no',
                            1: 'yes'
                        }
                    },
                    1: 'no'
                }
            }
        }
    }]
    return listOfTrees[i]


if __name__ == '__main__':
    import trees
    import treePlotter

    # Build and plot the tree for the sample data set.
    dataSet, labels = trees.createDataSet()
    myTree = trees.createTree(dataSet, labels)
    print(myTree)
    treePlotter.createPlot(myTree)

    # Same again for the tab-separated lens records; ``with`` closes the file
    # once the rows are parsed (the original never closed it).
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    print(lensesTree)
    treePlotter.createPlot(lensesTree)

    # Exercise the store/load pair; the reloaded tree is not used further.
    trees.storeTree(lensesTree, 'test.txt')
    trees.grabTree('test.txt')
Ejemplo n.º 19
0
# NOTE(review): myTree is defined before the visible part of this script.
# The trailing comments record the values the author observed.
n = TP.getNumLeafs(myTree)  # 3
d = TP.getTreeDepth(myTree)  # 2

TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
myTree = TP.retrieveTree(0)
class1 = DT.classify(myTree, labels, [1, 0])  # no
class2 = DT.classify(myTree, labels, [1, 1])  # yes

# storing the tree in pickled form
DT.storeTree(myTree, 'data/classifierStorage.txt')
# {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
grabedTree = DT.grabTree('data/classifierStorage.txt')

# lens tree
# Parse inside ``with`` so the file is closed after reading (it was left open).
with open('data/lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = DT.createTree(lenses, lensesLabels)
"""
output:
{'tearRate': {'reduced': 'no lenses',
  'normal': {'astigmatic': {'yes': {'prescript': {'myope': 'hard',
      'hyper': {'age': {'young': 'hard',
        'presbyopic': 'no lenses',
        'pre': 'no lenses'}}}},
    'no': {'age': {'young': 'soft',
Ejemplo n.º 20
0
# -*- coding: utf-8 -*-
import treeplot
import trees
trees.storeTree(myTree , 'classifierStorage')
print trees.grabTree('classifierStorage.txt')
    `
Ejemplo n.º 21
0
labels

# NOTE(review): myData and labels are defined before the visible part of this
# script. Bare expressions such as ``myTree`` only echo a value in an
# interactive session.

# Split the data on a given column and value
trees.splitDataSet(myData, 0, 1)
trees.splitDataSet(myData, 0, 0)

# Find the best way to split the data set (computed from entropy)
trees.chooseBestFeatureToSplit(myData)

# Build the decision tree
myTree = trees.createTree(myData, labels)
myTree

# Check the decision tree on test data
# NOTE(review): spelled creatDataSet here — confirm the trees module really
# exposes that name rather than createDataSet.
myData, labels = trees.creatDataSet()
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

# Store the decision tree
trees.storeTree(myTree, 'classifierStore.txt')

# Load the decision tree that was stored as a file
trees.grabTree('classifierStore.txt')

# Build the contact-lens type tree
# Parse inside ``with`` so the file is closed after reading (it was left open).
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
Ejemplo n.º 22
0
import trees
import treePlotter


def test():
    """Print the fixed string ``hello world``."""
    # The call form prints the same text under Python 2 and Python 3 for a
    # single argument, and matches the print(...) calls in the rest of this
    # script; the original print statement is a syntax error on Python 3.
    print("hello world")


if __name__ == '__main__':
    # Training is disabled; the tree is read from a previously stored file.
    # train_data, labels = trees.createDataSet()
    # my_trees = trees.createTree(train_data, labels)
    # print(my_trees)
    #trees.storeTree(my_trees, 'classifiermelon.txt')

    melon_tree = trees.grabTree('classifiermelon.txt')
    print(melon_tree)

    # Classify a sample whose six features are all 1.
    melon_labels = ['color', 'root', 'sound', 'texture', 'navel', 'touch']
    melon_feature = [1] * len(melon_labels)
    prediction = trees.classify(melon_tree, melon_labels, melon_feature)
    print("the predicted result is:", prediction)

    treePlotter.createPlot(melon_tree)
    # print(treePlotter.getNumLeafs(my_trees), treePlotter.getTreeDepth(my_trees))

    # ent = trees.calcShannonEnt(train_data)
    # feature1 = trees.splitDataSet(train_data, 0, 0)
    # feature2 = trees.splitDataSet(train_data, 0, 1)
    # best_feature = trees.chooseBestFeatureToSplit(train_data)
    # print(ent)
    # print(feature1, feature2)
Ejemplo n.º 23
0

# main
# Read the lens data and build the tree. ``with`` closes the file once the
# rows are parsed (the original never closed it).
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = createTree(lenses,lensesLabels)
print(lensesTree)

# plot tree
tP.createPlot(lensesTree)
# Classify a new record. The label list is rebuilt before it is handed to
# classify, as in the original.
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
testVec=['young','hyper','yes','normal']
result=classify(lensesTree,lensesLabels, testVec)
print(result)

# Store the built tree, then load it back
tr_f.storeTree(lensesTree,'ClassfyTree_lenses.txt')
load_tree=tr_f.grabTree('ClassfyTree_lenses.txt')
print(load_tree)
# 原始数据集分类
#lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
#classify(lensesTree, lensesLabels, lenses[0][:-1])
#
#preds = []
#for i in range(len(lenses)):
#    pred = classify(lensesTree, lensesLabels, lenses[i][:-1])
#    preds.append(pred)
#print(preds)