Example #1
def test_classify():
    mydat, labels = createDataSet()
    import treePlotter
    myTree = treePlotter.retrieveTree(0)
    print(myTree)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
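All of these examples lean on treePlotter.retrieveTree, which is never shown on this page. A minimal sketch consistent with the nested-dict tree quoted in the comments further down (Example #8); the second entry is only illustrative, the real treePlotter module may store a different tree:

def retrieveTree(i):
    # Hard-coded trees used to test plotting and classification
    # without rebuilding a tree every time.
    listOfTrees = [
        {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
        # illustrative second entry; the real module may differ
        {'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}},
    ]
    return listOfTrees[i]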
Example #2
def test_store_tree():
    mydat, labels = createDataSet()
    import treePlotter
    myTree = treePlotter.retrieveTree(0)
    print(myTree)
    storeTree(myTree, 'classifierStorage.txt')
    print(grabTree('classifierStorage.txt'))
def test():
    dataSet, labels = createDataSet()
    # sdataSet = splitDataSet(dataSet, 0, 1)
    # ent = calcShannonEnt(dataSet)
    # bestFeature = chooseBestFeatureToSplit(dataSet)
    # myTree = createTree(dataSet, labels)
    import treePlotter
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 1]))
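classify itself only appears as a fragment later on this page (Example #14); a self-contained Python 3 version consistent with that fragment:

def classify(inputTree, featLabels, testVec):
    # Walk the nested dict until a leaf (non-dict) label is reached.
    firstStr = next(iter(inputTree))        # feature tested at this node
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)  # map the feature name to a position in testVec
    classLabel = None
    for key, subtree in secondDict.items():
        if testVec[featIndex] == key:
            classLabel = classify(subtree, featLabels, testVec) if isinstance(subtree, dict) else subtree
    return classLabel

For retrieveTree(0) and labels ['no surfacing', 'flippers'], this returns 'no' for [1, 0] and 'yes' for [1, 1], matching the comments in Example #8.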
Example #4
def main7():
    '''
    Build the decision tree and store it as a .txt file,
    then load the decision tree back from that .txt file.
    '''
    import treePlotter
    myDat, labels = createDataSet()
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 0]))
    storeTree(myTree, 'classifierStorage.txt')
    print(grabTree('classifierStorage.txt'))
import treePlotter as tp

print(tp.retrieveTree(0))
print(tp.retrieveTree(1))
myTree = tp.retrieveTree(0)
print(tp.getNumLeafs(myTree))
print(tp.getTreeDepth(myTree))
# tp.createPlot(myTree)
tp.createPlot(tp.retrieveTree(1))
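getNumLeafs and getTreeDepth are also assumed rather than shown; minimal recursive sketches over the same nested-dict format, which should reproduce the 3 and 2 printed above for retrieveTree(0):

def getNumLeafs(tree):
    # A leaf is any branch value that is not another dict.
    first = next(iter(tree))
    return sum(getNumLeafs(sub) if isinstance(sub, dict) else 1
               for sub in tree[first].values())

def getTreeDepth(tree):
    # Depth counts decision nodes along the longest path.
    first = next(iter(tree))
    return 1 + max((getTreeDepth(sub) if isinstance(sub, dict) else 0)
                   for sub in tree[first].values())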
Example #6
                classLabel = secondDict[key]

    return classLabel


def storeTree(inputTree, filename):
    """Store (pickle) the tree to a file."""
    import pickle
    with open(filename, 'wb') as f:
        pickle.dump(inputTree, f)


def grabTree(filename):
    """Load the pickled tree back from a file."""
    import pickle
    with open(filename, 'rb') as f:
        return pickle.load(f)


if __name__ == '__main__':
    import treePlotter as tp
    # myDat, labels = createDataSet()
    # print(labels)
    myTree = tp.retrieveTree(0)
    print(myTree)
    # result = classify(myTree, labels, [1, 1])
    # print(result)

    # storeTree(myTree, 'classifierStorage.txt')
    tree = grabTree('classifierStorage.txt')
    print(tree)
Example #7
import trees as tr
import treePlotter as tp

fr = open('lensesCN.txt', encoding='utf-8')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
# lensesLabels = ["年龄组", "规定", "闪光", "泪液扫除率"]  # Chinese feature names for lensesCN.txt, matching the English labels below
lensesLabels = ['age' , 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses,lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

dataSet, labels = tr.createDataSet()

shannonEnt = tr.calcShannonEnt(dataSet)

print(shannonEnt)

print(tp.retrieveTree(1))

myTree = tp.retrieveTree(0)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)

print(numLeafs)
print(treeDepth)


myTree = tp.retrieveTree(0)
tp.createPlot(myTree)

myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)
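calcShannonEnt, used above on the toy data set, is the book-style Shannon entropy over the class labels stored in the last column of each row. A minimal sketch; for the usual 5-row set (2 'yes', 3 'no') it should print roughly 0.971:

from math import log

def calcShannonEnt(dataSet):
    # H = -sum(p * log2(p)) over the class labels in the last column.
    counts = {}
    for row in dataSet:
        counts[row[-1]] = counts.get(row[-1], 0) + 1
    n = len(dataSet)
    return -sum(c / n * log(c / n, 2) for c in counts.values())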
Example #8
splittedDat = DT.splitDataSet(myDat, 0, 1)  # [[1, 'yes'], [1, 'yes'], [0, 'no']]

splittedDat = DT.splitDataSet(myDat, 0, 0)  # [[1, 'no'], [1, 'no']]

bestFeature = DT.chooseBestFeatureToSplit(myDat)  # 0

myTree = DT.createTree(myDat, labels)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}

import treePlotter as TP

# TP.createPlot()
myTree = TP.retrieveTree(0)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
n = TP.getNumLeafs(myTree)  # 3
d = TP.getTreeDepth(myTree)  # 2

TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
myTree = TP.retrieveTree(0)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
class1 = DT.classify(myTree, labels, [1, 0])  # no
class2 = DT.classify(myTree, labels, [1, 1])  # yes

# store the tree in pickled form
DT.storeTree(myTree, 'data/classifierStorage.txt')
grabbedTree = DT.grabTree('data/classifierStorage.txt')
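The expected outputs in the comments above pin down what splitDataSet does: keep the rows whose value at position axis equals value, and drop that column from each kept row. A minimal sketch:

def splitDataSet(dataSet, axis, value):
    # e.g. splitDataSet(myDat, 0, 1) -> [[1, 'yes'], [1, 'yes'], [0, 'no']]
    return [row[:axis] + row[axis + 1:] for row in dataSet if row[axis] == value]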
Example #9
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    createPlot.ax1 = plt.subplot(111, frameon=False)  # create the subplot axes
    plotNode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionNode)
    plotNode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafNode)
    plt.show()


# Example program that draws the tree-node shapes
createPlot()

# Count the number of leaves and the depth of the tree

import treePlotter

myTree = treePlotter.retrieveTree(0)  # load the tree structure produced above directly
print('number of leaf nodes:', treePlotter.getNumLeafs(myTree))  # the leaf count determines the x-axis width
print('tree depth:', treePlotter.getTreeDepth(myTree))  # the depth determines the y-axis height

# Plot the complete decision tree
print('Plotting the complete decision tree')
treePlotter.createPlot(myTree)

### Build a decision tree from the data and use it for prediction
import W_tree
import treePlotter
myDat, labels = W_tree.createDataSet()
myTree = treePlotter.retrieveTree(0)

print('classification result for [1, 0]:', W_tree.classify(myTree, labels, [1, 0]))  # no
print('classification result for [1, 1]:', W_tree.classify(myTree, labels, [1, 1]))  # yes
Example #10
def test1():
    mydat, labels = createDataSet()
    import treePlotter
    myTree = treePlotter.retrieveTree(0)
    print(myTree)
    print(classify(myTree, labels, [1, 1]))
Example #11
    import pickle
    fr = open(filename, 'rb')
    return pickle.load(fr)


if __name__ == "__main__":
    myDat, labels = createDataSet()
    # print(calcShannonEnt(myDat))
    # print(splitDataSet(myDat, 0, 1))
    # print(splitDataSet(myDat, 0, 0))
    # print(chooseBestFeatureToSplit(myDat))
    # myTree = createTree(myDat, labels)
    # print(myTree)

    from treePlotter import retrieveTree, createPlot
    #
    myTree = retrieveTree(0)
    # print(myTree)
    # print(classify(myTree, labels, [1, 0]))
    # print(classify(myTree, labels, [1, 1]))

    # storeTree(myTree, 'classifierStorage.txt')
    # print(grabTree('classifierStorage.txt'))

    fr = open('lenses.txt')
    lenses = [inst.strip().split("\t") for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print(lensesTree)
    createPlot(lensesTree)
import trees
import treePlotter
treePlotter.retrieveTree(1)
myTree = treePlotter.retrieveTree(0)

fr = open('lenses.txt')
lenses=[inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)

Example #13
def testDumpAndLoadDecisionTree():
    myTree = treePlotter.retrieveTree(0)
    storeTree(myTree, 'classTree.txt')
    print(grabTree('classTree.txt'))
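A slightly stronger variant of the same test checks the pickle round trip explicitly instead of only printing it (hypothetical test name, same storeTree/grabTree helpers):

def test_dump_load_round_trip():
    myTree = treePlotter.retrieveTree(0)
    storeTree(myTree, 'classTree.txt')
    # The reloaded nested dict should compare equal to the original.
    assert grabTree('classTree.txt') == myTree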
Example #14
    firstStr = list(inputTree.keys())[0]  # the feature tested at the root of this subtree
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)  # convert the label string to an index
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                classLabel = secondDict[key]
    return classLabel


myDat, labels = createDataSet()

#myTree = createTree(myDat,labels)

print(labels)

print(treePlotter.retrieveTree(1))
print(treePlotter.retrieveTree(0))
myTree = treePlotter.retrieveTree(0)
print(classify(myTree, labels, [1, 0]))
print(classify(myTree, labels, [1, 1]))
#treePlotter.createPlot()

print('------- contact lens data -------')
fr = open('/Users/wakemeup/Documents/MLiA/ch03/lenses.txt')
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = createTree(lenses, lensesLabels)
print(lensesTree)
Example #15
    import pickle
    fw = open(filename, 'wb')
    pickle.dump(inputTree, fw)
    fw.close()


def grabTree(filename):
    import pickle
    fr = open(filename, 'rb')
    return pickle.load(fr)


if __name__ == "__main__":
    myDat, labels = createDataSet()
    print(labels)
    myTrees = tplt.retrieveTree(0)
    print(myTrees)
    print(classify(myTrees, labels, [1, 0]))
    print(classify(myTrees, labels, [1, 1]))
    storeTree(myTrees, "classifierStorage.txt")
    print("Saved!")
    gt = grabTree("classifierStorage.txt")
    print("Loaded!")
    print(gt)
    fr = open("lenses.txt")
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ["age", "prescript", "astigmatic", "tearRate"]
    lenseTree = createTree(lenses, lensesLabels)
    print(lenseTree)
    tplt.createPlot(lenseTree)
# -*- coding: utf-8 -*-

# Find the feature that best splits the data set (largest information gain)
import trees
ds, ls = trees.createDataSet()
trees.chooseBestFeatureToSplit(ds)

# Build the decision tree
import trees
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Plot the tree
import treePlotter
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify with the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')
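chooseBestFeatureToSplit is called here (and returns 0 for the toy data set, per the comment in Example #8) but never shown. A minimal information-gain version, assuming the calcShannonEnt and splitDataSet sketches given earlier on this page:

def chooseBestFeatureToSplit(dataSet):
    # Return the index of the feature whose split gives the largest information gain.
    baseEnt = calcShannonEnt(dataSet)
    bestGain, bestFeat = 0.0, -1
    for i in range(len(dataSet[0]) - 1):          # the last column is the class label
        newEnt = 0.0
        for value in {row[i] for row in dataSet}:
            subset = splitDataSet(dataSet, i, value)
            newEnt += len(subset) / len(dataSet) * calcShannonEnt(subset)
        if baseEnt - newEnt > bestGain:
            bestGain, bestFeat = baseEnt - newEnt, i
    return bestFeat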
Example #17
def main():
    dataSet, labels = createDataSet()
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
Example #18
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel:{}}
    del(labels[bestFeat])              # remove the chosen feature label so the recursion works on the remaining features
    featValues = [example[bestFeat] for example in dataSet]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        subLabels = labels[:]       #copy all of labels, so trees don't mess up existing labels
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
    return myTree


# Storing the decision tree
def storeTree(inputTree, filename):
    import json
    with open(filename, 'w') as f:
        f.write(json.dumps(inputTree))

def grabTree(filename):
    import json
    with open(filename) as f:
        return json.loads(f.read())

# dataSet, labels = createDataSet()
# tree = createTree(dataSet, labels)
# storeTree(tree,'classifierStorage.txt')


import treePlotter

myTree = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree)
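One caveat with the json-based storeTree/grabTree above: json turns the integer branch keys 0 and 1 into the strings '0' and '1', so a reloaded tree no longer matches integer-valued test vectors in classify. A quick way to see (and undo) the effect:

import json

tree = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
loaded = json.loads(json.dumps(tree))
print(loaded)  # branch keys come back as '0' and '1'

def restoreIntKeys(node):
    # Illustrative helper: convert purely numeric string keys back to int.
    if not isinstance(node, dict):
        return node
    return {(int(k) if isinstance(k, str) and k.lstrip('-').isdigit() else k): restoreIntKeys(v)
            for k, v in node.items()}

print(restoreIntKeys(loaded))  # branch keys are integers again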
Example #19
# plt.xlabel('count')
# plt.ylabel('result')
# plt.title('Hahaha Goooood!!!')
# fig.savefig('plot.svg')

# import matplotlib
# matplotlib.use('Agg')
# import matplotlib.pyplot as plt
# fig = plt.figure(1, facecolor='white')
# fig.clf()
# ax = plt.subplot(111, frameon=True)
# # ax.scatter([.2, .5], [.1, .5])
# plt.figure(1, figsize=(3,3))
# ax = plt.subplot(111)
# ax.annotate("Test", xy=(0.2, 0.2), xycoords='data', xytext=(0.8, 0.8),
# textcoords='data', size=20, va="center", ha="center",
# bbox=dict(boxstyle="round4", fc="w"),
# arrowprops=dict(arrowstyle="-|>",
# connectionstyle="arc3,rad=-0.2", fc="w"), )
# ax.annotate("This is my text", xy=(0.2, 0.1), xycoords='data',
#     xytext=(0.4, 0.3), textcoords='data', ha='center', va='center',
#     arrowprops=dict(arrowstyle="->", connectionstyle="arc3"), )

# fig.savefig('plot.svg')

# import textPlotter
# textPlotter.createPlot()

import treePlotter
treePlotter.createPlot(treePlotter.retrieveTree(0))
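The commented-out annotate calls above are essentially what treePlotter.plotNode does; together with the demo createPlot fragment in Example #9 they form a runnable sketch (the exact box and arrow styles are an assumption):

import matplotlib.pyplot as plt

decisionNode = dict(boxstyle='sawtooth', fc='0.8')
leafNode = dict(boxstyle='round4', fc='0.8')
arrow_args = dict(arrowstyle='<-')

def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    # Draw one node box at centerPt with an arrow coming from parentPt.
    createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',
                            xytext=centerPt, textcoords='axes fraction',
                            va='center', ha='center', bbox=nodeType,
                            arrowprops=arrow_args)

def createPlot():
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    createPlot.ax1 = plt.subplot(111, frameon=False)
    plotNode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionNode)
    plotNode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafNode)
    plt.show()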
Example #20
# Print the manually created data set and compute its Shannon entropy
myDat, labels = trees.createDataSet()
print("myDat, the data set:", myDat)
print("\nlabels, the feature labels:", labels)
rawCalc = trees.calcShannonEnt(myDat)
print("\ncalcShannonEnt(myDat), the raw entropy of the data set:", rawCalc)
print("\ntrees.splitDataSet(myDat, 1, 1), the rows where feature[1] == 1 (i.e. flippers == 1):", trees.splitDataSet(myDat, 1, 1))
#
bestLabel = trees.chooseBestFeatureToSplit(myDat)
print("\nchooseBestFeatureToSplit(myDat), index of the best feature:", bestLabel, "\tlabels[bestLabel], the best feature:", labels[bestLabel])
#
myTree = trees.createTree(myDat, labels)
print("\ntrees.createTree(myDat, labels), the tree built from the data set:", myTree)
# Load the pre-stored tree [0] and plot it
print("\nLoading pre-stored tree [0] and drawing the first figure:")
myTree0 = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree0)
# Load the pre-stored tree [1] and plot it
print("\nLoading pre-stored tree [1] and drawing the second figure:")
myTree1 = treePlotter.retrieveTree(1)
treePlotter.createPlot(myTree1)

# change one entry under "no surfacing"
# and plot again
'''
myTree['no surfacing'][3] = 'maybe'
print('after change is:')
print(myTree)
treePlotter.createPlot(myTree)
'''
Example #21
    # calculate shannonEnt
    print('-------------- calculate shannonEnt --------------------')
    shannonEnt = calcShannonEnt(myDat)
    print(shannonEnt)
    # split dataset
    print('-------------- split dataset --------------------')
    print(splitDataSet(myDat, 0, 1))
    print(splitDataSet(myDat, 0, 0))
    # get best feature
    print('-------------- best feature --------------------')
    print('best feature:', chooseBestFeatureToSplit(myDat))
    print('createTree', createTree(myDat, labels))

    # plot trees
    print('-------------- plot trees --------------------')
    myTree = tp.retrieveTree(0)
    print('myTree', myTree)
    print('labels', labels)
    print('numLeafs', tp.getNumLeafs(myTree))
    print('treeDepth', tp.getTreeDepth(myTree))
    # tp.createPlot(myTree)

    # update dict and plot again
    # myTree['no surfacing'][3] = 'maybe'
    # tp.createPlot(myTree)

    # classify
    print('-------------- classify --------------------')
    myDat, labels = createDataSet()
    print('labels', labels)
    myTree = tp.retrieveTree(0)
import trees
import treePlotter

myDat, labels = trees.createDataSet()
print(myDat)
print(trees.calcShannonEnt(myDat))
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))
print(trees.splitDataSet(myDat, 1, 1))
print(trees.chooseBestFeatureToSplit(myDat))
print(trees.createTree(myDat, labels))

treePlotter.createPlot()
print('createPlot over')

print(treePlotter.retrieveTree(1))
myTree = treePlotter.retrieveTree(0)
print(treePlotter.getNumLeafs(myTree))
print(treePlotter.getTreeDepth(myTree))
Example #23
import treePlotter

if __name__ == '__main__':
    print(treePlotter.retrieveTree(1))
    myTree = treePlotter.retrieveTree(1)
    print(treePlotter.getNumLeafs(myTree))
    print(treePlotter.getTreeDepth(myTree))
# -*- coding:utf-8 -*-

import trees
import treePlotter


def createDataSet():
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no']
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


myDat, labels = createDataSet()
print(myDat)
# print(trees.calcShannonEnt(myDat))   # higher entropy means more mixed data; adding a 'maybe' class via myDat[0][-1] = 'maybe' raises the entropy further

# print(trees.splitDataSet(myDat, 0, 1))  # for each row of dataSet, check whether the value at index 0 equals 1; if it does, keep the row with that column removed
# print(trees.chooseBestFeatureToSplit(myDat))

# print(trees.createTree(myDat, labels))
myTree = treePlotter.retrieveTree(0)  # the pre-built decision tree
print(trees.classify(myTree, labels, [1, 1]))
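Several of the scripts on this page rebuild labels (or work on a copy) before calling classify, because the book-style createTree deletes the chosen feature from the labels list in place (see the del(labels[bestFeat]) line in Example #18). A defensive pattern, assuming that behaviour:

import trees

myDat, labels = trees.createDataSet()
myTree = trees.createTree(myDat, labels[:])    # pass a copy; createTree del()s from the list it is given
print(labels)                                  # still ['no surfacing', 'flippers']
print(trees.classify(myTree, labels, [1, 1]))  # 'yes'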