Example #1
0
def testLenseDataset():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Each line of the file is stripped and split on tab characters into one
    row of the data set.  The rows and four feature labels are passed to
    ``createTree`` and the resulting tree is handed to ``createPlot``.
    """
    from trees import createTree
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    createPlot(lensesTree)
def lenses():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Every line of the file is stripped and split on tab characters to form
    one row of the data set.  The rows and four feature labels are passed to
    ``trees.createTree`` and the resulting tree to ``createPlot``.

    Returns:
        None.
    """
    # Close the file as soon as the rows are parsed, even on error.
    with open('lenses.txt') as fr:
        rows = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(rows, lensesLabels)
    createPlot(lensesTree)
Example #3
0
	def testClassify(self):
		"""Check ``trees.classify`` on two feature vectors of the sample data."""
		samples, featNames = TreesTestCase.createDataSet()
		fitted = trees.createTree(samples, featNames)
		# (feature vector, expected class) pairs, checked in the original order.
		for vector, expected in (([1, 0], 'no'), ([1, 1], 'yes')):
			self.assertEqual(trees.classify(fitted, featNames, vector), expected)
def lenses():
    """Read ``lenses.txt``, build a decision tree from it and plot the tree.

    The file is parsed as one tab-separated row per line (after stripping
    surrounding whitespace).  The tree comes from ``trees.createTree`` and is
    drawn with ``createPlot``.

    Returns:
        None.
    """
    # ``with`` guarantees the file handle is released once parsing is done.
    with open('lenses.txt') as fr:
        rows = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(rows, lensesLabels)
    createPlot(lensesTree)
Example #5
0
def create_tree():
    """Build a decision tree from the training data supplied by ``rawdata``.

    Returns:
        The value returned by ``trees.createTree`` for the rows from
        ``rawdata.get_train_data()`` and the attribute list from
        ``rawdata.get_attr_value()``.
    """
    lenses = rawdata.get_train_data()
    lenses_lables = rawdata.get_attr_value()
    return trees.createTree(lenses, lenses_lables)
Example #6
0
 def test_createPlot(self):
     """Build the sample tree, add one extra branch and plot it."""
     samples, featNames = trees.createDataSet()
     print("\n dataSet == %s" % (samples))
     fitted = trees.createTree(samples, featNames)
     # Add one more entry (key 3) under the 'no surfacing' node before plotting.
     fitted['no surfacing'][3] = "maybe"
     treePlotter.createPlot(fitted)
Example #7
0
def eyesTree():
    """Build a decision tree from ``lenses.txt`` and plot it.

    The tree is built by ``trees.createTree`` from the parsed rows and four
    feature labels, then drawn with ``treePlotter.createPlot``.
    """
    # Split every stripped line on tabs; the file is closed on leaving the block.
    with open("lenses.txt") as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic1', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    treePlotter.createPlot(lensesTree)
Example #8
0
def execute():
    """Use a decision tree to address the "which lens?" problem.

    Reads ``lenses.txt`` (one tab-separated row per line), builds a tree with
    ``trees.createTree``, prints it and plots it with ``tp.createPlot``.
    """
    # Read the data set; the context manager closes the file afterwards.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # NOTE(review): 'prescirpt' looks like a misspelling of 'prescript'; it is
    # kept because the label becomes a key of the resulting tree.
    lensesLabels = ['age', 'prescirpt', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    # A parenthesised single argument prints the same on Python 2 and 3.
    print(lensesTree)
    tp.createPlot(lensesTree)
Example #9
0
def lenses():
	"""Build a decision tree from ``./data/lenses.txt``, print and plot it.

	Rows are parsed as one tab-separated line each.  The tree from
	``tree.createTree`` is printed and then passed to ``createPlot`` together
	with the string ``'lenses.jpg'``.
	"""
	filename = './data/lenses.txt'
	# The context manager closes the file even if parsing raises.
	with open(filename) as fr:
		rows = [inst.strip().split('\t') for inst in fr]
	lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
	lensesTree = tree.createTree(rows, lensesLabels)
	print(lensesTree)
	createPlot(lensesTree, 'lenses.jpg')
Example #10
0
 def test_treeNums(self):
     """Print the sample tree together with its leaf count and depth."""
     samples, featNames = trees.createDataSet()
     print("\n dataSet == %s" % (samples))
     fitted = trees.createTree(samples, featNames)
     print("\n tree == %s" % (fitted))
     # Both figures come from the plotting helper module.
     leafCount = treePlotter.getNumLeafs(fitted)
     treeDepth = treePlotter.getTreeDepth(fitted)
     print("\n leafs == %s depth == %s " % (leafCount, treeDepth))
Example #11
0
def main():
    """Build, print and plot a decision tree from the Ch03 lenses data.

    Prints the parsed rows and the tree returned by ``trees.createTree``,
    then draws the tree with ``treePlotter.createPlot``.
    """
    # One tab-separated row per line; the file is closed on leaving the block.
    with open("../file/Ch03/lenses.txt") as fr:
        Map = [line.strip().split('\t') for line in fr]
    print(Map)
    Label = ['age', 'prescript', 'astigmatic', 'tearRate']
    Tree = trees.createTree(Map, Label)
    print(Tree)
    treePlotter.createPlot(Tree)
def main():
    """Build a decision tree from ``lenses.txt`` and plot it.

    Each stripped line of the file is split on tabs into one row.  The tree
    from ``trees.createTree`` is drawn with ``treePlotter.createPlot``.
    """
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    treePlotter.createPlot(lensesTree)
Example #13
0
def test():
    """Build the lenses decision tree and print the class of one sample.

    Reads ``lenses.txt`` (one tab-separated row per line), builds a tree with
    ``trees.createTree`` and prints the ``trees.classify`` result for the
    sample ``['young', 'myope', 'no', 'normal']``.
    """
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Hand createTree a copy so the full label list is still available to
    # classify() even if createTree modifies the list it is given.
    lensesTree = trees.createTree(lenses, lensesLabels[:])
    result = trees.classify(lensesTree, lensesLabels,
                            ['young', 'myope', 'no', 'normal'])
    # A parenthesised single argument prints the same on Python 2 and 3.
    print(result)
Example #14
0
 def test_store_load(self):
     """Round-trip the sample tree through storeTree/grabTree, printing each step."""
     samples, featNames = trees.createDataSet()
     print("\n dataSet == %s" % (samples))
     fitted = trees.createTree(samples, featNames)
     print("\n tree == %s" % (fitted))
     # Write the tree to disk, then read it back from the same path.
     storagePath = "./mytree.txt"
     trees.storeTree(fitted, storagePath)
     restored = trees.grabTree(storagePath)
     print("\n newTree == %s" % (restored))
Example #15
0
def main():
    """Build the sample tree, plot it, then store and reload it.

    The tree is plotted with ``treePlotter.createPlot`` (second argument
    ``'test.png'``), written with ``trees.storeTree`` under the name
    ``'classifierStorage'``, read back with ``trees.grabTree`` and printed.
    """
    import trees
    import treePlotter
    samples, featNames = trees.createDataSet()
    fitted = trees.createTree(samples, featNames)
    treePlotter.createPlot(fitted, 'test.png')
    # Persist the tree and print what is read back from storage.
    trees.storeTree(fitted, 'classifierStorage')
    reloaded = trees.grabTree('classifierStorage')
    print(reloaded)
Example #16
0
def tests():
    dataSet, labels = trees.createDataSet()
    print dataSet
    print trees.calcShannonEnt(dataSet)
    myTree = trees.createTree(dataSet, labels)
    print myTree, labels
    print trees.classify(myTree, labels, [1, 0])
    print trees.classify(myTree, labels, [1, 1])
    print trees.classify(myTree, labels, [0, 0])
    print trees.classify(myTree, labels, [0, 1])
Example #17
0
def main():
    '''
    Use a decision tree to predict the type of contact lenses a patient needs.
    The lens types are hard material, soft material, and not suitable for
    contact lenses.

    Reads ``lenses.txt`` (one tab-separated row per line), builds the tree
    with ``trees.createTree``, prints it and plots it.
    '''
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        lenses = [linst.strip().split('\t') for linst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    # A parenthesised single argument prints the same on Python 2 and 3.
    print(lensesTree)
    treePlotter.createPlot(lensesTree)
Example #18
0
def main():
	# createPlot()
	dataSet,labels = trees.createDataSet()
	labelsTmp = copy.deepcopy(labels)
	mytree = trees.createTree(dataSet,labelsTmp)
	print mytree
	print dataSet
	print labels

	print getNumLeafs(mytree)
	print getTreeDepth(mytree)
	# createPlot(mytree)
	print trees.classify(mytree,labels,[1,0])
Example #19
0
def main():
    """Build a decision tree from ``lenses.txt``, print it and plot it."""
    # Read the file into a two-dimensional list: one tab-separated row per
    # line.  The context manager closes the file afterwards.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # Feature labels
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Build the tree
    lensesTree = trees.createTree(lenses, lensesLabels)
    print(lensesTree)

    # Draw the tree
    treePlotter.createPlot(lensesTree)
Example #20
0
def classContactLens():
    """
    Build and print a decision tree for predicting contact lens type.

    ``./data/lenses.txt`` is read in binary mode; each line is stripped,
    decoded as UTF-8 and split on tab characters into one row.
    """
    with open(r'./data/lenses.txt','rb') as fr:
        # bytes -> str conversion happens after stripping each raw line
        lenses = [str(raw.strip(),encoding = 'utf-8').split('\t')
                  for raw in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = trees.createTree(lenses, lensesLabels)
    print ("lensesTree is :%s"%(lensesTree))
Example #21
0
def job_tree():
    '''
    Rebuild the decision tree used for prediction and save it.

    Reads ``data/job_test.csv`` as UTF-8, builds a tree with
    ``trees.createTree`` using the labels from ``get_labels2()``, prints the
    tree as JSON and stores it in ``data/tree.txt`` with ``trees.storeTree``.

    :return: None
    '''
    # NOTE(review): the file has a .csv extension but rows are split on tabs
    # -- confirm the actual format.
    # The context manager closes the file even if parsing raises.
    with open(r'data/job_test.csv', encoding='UTF-8') as fr:
        listWm = [inst.strip().split('\t') for inst in fr]
    labels = get_labels2()
    Trees = trees.createTree(listWm, labels)
    print("决策树:")
    print(json.dumps(Trees, ensure_ascii=False))
    # Save the tree
    fileName = r'data/tree.txt'
    trees.storeTree(Trees, fileName)
Example #22
0
def gain_results(foldnum):
    """Train one tree per training file and score it on the paired test file.

    The directory listing is split into ``tests`` (its first ``foldnum``
    entries) and ``trains`` (its last ``foldnum`` entries).  The i-th tree is
    built from the i-th training file and used to classify every row of the
    i-th test file.

    Args:
        foldnum: Number of directory entries sliced from each end of the
            listing.

    Returns:
        A tuple ``(test_labs, accuracies, correct_ratio)`` with one entry per
        test file: the last column of every test row, the ``trees.classify``
        result for every test row, and the fraction of rows where the two
        are equal.
    """
    # Attribute list
    lenses_labels = rawdata.get_attr_value()
    dirs = os.listdir('D:/PyCharm/decision_tree/dataDir/sample_data1')
    tests = dirs[:foldnum]  # test set
    trains = dirs[-foldnum:]  # training set

    # One decision tree per training file.
    decision_trees = []
    for train_name in trains:
        lenses = rawdata.get_train_data(train_name)
        decision_trees.append(trees.createTree(lenses, lenses_labels))

    # Classification result of every row (without its last column) of every
    # test file, using the tree at the same position.
    accuracies = []
    for decs_tree, test_name in zip(decision_trees, tests):
        rows = rawdata.get_test_data(test_name)
        accuracies.append(
            [trees.classify(decs_tree, lenses_labels, row[:-1])
             for row in rows])

    # Last column of every row of every test file.
    test_labs = []
    for test_name in tests:
        rows = rawdata.get_test_data(test_name)
        test_labs.append([row[-1] for row in rows])

    # Fraction of rows whose classification equals the last column.
    correct_ratio = []
    for truth, predicted in zip(test_labs, accuracies):
        hits = sum(1 for want, got in zip(truth, predicted) if want == got)
        correct_ratio.append(float(hits) / len(truth))
    return test_labs, accuracies, correct_ratio
Example #23
0
def main():
    """
    Function:   Main entry point; builds, prints and plots the lenses tree.

    Args:       None

    Returns:    None
    """
    # Read the file: one tab-separated row per line.  The context manager
    # closes the file afterwards.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    # Feature labels
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # Build the tree
    lensesTree = trees.createTree(lenses, lensesLabels)
    # Print the tree
    print(lensesTree)
    # Draw the tree
    treePlotter.createPlot(lensesTree)
Example #24
0
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 17 16:00:41 2014

@author: gq
"""

import trees
# Build the sample data set and print it.
myDat,labels=trees.createDataSet()
print myDat
#print trees.splitDataSet(myDat,0,1)
#print trees.splitDataSet(myDat,0,0)
#print trees.calcShannonEnt(myDat)
#print trees.chooseBestFeatureToSplit(myDat)
# Print the tree that createTree builds from the data set and its labels.
print trees.createTree(myDat,labels)
Example #25
0
import trees

if __name__ == '__main__':
    # Only when run as a script: build the sample data and print its tree.
    myDat, labels = trees.createDataSet()
    print trees.createTree(myDat, labels)
Example #26
0
# -*- coding:utf-8 -*-
import trees

myData,myLabels = trees.createDataSet()
# Keep a separate copy of the labels for the classify() call further down.
testLabels = myLabels.copy()
print ('myData is ' , myData)

# Compute the Shannon entropy of the data set
#myShannonEnt = trees.calcShannonEnt(myData)
#print ('myShannonEnt is ' , myShannonEnt )

### Test the data-set splitting function
#mySplitDat = trees.splitDataSet(myData, 1, 0)
#print ('mySplitDat is ' , mySplitDat )

#myBestData = trees.chooseBestFeatureToSplit(myData)
#print ('myBestData is ' , myBestData )

myTree = trees.createTree(myData, myLabels)
print ('myTree is ' ,myTree)


# Classify one sample with the tree, using the copied labels
print ('testLabels is ' ,testLabels)
testResult = trees.classify(myTree, testLabels, [1,1])
print ('testResult is ' ,testResult)

# NOTE(review): the storeTree call is commented out, so grabTree relies on
# 'classifierStorage.txt' already existing -- confirm.
#trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')
print ('fromFileTree is' , fromFileTree)
Example #27
0
# Entropy of the sample data set and the four splits on feature 0/1, value 1/0.
myData, labels = trees.createDataSet()
print(myData)
print(trees.calcShannonEnt(myData))
print('---------------------------------')
print(trees.splitDataSet(myData, 0, 1))
print(trees.splitDataSet(myData, 0, 0))
print(trees.splitDataSet(myData, 1, 1))
print(trees.splitDataSet(myData, 1, 0))
print('---------------------------------')
# Re-create the data set and report the index chosen by chooseBestFeatureToSplit.
myData, labels = trees.createDataSet()
print(myData)
print('第', trees.chooseBestFeatureToSplit(myData), '个特征是最好的用于划分数据集的特征')
print('---------------------------------')
# Re-create the data set again and build the tree from it.
myData, labels = trees.createDataSet()
myTree = trees.createTree(myData, labels)
print('myTree=', myTree)
print('---------------------------------')
# createPlot()
print('---------------------------------')
# From here on myTree is replaced by the trees returned by retrieveTree.
print(retrieveTree(1))
myTree = retrieveTree(0)
print(myTree)
print(getNumLeafs(myTree))
print(getTreeDepth(myTree))
print('---------------------------------')
myTree = retrieveTree(0)
createPlot(myTree)
f = open(
    'D:/paper/kNN-master/机器学习实战(中文版+英文版+源代码)/机器学习实战源代码/machinelearninginaction/Ch03/lenses.txt'
)
#-*- coding:utf-8 -*-
import trees
import treePlotter

# Read one tab-separated row per line; the with-block closes the file.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLables)
# A parenthesised single argument prints the same on Python 2 and 3.
print(lensesTree)

treePlotter.createPlot(lensesTree)
Example #29
0
	test_set.append(cars[rand_index][0:-1])
	test_label.append(cars[rand_index][-1])
	del(cars[rand_index])
#print test_set
#print
#print test_label
#exit(0)

print cars_labels

#m,n = shape(cars)
#print m,n

#m,n = shape(test_set)
#print m,n
# Build the tree from the rows that remain in `cars`.
cars_tree = trees.createTree(cars, cars_labels)
#print cars_tree

# m is the first dimension of test_set and bounds the loop below.
m,n = shape(test_set)

#print cars_labels
#exit(0)
#print cars_labels2
#exit(0)

# Count the test rows whose classification differs from the held-out label.
# NOTE(review): classify() is given cars_labels2 rather than cars_labels --
# presumably a copy taken before createTree; confirm.
err_count = 0
for i in range(m):
	ret = trees.classify(cars_tree, cars_labels2, test_set[i])
	if ret != test_label[i]:
		err_count +=1
print "err=", err_count
Example #30
0

import trees
import treePlotter

if '__main__' == __name__:
	# Parse one tab-separated row per line; the with-block closes the file.
	with open('lenses.txt') as fin:
		lenses = [inst.strip().split('\t') for inst in fin.readlines()]
	labels = ['age', 'prescript', 'astigmatic', 'tear-rate']
	# Build the tree from the rows and labels, then plot it.
	decisionTree = trees.createTree(lenses, labels)
	treePlotter.createPlot(decisionTree)
import trees

# Walk through the trees module on the sample data set, printing each result.
myDat, lables = trees.createDataSet()

print("------ shannon ------")
print(myDat)
print(trees.calcShannonEnt(myDat))

# print("------ shannon after changed ------")
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

print("------ split data set ------")
print(trees.splitDataSet(myDat, 0, 1))
print(trees.splitDataSet(myDat, 0, 0))

print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(myDat))

print("------ create tree ------")
tree = trees.createTree(myDat, lables)
print(tree)

print("------ test tree classify ------")
# classify() is given a literal label list here instead of `lables`.
print(trees.classify(tree, ['no surfacing', 'flippers'], [1, 0]))
	plotTree.totalD = float(getTreeDepth(inTree)) 
	plotTree.xOff = -.5/plotTree.totalW; plotTree.yOff = 1.0;
	plotTree(inTree, (.5, 1.0), '')
	plt.show()

if __name__ == '__main__':
	import trees
	file_name = 'lenses.txt'
	# Parse one tab-separated row per line (trailing whitespace removed);
	# the with-block closes the file afterwards.
	with open(file_name) as raw:
		lenses_data = [line.rstrip().split('\t') for line in raw]
	lenses_label = ['age', 'prescript', 'astigmatic', 'tearRate']

	# Build the decision tree and plot it.
	lenses_tree = trees.createTree(lenses_data, lenses_label)
	createPlot(lenses_tree)

	### create the decision tree


Example #33
0
import trees
myDat, labels = trees.createDataSet()
#print myDat
#print trees.createTree(myDat, labels)

import treePlotter
#treePlotter.createPlot()
#myTree = treePlotter.retrieveTree(0)
#treePlotter.createPlot(myTree)
#print trees.classify(myTree, labels, [1,1])

#trees.storeTree(myTree,'classifierStorage.txt')
#print trees.grabTree('classifierStorage.txt')

# Read one tab-separated row per line; the with-block closes the file.
with open('lenses.txt') as fr:
    print(fr)

    lenses = [inst.strip().split('\t') for inst in fr]
# A parenthesised single argument prints the same on Python 2 and 3.
print(lenses)
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
Example #34
0
# _*_ coding:utf-8 _*_
import trees

# Parse one tab-separated row per line; the with-block closes the file.
with open('lenses.txt') as fr:
    listsss = [inst.strip().split('\t') for inst in fr]
lenseslabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTeee = trees.createTree(listsss, lenseslabels)
print(listsss)
print(lensesTeee)
import trees

# Print the sample data, its labels and its Shannon entropy.
myDat, labels = trees.createDataSet()
print(myDat)
print(labels)
print(trees.calcShannonEnt(myDat))

# Result of splitDataSet for feature index 0 and value 0.
print(trees.splitDataSet(myDat, 0, 0))

# Tree built from the data set and its labels.
print(trees.createTree(myDat, labels))
Example #36
0
#This test goes with Python3
import trees
import treePlotter

if '__main__' == __name__:
	# Only when run as a script: build the sample tree and plot it.
	dataSet, labels = trees.createDataSet()
	decisionTree = trees.createTree(dataSet, labels)
	treePlotter.createPlot(decisionTree)
Example #37
0
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD
    for key in secondDict.keys():
        if type(secondDict[key]).__name__=='dict':#test to see if the nodes are dictonaires, if not they are leaf nodes   
            plotTree(secondDict[key],cntrPt,str(key))        #recursion
        else:   #it's a leaf node print the leaf node
            plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
#if you do get a dictonary you know it's a tree, and the first element will be another dict

def createPlot(inTree):
    """Draw the tree ``inTree`` in matplotlib figure 1 and show it.

    The figure is cleared, a single frameless axes without ticks is stored
    on ``createPlot.ax1``, and the layout values read by ``plotTree``
    (``totalW``, ``totalD``, ``xOff``, ``yOff``) are set as attributes of
    that function before it is called for the whole tree.

    Args:
        inTree: The tree passed to ``getNumLeafs``, ``getTreeDepth`` and
            ``plotTree``.
    """
    figure = plt.figure(1, facecolor='white')
    figure.clf()
    # Single axes, no frame, no tick marks.
    createPlot.ax1 = plt.subplot(111, frameon=False, xticks=[], yticks=[])
    # Leaf count and depth are stored as floats on plotTree.
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    # Starting offsets for the drawing.
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()

# Collect the sample data and build the tree from it
myDat, labels = trees.createDataSet()
mytree = trees.createTree(myDat, labels)

# Visualize the decision tree
createPlot(mytree)
Example #38
0
fr.close()

print dataset
print '\n'

# Shannon entropy of the data set.
print '数据集类的香农熵:'
print trees.calcShannonEnt(dataset)
print '\n'

# Label of the column chosen by chooseBestFeatureToSplit.
bestFeatureColumn = trees.chooseBestFeatureToSplit(dataset)
print '数据集最佳分类的属性是:'
print labels[bestFeatureColumn]
print '\n'

# Build the tree, then print its first key and the keys stored under it.
print '决策树:'
Tree = trees.createTree(dataset, labels)
print Tree
# Indexing the result of keys() requires it to be a list (Python 2).
firstFeature = Tree.keys()[0]
print firstFeature
firstFeatureValues = Tree[firstFeature].keys()
print firstFeatureValues
print '\n'

treePlotter.createPlot(Tree)

# Classify one hand-written sample.
testVec = ['pre', 'myope', 'yes', 'normal']
print '测试数据'
print testVec
# NOTE(review): 'tearRate' is appended to labels before classify() --
# presumably because createTree removed it from the list; confirm.
labels.append('tearRate')
print '匹配过程:'
result = trees.classify(Tree, labels, testVec)
## clac ShannonEnt
# a = trees.calcShannonEnt(myData)
# print(a)

## split dataSet
# a = trees.splitDataSet(myData, 0, 0)
# print(a)

## choose best Feature to split
# a = trees.chooseBestFeatureToSplit(myData)
# print(a, '\n', myData)

## Build the tree (dict type)
# createTree receives a slice copy of `labels`, not the list itself.
subLabels = labels[:]
tree = trees.createTree(myData, subLabels)
# print(tree)

### 2. Plot tree
# treePlotter.createPlot()

## get leafs num of tree
# b = treePlotter.getNumLeafs(tree)
# print(b)

## get depth of tree
# c = treePlotter.getTreeDepth(tree)
# print(c)

# treePlotter.createPlot(tree)
# tree['no surfacing'][2] = 'maybe'
# -*- coding: UTF-8 -*-
'''
	this module is an example of classifying stealth by decision tree
	@author: Liu Weijie
'''
import trees
import drawATree

#get data
def getData(filename):
    """Load a tab-separated data file and return it with the feature labels.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dataList, labelList)`` tuple.  ``dataList`` holds one list of
        fields per line (each line stripped, then split on tabs);
        ``labelList`` is the fixed list of four feature names.
    """
    # The context manager closes the file even if reading raises.
    with open(filename) as fr:
        dataList = [line.strip().split('\t') for line in fr]
    labelList = ['age', 'prescript', 'astigmatic', 'tearRate']
    return dataList, labelList

if __name__ == '__main__':
    # Load the lenses data, print it, then build and draw the tree.
    dataSet, labelList = getData('lenses.txt')
    print dataSet
    myTree = trees.createTree(dataSet, labelList)
    drawATree.drawTree(myTree)
    # Classify one hand-written sample and print the result.
    data1 = ['pre','myope','no','reduced']
    print 'this is ', trees.classifyByTree(myTree, labelList, data1)
Example #41
0
import treePlotter
import trees

# Read one tab-separated row per line; the with-block closes the file.
with open('lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lenseTree = trees.createTree(lenses, lensesLabels)

# A parenthesised single argument prints the same on Python 2 and 3.
print(lenseTree)
Example #42
0
import trees

# Data set and labels come from createDataSet2 in this snippet.
myData, labels = trees.createDataSet2()
#print myData,labels

#print trees.calcShannonEnt(myData)

#print trees.chooseBestFeatureToSplit(myData)

# Build the tree and print it.
myTree = trees.createTree(myData, labels)
print myTree


# Plot the tree that was just built.
import treePlotter
treePlotter.createPlot(myTree)
'''
import treePlotter
#treePlotter.createPlot()

#myTree = treePlotter.retrieveTree(1)
myTree = treePlotter.retrieveTree(0)
print myTree
#print treePlotter.getNumLeafs(myTree)
#print treePlotter.getTreeDepth(myTree)

#treePlotter.createPlot(myTree)

myData, labels = trees.createDataSet()
print labels

print trees.classify(myTree, labels, [1,0])
Example #43
0
 def test_createTree(self):
     """Build the tree for the sample data set and print both."""
     samples, featNames = trees.createDataSet()
     print("\n dataSet == %s" % (samples))
     fitted = trees.createTree(samples, featNames)
     print("\n tree == %s" % (fitted))
Example #44
0
import treePlotter
import trees
# a1 is the data set and a2 the labels; b1 is the tree built from them.
a1, a2 = trees.createDataSet()
b1 = trees.createTree(a1, a2)
treePlotter.createPlot(b1)
Example #45
0
import trees
import pandas as pd

# First data set: build a tree from every row and all column names.
df = pd.read_csv('data/1.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

# For classification the last column name is dropped from the label list.
col = df.columns.tolist()[:-1]
# The trailing N/Y comments are the author's annotations for each input.
input1 = ['a3', 'b1', 'c2', 'd1']  # N
input2 = ['a3', 'b3', 'c1', 'd1']  # N
input3 = ['a2', 'b1', 'c1', 'd2']  # Y
input4 = ['a1', 'b1', 'c1', 'd1']  # Y
print(trees.classify(tree, col, input1))
print(trees.classify(tree, col, input2))
print(trees.classify(tree, col, input3))
print(trees.classify(tree, col, input4))

# Second data set: same steps on data/2.csv.
df = pd.read_csv('data/2.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

col = df.columns.tolist()[:-1]
input1 = ['undergraduate', 'man', 'cet6', 'a1', 'b1']  # N
input2 = ['undergraduate', 'man', 'cet4', 'a1', 'b1']  # Y
input3 = ['postgraduate', 'man', 'no', 'a1', 'b2']  # Y
input4 = ['undergraduate', 'man', 'no', 'a1', 'b3']  # Y
input5 = ['undergraduate', 'man', 'no', 'a3', 'b3']  # Y
import trees as tree
import create_data_from_two_streams as get_data
import plotting_trees as ptree

# Build a tree from the data returned by get_data and print its depth.
myDat,labels = get_data.get_data()
myTree       = tree.createTree(myDat,labels)
print ptree.getTreeDepth(myTree)

Example #47
0
def testID3(filename):
    """Build a tree from the data in ``filename`` and plot it.

    Args:
        filename: Passed unchanged to ``trees.file2strlist``, whose two
            return values are handed to ``trees.createTree``.
    """
    rows, labelVector = trees.file2strlist(filename)
    fitted = trees.createTree(rows, labelVector)
    treePlotter.createPlot(fitted)
# name   industry   profession    sex    photography   road trips   SNS expert   github   bypasses GFW   avid reader   sci-fi fan   wide interests   brags   class

def getTrainingDatas():
    """Return the hard-coded training samples and their feature names.

    Returns:
        A ``(dataSet, labels)`` tuple.  ``dataSet`` is a list of eight rows,
        each holding twelve feature values followed by a category string;
        ``labels`` is the list of the twelve feature names.  New lists are
        built on every call.
    """
    labels = [
        "industry", "profession", "sex", "camera", "drive tour", "SNS",
        "github", "over GFW", "reader", "Science fiction fan", "hobby",
        "brag",
    ]
    # (profession, nine 0/1 flags, category); the industry and sex columns
    # hold the same value in every sample.
    samples = (
        ("gm",       (1, 1, 0, 0, 0, 0, 0, 1, 0), "liver"),
        ("engineer", (0, 1, 0, 0, 0, 0, 0, 0, 0), "empty"),
        ("sale",     (0, 1, 0, 0, 0, 0, 0, 0, 1), "liver"),
        ("founder",  (0, 1, 1, 0, 0, 0, 0, 1, 0), "boss"),
        ("phd",      (1, 0, 0, 0, 0, 1, 0, 1, 0), "liver, fake hacker"),
        ("engineer", (0, 1, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 0, 0, 1, 0, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
    )
    dataSet = [["it", job, "man"] + list(flags) + [category]
               for job, flags, category in samples]
    return dataSet, labels

if __name__ == "__main__":
    # Any command-line argument selects classification; none selects training.
    if len(sys.argv) > 1:
        # classify test, tortoise
        # First element is the name; the remaining twelve are feature values.
        classmate = ["tortoise", "it", "engineer",       "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        dataSet, labels = getTrainingDatas()
        # Load the tree stored by an earlier training run.
        tree = trees.grabTree("cm_tree.txt")
        print "{} is \"{}\"".format(classmate[0], trees.classify(tree, labels, classmate[1:]))
    else:
        # training
        dataSet, labels = getTrainingDatas()
        # createTree receives a copy of the label list, not `labels` itself.
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
Example #49
0
    		   [1, 1, 1, 'no'],
    		   [1, 1, 1, 'no'],
    		   [1, 1, 1, 'no'],
    		   [1, 0, 0, 'no'],
    		   [0, 1, 1, 'no'],
    		   [0, 1, 1, 'no'],
    		   [0, 1, 0, 'no']
              ]
    labels = ['glasses','man','170']
    return dataSet, labels

# Load the data set and labels selected by the "test" argument.
dataSet, labels = misc.loadLensesData("test")

print entropy.calcShannonEnt(dataSet)

# print trees.chooseBestFeatureToSplit(dataSet)

myTree = trees.createTree(dataSet,labels)

print myTree

# The 'Agg' backend is selected before pyplot is imported.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import treePlotter
treePlotter.createPlot(myTree)

# misc.storeTree(myTree, 'lenses_tree.txt')
# tree2 = misc.grabTree('classifierStorage.txt')
# treePlotter.createPlot(tree2)
Example #50
0
import sys
# Reload sys, then set the default encoding to UTF-8 and print it.
reload(sys)
#http://www.pythoner.com/200.html
# This works at run time; the editor/compiler reports an error for it,
# which can be ignored.
sys.setdefaultencoding('utf8')
print(sys.getdefaultencoding())




#
# Decode every line as UTF-8, strip it and split it on tabs; the with-block
# closes the file afterwards.
with open('lensesCN.txt') as fr:
    lenses = [unicode(inst, 'utf-8').strip().split('\t') for inst in fr]
# A Chinese version of these four labels was tried and left disabled.
lensesLabels = ['age' , 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses,lensesLabels)
print(lensesTree)
tp.createPlot(lensesTree)

# Shannon entropy of the sample data set.
dataSet, labels = tr.createDataSet()

shannonEnt = tr.calcShannonEnt(dataSet)

print(shannonEnt)

print(tp.retrieveTree(1))

# Leaf count and depth of the tree returned by retrieveTree(0).
myTree = tp.retrieveTree(0)
numLeafs = tp.getNumLeafs(myTree)
treeDepth = tp.getTreeDepth(myTree)
Example #51
0
from imp import reload
import trees

# Build the sample tree; the entropy result is computed but not stored.
myDat, labels = trees.createDataSet()
trees.calcShannonEnt(myDat)
myTree = trees.createTree(myDat, labels)

import treePlotter
# NOTE(review): createPlot() is called without arguments, so myTree is not
# passed to it -- confirm this matches the createPlot signature in use.
treePlotter.createPlot()
Example #52
0
	def testCreateTree(self):
		"""Check that ``trees.createTree`` yields the expected nested dict."""
		samples, featNames = TreesTestCase.createDataSet()
		built = trees.createTree(samples, featNames)
		# Expected structure for the sample data set.
		expected = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
		self.assertEqual(expected, built)