コード例 #1
0
ファイル: test.py プロジェクト: guozengxin/machine-learning
	def testClassify(self):
		"""Build a tree from the sample data set and check two classifications."""
		myDat, labels = TreesTestCase.createDataSet()
		# NOTE(review): createTree may modify the label list it is given —
		# confirm. Passing a copy keeps `labels` complete for classify()
		# either way.
		tree = trees.createTree(myDat, list(labels))
		for featVec, expected in (([1, 0], 'no'), ([1, 1], 'yes')):
			self.assertEqual(trees.classify(tree, labels, featVec), expected)
コード例 #2
0
    def test_classify(self):
        """Check ``trees.classify`` against a hand-built two-level tree.

        The tree splits on 'no surfacing' first and on 'flippers' second.
        Every case supplies a feature vector together with the feature-name
        order used to interpret it, and the class label expected back.
        """
        cases = [
            # (description, feat_names, item, expected result)
            ('training data',
             ['no surfacing', 'flippers'], [1, 0], 'no'),
            ('training data with different feat_names',
             ['flippers', 'no surfacing'], [0, 1], 'no'),
            ('not training data',
             ['flippers', 'no surfacing'], [0, 0], 'no'),
        ]
        for description, feat_names, item, result in cases:
            # Rebuilt for every case so that no case can observe changes
            # made to the tree while an earlier case was running.
            decision_tree = {
                'no surfacing': {
                    0: 'no',
                    1: {
                        'flippers': {
                            0: 'no',
                            1: 'yes',
                        },
                    },
                },
            }
            # The description is passed as the failure message so a failing
            # case can be identified.
            self.assertEqual(
                result,
                trees.classify(decision_tree, feat_names, item),
                description,
            )
コード例 #3
0
ファイル: treePlotter.py プロジェクト: elephantzhai/Learn
def main():
    """Build the sample tree, print it with its inputs and size, then classify [1, 0]."""
    # createPlot()
    dataSet, labels = trees.createDataSet()
    # createTree gets a deep copy of the labels; the untouched `labels` list
    # is what classify() receives below.
    labelsTmp = copy.deepcopy(labels)
    mytree = trees.createTree(dataSet, labelsTmp)
    # Single-argument print(...) produces the same output as the Python 2
    # print statement and also runs on Python 3.
    print(mytree)
    print(dataSet)
    print(labels)

    print(getNumLeafs(mytree))
    print(getTreeDepth(mytree))
    # createPlot(mytree)
    print(trees.classify(mytree, labels, [1, 0]))
コード例 #4
0
def tests():
    """Print the sample data, its entropy, the tree built from it and four classifications."""
    dataSet, labels = trees.createDataSet()
    print(dataSet)
    print(trees.calcShannonEnt(dataSet))
    # NOTE(review): createTree may modify the label list it is given —
    # confirm. Building from a copy keeps `labels` complete for the
    # classify() calls below.
    myTree = trees.createTree(dataSet, list(labels))
    # Formats both values separated by one space, like `print a, b` did.
    print("%s %s" % (myTree, labels))
    for featVec in ([1, 0], [1, 1], [0, 0], [0, 1]):
        print(trees.classify(myTree, labels, featVec))
コード例 #5
0
ファイル: test.py プロジェクト: lumicae/BMSystem
def test():
    """Build a tree from ``lenses.txt`` and print the class predicted for one sample."""
    # The context manager closes the file even if parsing raises.
    with open('lenses.txt') as fr:
        # One record per line, fields separated by tabs.
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    # NOTE(review): createTree may modify the label list it is given —
    # confirm. A copy keeps lensesLabels complete for classify().
    lensesTree = trees.createTree(lenses, list(lensesLabels))
    #treePlotter.createPlot(lensesTree)
    result = trees.classify(lensesTree, lensesLabels,
                            ['young', 'myope', 'no', 'normal'])
    print(result)
コード例 #6
0
def ack():
    """Handler: classify the comma-separated user input and show the result in a dialog."""
    # Text currently entered by the user.
    userText = varTestOne.get()
    varTestOne.set(userText)
    # Outer commas are dropped first so they do not yield empty fields.
    featVec = userText.strip(',').split(',')
    print(featVec)

    labels = ['age', 'prescript', 'astigmatic', 'tearRate']
    result = trees.classify(myTree, labels, featVec)
    message = "您的输入是:" + userText + "\n结果是:" + result
    tkinter.messagebox.showinfo(title="判断结果", message=message)
コード例 #7
0
ファイル: test_classify.py プロジェクト: lobule/machlearn
    def test_trees_classify(self):
        """Train on the five-row sample matrix and check all four feature combinations."""
        matrix = [[1, 1, 'yes'],
                  [1, 1, 'yes'],
                  [1, 0, 'no'],
                  [0, 1, 'no'],
                  [0, 1, 'no']]
        labels = ['no surfacing', 'flippers']

        tree = trees.create_tree(matrix, labels)

        expectations = [
            ([1, 1], 'yes'),
            ([1, 0], 'no'),
            ([0, 1], 'no'),
            ([0, 0], 'no'),
        ]
        for features, expected in expectations:
            prediction = trees.classify(features, tree, labels)
            # assertEqual replaces failUnless, a deprecated alias removed in
            # Python 3.12, and reports both values when the check fails.
            self.assertEqual(prediction, expected)
コード例 #8
0
ファイル: testdemo.py プロジェクト: srw962/Decision-tree
def gain_results(foldnum):
    """Train one tree per training file and score it on the paired test file.

    The first ``foldnum`` entries of the data directory listing are used as
    test files and the last ``foldnum`` entries as training files; the tree
    built from ``trains[i]`` is evaluated on ``tests[i]``.

    :param foldnum: number of train/test file pairs taken from the listing.
    :return: ``(test_labs, accuracies, correct_ratio)``. ``test_labs[i]`` is
        the last column of every row of test file ``i``, ``accuracies[i]``
        the value ``trees.classify`` returned for each of those rows, and
        ``correct_ratio[i]`` the fraction of rows where the two agree
        (``0.0`` for a test file with no rows).
    """
    # Attribute list
    lenses_labels = rawdata.get_attr_value()
    dirs = os.listdir('D:/PyCharm/decision_tree/dataDir/sample_data1')
    tests = dirs[:foldnum]  # test set
    trains = dirs[-foldnum:]  # training set

    decision_trees = []
    for train_name in trains:
        lenses = rawdata.get_train_data(train_name)
        # NOTE(review): createTree may modify the label list it is given —
        # confirm. Each tree gets its own copy so every tree, and the
        # classify() calls below, see the complete attribute list.
        decision_trees.append(trees.createTree(lenses, list(lenses_labels)))

    test_labs = []
    accuracies = []
    correct_ratio = []
    for decs_tree, test_name in zip(decision_trees, tests):
        # Loaded once per test file and reused for predictions and truth.
        test_data = rawdata.get_test_data(test_name)
        # The last column is the true label; the rest is the feature vector.
        predicted = [trees.classify(decs_tree, lenses_labels, row[:-1])
                     for row in test_data]
        actual = [row[-1] for row in test_data]
        hits = sum(1 for truth, guess in zip(actual, predicted)
                   if truth == guess)

        accuracies.append(predicted)
        test_labs.append(actual)
        # float() keeps true division on Python 2; an empty test file scores
        # 0.0 instead of raising ZeroDivisionError.
        correct_ratio.append(hits / float(len(actual)) if actual else 0.0)
    return test_labs, accuracies, correct_ratio
コード例 #9
0
#print(trees.splitDataSet(mydata,0,1))

index = trees.chooseBestFeatureToSplit(mydata)
#print(index)
# Disabled experiment: build and print the tree from the data set.
# mytree = trees.createTree(mydata,features)
# print(mytree)
import treePlotter
# Disabled experiment: plot the stored tree, add a branch and plot it again.
# mytree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(mytree)
# mytree['no surfacing'][3] = 'maybe'
# treePlotter.createPlot(mytree)

mytree = treePlotter.retrieveTree(0)
for featVec in ([0, 0], [1, 1]):
    print(trees.classify(mytree, features, featVec))

# Round-trip the tree through a file and print what was read back.
trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)


# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lense = [inst.strip().split('\t') for inst in fr]
lensefeatures = ['age', 'prescript', 'astigmatic', 'tearrate']
lensetree = trees.createTree(lense, lensefeatures)
print(lensetree)
treePlotter.createPlot(lensetree)
コード例 #10
0
ファイル: app.py プロジェクト: liujianhuanzz/MachineLearning
import trees
import treePlotter

# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
# The tree is built from a slice copy; the original list goes to classify().
lensesTree = trees.createTree(lenses, lensesLabels[:])
samples = (
    ['young', 'hyper', 'no', 'normal'],
    ['presbyopic', 'myope', 'no', 'normal'],
)
for sample in samples:
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(trees.classify(lensesTree, lensesLabels, sample))
コード例 #11
0
'''
Decision tree test class
'''
import trees

# The string literal below holds a disabled experiment; it is not executed.
'''
dataSet,lables = trees.createDataSet()
print(dataSet)
print(lables)
shannonEnt = trees.calcShannonEnt(dataSet)
print(shannonEnt)
'''
# Build the tree from the sample data set.
dataSet,labels = trees.createDataSet()
tree = trees.createTree(dataSet,labels)

# A second, independent call supplies the label list used for classification.
# NOTE(review): presumably because createTree modifies the list it was
# given — confirm.
d,l = trees.createDataSet()
result = trees.classify(tree,l,[1,0])

print(result)





コード例 #12
0
# Create the decision tree
import trees
ds, ls = trees.createDataSet()
trees.createTree(ds, ls)

# Draw the tree
import treePlotter
mt = treePlotter.retrieveTree(0)
treePlotter.createPlot(mt)

# Classify using the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
ds, ls = trees.createDataSet()
trees.classify(it, ls, [0, 0])

# Serialize and deserialize the decision tree
import trees
import treePlotter
it = treePlotter.retrieveTree(0)
trees.storeTree(it, 'classifierStorage.txt')
ot = trees.grabTree('classifierStorage.txt')

# Contact-lens data set test
import trees
import treePlotter
# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    ds = [example.strip().split("\t") for example in fr]
# NOTE(review): 'antigmatic' looks like a misspelling of 'astigmatic'; it is
# kept because the name ends up as a key in the generated tree.
ls = ['age', 'prescript', 'antigmatic', 'tearRate']
mt = trees.createTree(ds, ls)
コード例 #13
0
import trees
import treePlotter

myDat, labels = trees.createDataSet()
# Single-argument print(...) gives the same output on Python 2 and 3.
print(labels)
myTree = treePlotter.retrieveTree(0)
print(myTree)
for featVec in ([1, 0], [1, 1]):
    print(trees.classify(myTree, labels, featVec))

# trees.storeTree(myTree, 'classifierStorage.txt')
print(trees.grabTree('classifierStorage.txt'))

# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
コード例 #14
0
ファイル: test.py プロジェクト: onehao/opensource
print(numLeafs)
print(treeDepth)


myTree = tp.retrieveTree(0)
tp.createPlot(myTree)

# Add a third branch under the root feature and plot the tree again.
myTree['no surfacing'][3] = 'maybe'
tp.createPlot(myTree)

myDat, labels = tr.createDataSet()
print(labels)
myTree = tp.retrieveTree(0)
print(myTree)
for featVec in ([1, 0], [1, 1]):
    print(tr.classify(myTree, labels, featVec))


# Restore the tree from disk and print it.
restoreTree = tr.grabTree('classifierStorage.txt')
print(restoreTree)


# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One strip() is enough; a second call on the result changes nothing.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tr.createTree(lenses, lensesLabels)
print(lensesTree)
コード例 #15
0
ファイル: task.py プロジェクト: Zendq1998/data-mining
# Single-argument print(...) gives the same output on Python 2 and 3.
print('决策树:')
Tree = trees.createTree(dataset, labels)
print(Tree)
# next(iter(...)) yields the same first key as keys()[0] and also works
# where keys() returns a view that cannot be indexed (Python 3).
firstFeature = next(iter(Tree))
print(firstFeature)
# list(...) gives a plain list of the branch values on both versions.
firstFeatureValues = list(Tree[firstFeature])
print(firstFeatureValues)
print('\n')

treePlotter.createPlot(Tree)

testVec = ['pre', 'myope', 'yes', 'normal']
print('测试数据')
print(testVec)
# NOTE(review): 'tearRate' is appended to `labels` right before
# classifying — presumably to restore a name createTree removed; confirm
# that the resulting position matches the column order of testVec.
labels.append('tearRate')
print('匹配过程:')
result = trees.classify(Tree, labels, testVec)
print('匹配结果:')
print(result)
print('\n')

# Store the tree on disk
print('将树存放磁盘...')
trees.storeTree(Tree, 'myTree.txt')
print('\n')

# Read the tree back from disk
print('再从磁盘中读取树:')
print(trees.grabTree('myTree.txt'))
コード例 #16
0
# -*- coding:utf-8 -*-
import trees

myData,myLabels = trees.createDataSet()
# Copy of the labels taken before createTree runs; it is the list passed to
# classify() further down.
testLabels = myLabels.copy()
print ('myData is ' , myData)

# Compute the Shannon entropy of the unordered data set
#myShannonEnt = trees.calcShannonEnt(myData)
#print ('myShannonEnt is ' , myShannonEnt )

### Test the data-set splitting function
#mySplitDat = trees.splitDataSet(myData, 1, 0)
#print ('mySplitDat is ' , mySplitDat )

#myBestData = trees.chooseBestFeatureToSplit(myData)
#print ('myBestData is ' , myBestData )

myTree = trees.createTree(myData, myLabels)
print ('myTree is ' ,myTree)


# Test against the training set
print ('testLabels is ' ,testLabels)
testResult = trees.classify(myTree, testLabels, [1,1])
print ('testResult is ' ,testResult)

# The tree is read from a file that this script does not write (the
# storeTree call is commented out), so the file must already exist.
#trees.storeTree(myTree, 'classifierStorage.txt')
fromFileTree = trees.grabTree('classifierStorage.txt')
print ('fromFileTree is' , fromFileTree)
コード例 #17
0
import trees
import tree_plotter

# Show the stored sample tree and the feature labels, then classify two
# feature vectors and print each predicted label.
tree = tree_plotter.retrieve_tree(0)
print(tree)
dataset, labels = trees.create_dataset()
print(labels)
for feature_vector in ([1, 0], [1, 1]):
    label = trees.classify(tree, labels, feature_vector)
    print(label)
コード例 #18
0
ファイル: pro_car.py プロジェクト: Kris0724/machine_learning
# Single-argument print(...) gives the same output on Python 2 and 3.
print(cars_labels)

#m,n = shape(cars)
#print m,n

#m,n = shape(test_set)
#print m,n
cars_tree = trees.createTree(cars, cars_labels)
#print cars_tree

# m is the number of test rows; n is unpacked but not used below.
m, n = shape(test_set)

#print cars_labels
#exit(0)
#print cars_labels2
#exit(0)

# Count the test rows whose predicted class differs from the expected one.
# classify() gets cars_labels2, not the list that was passed to createTree.
err_count = 0
for i in range(m):
    ret = trees.classify(cars_tree, cars_labels2, test_set[i])
    if ret != test_label[i]:
        err_count += 1
# "%s" formatting reproduces the single space `print a, b` put between items.
print("err= %s" % (err_count,))
print("sum= %s" % (m,))

#treePlotter.createPlot(cars_tree)



コード例 #19
0
import trees

myDat, labels = trees.createDataSet()

print("------ shannon ------")
print(myDat)
print(trees.calcShannonEnt(myDat))

# print("------ shannon after changed ------")
# myDat[0][-1] = 'maybe'
# print(myDat)
# print(trees.calcShannonEnt(myDat))

print("------ split data set ------")
# Split on feature 0, first for value 1 and then for value 0.
for value in (1, 0):
    print(trees.splitDataSet(myDat, 0, value))

print("------ choose best feature to split ------")
print(trees.chooseBestFeatureToSplit(myDat))

print("------ create tree ------")
tree = trees.createTree(myDat, labels)
print(tree)

print("------ test tree classify ------")
# classify() is given a literal label list rather than the one passed to
# createTree above.
featLabels = ['no surfacing', 'flippers']
print(trees.classify(tree, featLabels, [1, 0]))
コード例 #20
0
#3.2.2 Constructing a tree of annotations
treePlotter.retrieveTree(1)
myTree = treePlotter.retrieveTree(0)
print "获取叶节点的数目:", treePlotter.getNumLeafs(myTree)
print "获取树的层数:", treePlotter.getTreeDepth(myTree)
treePlotter.createPlot(myTree)
# Add a third branch under the root feature and plot the tree again.
myTree['no surfacing'][3] = 'maybe'
print "myTree:", myTree
treePlotter.createPlot(myTree)

#3.3.1 Testing the algorithm: classifying with the decision tree
myDat, labels = trees.createDataSet()
print "labels:", labels
myTree = treePlotter.retrieveTree(0)
print "myTree:", myTree
print "分类1:", trees.classify(myTree, labels, [1, 0])
print "分类2:", trees.classify(myTree, labels, [1, 1])

#3.3.2  Storing the decision tree
trees.storeTree(myTree, homedir + 'classifierStorage.txt')
print "决策树调取:", trees.grabTree(homedir + 'classifierStorage.txt')
# The trailing commas suppress the newline (Python 2 print statement), so
# the next print continues on the same output line.
print ":",
print ":",

#3.4 Example: using a decision tree to predict contact lens type
# NOTE(review): the file object is never closed in the visible lines.
fr = open(homedir + 'lenses.txt')
print 'fr:', fr
# One record per line, fields separated by tabs.
lenses = [inst.strip().split('\t') for inst in fr.readlines()]
print 'lenses:', lenses
# NOTE(review): ' age' has a leading space, unlike the other labels —
# confirm that this is intended.
lensesLabels = [' age', 'prescript', 'astigmatic', 'tearRate']
print 'lensesLabels:', lensesLabels
コード例 #21
0
# myTree=trees.createTree(myDat,labels)
# print(myTree)

# treePlotter.createPlot()
# mytree=treePlotter.retrieveTree(0)
# numLeafs=treePlotter.getNumLeafs(mytree)
# print(numLeafs)
# treeDepth=treePlotter.getTreeDepth(mytree)
# print(treeDepth)

# myTree=treePlotter.retrieveTree(0)
# treePlotter.createPlot(myTree)
# print(myTree)
# myTree['no surfacing'][3]='maybe'
# print(myTree)
# treePlotter.createPlot(myTree)

myDat, labels = trees.createDataSet()
myTree = treePlotter.retrieveTree(0)
res = trees.classify(myTree, labels, [1, 1])
print(res)
print(myTree)
# Round-trip the tree through a file and print what was read back.
trees.storeTree(myTree, 'classifierStorage.txt')
newTree = trees.grabTree('classifierStorage.txt')
print(newTree)

# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
treePlotter.createPlot(lensesTree)
コード例 #22
0
ファイル: test.py プロジェクト: sevenry/python_learning_note
#print(mydat)
'''
cc=trees.calcShannonEnt(mydat)
print(cc)

aa=trees.splitDataSet(mydat,0,1)
print(aa)
bb=trees.splitDataSet(mydat,1,0)
print(bb)

kk=trees.chooseBestFeatureToSplit(mydat)
print(kk)
'''

#mytree=trees.createTree(mydat,labels)
#print(mytree)

import treePlotter
#treePlotter.createPlot()
#dd=treePlotter.retrieveTree(1)
#print(dd)
# Fetch stored sample tree number 0 from treePlotter.
myTree=treePlotter.retrieveTree(0)
#print(myTree)
#a=treePlotter.getNumLeafs(myTree)
#b=treePlotter.getTreeDepth(myTree)
#print(a,b)

#treePlotter.createPlot(myTree)####

# Classify the feature vector [1, 1]; `labels` is defined outside the
# visible lines.
aa=trees.classify(myTree,labels,[1,1])
print(aa)
コード例 #23
0
# reload has to be imported before its first use; the import used to sit
# below the first reload(...) call.
from importlib import reload

# Draw the tree
reload(treePlotter)
myTree = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree)

# Modify the dictionary and redraw
myTree['no surfacing'][3] = 'maybe'
# Bare names such as the next line only echo a value in an interactive
# session; they have no effect when this runs as a script.
myTree
treePlotter.createPlot(myTree)

# Test the classify function
myDat, labels = trees.createDataSet()
labels
myTree = treePlotter.retrieveTree(0)
myTree
trees.classify(myTree, labels, [1, 0])
trees.classify(myTree, labels, [1, 1])

reload(trees)
# Test storing the decision tree and reading it back
trees.storeTree(myTree, 'classifierStorage.txt')
trees.grabTree('classifierStorage.txt')

# Load the contact lens data
# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
lensesTree
treePlotter.createPlot(lensesTree)
コード例 #24
0
    # lenses_two = lenses[:]
    # lenses_labels_two = lenses_labels[:]
    lenses = rawdata.get_train_data(trains[i])
    decision_tree = trees.createTree(lenses, lenses_labels)
    # print treePlotter.createPlot(decision_tree)  # 循环打印决策树
    decision_trees.append(decision_tree)
# print len(decision_trees)  # 5
# print treePlotter.createPlot(decision_trees)
# For every test file, classify each row with the tree at the same index and
# collect the predictions; one list per test file is appended to `accuracies`.
for m in range(len(tests)):
    accu = []
    decs_tree = decision_trees[m]
    test_data = rawdata.get_test_data(tests[m])
    # print decs_tree  # decision tree
    # print test_data  # data under test
    for y in range(len(test_data)):
        # The last column is left out of the vector passed to classify().
        result = trees.classify(decs_tree, lenses_labels, test_data[y][:-1])
        # print 'result: %s' % result
        # print 'iteration: %d' % y
        accu.append(result)

    # print 'list length: %d' % len(accu)
    # print '*****'
    accuracies.append(accu)
# print len(accuracies)
# print accuracies[-1][-4]
print len(accuracies)
print '华丽的分割线'

test_labs = []
for p in range(len(tests)):
    test_lab = []
コード例 #25
0
#trees.splitDataSet(myDat, 0, 1)
#trees.splitDataSet(myDat, 0, 0)
#trees.splitDataSet(myDat, 0, 0)
#myDat = [[1, 'yes'], [1, 'yes'], [1, 'no'], [0, 'no'], [0, 'no']]
#myDat = [[1, 1, 'yes'], [1, 1, 'yes'], [0, 1, 'no'], [1, 0, 'no'], [1, 0, 'no']]
#print(trees.chooseBestFeatureToSplit(myDat))
#print(myDat)
#print(trees.createTree(myDat, labels))
#treePlotter.createPlot()
# Fetch stored sample tree number 0 from treePlotter.
myTree = treePlotter.retrieveTree(0)
#print(myTree)
#numLeaf = treePlotter.getNumLeafs(myTree)
#print(numLeaf)
#depth = treePlotter.getTreeDepth(myTree)
#print(depth)
#treePlotter.createPlot(myTree)
print(myTree)
# NOTE(review): the classification result is neither stored nor printed —
# confirm whether a print(...) was intended here.
trees.classify(myTree, labels, [0, 1])
#[1, 2, 3, 4, 5, 6]
#a = [1, 2, 3]
#b = [4, 5, 6]
#a.extend(b)
#print(a)

#[1, 2, 3, [4, 5, 6]]
#a = [1, 2, 3]
#b = [4, 5, 6]
#a.append(b)
#print(a)
コード例 #26
0
 def test_classify_simple(self):
     """Classify two feature vectors with stored sample tree 0 and check the labels."""
     # Only the labels are used here; the sample rows are discarded.
     _, labels = trees.load_simple_data()
     my_tree = tree_plot.retrieve_tree(0)
     for features, expected in (([1, 0], 'no'), ([1, 1], 'yes')):
         self.assertEqual(expected,
                          trees.classify(my_tree, labels, features))
コード例 #27
0
import trees
import treePlotter
# Only `labels` is used by the live code below; the data set itself is read
# solely by the commented-out experiments.
myData, labels = trees.createDataSet()
#print(myData)
print(labels)
#print(trees.calcShannonEnt(myData))
#retDataSet = trees.splitDataSet(myData,1,0)
#print(retDataSet)
#print(trees.chooseBestFeatureToSplit(myData))
#myTree = trees.createTree(myData,labels)
#print(myTree)
#treePlotter.createPlot()
#print(treePlotter.retrieveTree(1))
# Fetch stored sample tree number 0 instead of building one from the data.
myTree2 = treePlotter.retrieveTree(0)
print(myTree2)
classLabel = trees.classify(myTree2, labels, [1, 1])
print("classLabel:", classLabel)
# Write the tree to classifierStorage.txt via trees.storeTree.
trees.storeTree(myTree2, 'classifierStorage.txt')
#print(treePlotter.getNumLeafs(myTree2))
#print(treePlotter.getTreeDepth(myTree2))
#treePlotter.createPlot(myTree2)
コード例 #28
0
import trees
import pandas as pd

# --- data/1.csv: build a tree, then classify four hand-written samples ---
df = pd.read_csv('data/1.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

# For classification the last column name is dropped from the label list.
col = df.columns.tolist()[:-1]
input1 = ['a3', 'b1', 'c2', 'd1']  # N
input2 = ['a3', 'b3', 'c1', 'd1']  # N
input3 = ['a2', 'b1', 'c1', 'd2']  # Y
input4 = ['a1', 'b1', 'c1', 'd1']  # Y
for sample in (input1, input2, input3, input4):
    print(trees.classify(tree, col, sample))

# --- data/2.csv: same procedure with a second data set ---
df = pd.read_csv('data/2.csv')
col = df.columns.tolist()
data = df.values.tolist()
tree = trees.createTree(data, col)
print(tree)

# For classification the last column name is dropped from the label list.
col = df.columns.tolist()[:-1]
input1 = ['undergraduate', 'man', 'cet6', 'a1', 'b1']  # N
input2 = ['undergraduate', 'man', 'cet4', 'a1', 'b1']  # Y
input3 = ['postgraduate', 'man', 'no', 'a1', 'b2']  # Y
input4 = ['undergraduate', 'man', 'no', 'a1', 'b3']  # Y
input5 = ['undergraduate', 'man', 'no', 'a3', 'b3']  # Y
コード例 #29
0
# Columns: name, industry, profession, sex, photography, road trips, SNS guru, github, bypasses the GFW, avid reader, sci-fi fan, broad interests, brags, class

def getTrainingDatas():
    """Return the hard-coded training rows and their feature labels.

    Every row holds one value per label, in label order, followed by the
    class name as its last element.

    :return: ``(dataSet, labels)``; both lists are newly built on each call.
    """
    labels = [
        "industry", "profession", "sex",
        "camera", "drive tour", "SNS", "github", "over GFW", "reader",
        "Science fiction fan", "hobby", "brag",
    ]
    # (profession, the nine 0/1 flags from "camera" to "brag", class name).
    # Industry and sex are the same for every sample and are added below.
    samples = [
        ("gm",       (1, 1, 0, 0, 0, 0, 0, 1, 0), "liver"),
        ("engineer", (0, 1, 0, 0, 0, 0, 0, 0, 0), "empty"),
        ("sale",     (0, 1, 0, 0, 0, 0, 0, 0, 1), "liver"),
        ("founder",  (0, 1, 1, 0, 0, 0, 0, 1, 0), "boss"),
        ("phd",      (1, 0, 0, 0, 0, 1, 0, 1, 0), "liver, fake hacker"),
        ("engineer", (0, 1, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 0, 0, 1, 0, 1, 0), "fake hacker"),
        ("engineer", (0, 0, 0, 1, 1, 1, 1, 1, 0), "fake hacker"),
    ]
    dataSet = [
        ["it", profession, "man"] + list(flags) + [category]
        for profession, flags, category in samples
    ]
    return dataSet, labels

if __name__ == "__main__":
    # Both modes need the labels, so the training data is loaded once.
    dataSet, labels = getTrainingDatas()
    if len(sys.argv) > 1:
        # classify test, tortoise: any command-line argument selects this
        # mode, which reads the tree stored by an earlier training run.
        classmate = ["tortoise", "it", "engineer", "man", 1, 0, 0, 0, 0, 1, 0, 1, 1]
        tree = trees.grabTree("cm_tree.txt")
        # The first element is the name; the rest is the feature vector.
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("{} is \"{}\"".format(classmate[0], trees.classify(tree, labels, classmate[1:])))
    else:
        # training: build the tree from a copy of the labels, store and plot it
        tree = trees.createTree(dataSet, list(labels))
        trees.storeTree(tree, "cm_tree.txt")
        treePlotter.createPlot(tree)
コード例 #30
0
import trees
import matplotlib.pyplot as plt
import treePlotter
# d is the sample data set and l its label list; only l is used below.
d, l = trees.createDataSet()
# print(l)
# print (d)

# print(trees.createTree(d,l))
# Fetch stored sample tree number 0 and classify the feature vector [1, 0].
mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree, l, [1, 0]))
# print(treePlotter.getTreeDepth(mytree))
コード例 #31
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import trees

if __name__ == '__main__':
    # createDataSet1 returns the rows at index 0 and the labels at index 1.
    data = trees.createDataSet1()
    # print data
    dataSet = data[0]
    lables = data[1]
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(dataSet)
    # Print the result of both feature-selection functions for the same data.
    feature = trees.chooseBestFeatureToSplit(dataSet)
    feature1 = trees.chooseBestFeatureToSplit1(dataSet)
    print(feature)
    print(feature1)
    mytree = trees.createTree(dataSet, lables)
    print(mytree)

    print(trees.splitDataSet(dataSet, 0, 1))

    # classify() gets its own label list rather than the one passed to
    # createTree above.
    featLabels = ['outlook', 'temperature', 'humidity', 'windy']
    testVec = [0, 1, 0, 0]
    print(trees.classify(mytree, featLabels, testVec))
コード例 #32
0
# coding: utf-8
import trees
import treePlotter

# Only `labels` is used by the live code below.
myData, labels = trees.createDataSet()
# # print(trees.calcShannonEntropy(myData))
# # print(trees.splitDataSet(myData, 0, 1))
# # print(trees.chooseBestFeatureToSplit(myData))
# myTree = trees.createTree(myData, labels)
# print(myTree)
# Fetch stored sample tree number 0 instead of building one from the data.
myTree = treePlotter.retrieveTree(0)
# treePlotter.createPlot(myTree)
print(trees.classify(myTree, labels, [1, 0]))
コード例 #33
0
import tree_plotter
import random

# The context manager closes car.data even if parsing raises.
with open('car.data') as fr:
    # One record per line, fields separated by commas; the last field is the
    # class.
    lenses = [line.strip().split(',') for line in fr]
# One name per feature column. The previous list contained 'doors' twice and
# had no 'lug_boot', so two columns shared a single name.
labels = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
random.shuffle(lenses)
# After shuffling, the first 1000 rows train the tree and the rest test it.
train = lenses[:1000]
test = lenses[1000:]

# NOTE(review): create_tree may modify the label list it is given — confirm.
# A copy keeps `labels` complete for classify() either way.
lenses_tree = trees.create_tree(train, list(labels))
#print lenses_tree
#tree_plotter.create_plot(lenses_tree)

err = 0
total = 0
call = 0
for vec in test:
    real = vec[-1]
    test_vec = vec[:-1]
    ret = trees.classify(lenses_tree, labels, test_vec)
    total += 1
    # A '-' result is counted in the total but not as an answered row.
    if ret == '-':
        continue
    call += 1
    if real != ret:
        err += 1
print('总体,召回,错误数,召回率,错误率')
# Guard both ratios: with no test rows, or no answered rows, the divisions
# would raise ZeroDivisionError.
recall_rate = call * 1.0 / total if total else 0.0
error_rate = err * 1.0 / call if call else 0.0
# "%s" formatting reproduces the single spaces `print a, b, ...` put
# between the items.
print('%s %s %s %s %s' % (total, call, err, recall_rate, error_rate))
コード例 #34
0
# treePlotter.createPlot()

## get leafs num of tree
# b = treePlotter.getNumLeafs(tree)
# print(b)

## get depth of tree
# c = treePlotter.getTreeDepth(tree)
# print(c)

# treePlotter.createPlot(tree)
# tree['no surfacing'][2] = 'maybe'
# print(tree)

### 3. test classifier
a = trees.classify(tree, labels, [1, 1])
# print(a)

### 4. Storage classifier
# trees.storeTree(tree, 'classifierStorage.txt')
# b = trees.grabTree('classifierStorage.txt')
# print(b)

### 5. Example: Use decision-tree as contact lenses classifier
# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
コード例 #35
0
ファイル: runner.py プロジェクト: gopal151295/ml_nlp_practice
import treePlotter as TP

# TP.createPlot()
myTree = TP.retrieveTree(
    0)  #{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
n = TP.getNumLeafs(myTree)  # 3
d = TP.getTreeDepth(myTree)  # 2

TP.createPlot(myTree)

# classify
myDat, labels = DT.createDataSet()
myTree = TP.retrieveTree(
    0)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
class1 = DT.classify(myTree, labels, [1, 0])  # no
class2 = DT.classify(myTree, labels, [1, 1])  # yes

# storing the tree in pickled form
DT.storeTree(myTree, 'data/classifierStorage.txt')
grabedTree = DT.grabTree(
    'data/classifierStorage.txt'
)  # {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}

# lens tree
# The context manager closes the data file once the rows have been read.
with open('data/lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = DT.createTree(lenses, lensesLabels)
"""
output:
コード例 #36
0
import treePlotter


def test():
    """Print a fixed greeting to standard output."""
    # Written as a call, like every other print in this file, so the module
    # also parses on Python 3.
    print("hello world")


if __name__ == '__main__':
    # train_data, labels = trees.createDataSet()
    # my_trees = trees.createTree(train_data, labels)
    # print(my_trees)
    #trees.storeTree(my_trees, 'classifiermelon.txt')

    # The tree is read from a file that the live code does not write (the
    # training lines above are commented out), so the file must already exist.
    melon_tree = trees.grabTree('classifiermelon.txt')
    print(melon_tree)
    melon_labels = ['color', 'root', 'sound', 'texture', 'navel', 'touch']
    # One value per label, in label order.
    melon_feature = [1, 1, 1, 1, 1, 1]
    print("the predicted result is:",
          trees.classify(melon_tree, melon_labels, melon_feature))

    treePlotter.createPlot(melon_tree)
    # print(treePlotter.getNumLeafs(my_trees), treePlotter.getTreeDepth(my_trees))

    # ent = trees.calcShannonEnt(train_data)
    # feature1 = trees.splitDataSet(train_data, 0, 0)
    # feature2 = trees.splitDataSet(train_data, 0, 1)
    # best_feature = trees.chooseBestFeatureToSplit(train_data)
    # print(ent)
    # print(feature1, feature2)
    # print(best_feature)
コード例 #37
0
# Single-argument print(...) gives the same output on Python 2 and 3.
print('--信息增益')
# Difference of two values computed outside the visible lines.
ig = shannon - hxy
print(ig)
print('--找到最佳分类特征')
feature = trees.chooseBestFeatureToSplit(dateset)
print(labels[feature])
print('--创建决策树')
# The tree is built from a slice copy; the original list goes to classify().
labelsCopy = labels[:]
tree = trees.createTree(dateset, labelsCopy)
print(tree)
# print '--画图'
# treePlotter.createPlot(tree)
print('--用决策树测试数据')
#mytree = treePlotter.retrieveTree(0)
testdata = [4, 4, 1, 'cha']
label = trees.classify(tree, labels, testdata)
print(label)
print('--保存树')
trees.storeTree(tree, 'houseTree')
print('--测试隐形眼镜类型')
# for line in fr.readlines():
#     print line
#     row = line.strip().split('\t');
#     print row
# The context manager closes lenses.txt once the rows have been read.
with open('lenses.txt') as fr:
    # One record per line, fields separated by tabs.
    lenses = [inst.strip().split('\t') for inst in fr]
len_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
len_tree = trees.createTree(lenses, len_labels)
print(len_tree)
print(len_labels)
treePlotter.createPlot(len_tree)