Ejemplo n.º 1
0
 def show(self):
     """Plot the fitted decision tree with matplotlib.

     Raises:
         NotFittedError: if no tree has been built yet, i.e.
             ``self._tree`` is still ``None``.
     """
     # Identity test: ``== None`` would go through a user-defined
     # ``__eq__`` of the stored tree object, ``is None`` never does.
     if self._tree is None:
         raise NotFittedError("Estimator not fitted, call `fit` first")

     # Imported here so the plotting module is only needed when a plot
     # is actually requested.
     import treePlotter
     treePlotter.createPlot(self._tree)
Ejemplo n.º 2
0
    def show(self, outpdf):
        """Plot the fitted decision tree and pass ``outpdf`` to the plotter.

        Args:
            outpdf: forwarded unchanged as the second argument of
                ``treePlotter.createPlot`` (presumably the output PDF
                path -- confirm against ``treePlotter``).

        Raises:
            RuntimeError: if ``self._tree`` is ``None``, i.e. there is no
                tree to draw.
        """
        # The previous guard was ``if self._tree == None: pass`` -- a no-op
        # that let ``None`` fall through to the plotter.  Fail early with
        # a clear message instead.
        if self._tree is None:
            raise RuntimeError("Estimator not fitted, call `fit` first")

        # plot the tree using matplotlib
        import treePlotter

        treePlotter.createPlot(self._tree, outpdf)
Ejemplo n.º 3
0
Archivo: trees.py Proyecto: lhw4846/ML
def lenses():
    """Build, print and plot the decision tree for ``lenses.txt``.

    Reads tab-separated records from ``lenses.txt`` in the current working
    directory, builds a tree with ``createTree`` and draws it with
    ``treePlotter.createPlot``.
    """
    # ``with`` closes the file even if parsing fails; the handle used to
    # stay open for the rest of the process.
    with open('lenses.txt') as fr:
        records = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(records, lensesLabels)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(lensesTree)
    treePlotter.createPlot(lensesTree)
Ejemplo n.º 4
0
def main():
    """Train a tree on the built-in data, plot it and classify the test set."""
    samples, feature_names = createDataSet()
    # createTree changes the label list it receives, so train on a copy and
    # keep the complete list for the classification step below.
    training_labels = feature_names[:]
    tree = createTree(samples, training_labels)
    # (Persisting the tree via storeTree/grabTree and
    # 'classifierStorage.txt' is disabled here.)
    print('desicionTree:\n', tree)
    treePlotter.createPlot(tree)
    test_samples = createTestSet()
    predictions = classifyAll(tree, feature_names, test_samples)
    print('classifyResult:\n', predictions)
Ejemplo n.º 5
0
def main():
    """Build a decision tree from the ``car_data`` file and plot it."""
    labels = ['buying', 'maintenance', 'doors', 'persons', 'lug_boot', 'safety']
    # Every line of the file is one comma-separated record.
    with open('car_data') as source:
        data_set = [row.strip().split(',') for row in source.readlines()]
    decision_tree = create_tree(data_set, labels)
    treePlotter.createPlot(decision_tree)
Ejemplo n.º 6
0
def main_bak():
    """Build a tree from ``create_data_set()`` and plot it."""
    data_set, labels = create_data_set()
    # The tree is built from a copy of the label list.
    decision_tree = create_tree(data_set, labels[:])
    # (Validation through create_test_set()/classify_all() is disabled here.)
    treePlotter.createPlot(decision_tree)
Ejemplo n.º 7
0
	secondDict = inputTree[firstStr] #子树
	featIndex = featLabels.index(firstStr) #找该属性对应的序号

	for key in secondDict.keys(): #遍历子树,判断属于哪一分支
		if testVec[featIndex] == key: 
			if type(secondDict[key]).__name__=='dict': #该结点属于分支结点
				classLabel = classify(secondDict[key],featLabels,testVec)
			else: #叶子结点
				classLabel = secondDict[key]
	return classLabel

"""存储决策树"""
def storeTree(inputTree,fileName):
	"""Serialize ``inputTree`` to ``fileName`` with pickle.

	The file is opened in binary mode because pickle writes bytes: text
	mode ('w') raises ``TypeError`` on Python 3.  ``with`` closes the file
	even if ``pickle.dump`` fails.
	"""
	with open(fileName,'wb') as fw:
		pickle.dump(inputTree,fw)

"""从磁盘加载决策树"""
def grabTree(fileName):
	"""Load and return the object pickled in ``fileName``.

	The file is read in binary mode (required by pickle on Python 3) and
	is closed before returning.

	NOTE: unpickling can execute arbitrary code -- only load files that
	this program wrote itself.
	"""
	with open(fileName,'rb') as fr:
		return pickle.load(fr)


if __name__ == '__main__':
	# Demo: build a tree from the sample data, show it as text and as a
	# plot, then store it on disk.
	myDat,labels = createDataSet()
	mytree = createTree(myDat,labels)
	# Single-argument print(...) gives the same output on Python 2 and 3.
	print(mytree)
	createPlot(mytree)
	storeTree(mytree,"./tree.model")
Ejemplo n.º 8
0
def abalone_parts_test():
    """Prune a hard-coded abalone decision tree and compare it with the original.

    The tree literal below is pruned with ``PEP_result`` (on a deep copy, so
    ``model`` itself is kept for comparison), both trees are printed and
    plotted, and the result of ``accuracy_analysis`` is printed.
    """
    # Nested-dict tree: attribute name -> {branch condition -> subtree or
    # leaf string}.
    model = {
        'Viscera': {
            '>0.0145': {
                'Shell': {
                    '<=0.0345': {
                        'Viscera': {
                            '<=0.0285': ' 5 (50.0/9.0)',
                            '>0.0285': ' 4 (3.0)'
                        }
                    },
                    '>0.0345': {
                        'Sex': {
                            '=M': ' 6 (6.0/3.0)',
                            '=F': ' 5 (3.0)',
                            '=I': ' 5 (59.0/12.0)'
                        }
                    }
                }
            },
            '<=0.0145': {
                'Shucked': {
                    '>0.007': ' 4 (66.0/31.0)',
                    '<=0.007': {
                        'Shucked': {
                            '>0.0045': {
                                'Shucked': {
                                    '>0.005': {
                                        'Height': {
                                            '<=0.02': ' 4 (2.0)',
                                            '>0.02': ' 3 (4.0)'
                                        }
                                    },
                                    '<=0.005': ' 4 (3.0)'
                                }
                            },
                            '<=0.0045': {
                                'Height': {
                                    '<=0.025': ' 1 (2.0/1.0)',
                                    '>0.025': ' 3 (2.0)'
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    # --------- read the attribute list ---------
    name_path = './abalone.names'
    feature_list = get_Attribute(name_path)
    # --------- read the data set ---------
    path = './abalone_parts.data'
    datasets = read_data(path)
    # --------- PEP pruning (works on a deep copy of the model) ---------
    model_pruned = PEP_result(copy.deepcopy(model), feature_list, datasets)

    # Printed labels: "model before pruning=" / "model after pruning="
    print "剪枝前的模型=", model
    print "剪枝后的模型=", model_pruned

    createPlot(model)
    createPlot(model_pruned)
    # --------- accuracy of the unpruned vs. the pruned model ---------
    print "unpruned_accuracy,pruned_accuracy", accuracy_analysis(
        model, model_pruned, datasets, feature_list, name_path)
Ejemplo n.º 9
0
# plt.xlabel('count')
# plt.ylabel('result')
# plt.title('Hahaha Goooood!!!')
# fig.savefig('plot.svg')

# import matplotlib
# matplotlib.use('Agg')
# import matplotlib.pyplot as plt
# fig = plt.figure(1, facecolor='white')
# fig.clf()
# ax = plt.subplot(111, frameon=True)
# # ax.scatter([.2, .5], [.1, .5])
# plt.figure(1, figsize=(3,3))
# ax = plt.subplot(111)
# ax.annotate("Test", xy=(0.2, 0.2), xycoords='data', xytext=(0.8, 0.8),
# textcoords='data', size=20, va="center", ha="center",
# bbox=dict(boxstyle="round4", fc="w"),
# arrowprops=dict(arrowstyle="-|>",
# connectionstyle="arc3,rad=-0.2", fc="w"), )
# ax.annotate("This is my text", xy=(0.2, 0.1), xycoords='data',
#     xytext=(0.4, 0.3), textcoords='data', ha='center', va='center',
#     arrowprops=dict(arrowstyle="->", connectionstyle="arc3"), )

# fig.savefig('plot.svg')

# import textPlotter
# textPlotter.createPlot()

# Plot the sample tree returned by treePlotter.retrieveTree(0).
import treePlotter
treePlotter.createPlot(treePlotter.retrieveTree(0))
Ejemplo n.º 10
0
 def plot_tree(self):
     """Draw the generated CART tree.

     Calls ``figure`` with ``dpi=400`` and ``figsize=(12, 12)`` and then
     hands ``self.tree`` to ``treePlotter.createPlot``.
     """
     figure(figsize=(12, 12), dpi=400)
     treePlotter.createPlot(self.tree)
Ejemplo n.º 11
0
import treePlotter
import trees
# Build a tree from the sample data set in ``trees`` and plot it.
a1, a2 = trees.createDataSet()  # the two values are passed straight to createTree
b1 = trees.createTree(a1, a2)
treePlotter.createPlot(b1)
Ejemplo n.º 12
0
    return retDataSet

def createTree(dataSet, labels):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        dataSet: list of rows; the last element of every row is the class,
            the elements before it are feature values.
        labels: feature names, indexed like the feature columns.  The list
            is left unmodified.

    Returns:
        A class label when the recursion stops, otherwise a dict of the
        form ``{feature_name: {feature_value: subtree_or_label}}``.
    """
    classList = [example[-1] for example in dataSet]
    # All samples share one class: stop splitting.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # Only the class column is left but the classes still differ: return
    # the most frequent class.  Without this base case the function would
    # go on to choose a "best feature" although no feature remains.
    if len(dataSet[0]) == 1:
        from collections import Counter
        return Counter(classList).most_common(1)[0][0]
    # Index of the feature selected by chooseBestFeatureToSplit.
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    # Labels for the subtrees: everything except the feature just used.
    # The deletion happens on a copy so the caller's list stays intact.
    remainingLabels = labels[:]
    del remainingLabels[bestFeat]
    uniqueVals = set(example[bestFeat] for example in dataSet)
    for value in uniqueVals:
        # Each branch gets its own copy of the remaining labels.
        subLabels = remainingLabels[:]
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree

# Build the tree from a copy of the label list, so ``labels`` itself stays
# complete whatever createTree does with its argument; then print and plot it.
dataSet, labels = createDataSet()
labels_tmp = labels[:]
desicionTree = createTree(dataSet, labels_tmp)

print(desicionTree)
treePlotter.createPlot(desicionTree)
Ejemplo n.º 13
0
    firstStr = inputTree.keys()[0]
    secondDict = inputTree[firstStr]
    featureIndex = featureLabels.index(firstStr)
    for key in secondDict.keys():
        if testVector[featureIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                classLabel = classify(secondDict[key], featureLabels, testVector)
            else:
                classLabel = secondDict[key]
    return classLabel

def storeTree(inputTree, filename):
    """Store the trained decision tree ``inputTree`` in ``filename``.

    The tree is serialized with pickle.  The file is opened in binary mode
    because pickle writes bytes (text mode raises ``TypeError`` on
    Python 3), and ``with`` closes it even if ``pickle.dump`` fails.
    """
    import pickle
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)

def grabTree(filename):
    """Return the tree that was stored in ``filename``.

    The file is read in binary mode (required by pickle on Python 3) and
    is closed before returning.

    NOTE: unpickling can execute arbitrary code -- only load files that
    this program wrote itself.
    """
    import pickle
    with open(filename, 'rb') as fr:
        return pickle.load(fr)

if __name__=="__main__":
    fr = open('lenses.txt')     #open the file 'lenses.txt'
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]  #dispose the file.
    lensesLabels = ['age','prescript','astigmatic','tearRate']  #set labels.
    lensesTree = createTree(lenses, lensesLabels)       #create tree.
    print lensesTree,'\n\n\n'   #print lenses tree in text
    import treePlotter
    treePlotter.createPlot(lensesTree)  #print lenses tree in diagram
Ejemplo n.º 14
0
# coding=utf-8
from trees import *
import treePlotter

# dataSet, labels = createDataSet()
# # print calShannonEnt(dataSet)
# # print chooseBestFeatureToSplit(dataSet)
# tree = createTree(dataSet, labels)
# print tree
#
#
# # treePlotter.createPlot()
#
#
# print classify(tree,labels,[1,0])
# print classify(tree,labels,[1,1])

# Build and plot the tree for the lenses data.
# NOTE(review): the data file is addressed by an absolute, machine-specific path.
dataSet, labels = fileToDataSet("/media/yuan/Windows8_OS/machinelearninginaction/Ch03/lenses.txt")
tree = createTree(dataSet, labels)
treePlotter.createPlot(tree)
import trees
import treePlotter

# Walk through the helpers in ``trees`` on the sample data set, printing
# each result.
myDat, labels = trees.createDataSet()
print myDat
print trees.calcShannonEnt(myDat)
print trees.splitDataSet(myDat, 0, 1)
print trees.splitDataSet(myDat, 0, 0)
print trees.splitDataSet(myDat, 1, 1)
print trees.chooseBestFeatureToSplit(myDat)
print trees.createTree(myDat, labels)

# createPlot is called without a tree here.
treePlotter.createPlot()
print 'createPlot over'

# Inspect the stored example trees: print tree 1, then the leaf count and
# depth reported for tree 0.
print treePlotter.retrieveTree(1)
myTree = treePlotter.retrieveTree(0)
print treePlotter.getNumLeafs(myTree)
print treePlotter.getTreeDepth(myTree)
Ejemplo n.º 16
0
def main():
    # Build a tree from create_data_set() and plot it.
    data_set, labels = create_data_set()
    my_tree = create_tree(data_set, labels)
    #print "my_tree", my_tree
    treePlotter.createPlot(my_tree)
    if len(dataSet[0]) == 1: return majorityCnt(classList)   # [0, 0, 0, 0, 'N']; 遍历完所有特征时返回出现次数最多的
    bestFeat = chooseBestFeatureToSplit(dataSet)             # 0 -> 2; 选择最大的 gain ratio 对应的 feature
    bestFeatLabel = labels[bestFeat]                         # outlook -> windy
    myTree = {bestFeatLabel:{}}                              # 多重字典构建树 {'outlook': {0: 'N'
    del(labels[bestFeat])                                    # ['temperature', 'humidity', 'windy'] -> ['temperature', 'humidity']
    featValues = [example[bestFeat] for example in dataSet]  # [0, 0, 1, 2, 2, 2, 1]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        subLabels = labels[:]                                # ['temperature', 'humidity', 'windy']
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)  # 划分数据, 为下一层计算准备
    return myTree
##################################################################
## Test code below
dataSet, labels = createDataSet(); labels_tmp = labels[:]
desicionTree = createTree(dataSet, labels_tmp)  # tree building does del(labels[key]), so a temporary copy is passed
treePlotter.createPlot(desicionTree)  # draw the decision tree
def classify(inputTree, featLabels, testVec):
    """Classify one sample by walking down the decision tree.

    Args:
        inputTree: nested-dict tree of the form
            ``{feature_name: {feature_value: subtree_or_label}}``.
        featLabels: feature names; the position of a name is the index of
            that feature's value in ``testVec``.
        testVec: feature values of the sample to classify.

    Returns:
        The class label stored at the leaf that is reached.

    Raises:
        ValueError: if the tree's feature name is not in ``featLabels``, or
            if the sample's value for that feature has no branch in the
            tree (previously this surfaced as an ``UnboundLocalError``).
    """
    firstStr = list(inputTree.keys())[0]      # feature tested at this node
    secondDict = inputTree[firstStr]          # its branches, keyed by value
    featIndex = featLabels.index(firstStr)    # column of that feature
    featValue = testVec[featIndex]
    for key, branch in secondDict.items():
        if featValue == key:
            # A dict (or dict subclass) is an inner node: keep descending.
            if isinstance(branch, dict):
                return classify(branch, featLabels, testVec)
            # Anything else is the class label of a leaf.
            return branch
    raise ValueError(
        "no branch for %r = %r in the decision tree" % (firstStr, featValue))
print(classify(desicionTree, labels, [0, 1, 0, 0]))  # N
##################################################################
## Test with multiple samples
def classifyAll(inputTree, featLabels, testDataSet): # 输入 -> 决策树, 分类标签, 测试数据集; 输出 -> 决策结果; 描述 -> 跑决策树
    classLabelAll = []
    for testVec in testDataSet: classLabelAll.append(classify(inputTree, featLabels, testVec))  # 逐个 item 进行分类判断
Ejemplo n.º 18
0
import id3
import treePlotter

if __name__ == '__main__':
    # The first line of lenses.txt is split into ``names``; of every
    # following line the last tab-separated field goes to ``y`` and the
    # fields before it go to ``x``.  ``with`` closes the file afterwards;
    # the handle used to stay open.
    with open('lenses.txt') as f:
        names = f.readline().strip().split('\t')
        x = []
        y = []
        for ele in f.readlines():
            t = ele.strip().split('\t')
            x.append(t[:-1])
            y.append(t[-1])

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(names)
    print(x)
    print(y)

    Classifier = id3.ID3(names, x, y)
    ans = Classifier.result()
    print(ans)
    treePlotter.createPlot(ans)
Ejemplo n.º 19
0
def testC45(filename):
    """Build a tree from ``filename`` and plot it.

    The file is loaded with ``trees.file2strlist``; the two values it
    returns are passed to ``hw4.createTree`` and the resulting tree is
    drawn with ``treePlotter.createPlot``.
    """
    rows, class_labels = trees.file2strlist(filename)
    tree = hw4.createTree(rows, class_labels)
    treePlotter.createPlot(tree)
Ejemplo n.º 20
0
    #myTree['no surfacing'][3] = 'maybe'
    #tp.createPlot(myTree)

    #classify
    print '-------------- classify --------------------'
    myDat, labels = createDataSet()
    print 'labels', labels
    myTree =  tp.retrieveTree(0)
    print 'myTree ', myTree
    print '[1,0]: ', classify(myTree, labels, [1,0])
    print '[1,1]: ', classify(myTree, labels, [1,1])

    #store and grab tree
    print '-------------- store and grab tree --------------------'
    storeTree(myTree,'classifierStorate.txt')
    newTree = grabTree('classifierStorate.txt')
    print 'grabedTree: ', newTree


    #Example1: choose suitable lens type
    print '-------------- Eg1: choose suitable lens type --------------------'
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astog,atoc', 'tearRate']
    lensesTree = createTree(lenses,lensesLabels)
    print lensesTree
    tp.createPlot(lensesTree)



Ejemplo n.º 21
0
print(features)
print(trees.calcShannonEnt(mydata))
'''
mydata[0][-1] = 'maybe'
print(trees.calcShannonEnt(mydata))
'''
#print(trees.splitDataSet(mydata,0,1))

index = trees.chooseBestFeatureToSplit(mydata)
#print(index)
'''
mytree = trees.createTree(mydata,features)
print(mytree)
'''
import treePlotter
'''
mytree = treePlotter.retrieveTree(0)
treePlotter.createPlot(mytree)
mytree['no surfacing'][3] = 'maybe'
treePlotter.createPlot(mytree)
'''

# Classify two sample vectors with the example tree from treePlotter.
mytree = treePlotter.retrieveTree(0)
print(trees.classify(mytree,features,[0,0]))
print(trees.classify(mytree,features,[1,1]))

# Store the tree, load it again and print what comes back.
trees.storeTree(mytree, 'classifier.txt')
grabtree = trees.grabTree('classifier.txt')
print(grabtree)

Ejemplo n.º 22
0
# -*- coding: utf-8 -*-

import sys  
import os
import numpy as np
import matplotlib.pyplot as plt
import treePlotter as tp 

# Configure a utf-8 default encoding for output.
# NOTE(review): reload(sys) / sys.setdefaultencoding are Python 2 only.
reload(sys)
sys.setdefaultencoding('utf-8')

# Draw the tree

myTree = {'root': {0: 'leaf node', 1: {'level 2': {0: 'leaf node', 1: 'leaf node'}},2:{'level2': {0: 'leaf node', 1: 'leaf node'}}}}
tp.createPlot(myTree)

Ejemplo n.º 23
0

if __name__ == '__main__':
    # Encoding of the toy data set:
    #   weather: 0-sunny, 1-windy, 2-rainny
    #   parents: 0-yes, 1-no
    #   money:   0-rich, 1-poor
    #   decison: 0-cinema, 1-tennis, 2-stay in, 3-shopping
    data = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [2, 0, 1], [2, 1, 0],
                     [2, 0, 1], [1, 1, 1], [1, 1, 0], [1, 0, 0], [0, 1, 0]])
    label = np.array([0, 1, 0, 0, 2, 0, 0, 3, 0, 1])

    # Fit, print and plot one tree per method, in the order ID3, C45, CART.
    for method in ('ID3', 'C45', 'CART'):
        model = DecisionTree(cls_method=method)
        model.fit(data, label)
        print(model.tree)
        createPlot(model.tree)
Ejemplo n.º 24
0
def abalone_test(m):
    """Prune a hard-coded abalone decision tree with ``MEP_result`` and report accuracy.

    Args:
        m: forwarded unchanged to ``MEP_result`` as its last argument.

    The unpruned and pruned accuracies returned by ``accuracy_analysis`` are
    printed, and both trees are plotted and printed.
    """
    # Nested-dict tree: attribute name -> {branch condition -> subtree or
    # leaf string}.
    model = {
        'Viscera': {
            '>0.0145': {
                'Shell': {
                    '<=0.0345': {
                        'Viscera': {
                            '<=0.0285': ' 5 (50.0/9.0)',
                            '>0.0285': ' 4 (3.0)'
                        }
                    },
                    '>0.0345': {
                        'Sex': {
                            '=M': ' 6 (6.0/3.0)',
                            '=F': ' 5 (3.0)',
                            '=I': ' 5 (59.0/12.0)'
                        }
                    }
                }
            },
            '<=0.0145': {
                'Shucked': {
                    '>0.007': ' 4 (66.0/31.0)',
                    '<=0.007': {
                        'Shucked': {
                            '>0.0045': {
                                'Shucked': {
                                    '>0.005': {
                                        'Height': {
                                            '<=0.02': ' 4 (2.0)',
                                            '>0.02': ' 3 (4.0)'
                                        }
                                    },
                                    '<=0.005': ' 4 (3.0)'
                                }
                            },
                            '<=0.0045': {
                                'Height': {
                                    '<=0.025': ' 1 (2.0/1.0)',
                                    '>0.025': ' 3 (2.0)'
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    path = "./abalone_parts.data"
    name_path = "./abalone.names"
    fea_list = get_Attribute(name_path)
    datasets = read_data(path)
    pae_dict, class_count = pae_list(
        path)  # computed once, outside the recursion: it must be fixed before pruning and must not change while pruning

    #Attention,if you want to perform Laplace Law of succession,just set:
    #pae_list=1.0/m
    #m=counts of classes of the whole original datasets

    pae_lists = [pae_dict[key] for key in pae_dict]  # list of prior probabilities
    class_list = [key for key in class_count]  # list of the classes in the data set

    # Prune deep copies so ``model`` and ``datasets`` stay available below.
    model_pruned = MEP_result(copy.deepcopy(model), fea_list,
                              copy.deepcopy(datasets), pae_lists, class_list,
                              m)
    accuracy_unprune, accuracy_prune, misjudge_datasets = accuracy_analysis(
        model, model_pruned, copy.deepcopy(datasets), fea_list, name_path)
    print "accuracy_unprune=", accuracy_unprune
    print "accuracy_prune=", accuracy_prune
    createPlot(model)
    createPlot(model_pruned)
    print "model=", model
    print "model_pruned=", model_pruned
Ejemplo n.º 25
0
 def show(self, outpdf):
     """Plot the fitted decision tree and pass ``outpdf`` to the plotter.

     Args:
         outpdf: forwarded unchanged as the second argument of
             ``treePlotter.createPlot`` (presumably the output PDF
             path -- confirm against ``treePlotter``).

     Raises:
         RuntimeError: if ``self._tree`` is ``None``, i.e. there is no
             tree to draw.
     """
     # The previous guard was ``if self._tree == None: pass`` -- a no-op
     # that let ``None`` fall through to the plotter.  Fail early with a
     # clear message instead.
     if self._tree is None:
         raise RuntimeError("Estimator not fitted, call `fit` first")
     #plot the tree using matplotlib
     import treePlotter
     treePlotter.createPlot(self._tree, outpdf)
                elif each_round[i] == 1 or each_round[i] == -2:
                    players[i].Bullet -= 1
                elif each_round[i] == 2:
                    players[i].Bullet -= 2
                # print "Player %d's bullet: %d" % ((i + 1), players[i].Bullet)
        # 结算结果
        for i in range(people):
            if players[i].status == 1:
                if (each_round[i] + Max) > 0 and each_round[i] != Max:
                    print "Player %d lose" % (i + 1)
                    players[i].status = 0
                    losers += 1
        if losers == (people - 1):
            for i in range(people):
                # players[i].Rounds += 1
                if players[i].status == 1:
                    print "Player %d win" % (i + 1)
                    result[i] = 1
                    # players[i].Vtimes += 1
                round_history[i + iteration * people][-1] = str(result[i])
            break
        count += 1
    for i in range(people):
        players[i].Bullet = 0
        players[i].status = 1
    iteration += 1
    mytree = createTree(round_history[:7*iteration])

storeTree(mytree, "Tree.txt")
createPlot(mytree)
Ejemplo n.º 27
0
def showTree(tree):
    """Plot ``tree`` with ``treePlotter.createPlot``.

    The plotting module is imported on each call, inside the function.
    """
    import treePlotter as plotter
    plotter.createPlot(tree)
import treePlotter as tp

# Print the two example trees, then the leaf count and depth reported for
# tree 0.
print tp.retrieveTree(0)
print tp.retrieveTree(1)
myTree = tp.retrieveTree(0)
print tp.getNumLeafs(myTree)
print tp.getTreeDepth(myTree)
# tp.createPlot(myTree)
# Plot example tree 1.
tp.createPlot(tp.retrieveTree(1))
Ejemplo n.º 29
0
#######################################################Recursively build the decision tree#####################################################
# Recursively build the decision tree
def creatDecisionTree(dataSet, featureVec):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        dataSet: rows whose last element is the class of the sample.
        featureVec: passed to ``calcInformationGain`` and, unchanged, to the
            recursive calls; an empty value stops the recursion.

    Returns:
        A class value when recursion stops, otherwise
        ``{FeatureLabels[best_index]: {value: subtree_or_class}}``.
    """
    class_values = [row[-1] for row in dataSet]  # class of every sample
    first_class = class_values[0]
    if class_values.count(first_class) == len(class_values):
        # every sample belongs to the same class: stop recursing
        return first_class
    if len(featureVec) < 1:
        # no features left: stop recursing and take the majority class
        return MajorityCnt(class_values)

    best_index = calcInformationGain(dataSet, featureVec)  # best feature
    branches = {}
    tree = {FeatureLabels[best_index]: branches}
    subsets, _probabilities, values = splitDataSet(dataSet, best_index)
    for value in values:
        # the subset for a value sits at the value's position in ``values``
        position = values.index(value)
        branches[value] = creatDecisionTree(subsets[position], featureVec)
    return tree


#####################################################Contact-lens recommendation decision tree example#################################################
# Read the tab-separated samples from lenses.txt, build the tree, then
# print and plot it.  ``with`` closes the file once the lines are read;
# the handle used to stay open.
with open('lenses.txt') as fr:
    DataList = fr.readlines()
DataMat = [line.strip().split('\t') for line in DataList]
MyTree = creatDecisionTree(DataMat, [0, 1, 2, 3])
# Single-argument print(...) gives the same output on Python 2 and 3.
print(MyTree)
treePlotter.createPlot(MyTree)
Ejemplo n.º 30
0
#This test goes with Python3
import trees
import treePlotter

if '__main__' == __name__:
	# Build a tree from the sample data set in ``trees`` and plot it.
	dataSet, labels = trees.createDataSet()
	decisionTree = trees.createTree(dataSet, labels)
	treePlotter.createPlot(decisionTree)
Ejemplo n.º 31
0
    if isinstance(valueOfFeat, dict): 
        classLabel = classify(valueOfFeat, featLabels, testVec)
    else: classLabel = valueOfFeat
    return classLabel

def storeTree(inputTree,filename):
    """Serialize ``inputTree`` to ``filename`` with pickle.

    The file is opened in binary mode because pickle writes bytes (text
    mode raises ``TypeError`` on Python 3), and ``with`` closes it even if
    ``pickle.dump`` fails.
    """
    import pickle
    with open(filename,'wb') as fw:
        pickle.dump(inputTree,fw)
    
def grabTree(filename):
    """Load and return the object pickled in ``filename``.

    The file is read in binary mode (required by pickle on Python 3) and
    is closed before returning.

    NOTE: unpickling can execute arbitrary code -- only load files that
    this program wrote itself.
    """
    import pickle
    with open(filename,'rb') as fr:
        return pickle.load(fr)

if __name__ == '__main__':
    # Build and plot the tree for the tab-separated records in lenses.txt.
    # ``with`` closes the file once the records are parsed; the handle
    # used to stay open.
    with open('lenses.txt') as fr:
        lenses=[inst.strip().split('\t') for inst in fr]
    lensesLabels=['age','prescript','astigmatic','tearRate']
    lensesTree=createTree(lenses,lensesLabels)
    treePlotter.createPlot(lensesTree)

    
    
Ejemplo n.º 32
0
def credit_a_test():
    """Prune a hard-coded credit-approval decision tree and report accuracy.

    The tree literal below is pruned with ``PEP_result`` (on deep copies of
    the model and the data), the two accuracies returned by
    ``accuracy_analysis`` are printed, and both trees are printed and
    plotted.
    """
    # Nested-dict tree: attribute name -> {branch condition -> subtree or
    # leaf string}.
    model = {
        'A9': {
            '=t': {
                'A15': {
                    '>228': ' + (106.0/2.0)',
                    '<=228': {
                        'A11': {
                            '>3': {
                                'A15': {
                                    '>4': {
                                        'A15': {
                                            '<=5': ' - (2.0)',
                                            '>5': {
                                                'A7': {
                                                    '=v': ' + (5.0)',
                                                    '=z': ' - (1.0)',
                                                    '=dd': ' + (0.0)',
                                                    '=ff': ' + (0.0)',
                                                    '=o': ' + (0.0)',
                                                    '=n': ' + (0.0)',
                                                    '=h': ' + (3.0)',
                                                    '=bb': ' + (1.0)',
                                                    '=j': ' + (0.0)'
                                                }
                                            }
                                        }
                                    },
                                    '<=4': ' + (25.0)'
                                }
                            },
                            '<=3': {
                                'A4': {
                                    '=u': {
                                        'A7': {
                                            '=v': {
                                                'A14': {
                                                    '<=110': ' + (18.0/1.0)',
                                                    '>110': {
                                                        'A15': {
                                                            '>8': ' + (4.0)',
                                                            '<=8': {
                                                                'A6': {
                                                                    '=aa': {
                                                                        'A2': {
                                                                            '<=41':
                                                                            ' - (3.0)',
                                                                            '>41':
                                                                            ' + (2.0)'
                                                                        }
                                                                    },
                                                                    '=w': {
                                                                        'A12':
                                                                        {
                                                                            '=t':
                                                                            ' - (2.0)',
                                                                            '=f':
                                                                            ' + (3.0)'
                                                                        }
                                                                    },
                                                                    '=q': {
                                                                        'A12':
                                                                        {
                                                                            '=t':
                                                                            ' + (4.0)',
                                                                            '=f':
                                                                            ' - (2.0)'
                                                                        }
                                                                    },
                                                                    '=ff':
                                                                    ' - (0.0)',
                                                                    '=r':
                                                                    ' - (0.0)',
                                                                    '=i':
                                                                    ' - (0.0)',
                                                                    '=x':
                                                                    ' - (0.0)',
                                                                    '=e':
                                                                    ' - (0.0)',
                                                                    '=d':
                                                                    ' - (2.0)',
                                                                    '=c':
                                                                    ' - (4.0/1.0)',
                                                                    '=m': {
                                                                        'A13':
                                                                        {
                                                                            '=g':
                                                                            ' + (2.0)',
                                                                            '=p':
                                                                            ' - (0.0)',
                                                                            '=s':
                                                                            ' - (5.0)'
                                                                        }
                                                                    },
                                                                    '=cc':
                                                                    ' + (2.0/1.0)',
                                                                    '=k':
                                                                    ' - (2.0)',
                                                                    '=j':
                                                                    ' - (0.0)'
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            },
                                            '=z': ' + (1.0)',
                                            '=bb': {
                                                'A14': {
                                                    '<=164': ' + (3.4/0.4)',
                                                    '>164': ' - (5.6)'
                                                }
                                            },
                                            '=ff': ' - (1.0)',
                                            '=o': ' + (0.0)',
                                            '=n': ' + (0.0)',
                                            '=h': ' + (18.0)',
                                            '=dd': ' + (0.0)',
                                            '=j': ' - (1.0)'
                                        }
                                    },
                                    '=l': ' + (0.0)',
                                    '=y': {
                                        'A13': {
                                            '=g': {
                                                'A14': {
                                                    '<=204': ' - (16.0/1.0)',
                                                    '>204': ' + (5.0/1.0)'
                                                }
                                            },
                                            '=p': ' - (0.0)',
                                            '=s': ' + (2.0)'
                                        }
                                    },
                                    '=t': ' + (0.0)'
                                }
                            }
                        }
                    }
                }
            },
            '=f': {
                'A13': {
                    '=g': ' - (204.0/10.0)',
                    '=p': {
                        'A2': {
                            '<=36': ' - (4.0/1.0)',
                            '>36': ' + (2.0)'
                        }
                    },
                    '=s': {
                        'A4': {
                            '=u': {
                                'A6': {
                                    '=aa': ' - (0.0)',
                                    '=w': ' - (0.0)',
                                    '=q': ' - (1.0)',
                                    '=ff': ' - (2.0)',
                                    '=r': ' - (0.0)',
                                    '=i': ' - (3.0)',
                                    '=x': ' + (1.0)',
                                    '=e': ' - (0.0)',
                                    '=d': ' - (2.0)',
                                    '=c': ' - (3.0)',
                                    '=m': ' - (3.0)',
                                    '=cc': ' - (1.0)',
                                    '=k': ' - (4.0)',
                                    '=j': ' - (0.0)'
                                }
                            },
                            '=l': ' + (1.0)',
                            '=y': ' - (8.0/1.0)',
                            '=t': ' - (0.0)'
                        }
                    }
                }
            }
        }
    }
    path = "./crx.data"
    name_path = "./crx.names"
    fea_list = get_Attribute(name_path)
    datasets = read_data(path)
    # Prune deep copies so ``model`` and ``datasets`` stay available below.
    model_pruned = PEP_result(copy.deepcopy(model), fea_list,
                              copy.deepcopy(datasets))

    accuracy_unprune, accuracy_prune = accuracy_analysis(
        model, model_pruned, datasets, fea_list, name_path)
    print "accuracy_unprune=", accuracy_unprune
    print "accuracy_prune=", accuracy_prune
    print "model=", model
    print "model_pruned=", model_pruned
    createPlot(model)
    createPlot(model_pruned)
Ejemplo n.º 33
0
        rootNode = {}
        bestPropIdx=self._chooseBestProp(dataArray)
        rootNode[bestPropIdx] = {}
        uniqValues=np.unique(dataArray[:,bestPropIdx])
        for oneValue in uniqValues:
            splitDataArray=self._splitData(dataArray,bestPropIdx,oneValue)
            rootNode[bestPropIdx][oneValue]=self.createTree(splitDataArray)
        return rootNode
    
def loadData():
    """Read ``decisiontree.txt`` and return its rows as lists of strings.

    Every line is stripped of surrounding whitespace and split on tab
    characters.

    Returns:
        list: one list of string fields per line of the file.
    """
    # The context manager guarantees the handle is closed, even when reading
    # fails; the original never closed the file.
    with open("decisiontree.txt") as fr:
        # Iterating the handle reads line by line instead of materialising
        # the whole file first with readlines().
        return [line.strip().split('\t') for line in fr]

if __name__ == '__main__':
    # Load the tab-separated rows and echo them for inspection.
    data = loadData()
    print(data)
    # createTree is given a NumPy array built from the loaded rows.
    dataarray = np.array(data)
    dt=DecisionTree()
    tree=dt.createTree(dataarray)
    print(tree)
    # Plotting modules are imported only when the script is run directly.
    import treePlotter as tp
    # NOTE(review): `plt` is not used in the visible part of this script.
    import matplotlib.pyplot as plt
 
    # Draw the nested-dict tree.
    tp.createPlot(tree)
Ejemplo n.º 34
0
def getTreePlt(tree):
    """Draw ``tree`` via ``treePlotter.createPlot`` and return its result."""
    plot_result = treePlotter.createPlot(tree)
    return plot_result
Ejemplo n.º 35
0
        new_data1 = data[data[best_feature] == value]
        new_data2 = new_data1.drop(best_feature, axis=1)
        if len(list(new_data2.columns)) > 1:
            feature_tree[best_feature][value] = decision_tree(new_data2, col_y)
        else:
            feature_tree[best_feature][value] = list(new_data2[col_y])[0]
            break
    return feature_tree


import pandas as pd


def createData():
    """Build the six-row toy data set as a pandas DataFrame.

    The frame has the columns ``X1`` .. ``X5`` followed by ``target``.
    """
    column_names = ['X1', 'X2', 'X3', 'X4', 'X5', 'target']
    # One tuple per sample, in column order.
    samples = [
        (1, 1, 'yes', 'A', 'M', 'Y'),
        (1, 1, 'yes', 'B', 'FM', 'Y'),
        (1, 0, 'no', 'B', 'M', 'Y'),
        (0, 1, 'no', 'B', 'M', 'N'),
        (0, 1, 'no', 'A', 'FM', 'N'),
        (0, 1, 'yes', 'A', 'M', 'N'),
    ]
    # Transpose the samples back into a column-name -> list-of-values mapping.
    by_column = {
        name: list(values)
        for name, values in zip(column_names, zip(*samples))
    }
    return pd.DataFrame(by_column)


# Build the sample frame and grow a tree, passing 'target' as the second
# argument of decision_tree.
data = createData()
tree = decision_tree(data, 'target')

# Draw the resulting tree.
import treePlotter
treePlotter.createPlot(tree)
    print(calcShannonEnt(myDat))

    myDat[0][-1] = 'maybe'
    print(calcShannonEnt(myDat))
    '''测试按照给定特征划分数据集函数'''
    myDat, labels = createDataSet()
    print(splitDataSet(myDat, 0, 1))
    print(splitDataSet(myDat, 0, 0))
    '''测试最好的数据集划分方式'''
    print(myDat)
    print(chooseBestFeatureToSplit(myDat))
    '''测试树'''
    myDat, labels = createDataSet()
    myTree = createTree(myDat, labels)
    print(myTree)
    '''测试分类函数'''
    myDat, labels = createDataSet()
    myTree = treePlotter.retrieveTree(0)
    print(classify(myTree, labels, [1, 0]))
    print(classify(myTree, labels, [1, 1]))
    '''测试pick模块储存决策树'''
    storeTree(myTree, 'classifierStorage.txt')
    print(grabTree('classifierStorage.txt'))
    '''使用决策树预测隐形眼镜类型'''
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print(lensesTree)
    print(treePlotter.createPlot(lensesTree))
Ejemplo n.º 37
0
def lense_test():
    """Build a decision tree from ``lenses.txt`` and plot it.

    The file is read as tab-separated rows, which are handed to
    ``createTree`` together with the four feature labels; the resulting tree
    is drawn with ``treePlotter.createPlot``.
    """
    # Close the file deterministically; the original left the handle open.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    treePlotter.createPlot(lensesTree)
Ejemplo n.º 38
0
    if classList.count(classList[0]) == len(classList):
        return classList[0]#stop splitting when all of the classes are equal
    if len(dataSet[0]) == 1: #stop splitting when there are no more features in dataSet
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel:{}}
    del(labels[bestFeat])
    featValues = [example[bestFeat] for example in dataSet]
    uniqueVals = set(featValues)   # 得到列表包含所有属性值
    for value in uniqueVals:
        subLabels = labels[:]       #copy all of labels, so trees don't mess up existing labels
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value),subLabels)
    return myTree

if __name__ == '__main__':
    # `del` removes the element at an index, whereas `list.remove` removes
    # the first element equal to the given value.
    nums = [1, 0, 2, 0, 3, 0, 0]
    nums.remove(0)
    print (nums) #[1, 2, 0, 3, 0, 0]
    del(nums[0])
    print(nums)  # [2, 0, 3, 0, 0]

    # Predict the contact-lens type.
    # The context manager closes the file; the original never closed it.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print (lensesTree)
    treePlotter.createPlot(lensesTree)  # draw the tree with treePlotter
Ejemplo n.º 39
0
    inputTree: pre-generated decision tree
    featLabels: labels
    testVec: test dataset
    """
    firstStr = inputTree.keys()[0]
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    for key in secondDict.keys():
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                # contine split
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                classLabel = secondDict[key]
    return classLabel


if __name__ == '__main__':
    # example 1
    # dataset, labels = createDataSet()
    # tree = createTree(dataset, labels)
    # tree['no surfaceing'][3] = 'maybe'
    # createPlot(tree)
    # example 2: build and plot a tree from the tab-separated lenses data.
    # The context manager closes the file; the original never closed it.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    createPlot(lensesTree)
Ejemplo n.º 40
0
            leaves_num, leaves = self.get_leaves(sub_tree)
            leaves_count += leaves_num
            leaves += leaves
        return leaves_count, leaves


if __name__ == '__main__':
    # Load the training data, the test data and the reference labels as
    # plain NumPy arrays.
    train_set = pd.read_csv("train.csv").values
    test_set = pd.read_csv("test.csv").values
    gender_submission = pd.read_csv("gender_submission.csv").values
    # NOTE(review): pd.read_csv consumes the header row by default, so this
    # slice appears to drop the first data row rather than a header. The
    # `i + 1` offset below stays aligned with it. Confirm this is intended.
    test_set = test_set[1:]
    # NOTE(review): this rebinds the name `decision_tree` from the callable
    # to the object it returns.
    decision_tree = decision_tree(
        train_set, id_index=0, label_index=1, algorithm='c45')
    decision_tree.fit()
    # Flip to True to draw the fitted tree.
    debug = False
    if debug:
        tp.createPlot(decision_tree.tree())
    submission = []
    # NOTE(review): this header row is stored as data while `columns=` is
    # also passed to the DataFrame below, so the written CSV presumably
    # contains the header twice. Confirm.
    submission.append(['PassengerId', 'Survived'])
    right_count = 0
    count = len(test_set)
    for i in range(count):
        label = decision_tree.classifier(test_set[i])
        # Column 0 of each test row is used as the passenger id.
        submission.append([test_set[i][0], label])
        # Compare against column 1 of the matching reference row.
        if label == gender_submission[i + 1][1]:
            right_count += 1
    submission_df = pd.DataFrame(data=submission,
                                 columns=['PassengerId', 'Survived'])
    submission_df.to_csv('submission.csv', index=False)
    # Report "<matches>/<total>".
    print(str(right_count) + "/" + str(count))
Ejemplo n.º 41
0
Archivo: Dt.py Proyecto: NedHuang/CS686
    print("CARTTree:{}".format(true_count))
    true_count = 0
    for i in range(len(test_label)):
        predict = classify(test_data[i], tree2)
        if predict == test_label[i]:
            true_count += 1
    print("C3Tree:{}".format(true_count))

    #print(attribute_based_on_Giniindex(X[49:51, :], y[49:51]))
    from pylab import *
    mpl.rcParams['font.sans-serif'] = ['SimHei']  # 指定默认字体
    mpl.rcParams['axes.unicode_minus'] = False  # 解决保存图像时负号'-'显示为方块的问题

    
    import matplotlib.pyplot as plt
    treePlotter.createPlot(a, 1)
    treePlotter.createPlot(b, 2)
    # 剪枝处理
    pruning(tree=tree1, alpha=4)
    # pruning(tree=tree2, alpha=4)
    a = printtree(tree=tree1)
    # b = printtree(tree=tree2)

    true_count = 0
    for i in range(len(test_label)):
        predict = classify(test_data[i], tree1)
        if predict == test_label[i]:
            true_count += 1
    print("CARTTree:{}".format(true_count))
    true_count = 0
    # for i in range(len(test_label)):
Ejemplo n.º 42
0
    featIndex = featLabels.index(firstStr)
    key = testVec[featIndex]
    valueOfFeat = secondDict[key]
    if isinstance(valueOfFeat, dict):
        classLabel = classify(valueOfFeat, featLabels, testVec)
    else:
        classLabel = valueOfFeat
    return classLabel


def storeTree(inputTree, filename):
    """Serialize ``inputTree`` to ``filename`` with pickle.

    Args:
        inputTree: the object (typically a nested-dict tree) to store.
        filename: path of the file to write; it is overwritten.
    """
    import pickle
    # The context manager closes the file even if pickling raises; the
    # original leaked the handle in that case.
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load and return the object pickled in ``filename``.

    Args:
        filename: path of a file previously written with pickle.

    Returns:
        The unpickled object.
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager closes the file; the original never closed it.
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


if __name__ == '__main__':
    # Build, print and plot a tree from the tab-separated lenses data.
    # The context manager closes the file; the original never closed it.
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print(lensesTree)
    createPlot(lensesTree)
Ejemplo n.º 43
0
def main():
    """Build a decision tree from the module-level data and plot it.

    ``createTree`` is given a slice copy of ``labels`` rather than the
    module-level object itself.
    """
    labels_copy = labels[:]
    tree = createTree(dataSet, labels_copy)
    treePlotter.createPlot(tree)
Ejemplo n.º 44
0
    # plotTree.totalW = float(getNumLeafs(inTree))
    # plotTree.totalD = float(getTreeDepth(inTree))
    # plotTree.xOff = -0.5 / plotTree.totalW;
    # plotTree.yOff = 1.0;
    # plotTree(inTree, (0.5, 1.0), '')
    # plt.show()
    
    
    
import sys
# # from tree import *
 
# reload(sys)

# sys.setdefaultencoding('utf-8')

# NOTE(review): reloading `sys` looks like a leftover of the Python 2
# setdefaultencoding idiom commented out above; confirm it is still needed.
import importlib
importlib.reload(sys)
from pylab import *
 
mpl.rcParams['font.sans-serif'] = ['SimHei']  # set the default font
mpl.rcParams['axes.unicode_minus'] = False  # avoid the minus sign '-' being rendered as a box in saved figures
##################################
 
# Test building the decision tree
myDat, labels = createDataSet()
myTree = createTree(myDat, labels)
# Plot the decision tree
import treePlotter
treePlotter.createPlot(myTree)
Ejemplo n.º 45
0
        return classList[0]
    # 否则,为每个最优特征取值,递归地创建子树
    else:
        for value in bestFeatValues:
            subDataSet = splitDataSet(dataSet, bestFeat, value)
            subFeatures = features[:]
            myTree[bestFeatName][value] = createTree(subDataSet, subFeatures,
                                                     chooseBestFeature)
    ### END CODE HERE ###

    return myTree


# Build an ID3 tree (chooseBestFeature_ID3 as the selection function) on the
# first data set and plot it.
data1, labels1 = createDataSet1()
ID3Tree = createTree(data1, labels1, chooseBestFeature_ID3)
treePlotter.createPlot(ID3Tree)

# ### <center> Sample Output:</center>
# ![tree0.png](attachment:tree0.png)

# ### Task 3: C4.5 tree
#
# Because ID3 selects attributes by information gain, it is biased toward attributes with many distinct values; the following intuitive example illustrates this.
#
# Suppose the data set is changed as shown below, so that one attribute (e.g. wind speed) takes a different value for every sample, and build an ID3 tree.

# In[7]:


def createDataSet2():
    data = [[0, 0, 1, 0, 'yes'], [1, 1, 0, 1, 'yes'], [0, 0, 0, 2, 'no'],
Ejemplo n.º 46
0
import treePlotter


def test():
    """Print a fixed greeting; a minimal smoke check for the module."""
    # The call form works on Python 2 and 3; the bare print statement is a
    # syntax error on Python 3 and differs from the print(...) calls used in
    # the rest of this script.
    print("hello world")


if __name__ == '__main__':
    # train_data, labels = trees.createDataSet()
    # my_trees = trees.createTree(train_data, labels)
    # print(my_trees)
    #trees.storeTree(my_trees, 'classifiermelon.txt')

    # Load the tree stored in 'classifiermelon.txt' (see the commented-out
    # storeTree call above) and show it.
    melon_tree = trees.grabTree('classifiermelon.txt')
    print(melon_tree)
    # Feature labels and one sample vector passed to the classifier.
    melon_labels = ['color', 'root', 'sound', 'texture', 'navel', 'touch']
    melon_feature = [1, 1, 1, 1, 1, 1]
    print("the predicted result is:",
          trees.classify(melon_tree, melon_labels, melon_feature))

    # Draw the loaded tree.
    treePlotter.createPlot(melon_tree)
    # print(treePlotter.getNumLeafs(my_trees), treePlotter.getTreeDepth(my_trees))

    # ent = trees.calcShannonEnt(train_data)
    # feature1 = trees.splitDataSet(train_data, 0, 0)
    # feature2 = trees.splitDataSet(train_data, 0, 1)
    # best_feature = trees.chooseBestFeatureToSplit(train_data)
    # print(ent)
    # print(feature1, feature2)
    # print(best_feature)
Ejemplo n.º 47
0
    print 'treeDepth ', tp.getTreeDepth(myTree)
    #tp.createPlot(myTree)

    #update dict and plot again
    #myTree['no surfacing'][3] = 'maybe'
    #tp.createPlot(myTree)

    #classify
    print '-------------- classify --------------------'
    myDat, labels = createDataSet()
    print 'labels', labels
    myTree = tp.retrieveTree(0)
    print 'myTree ', myTree
    print '[1,0]: ', classify(myTree, labels, [1, 0])
    print '[1,1]: ', classify(myTree, labels, [1, 1])

    #store and grab tree
    print '-------------- store and grab tree --------------------'
    storeTree(myTree, 'classifierStorate.txt')
    newTree = grabTree('classifierStorate.txt')
    print 'grabedTree: ', newTree

    #Example1: choose suitable lens type
    print '-------------- Eg1: choose suitable lens type --------------------'
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astog,atoc', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    print lensesTree
    tp.createPlot(lensesTree)
Ejemplo n.º 48
0
import trees
import treePlotter

# NOTE: absolute, machine-specific path; adjust it to wherever lenses.txt
# lives on the current machine.
# The context manager closes the file; the original never closed it.
with open(
    r'C:\Users\MILI\Desktop\Machine learning\MachineLearningInAction-Camp\Week2\Reference Code\lenses.txt'
) as fr:
    # Each line is one tab-separated sample.
    lenses = [inst.strip().split('\t') for inst in fr]
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = trees.createTree(lenses, lensesLabels)
print(lensesTree)
treePlotter.createPlot(lensesTree)
Ejemplo n.º 49
0
import treePlotter

# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload

# Re-import treePlotter so edits made during an interactive session are
# picked up, then plot the first stored sample tree.
reload(treePlotter)
myTree = treePlotter.retrieveTree(0)
treePlotter.createPlot(myTree)
Ejemplo n.º 50
0
def credit_a_test(m):
    """Prune a hard-coded decision tree with ``MEP_result`` and report results.

    The unpruned tree is the literal ``model`` below: nested dicts keyed by
    attribute name and then by branch condition, with string leaves. The
    function reads ``./crx.names`` and ``./crx.data``, prunes a deep copy of
    the model, prints the accuracy of both trees and the misjudged samples,
    and plots both trees with ``createPlot``.

    Args:
        m: forwarded unchanged as the last argument of ``MEP_result``
            (presumably the ``m`` parameter of the pruning estimate; verify
            against ``MEP_result``).
    """
    # Unpruned model. Leaves look like ' + (18.0/1.0)'; presumably a class
    # sign followed by sample counts (TODO confirm the leaf format).
    model = {
        'A9': {
            '=t': {
                'A15': {
                    '>228': ' + (106.0/2.0)',
                    '<=228': {
                        'A11': {
                            '>3': {
                                'A15': {
                                    '>4': {
                                        'A15': {
                                            '<=5': ' - (2.0)',
                                            '>5': {
                                                'A7': {
                                                    '=v': ' + (5.0)',
                                                    '=z': ' - (1.0)',
                                                    '=dd': ' + (0.0)',
                                                    '=ff': ' + (0.0)',
                                                    '=o': ' + (0.0)',
                                                    '=n': ' + (0.0)',
                                                    '=h': ' + (3.0)',
                                                    '=bb': ' + (1.0)',
                                                    '=j': ' + (0.0)'
                                                }
                                            }
                                        }
                                    },
                                    '<=4': ' + (25.0)'
                                }
                            },
                            '<=3': {
                                'A4': {
                                    '=u': {
                                        'A7': {
                                            '=v': {
                                                'A14': {
                                                    '<=110': ' + (18.0/1.0)',
                                                    '>110': {
                                                        'A15': {
                                                            '>8': ' + (4.0)',
                                                            '<=8': {
                                                                'A6': {
                                                                    '=aa': {
                                                                        'A2': {
                                                                            '<=41':
                                                                            ' - (3.0)',
                                                                            '>41':
                                                                            ' + (2.0)'
                                                                        }
                                                                    },
                                                                    '=w': {
                                                                        'A12':
                                                                        {
                                                                            '=t':
                                                                            ' - (2.0)',
                                                                            '=f':
                                                                            ' + (3.0)'
                                                                        }
                                                                    },
                                                                    '=q': {
                                                                        'A12':
                                                                        {
                                                                            '=t':
                                                                            ' + (4.0)',
                                                                            '=f':
                                                                            ' - (2.0)'
                                                                        }
                                                                    },
                                                                    '=ff':
                                                                    ' - (0.0)',
                                                                    '=r':
                                                                    ' - (0.0)',
                                                                    '=i':
                                                                    ' - (0.0)',
                                                                    '=x':
                                                                    ' - (0.0)',
                                                                    '=e':
                                                                    ' - (0.0)',
                                                                    '=d':
                                                                    ' - (2.0)',
                                                                    '=c':
                                                                    ' - (4.0/1.0)',
                                                                    '=m': {
                                                                        'A13':
                                                                        {
                                                                            '=g':
                                                                            ' + (2.0)',
                                                                            '=p':
                                                                            ' - (0.0)',
                                                                            '=s':
                                                                            ' - (5.0)'
                                                                        }
                                                                    },
                                                                    '=cc':
                                                                    ' + (2.0/1.0)',
                                                                    '=k':
                                                                    ' - (2.0)',
                                                                    '=j':
                                                                    ' - (0.0)'
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            },
                                            '=z': ' + (1.0)',
                                            '=bb': {
                                                'A14': {
                                                    '<=164': ' + (3.4/0.4)',
                                                    '>164': ' - (5.6)'
                                                }
                                            },
                                            '=ff': ' - (1.0)',
                                            '=o': ' + (0.0)',
                                            '=n': ' + (0.0)',
                                            '=h': ' + (18.0)',
                                            '=dd': ' + (0.0)',
                                            '=j': ' - (1.0)'
                                        }
                                    },
                                    '=l': ' + (0.0)',
                                    '=y': {
                                        'A13': {
                                            '=g': {
                                                'A14': {
                                                    '<=204': ' - (16.0/1.0)',
                                                    '>204': ' + (5.0/1.0)'
                                                }
                                            },
                                            '=p': ' - (0.0)',
                                            '=s': ' + (2.0)'
                                        }
                                    },
                                    '=t': ' + (0.0)'
                                }
                            }
                        }
                    }
                }
            },
            '=f': {
                'A13': {
                    '=g': ' - (204.0/10.0)',
                    '=p': {
                        'A2': {
                            '<=36': ' - (4.0/1.0)',
                            '>36': ' + (2.0)'
                        }
                    },
                    '=s': {
                        'A4': {
                            '=u': {
                                'A6': {
                                    '=aa': ' - (0.0)',
                                    '=w': ' - (0.0)',
                                    '=q': ' - (1.0)',
                                    '=ff': ' - (2.0)',
                                    '=r': ' - (0.0)',
                                    '=i': ' - (3.0)',
                                    '=x': ' + (1.0)',
                                    '=e': ' - (0.0)',
                                    '=d': ' - (2.0)',
                                    '=c': ' - (3.0)',
                                    '=m': ' - (3.0)',
                                    '=cc': ' - (1.0)',
                                    '=k': ' - (4.0)',
                                    '=j': ' - (0.0)'
                                }
                            },
                            '=l': ' + (1.0)',
                            '=y': ' - (8.0/1.0)',
                            '=t': ' - (0.0)'
                        }
                    }
                }
            }
        }
    }
    path = "./crx.data"
    name_path = "./crx.names"
    fea_list = get_Attribute(name_path)
    datasets = read_data(path)
    print "刚读入的数据集", datasets

    pae_dict, class_count = pae_list(
        path)  # Do not compute this inside the recursion: it must be fixed before pruning and must not change while pruning.
    pae_lists = [pae_dict[key] for key in pae_dict]  # list of prior probabilities

    #Attention,if you want to perform Laplace Law of succession,just set:
    #pae_list=1.0/m
    #m=counts of classes of the whole original datasets

    class_list = [key for key in class_count]  # list of the class labels in the data set
    # Prune deep copies so the original model and data set stay untouched.
    model_pruned = MEP_result(copy.deepcopy(model), fea_list,
                              copy.deepcopy(datasets), pae_lists, class_list,
                              m)

    print "这里检查下数据集", datasets

    accuracy_unprune, accuracy_prune, misjudge_datasets = accuracy_analysis(
        model, model_pruned, copy.deepcopy(datasets), fea_list, name_path)
    print "accuracy_unprune=", accuracy_unprune
    print "accuracy_prune=", accuracy_prune
    # List every sample reported as misjudged by accuracy_analysis.
    for item in misjudge_datasets:
        print item

    print "model=", model
    print "model_pruned=", model_pruned
    # Plot the tree before and after pruning.
    createPlot(model)
    createPlot(model_pruned)
Ejemplo n.º 51
0
        return classList[0]
    if len(dataset[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataset)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    del (labels[bestFeat])
    featValues = [example[bestFeat] for example in dataset]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        subLabels = labels[:]
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataset, bestFeat, value), subLabels)
    return myTree


def majorityCnt(classList):
    """Return the most frequent label in ``classList``.

    Args:
        classList: non-empty sequence of hashable class labels.

    Returns:
        The label with the highest count. When counts tie, the tied label
        that comes first in the dict's iteration order wins, because the
        sort is stable.

    Raises:
        IndexError: if ``classList`` is empty.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # items() exists on both Python 2 and Python 3; iteritems() is Python 2
    # only and raises AttributeError on Python 3.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]


if __name__ == '__main__':
    # Build a tree from the sample data set.
    myDat, labels = createDataset()
    myTree = createTree(myDat, labels)
    # NOTE(review): createPlot is called without arguments, so `myTree` is
    # never used. Presumably `myTree` should be passed here; confirm against
    # the signature of treePlotter.createPlot.
    treePlotter.createPlot()
Ejemplo n.º 52
0
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import treePlotter as tp

# Configure a UTF-8 default encoding.
# reload(sys) + sys.setdefaultencoding is a Python 2-only idiom.
reload(sys)
sys.setdefaultencoding('utf-8')

# Plot the tree

# Hand-written sample tree: nested dicts with string leaves.
# NOTE(review): the two sub-tree keys differ ('level 2' vs 'level2');
# confirm whether that is intentional.
myTree = {
    'root': {
        0: 'leaf node',
        1: {
            'level 2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        },
        2: {
            'level2': {
                0: 'leaf node',
                1: 'leaf node'
            }
        }
    }
}
tp.createPlot(myTree)
Ejemplo n.º 53
0
def tree():
    """Plot a small hard-coded sample tree with ``tp.createPlot``."""
    # Inner sub-tree hanging off branch 1 of the root.
    level2_branch = {'level2': {3: 'left node', 4: 'right node'}}
    sample_tree = {
        'root': {
            0: 'left node',
            1: level2_branch,
            5: 'right node',
        }
    }
    tp.createPlot(sample_tree)