Ejemplo n.º 1
0
    firstLabelIndex = labels.index(firstLabel)  # 属性对应的index
    secondDict = tree[firstLabel]
    value = testData[firstLabelIndex]   #  属性值

    if type(secondDict[value]).__name__ == "dict":  # 假如还是一棵树则递归
        classLabel = classifyDecisionTree(secondDict[value], labels, testData)
    else:
        classLabel = secondDict[value]

    return classLabel


if __name__ == '__main__':
    # Load the lenses samples together with their attribute labels.
    samples, attributeNames = loadDataSet("lenses.txt")

    # Build the tree from a copy of the labels: createTree() modifies the
    # label list it receives, and the intact list is still needed below
    # for classification.
    lensesTree = createTree(samples, attributeNames[:])
    print(lensesTree)

    # Draw the learned tree.
    TreePlot.createPlot(lensesTree)

    # Classify one sample with the learned tree.
    query = ["young", "hyper", "yes", "normal"]
    predicted = classifyDecisionTree(lensesTree, attributeNames, query)
    print(predicted)
'''
Read a serialized (pickled) object
'''


def grabTree(filename):
    """Load and return the object pickled in the file ``filename``.

    Args:
        filename: path of a file containing a pickled object.

    Returns:
        The object reconstructed by ``pickle.load``.

    Raises:
        Whatever ``open`` or ``pickle.load`` raise (e.g. a missing file or
        a corrupt pickle); nothing is caught here.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    # The with-block guarantees the handle is closed even if loading fails.
    with open(filename, "rb") as fr:
        return pickle.load(fr)


if __name__ == '__main__':

    # Create the example data set and keep an untouched copy of the
    # labels for the classification step at the end.
    samples, featureNames = createDataSet()
    pristineNames = featureNames[:]

    # Learn the decision tree.
    learnedTree = createTree(samples, featureNames)
    print(learnedTree)

    # Draw the decision tree.
    TreePlot.createPlot(learnedTree)

    # Serialize the tree to disk, then read it back.
    storeTree(learnedTree, "object.txt")
    restoredTree = grabTree("object.txt")
    print(restoredTree)

    # Classify one sample with the restored tree.
    prediction = classifyDecisionTree(restoredTree, pristineNames, [1, 1])
    print(prediction)
Ejemplo n.º 3
0
            classCount[vote]=0
        classCount[vote]+=1
    sortedClassCount = sorted(classCount.items(),key=operator.itemgetter(1),reverse=True)
    return sortedClassCount[0][0]

def createTree(dataSet, labels):
    """Recursively build a decision tree as nested dictionaries.

    Args:
        dataSet: list of samples; the last item of each sample is its
            class, the preceding items are feature values.
        labels: feature names, indexed like the feature columns of
            ``dataSet``. The list is NOT modified (earlier versions
            deleted the chosen feature from the caller's list).

    Returns:
        The shared class when every sample has the same class; the result
        of ``majorityCnt`` when only the class column is left; otherwise
        a dict ``{featureName: {featureValue: subtree_or_class}}``.
    """
    classList = [example[-1] for example in dataSet]

    # All samples agree on the class: this branch is a leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]

    # Only the class column remains: fall back to the majority vote.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)

    bestFeat = chooseBestFeatureToSplit(dataSet)  # index of the feature to split on
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}

    # Drop the used feature from a private copy so the caller's list stays
    # intact; `del` on the copy keeps the original indexing semantics.
    remainingLabels = list(labels)
    del remainingLabels[bestFeat]

    for value in set(example[bestFeat] for example in dataSet):
        # Each branch gets its own copy of the remaining labels.
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), remainingLabels[:])
    return myTree



if __name__=='__main__':
    dataSet, labels=createDataSet1()  # create the example data
    mytree=createTree(dataSet, labels)  # learn the decision tree model
    # Parenthesised single-argument print produces the same output on
    # Python 2 and Python 3 (the bare print statement is Python 2 only).
    print(mytree)
    TreePlot.createPlot(mytree)
Ejemplo n.º 4
0
__author__ = 'wanghao'


"""
    the glasses examples
"""

import ID3Tree
import TreePlot

# Read the tab-separated lenses samples; the with-block closes the file
# as soon as the lines have been read.
with open('./lenses.txt') as fr:
    DataList = fr.readlines()

# One list of field strings per input line.
DataSet = [data.strip().split('\t') for data in DataList]

# "%s %s" reproduces the space the Python 2 print statement inserted
# between its two arguments, so the output is unchanged on both versions.
print("%s %s" % ("The dateSet is ", DataSet))
Labels = ['age', 'prescript', 'astigmatic', 'tearRate']

LenseTree = ID3Tree.createTree(DataSet, Labels)
print("%s %s" % ("the result ID3 Tree is ", LenseTree))

TreePlot.createPlot(LenseTree)
Ejemplo n.º 5
0
# The print calls below are written so that they produce the same output on
# Python 2 and Python 3; "%s %s" reproduces the space the Python 2 print
# statement inserted between its two arguments.
print("---------------------------------------\n")

# Get the number of leaf nodes
LeafNodeNum = ID3Tree.getNumLeafs(myTree)
print("%s %s" % ("This ID3 Tree leaf node num is ", LeafNodeNum))
print("---------------------------------------\n")


# Get the depth of the ID3 tree
MaxDepth = ID3Tree.getTreeDepth(myTree)
print("%s %s" % ("This ID3 tree max depth is ", MaxDepth))
print("---------------------------------------\n")


# Plot the tree
TreePlot.createPlot(myTree)
print("---------------------------------------\n")













Ejemplo n.º 6
0
from TestTrees import calcShannonEnt
from TestTrees import chooseBestFeatureToSplit
from TestTrees import createTree
import TestTrees
import TreePlot


def createDataSet():
    """Return a small hard-coded sample set and its feature names.

    Returns:
        A ``(samples, featureNames)`` pair: ``samples`` is a list of
        ``[value, value, class]`` rows and ``featureNames`` names the two
        value columns. Fresh lists are created on every call.
    """
    featureNames = ['no surfacing', 'flippers']
    samples = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    return samples, featureNames


# Read the tab-separated lenses samples; the with-block closes the file.
# The raw string keeps the exact same path value while avoiding the
# invalid "\l" escape sequence of the plain literal.
# NOTE(review): the backslash separator only works as a directory
# separator on Windows — confirm the intended location of lenses.txt.
with open(r'matpilb\lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels = ['ages', 'prescript', 'astigmatic', 'tearRate']
print(lenses)

# Learn the tree from the samples and draw it.
lensesTree = TestTrees.createTree(lenses, lensesLabels)
TreePlot.createPlot(lensesTree)
Ejemplo n.º 7
0
                            '>=' + str(splite))] = CreateTree(
                                son_data, label_lisan, label_lianxu)

                elif j[feature] < splite and i == 1:
                    son_data.append(j)
                    if len(son_data) == 0:
                        return leaf(data)
                    else:
                        Tree[labels_word[feature]][str(
                            '<' + str(splite))] = CreateTree(
                                son_data, label_lisan, label_lianxu)

        else:
            for j in data:
                if j[feature] == label_value[feature][i]: son_data.append(j)

            if len(son_data) == 0:
                return leaf(data)

            else:
                Tree[labels_word[feature]][
                    label_value[feature][i]] = CreateTree(
                        son_data, label_lisan, labels_lianxu)

    return Tree


if __name__ == '__main__':
    # Build the tree from dataSet and the two label lists (all defined
    # elsewhere in the module), then draw it.
    learnedTree = CreateTree(dataSet, labels_lisan, labels_lianxu)
    TreePlot.createPlot(learnedTree)