Ejemplo n.º 1
0
def plantTrees(zMap, density=1, order=1):
    """Scatter procedurally generated trees over a terrain height map.

    :param zMap: 2-D array of terrain heights.
    :param density: higher values plant trees more often (a cell gets a
        tree with chance 1 in ``40 / density``).
    :param order: unused here -- the per-tree order is derived from the
        global ORDER instead.  TODO(review): confirm whether this parameter
        was meant to override ORDER.
    :return: a scene-graph node with one child per planted tree.
    """
    # Graph that will contain all the trees
    forestGraph = sg.SceneGraphNode("forest")

    xSize = zMap.shape[0]
    ySize = zMap.shape[1]

    # World-space cell size: the map spans [-MAP_X_SIZE, MAP_X_SIZE] etc.
    dx = 2 * MAP_X_SIZE / xSize
    dy = 2 * MAP_Y_SIZE / ySize

    x = -MAP_X_SIZE + dx / 2

    # Choosing where to plant trees: a cell whose draw is 0 gets a tree.
    treeCoordinates = np.random.randint(0, 40 / density, zMap.shape)

    # Plants a tree in a (x, y, z) position
    for i in range(xSize):

        y = -MAP_Y_SIZE + dy / 2

        for j in range(ySize):

            if treeCoordinates[i, j] == 0:

                # Burying the tree a little to ensure it isn't floating
                normal = terrainNormal(xSize, ySize, zMap, i, j)
                correction = (abs(normal[0]) + abs(normal[1])) / 5

                # Randomizing the order, size and skip parameters;
                # reseeding per cell keeps the forest reproducible.
                np.random.seed(RANDOM + i * j)
                variance = np.random.uniform()
                realOrder = max(1, ORDER - int(variance > 0.4))
                realSize = tree.SIZE + 0.4 * int(variance > 0.6) + 0.2 * int(
                    variance > 0.8)
                realSize += (realOrder - 1) * 0.5
                realSkip = np.random.randint(0, realOrder**2 + 1)

                # Randomizing the rule of creation
                realRule = treeRule(realOrder)

                # Trees can't be planted close to each other: mark a square
                # of neighbouring cells as occupied.  Bounds are clamped
                # explicitly instead of silencing IndexError with a bare
                # try/except, which also hid any other failure.
                reach = int(realSize + realOrder)
                for k in range(i, min(i + reach, xSize)):
                    for l in range(j, min(j + reach, ySize)):
                        treeCoordinates[k, l] = 1

                treeGraph = tree.createTree(realRule, realOrder, realSize,
                                            realSkip)
                treeGraph.transform = tr.translate(x, y,
                                                   zMap[i, j] - correction)

                forestGraph.childs += [treeGraph]

            y += dy
        x += dx

    return forestGraph
Ejemplo n.º 2
0
def suffix(x):
  if int(x.conStart2) > int(x.conStart1):
    cStart1 = int(x.conStart1)
    cEnd1   = int(x.conEnd1)
    cType1  = x.conType1
    cStart2 = int(x.conStart2)
    cEnd2   = int(x.conEnd2)
    cType2  = x.conType2
  else:
    cStart1 = int(x.conStart2)
    cEnd1   = int(x.conEnd2)
    cType1  = x.conType2
    cStart2 = int(x.conStart1)
    cEnd2   = int(x.conEnd1)
    cType2  = x.conType1
  if V:
    print "Suffixing: %s, line: %s" % (x.fileName, x.lineNum)
  return tree.createString(tree.suffix(
           tree.createTree(x.parse),
           cStart1,
           cEnd1,
           cType1,
           cStart2,
           cEnd2,
           cType2))
Ejemplo n.º 3
0
def main():
    """Authenticate against the Drive API, then run the configured stages.

    Each stage is gated by a flag on the config module.  The original used
    bare triple-quoted string literals as section markers; those are no-op
    runtime expressions, so they are real comments now.
    """
    print('script started')

    # Google API setup: credentials -> authorized http -> drive v3 service.
    credentials = auth.get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    # Download the file list (metadata).
    if config.DOWNLOAD_METADATA:
        downloadMetadata.downloadFileList(service)

    # Create the tree of nodes.
    nodeList = None
    if config.CREATE_TREE:
        nodeList = tree.createTree()

    # Download files / create the local structure.
    if config.CREATE_STRUCTURE:
        createFileStructure.createStructure(nodeList, service)

    print('')
    print('script finished')
Ejemplo n.º 4
0
def mineTree(FPtree, headerTable, minSup, preFix, freqItemDict):
    # Recursively mine frequent itemsets from an FP-tree.
    # minSup: support threshold; freqItemDict: accumulator for frequent
    # itemsets; preFix: prefix itemset for this branch; FPtree: the built
    # FP-tree; headerTable: the header table matching FPtree.
    bigL = [v[0] for v in sorted(headerTable.items(), key=lambda p: p[1])
            ]  # start mining from the least frequent items
    for basePat in bigL:
        newFreqSet = preFix.copy()
        newFreqSet.add(basePat)
        # preFix is the prefix path of basePat, so preFix + basePat is a
        # frequent itemset whose count equals basePat's count.  When basePat
        # is empty, preFix is a single-path FP-tree whose path subsets were
        # already produced while building that single-path tree.
        print newFreqSet
        # Record the support count of every frequent itemset.
        if frozenset(newFreqSet) in freqItemDict:
            freqItemDict[frozenset(newFreqSet)] += headerTable[basePat][0]
        else:
            freqItemDict[frozenset(newFreqSet)] = headerTable[basePat][0]
        # print newFreqSet,freqItemDict[frozenset(newFreqSet)]

        condPattBases = findPrefixPath(basePat,
                                       headerTable[basePat][1])  # conditional pattern bases
        myCondTree, myHead = createTree(condPattBases, minSup)
        # The header table tells whether we reached a single-path tree: an
        # empty head means basePat's prefix paths already form the
        # conditional FP-tree; otherwise keep building conditional FP-trees
        # from the conditional pattern bases until they are empty.
        if myHead != None:
            mineTree(
                myCondTree, myHead, minSup, newFreqSet,
                freqItemDict)  # the recursion returns nothing: all results
            # accumulate in the freqItemDict argument, so the caller reads
            # them from there directly.
Ejemplo n.º 5
0
def test_contactLenses():
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    #四个属性名称
    lensesAtts = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = tree.createTree(lenses, lensesAtts)
    print lensesTree
    tree.createPlot(lensesTree)
Ejemplo n.º 6
0
    def __init__(self, dataSet, labels):
        """Coerce every value and label to str, then build the tree."""
        # Stringify the whole data set so mixed numeric/str input is uniform.
        newDataSet = [[str(value) for value in row] for row in dataSet]
        newLabels = [str(label) for label in labels]

        self._tree = createTree(newDataSet, newLabels)
Ejemplo n.º 7
0
def testClass():
    myDat, labels = tree.createDataSet()
    myTree = tree.createTree(myDat, labels)

    # persistenting the decision tree
    tree.storeTree(myTree, 'myTree.train')

    myTree2 = tree.grabTree('myTree.train')
    testVec = [1, 0]
    print "Test ",testVec," result: ", tree.classify(myTree2, labels, testVec)
    testVec = [1, 1]
    print "Test ",testVec," result: ", tree.classify(myTree2, labels, testVec)
Ejemplo n.º 8
0
def classifyTest():
    """Train a tree on the toy data set, classify two vectors, and plot."""
    import tree as t
    import treePlotter as tp

    dataSet, labels = t.createDataSet()
    # A copy is passed in so the original labels list survives createTree.
    myTree = t.createTree(dataSet, labels.copy())
    print(myTree)
    print(labels)

    for vector in ([1, 0], [1, 1]):
        print(classify(myTree, labels, vector))

    tp.createPlot(myTree)
Ejemplo n.º 9
0
def spt(x):
  if int(x.conStart2) > int(x.conStart1):
    cStart1 = int(x.conStart1)
    cEnd1   = int(x.conEnd1)
    cStart2 = int(x.conStart2)
    cEnd2   = int(x.conEnd2)
  else:
    cStart1 = int(x.conStart2)
    cEnd1   = int(x.conEnd2)
    cStart2 = int(x.conStart1)
    cEnd2   = int(x.conEnd1)
  if V:
    print "Finding spt for: %s, line: %s" % (x.fileName, x.lineNum)
  return tree.createString(tree.spt(
           tree.createTree(x.parse),
           cStart1,
           cEnd2))
Ejemplo n.º 10
0
    def fit(self, data, label):
        """
        Fit the boosted ensemble; see the linked article for the derivation.
        :param data: feature matrix
        :param label: labels
        :return:
        """
        # Initial sampling weights over the data distribution: all equal.
        self.data_weight = np.ones((data.shape[0], 1)) / data.shape[0]

        # Index of every sample in the data set.
        index = np.arange(0, data.shape[0], 1)
        # Iterate, fitting one weak learner per round.
        for i in range(self.n_iterates):
            # Sample according to the data weights.  Note: bagging samples
            # with replacement, boosting without.
            # https://zhuanlan.zhihu.com/p/47922595
            sub_samping = np.random.choice(
                index,
                int(self.data_weight.shape[0] * self.alpha),
                replace=False,
                p=self.data_weight.reshape(-1, ).tolist())
            train_x = data[sub_samping]
            train_y = label[sub_samping]
            dt = createTree(train_x, train_y, self.feature_list)  # train the weak learner

            self.model_list.append(dt)  # keep the weak learner

            pred = list(
                map(lambda _: predict(dt, _, self.feature_list), train_x))
            # Error rate on the training sample: the summed weights of the
            # misclassified samples (0 when correct, 1 when wrong).
            pred_error = np.ones((len(pred), 1))
            pred_error[pred == train_y] = 0
            et = pred_error.T.dot(self.data_weight[sub_samping])

            # Append this model's voting weight to the list.
            at = 0.5 * np.log((1 - et) / et)
            self.model_weight.append(at)

            # Update the sample weights.
            self.data_weight[sub_samping] = self.data_weight[
                sub_samping] * np.exp(-at * train_y * pred).reshape(-1, 1)

            # Renormalize the weights so they sum to 1.
            self.data_weight = self.data_weight / self.data_weight.sum()
    def fit(self, data, label):
        """Fit the random-forest ensemble on *data* / *label*."""
        # Number of columns each tree gets to see.
        if self.max_features == 'auto':
            self.feature_size = np.rint(np.sqrt(data.shape[1])).astype(int) + 1
        else:
            self.feature_size = np.rint(np.log2(data.shape[1])).astype(int) + 1

        # Indices of every feature in the feature matrix.
        feature_index = np.arange(0, data.shape[1], 1)

        for index in range(self.n_estimators):
            sub_x, sub_y = self.sub_sampling(data, label)
            # Randomly pick features from the index list to build this
            # tree's training set.
            feature_sample_index = np.random.choice(feature_index,
                                                    self.feature_size,
                                                    replace=False)
            # Remember which columns this tree was trained on.
            self.tree_feature.append(feature_sample_index)
            self.tree_list.append(
                createTree(sub_x[:, feature_sample_index], sub_y,
                           self.feature_list[feature_sample_index]))
Ejemplo n.º 12
0
def cross_validation_tree(dataSet,rate,tmpLabels):
    """K-fold cross-validation: train, PEP-prune, evaluate and save trees.

    :param dataSet: the full data set (list of rows).
    :param rate: test fraction per fold; the number of folds is int(1/rate).
    :param tmpLabels: attribute labels passed through to the tree module.
    """
    count = int(1/rate)                  # number of validation rounds
    test_len = int(len(dataSet) * rate)  # length of the test split
    tree_list = []                       # one tree per fold
    accuracy_list = []                   # accuracy per fold
    for i in range(count):
        if i == 0:                      # first split
            train_dataSet = dataSet[test_len:]
            test_dataSet = dataSet[:test_len]
        elif i == count - 1:            # last split
            train_dataSet = dataSet[:-test_len]
            test_dataSet = dataSet[-test_len:]
        else:                           # interior split
            train_dataSet = dataSet[:test_len * i]
            train_dataSet.extend(dataSet[test_len*(i+1):])
            test_dataSet = dataSet[test_len * i:test_len * (i+1)]

        lastest_tree = tree.createTree(train_dataSet, tmpLabels)
        tree_list.append(lastest_tree)
        #print('lastest_tree:'+str(lastest_tree))
        tree.Pep(lastest_tree,train_dataSet,tmpLabels)   # PEP pruning
        accuracy_list.append(tree.DataSetGetResult(lastest_tree,test_dataSet,tmpLabels))   # batch evaluation
    # Mean accuracy via sum() instead of a manual accumulation loop.
    average_accuracy = sum(accuracy_list) / len(accuracy_list)
    max_accuracy = max(accuracy_list)   # best fold

    print('accuracy:'+str(accuracy_list))
    print('max_accuracy:'+str(max_accuracy*100)+'%')
    print('average_accuracy:'+str(average_accuracy*100)+'%')
    # Save each fold's tree under a sequential 1-based id.
    for i, each_tree in enumerate(tree_list, start=1):
        tree.saveTree(each_tree, save_path = 'Model/Tree/cross/'+str(i)+'.tree')
        print("Decision tree list saved successfully!"+'Model/Tree/cross/'+str(i)+'.tree')
Ejemplo n.º 13
0
import tree
import treePlotter

# Read the space-separated lenses records; 'with' closes the file handle
# (the original left it open).
with open('./data.txt') as fp:
    lenses = [line.strip().split(' ') for line in fp.readlines()]
labels = ['age', 'prescript', 'astigmatic', 'tearRate', 'other']

myTree = tree.createTree(lenses, labels)
treePlotter.createTreePlot(myTree)
Ejemplo n.º 14
0
import tree

# Compute the Shannon entropy of the data set.
#myDat, labels = tree.createDataSet()
#print(myDat)
#print(tree.calcShannonEnt(myDat))
#myDat[0][-1] = 'maybe'
#print(myDat)
#print(tree.calcShannonEnt(myDat))

print("*****************************************************************")
# Test splitDataSet() on the earlier simple sample data.
#print(tree.splitDataSet(myDat, 0, 1))
#print(tree.splitDataSet(myDat, 0, 0))

print("*****************************************************************")
#myDat, labels = tree.createDataSet()
#print(myDat)
#print(tree.chooseBestFeatureToSplit(myDat))

print("*****************************************************************")
# myTree is built from nested dicts that encode the tree structure.
# The first key, 'no surfacing', names the first splitting feature, and its
# value is another dict.  The second-level keys are the splits of the
# 'no surfacing' feature, and their values are that node's children: either
# class labels (leaf nodes) or further dicts (decision nodes).  This
# nesting repeats to form the whole tree; in this example the tree has 3
# leaf nodes and 2 decision nodes.
myDat, labels = tree.createDataSet()
myTree = tree.createTree(myDat, labels)
print(myTree)

Ejemplo n.º 15
0
def test_plot():
    """Create the sample data set, grow a tree from it, and plot it."""
    data, attributes = createDataSet()
    grown = tree.createTree(data, attributes)
    tree.createPlot(grown)
Ejemplo n.º 16
0
from tree import createDataSet, createTree
from treePlotter import retrieveTree, createPlot
"""
决策树非常好的匹配了实验数据,然而只写匹配选项可能太多了,我们将这种问题称为过度匹配(overfitting).
为了减少过度匹配问题,我们可以裁剪决策树,去掉一些不必要的叶子节点.如果叶子节点只能增加少许信息,则可以删除该节点,
并将他加入其他叶子节点中
本章采用的算法叫做ID3, 无法直接处理数值型数据,尽管我们可以通过量化的方法将数值型数据转化为标称型数据,
但是存在提案多的特征划分,
第九章学习另一个决策树构造算法CART, C4.5
"""

def classify(inputTree, featLabels, testVec):
    """
    在实际数据集中改属性存储在哪个位置? 是第一个属性还是第二个属性?
    :param inputTree:
    :param featLabels:
    :param testVec:
    :return:
    """
    firstStr = list(inputTree.keys())[0]
    secondDict = inputTree[firstStr]
    # 将标签字符串转换为索引, 使用index方法查找当前列表中第一个匹配firstStr变量的元素
    featIndex = featLabels.index(firstStr)
    for key in secondDict.keys():
        # 比较testVec变量中的值与树节点的值
        if testVec[featIndex] == key:
            if type(secondDict[key]).__name__ == 'dict':
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                # 如果到达叶子节点,返回当前节点的分类标签
Ejemplo n.º 17
0
# if you do get a dictonary you know it's a tree, and the first element will be another dict

def createPlot(inTree):
    """Render *inTree* as a matplotlib figure and show it."""
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    # A single frameless axis without ticks fills the whole figure.
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
    # Plot-wide layout state is stashed on the plotTree function itself:
    # total width/depth scale the node positions, xOff/yOff track the pen.
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()

def retrieveTree(i):
    """Return one of two canned example trees for plotting demos."""
    # Index 0: a simple two-level tree; index 1: adds a nested 'head' split.
    cannedTrees = [
        {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}},
        {'no surfacing': {0: 'no',
                          1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}},
                                           1: 'no'}}}},
    ]
    return cannedTrees[i]

if __name__ =="__main__":
    # 'with' ensures the data file is closed (the original leaked it).
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = tree.createTree(lenses, lensesLabels)
    createPlot(lensesTree)
Ejemplo n.º 18
0
#!/usr/bin/python
#encoding:utf-8
import tree
from ScrolledText import example
import plottree
dataSet , labels = tree.createDataSet();
print tree.createTree(dataSet, labels)
plottree.createPlot()
Ejemplo n.º 19
0
# -*- coding: utf-8 -*-
import tree
import copy

dataset, label = tree.createDataSet()
print(label)
# labels = label alone would not work: both names would point at the same
# list object, so a deep copy is taken instead.
labels = copy.deepcopy(label)
myTree = tree.createTree(dataset, labels)
# print(myTree)
print(label)
testResult = tree.classify(myTree, label, [1, 1])
print(testResult)
# Fix: raw strings for the Windows paths.  In a normal Python 3 literal,
# "\N" not followed by {...} is a malformed named-escape and raises
# SyntaxError; the raw string keeps the same backslash-N characters.
tree.storeTree(myTree, r"F:\NatureRecognition/tree.txt")
tt = tree.grabTree(r"F:\NatureRecognition/tree.txt")
print(tt)
Ejemplo n.º 20
0
#encoding=utf-8
# from pudb import set_trace
# set_trace()
# import pudb
# pu.db
import tree

# Load the data, grow a tree from it, then count its leaves.
loaded = tree.load_data()
t = tree.createTree(loaded)
import visualization

leaf = visualization.calLeafNum(t)
Ejemplo n.º 21
0
# encoding=utf8
import tree
import treePlotter

if __name__ == '__main__':
    myDat, labels = tree.createDataSet()
    # Slice copy: createTree receives its own list, leaving labels intact.
    subLabels = labels[:]
    # print myDat
    # shannonEnt = tree.calcShannonEnt(myDat)
    # print shannonEnt
    # Higher entropy means more mixed data, i.e. more distinct classes.
    # myDat[0][-1] = 'maybe'
    # print tree.calcShannonEnt(myDat)
    # print tree.splitDataSet(myDat, 1, 1)
    # print tree.chooseBestFeatureToSplit(myDat)
    myTree = tree.createTree(myDat, subLabels)
    print myTree
    # print treePlotter.getNumLeafs(myTree)
    # print treePlotter.getTreeDepth(myTree)
    # treePlotter.createPlot()
    # print tree.classify(myTree, labels, [1, 0])
    # print tree.classify(myTree, labels, [1, 1])
    # print tree.classify(myTree, labels, [0, 0])
    tree.storeTree(myTree, 'tree.txt')
    print tree.grabTree('tree.txt')
Ejemplo n.º 22
0
                # 子树不剪,则继续下一个子树
                cutBranch_uptodown(secondDict[key], subDataSet, tempfeatLabels)


if __name__ == '__main__':
    global num
    num = 0
    # dataset, features = ig.createDataSet()
    # dataset,features = createDataSet()
    # dataset, features = createDataSet_iris()
    dataset, features = createDataSetCNDA(os.getcwd() + '/templates/tree/bloodpresure/bloodpresure.xls')
    # print dataset
    # print dataset
    print features
    features2 = features[:]  # labels2=labels:这样的赋值只是引用地址的传递,当labels改变时,labels2也会改变。只有labels2=labels[:]这样的才是真正的拷贝
    tree = tree.createTree(dataset, features, 'C4.5')

    # print tree
    # print classify(tree,features2,[0,1,1,1,0])
    tp.createPlot(tree)
    count = []
    # getCount(tree,dataset,features2,count)
    # print num
    # print count
    cutBranch_uptodown(tree, dataset, features2)
    # cutBranch_downtoup(tree, dataset, features2, count)
    tp.createPlot(tree)



def cutBranch_downtoup(inputTree, dataSet, featLabels, count):  # 自底向上剪枝
Ejemplo n.º 23
0
 def createGraph(self, node):
     """Delegate graph construction to the tree module; always returns 0."""
     tree.createTree(node)
     return 0
Ejemplo n.º 24
0
# -*- coding: UTF-8 -*-    (or equivalently: #coding=utf-8)
'''
Created on 2016-8-19

@author: XiaoYuan1

'''

import tree
import classify
from copy import copy

'创建训练数据源'
# Create the training data source.
mydat,labels = tree.createDataSet()
# Keep a copy of the labels for the later classification call.
labels2 = copy(labels)

'构建决策树'
# Build the decision tree.
mytree = tree.createTree(mydat, labels)
print mytree

'使用决策树模型对数据进行分类'
# Classify a sample vector with the decision-tree model.
result = classify.classify(mytree, labels2, [1,0])
print result






Ejemplo n.º 25
0
	def createGraph(self,node):
		"""Build the tree for *node* via the tree module; always returns 0."""
		tree.createTree(node)
		return 0
Ejemplo n.º 26
0
myTree=tree.createTree(myDat,labels)
print myTree
'''
'''
import treePlotter
reload(treePlotter)
print treePlotter.retrieveTree(1)
myTree=treePlotter.retrieveTree(0)
print treePlotter.getNumLeafs(myTree)
print treePlotter.getTreeDepth(myTree)
print treePlotter.createPlot(myTree)
'''
import tree
reload(tree)
'''
myDat,labels=tree.createDataSet()
print labels
myTree=treePlotter.retrieveTree(0)
print myTree
print tree.classify(myTree,labels,[1,0])
print tree.classify(myTree,labels,[1,1])
tree.storeTree(myTree,'classfierStorage.txt')
print tree.grabTree('classfierStorage.txt')
'''
fr=open('lenses.txt')
lenses=[inst.strip().split('\t') for inst in fr.readlines()]
lensesLabels=['age','prescript','astigmatic','tearRate']
lensesTree=tree.createTree(lenses,lensesLabels)
print lenses
import treePlotter
treePlotter.createPlot(lenseTree)
Ejemplo n.º 27
0
def testClass2():
    fr = open('lenses.txt')
    lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels =  ['age', 'prescript', 'astigmatic', 'tearRate']
    lensesTree = tree.createTree(lenses, lensesLabels)
    print lensesTree
Ejemplo n.º 28
0
#-*- coding:utf-8 -*-
import tree
mydat,label = tree.createDataSet()
#mydat
#tree.calcShannonEnt(mydat)# entropy of the data set

#reload(tree)
#tree.splitDataSet(mydat,0,1)# rows whose feature 0 equals 1
#tree.splitDataSet(mydat,0,0)

#tree.chooseBestFeatureToSplit(mydat)# index of the best splitting feature

mytree = tree.createTree(mydat,label)# build the decision tree
mytree


Ejemplo n.º 29
0
    numLeafs = getNumLeafs(myTree)
    depth = getTreeDepth(myTree)
    firstStr = list(myTree.keys())[0]
    cntrPt = (plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
              plotTree.yOff)
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key in secondDict.keys():
        if type(secondDict[key]).__name__ == 'dict':
            plotTree(secondDict[key], cntrPt, str(key))
        else:
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt,
                     leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD


if __name__ == '__main__':
    # myDat, labels = createDataset()
    # myTree = createTree(myDat, labels)
    # createPlot(myTree)

    # 'with' closes the data file deterministically (the original leaked it).
    with open('./dataset/lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    # NOTE(review): ' astigmatic' keeps its original leading space -- it
    # looks like a typo, but renaming it would change the plotted labels.
    lensesLabels = ['age', 'prescript', ' astigmatic', 'tearRate']
    lensesTree = createTree(lenses, lensesLabels)
    createPlot(lensesTree)
Ejemplo n.º 30
0
    query = query.lower()
    condition = condition.lower()
    observedList.append(query)
    observedCondition.append(condition)
    query = ''
    condition = ''
    take = 0

#print(observedList)
#print(observedCondition)
# NOTE(review): this looks like rejection sampling -- countquery/countobserved
# would estimate P(query | evidence).  Confirm against the caller.
countobserved = 0  # samples consistent with every observed condition
countquery = 0  # of those, samples that also match the query conditions
node = Node(None, None, None, None, None)
for i in range(0, args.iteration):
    isobserved = True
    # Draw one sample: build a tree and let it assign node statuses.
    testtree = tree.createTree()
    testtree.startTree()
    # Reject the sample if any observed node disagrees with the evidence.
    for node in testtree.nodes:
        name = node.name
        if name in observedList:
            if observedCondition[observedList.index(name)] != node.status:
                isobserved = False

    if isobserved:
        countobserved += 1
        # Count query nodes whose sampled status matches the queried one.
        for node in testtree.nodes:
            if node.name in queryList:
                name = node.name
                if queryCondition[queryList.index(name)] == node.status:
                    countquery += 1
Ejemplo n.º 31
0
# labels = ['no surfacing', 'filppers']
# dataset[0][-1] = 'maybe'
# shannonEnt =  tree.calcShannonEnt(dataset)
# print shannonEnt

# print tree.splitDataSet(dataset, 0, 0)
# print tree.chooseBestFeature(dataset)
# print tree.createTree(dataset, labels)
# treeplotter.createPlot()
# myTree = treeplotter.retrieveTree(0)
# print myTree
# print treeplotter.getNumLeafs(myTree)
# print treeplotter.getTreeDepth(myTree)
# treeplotter.createPlot(myTree)
# print tree.classify(myTree, labels,[1,1])
fr = open('lenses.txt')
lines = fr.readlines()

lensesAll = [ inst.split("\t") for inst in lines]
lensesTrain = lensesAll[5:len(lines)]
lensesLables = ['age', 'prescript', 'astigmatic', 'tearRate']
lensesTree = tree.createTree(lensesTrain, lensesLables[:])
# treeplotter.createPlot(lensesTree)
# lensesTree =  tree.grabTree( 'Decision.txt')
# treeplotter.createPlot(lensesTree)
for i in range(5):
    print "分类为%s, 正确为%s" %(tree.classify(lensesTree, lensesLables, lensesAll[i][0:-1]), lensesAll[i][-1])



import tree
import treeplot

def loaddata(filename):
    """Read a tab-separated lenses data file.

    :param filename: path to the data file.
    :return: (rows, labels) where rows is a list of per-line field lists
        and labels names the four attribute columns.
    """
    # 'with' guarantees the handle is closed (the original leaked it).
    with open(filename) as f:
        dat = [line.strip().split('\t') for line in f.readlines()]
    label = ['age','prescript','astigmatic','tearrate']

    return dat,label

if __name__ == "__main__":
    # Load the lenses data, grow the decision tree, print and plot it.
    rows, columnLabels = loaddata("lenses.txt")
    lensestree = tree.createTree(rows, columnLabels)
    print(lensestree)
    treeplot.createPlot(lensestree)
Ejemplo n.º 33
0
import numpy as np
'''
lenses,lensesLabels = tree.createDataSet()
lensesTree = tree.createTree(lenses,lensesLabels)
print(lensesTree)
treeplotter.createPlot(lensesTree)
'''

# Fetch the iris data straight from the UCI repository (needs network).
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
    header=None)
x = df.iloc[:, [0, 1, 2, 3]].values
y = df.iloc[:, 4].values
# NOTE(review): these label texts quote thresholds (5.55, 3.35, 2.45, 0.8)
# that do not match the cutoffs binarized below (5.5, 3.3, 2.0, 1.0) --
# confirm which set is intended.
labels = [
    "sepal length<5. 55", "setal width>3. 35", "petal length<2. 45",
    "petal width>0. 8"
]
# Binarize each feature against a fixed threshold (1 when below it).
mydat = list(np.where(x[:, 0] < 5.5, 1, 0))
mydat = np.vstack([mydat, list(np.where(x[:, 1] < 3.3, 1, 0))])
mydat = np.vstack([mydat, list(np.where(x[:, 2] < 2., 1, 0))])
mydat = np.vstack([mydat, list(np.where(x[:, 3] < 1, 1, 0))])
mydat = mydat.transpose(1, 0)  # rows = samples, columns = features
mydat = np.column_stack((mydat, y))  # append the class column
# Convert to the list-of-lists format the tree module expects.
mydat = list(mydat)
for i in range(len(mydat)):
    mydat[i] = list(mydat[i])

lensesTree = tree.createTree(mydat, labels, "C4.5")
print(lensesTree)
treeplotter.createPlot(lensesTree)
def main():
    """Grow the lenses decision tree and plot it."""
    # 'with' closes the data file (the original leaked the handle).
    with open('lenses.txt') as fr:
        lenses = [inst.strip().split('\t') for inst in fr.readlines()]
    lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']
    Tree = tree.createTree(lenses, lensesLabels)
    createPlot(Tree)
示例:使用决策树预测隐形眼镜类型
(1)收集数据:提供的文本文件。
(2)准备数据:解析tab键分隔的数据行。
(3)分析数据:快速检查数据,确保正确地解析数据内容
(4)训练算法:使用3.1节的createTree 函数。
(5)测试算法:编写测试函数验证决策树可以正确分类给定的数据实例。
(6)使用算法:存储树的数据结构,以便下次使用时无需重新构造树。
'''
import tree
fr=open('lenses.txt')

'''
#lenses=[inst.strip().split('\t') for inst in fr.readline()]
lensesLabels =['age','prescript','astigmatic','tearRate']
#lensesTree =  createTree(lenses,lensesLabels) 
'''
dataSet=[]
while True:
    line = fr.readline()
    if not line:break
    dataSet.append(line.strip().split('\t'))

lensesLabels =['age','prescript','astigmatic','tearRate']
lensesTree =  tree.createTree(dataSet,lensesLabels)
print lensesTree

'''
 匹配选项可能太多了。我们将这种问题称之为过度匹配(overfitting)。
 为了减少过度匹配问题,我们可以裁剪决策树,去掉一些不必要的叶子节点。
 如果叶子节点只能增加少许信息,则可以删除该节点,将它并人到其他叶子节点中。
'''
Ejemplo n.º 36
0
def test_createTree():
    data, attributes = createDataSet()
    print tree.createTree(data, attributes)
Ejemplo n.º 37
0
import arff
import tree
import sys

arg = sys.argv
m = int(arg[3])
trainData = arff.load(open(arg[1], 'r'))
testData = arff.load(open(arg[2], 'r'))

myTree = tree.createTree(trainData['data'], trainData['attributes'], m)
tree.plotTree(myTree, trainData['attributes'])

prediction = [tree.classify(myTree, testData['attributes'], obs) for obs in testData['data']]
true = [obs[-1] for obs in testData['data']]
print "<Predictions for the Test Set Instances>"
n = 0
for i in range(len(prediction)):
    index = i + 1   
    if prediction[i] == true[i]:
        n += 1
    print "{}: Actual: {} Predicted: {}".format(n, true[i], prediction[i])
print "Number of correctly classified: {} Total number of test instances: {}".format(n, len(testData['data']))
Ejemplo n.º 38
0
#-*- coding:utf-8 –*-
import preprocess
import tree
import evaluation

# Preprocess the three name columns of the CSV across 3 parties.
# NOTE(review): parameter semantics inferred from names only (corruption
# rate, number of hashes, signature length) -- confirm against the
# preprocess module.
parties, realSim = preprocess.preprocess(
    filename='NO_DMV_Match.csv',
    col=['last_name', 'first_name', 'middle_name'],
    parties=3,
    corruption=0.4,
    hashnumber=100,
    length=1000)

print parties
print realSim
# Build the tree over the parties, then cluster from its root.
root = tree.createTree(parties, 2)
#tree.printTree(root)

clusters = tree.cluster(root)
evaluation.printCluster(clusters)

# Compare the clustering with the known similarities at threshold 0.7.
evaluation.compareAll(clusters, realSim, 0.7)
Ejemplo n.º 39
0
import tree as t
import treePlotter as tp
import os

f = open(os.path.dirname(__file__) +'/lenses.txt')
lenses = [r.strip().split('\t') for r in f.readlines()]
lensesLabel = ['age','prescript','astigmatic','tearRate']
lensesTree = t.createTree(lenses,lensesLabel)
tp.createPlot(lensesTree)
fmt = '%10s'
print [fmt % x for x in lensesLabel]
for lense in lenses:
    print [fmt % x for x in lense],t.classify(lensesTree,lensesLabel,lense[0:-1])