def _put(self, cn, key, value):
    """Insert ``key``/``value`` into the subtree rooted at ``cn``.

    Starting at ``cn`` the tree is walked downwards: a key strictly greater
    than the current node's key goes to the right, every other key
    (including an equal one) goes to the left.  The new ``treeNode`` is
    attached at the first missing child slot, with its ``parent`` set to the
    node it hangs from.

    The descent is iterative rather than recursive so that a very deep
    (e.g. degenerate, list-like) tree cannot exhaust Python's recursion
    limit.

    :param cn: node at which the search for the insertion point starts
    :param key: key of the new entry
    :param value: payload stored with the key
    :return: None
    """
    node = cn
    while True:
        if node.key < key:
            if not node.hasRightChild():
                node.right = treeNode(key=key, value=value, parent=node)
                return
            node = node.right
        else:
            # Keys that are smaller than or equal to node.key go left.
            if not node.hasLeftChild():
                node.left = treeNode(key=key, value=value, parent=node)
                return
            node = node.left
Example #2
0
    def _build_tree(self, x, y):
        """Recursively build a decision (sub)tree and return its root node.

        ``self._choose_split_index(x, y)`` supplies the column index to
        split on, the split value and the four arrays produced by that
        split.  When no split index is returned, or ``y`` contains a single
        distinct value, the node becomes a leaf that stores the class counts
        of ``y`` and is named after the most common class.  Otherwise the
        node records the split and both halves are built recursively.

        :param x: feature data of the samples reaching this node
        :param y: target values of the samples reaching this node
        :return: the ``treeNode`` at the root of the built subtree
        """
        node = treeNode()
        index, value, splits = self._choose_split_index(x, y)

        if index is not None and len(np.unique(y)) != 1:
            # Internal node: remember the split, then grow both branches.
            left_x, left_y, right_x, right_y = splits
            node.column = index
            node.value = value
            # Name of the split column (the original comment says it is only
            # used when printing the tree).
            node.header = self.array_header[0][index]
            node.left = self._build_tree(left_x, left_y)
            node.right = self._build_tree(right_x, right_y)
            return node

        # Leaf: no usable split, or every remaining target is identical.
        node.leaf = True
        class_counts = Counter(y)
        node.classes = class_counts
        node.name = class_counts.most_common(1)[0][0]
        return node
Example #3
0
 def createTreeNodes(self, ls, node, depthl, strategy, heu):
     """
     Build the child tree nodes for the successors of ``node``.

     Nothing is generated when the depth limit is smaller than the depth
     stored in ``node._d``.

     :param ls: list of ``(action, result, cost)`` successors for a given state
     :param node: parent tree node; its ``_d`` is compared with ``depthl``
     :param depthl: depth limit int
     :param strategy: strategy int, passed through to every new ``treeNode``
     :param heu: heuristic string (``'h1'`` or ``'h0'``; anything else gives 0)
     :return: list with one ``treeNode`` per successor (empty if the limit
              is exceeded)
     """
     nodes = []
     if depthl < node._d:
         return nodes

     for action, result, cost in ls:
         # The heuristic is reset for every successor; otherwise a successor
         # without remaining nodes would inherit the value computed for the
         # previous one.
         h = 0
         if result._nodes:
             dmin = min(
                 self.distance(result._current, n) for n in result._nodes
             )
             if heu == 'h1':
                 h = dmin
             elif heu == 'h0':
                 # NOTE(review): h is overwritten on every iteration, so it
                 # ends up as dmin + distance of the LAST pair.  A minimum
                 # over all pairs may have been intended - confirm.
                 for a, b in itertools.combinations(result._nodes, 2):
                     h = dmin + self.distance(a, b)
         nodes.append(
             treeNode(result, strategy, node, float(cost), action, h))
     return nodes
Example #4
0
def limSearch(problem, strategy, depthl, pruning, heu):
    """
    Run a depth-limited frontier search on ``problem``.

    :param problem: problem class object; its ``_visitedList`` is reset here
    :param strategy: strategy int
    :param depthl: depth limit int
    :param pruning: pruning option boolean; when set, a state is re-inserted
                    only if it is new or its ``abs(_f)`` improved
    :param heu: heuristic string
    :return: ``(goal node, number of nodes inserted in the frontier)`` when a
             goal is found, otherwise ``None``
    """
    f = frontier()
    problem._visitedList = {}

    initial = treeNode(problem._init_state, strategy)
    f.insert(initial)
    num_f = 1
    problem._visitedList[initial._state._md5] = initial._f

    while not f.isEmpty():
        act = f.remove()
        if problem.isGoal(act._state):
            return act, num_f

        successors = problem._state_space.successors(act._state)
        children = problem.createTreeNodes(successors, act, depthl,
                                           strategy, heu)
        if pruning:
            for child in children:
                digest = child._state._md5
                known = problem._visitedList
                # Keep the child when its state is unseen, or when it beats
                # the value recorded for that state.
                if digest not in known or abs(child._f) < abs(known[digest]):
                    f.insert(child)
                    num_f += 1
                    known[digest] = child._f
        else:
            for child in children:
                f.insert(child)
                num_f += 1

    # Frontier exhausted without reaching a goal.
    return None
 def put(self, key, value):
     """Add ``key``/``value`` to the tree and count the new entry.

     ``size`` is incremented on every call.  With an existing root the
     insertion is delegated to ``_put`` starting at the root; otherwise the
     new node becomes the root.
     """
     self.size += 1
     if self.root:
         self._put(self.root, key, value)
         return
     self.root = treeNode(key=key, value=value)
Example #6
0
    def __init__(self, system, samples, boolParams, numParams, fitness=1.0):
        '''Create a unit decision maker for genetics.

        The sample collection and the system's key statements are stored as
        sets and used, together with ``system.isMajorant``, to build the
        root ``treeNode``; the fringe starts out containing only that root.
        '''
        sample_set = set(samples)
        key_statements = set(system.keyStatements)
        self.samples = sample_set
        self.keyStatements = key_statements
        self.numNodes = 0  # counts only expanded trees
        root = treeNode.treeNode(sample_set, key_statements,
                                 system.isMajorant)
        self.tree = root
        self.fitness = fitness
        self.fringe = {root}
        self.boolStatements = set(boolParams)  # available boolean statements
        self.numStatements = set(numParams)  # available numeric statements

        # <bool/num> statement -> nodes of application
        usage = {}
        for statement in set([*self.boolStatements, *self.numStatements]):
            usage[statement] = set()
        self.stateUseDict = usage
        self.originDict = {}  # dict X>C statement -> num statement X
Example #7
0
def createTree(dataSet, minSup=1):
    """
    Build an FP-tree together with its header table.

    :param dataSet: training data, a mapping of transaction -> count
        :example
        {
            frozenset({'z'}): 1,
            frozenset({'h', 'j', 'p', 'r', 'z'}): 1,
            frozenset({'t', 'w', 'u', 'v', 'z', 's', 'x', 'y'}): 1,
            frozenset({'n', 'o', 's', 'x', 'r'}): 1,
            frozenset({'t', 'y', 'q', 'p', 'x', 'r', 'z'}): 1,
            frozenset({'t', 'y', 'q', 'm', 'e', 's', 'x', 'z'}): 1
        }
    :param minSup: minimum support
    :return: ``(tree root, header table)``, or ``(None, None)`` when no item
             reaches the minimum support
    """
    # First pass: accumulate the total support of every single item.
    support = {}
    for trans, weight in dataSet.items():
        for item in trans:
            support[item] = support.get(item, 0) + weight

    # Keep the frequent items only.  Each value becomes [count, node] because
    # the header table also needs a pointer to a tree node.
    headTable = {
        item: [total, None]
        for item, total in support.items()
        if not (total < minSup)
    }
    if not headTable:
        return None, None

    retTree = treeNode('Null Set', 1, None)

    # Second pass: reduce each transaction to its frequent items, order them
    # by descending global support and insert them into the tree.
    for tranSet, count in dataSet.items():
        localD = {
            item: headTable[item][0]
            for item in tranSet
            if item in headTable
        }
        if localD:
            orderedItems = sorted(localD, key=localD.get, reverse=True)
            updateTree(orderedItems, retTree, headTable, count)
    return retTree, headTable
Example #8
0
def updateTree(orderedItems, retTree: treeNode, headTable, count):
    """Insert the item path ``orderedItems`` below ``retTree``.

    The first item is merged into the matching child of ``retTree`` (its
    count is increased) or added as a new child; a new child is also linked
    from the header table.  The remaining items are inserted recursively
    below that child.

    :param orderedItems: non-empty sequence of items for one transaction
    :param retTree: node under which the path is inserted
    :param headTable: header table mapping item -> [count, first node]
    :param count: count added for every node on the path
    """
    head = orderedItems[0]
    children = retTree.children

    if head in children:
        # The FP-tree already has this node: just raise its count.
        children[head].inc(count)
    else:
        created = treeNode(head, count, retTree)
        children[head] = created
        entry = headTable[head]
        if entry[1] is None:
            # No node linked for this item yet: the header points here.
            entry[1] = created
        else:
            # Otherwise hand the new node to updateHeader together with the
            # node the header already points to.
            updateHeader(entry[1], created)

    if len(orderedItems) > 1:
        # Recurse for the rest of the path.
        updateTree(orderedItems[1:], children[head], headTable, count)
Example #9
0
 def __init__(self, keyParams, boolParams, numParams, samples,
              samplesCount, nodesCount, majorant=False):
     '''The LICS itself.

     Stores the parameter lists, derives one take-value statement per
     parameter, builds the root ``treeNode`` from the samples and the key
     statements, and creates the k-means clusteriser.
     '''
     # Note carried over from the original author: "reverse keyparams pls!"

     def take_value_statements(params):
         # One op_takeValue statement for every parameter, in order.
         return [
             statements.get_statement(statements.op_takeValue, param)
             for param in params
         ]

     self.boolParams = boolParams
     self.numParams = numParams
     self.keyParams = keyParams
     self.keyStatements = take_value_statements(keyParams)
     self.boolStatements = take_value_statements(boolParams)
     self.numStatements = take_value_statements(numParams)
     self.samples = set(samples)
     self.samplesCount = samplesCount
     self.nodesCount = nodesCount
     self.tree = treeNode.treeNode(self.samples, self.keyStatements,
                                   majorant)
     # The clusteriser receives this instance; isMajorant is assigned only
     # afterwards, exactly as before.
     self.clusteriser = cluster.kmeans(self, numParams,
                                       boolParams + keyParams, samplesCount)
     self.isMajorant = majorant
Example #10
0
def updateTree(items, inTree, headerTable, count):
    """
    Grow the FP-tree by one transaction path.

    The first element of ``items`` is looked up among the children of
    ``inTree``.  If it is present its count is increased; otherwise a new
    ``treeNode`` is created and added as a child, and the header table is
    updated to reference it (through ``updateHeader`` when the header
    already points to a node).  The remaining elements are then inserted
    recursively below that child.

    :param items:   non-empty list of frequent item keys, sorted from most
                    to least frequent
    :param inTree:  node under which the path is inserted
    :param headerTable: header table {item: [item count, treeNode]}
    :param count:   number of times this transaction occurs in the data set
    :return: None
    """
    first = items[0]
    children = inTree.children

    if first in children:
        children[first].inc(count)
    else:
        child = treeNode.treeNode(first, count, inTree)
        children[first] = child
        entry = headerTable[first]
        # Identity test: a node type that overrides __eq__ must not be able
        # to make an occupied slot look empty.
        if entry[1] is None:
            entry[1] = child
        else:
            updateHeader(entry[1], child)

    if len(items) > 1:
        # Insert the rest of the path below the node for items[0]; the same
        # count is applied at every level.
        updateTree(items[1:], children[first], headerTable, count)
Example #11
0
def createTree(dataSet, minSup=1):
    """
    Build an FP-tree.

    :param dataSet: data set dictionary {transaction: number of occurrences}
    :param minSup:  minimum support
    :return:    ``(FP-tree root, header table)``, or ``(None, None)`` when no
                item reaches the minimum support
    """
    # Total number of occurrences of every item over the whole data set.
    totals = {}
    for trans, occurrences in dataSet.items():
        for item in trans:
            totals[item] = totals.get(item, 0) + occurrences

    # Drop the items below the minimum support and format the survivors as
    # {item: [item count, None]}.
    headerTable = {}
    for item, total in totals.items():
        if total < minSup:
            continue
        headerTable[item] = [total, None]

    if len(headerTable) == 0:
        return None, None

    # The tree starts from the empty set.
    retTree = treeNode.treeNode('Null Set', 1, None)

    for tranSet, count in dataSet.items():
        # Frequent items of this transaction with their global counts.
        localD = {}
        for item in tranSet:
            if item in headerTable:
                localD[item] = headerTable[item][0]
        if not localD:
            continue
        # Order the items by descending global frequency, then fill the tree.
        orderedItems = sorted(localD, key=localD.get, reverse=True)
        updateTree(orderedItems, retTree, headerTable, count)

    return retTree, headerTable
# Script fragment: for each value of m it prepares the log locations and
# builds a tree whose deepest level is collected in `leafs`.
# m, eps, mMax, logsDir, Path and treeNode are defined outside this excerpt.
cStart = m + eps

# NOTE(review): m is not modified anywhere in the visible part of the loop
# body; presumably it is advanced further down - confirm.
while m <= mMax:
    # Per-m figure directory and log file.
    Path(logsDir + 'm_' + str(m) + '_figs').mkdir(parents=True, exist_ok=True)
    # NOTE(review): f is not closed within the visible lines.
    f = open(logsDir + 'm_' + str(m) + '_log.txt', "w")

    stoppingThreshold = 0.0005
    foundWorstQ = False
    print('Searching for worst Q. m: ' + str(m))
    cMin = 0
    cMax = m + eps

    plotBool = True

    # Build the tree level by level, starting from the root's children.
    root = treeNode(0, m, 1, 1, None, [], [], [], [])
    root.createChildren()
    currentChildren = root.children
    # k itself is unused; each of the m iterations expands every node of the
    # current level and gathers all their children as the next level.
    for k in range(1, m + 1):
        numOfChildren = len(currentChildren)
        nextChildren = []
        for childIdx in range(numOfChildren):
            currentChildren[childIdx].createChildren()
            nextChildren = nextChildren + currentChildren[childIdx].children
        currentChildren = nextChildren
    # The last level produced by the expansion above.
    leafs = currentChildren
    minLeafIdx = 0
    print('Number of leafs: ' + str(len(leafs)))
    f.write('Number of leafs: ' + str(len(leafs)) + '\n')

    cnt = -1
Example #13
0
def main(trainData,
         trainLabel,
         validateData,
         validateLabel,
         type=0,
         thershod=0.01,
         thershodImpure=0.2,
         method=0):
    """Tune one hyper-parameter of the decision tree on the validation set.

    For every candidate value a tree is generated from the training data,
    pruned with the validation data and evaluated on both sets.  The tree
    with the highest validation accuracy seen so far is remembered; a later
    candidate replaces it only with a strictly higher accuracy.

    :param trainData: training samples
    :param trainLabel: labels of the training samples
    :param validateData: validation samples
    :param validateLabel: labels of the validation samples
    :param type: which parameter is tuned: 1 -> ``thershodImpure``,
                 2 -> ``method``, anything else -> ``thershod``
    :param thershod: value passed to ``GenerateTree`` when not being tuned
    :param thershodImpure: value passed to ``GenerateTree`` when not tuned
    :param method: value passed to ``GenerateTree`` when not being tuned
    :return: ``[selectList, bestTree, bestPara]`` where every ``selectList``
             row is ``[candidate, train accuracy, validation accuracy,
             leaf count, pruning count]``
    """
    global totalPruningNum

    # The three tuning modes differ only in the candidate list and in which
    # GenerateTree argument the candidate replaces.
    if type == 1:
        tuned = 'thershodImpure'
        candidates = [1e-10, 0.04, 0.10, 0.20, 0.30]
    elif type == 2:
        tuned = 'method'
        candidates = [0, 1, 2]
    else:
        tuned = 'thershod'
        candidates = [1e-10, 4e-3, 0.01, 0.02, 0.05]

    bestTree = None  # best decision tree found
    bestPara = -1  # candidate value that produced it
    bestAccuracy = 0  # best validation accuracy
    selectList = []  # one result row per candidate

    for candidate in candidates:
        print('for ' + tuned + ' = ', candidate)
        settings = {
            'thershod': thershod,
            'thershodImpure': thershodImpure,
            'method': method,
        }
        settings[tuned] = candidate

        treeroot = treeNode.treeNode()
        treeNode.GenerateTree(treeroot, trainData, trainLabel,
                              settings['thershod'],
                              settings['thershodImpure'], settings['method'])
        Prune(treeroot, validateData, validateLabel)
        _, accuracy1 = Decision(treeroot, trainData, trainLabel)
        print('train set accuracy:', accuracy1)
        _, accuracy2 = Decision(treeroot, validateData, validateLabel)
        print('validate set accuracy:', accuracy2)

        selectList.append([
            candidate, accuracy1, accuracy2, treeNode.totalLeafNum,
            totalPruningNum
        ])
        print('total leaf num:', treeNode.totalLeafNum)
        print('total pruning num:', totalPruningNum)

        # Reset the counters before the next candidate (leaf counts back to
        # 0).  Both totalLeafCount and totalLeafNum are reset, as before.
        treeNode.totalLeafCount = 0
        treeNode.totalLeafNum = 0
        totalPruningNum = 0

        if accuracy2 > bestAccuracy:
            bestAccuracy = accuracy2
            bestTree = treeroot
            bestPara = candidate

    return [selectList, bestTree, bestPara]