def testDepth(self):
    dictTree = DictTree()
    self.assertEqual(dictTree.depth(), 0)

    dictTree.setVertex("a")
    self.assertEqual(dictTree.depth(), 0)

    dictTree.addEdge("a", "b")
    dictTree.addEdge("a", "c")
    dictTree.addEdge("d", "a")
    self.assertEqual(dictTree.depth(), 2)

    dictTree.addEdge("c", "e")
    self.assertEqual(dictTree.depth(), 3)
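#For reference, a minimal sketch of the depth semantics verified above: depth
#counts edges from the root to the deepest vertex, so a single-vertex tree has
#depth 0. This is an illustration only, not DictTree's actual implementation;
#it assumes getRootId() and children(), both used elsewhere in this codebase.
def depthSketch(tree):
    #Depth-first traversal from the root, tracking the deepest level seen
    maxDepth = 0
    stack = [(tree.getRootId(), 0)]
    while len(stack) != 0:
        vertexId, d = stack.pop()
        maxDepth = max(maxDepth, d)
        for childId in tree.children(vertexId):
            stack.append((childId, d + 1))
    return maxDepth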
def testSplitNode(self):
    d = 0
    k = 0
    maxDepth = 1
    inds = numpy.arange(self.y.shape[0])

    treeRank = TreeRank(self.leafRanklearner)
    treeRank.setMaxDepth(maxDepth)

    node = RankNode(inds, numpy.arange(self.X.shape[1]))
    tree = DictTree()
    tree.setVertex((0, 0), node)
    tree = treeRank.splitNode(tree, self.X, self.y, d, k)

    self.assertEqual(tree.getNumVertices(), 3)
    self.assertEqual(tree.getNumEdges(), 2)
    self.assertEqual(tree.getRootId(), (0, 0))
    self.assertFalse(tree.getVertex((0, 0)).isLeafNode())
    self.assertTrue(tree.getVertex((1, 0)).isLeafNode())
    self.assertTrue(tree.getVertex((1, 1)).isLeafNode())
    self.assertTrue(tree.depth() <= maxDepth)
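#The vertex ids checked above appear to follow a (depth, position) scheme:
#splitting node (d, k) adds children at (d + 1, 2*k) and (d + 1, 2*k + 1),
#consistent with (0, 0) gaining children (1, 0) and (1, 1). A hypothetical
#helper (not part of TreeRank) illustrating that assumed convention:
def childIdsSketch(d, k):
    #Left and right child ids under the assumed (depth, position) numbering
    return (d + 1, 2*k), (d + 1, 2*k + 1)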
class PenaltyDecisionTree(AbstractPredictor):
    def __init__(self, criterion="gain", maxDepth=10, minSplit=30, learnType="reg", pruning=True, gamma=0.01, sampleSize=10):
        """
        Learn a decision tree with a penalty proportional to the square root
        of the size of the tree, as in Nobel 2002. We use a stochastic
        approach in which we learn a set of trees randomly and choose the
        best one.

        :param criterion: The splitting criterion, which is currently only information gain
        :param maxDepth: The maximum depth of the tree
        :type maxDepth: `int`
        :param minSplit: The minimum size of a node for it to be split.
        :type minSplit: `int`
        :param learnType: The type of learning to perform. Currently only regression
        :param pruning: Whether to perform pruning or not.
        :type pruning: `boolean`
        :param gamma: The weight on the penalty factor between 0 and 1
        :type gamma: `float`
        :param sampleSize: The number of trees to learn in the stochastic search.
        :type sampleSize: `int`
        """
        super(PenaltyDecisionTree, self).__init__()
        self.maxDepth = maxDepth
        self.minSplit = minSplit
        self.criterion = criterion
        self.learnType = learnType
        self.setGamma(gamma)
        self.setSampleSize(sampleSize)
        self.pruning = pruning
        self.alphaThreshold = 0.0

    def setGamma(self, gamma):
        Parameter.checkFloat(gamma, 0.0, 1.0)
        self.gamma = gamma

    def setSampleSize(self, sampleSize):
        Parameter.checkInt(sampleSize, 1, float("inf"))
        self.sampleSize = sampleSize

    def setAlphaThreshold(self, alphaThreshold):
        Parameter.checkFloat(alphaThreshold, -float("inf"), float("inf"))
        self.alphaThreshold = alphaThreshold

    def getAlphaThreshold(self):
        return self.alphaThreshold

    def getLeftChildId(self, nodeId):
        #The left child id is the parent id with 0 appended
        leftChildId = list(nodeId)
        leftChildId.append(0)
        leftChildId = tuple(leftChildId)
        return leftChildId

    def getRightChildId(self, nodeId):
        #The right child id is the parent id with 1 appended
        rightChildId = list(nodeId)
        rightChildId.append(1)
        rightChildId = tuple(rightChildId)
        return rightChildId

    def getTree(self):
        return self.tree

    def learnModel(self, X, y):
        if numpy.unique(y).shape[0] != 2:
            raise ValueError("Must provide binary labels")
        if not numpy.issubdtype(y.dtype, numpy.integer):
            raise ValueError("Labels must be integers")
        self.shapeX = X.shape

        #The double argsort gives, for each column, the rank of each element
        argsortX = numpy.zeros(X.shape, numpy.int64)
        for i in range(X.shape[1]):
            argsortX[:, i] = numpy.argsort(X[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])

        rootId = (0,)
        idStack = [rootId]
        self.tree = DictTree()
        rootNode = DecisionNode(numpy.arange(X.shape[0]), Util.mode(y))
        self.tree.setVertex(rootId, rootNode)
        bestError = float("inf")
        bestTree = self.tree

        #First grow a selection of trees
        while len(idStack) != 0:
            #Prune the current node away and grow from that node
            nodeId = idStack.pop()

            for i in range(self.sampleSize):
                self.tree = bestTree.deepCopy()
                try:
                    node = self.tree.getVertex(nodeId)
                except ValueError:
                    #Print debug information before re-raising
                    print(nodeId)
                    print(self.tree)
                    raise
                self.tree.pruneVertex(nodeId)
                self.growTree(X, y, argsortX, nodeId)
                self.prune(X, y)
                error = self.treeObjective(X, y)

                if error < bestError:
                    bestError = error
                    bestTree = self.tree.deepCopy()

            children = bestTree.children(nodeId)
            idStack.extend(children)

        self.tree = bestTree

    def growTree(self, X, y, argsortX, startId):
        """
        Grow a tree using a stack. Given a sample of data and a node index, we
        find the best split and add children to the tree accordingly. We
        perform pre-pruning based on the penalty.
        """
        eps = 10**-4
        idStack = [startId]

        while len(idStack) != 0:
            nodeId = idStack.pop()
            node = self.tree.getVertex(nodeId)
            accuracies, thresholds = findBestSplitRisk(self.minSplit, X, y, node.getTrainInds(), argsortX)

            #Choose the best feature based on the gains; eps ensures features
            #with zero accuracy keep a small chance of selection
            accuracies += eps
            bestFeatureInd = Util.randomChoice(accuracies)[0]
            bestThreshold = thresholds[bestFeatureInd]

            #Indices of the training examples going to each child
            nodeInds = node.getTrainInds()
            bestLeftInds = numpy.sort(nodeInds[numpy.arange(nodeInds.shape[0])[X[:, bestFeatureInd][nodeInds] < bestThreshold]])
            bestRightInds = numpy.sort(nodeInds[numpy.arange(nodeInds.shape[0])[X[:, bestFeatureInd][nodeInds] >= bestThreshold]])

            #The split may have 0 items in one set, so don't split
            if bestLeftInds.shape[0] != 0 and bestRightInds.shape[0] != 0 and self.tree.depth() < self.maxDepth:
                node.setError(1 - accuracies[bestFeatureInd])
                node.setFeatureInd(bestFeatureInd)
                node.setThreshold(bestThreshold)

                leftChildId = self.getLeftChildId(nodeId)
                leftChild = DecisionNode(bestLeftInds, Util.mode(y[bestLeftInds]))
                self.tree.addChild(nodeId, leftChildId, leftChild)

                if leftChild.getTrainInds().shape[0] >= self.minSplit:
                    idStack.append(leftChildId)

                rightChildId = self.getRightChildId(nodeId)
                rightChild = DecisionNode(bestRightInds, Util.mode(y[bestRightInds]))
                self.tree.addChild(nodeId, rightChildId, rightChild)

                if rightChild.getTrainInds().shape[0] >= self.minSplit:
                    idStack.append(rightChildId)

    def predict(self, X, y=None):
        """
        Make a prediction for the set of examples given in the matrix X. If a
        label vector y is passed in then we set the test error for each node;
        if y=None, no errors are set.
        """
        rootId = (0,)
        predY = numpy.zeros(X.shape[0])
        self.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
        idStack = [rootId]

        while len(idStack) != 0:
            nodeId = idStack.pop()
            node = self.tree.getVertex(nodeId)
            testInds = node.getTestInds()
            if y is not None:
                node.setTestError(self.vertexTestError(y[testInds], node.getValue()))

            if self.tree.isLeaf(nodeId):
                predY[testInds] = node.getValue()
            else:
                for childId in [self.getLeftChildId(nodeId), self.getRightChildId(nodeId)]:
                    if self.tree.vertexExists(childId):
                        child = self.tree.getVertex(childId)

                        if childId[-1] == 0:
                            childInds = X[testInds, node.getFeatureInd()] < node.getThreshold()
                        else:
                            childInds = X[testInds, node.getFeatureInd()] >= node.getThreshold()

                        child.setTestInds(testInds[childInds])
                        idStack.append(childId)

        return predY

    def treeObjective(self, X, y):
        """
        Return the empirical risk plus penalty for the tree.
        """
        predY = self.predict(X)
        (n, d) = X.shape
        return (1-self.gamma)*numpy.sum(predY != y)/float(n) + self.gamma*numpy.sqrt(self.tree.getNumVertices())

    def prune(self, X, y):
        """
        Do some post pruning greedily.
        """
        self.predict(X, y)
        self.computeAlphas()

        #Do the pruning, recomputing alpha along the way
        rootId = (0,)
        idStack = [rootId]

        while len(idStack) != 0:
            nodeId = idStack.pop()
            node = self.tree.getVertex(nodeId)

            if node.alpha > self.alphaThreshold:
                self.tree.pruneVertex(nodeId)
                self.computeAlphas()
            else:
                for childId in [self.getLeftChildId(nodeId), self.getRightChildId(nodeId)]:
                    if self.tree.vertexExists(childId):
                        idStack.append(childId)

    def vertexTestError(self, trueY, predY):
        """
        This is the error used for pruning. We compute it at each node.
        """
        return numpy.sum(trueY != predY)

    def computeAlphas(self):
        """
        The alpha value at each vertex is the improvement in the objective
        obtained by pruning the subtree rooted at that vertex.
        """
        n = self.shapeX[0]

        for vertexId in self.tree.getAllVertexIds():
            currentNode = self.tree.getVertex(vertexId)
            subtreeLeaves = self.tree.leaves(vertexId)

            subtreeError = 0
            for leaf in subtreeLeaves:
                subtreeError += self.tree.getVertex(leaf).getTestError()

            #T2 is the tree size after replacing this subtree by a single leaf
            T = self.tree.getNumVertices()
            T2 = T - len(self.tree.subtreeIds(vertexId)) + 1
            currentNode.alpha = (1-self.gamma)*(subtreeError - currentNode.getTestError())
            currentNode.alpha /= n
            currentNode.alpha += self.gamma*numpy.sqrt(T)
            currentNode.alpha -= self.gamma*numpy.sqrt(T2)

    def copy(self):
        """
        Create a new tree learner with the same parameters.
        """
        newLearner = PenaltyDecisionTree(criterion=self.criterion, maxDepth=self.maxDepth, minSplit=self.minSplit, learnType=self.learnType, pruning=self.pruning, gamma=self.gamma, sampleSize=self.sampleSize)
        return newLearner

    def getMetricMethod(self):
        """
        Returns a way to measure the performance of the classifier.
        """
        return Evaluator.binaryError