def testPrune(self):
        startId = (0, )
        minSplit = 20
        maxDepth = 5
        gamma = 0.05
        learner = PenaltyDecisionTree(minSplit=minSplit,
                                      maxDepth=maxDepth,
                                      gamma=gamma,
                                      pruning=False)

        trainX = self.X[100:, :]
        trainY = self.y[100:]
        testX = self.X[0:100, :]
        testY = self.y[0:100]

        argsortX = numpy.zeros(trainX.shape, numpy.int)
        for i in range(trainX.shape[1]):
            argsortX[:, i] = numpy.argsort(trainX[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])

        learner.tree = DictTree()
        rootNode = DecisionNode(numpy.arange(trainX.shape[0]),
                                Util.mode(trainY))
        learner.tree.setVertex(startId, rootNode)
        learner.growTree(trainX, trainY, argsortX, startId)
        learner.shapeX = trainX.shape
        learner.predict(trainX, trainY)
        learner.computeAlphas()

        obj1 = learner.treeObjective(trainX, trainY)
        size1 = learner.tree.getNumVertices()

        #Now we'll prune
        learner.prune(trainX, trainY)

        obj2 = learner.treeObjective(trainX, trainY)
        size2 = learner.tree.getNumVertices()

        self.assertTrue(obj1 >= obj2)
        self.assertTrue(size1 >= size2)

        #Check there are no nodes with alpha>alphaThreshold
        for vertexId in learner.tree.getAllVertexIds():
            self.assertTrue(
                learner.tree.getVertex(vertexId).alpha <=
                learner.alphaThreshold)
 def testPrune(self): 
     startId = (0, )
     minSplit = 20
     maxDepth = 5
     gamma = 0.05
     learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False) 
     
     trainX = self.X[100:, :]
     trainY = self.y[100:]
     testX = self.X[0:100, :]
     testY = self.y[0:100]    
     
     argsortX = numpy.zeros(trainX.shape, numpy.int)
     for i in range(trainX.shape[1]): 
         argsortX[:, i] = numpy.argsort(trainX[:, i])
         argsortX[:, i] = numpy.argsort(argsortX[:, i])
     
     learner.tree = DictTree()
     rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
     learner.tree.setVertex(startId, rootNode)        
     learner.growTree(trainX, trainY, argsortX, startId)    
     learner.shapeX = trainX.shape 
     learner.predict(trainX, trainY)
     learner.computeAlphas()
     
     obj1 = learner.treeObjective(trainX, trainY)        
     size1 = learner.tree.getNumVertices()
     
     #Now we'll prune 
     learner.prune(trainX, trainY)
     
     obj2 = learner.treeObjective(trainX, trainY)
     size2 = learner.tree.getNumVertices()
     
     self.assertTrue(obj1 >= obj2)    
     self.assertTrue(size1 >= size2)        
     
     #Check there are no nodes with alpha>alphaThreshold 
     for vertexId in learner.tree.getAllVertexIds(): 
         self.assertTrue(learner.tree.getVertex(vertexId).alpha <= learner.alphaThreshold)
    def testGrowTree(self):
        startId = (0, )
        minSplit = 20
        maxDepth = 3
        gamma = 0.01
        learner = PenaltyDecisionTree(minSplit=minSplit,
                                      maxDepth=maxDepth,
                                      gamma=gamma,
                                      pruning=False)

        trainX = self.X[100:, :]
        trainY = self.y[100:]
        testX = self.X[0:100, :]
        testY = self.y[0:100]

        argsortX = numpy.zeros(trainX.shape, numpy.int)
        for i in range(trainX.shape[1]):
            argsortX[:, i] = numpy.argsort(trainX[:, i])
            argsortX[:, i] = numpy.argsort(argsortX[:, i])

        learner.tree = DictTree()
        rootNode = DecisionNode(numpy.arange(trainX.shape[0]),
                                Util.mode(trainY))
        learner.tree.setVertex(startId, rootNode)

        #Note that this matches with the case where we create a new tree each time
        numpy.random.seed(21)
        bestError = float("inf")

        for i in range(20):
            learner.tree.pruneVertex(startId)
            learner.growTree(trainX, trainY, argsortX, startId)

            predTestY = learner.predict(testX)
            error = Evaluator.binaryError(predTestY, testY)
            #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())

            if error < bestError:
                bestError = error
                bestTree = learner.tree.copy()

            self.assertTrue(learner.tree.depth() <= maxDepth)

            for vertexId in learner.tree.nonLeaves():
                self.assertTrue(
                    learner.tree.getVertex(vertexId).getTrainInds().shape[0] >=
                    minSplit)

        bestError1 = bestError
        learner.tree = bestTree

        #Now we test growing a tree from a non-root vertex
        numpy.random.seed(21)
        for i in range(20):
            learner.tree.pruneVertex((0, 1))
            learner.growTree(trainX, trainY, argsortX, (0, 1))

            self.assertTrue(
                learner.tree.getVertex((0, )) == bestTree.getVertex((0, )))
            self.assertTrue(
                learner.tree.getVertex((0, 0)) == bestTree.getVertex((0, 0)))

            predTestY = learner.predict(testX)
            error = Evaluator.binaryError(predTestY, testY)

            if error < bestError:
                bestError = error
                bestTree = learner.tree.copy()
            #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
        self.assertTrue(bestError1 >= bestError)
 def testGrowTree(self):
     startId = (0, )
     minSplit = 20
     maxDepth = 3
     gamma = 0.01
     learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False) 
     
     trainX = self.X[100:, :]
     trainY = self.y[100:]
     testX = self.X[0:100, :]
     testY = self.y[0:100]    
     
     argsortX = numpy.zeros(trainX.shape, numpy.int)
     for i in range(trainX.shape[1]): 
         argsortX[:, i] = numpy.argsort(trainX[:, i])
         argsortX[:, i] = numpy.argsort(argsortX[:, i])
     
     learner.tree = DictTree()
     rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
     learner.tree.setVertex(startId, rootNode)        
     
     #Note that this matches with the case where we create a new tree each time 
     numpy.random.seed(21)
     bestError = float("inf")        
     
     for i in range(20): 
         learner.tree.pruneVertex(startId)
         learner.growTree(trainX, trainY, argsortX, startId)
         
         predTestY = learner.predict(testX)
         error = Evaluator.binaryError(predTestY, testY)
         #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
         
         if error < bestError: 
             bestError = error 
             bestTree = learner.tree.copy() 
         
         self.assertTrue(learner.tree.depth() <= maxDepth)
         
         for vertexId in learner.tree.nonLeaves(): 
             self.assertTrue(learner.tree.getVertex(vertexId).getTrainInds().shape[0] >= minSplit)
     
     bestError1 = bestError               
     learner.tree = bestTree    
     
     #Now we test growing a tree from a non-root vertex 
     numpy.random.seed(21)
     for i in range(20): 
         learner.tree.pruneVertex((0, 1)) 
         learner.growTree(trainX, trainY, argsortX, (0, 1))
         
         self.assertTrue(learner.tree.getVertex((0,)) == bestTree.getVertex((0,)))
         self.assertTrue(learner.tree.getVertex((0,0)) == bestTree.getVertex((0,0)))
         
         
         predTestY = learner.predict(testX)
         error = Evaluator.binaryError(predTestY, testY)
         
         if error < bestError: 
             bestError = error 
             bestTree = learner.tree.copy() 
         #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())
     self.assertTrue(bestError1 >= bestError )