    def testLearnModel(self):
        minSplit = 20
        maxDepth = 3
        gamma = 0.00

        X, y = self.X, self.y

        testX = X[100:, :]
        testY = y[100:]
        X = X[0:100, :]
        y = y[0:100]

        learner = PenaltyDecisionTree(minSplit=minSplit,
                                      maxDepth=maxDepth,
                                      gamma=gamma)
        learner.learnModel(X, y)
        tree = learner.getTree()

        #Work out penalty cost
        predY = learner.predict(X)
        predTestY = learner.predict(testX)

        n = float(X.shape[0])
        d = X.shape[1]
        T = tree.getNumVertices()
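        #Recompute the penalised objective by hand:
        #(1 - gamma) * (misclassified training examples) / n + gamma * sqrt(T)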
        error = (1 - gamma) * numpy.sum(predY != y) / n
        testError = numpy.sum(predTestY != testY) / float(testY.shape[0])
        error += gamma * numpy.sqrt(T)

        self.assertEqual(error, learner.treeObjective(X, y))

        #Check if the values in the tree nodes are correct
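        #Vertex labels should be binary (+1 or -1); internal vertices should
        #also hold a valid split feature index and an error in [0, 1]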
        for vertexId in tree.getAllVertexIds():
            vertex = tree.getVertex(vertexId)

            self.assertTrue(vertex.getValue() == 1.0
                            or vertex.getValue() == -1.0)
            if tree.isNonLeaf(vertexId):
                self.assertTrue(0 <= vertex.getFeatureInd() < X.shape[1])
                self.assertTrue(0 <= vertex.getError() <= 1)
    def testComputeAlphas(self):
        minSplit = 20
        maxDepth = 3
        gamma = 0.1

        X, y = self.X, self.y

        testX = X[100:, :]
        testY = y[100:]
        X = X[0:100, :]
        y = y[0:100]

        learner = PenaltyDecisionTree(minSplit=minSplit,
                                      maxDepth=maxDepth,
                                      gamma=gamma,
                                      pruning=False)
        learner.learnModel(X, y)
        tree = learner.getTree()

        rootId = (0, )
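        #Assign every training example to the root and predict so that each
        #vertex records the test error that computeAlphas uses below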
        learner.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
        learner.predict(X, y)
        learner.computeAlphas()

        #See if the alpha values of the nodes are correct
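        #alpha should equal the penalised error of keeping the subtree rooted
        #at a vertex minus the penalised error of collapsing it to a single leaf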
        for vertexId in tree.getAllVertexIds():
            subtreeLeaves = tree.leaves(vertexId)

            subtreeError = 0
            for subtreeLeaf in subtreeLeaves:
                subtreeError += ((1 - gamma) *
                                 tree.getVertex(subtreeLeaf).getTestError())

            n = float(X.shape[0])
            d = X.shape[1]
            T = tree.getNumVertices()
            subtreeError /= n
            subtreeError += gamma * numpy.sqrt(T)

            T2 = T - len(tree.subtreeIds(vertexId)) + 1
            vertexError = ((1 - gamma) *
                           tree.getVertex(vertexId).getTestError() / n)
            vertexError += gamma * numpy.sqrt(T2)

            self.assertAlmostEqual(subtreeError - vertexError,
                                   tree.getVertex(vertexId).alpha)

            if tree.isLeaf(vertexId):
                self.assertEqual(tree.getVertex(vertexId).alpha, 0.0)

        #Let's check the alpha of the root node via another method
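        #Pruning everything leaves a single-vertex tree (T = 1) that predicts
        #the modal label of y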
        rootId = (0, )

        T = 1
        (n, d) = X.shape
        n = float(n)
        vertexError = (1 - gamma) * numpy.sum(y != Util.mode(y)) / n
        pen = gamma * numpy.sqrt(T)
        vertexError += pen

        T = tree.getNumVertices()
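        #Penalised error of the full, unpruned tree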
        treeError = (1 - gamma) * numpy.sum(y != learner.predict(X)) / n
        pen = gamma * numpy.sqrt(T)
        treeError += pen

        alpha = treeError - vertexError
        self.assertAlmostEqual(alpha, tree.getVertex(rootId).alpha)