def testLearnModel2(self):
        # Two monotonicity checks on a single learner with pruning disabled:
        #   1. the tree objective on the training data must not increase as
        #      the sample size goes 1 -> 5 -> 10 (gamma = 0.01);
        #   2. the binary training error must not increase as maxDepth goes
        #      3 -> 5 -> 10 (gamma = 0, minSplit = 1).
        # The numpy RNG is reseeded with 21 before every fit, presumably so
        # the runs differ only in the setting being varied.
        learner = PenaltyDecisionTree(minSplit=20,
                                      maxDepth=3,
                                      gamma=0.01,
                                      pruning=False)

        def objectiveAt(sampleSize):
            # Reseed, refit with the given sample size, then score the
            # fitted tree on the training data.
            numpy.random.seed(21)
            learner.setSampleSize(sampleSize)
            learner.learnModel(self.X, self.y)
            return learner.treeObjective(self.X, self.y)

        #Vary sampleSize
        objectives = [objectiveAt(size) for size in (1, 5, 10)]

        self.assertTrue(objectives[0] >= objectives[1])
        self.assertTrue(objectives[1] >= objectives[2])

        def trainErrorAt(depth):
            # Refit at the given depth and return the binary error of the
            # predictions on the training data.
            learner.maxDepth = depth
            learner.learnModel(self.X, self.y)
            predicted = learner.predict(self.X)
            return Evaluator.binaryError(self.y, predicted)

        #Now vary max depth
        learner.gamma = 0

        numpy.random.seed(21)
        learner.setSampleSize(1)
        learner.minSplit = 1
        errorDepth3 = trainErrorAt(3)

        numpy.random.seed(21)
        errorDepth5 = trainErrorAt(5)

        numpy.random.seed(21)
        errorDepth10 = trainErrorAt(10)

        self.assertTrue(errorDepth3 >= errorDepth5)
        self.assertTrue(errorDepth5 >= errorDepth10)
    def testLearnModel2(self):
        # Two monotonicity checks on a single learner with pruning disabled:
        #   1. the tree objective on the training data must not increase as
        #      the sample size goes 1 -> 5 -> 10 (gamma = 0.01);
        #   2. the binary training error must not increase as maxDepth goes
        #      3 -> 5 -> 10 (gamma = 0, minSplit = 1).
        # The numpy RNG is reseeded with 21 before every fit, presumably so
        # the runs differ only in the setting being varied.
        learner = PenaltyDecisionTree(minSplit=20,
                                      maxDepth=3,
                                      gamma=0.01,
                                      pruning=False)

        def objectiveAt(sampleSize):
            # Reseed, refit with the given sample size, then score the
            # fitted tree on the training data.
            numpy.random.seed(21)
            learner.setSampleSize(sampleSize)
            learner.learnModel(self.X, self.y)
            return learner.treeObjective(self.X, self.y)

        #Vary sampleSize
        objectives = [objectiveAt(size) for size in (1, 5, 10)]

        self.assertTrue(objectives[0] >= objectives[1])
        self.assertTrue(objectives[1] >= objectives[2])

        def trainErrorAt(depth):
            # Refit at the given depth and return the binary error of the
            # predictions on the training data.
            learner.maxDepth = depth
            learner.learnModel(self.X, self.y)
            predicted = learner.predict(self.X)
            return Evaluator.binaryError(self.y, predicted)

        #Now vary max depth
        learner.gamma = 0

        numpy.random.seed(21)
        learner.setSampleSize(1)
        learner.minSplit = 1
        errorDepth3 = trainErrorAt(3)

        numpy.random.seed(21)
        errorDepth5 = trainErrorAt(5)

        numpy.random.seed(21)
        errorDepth10 = trainErrorAt(10)

        self.assertTrue(errorDepth3 >= errorDepth5)
        self.assertTrue(errorDepth5 >= errorDepth10)
    def testPrune(self):
        # Grow an unpruned tree by hand on rows 100 onwards, then check that
        # prune() increases neither the tree objective nor the vertex count,
        # and that afterwards no vertex has alpha above alphaThreshold.
        startId = (0, )
        learner = PenaltyDecisionTree(minSplit=20,
                                      maxDepth=5,
                                      gamma=0.05,
                                      pruning=False)

        trainX = self.X[100:, :]
        trainY = self.y[100:]

        # The argsort of an argsort gives each row's rank within its column.
        # The builtin int is used as the dtype because numpy.int was only a
        # deprecated alias for it and no longer exists in NumPy >= 1.24.
        argsortX = numpy.zeros(trainX.shape, int)
        for i in range(trainX.shape[1]):
            argsortX[:, i] = numpy.argsort(numpy.argsort(trainX[:, i]))

        # Build the tree manually: a root vertex holding every training row
        # index and the modal label, then grow from that root.
        learner.tree = DictTree()
        rootNode = DecisionNode(numpy.arange(trainX.shape[0]),
                                Util.mode(trainY))
        learner.tree.setVertex(startId, rootNode)
        learner.growTree(trainX, trainY, argsortX, startId)
        learner.shapeX = trainX.shape
        # NOTE(review): the return value is discarded, so predict() is
        # presumably called for per-vertex state that computeAlphas() reads
        # -- confirm against PenaltyDecisionTree.
        learner.predict(trainX, trainY)
        learner.computeAlphas()

        objBefore = learner.treeObjective(trainX, trainY)
        sizeBefore = learner.tree.getNumVertices()

        #Now we'll prune
        learner.prune(trainX, trainY)

        objAfter = learner.treeObjective(trainX, trainY)
        sizeAfter = learner.tree.getNumVertices()

        self.assertGreaterEqual(objBefore, objAfter)
        self.assertGreaterEqual(sizeBefore, sizeAfter)

        #Check there are no nodes with alpha>alphaThreshold
        for vertexId in learner.tree.getAllVertexIds():
            self.assertLessEqual(learner.tree.getVertex(vertexId).alpha,
                                 learner.alphaThreshold)
 def testPrune(self):
     # Grow an unpruned tree by hand on rows 100 onwards, then check that
     # prune() increases neither the tree objective nor the vertex count,
     # and that afterwards no vertex has alpha above alphaThreshold.
     startId = (0, )
     learner = PenaltyDecisionTree(minSplit=20,
                                   maxDepth=5,
                                   gamma=0.05,
                                   pruning=False)

     trainX = self.X[100:, :]
     trainY = self.y[100:]

     # The argsort of an argsort gives each row's rank within its column.
     # The builtin int is used as the dtype because numpy.int was only a
     # deprecated alias for it and no longer exists in NumPy >= 1.24.
     argsortX = numpy.zeros(trainX.shape, int)
     for i in range(trainX.shape[1]):
         argsortX[:, i] = numpy.argsort(numpy.argsort(trainX[:, i]))

     # Build the tree manually: a root vertex holding every training row
     # index and the modal label, then grow from that root.
     learner.tree = DictTree()
     rootNode = DecisionNode(numpy.arange(trainX.shape[0]),
                             Util.mode(trainY))
     learner.tree.setVertex(startId, rootNode)
     learner.growTree(trainX, trainY, argsortX, startId)
     learner.shapeX = trainX.shape
     # NOTE(review): the return value is discarded, so predict() is
     # presumably called for per-vertex state that computeAlphas() reads
     # -- confirm against PenaltyDecisionTree.
     learner.predict(trainX, trainY)
     learner.computeAlphas()

     objBefore = learner.treeObjective(trainX, trainY)
     sizeBefore = learner.tree.getNumVertices()

     #Now we'll prune
     learner.prune(trainX, trainY)

     objAfter = learner.treeObjective(trainX, trainY)
     sizeAfter = learner.tree.getNumVertices()

     self.assertGreaterEqual(objBefore, objAfter)
     self.assertGreaterEqual(sizeBefore, sizeAfter)

     #Check there are no nodes with alpha>alphaThreshold
     for vertexId in learner.tree.getAllVertexIds():
         self.assertLessEqual(learner.tree.getVertex(vertexId).alpha,
                              learner.alphaThreshold)
    def testLearnModel(self):
        # Fit on the first 100 rows and check that (a) treeObjective() on the
        # training data equals the penalised error recomputed by hand, and
        # (b) every vertex holds a value of +1 or -1, with non-leaf vertices
        # also reporting a bounded feature index and an error in [0, 1].
        minSplit = 20
        maxDepth = 3
        gamma = 0.00

        trainX = self.X[0:100, :]
        trainY = self.y[0:100]
        testX = self.X[100:, :]

        learner = PenaltyDecisionTree(minSplit=minSplit,
                                      maxDepth=maxDepth,
                                      gamma=gamma)
        learner.learnModel(trainX, trainY)
        tree = learner.getTree()

        predY = learner.predict(trainX)
        # Held-out rows are predicted purely as a smoke check; nothing is
        # asserted on the result.
        learner.predict(testX)

        #Work out penalty cost: (1 - gamma) * training error
        #+ gamma * sqrt(T), where T is the number of vertices in the tree
        n = float(trainX.shape[0])
        T = tree.getNumVertices()
        error = (1 - gamma) * numpy.sum(predY != trainY) / n
        error += gamma * numpy.sqrt(T)

        # assertEqual is used because the assertEquals alias is deprecated
        # and was removed in Python 3.12.
        self.assertEqual(error, learner.treeObjective(trainX, trainY))

        #Check if the values in the tree nodes are correct
        for vertexId in tree.getAllVertexIds():
            vertex = tree.getVertex(vertexId)

            self.assertIn(vertex.getValue(), (1.0, -1.0))
            if tree.isNonLeaf(vertexId):
                # NOTE(review): the upper bound is inclusive, yet a valid
                # column index would be < trainX.shape[1] -- confirm whether
                # getFeatureInd() may legitimately equal the column count.
                self.assertTrue(
                    0 <= vertex.getFeatureInd() <= trainX.shape[1])
                self.assertTrue(0 <= vertex.getError() <= 1)
 def testLearnModel(self):
     # Fit on the first 100 rows and check that (a) treeObjective() on the
     # training data equals the penalised error recomputed by hand, and
     # (b) every vertex holds a value of +1 or -1, with non-leaf vertices
     # also reporting a bounded feature index and an error in [0, 1].
     minSplit = 20
     maxDepth = 3
     gamma = 0.00

     trainX = self.X[0:100, :]
     trainY = self.y[0:100]
     testX = self.X[100:, :]

     learner = PenaltyDecisionTree(minSplit=minSplit,
                                   maxDepth=maxDepth,
                                   gamma=gamma)
     learner.learnModel(trainX, trainY)
     tree = learner.getTree()

     predY = learner.predict(trainX)
     # Held-out rows are predicted purely as a smoke check; nothing is
     # asserted on the result.
     learner.predict(testX)

     #Work out penalty cost: (1 - gamma) * training error
     #+ gamma * sqrt(T), where T is the number of vertices in the tree
     n = float(trainX.shape[0])
     T = tree.getNumVertices()
     error = (1 - gamma) * numpy.sum(predY != trainY) / n
     error += gamma * numpy.sqrt(T)

     # assertEqual is used because the assertEquals alias is deprecated
     # and was removed in Python 3.12.
     self.assertEqual(error, learner.treeObjective(trainX, trainY))

     #Check if the values in the tree nodes are correct
     for vertexId in tree.getAllVertexIds():
         vertex = tree.getVertex(vertexId)

         self.assertIn(vertex.getValue(), (1.0, -1.0))
         if tree.isNonLeaf(vertexId):
             # NOTE(review): the upper bound is inclusive, yet a valid
             # column index would be < trainX.shape[1] -- confirm whether
             # getFeatureInd() may legitimately equal the column count.
             self.assertTrue(
                 0 <= vertex.getFeatureInd() <= trainX.shape[1])
             self.assertTrue(0 <= vertex.getError() <= 1)