def testLearnModel2(self):
    """Check monotonicity of the learner's fit on the training data.

    First, with a small penalty (gamma = 0.01), increasing sampleSize
    should produce a penalised objective that is no worse. Then, with
    gamma = 0 the learnt tree should minimise the empirical risk, so
    increasing maxDepth should give a training error that is no worse.
    The RNG is reseeded before each fit so runs only differ in the
    parameter under test.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.01
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)

    # Vary sampleSize: sampling more candidate trees should not
    # increase the penalised objective.
    numpy.random.seed(21)
    learner.setSampleSize(1)
    learner.learnModel(self.X, self.y)
    error1 = learner.treeObjective(self.X, self.y)

    numpy.random.seed(21)
    learner.setSampleSize(5)
    learner.learnModel(self.X, self.y)
    error2 = learner.treeObjective(self.X, self.y)

    numpy.random.seed(21)
    learner.setSampleSize(10)
    learner.learnModel(self.X, self.y)
    error3 = learner.treeObjective(self.X, self.y)

    self.assertTrue(error1 >= error2)
    self.assertTrue(error2 >= error3)

    # Now vary max depth: with gamma = 0 a deeper tree should fit the
    # training set at least as well.
    learner.gamma = 0
    numpy.random.seed(21)
    learner.setSampleSize(1)
    learner.minSplit = 1
    learner.maxDepth = 3
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error1 = Evaluator.binaryError(self.y, predY)

    numpy.random.seed(21)
    learner.maxDepth = 5
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error2 = Evaluator.binaryError(self.y, predY)

    numpy.random.seed(21)
    learner.maxDepth = 10
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error3 = Evaluator.binaryError(self.y, predY)

    self.assertTrue(error1 >= error2)
    self.assertTrue(error2 >= error3)
def testLearnModel2(self):
    """Verify the fit improves (or stays equal) with more resources.

    Part 1: with gamma = 0.01 and pruning off, larger sampleSize values
    must not increase the penalised tree objective.
    Part 2: with gamma = 0 (pure empirical risk), larger maxDepth values
    must not increase the binary training error.
    numpy.random.seed(21) is applied before every fit so only the
    parameter under test changes between runs.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.01
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)

    # Part 1: sweep sampleSize and record the penalised objective.
    objectives = []
    for sampleSize in [1, 5, 10]:
        numpy.random.seed(21)
        learner.setSampleSize(sampleSize)
        learner.learnModel(self.X, self.y)
        objectives.append(learner.treeObjective(self.X, self.y))

    self.assertTrue(objectives[0] >= objectives[1])
    self.assertTrue(objectives[1] >= objectives[2])

    # Part 2: sweep maxDepth with gamma = 0 and record training error.
    learner.gamma = 0
    learner.setSampleSize(1)
    learner.minSplit = 1

    trainErrors = []
    for depth in [3, 5, 10]:
        numpy.random.seed(21)
        learner.maxDepth = depth
        learner.learnModel(self.X, self.y)
        predY = learner.predict(self.X)
        trainErrors.append(Evaluator.binaryError(self.y, predY))

    self.assertTrue(trainErrors[0] >= trainErrors[1])
    self.assertTrue(trainErrors[1] >= trainErrors[2])
def testPrune(self):
    """Grow an unpruned tree manually, prune it, and check invariants.

    After pruning: the penalised objective must not increase, the tree
    must not gain vertices, and every remaining node's alpha must be at
    or below the learner's alphaThreshold.
    """
    startId = (0, )
    minSplit = 20
    maxDepth = 5
    gamma = 0.05
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    trainX = self.X[100:, :]
    trainY = self.y[100:]

    # Rank-transform each feature column (double argsort yields ranks).
    # numpy.int was removed in NumPy 1.24; the builtin int is the
    # drop-in replacement since numpy.int was an alias for it.
    argsortX = numpy.zeros(trainX.shape, int)
    for i in range(trainX.shape[1]):
        argsortX[:, i] = numpy.argsort(trainX[:, i])
        argsortX[:, i] = numpy.argsort(argsortX[:, i])

    # Build the root and grow the full (unpruned) tree by hand.
    learner.tree = DictTree()
    rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
    learner.tree.setVertex(startId, rootNode)
    learner.growTree(trainX, trainY, argsortX, startId)
    learner.shapeX = trainX.shape
    learner.predict(trainX, trainY)
    learner.computeAlphas()

    obj1 = learner.treeObjective(trainX, trainY)
    size1 = learner.tree.getNumVertices()

    # Now we'll prune
    learner.prune(trainX, trainY)
    obj2 = learner.treeObjective(trainX, trainY)
    size2 = learner.tree.getNumVertices()

    self.assertTrue(obj1 >= obj2)
    self.assertTrue(size1 >= size2)

    # Check there are no nodes with alpha > alphaThreshold
    for vertexId in learner.tree.getAllVertexIds():
        self.assertTrue(learner.tree.getVertex(vertexId).alpha <= learner.alphaThreshold)
def testPrune(self):
    """Check that prune() never worsens the objective or grows the tree.

    A tree is grown explicitly (pruning disabled), alphas are computed,
    and then prune() is called. The objective and vertex count must be
    monotone non-increasing, and no surviving vertex may have an alpha
    above learner.alphaThreshold.
    """
    startId = (0, )
    minSplit = 20
    maxDepth = 5
    gamma = 0.05
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    trainX = self.X[100:, :]
    trainY = self.y[100:]

    # Per-column rank transform via a double argsort.
    # NOTE: numpy.int (an alias of the builtin int) was removed in
    # NumPy 1.24, so we pass int directly.
    argsortX = numpy.zeros(trainX.shape, int)
    for col in range(trainX.shape[1]):
        argsortX[:, col] = numpy.argsort(trainX[:, col])
        argsortX[:, col] = numpy.argsort(argsortX[:, col])

    # Manually seed the tree with a root covering all examples, then grow.
    learner.tree = DictTree()
    root = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
    learner.tree.setVertex(startId, root)
    learner.growTree(trainX, trainY, argsortX, startId)
    learner.shapeX = trainX.shape
    learner.predict(trainX, trainY)
    learner.computeAlphas()

    objectiveBefore = learner.treeObjective(trainX, trainY)
    sizeBefore = learner.tree.getNumVertices()

    # Prune and re-measure.
    learner.prune(trainX, trainY)
    objectiveAfter = learner.treeObjective(trainX, trainY)
    sizeAfter = learner.tree.getNumVertices()

    self.assertTrue(objectiveBefore >= objectiveAfter)
    self.assertTrue(sizeBefore >= sizeAfter)

    # No remaining node may exceed the alpha threshold.
    for vertexId in learner.tree.getAllVertexIds():
        self.assertTrue(learner.tree.getVertex(vertexId).alpha <= learner.alphaThreshold)
def testLearnModel(self):
    """Check treeObjective matches a hand-computed penalised error.

    Trains on the first 100 examples with gamma = 0, recomputes the
    objective (1-gamma)*err/n + gamma*sqrt(|T|) manually, and verifies
    the node attributes of the learnt tree are within valid ranges.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.00
    X, y = self.X, self.y
    X = X[0:100, :]
    y = y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma)
    learner.learnModel(X, y)
    tree = learner.getTree()

    # Work out penalty cost by hand.
    predY = learner.predict(X)
    n = float(X.shape[0])
    T = tree.getNumVertices()
    error = (1 - gamma) * numpy.sum(predY != y) / n
    error += gamma * numpy.sqrt(T)

    # assertEquals is a deprecated alias (removed in Python 3.12);
    # assertEqual is behaviourally identical.
    self.assertEqual(error, learner.treeObjective(X, y))

    # Check the values in the tree nodes are correct.
    for vertexId in tree.getAllVertexIds():
        vertex = tree.getVertex(vertexId)
        self.assertTrue(vertex.getValue() == 1.0 or vertex.getValue() == -1.0)
        if tree.isNonLeaf(vertexId):
            self.assertTrue(0 <= vertex.getFeatureInd() <= X.shape[1])
            self.assertTrue(0 <= vertex.getError() <= 1)
def testLearnModel(self):
    """Validate the learnt tree's objective and per-node attributes.

    With gamma = 0 the objective reduces to the empirical error; we
    recompute (1-gamma)*misclassified/n + gamma*sqrt(numVertices) by
    hand and compare it to treeObjective. Each node's value must be
    +/-1 and, for internal nodes, feature index and error must lie in
    their valid ranges.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.00
    trainX = self.X[0:100, :]
    trainY = self.y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma)
    learner.learnModel(trainX, trainY)
    tree = learner.getTree()

    # Recompute the penalised objective manually.
    predY = learner.predict(trainX)
    numExamples = float(trainX.shape[0])
    numVertices = tree.getNumVertices()
    expectedObjective = (1 - gamma) * numpy.sum(predY != trainY) / numExamples
    expectedObjective += gamma * numpy.sqrt(numVertices)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(expectedObjective, learner.treeObjective(trainX, trainY))

    # Validate node attributes across the whole tree.
    for vertexId in tree.getAllVertexIds():
        vertex = tree.getVertex(vertexId)
        self.assertTrue(vertex.getValue() == 1.0 or vertex.getValue() == -1.0)
        if tree.isNonLeaf(vertexId):
            self.assertTrue(0 <= vertex.getFeatureInd() <= trainX.shape[1])
            self.assertTrue(0 <= vertex.getError() <= 1)