def testPrune(self):
    """Grow an unpruned tree, then prune it, and verify that pruning
    never increases the penalised objective, never grows the tree, and
    leaves no node with alpha above the learner's alpha threshold.
    """
    startId = (0, )
    minSplit = 20
    maxDepth = 5
    gamma = 0.05
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    trainX = self.X[100:, :]
    trainY = self.y[100:]

    # Double argsort turns each column into the rank of its values,
    # which growTree expects. (numpy.int was removed in NumPy 1.24;
    # numpy.int_ is the supported spelling.)
    argsortX = numpy.zeros(trainX.shape, numpy.int_)
    for i in range(trainX.shape[1]):
        argsortX[:, i] = numpy.argsort(trainX[:, i])
        argsortX[:, i] = numpy.argsort(argsortX[:, i])

    # Build a one-node tree rooted at startId, then grow it manually.
    learner.tree = DictTree()
    rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
    learner.tree.setVertex(startId, rootNode)

    learner.growTree(trainX, trainY, argsortX, startId)
    learner.shapeX = trainX.shape
    learner.predict(trainX, trainY)
    learner.computeAlphas()

    obj1 = learner.treeObjective(trainX, trainY)
    size1 = learner.tree.getNumVertices()

    # Now we'll prune
    learner.prune(trainX, trainY)

    obj2 = learner.treeObjective(trainX, trainY)
    size2 = learner.tree.getNumVertices()

    # Pruning must not worsen the objective or enlarge the tree.
    self.assertTrue(obj1 >= obj2)
    self.assertTrue(size1 >= size2)

    # Check there are no nodes with alpha > alphaThreshold
    for vertexId in learner.tree.getAllVertexIds():
        self.assertTrue(learner.tree.getVertex(vertexId).alpha <= learner.alphaThreshold)
def testPrune(self):
    """Grow an unpruned tree, then prune it, and verify that pruning
    never increases the penalised objective, never grows the tree, and
    leaves no node with alpha above the learner's alpha threshold.
    """
    startId = (0, )
    minSplit = 20
    maxDepth = 5
    gamma = 0.05
    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    trainX = self.X[100:, :]
    trainY = self.y[100:]

    # Double argsort turns each column into the rank of its values,
    # which growTree expects. (numpy.int was removed in NumPy 1.24;
    # numpy.int_ is the supported spelling.)
    argsortX = numpy.zeros(trainX.shape, numpy.int_)
    for i in range(trainX.shape[1]):
        argsortX[:, i] = numpy.argsort(trainX[:, i])
        argsortX[:, i] = numpy.argsort(argsortX[:, i])

    # Build a one-node tree rooted at startId, then grow it manually.
    learner.tree = DictTree()
    rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
    learner.tree.setVertex(startId, rootNode)

    learner.growTree(trainX, trainY, argsortX, startId)
    learner.shapeX = trainX.shape
    learner.predict(trainX, trainY)
    learner.computeAlphas()

    obj1 = learner.treeObjective(trainX, trainY)
    size1 = learner.tree.getNumVertices()

    # Now we'll prune
    learner.prune(trainX, trainY)

    obj2 = learner.treeObjective(trainX, trainY)
    size2 = learner.tree.getNumVertices()

    # Pruning must not worsen the objective or enlarge the tree.
    self.assertTrue(obj1 >= obj2)
    self.assertTrue(size1 >= size2)

    # Check there are no nodes with alpha > alphaThreshold
    for vertexId in learner.tree.getAllVertexIds():
        self.assertTrue(learner.tree.getVertex(vertexId).alpha <= learner.alphaThreshold)
def testComputeAlphas(self):
    """Check that computeAlphas assigns each node the difference between
    the penalised objective of its subtree and the objective obtained by
    collapsing the subtree to a single leaf; verify leaves get alpha 0
    and cross-check the root's alpha by a direct computation.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.1
    X = self.X[0:100, :]
    y = self.y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    learner.learnModel(X, y)
    tree = learner.getTree()

    rootId = (0, )
    learner.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
    learner.predict(X, y)
    learner.computeAlphas()

    # Sample count, hoisted out of the loop; float for true division.
    n = float(X.shape[0])

    # See if the alpha values of the nodes are correct
    for vertexId in tree.getAllVertexIds():
        # Penalised error of the subtree rooted at vertexId: summed leaf
        # errors plus the sqrt(T) size penalty weighted by gamma.
        subtreeError = 0
        for subtreeLeaf in tree.leaves(vertexId):
            subtreeError += (1 - gamma) * tree.getVertex(subtreeLeaf).getTestError()

        T = tree.getNumVertices()
        subtreeError /= n
        subtreeError += gamma * numpy.sqrt(T)

        # Tree size if this subtree were collapsed to a single leaf.
        T2 = T - len(tree.subtreeIds(vertexId)) + 1
        vertexError = (1 - gamma) * tree.getVertex(vertexId).getTestError() / n
        vertexError += gamma * numpy.sqrt(T2)

        # assertAlmostEquals/assertEquals are deprecated aliases removed
        # in Python 3.12 -- use the canonical names.
        self.assertAlmostEqual(subtreeError - vertexError, tree.getVertex(vertexId).alpha)

        # Collapsing a leaf changes nothing, so its alpha must be zero.
        if tree.isLeaf(vertexId):
            self.assertEqual(tree.getVertex(vertexId).alpha, 0.0)

    # Let's check the alpha of the root node via another method
    T = 1
    vertexError = (1 - gamma) * numpy.sum(y != Util.mode(y)) / n
    vertexError += gamma * numpy.sqrt(T)

    T = tree.getNumVertices()
    treeError = (1 - gamma) * numpy.sum(y != learner.predict(X)) / n
    treeError += gamma * numpy.sqrt(T)

    alpha = treeError - vertexError
    self.assertAlmostEqual(alpha, tree.getVertex(rootId).alpha)
def testComputeAlphas(self):
    """Check that computeAlphas assigns each node the difference between
    the penalised objective of its subtree and the objective obtained by
    collapsing the subtree to a single leaf; verify leaves get alpha 0
    and cross-check the root's alpha by a direct computation.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.1
    X = self.X[0:100, :]
    y = self.y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    learner.learnModel(X, y)
    tree = learner.getTree()

    rootId = (0, )
    learner.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
    learner.predict(X, y)
    learner.computeAlphas()

    # Sample count, hoisted out of the loop; float for true division.
    n = float(X.shape[0])

    # See if the alpha values of the nodes are correct
    for vertexId in tree.getAllVertexIds():
        # Penalised error of the subtree rooted at vertexId: summed leaf
        # errors plus the sqrt(T) size penalty weighted by gamma.
        subtreeError = 0
        for subtreeLeaf in tree.leaves(vertexId):
            subtreeError += (1 - gamma) * tree.getVertex(subtreeLeaf).getTestError()

        T = tree.getNumVertices()
        subtreeError /= n
        subtreeError += gamma * numpy.sqrt(T)

        # Tree size if this subtree were collapsed to a single leaf.
        T2 = T - len(tree.subtreeIds(vertexId)) + 1
        vertexError = (1 - gamma) * tree.getVertex(vertexId).getTestError() / n
        vertexError += gamma * numpy.sqrt(T2)

        # assertAlmostEquals/assertEquals are deprecated aliases removed
        # in Python 3.12 -- use the canonical names.
        self.assertAlmostEqual(subtreeError - vertexError, tree.getVertex(vertexId).alpha)

        # Collapsing a leaf changes nothing, so its alpha must be zero.
        if tree.isLeaf(vertexId):
            self.assertEqual(tree.getVertex(vertexId).alpha, 0.0)

    # Let's check the alpha of the root node via another method
    T = 1
    vertexError = (1 - gamma) * numpy.sum(y != Util.mode(y)) / n
    vertexError += gamma * numpy.sqrt(T)

    T = tree.getNumVertices()
    treeError = (1 - gamma) * numpy.sum(y != learner.predict(X)) / n
    treeError += gamma * numpy.sqrt(T)

    alpha = treeError - vertexError
    self.assertAlmostEqual(alpha, tree.getVertex(rootId).alpha)