def testLearnModel(self):
    """Check the objective value and node contents of a learned tree.

    A PenaltyDecisionTree is trained on the first 100 rows of the fixture
    data (self.X, self.y). The test then verifies that

    * learner.treeObjective(X, y) equals
      (1 - gamma) * (training misclassification rate) + gamma * sqrt(T),
      where T is the number of vertices in the learned tree, and
    * every vertex stores a value of 1.0 or -1.0, and every non-leaf
      vertex has a feature index that is a valid column of X and an
      error in [0, 1].
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.00

    X, y = self.X, self.y
    # Rows from 100 onwards are held out; the first 100 are used for training.
    testX = X[100:, :]
    testY = y[100:]
    X = X[0:100, :]
    y = y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma)
    learner.learnModel(X, y)
    tree = learner.getTree()

    #Work out penalty cost
    predY = learner.predict(X)
    # The held-out prediction is kept in its original position (between the
    # training prediction and treeObjective) so the learner sees the same
    # sequence of calls; its error is computed but not asserted on.
    predTestY = learner.predict(testX)
    n = float(X.shape[0])
    T = tree.getNumVertices()
    error = (1 - gamma) * numpy.sum(predY != y) / n
    testError = numpy.sum(predTestY != testY) / float(testY.shape[0])
    error += gamma * numpy.sqrt(T)

    # assertEqual replaces the assertEquals alias, which no longer exists
    # in Python 3.12+.
    self.assertEqual(error, learner.treeObjective(X, y))

    #Check if the values in the tree nodes are correct
    for vertexId in tree.getAllVertexIds():
        vertex = tree.getVertex(vertexId)

        self.assertTrue(vertex.getValue() == 1.0 or vertex.getValue() == -1.0)
        if tree.isNonLeaf(vertexId):
            # A feature index addresses a column of X, so the upper bound
            # is exclusive.
            self.assertTrue(0 <= vertex.getFeatureInd() < X.shape[1])
            self.assertTrue(0 <= vertex.getError() <= 1)
def testLearnModel(self):
    """Check the objective value and node contents of a learned tree.

    A PenaltyDecisionTree is trained on the first 100 rows of the fixture
    data (self.X, self.y). The test then verifies that

    * learner.treeObjective(X, y) equals
      (1 - gamma) * (training misclassification rate) + gamma * sqrt(T),
      where T is the number of vertices in the learned tree, and
    * every vertex stores a value of 1.0 or -1.0, and every non-leaf
      vertex has a feature index that is a valid column of X and an
      error in [0, 1].
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.00

    X, y = self.X, self.y
    # Rows from 100 onwards are held out; the first 100 are used for training.
    testX = X[100:, :]
    testY = y[100:]
    X = X[0:100, :]
    y = y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma)
    learner.learnModel(X, y)
    tree = learner.getTree()

    #Work out penalty cost
    predY = learner.predict(X)
    # The held-out prediction is kept in its original position (between the
    # training prediction and treeObjective) so the learner sees the same
    # sequence of calls; its error is computed but not asserted on.
    predTestY = learner.predict(testX)
    n = float(X.shape[0])
    T = tree.getNumVertices()
    error = (1 - gamma) * numpy.sum(predY != y) / n
    testError = numpy.sum(predTestY != testY) / float(testY.shape[0])
    error += gamma * numpy.sqrt(T)

    # assertEqual replaces the assertEquals alias, which no longer exists
    # in Python 3.12+.
    self.assertEqual(error, learner.treeObjective(X, y))

    #Check if the values in the tree nodes are correct
    for vertexId in tree.getAllVertexIds():
        vertex = tree.getVertex(vertexId)

        self.assertTrue(vertex.getValue() == 1.0 or vertex.getValue() == -1.0)
        if tree.isNonLeaf(vertexId):
            # A feature index addresses a column of X, so the upper bound
            # is exclusive.
            self.assertTrue(0 <= vertex.getFeatureInd() < X.shape[1])
            self.assertTrue(0 <= vertex.getError() <= 1)
def testComputeAlphas(self):
    """Check the alpha value that computeAlphas stores on every vertex.

    A PenaltyDecisionTree is trained without pruning on the first 100 rows
    of the fixture data (self.X, self.y). For each vertex, the expected
    alpha is the penalised error of the full tree minus the penalised
    error of the tree with the subtree under that vertex collapsed into
    the vertex itself:

        [(1 - gamma) * sum(leaf test errors) / n + gamma * sqrt(T)]
        - [(1 - gamma) * vertex test error / n + gamma * sqrt(T2)]

    where T is the number of vertices in the tree and
    T2 = T - |subtree(vertex)| + 1. Leaves must have alpha == 0. The
    root's alpha is additionally recomputed directly from the labels and
    the learner's predictions.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.1

    # Only the first 100 examples are used by this test.
    X = self.X[0:100, :]
    y = self.y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    learner.learnModel(X, y)
    tree = learner.getTree()

    # Mark every training example as a test example at the root, then call
    # predict with the labels before computing the alphas.
    rootId = (0, )
    learner.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
    learner.predict(X, y)
    learner.computeAlphas()

    # Neither the sample count nor the tree size changes inside the loop.
    n = float(X.shape[0])
    T = tree.getNumVertices()

    #See if the alpha values of the nodes are correct
    for vertexId in tree.getAllVertexIds():
        subtreeError = 0
        for subtreeLeaf in tree.leaves(vertexId):
            subtreeError += (1 - gamma) * tree.getVertex(subtreeLeaf).getTestError()

        subtreeError /= n
        subtreeError += gamma * numpy.sqrt(T)

        # Size of the tree once the subtree below vertexId is replaced by
        # the single vertex.
        T2 = T - len(tree.subtreeIds(vertexId)) + 1
        vertexError = (1 - gamma) * tree.getVertex(vertexId).getTestError() / n
        vertexError += gamma * numpy.sqrt(T2)

        # assertAlmostEqual/assertEqual replace the deprecated
        # assertAlmostEquals/assertEquals aliases (gone in Python 3.12+).
        self.assertAlmostEqual((subtreeError - vertexError), tree.getVertex(vertexId).alpha)

        if tree.isLeaf(vertexId):
            self.assertEqual(tree.getVertex(vertexId).alpha, 0.0)

    #Let's check the alpha of the root node via another method
    # Error of a single-vertex tree that predicts Util.mode(y) everywhere.
    vertexError = (1 - gamma) * numpy.sum(y != Util.mode(y)) / n
    vertexError += gamma * numpy.sqrt(1)

    # Error of the full learned tree on the same data.
    treeError = (1 - gamma) * numpy.sum(y != learner.predict(X)) / n
    treeError += gamma * numpy.sqrt(tree.getNumVertices())

    alpha = treeError - vertexError
    self.assertAlmostEqual(alpha, tree.getVertex(rootId).alpha)
def testComputeAlphas(self):
    """Check the alpha value that computeAlphas stores on every vertex.

    A PenaltyDecisionTree is trained without pruning on the first 100 rows
    of the fixture data (self.X, self.y). For each vertex, the expected
    alpha is the penalised error of the full tree minus the penalised
    error of the tree with the subtree under that vertex collapsed into
    the vertex itself:

        [(1 - gamma) * sum(leaf test errors) / n + gamma * sqrt(T)]
        - [(1 - gamma) * vertex test error / n + gamma * sqrt(T2)]

    where T is the number of vertices in the tree and
    T2 = T - |subtree(vertex)| + 1. Leaves must have alpha == 0. The
    root's alpha is additionally recomputed directly from the labels and
    the learner's predictions.
    """
    minSplit = 20
    maxDepth = 3
    gamma = 0.1

    # Only the first 100 examples are used by this test.
    X = self.X[0:100, :]
    y = self.y[0:100]

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)
    learner.learnModel(X, y)
    tree = learner.getTree()

    # Mark every training example as a test example at the root, then call
    # predict with the labels before computing the alphas.
    rootId = (0,)
    learner.tree.getVertex(rootId).setTestInds(numpy.arange(X.shape[0]))
    learner.predict(X, y)
    learner.computeAlphas()

    # Neither the sample count nor the tree size changes inside the loop.
    n = float(X.shape[0])
    T = tree.getNumVertices()

    #See if the alpha values of the nodes are correct
    for vertexId in tree.getAllVertexIds():
        subtreeError = 0
        for subtreeLeaf in tree.leaves(vertexId):
            subtreeError += (1 - gamma) * tree.getVertex(subtreeLeaf).getTestError()

        subtreeError /= n
        subtreeError += gamma * numpy.sqrt(T)

        # Size of the tree once the subtree below vertexId is replaced by
        # the single vertex.
        T2 = T - len(tree.subtreeIds(vertexId)) + 1
        vertexError = (1 - gamma) * tree.getVertex(vertexId).getTestError() / n
        vertexError += gamma * numpy.sqrt(T2)

        # assertAlmostEqual/assertEqual replace the deprecated
        # assertAlmostEquals/assertEquals aliases (gone in Python 3.12+).
        self.assertAlmostEqual((subtreeError - vertexError), tree.getVertex(vertexId).alpha)

        if tree.isLeaf(vertexId):
            self.assertEqual(tree.getVertex(vertexId).alpha, 0.0)

    #Let's check the alpha of the root node via another method
    # Error of a single-vertex tree that predicts Util.mode(y) everywhere.
    vertexError = (1 - gamma) * numpy.sum(y != Util.mode(y)) / n
    vertexError += gamma * numpy.sqrt(1)

    # Error of the full learned tree on the same data.
    treeError = (1 - gamma) * numpy.sum(y != learner.predict(X)) / n
    treeError += gamma * numpy.sqrt(tree.getNumVertices())

    alpha = treeError - vertexError
    self.assertAlmostEqual(alpha, tree.getVertex(rootId).alpha)