def testprune(self):
    """CART pruning must produce a tree that is a subtree of the unpruned one."""
    learner = DecisionTreeLearner(minSplit=5)
    learner.learnModel(self.X, self.y)

    # Snapshot the fully-grown tree before pruning mutates it in place.
    fullTree = learner.getTree().copy()

    learner.cartPrune(self.X, self.y)
    self.assertTrue(learner.tree.isSubtree(fullTree))
def testLearnModel(self):
    """Check structural invariants of learned trees, optimality of a depth-1
    split, and that REP-CV pruning never enlarges the tree.
    """
    # --- Part 1: random trees must satisfy basic structural invariants. ---
    generator = ExamplesGenerator()

    for i in range(5):
        numExamples = numpy.random.randint(1, 200)
        numFeatures = numpy.random.randint(1, 10)
        minSplit = numpy.random.randint(1, 50)
        maxDepth = numpy.random.randint(1, 10)

        X, y = generator.generateBinaryExamples(numExamples, numFeatures)
        # Fix: numpy.float was removed in NumPy 1.24; builtin float is the
        # documented equivalent (both mean float64 here).
        y = numpy.array(y, float)

        learner = DecisionTreeLearner(minSplit=minSplit, maxDepth=maxDepth)
        learner.learnModel(X, y)
        tree = learner.getTree()

        for vertexId in tree.getAllVertexIds():
            vertex = tree.getVertex(vertexId)
            if vertex.getFeatureInd() is not None:
                # Each vertex predicts the mean label of its training subset.
                meanValue = y[vertex.getTrainInds()].mean()
                self.assertEqual(meanValue, vertex.getValue())
            if tree.isNonLeaf(vertexId):
                # Split feature index and threshold must be valid for X.
                self.assertTrue(0 <= vertex.getFeatureInd() < X.shape[1])
                self.assertTrue(X[:, vertex.getFeatureInd()].min() <= vertex.getThreshold() <= X[:, vertex.getFeatureInd()].max())
            self.assertTrue(vertex.getTrainInds().shape[0] >= 1)

        self.assertTrue(tree.depth() <= maxDepth)

        # Each internal vertex's training indices must be exactly the union
        # of its two children's indices (every example goes to one side).
        root = tree.getRootId()
        vertexStack = [root]

        while len(vertexStack) != 0:
            vertexId = vertexStack.pop()
            neighbours = tree.children(vertexId)

            if len(neighbours) > 2:
                self.fail("Cannot have more than 2 children")
            elif len(neighbours) > 0:
                inds1 = tree.getVertex(neighbours[0]).getTrainInds()
                inds2 = tree.getVertex(neighbours[1]).getTrainInds()
                nptst.assert_array_equal(numpy.union1d(inds1, inds2), numpy.unique(tree.getVertex(vertexId).getTrainInds()))
                vertexStack.append(neighbours[0])
                vertexStack.append(neighbours[1])

    # --- Part 2: a depth-1 tree must pick the squared-error-optimal split. ---
    learner = DecisionTreeLearner(minSplit=1, maxDepth=1)
    learner.learnModel(self.X, self.y)

    bestFeature = 0
    bestError = 10**6
    bestThreshold = 0

    # Bug fix: search over all features of self.X rather than the leftover
    # random `numFeatures` from the loop above, which may not match
    # self.X.shape[1] and would silently check only a subset of features.
    for i in range(self.X.shape[1]):
        vals = numpy.unique(self.X[:, i])

        # Candidate thresholds are midpoints between consecutive unique values.
        for j in range(vals.shape[0]-1):
            threshold = (vals[j+1]+vals[j])/2
            leftInds = self.X[:, i] <= threshold
            rightInds = self.X[:, i] > threshold
            valLeft = numpy.mean(self.y[leftInds])
            valRight = numpy.mean(self.y[rightInds])
            error = ((self.y[leftInds] - valLeft)**2).sum() + ((self.y[rightInds] - valRight)**2).sum()

            if error < bestError:
                bestError = error
                bestFeature = i
                bestThreshold = threshold

    # assertEquals/assertAlmostEquals are deprecated aliases (removed in
    # Python 3.12); use the canonical names.
    self.assertAlmostEqual(bestThreshold, learner.tree.getRoot().getThreshold())
    self.assertAlmostEqual(bestError, learner.tree.getRoot().getError(), 5)
    self.assertEqual(bestFeature, learner.tree.getRoot().getFeatureInd())

    # --- Part 3: REP-CV pruning must not yield more vertices than no pruning
    # on the same data (X, y are the last examples from the loop above). ---
    learner = DecisionTreeLearner(minSplit=1, maxDepth=10)
    learner.learnModel(X, y)
    numVertices1 = learner.getTree().getNumVertices()

    learner = DecisionTreeLearner(minSplit=1, maxDepth=10, pruneType="REP-CV")
    learner.learnModel(X, y)
    numVertices2 = learner.getTree().getNumVertices()
    self.assertTrue(numVertices1 >= numVertices2)