def testLearnModelCut(self):
    """
    Train a TreeRank model with learnModelCut and check that the depth of
    the resulting tree does not exceed the configured maximum depth.
    """
    depthLimit = 5
    splitLimit = 10

    learner = TreeRank(self.leafRanklearner)
    learner.setMaxDepth(depthLimit)
    learner.setMinSplit(splitLimit)
    learner.learnModelCut(self.X, self.y)

    learntTree = learner.getTree()
    self.assertTrue(learntTree.depth() <= depthLimit)
def testMinSplit(self):
    """
    Train a TreeRank model and check that every non-leaf node of the
    learnt tree holds at least minSplit training indices.
    """
    depthLimit = 10
    splitLimit = 100

    learner = TreeRank(self.leafRanklearner)
    learner.setMaxDepth(depthLimit)
    learner.setMinSplit(splitLimit)
    learner.learnModel(self.X, self.y)

    learntTree = learner.getTree()

    for vertexId in learntTree.getAllVertexIds():
        vertex = learntTree.getVertex(vertexId)

        # Only internal (split) nodes are subject to the check.
        if vertex.isLeafNode():
            continue

        self.assertTrue(vertex.getTrainInds().shape[0] >= splitLimit)
def learnModel(self, X, y):
    """
    Train ``self.numTrees`` TreeRank models, each one on a random sample of
    the rows of X and the matching elements of y. The trained models are
    stored in ``self.forestList`` and the row indices used for each model
    in ``self.indList``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels, whose distinct values must be -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if y does not hold exactly the two labels -1 and +1
    """
    # Validate the inputs with the project's Parameter helpers.
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    labels = numpy.unique(y)
    if labels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    labelMismatch = labels != numpy.array([-1, 1])
    if labelMismatch.any():
        raise ValueError("Labels must be -1/+1: " + str(labels))

    numExamples = X.shape[0]
    # Number of rows drawn for each tree: sampleSize is a multiplier of
    # the total number of examples.
    numSampledExamples = int(numpy.round(self.sampleSize * numExamples))

    # Collect into locals first; the attributes are only assigned once
    # every tree has been trained.
    trees = []
    sampledInds = []

    for i in range(self.numTrees):
        Util.printConciseIteration(i, 1, self.numTrees, "Tree: ")

        if self.sampleReplace:
            # Indices may repeat (sampling with replacement).
            inds = numpy.random.randint(0, numExamples, numSampledExamples)
        else:
            # Prefix of a random permutation: distinct indices.
            shuffled = numpy.random.permutation(numExamples)
            inds = shuffled[0:numSampledExamples]

        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(self.maxDepth)
        treeRank.setMinSplit(self.minSplit)
        treeRank.setFeatureSize(self.featureSize)
        treeRank.setBestResponse(self.bestResponse)
        treeRank.learnModel(X[inds, :], y[inds])

        trees.append(treeRank)
        sampledInds.append(inds)

    self.forestList = trees
    self.indList = sampledInds
def learnModel(self, X, y):
    """
    Learn a collection of ``self.numTrees`` TreeRank models. Every model is
    fitted to a randomly drawn subset of the rows of X with the
    corresponding entries of y. On completion the models are available in
    ``self.forestList`` and the sampled row indices in ``self.indList``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels, whose distinct values must be -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if y does not hold exactly the two labels -1 and +1
    """
    # Input validation is delegated to the project's Parameter helpers.
    for candidate in (X, y):
        Parameter.checkClass(candidate, numpy.ndarray)
    for candidate in (X, y):
        Parameter.checkArray(candidate)

    labels = numpy.unique(y)
    if labels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    expectedLabels = numpy.array([-1, 1])
    if (labels != expectedLabels).any():
        raise ValueError("Labels must be -1/+1: " + str(labels))

    totalRows = X.shape[0]
    # sampleSize scales the number of examples to give the per-tree
    # sample count.
    numSampledExamples = int(numpy.round(self.sampleSize * totalRows))

    # Results are accumulated locally and published on self at the end.
    fittedTrees = []
    fittedInds = []

    for i in range(self.numTrees):
        Util.printConciseIteration(i, 1, self.numTrees, "Tree: ")

        if not self.sampleReplace:
            # Without replacement: take the head of a random permutation.
            inds = numpy.random.permutation(totalRows)[0:numSampledExamples]
        else:
            # With replacement: independent uniform draws of row indices.
            inds = numpy.random.randint(0, totalRows, numSampledExamples)

        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(self.maxDepth)
        treeRank.setMinSplit(self.minSplit)
        treeRank.setFeatureSize(self.featureSize)
        treeRank.setBestResponse(self.bestResponse)
        treeRank.learnModel(X[inds, :], y[inds])

        fittedTrees.append(treeRank)
        fittedInds.append(inds)

    self.forestList = fittedTrees
    self.indList = fittedInds