def testParallelPenaltyGridRbf(self):
    svm = self.svm
    svm.setKernel("gaussian")

    trainX = self.X[0:40, :]
    trainY = self.y[0:40]

    idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y)

    idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]

            svm.setGamma(gamma)
            svm.setC(C)
            svm.learnModel(trainX, trainY)
            predY = svm.predict(self.X)
            predTrainY = svm.predict(trainX)
            penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
            idealPenalties2[i, j] = penalty

            args = (trainX, trainY, self.X, self.y, svm)
            idealPenalties3[i, j] = computeIdealPenalty(args)

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)
def testParallelPenaltyGrid(self):
    svm = self.svm
    svm.setKernel("gaussian")

    trainX = self.X[0:40, :]
    trainY = self.y[0:40]

    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)

    idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]

            svm.setGamma(gamma)
            svm.setC(C)
            svm.learnModel(trainX, trainY)
            predY = svm.predict(self.X)
            predTrainY = svm.predict(trainX)
            penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)
            idealPenalties2[i, j] = penalty

            args = (trainX, trainY, self.X, self.y, svm)
            idealPenalties3[i, j] = computeIdealPenalty(args)

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)
def testSetErrorCost(self):
    try:
        import sklearn
    except ImportError as error:
        return

    numExamples = 1000
    numFeatures = 100
    eg = ExamplesGenerator()
    X, y = eg.generateBinaryExamples(numExamples, numFeatures)

    svm = LibSVM()
    C = 0.1
    kernel = "linear"
    kernelParam = 0
    svm.setKernel(kernel, kernelParam)
    svm.setC(C)

    svm.setErrorCost(0.1)
    svm.learnModel(X, y)
    predY = svm.classify(X)
    e1 = Evaluator.binaryErrorP(y, predY)

    svm.setErrorCost(0.9)
    svm.learnModel(X, y)
    predY = svm.classify(X)
    e2 = Evaluator.binaryErrorP(y, predY)

    self.assertTrue(e1 > e2)
def testPredict2(self):
    # Test on the Gauss2D dataset
    dataDir = PathDefaults.getDataDir()

    fileName = dataDir + "Gauss2D_learn.csv"
    XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    X = XY[:, 0:2]
    y = XY[:, 2]

    fileName = dataDir + "Gauss2D_test.csv"
    testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    testX = testXY[:, 0:2]
    testY = testXY[:, 2]

    X = Standardiser().standardiseArray(X)
    testX = Standardiser().standardiseArray(testX)

    maxDepths = range(3, 10)
    trainAucs = numpy.array([0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508])
    testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])

    i = 0
    # The results are approximately the same, but not exactly
    for maxDepth in maxDepths:
        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(maxDepth)
        treeRank.learnModel(X, y)
        trainScores = treeRank.predict(X)
        testScores = treeRank.predict(testX)

        self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 2)
        self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
        i += 1
def testParallelPen(self):
    folds = 3
    Cv = numpy.array([4.0])
    idx = Sampling.crossValidation(folds, self.X.shape[0])

    svm = self.svm
    svm.setKernel("gaussian")

    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)

    tol = 10**-6
    bestError = 1
    trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]

            penalty = 0
            for trainInds, testInds in idx:
                trainX = self.X[trainInds, :]
                trainY = self.y[trainInds]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

            penalty = penalty*Cv[0]/len(idx)

            svm.learnModel(self.X, self.y)
            predY = svm.predict(self.X)
            trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
            penalties2[i, j] = penalty
            meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

            if meanErrors2[i, j] < bestError:
                bestC = C
                bestGamma = gamma
                bestError = meanErrors2[i, j]

    bestSVM, trainErrors, currentPenalties = resultsList[0]
    meanErrors = trainErrors + currentPenalties

    self.assertEquals(bestC, bestSVM.getC())
    self.assertEquals(bestGamma, bestSVM.getGamma())
    self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
    self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
    self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)
def testPredict(self):
    rankBoost = RankBoost()
    rankBoost.learnModel(self.X, self.y)
    predY = rankBoost.predict(self.X)

    self.assertTrue(Evaluator.auc(predY, self.y) <= 1.0 and Evaluator.auc(predY, self.y) >= 0.0)
def evaluateCvOuter(self, X, Y, folds):
    """
    Run cross validation and output some ROC curves. In this case Y is a 1D array.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A vector of labels
    :type Y: :class:`ndarray`

    :param folds: The number of cross validation folds
    :type folds: :class:`int`
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)

    bestParams = []
    bestTrainAUCs = numpy.zeros(folds)
    bestTrainROCs = []
    bestTestAUCs = numpy.zeros(folds)
    bestTestROCs = []
    bestMetaDicts = []
    i = 0

    for trainInds, testInds in indexList:
        Util.printIteration(i, 1, folds, "Outer CV: ")
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        self.learnModel(trainX, trainY)
        #self.learnModelCut(trainX, trainY)

        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)
        bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

        #Store the parameters and ROC curves
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))

        metaDict = {}
        bestMetaDicts.append(metaDict)

        i += 1

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
def testLearnModel2(self):
    #We want to make sure the learnt tree with gamma = 0 minimises the
    #empirical risk
    minSplit = 20
    maxDepth = 3
    gamma = 0.01

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)

    #Vary sampleSize
    numpy.random.seed(21)
    learner.setSampleSize(1)
    learner.learnModel(self.X, self.y)
    error1 = learner.treeObjective(self.X, self.y)

    numpy.random.seed(21)
    learner.setSampleSize(5)
    learner.learnModel(self.X, self.y)
    error2 = learner.treeObjective(self.X, self.y)

    numpy.random.seed(21)
    learner.setSampleSize(10)
    learner.learnModel(self.X, self.y)
    error3 = learner.treeObjective(self.X, self.y)

    self.assertTrue(error1 >= error2)
    self.assertTrue(error2 >= error3)

    #Now vary max depth
    learner.gamma = 0

    numpy.random.seed(21)
    learner.setSampleSize(1)
    learner.minSplit = 1
    learner.maxDepth = 3
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error1 = Evaluator.binaryError(self.y, predY)

    numpy.random.seed(21)
    learner.maxDepth = 5
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error2 = Evaluator.binaryError(self.y, predY)

    numpy.random.seed(21)
    learner.maxDepth = 10
    learner.learnModel(self.X, self.y)
    predY = learner.predict(self.X)
    error3 = Evaluator.binaryError(self.y, predY)

    self.assertTrue(error1 >= error2)
    self.assertTrue(error2 >= error3)
def testSetC(self):
    rankSVM = RankSVM()

    rankSVM.setC(100.0)
    rankSVM.learnModel(self.X, self.y)
    predY = rankSVM.predict(self.X)
    auc1 = Evaluator.auc(predY, self.y)

    rankSVM.setC(0.1)
    rankSVM.learnModel(self.X, self.y)
    predY = rankSVM.predict(self.X)
    auc2 = Evaluator.auc(predY, self.y)

    self.assertTrue(auc1 != auc2)
def evaluateCvOuter(self, X, Y, folds, leafRank):
    """
    Run cross validation and output some ROC curves. In this case Y is a 1D array.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)
    self.setLeafRank(leafRank)

    bestParams = []
    bestTrainAUCs = numpy.zeros(folds)
    bestTrainROCs = []
    bestTestAUCs = numpy.zeros(folds)
    bestTestROCs = []
    bestMetaDicts = []
    i = 0

    for trainInds, testInds in indexList:
        Util.printIteration(i, 1, folds)
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
        logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

        self.learnModel(trainX, trainY)
        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)
        bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

        #Store the parameters and ROC curves
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))

        metaDict = {}
        bestMetaDicts.append(metaDict)

        i += 1

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
def testAuc(self):
    self.treeRank.learnModel(self.X, self.Y)
    scores = self.treeRank.predictScores(self.X)

    auc1 = Evaluator.auc(scores, self.Y.ravel())
    auc2 = self.treeRank.aucFromROC(self.treeRank.predictROC(self.X, self.Y))

    self.assertAlmostEquals(auc1, auc2, places=4)
def testPredict2(self):
    #Test on the Gauss2D dataset
    dataDir = PathDefaults.getDataDir()

    fileName = dataDir + "Gauss2D_learn.csv"
    XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    X = XY[:, 0:2]
    y = XY[:, 2]
    y = y*2 - 1

    fileName = dataDir + "Gauss2D_test.csv"
    testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1, 2, 3), delimiter=",")
    testX = testXY[:, 0:2]
    testY = testXY[:, 2]
    testY = testY*2 - 1

    X = Standardiser().standardiseArray(X)
    testX = Standardiser().standardiseArray(testX)

    numTrees = 5
    minSplit = 50
    maxDepths = range(3, 10)

    trainAucs = numpy.array([0.7252582, 0.7323278, 0.7350289, 0.7372529, 0.7399985, 0.7382176, 0.7395104, 0.7386347])
    testAucs = numpy.array([0.6806122, 0.6851614, 0.6886183, 0.6904147, 0.6897266, 0.6874600, 0.6875980, 0.6878801])

    i = 0
    #The results are approximately the same, but not exactly
    for maxDepth in maxDepths:
        treeRankForest = TreeRankForest(self.leafRanklearner)
        treeRankForest.setMaxDepth(maxDepth)
        treeRankForest.setMinSplit(minSplit)
        treeRankForest.setNumTrees(numTrees)
        treeRankForest.learnModel(X, y)
        trainScores = treeRankForest.predict(X)
        testScores = treeRankForest.predict(testX)

        print(Evaluator.auc(trainScores, y), Evaluator.auc(testScores, testY))

        self.assertAlmostEquals(Evaluator.auc(trainScores, y), trainAucs[i], 1)
        self.assertAlmostEquals(Evaluator.auc(testScores, testY), testAucs[i], 1)
        i += 1
def testSetSvmType(self):
    try:
        import sklearn
    except ImportError as error:
        return

    numExamples = 100
    numFeatures = 10

    X = numpy.random.randn(numExamples, numFeatures)
    X = Standardiser().standardiseArray(X)
    c = numpy.random.randn(numFeatures)
    y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
    y2 = numpy.array(y > 0, numpy.int32)*2 - 1

    svm = LibSVM()
    svm.setSvmType("Epsilon_SVR")

    self.assertEquals(svm.getType(), "Epsilon_SVR")

    #Try to get a good error
    Cs = 2**numpy.arange(-6, 4, dtype=numpy.float)
    epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float)

    bestError = 10
    for C in Cs:
        for epsilon in epsilons:
            svm.setEpsilon(epsilon)
            svm.setC(C)
            svm.learnModel(X, y)
            yp = svm.predict(X)

            if Evaluator.rootMeanSqError(y, yp) < bestError:
                bestError = Evaluator.rootMeanSqError(y, yp)

    self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

    svm.setSvmType("C_SVC")
    svm.learnModel(X, y2)
    yp2 = svm.predict(X)

    self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
def testPredict(self):
    generator = SVMLeafRank(self.paramDict, self.folds)
    learner = generator.generateLearner(self.X, self.y)

    predY = learner.predict(self.X)

    #Seems to work
    auc = learner.getMetricMethod()(predY, self.y)
    auc2 = Evaluator.auc(predY, self.y)
    self.assertEquals(auc, auc2)
def testClassify(self):
    try:
        import sklearn
    except ImportError as error:
        return

    self.svm.learnModel(self.X, self.y)
    predY = self.svm.classify(self.X)
    y = self.y

    e = Evaluator.binaryError(y, predY)

    #Now, permute examples
    perm = numpy.random.permutation(self.X.shape[0])
    predY = self.svm.classify(self.X[perm, :])
    y = y[perm]

    e2 = Evaluator.binaryError(y, predY)

    self.assertEquals(e, e2)
def computeBootstrapError(args):
    """
    Used in conjunction with the parallel model selection. Trains and then
    tests on a separate test set and evaluates the bootstrap error.
    """
    (trainX, trainY, testX, testY, learner) = args
    learner.learnModel(trainX, trainY)
    predTestY = learner.predict(testX)
    predTrainY = learner.predict(trainX)
    weight = 0.632
    return Evaluator.binaryBootstrapError(predTestY, testY, predTrainY, trainY, weight)
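#The 0.632 weight above is the standard .632 bootstrap weighting. The sketch
#below is an assumption about how such a weighted error can be formed from the
#test and resubstitution errors; it is illustrative only and is not the
#library's implementation of Evaluator.binaryBootstrapError.
def bootstrapErrorSketch(predTestY, testY, predTrainY, trainY, weight=0.632):
    #Out-of-sample (test) error and resubstitution (training) error
    testError = Evaluator.binaryError(predTestY, testY)
    trainError = Evaluator.binaryError(predTrainY, trainY)
    #Weighted combination: larger weight on the test error
    return weight*testError + (1 - weight)*trainError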
def testCvPrune(self):
    numExamples = 500
    X, y = data.make_regression(numExamples)
    y = Standardiser().standardiseArray(y)

    numTrain = int(numpy.round(numExamples * 0.33))
    numValid = int(numpy.round(numExamples * 0.33))

    trainX = X[0:numTrain, :]
    trainY = y[0:numTrain]
    validX = X[numTrain:numTrain+numValid, :]
    validY = y[numTrain:numTrain+numValid]
    testX = X[numTrain+numValid:, :]
    testY = y[numTrain+numValid:]

    learner = DecisionTreeLearner()
    learner.learnModel(trainX, trainY)
    error1 = Evaluator.rootMeanSqError(learner.predict(testX), testY)
    #print(learner.getTree())

    unprunedTree = learner.tree.copy()
    learner.setGamma(1000)
    learner.cvPrune(trainX, trainY)

    self.assertEquals(unprunedTree.getNumVertices(), learner.tree.getNumVertices())
    learner.setGamma(100)
    learner.cvPrune(trainX, trainY)

    #Test if pruned tree is a subtree of the unpruned one:
    for vertexId in learner.tree.getAllVertexIds():
        self.assertTrue(vertexId in unprunedTree.getAllVertexIds())

    #The error should be better after pruning
    learner.learnModel(trainX, trainY)
    #learner.cvPrune(validX, validY, 0.0, 5)
    learner.repPrune(validX, validY)

    error2 = Evaluator.rootMeanSqError(learner.predict(testX), testY)
    self.assertTrue(error1 >= error2)
def testBayesError(self):
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    data = numpy.load(dataDir + "toyData.npz")
    gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

    sampleSize = 100
    trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
    testX, testY = X[sampleSize:, :], y[sampleSize:]

    #We form a test set from the grid points
    gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
    for m in range(gridPoints.shape[0]):
        gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
        gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

    Cs = 2**numpy.arange(-5, 5, dtype=numpy.float)
    gammas = 2**numpy.arange(-5, 5, dtype=numpy.float)

    bestError = 1

    for C in Cs:
        for gamma in gammas:
            svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
            svm.learnModel(trainX, trainY)
            predY, decisionsY = svm.predict(gridX, True)
            decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
            error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

            predY, decisionsY = svm.predict(testX, True)
            error2 = Evaluator.binaryError(testY, predY)
            print(error, error2)

            if error < bestError:
                bestError = error
                bestC = C
                bestGamma = gamma

    svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
    svm.learnModel(trainX, trainY)
    predY, decisionsY = svm.predict(gridX, True)
    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")

    plt.figure(0)
    plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
    plt.colorbar()

    plt.figure(1)
    plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
    plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
    plt.legend()
    plt.show()
def computeIdealPenalty(args):
    """
    Find the complete penalty.
    """
    (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X) = args

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(X, y)
    predY = svm.predict(X)
    predFullY, decisionsY = svm.predict(fullX, True)

    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

    idealPenalty = trueError - Evaluator.binaryError(predY, y)

    return idealPenalty
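#Illustrative call site (an assumption, not taken from the library): the args
#tuple follows the order unpacked above, with fullX being the grid of points
#(gridX) and the pdf arrays loaded as in testBayesError. The C and gamma
#values here are arbitrary placeholders.
exampleC = 1.0
exampleGamma = 0.5
exampleArgs = (trainX, trainY, gridX, exampleC, exampleGamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
examplePenalty = computeIdealPenalty(exampleArgs)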
def testComputeTestError(self):
    C = 10.0
    gamma = 0.5

    numTrainExamples = int(self.X.shape[0]*0.5)
    trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
    testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeTestError(args)

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(trainX, trainY)
    predY = svm.predict(testX)

    self.assertEquals(Evaluator.binaryError(predY, testY), error)
def testParallelModelSelect(self):
    folds = 3
    idx = Sampling.crossValidation(folds, self.X.shape[0])

    svm = self.svm
    svm.setKernel("gaussian")

    paramDict = {}
    paramDict["setC"] = svm.getCs()
    paramDict["setGamma"] = svm.getGammas()

    bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)

    tol = 10**-6
    bestError = 1
    meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
    print("Computing real grid")

    for i in range(svm.Cs.shape[0]):
        C = svm.Cs[i]
        for j in range(svm.gammas.shape[0]):
            gamma = svm.gammas[j]

            error = 0
            for trainInds, testInds in idx:
                trainX = self.X[trainInds, :]
                trainY = self.y[trainInds]
                testX = self.X[testInds, :]
                testY = self.y[testInds]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(testX)
                error += Evaluator.binaryError(predY, testY)

            meanErrors2[i, j] = error/len(idx)

            if error < bestError:
                bestC = C
                bestGamma = gamma
                bestError = error

    self.assertEquals(bestC, bestSVM.getC())
    self.assertEquals(bestGamma, bestSVM.getGamma())
    self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
def learnModelCut(self, X, Y, folds=4):
    """
    Perform model learning with tree cutting in order to choose a maximal
    depth. The best tree is chosen using cross validation and depths are
    selected from 0 to maxDepth. The best depth corresponds to the maximal
    AUC obtained using cross validation.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A vector of binary labels as a 1D array
    :type Y: :class:`ndarray`

    :param folds: The number of cross validation folds.
    :type folds: :class:`int`
    """
    indexList = cross_val.StratifiedKFold(Y, folds)
    depths = numpy.arange(1, self.maxDepth)
    meanAUCs = numpy.zeros(depths.shape[0])

    for trainInds, testInds in indexList:
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        self.learnModel(trainX, trainY)
        fullTree = self.tree

        for i in range(fullTree.depth()):
            d = depths[i]
            self.tree = TreeRank.cut(fullTree, d)
            predTestY = self.predict(testX)

            meanAUCs[i] += Evaluator.auc(predTestY, testY)/float(folds)

    bestDepth = depths[numpy.argmax(meanAUCs)]
    self.learnModel(X, Y)
    self.tree = TreeRank.cut(self.tree, bestDepth)
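#A minimal usage sketch (an assumption based on the tests in this section, not
#taken from the library's documentation): construct a TreeRank as the tests do,
#let learnModelCut pick the cut depth by cross validation, then score.
treeRank = TreeRank(leafRanklearner)  #leafRanklearner supplied by the caller
treeRank.setMaxDepth(10)
treeRank.learnModelCut(X, Y, folds=4)
scores = treeRank.predict(X)
print(Evaluator.auc(scores, Y))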
def testLearnModel(self):
    try:
        import sklearn
    except ImportError as error:
        return

    self.svm.learnModel(self.X, self.y)
    predY = self.svm.classify(self.X)
    y = self.y

    e = Evaluator.binaryError(y, predY)

    #Test for wrong labels
    numExamples = 6
    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

    self.assertRaises(ValueError, self.svm.learnModel, X, y)

    #Try the regression SVM with continuous targets
    svm = LibSVM(type="Epsilon_SVR")
    y = numpy.random.rand(self.X.shape[0])
    svm.learnModel(self.X, y)
def greedyMC2(lists, itemList, trainList, n):
    """
    A method to greedily select a subset of the input lists such that the
    average precision is maximised.
    """
    currentListsInds = list(range(len(lists)))
    newListsInds = []
    currentAvPrecision = 0
    lastAvPrecision = -0.1

    while currentAvPrecision - lastAvPrecision > 0:
        lastAvPrecision = currentAvPrecision
        averagePrecisions = numpy.zeros(len(currentListsInds))

        for i, j in enumerate(currentListsInds):
            newListsInds.append(j)

            newLists = []
            for k in newListsInds:
                newLists.append(lists[k])

            rankAggregate, scores = RankAggregator.MC2(newLists, itemList)
            averagePrecisions[i] = Evaluator.averagePrecisionFromLists(trainList, rankAggregate[0:n], n)
            newListsInds.remove(j)

        j = numpy.argmax(averagePrecisions)
        currentAvPrecision = averagePrecisions[j]

        if currentAvPrecision > lastAvPrecision:
            newListsInds.append(currentListsInds.pop(j))

    return newListsInds
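#Illustrative call (the toy lists below are made up for the example): lists is
#a collection of ranked lists over itemList, trainList holds the relevant
#items, and n is the cutoff used for average precision. The returned indices
#identify which input lists the greedy search kept.
toyLists = [[0, 1, 2, 3], [2, 0, 3, 1], [1, 3, 0, 2]]
toyItemList = [0, 1, 2, 3]
toyTrainList = [0, 2]
selectedInds = greedyMC2(toyLists, toyItemList, toyTrainList, 2)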
def testGrowTree(self):
    startId = (0, )
    minSplit = 20
    maxDepth = 3
    gamma = 0.01

    learner = PenaltyDecisionTree(minSplit=minSplit, maxDepth=maxDepth, gamma=gamma, pruning=False)

    trainX = self.X[100:, :]
    trainY = self.y[100:]
    testX = self.X[0:100, :]
    testY = self.y[0:100]

    argsortX = numpy.zeros(trainX.shape, numpy.int)
    for i in range(trainX.shape[1]):
        argsortX[:, i] = numpy.argsort(trainX[:, i])
        argsortX[:, i] = numpy.argsort(argsortX[:, i])

    learner.tree = DictTree()
    rootNode = DecisionNode(numpy.arange(trainX.shape[0]), Util.mode(trainY))
    learner.tree.setVertex(startId, rootNode)

    #Note that this matches with the case where we create a new tree each time
    numpy.random.seed(21)
    bestError = float("inf")

    for i in range(20):
        learner.tree.pruneVertex(startId)
        learner.growTree(trainX, trainY, argsortX, startId)

        predTestY = learner.predict(testX)
        error = Evaluator.binaryError(predTestY, testY)
        #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())

        if error < bestError:
            bestError = error
            bestTree = learner.tree.copy()

        self.assertTrue(learner.tree.depth() <= maxDepth)

        for vertexId in learner.tree.nonLeaves():
            self.assertTrue(learner.tree.getVertex(vertexId).getTrainInds().shape[0] >= minSplit)

    bestError1 = bestError
    learner.tree = bestTree

    #Now we test growing a tree from a non-root vertex
    numpy.random.seed(21)
    for i in range(20):
        learner.tree.pruneVertex((0, 1))
        learner.growTree(trainX, trainY, argsortX, (0, 1))

        self.assertTrue(learner.tree.getVertex((0,)) == bestTree.getVertex((0,)))
        self.assertTrue(learner.tree.getVertex((0, 0)) == bestTree.getVertex((0, 0)))

        predTestY = learner.predict(testX)
        error = Evaluator.binaryError(predTestY, testY)

        if error < bestError:
            bestError = error
            bestTree = learner.tree.copy()
        #print(Evaluator.binaryError(predTestY, testY), learner.tree.getNumVertices())

    self.assertTrue(bestError1 >= bestError)
        minAlpha = alpha
    if alpha > maxAlpha:
        maxAlpha = alpha

numAlphas = 100
alphas = numpy.linspace(maxAlpha+0.1, minAlpha, numAlphas)
errors = numpy.zeros(numAlphas)

for i in range(alphas.shape[0]):
    #learner.learnModel(trainX, trainY)
    learner.setAlphaThreshold(alphas[i])
    learner.cvPrune(trainX, trainY)
    #learner.cvPrune(validX, validY, alphas[numpy.argmin(errors)])
    #learner.prune(validX, validY, alphas[i])
    predY = learner.predict(testX)
    errors[i] = Evaluator.rootMeanSqError(predY, testY)

plt.figure(3)
plt.scatter(alphas, errors)

#Now plot best tree
plt.figure(4)
learner.learnModel(trainX, trainY)
#learner.cvPrune(validX, validY, alphas[numpy.argmin(errors)])
learner.setAlphaThreshold(alphas[numpy.argmin(errors)])
learner.cvPrune(trainX, trainY)
rootId = learner.tree.getRootId()
displayTree(learner, rootId, 0, 1, 0, 1, colormap)
plt.show()
print(numpy.sum(y==2), numpy.sum(y==0))

trainSplit = 0.3
numTrainExamples = int(numExamples*trainSplit)

trainX = X[0:numTrainExamples, :]
trainY = y[0:numTrainExamples]
testX = X[numTrainExamples:, :]
testY = y[numTrainExamples:]

learner = PenaltyDecisionTree(minSplit=1, maxDepth=50, pruning=False)
learner.learnModel(trainX, trainY)

predY = learner.predict(trainX)
print(Evaluator.binaryError(predY, trainY))
print(learner.getTree())

plt.figure(0)
plt.scatter(testX[:, 0], testX[:, 1], c=testY, s=50, vmin=0, vmax=2)
plt.title("Test set")
plt.colorbar()

plt.figure(1)
plt.scatter(trainX[:, 0], trainX[:, 1], c=trainY, s=50, vmin=0, vmax=2)
plt.title("Training set")
plt.colorbar()

colormap = matplotlib.cm.get_cmap()
    newTrainOutputList = []
    newTestOutputList = []

    for item in outputList:
        if item not in testExpertMatchesInds:
            newTrainOutputList.append(item)
        if item not in trainExpertMatchesInds:
            newTestOutputList.append(item)

    trainOutputLists.append(newTrainOutputList)
    testOutputLists.append(newTestOutputList)

for i, n in enumerate(ns):
    for j, trainOutputList in enumerate(trainOutputLists):
        testOutputList = testOutputLists[j]

        trainPrecisions[i, j] = Evaluator.precisionFromIndLists(trainExpertMatchesInds, trainOutputList[0:n])
        testPrecisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, testOutputList[0:n])
        averageTrainPrecisions[s, i, j] = Evaluator.averagePrecisionFromLists(trainExpertMatchesInds, trainOutputList[0:n], n)
        averageTestPrecisions[s, i, j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, testOutputList[0:n], n)

#Now look at rank aggregations
relevantItems = set([])
for trainOutputList in trainOutputLists:
    relevantItems = relevantItems.union(trainOutputList)
relevantItems = list(relevantItems)

listInds = RankAggregator.greedyMC2(trainOutputLists, relevantItems, trainExpertMatchesInds, 20)

newOutputList = []
for listInd in listInds:
    newOutputList.append(testOutputLists[listInd])
def evaluateCvOuter(self, X, Y, folds, leafRank, innerFolds=3):
    """
    Run model selection and output some ROC curves. In this case Y is a 1D array.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)

    maxDepths = numpy.flipud(numpy.arange(1, 12, 1))
    if leafRank == self.getTreeRankLib().LRforest:
        varSplits = numpy.arange(0.6, 1.01, 0.2)
    else:
        varSplits = numpy.array([1])
    #According to Nicolas nfcv>1 doesn't help
    nfcvs = [1]
    #This is tied in with depth
    mincrit = 0.00
    #If minsplit is too low sometimes get a node with no positive labels
    minSplits = numpy.array([50])

    self.setLeafRank(leafRank)

    bestParams = []
    bestTrainAUCs = numpy.zeros(folds)
    bestTrainROCs = []
    bestTestAUCs = numpy.zeros(folds)
    bestTestROCs = []
    bestMetaDicts = []
    i = 0

    for trainInds, testInds in indexList:
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        meanParamAUCs = []
        paramList = []

        logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
        logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

        for varSplit in varSplits:
            for nfcv in nfcvs:
                for minSplit in minSplits:
                    self.setMaxDepth(maxDepths[0])
                    self.setVarSplit(varSplit)
                    self.setNfcv(nfcv)
                    self.setMinSplit(minSplit)
                    logging.debug(self)
                    idx = cross_val.StratifiedKFold(trainY, innerFolds)

                    j = 0
                    metrics = numpy.zeros((len(idx), maxDepths.shape[0]))

                    for idxtr, idxts in idx:
                        Util.printIteration(j, 1, innerFolds)

                        innerTrainX, innerTestX = trainX[idxtr, :], trainX[idxts, :]
                        innerTrainY, innerTestY = trainY[idxtr], trainY[idxts]

                        self.learnModel(innerTrainX, innerTrainY)

                        for k in range(maxDepths.shape[0]):
                            maxDepth = maxDepths[k]

                            robjects.globalenv["maxDepth"] = maxDepth
                            robjects.globalenv["tree"] = self.tree
                            nodeList = robjects.r('tree$nodes[tree$depth>=maxDepth]')
                            self.tree = self.treeRankLib.subTreeRank(self.tree, nodeList)

                            predY = self.predict(innerTestX)
                            gc.collect()

                            metrics[j, k] = Evaluator.auc(predY, innerTestY)

                        j += 1

                    meanAUC = numpy.mean(metrics, 0)
                    varAUC = numpy.var(metrics, 0)
                    logging.warn(self.baseLib.warnings())
                    logging.debug("Mean AUCs and variances at each depth " + str((meanAUC, varAUC)))

                    for k in range(maxDepths.shape[0]):
                        maxDepth = maxDepths[k]
                        meanParamAUCs.append(meanAUC[k])
                        paramList.append((maxDepth, varSplit, nfcv, minSplit))

                    #Try to get some memory back
                    gc.collect()
                    robjects.r('gc(verbose=TRUE)')
                    robjects.r('memory.profile()')
                    #print(self.hp.heap())

        #Now choose best params
        bestInd = numpy.argmax(numpy.array(meanParamAUCs))

        self.setMaxDepth(paramList[bestInd][0])
        self.setVarSplit(paramList[bestInd][1])
        self.setNfcv(paramList[bestInd][2])
        self.setMinSplit(paramList[bestInd][3])

        self.learnModel(trainX, trainY)
        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)
        bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

        #Store the parameters and ROC curves
        bestParams.append(paramList[bestInd])
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))

        metaDict = {}
        metaDict["size"] = self.getTreeSize()
        metaDict["depth"] = self.getTreeDepth()
        bestMetaDicts.append(metaDict)

        i += 1

    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
#Figure out why the penalty is increasing
X = trainX
y = trainY

for i in range(foldsSet.shape[0]):
    folds = foldsSet[i]
    idx = Sampling.crossValidation(folds, validX.shape[0])

    penalty = 0
    fullError = 0
    trainError = 0

    learner.learnModel(validX, validY)
    predY = learner.predict(X)
    predValidY = learner.predict(validX)
    idealPenalty = Evaluator.rootMeanSqError(predY, y) - Evaluator.rootMeanSqError(predValidY, validY)

    for trainInds, testInds in idx:
        trainX = validX[trainInds, :]
        trainY = validY[trainInds]

        #learner.setGamma(gamma)
        #learner.setC(C)
        learner.learnModel(trainX, trainY)
        predY = learner.predict(validX)
        predTrainY = learner.predict(trainX)

        fullError += Evaluator.rootMeanSqError(predY, validY)
        trainError += Evaluator.rootMeanSqError(predTrainY, trainY)
        penalty += Evaluator.rootMeanSqError(predY, validY) - Evaluator.rootMeanSqError(predTrainY, trainY)

    print((folds-1)*fullError/folds, (folds-1)*trainError/folds, (folds-1)*penalty/folds)
methodNames = graphRanker.getNames()

if runLSI:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
else:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"

Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)

numMethods = len(outputLists)
precisions = numpy.zeros((len(ns), numMethods))
averagePrecisions = numpy.zeros(numMethods)

for i, n in enumerate(ns):
    for j in range(len(outputLists)):
        precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n])

for j in range(len(outputLists)):
    averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN)

precisions2 = numpy.c_[numpy.array(ns), precisions]

logging.debug(Latex.listToRow(methodNames))
logging.debug("Computing Precision")
logging.debug(Latex.array2DToRows(precisions2))
logging.debug("Computing Average Precision")
logging.debug(Latex.array1DToRow(averagePrecisions))

#Close the file
fich.close()
logging.debug("All done!")
def testModelSelect(self):
    """
    We test the results on some data and compare to SVR.
    """
    numExamples = 200
    X, y = data.make_regression(numExamples, noise=0.5)

    X = Standardiser().standardiseArray(X)
    y = Standardiser().standardiseArray(y)

    trainX = X[0:100, :]
    trainY = y[0:100]
    testX = X[100:, :]
    testY = y[100:]

    learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
    learner.setPruneCV(8)

    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10)
    paramDict["setPruneCV"] = numpy.arange(6, 11, 2, numpy.int)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])

    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)
    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")

    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])

    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)
    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    return

    #Let's compare to the SVM
    learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR")

    paramDict = {}
    paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
    paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
    paramDict["setEpsilon"] = learner2.getEpsilons()

    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)

    predY = bestSVM.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)