def computeIdealPenalty(args):
    """
    Compute the ideal penalty of a Gaussian-kernel LibSVM: the error returned
    by ModelSelectUtils.bayesError for its decision surface minus its binary
    error on the training examples.

    :param args: A tuple (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X,
        pdfYminus1X). The model is trained on (X, y) with parameters gamma and
        C, its decision values are evaluated on fullX, and the remaining
        entries are passed through to ModelSelectUtils.bayesError.

    :return: The bayesError value minus the training binary error.
    """
    X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X = args

    learner = LibSVM('gaussian', gamma, C)
    learner.learnModel(X, y)
    trainPredictions = learner.predict(X)

    #Only the decision values on the full set of points are needed
    fullDecisions = learner.predict(fullX, True)[1]

    #Lay the decision values out as a square, column-major grid
    gridSize = gridPoints.shape[0]
    decisionGrid = numpy.reshape(fullDecisions, (gridSize, gridSize), order="F")

    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
    trainError = Evaluator.binaryError(trainPredictions, y)
    return trueError - trainError
def testGetWeights(self):
    """
    Check that the decision values returned by predict(X, True) can be
    reproduced from the output of getWeights() as X.weights + b, first on a
    six-point one-dimensional dataset and then on 50 random examples with 10
    features and random labels.
    """
    try:
        import sklearn
    except ImportError:
        #Report the test as skipped rather than silently passing
        self.skipTest("sklearn is not installed")

    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    #Let's see if we can compute the decision values
    y, decisions = svm.predict(X, True)

    #Same sign convention for the bias as in the random dataset case below
    decisions2 = numpy.dot(X, weights) + b

    self.assertTrue((decisions == decisions2).all())
    predY = numpy.sign(decisions2)
    self.assertTrue((y.ravel() == predY).all())

    #Do the same test on a random dataset
    numExamples = 50
    numFeatures = 10
    X = numpy.random.rand(numExamples, numFeatures)
    y = numpy.sign(numpy.random.rand(numExamples) - 0.5)

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    #Let's see if we can compute the decision values
    #Note that y now holds the predicted labels, not the training labels
    y, decisions = svm.predict(X, True)
    decisions2 = numpy.dot(X, weights) + b

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
    predY = numpy.sign(decisions2)
    self.assertTrue((y.ravel() == predY).all())
def testSetSvmType(self):
    """
    Switch the learner between the "Epsilon_SVR" and "C_SVC" types. For
    regression, search a grid of C and epsilon values (2^-6, ..., 2^3) and
    check that the best training root mean squared error beats an all-zero
    prediction. For classification, check that the binary error on the
    training examples lies in [0, 1].
    """
    try:
        import sklearn
    except ImportError:
        #Report the test as skipped rather than silently passing
        self.skipTest("sklearn is not installed")

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    X = Standardiser().standardiseArray(X)
    c = numpy.random.randn(numFeatures)

    #Real-valued targets, and their signs as labels in {-1, +1}
    y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
    y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.setSvmType("Epsilon_SVR")
    self.assertEqual(svm.getType(), "Epsilon_SVR")

    #Try to get a good error
    #numpy.float was removed from NumPy; numpy.float64 is the same dtype
    Cs = 2**numpy.arange(-6, 4, dtype=numpy.float64)
    epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float64)

    bestError = 10
    for C in Cs:
        for epsilon in epsilons:
            svm.setEpsilon(epsilon)
            svm.setC(C)
            svm.learnModel(X, y)
            yp = svm.predict(X)

            #Evaluate the error once per grid cell
            bestError = min(bestError, Evaluator.rootMeanSqError(y, yp))

    self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

    svm.setSvmType("C_SVC")
    svm.learnModel(X, y2)
    yp2 = svm.predict(X)

    self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
def testGetWeights(self):
    """
    Verify that getWeights() returns a weight vector and bias from which the
    decision values of predict(X, True) can be recomputed as X.weights + b.
    The check is run on a small one-dimensional dataset of six points and on
    a random dataset of 50 examples with 10 features.
    """
    try:
        import sklearn
    except ImportError:
        #A missing optional dependency is reported as a skip, not a pass
        self.skipTest("sklearn is not installed")

    X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
    y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    #Let's see if we can compute the decision values
    y, decisions = svm.predict(X, True)

    #The bias is added, consistently with the random dataset case below
    decisions2 = numpy.dot(X, weights) + b

    self.assertTrue((decisions == decisions2).all())
    predY = numpy.sign(decisions2)
    self.assertTrue((y.ravel() == predY).all())

    #Do the same test on a random dataset
    numExamples = 50
    numFeatures = 10
    X = numpy.random.rand(numExamples, numFeatures)
    y = numpy.sign(numpy.random.rand(numExamples) - 0.5)

    svm = LibSVM()
    svm.learnModel(X, y.ravel())
    weights, b = svm.getWeights()

    #Let's see if we can compute the decision values
    #From here on y contains the predicted labels
    y, decisions = svm.predict(X, True)
    decisions2 = numpy.dot(X, weights) + b

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
    predY = numpy.sign(decisions2)
    self.assertTrue((y.ravel() == predY).all())
def testSetSvmType(self):
    """
    Exercise setSvmType() with "Epsilon_SVR" and "C_SVC". In regression mode
    the best training root mean squared error over a grid of C and epsilon
    values (2^-6, ..., 2^3) must be lower than that of predicting zeros. In
    classification mode the training binary error must lie in [0, 1].
    """
    try:
        import sklearn
    except ImportError:
        #A missing optional dependency is reported as a skip, not a pass
        self.skipTest("sklearn is not installed")

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    X = Standardiser().standardiseArray(X)
    c = numpy.random.randn(numFeatures)

    #y is used for regression and y2 (labels in {-1, +1}) for classification
    y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
    y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.setSvmType("Epsilon_SVR")
    self.assertEqual(svm.getType(), "Epsilon_SVR")

    #Try to get a good error
    #numpy.float64 replaces the removed numpy.float alias
    Cs = 2**numpy.arange(-6, 4, dtype=numpy.float64)
    epsilons = 2**numpy.arange(-6, 4, dtype=numpy.float64)

    bestError = 10
    for C in Cs:
        for epsilon in epsilons:
            svm.setEpsilon(epsilon)
            svm.setC(C)
            svm.learnModel(X, y)
            yp = svm.predict(X)

            #Compute the error a single time for this parameter pair
            currentError = Evaluator.rootMeanSqError(y, yp)
            if currentError < bestError:
                bestError = currentError

    zeroError = Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0]))
    self.assertTrue(bestError < zeroError)

    svm.setSvmType("C_SVC")
    svm.learnModel(X, y2)
    yp2 = svm.predict(X)

    self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
def testPredict(self):
    """
    Smoke test for predict(): train on 100 random examples whose labels are
    the signs of a random linear function, then request predictions with the
    second argument set to True. No values are checked; the test only
    requires that the calls succeed and that two results are returned.
    """
    try:
        import sklearn
    except ImportError:
        #Report the test as skipped rather than silently passing
        self.skipTest("sklearn is not installed")

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    c = numpy.random.randn(numFeatures)

    y = numpy.dot(X, numpy.array([c]).T).ravel()
    y = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.learnModel(X, y)

    #The unpacking fails unless predict returns exactly two values
    y2, d = svm.predict(X, True)
def testComputeTestError(self):
    """
    Check that computeTestError agrees with training a Gaussian-kernel LibSVM
    on the first half of self.X, self.y and measuring the binary error of its
    predictions on the second half.
    """
    C = 10.0
    gamma = 0.5

    #Integer division: a float is not a valid slice bound
    numTrainExamples = self.X.shape[0] // 2
    trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
    testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeTestError(args)

    #Compute the same error by hand with a fresh learner
    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(trainX, trainY)
    predY = svm.predict(testX)
    self.assertEqual(Evaluator.binaryError(predY, testY), error)
def testPredict(self):
    """
    Smoke test: fit the default LibSVM on 100 random 10-feature examples
    labelled in {-1, +1} by the sign of a random linear function, and call
    predict(X, True). The results are not inspected; the test passes when the
    calls complete and predict returns two values.
    """
    try:
        import sklearn
    except ImportError:
        #A missing optional dependency is reported as a skip, not a pass
        self.skipTest("sklearn is not installed")

    numExamples = 100
    numFeatures = 10
    X = numpy.random.randn(numExamples, numFeatures)
    c = numpy.random.randn(numFeatures)

    y = numpy.dot(X, numpy.array([c]).T).ravel()
    y = numpy.array(y > 0, numpy.int32) * 2 - 1

    svm = LibSVM()
    svm.learnModel(X, y)

    #Unpacking into two names checks that a pair is returned
    y2, d = svm.predict(X, True)
def testComputeTestError(self):
    """
    Compare the output of computeTestError with a manual computation: a
    Gaussian-kernel LibSVM trained on the first half of self.X, self.y and
    evaluated with Evaluator.binaryError on the second half.
    """
    C = 10.0
    gamma = 0.5

    #Use integer division so that the split point is a valid slice index
    numTrainExamples = self.X.shape[0] // 2
    trainX = self.X[0:numTrainExamples, :]
    trainY = self.y[0:numTrainExamples]
    testX = self.X[numTrainExamples:, :]
    testY = self.y[numTrainExamples:]

    svm = LibSVM('gaussian', gamma, C)
    args = (trainX, trainY, testX, testY, svm)
    error = computeTestError(args)

    #Reference value from a separately constructed learner
    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(trainX, trainY)
    predY = svm.predict(testX)
    self.assertEqual(Evaluator.binaryError(predY, testY), error)
""" #Figure out why the penalty is increasing X = trainX y = trainY for i in range(foldsSet.shape[0]): folds = foldsSet[i] idx = Sampling.crossValidation(folds, validX.shape[0]) penalty = 0 fullError = 0 trainError = 0 learner.learnModel(validX, validY) predY = learner.predict(X) predValidY = learner.predict(validX) idealPenalty = Evaluator.rootMeanSqError(predY, y) - Evaluator.rootMeanSqError(predValidY, validY) for trainInds, testInds in idx: trainX = validX[trainInds, :] trainY = validY[trainInds] #learner.setGamma(gamma) #learner.setC(C) learner.learnModel(trainX, trainY) predY = learner.predict(validX) predTrainY = learner.predict(trainX) fullError += Evaluator.rootMeanSqError(predY, validY) trainError += Evaluator.rootMeanSqError(predTrainY, trainY) penalty += Evaluator.rootMeanSqError(predY, validY) - Evaluator.rootMeanSqError(predTrainY, trainY)
validX = trainX[trainInds,:] validY = trainY[trainInds] #errors = learner.parallelPenaltyGrid(validX, validY, testX, testY, paramDict, computeTestError) #errors = numpy.squeeze(errors) errors = numpy.zeros((Cs.shape[0], gammas.shape[0])) norms = numpy.zeros((Cs.shape[0], gammas.shape[0])) for i, C in enumerate(Cs): for j, gamma in enumerate(gammas): learner.setEpsilon(epsilons[0]) learner.setC(C) learner.setGamma(gamma) learner.learnModel(validX, validY) predY = learner.predict(testX) errors[i, j] = Evaluator.meanAbsError(predY, testY) norms[i, j] = learner.weightNorm() for i in range(gammas.shape[0]): plt.figure(i) plt.plot(numpy.log(Cs), errors[:, i], label=str(sampleSize)) plt.legend(loc="upper left") plt.xlabel("C") plt.ylabel("Error") plt.figure(i+gammas.shape[0]) plt.plot(norms[:, i], errors[:, i], label=str(sampleSize)) plt.legend(loc="upper left") plt.xlabel("Norm")