Ejemplo n.º 1
0
def getSetup(learnerName, dataDir, outputDir, numProcesses):
    """
    Return the learner, dataset-loading function, data/output directories and
    model-selection parameter grid for a given learner name.

    :param learnerName: one of "SVM", "SVR" or "CART"
    :param dataDir: base data directory; a learner-specific subdir is appended
    :param outputDir: base output directory; a learner-specific subdir is appended
    :param numProcesses: number of worker processes given to the learner
    :return: (learner, loadMethod, dataDir, outputDir, paramDict)
    :raise ValueError: if learnerName is not recognised
    """
    if learnerName == "SVM":
        learner = LibSVM(kernel='gaussian', type="C_SVC", processes=numProcesses)
        loadMethod = ModelSelectUtils.loadRatschDataset
        dataDir += "benchmark/"
        outputDir += "classification/" + learnerName + "/"

        paramDict = {}
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()
    elif learnerName == "SVR":
        learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses)
        learner.normModelSelect = True
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {}
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=float)
        paramDict["setEpsilon"] = learner.getEpsilons()
    elif learnerName == "CART":
        learner = DecisionTreeLearner(criterion="mse", maxDepth=30, minSplit=1, pruneType="CART", processes=numProcesses)
        learner.setChunkSize(2)
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        paramDict = {}
        # numpy.int was removed in NumPy 1.24; the builtin int is equivalent.
        paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 7.5, 0.5)-1), dtype=int)
    else:
        raise ValueError("Unknown learnerName: " + learnerName)

    return learner, loadMethod, dataDir, outputDir, paramDict
Ejemplo n.º 2
0
def computeIdealPenalty(args):
    """
    Find the complete penalty.

    Trains a gaussian-kernel SVM on (X, y), computes the Bayes error of its
    decision surface over the evaluation grid and returns that "true" error
    minus the empirical training error, i.e. the ideal penalisation term.

    args: (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
    """
    (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X) = args

    svm = LibSVM('gaussian', gamma, C)
    svm.learnModel(X, y)
    predY = svm.predict(X)
    # Decision values over the full grid, reshaped to a 2D surface; Fortran
    # order matches the column-major grid construction used by the callers.
    predFullY, decisionsY = svm.predict(fullX, True)
    decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
    trueError = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)
    # Ideal penalty = generalisation ("true") error minus empirical error.
    idealPenalty = trueError - Evaluator.binaryError(predY, y)

    return idealPenalty
Ejemplo n.º 3
0
    def testGetModel(self):
        """getWeights must be callable after learning a linear model."""
        # Skip silently when scikit-learn is unavailable.
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(50, 3)

        classifier = LibSVM()
        classifier.learnModel(X, y)
        weights, b = classifier.getWeights()
Ejemplo n.º 4
0
    def testGetModel(self):
        """Fetch the weight vector and bias of a learned SVM."""
        try:
            import sklearn
        except ImportError:
            return

        eg = ExamplesGenerator()
        sampleCount, featureCount = 50, 3
        X, y = eg.generateBinaryExamples(sampleCount, featureCount)

        svm = LibSVM()
        svm.learnModel(X, y)
        weights, b = svm.getWeights()
Ejemplo n.º 5
0
    def testStr(self):
        """Constructing a LibSVM instance must not raise."""
        try:
            import sklearn
        except ImportError:
            return

        model = LibSVM()
Ejemplo n.º 6
0
    def testPredict(self):
        """predict with decisions=True returns labels and decision values."""
        try:
            import sklearn
        except ImportError:
            return

        numExamples, numFeatures = 100, 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Labels in {-1, +1} from the sign of a random linear projection.
        projections = numpy.dot(X, numpy.array([c]).T).ravel()
        y = numpy.array(projections > 0, numpy.int32) * 2 - 1

        svm = LibSVM()
        svm.learnModel(X, y)
        y2, d = svm.predict(X, True)
Ejemplo n.º 7
0
    def testPredict(self):
        """Check predict returns both predicted labels and decision values."""
        try:
            import sklearn
        except ImportError:
            return

        X = numpy.random.randn(100, 10)
        c = numpy.random.randn(10)

        # +/-1 labels derived from a random hyperplane.
        margins = numpy.dot(X, numpy.array([c]).T).ravel()
        labels = numpy.array(margins > 0, numpy.int32) * 2 - 1

        model = LibSVM()
        model.learnModel(X, labels)
        y2, d = model.predict(X, True)
Ejemplo n.º 8
0
    def testComputeTestError(self):
        """
        computeTestError must match training an identical SVM directly and
        evaluating its binary error on the held-out half of the data.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: shape[0]*0.5 is a float and floats are invalid
        # slice indices in Python 3.
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(Evaluator.binaryError(predY, testY), error)
Ejemplo n.º 9
0
    def profileParallelPen(self):
        """Profile parallelPen over two cross-validation iterations."""
        learner = LibSVM(processes=8)
        learner.setChunkSize(2)
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        # Imbalanced +/-1 labels (about 10% positives). numpy.int was removed
        # in NumPy 1.24; the builtin int is equivalent.
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1
        Cvs = [self.folds - 1]

        def run():
            for i in range(2):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelPen(X, Y, idx, self.paramDict, Cvs)

        ProfileUtils.profile('run()', globals(), locals())
Ejemplo n.º 10
0
    def profileParallelPen(self):
        """Profile parallelPen on a large random binary dataset."""
        learner = LibSVM(processes=8)
        learner.setChunkSize(2)
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        # numpy.int was removed in NumPy 1.24; the builtin int is equivalent.
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1
        Cvs = [self.folds - 1]

        def run():
            for i in range(2):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelPen(X, Y, idx, self.paramDict, Cvs)

        ProfileUtils.profile('run()', globals(), locals())
Ejemplo n.º 11
0
    def testGetWeights(self):
        """
        Check that the weight vector and bias returned by getWeights reproduce
        the SVM's decision values, first on a tiny 1D dataset and then on a
        random dataset.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2] ,[3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        y = numpy.array([[-1], [-1], [-1], [1], [1] ,[1]])

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b  = svm.getWeights()

        #Let's see if we can compute the decision values 
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.zeros(numExamples)
        # NOTE(review): the bias is subtracted here but added in the random
        # test below -- confirm which sign convention getWeights uses.
        decisions2 = numpy.dot(X, weights) - b

        self.assertTrue((decisions == decisions2).all())
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        y = numpy.sign(numpy.random.rand(numExamples)-0.5)

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b  = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.dot(X, weights) + b

        # Allow a small numerical error in the reconstruction.
        tol = 10**-6

        self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())
Ejemplo n.º 12
0
    def setUp(self):
        """Create a random binary dataset and an SVM with small parameter grids."""
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
Ejemplo n.º 13
0
    def testLearnModel(self):
        """
        Learn a classifier, check invalid labels raise, then exercise the
        regression SVM on real-valued targets.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Test for wrong labels
        numExamples = 6
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        X = numpy.array([[-3], [-2], [-1], [1], [2] ,[3]], float)
        y = numpy.array([[-1], [-1], [-1], [1], [1] ,[5]])

        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM: train on the real-valued targets y just
        #created, not the binary labels self.y.
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        svm.learnModel(self.X, y)
0
    def testComputeBootstrapError(self):
        """computeBootstrapError should run on a half/half train/test split."""
        C = 10.0
        gamma = 0.5

        # Integer division: a float cannot be used as a slice index.
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)

        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)
Ejemplo n.º 15
0
    def testLearnModel(self):
        """
        Learn a classifier, check invalid labels raise, then exercise the
        regression SVM on real-valued targets.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Test for wrong labels
        numExamples = 6
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], float)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM: train on the real-valued targets y just
        #created, not the binary labels self.y.
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        svm.learnModel(self.X, y)
Ejemplo n.º 16
0
    def profileModelSelect(self):
        """Profile parallelModelSelect over five cross-validation iterations."""
        learner = LibSVM()
        numExamples = 10000
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        # numpy.int was removed in NumPy 1.24; the builtin int is equivalent.
        Y = numpy.array(numpy.random.rand(numExamples) < 0.1, int) * 2 - 1

        def run():
            for i in range(5):
                print("Iteration " + str(i))
                idx = Sampling.crossValidation(self.folds, numExamples)
                learner.parallelModelSelect(X, Y, idx, self.paramDict)

        ProfileUtils.profile('run()', globals(), locals())
Ejemplo n.º 17
0
    def __init__(self, examplesFileName):
        """
        Create the class by reading examples from a Matlab file. Instantiate the
        SVM and create a preprocessor to standardise examples to have zero mean
        and unit variance.

        :param examplesFileName: path of the Matlab file holding "X" and "y"
        """
        self.examplesList = ExamplesList.readFromFile(examplesFileName)
        self.examplesList.setDefaultExamplesName("X")
        self.examplesList.setLabelsName("y")

        # Log the class distribution and the error of always predicting the
        # majority class, as a baseline for later evaluation.
        (freqs, items) = Util.histogram(self.examplesList.getSampledDataField("y").ravel())
        logging.info("Distribution of labels: " + str((freqs, items)))
        logging.info("The base error rate is " + str(float(min(freqs))/self.examplesList.getNumExamples()))
        
        self.classifier = LibSVM()
        self.errorMethod = Evaluator.balancedError

        # Standardise X in place (zero mean, unit variance per feature).
        self.preprocessor = Standardiser()
        X = self.preprocessor.standardiseArray(self.examplesList.getDataField(self.examplesList.getDefaultExamplesName()))
        self.examplesList.overwriteDataField(self.examplesList.getDefaultExamplesName(), X)
Ejemplo n.º 18
0
    def testComputeTestError(self):
        """
        computeTestError must match training an identical SVM directly and
        evaluating its binary error on the held-out half of the data.
        """
        C = 10.0
        gamma = 0.5

        # Integer division: shape[0]*0.5 is a float and floats are invalid
        # slice indices in Python 3.
        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(Evaluator.binaryError(predY, testY), error)
Ejemplo n.º 19
0
    def setUp(self):
        """Create a random binary dataset and an SVM with small parameter grids."""
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
Ejemplo n.º 20
0
 def testComputeIdealPenalty(self):
     """Smoke-test computeIdealPenalty on the fixture data."""
     C = 10.0
     gamma = 0.5
     svm = LibSVM("gaussian", gamma, C)
     # NOTE(review): another computeIdealPenalty in this codebase unpacks a
     # 9-tuple (X, y, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X);
     # confirm this 5-tuple matches the variant under test here.
     args = (self.X, self.y, self.X, self.y, svm)
     error = computeIdealPenalty(args)
Ejemplo n.º 21
0
    def testSetErrorCost(self):
        """A higher error cost must lower the positive-class error rate."""
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(1000, 100)

        svm = LibSVM()
        svm.setKernel("linear", 0)
        svm.setC(0.1)

        # Train once with a low and once with a high error cost and collect
        # the resulting positive-class errors in order.
        errors = []
        for cost in (0.1, 0.9):
            svm.setErrorCost(cost)
            svm.learnModel(X, y)
            errors.append(Evaluator.binaryErrorP(y, svm.classify(X)))

        self.assertTrue(errors[0] > errors[1])
Ejemplo n.º 22
0
# Parameter grids for the Epsilon-SVR model-selection experiment.
# NOTE(review): numpy.float was removed in NumPy 1.24; the builtin float is
# the drop-in replacement.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
epsilons = numpy.array([2**-2])

# Map each LibSVM setter name to the grid of values to evaluate.
paramDict = {} 
paramDict["setC"] = Cs 
paramDict["setGamma"] = gammas
paramDict["setEpsilon"] = epsilons

sampleMethod = Sampling.crossValidation
numProcesses = multiprocessing.cpu_count()

# Load the first realisation (j = 0) of the regression dataset.
j = 0 
trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j)
learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) 


for sampleSize in sampleSizes: 
    print("Sample size " +str(sampleSize))
    trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
    validX = trainX[trainInds,:]
    validY = trainY[trainInds]
        
    #errors = learner.parallelPenaltyGrid(validX, validY, testX, testY, paramDict, computeTestError)
    #errors = numpy.squeeze(errors)
    
    errors = numpy.zeros((Cs.shape[0], gammas.shape[0]))
    norms = numpy.zeros((Cs.shape[0], gammas.shape[0]))
    
    for i, C in enumerate(Cs): 
Ejemplo n.º 23
0
# Select the first dataset and its number of realisations.
i = 0 
datasetName = datasetNames[i][0]
numRealisations = datasetNames[i][1]
logging.debug("Learning using dataset " + datasetName)

data = numpy.load(dataDir + datasetName + ".npz")
gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

#We form a test set from the grid points
# Cartesian product of gridPoints with itself, written block by block in
# column-major fashion (see the order="F" reshape in computeIdealPenalty).
testX = numpy.zeros((gridPoints.shape[0]**2, 2))
for m in range(gridPoints.shape[0]):
    testX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
    testX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

svm = LibSVM()

logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")

# Subsample the training set to form the validation data.
perm = numpy.random.permutation(trainX.shape[0])
trainInds = perm[0:sampleSize]
validX = trainX[trainInds, :]
validY = trainY[trainInds]
logging.debug("Finding ideal grid of penalties")
idealGrid = parallelPenaltyGridRbf(svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X)

# Evaluate each resampling strategy on the validation data.
for s in range(len(sampleMethods)):
    sampleMethod = sampleMethods[s][1]
    logging.debug("Sampling method :" + str(sampleMethod))
    
    idx = sampleMethod(folds, validY.shape[0])
Ejemplo n.º 24
0
    def testModelSelect(self): 
        
        """
        We test the results on some data and compare to SVR.

        Runs model selection for two pruning strategies of the decision tree
        (REP-CV and CART) and prints the test RMSE of each.
        """
        numExamples = 200
        X, y = data.make_regression(numExamples, noise=0.5)  
        
        X = Standardiser().standardiseArray(X)
        y = Standardiser().standardiseArray(y)
        
        # Half/half train/test split.
        trainX = X[0:100, :]
        trainY = y[0:100]
        testX = X[100:, :]
        testY = y[100:]
        
        # REP-CV pruned tree with an 8-fold pruning cross-validation.
        learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
        learner.setPruneCV(8)
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10) 
        paramDict["setPruneCV"] = numpy.arange(6, 11, 2, numpy.int)
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
        
        
        # CART-pruned tree over a finer gamma grid.
        learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")
        
        paramDict = {} 
        paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50) 
        
        folds = 5
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)


        predY = bestTree.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
              
        # NOTE(review): the SVM comparison below is disabled by this early
        # return; everything after it is unreachable.
        return 
        #Let's compare to the SVM 
        learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR") 
        
        paramDict = {} 
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float)
        paramDict["setEpsilon"] = learner2.getEpsilons()
        
        idx = Sampling.crossValidation(folds, trainX.shape[0])
        bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)

        predY = bestSVM.predict(testX)
        error = Evaluator.rootMeanSqError(testY, predY)
        print(error)
Ejemplo n.º 25
0
    def testSetSvmType(self):
        """
        Switch one LibSVM instance between regression (Epsilon_SVR) and
        classification (C_SVC) and check both modes learn sensibly.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        Cs = 2**numpy.arange(-6, 4, dtype=float)
        epsilons = 2**numpy.arange(-6, 4, dtype=float)

        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                if Evaluator.rootMeanSqError(y, yp) < bestError:
                    bestError = Evaluator.rootMeanSqError(y, yp)

        # The tuned SVR must beat the trivial all-zeros predictor.
        self.assertTrue(
            bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)
Ejemplo n.º 26
0
import matplotlib.pyplot as plt

# Experiment setup: model penalisation for Epsilon-SVR regression.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)

numProcesses = multiprocessing.cpu_count()
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Parameter grids; numpy.float was removed in NumPy 1.24, so use the builtin.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=float)
epsilons = learner.getEpsilons()

numCs = Cs.shape[0]
numGammas = gammas.shape[0]
numEpsilons = epsilons.shape[0]

learner.normModelSelect = True

# Map each LibSVM setter name to its grid of candidate values.
paramDict = {} 
paramDict["setC"] = Cs 
paramDict["setGamma"] = gammas
Ejemplo n.º 27
0
    def testSetSvmType(self):
        """
        Switch one LibSVM instance between regression (Epsilon_SVR) and
        classification (C_SVC) and check both modes learn sensibly.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32)*2 -1 
        
        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        Cs = 2**numpy.arange(-6, 4, dtype=float)
        epsilons = 2**numpy.arange(-6, 4, dtype=float)

        bestError = 10 
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                if Evaluator.rootMeanSqError(y, yp) < bestError:
                    bestError = Evaluator.rootMeanSqError(y, yp) 

        # The tuned SVR must beat the trivial all-zeros predictor.
        self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))
        
        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2)  <= 1)
Ejemplo n.º 28
0
    def testSaveParams(self):
        """Parameters saved to file and reloaded must round-trip intact."""
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on the reloaded svm2 (the original asserted on svm, which is
        # trivially true and never exercised loadParams). assertEquals is a
        # deprecated alias of assertEqual.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)
Ejemplo n.º 29
0
    def testSetEpsilon(self):
        """
        Test out the parameter for the regressive SVM, vary epsilon and look at
        number of support vectors. 
        """
        try:
            import sklearn
        except ImportError:
            return

        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")

        numExamples, numFeatures = 100, 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Noisy linear regression targets.
        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)

        # Train with decreasing epsilon and record the support-vector counts.
        supportCounts = []
        for eps in (1.0, 0.5, 0.01):
            svm.setEpsilon(eps)
            svm.learnModel(X, y)
            supportCounts.append(svm.getModel().support_.shape)

        #There should be fewer SVs as epsilon increases
        self.assertTrue(supportCounts[0] < supportCounts[1])
        self.assertTrue(supportCounts[1] < supportCounts[2])
Ejemplo n.º 30
0
    def testSaveParams(self):
        """Parameters saved to file and reloaded must round-trip intact."""
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Assert on the reloaded svm2 (the original asserted on svm, which is
        # trivially true and never exercised loadParams). assertEquals is a
        # deprecated alias of assertEqual.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)
Ejemplo n.º 31
0
    def testSetEpsilon(self):
        """
        Test out the parameter for the regressive SVM, vary epsilon and look at
        number of support vectors. 
        """
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Noisy linear regression targets.
        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)
        
        svm.setEpsilon(1.0)
        svm.learnModel(X, y)
        numSV = svm.getModel().support_.shape
        
        svm.setEpsilon(0.5)
        svm.learnModel(X, y)
        numSV2 = svm.getModel().support_.shape

        svm.setEpsilon(0.01)
        svm.learnModel(X, y)
        numSV3 = svm.getModel().support_.shape

        #There should be fewer SVs as epsilon increases
        # (the counts are compared as shape tuples, which orders correctly here)
        self.assertTrue(numSV < numSV2)
        self.assertTrue(numSV2 < numSV3)
Ejemplo n.º 32
0
class LibSVMTest(unittest.TestCase):
    def setUp(self):
        """Create a random binary dataset and an SVM with small parameter grids."""
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    def testLearnModel(self):
        """
        Learn a classifier, check invalid labels raise, then exercise the
        regression SVM on real-valued targets.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Test for wrong labels
        numExamples = 6
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], float)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM: train on the real-valued targets y just
        #created, not the binary labels self.y.
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        svm.learnModel(self.X, y)

    def testSetErrorCost(self):
        """A higher error cost must lower the positive-class error rate."""
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(1000, 100)

        svm = LibSVM()
        svm.setKernel("linear", 0)
        svm.setC(0.1)

        # Train once with a low and once with a high error cost and collect
        # the resulting positive-class errors in order.
        errors = []
        for cost in (0.1, 0.9):
            svm.setErrorCost(cost)
            svm.learnModel(X, y)
            errors.append(Evaluator.binaryErrorP(y, svm.classify(X)))

        self.assertTrue(errors[0] > errors[1])

    def testClassify(self):
        """Classification error must be invariant to permuting the examples."""
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Now, permute examples
        perm = numpy.random.permutation(self.X.shape[0])
        predY = self.svm.classify(self.X[perm, :])
        y = y[perm]

        e2 = Evaluator.binaryError(y, predY)

        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(e, e2)

    def testEvaluateCv(self):
        """Cross-validated error means and variances must lie in [0, 1]."""
        try:
            import sklearn
        except ImportError as error:
            return

        folds = 10
        # Renamed from "vars", which shadowed the builtin vars().
        (means, variances) = self.svm.evaluateCv(self.X, self.y, folds)

        self.assertTrue((means <= 1).all())
        self.assertTrue((means >= 0).all())
        self.assertTrue((variances <= 1).all())
        self.assertTrue((variances >= 0).all())

    @apgl.skip("")
    def testGetModel(self):
        """Learn a model and fetch its weight vector and bias."""
        try:
            import sklearn
        except ImportError:
            return

        generator = ExamplesGenerator()
        X, y = generator.generateBinaryExamples(50, 3)

        model = LibSVM()
        model.learnModel(X, y)
        weights, bias = model.getWeights()

    @apgl.skip("")
    def testGetWeights(self):
        """
        Check that the weight vector and bias returned by getWeights reproduce
        the SVM's decision values, first on a tiny 1D dataset and then on a
        random dataset.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.zeros(numExamples)
        # NOTE(review): the bias is subtracted here but added in the random
        # test below -- confirm which sign convention getWeights uses.
        decisions2 = numpy.dot(X, weights) - b

        self.assertTrue((decisions == decisions2).all())
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        y = numpy.sign(numpy.random.rand(numExamples) - 0.5)

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.dot(X, weights) + b

        # Allow a small numerical error in the reconstruction.
        tol = 10**-6

        self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

    def testSetTermination(self):
        """Learning must work both before and after changing the tolerance."""
        try:
            import sklearn
        except ImportError:
            return

        # First fit with the default tolerance, then again after setting 0.1.
        for tolerance in (None, 0.1):
            if tolerance is not None:
                self.svm.setTermination(tolerance)
            self.svm.learnModel(self.X, self.y)

    def testSetSvmType(self):
        """
        Switch one LibSVM instance between regression (Epsilon_SVR) and
        classification (C_SVC) and check both modes learn sensibly.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        y2 = numpy.array(y > 0, numpy.int32) * 2 - 1

        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        # numpy.float was removed in NumPy 1.24; the builtin float is equivalent.
        Cs = 2**numpy.arange(-6, 4, dtype=float)
        epsilons = 2**numpy.arange(-6, 4, dtype=float)

        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                if Evaluator.rootMeanSqError(y, yp) < bestError:
                    bestError = Evaluator.rootMeanSqError(y, yp)

        # The tuned SVR must beat the trivial all-zeros predictor.
        self.assertTrue(
            bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)

    @apgl.skip("")
    def testSaveParams(self):
        """Save SVM parameters to disk and check they round-trip via loadParams.

        Bug fix: the original assertions queried the *saving* object ``svm``,
        which would pass even if ``loadParams`` did nothing; they now query
        the freshly loaded ``svm2``.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        svm2 = LibSVM()
        svm2.loadParams(fileName)

        # Verify the loaded object, not the one that did the saving.
        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)

    def testStr(self):
        """Smoke test: constructing a LibSVM must not raise."""
        try:
            import sklearn
        except ImportError as error:
            return

        learner = LibSVM()

        # Previously logged the learner's string form:
        #logging.debug(svm)

    def testSetEpsilon(self):
        """
        Test out the parameter for the regressive SVM: vary epsilon and look
        at the number of support vectors.

        Bug fix: the original compared the shape *tuples* returned by
        ``support_.shape`` rather than the actual counts; we now compare
        ``shape[0]`` directly.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)

        svm.setEpsilon(1.0)
        svm.learnModel(X, y)
        numSV = svm.getModel().support_.shape[0]

        svm.setEpsilon(0.5)
        svm.learnModel(X, y)
        numSV2 = svm.getModel().support_.shape[0]

        svm.setEpsilon(0.01)
        svm.learnModel(X, y)
        numSV3 = svm.getModel().support_.shape[0]

        #There should be fewer SVs as epsilon increases
        self.assertTrue(numSV < numSV2)
        self.assertTrue(numSV2 < numSV3)

    def testPredict(self):
        """Smoke test: predict with decision values runs on a trained classifier."""
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples, numFeatures = 100, 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Labels in {-1, +1} from the sign of a linear score.
        scores = numpy.dot(X, numpy.array([c]).T).ravel()
        y = numpy.array(scores > 0, numpy.int32) * 2 - 1

        learner = LibSVM()
        learner.learnModel(X, y)
        y2, d = learner.predict(X, True)

        # Previously checked (disabled in the original):
        #self.assertTrue((numpy.sign(d) == y2).all())

    #@unittest.skip("")
    def testParallelVfcvRbf(self):
        """V-fold cross-validation over the (C, gamma) RBF grid.

        Recomputes the mean CV error grid with explicit serial loops and
        checks it matches the grid returned by parallelVfcvRbf (note the
        transpose in the final assertion), and that the returned learner
        carries the best (C, gamma).
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx)

        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error / len(idx)

                # NOTE(review): the argmin is taken over the fold-summed error
                # (bestError starts at 1) rather than the mean; equivalent for
                # picking the minimiser, up to scaling -- confirm intended.
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        # meanErrors from parallelVfcvRbf is transposed relative to (C, gamma).
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelVfcvRbf2(self):
        """V-fold CV for epsilon-SVR over the (gamma, epsilon, C) RBF grid.

        Same idea as testParallelVfcvRbf but in regression mode: the serial
        3-D error grid must match parallelVfcvRbf's, and the best
        (C, gamma, epsilon) must be carried by the returned learner.
        """
        #In this test we try SVM regression
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X,
                                                  self.y,
                                                  idx,
                                                  type="Epsilon_SVR")

        tol = 10**-6
        bestError = 100
        # Indexed (gamma, epsilon, C) to line up with meanErrors directly.
        meanErrors2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]

                    error = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        testX = self.X[testInds, :]
                        testY = self.y[testInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(testX)
                        # getMetricMethod returns the metric for the SVM type.
                        error += svm.getMetricMethod()(predY, testY)

                    meanErrors2[j, k, i] = error / len(idx)

                    if error < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestError = error
                        bestEpsilon = epsilon

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testParallelVfPenRbf(self):
        """Penalised model selection over the (C, gamma) RBF grid.

        For each grid point the penalty is the averaged gap between full-set
        and training-set error, scaled by Cv[0]; the serial penalised error
        grid must match trainErrors + currentPenalties from parallelVfPenRbf.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv)

        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Optimism of the training error w.r.t. the full dataset.
                    penalty += Evaluator.binaryError(
                        predY, self.y) - Evaluator.binaryError(
                            predTrainY, trainY)

                penalty = penalty * Cv[0] / len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                meanErrors2[i,
                            j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        # resultsList holds one (learner, trainErrors, penalties) per Cv value.
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    #@unittest.skip("")
    def testParallelVfPenRbf2(self):
        """Penalised model selection for epsilon-SVR over (gamma, epsilon, C).

        Regression analogue of testParallelVfPenRbf: the serial 3-D penalised
        error grid must match trainErrors + currentPenalties, and the best
        (C, gamma, epsilon) must be carried by the returned learner.
        """
        #Test support vector regression
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        resultsList = svm.parallelVfPenRbf(self.X,
                                           self.y,
                                           idx,
                                           Cv,
                                           type="Epsilon_SVR")

        tol = 10**-6
        bestError = 100
        # Indexed (gamma, epsilon, C), matching the parallel result layout.
        meanErrors2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]

                    penalty = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(self.X)
                        predTrainY = svm.predict(trainX)
                        # Optimism of the training error vs. the full dataset.
                        penalty += svm.getMetricMethod()(
                            predY, self.y) - svm.getMetricMethod()(predTrainY,
                                                                   trainY)

                    penalty = penalty * Cv[0] / len(idx)
                    svm.learnModel(self.X, self.y)
                    predY = svm.predict(self.X)
                    meanErrors2[j, k, i] = svm.getMetricMethod()(
                        predY, self.y) + penalty

                    if meanErrors2[j, k, i] < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestEpsilon = epsilon
                        bestError = meanErrors2[j, k, i]

        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testGetC(self):
        """setC followed by getC must round-trip the value."""
        learner = LibSVM()
        learner.setC(10.0)
        self.assertTrue(learner.getC() == 10.0)

    def testGetGamma(self):
        """The kernel parameter given to setKernel is returned by getKernelParams."""
        learner = LibSVM()
        learner.setKernel("gaussian", 12.0)
        self.assertTrue(learner.getKernelParams() == 12.0)

    def testComputeTestError(self):
        """computeTestError must equal training on the split and measuring error.

        Bug fix: the split size was computed as ``shape[0] * 0.5``, a float,
        which is not a valid slice index on Python 3; integer division is
        used instead.
        """
        C = 10.0
        gamma = 0.5

        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        # Reproduce the computation directly and compare.
        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        self.assertEqual(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        """Smoke test: computeBootstrapError runs on a train/test split.

        Bug fix: the split size was computed as ``shape[0] * 0.5``, a float,
        which is not a valid slice index on Python 3; integer division is
        used instead.
        """
        C = 10.0
        gamma = 0.5

        numTrainExamples = self.X.shape[0] // 2

        trainX, trainY = self.X[
            0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)

        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)

    def testComputeIdealPenalty(self):
        """Smoke test: computeIdealPenalty runs on the full dataset."""
        gamma, C = 0.5, 10.0
        learner = LibSVM("gaussian", gamma, C)
        penalty = computeIdealPenalty((self.X, self.y, self.X, self.y, learner))

    def testParallelPenaltyGridRbf(self):
        """Ideal-penalty grid over (C, gamma): parallel vs. serial computation.

        Trains on a 40-example subset and measures, per grid point, the gap
        between full-set and training-set error; the serial grid must match
        parallelPenaltyGridRbf's (transposed) result.
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X,
                                                    self.y)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(
                    predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                idealPenalties2[i, j] = penalty

                # NOTE(review): idealPenalties3 is filled via
                # computeIdealPenalty but never asserted against -- presumably
                # a leftover; confirm whether a comparison was intended.
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)

        tol = 10**-6
        self.assertTrue(
            numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testParallelPenaltyGridRbf2(self):
        """Ideal-penalty grid for epsilon-SVR over (gamma, epsilon, C).

        Regression analogue of testParallelPenaltyGridRbf: the serial 3-D
        penalty grid must match parallelPenaltyGridRbf's result.
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX,
                                                    trainY,
                                                    self.X,
                                                    self.y,
                                                    type="Epsilon_SVR")
        # Indexed (gamma, epsilon, C), matching the parallel result layout.
        idealPenalties2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Gap between full-set and training-set error.
                    penalty = svm.getMetricMethod()(
                        predY, self.y) - svm.getMetricMethod()(predTrainY,
                                                               trainY)

                    idealPenalties2[j, k, i] = penalty

        tol = 10**-6
        self.assertTrue(
            numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelModelSelect(self):
        """parallelModelSelect with a paramDict must agree with serial V-fold CV.

        paramDict maps setter names ("setC", "setGamma") to candidate value
        arrays; the serial mean-error grid must match the parallel one (note
        the transpose) and the returned learner must carry the best pair.
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx,
                                                      paramDict)

        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        print("Computing real grid")

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error / len(idx)

                # NOTE(review): argmin taken over the fold-summed error
                # (bestError starts at 1); equivalent up to scaling.
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelPenaltyGrid2(self):
        """parallelPenaltyGrid for epsilon-SVR with a paramDict of setters.

        Serial recomputation of the (gamma, epsilon, C) penalty grid must
        match the parallel result.
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()
        paramDict["setEpsilon"] = svm.getEpsilons()

        #print(paramDict.keys())

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X,
                                                 self.y, paramDict)
        # Indexed (gamma, epsilon, C), matching the parallel result layout.
        idealPenalties2 = numpy.zeros(
            (svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Gap between full-set and training-set error.
                    penalty = svm.getMetricMethod()(
                        predY, self.y) - svm.getMetricMethod()(predTrainY,
                                                               trainY)

                    idealPenalties2[j, k, i] = penalty

        tol = 10**-6
        self.assertTrue(
            numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelPen(self):
        """parallelPen with a paramDict must match serial penalised selection.

        Checks all three returned grids (train errors, penalties, and their
        sum) against explicit serial recomputation, as well as the best
        (C, gamma) carried by the returned learner.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)

        tol = 10**-6
        bestError = 1
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Optimism of the training error w.r.t. the full dataset.
                    penalty += Evaluator.binaryError(
                        predY, self.y) - Evaluator.binaryError(
                            predTrainY, trainY)

                penalty = penalty * Cv[0] / len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i,
                            j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        # One (learner, trainErrors, penalties) triple per Cv value.
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(
            numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        """parallelPenaltyGrid with a paramDict over (C, gamma).

        Serial recomputation of the penalty grid must match the (transposed)
        parallel result.
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X,
                                                 self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(
                    predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                idealPenalties2[i, j] = penalty

                # NOTE(review): idealPenalties3 is computed but never asserted
                # against -- presumably a leftover; confirm.
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)

        tol = 10**-6
        self.assertTrue(
            numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testGetBestLearner(self):
        """With normModelSelect on, getBestLearner picks C by matching weight norms.

        The best (C, gamma) by raw error is found first; its mean CV weight
        norm is then computed, and the C whose full-data weight norm is
        closest to that mean must equal the C chosen by getBestLearner.
        """
        svm = self.svm
        paramDict = {}
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()

        # Random error grid: only the argmin position matters for this test.
        errors = numpy.random.rand(svm.getCs().shape[0],
                                   svm.getGammas().shape[0])

        folds = 5
        idx = Sampling.crossValidation(folds, self.X.shape[0])

        svm.normModelSelect = True
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)

        bestC = learner.getC()

        #Find the best norm
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])

        # Mean weight norm over the CV training folds at the argmin params.
        norms = []
        for trainInds, testInds in idx:
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)

            norms.append(learner.weightNorm())

        bestNorm = numpy.array(norms).mean()

        # Full-data weight norm for every candidate C.
        norms = numpy.zeros(paramDict["setC"].shape[0])
        for i, C in enumerate(paramDict["setC"]):
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()

        bestC2 = paramDict["setC"][numpy.abs(norms - bestNorm).argmin()]

        self.assertEquals(bestC, bestC2)
Ejemplo n.º 33
0
    def testSetErrorCost(self):
        """Raising the error cost should lower the positive-class error rate."""
        try:
            import sklearn
        except ImportError as error:
            return

        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(1000, 100)

        svm = LibSVM()
        svm.setKernel("linear", 0)
        svm.setC(0.1)

        # Low cost on errors for the positive class.
        svm.setErrorCost(0.1)
        svm.learnModel(X, y)
        e1 = Evaluator.binaryErrorP(y, svm.classify(X))

        # High cost: the positive-class error should drop.
        svm.setErrorCost(0.9)
        svm.learnModel(X, y)
        e2 = Evaluator.binaryErrorP(y, svm.classify(X))

        self.assertTrue(e1 > e2)
Ejemplo n.º 34
0
import matplotlib.pyplot as plt

# Script setup for an epsilon-SVR model-penalisation experiment on the
# regression benchmark datasets.
# Fix: numpy.float was deprecated and removed (NumPy 1.24); the builtin
# float is used as the dtype instead.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)

# One worker per CPU core for the parallel grid evaluations.
numProcesses = multiprocessing.cpu_count()
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Candidate hyperparameter grids.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=float)
epsilons = learner.getEpsilons()

# Fix gamma and epsilon to single values for this run.
gammaInd = 3
gamma = gammas[gammaInd]
learner.setGamma(gamma)

epsilonInd = 0
epsilon = epsilons[epsilonInd]
learner.setEpsilon(epsilon)
learner.normModelSelect = True
Ejemplo n.º 35
0
class LibSVMTest(unittest.TestCase):
    def setUp(self):
        """Create a small binary dataset and a LibSVM with reduced parameter grids.

        Fix: ``numpy.float`` was deprecated and removed (NumPy 1.24); the
        builtin ``float`` is used as the dtype instead.
        """
        try:
            import sklearn
        except ImportError as error:
            logging.debug(error)
            return

        numpy.random.seed(21)
        numExamples = 100
        numFeatures = 10
        eg = ExamplesGenerator()

        self.X, self.y = eg.generateBinaryExamples(numExamples, numFeatures)
        self.svm = LibSVM()
        # Small grids keep the cross-validation tests fast.
        self.svm.Cs = 2.0**numpy.arange(-2, 2, dtype=float)
        self.svm.gammas = 2.0**numpy.arange(-3, 1, dtype=float)
        self.svm.epsilons = 2.0**numpy.arange(-2, 0, dtype=float)

        numpy.set_printoptions(linewidth=150, suppress=True, precision=3)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

    def testLearnModel(self):
        """Train on valid data, reject invalid labels, and train a regression SVM.

        Bug fixes: the regression branch generated random targets ``y`` but
        then trained on ``self.y`` (binary labels), leaving ``y`` unused --
        it now trains on the generated targets. Also replaces the removed
        ``numpy.float`` alias with ``numpy.float64``.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        predY = self.svm.classify(self.X)
        y = self.y

        e = Evaluator.binaryError(y, predY)

        #Test for wrong labels
        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [5]])

        # Labels outside {-1, +1} must be rejected.
        self.assertRaises(ValueError, self.svm.learnModel, X, y)

        #Try the regression SVM
        svm = LibSVM(type="Epsilon_SVR")
        y = numpy.random.rand(self.X.shape[0])
        svm.learnModel(self.X, y)
        

    def testSetErrorCost(self):
        """A higher error cost should reduce the positive-class error."""
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples, numFeatures = 1000, 100
        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(numExamples, numFeatures)

        svm = LibSVM()
        svm.setKernel("linear", 0)
        svm.setC(0.1)

        errors = []
        # Train with a low cost, then a high cost, recording the
        # positive-class error each time.
        for cost in (0.1, 0.9):
            svm.setErrorCost(cost)
            svm.learnModel(X, y)
            errors.append(Evaluator.binaryErrorP(y, svm.classify(X)))

        self.assertTrue(errors[0] > errors[1])

    def testClassify(self):
        """The classification error must be invariant under permuting the rows."""
        try:
            import sklearn
        except ImportError as error:
            return

        self.svm.learnModel(self.X, self.y)
        baselineError = Evaluator.binaryError(self.y,
                                              self.svm.classify(self.X))

        # Shuffle examples and labels together; the error must not change.
        perm = numpy.random.permutation(self.X.shape[0])
        permutedError = Evaluator.binaryError(
            self.y[perm], self.svm.classify(self.X[perm, :]))

        self.assertEqual(baselineError, permutedError)

    def testEvaluateCv(self):
        """Cross-validated means and variances must all lie in [0, 1]."""
        try:
            import sklearn
        except ImportError as error:
            return

        numFolds = 10
        # Renamed from "vars" to avoid shadowing the builtin.
        means, variances = self.svm.evaluateCv(self.X, self.y, numFolds)

        for stats in (means, variances):
            self.assertTrue((stats >= 0).all())
            self.assertTrue((stats <= 1).all())

    @apgl.skip("")
    def testGetModel(self):
        """Smoke test: getWeights is callable after training."""
        try:
            import sklearn
        except ImportError as error:
            return

        eg = ExamplesGenerator()
        X, y = eg.generateBinaryExamples(50, 3)

        learner = LibSVM()
        learner.learnModel(X, y)

        weights, b = learner.getWeights()

        # Previously logged for inspection:
        #logging.debug(weights)
        #logging.debug(b)
        

    @apgl.skip("")
    def testGetWeights(self):
        """Decision values must be reproducible from the primal weights.

        NOTE(review): the 1-D test uses ``dot(X, weights) - b`` with exact
        equality, while the random test uses ``dot(X, weights) + b`` with a
        tolerance -- the sign convention for b is inconsistent between the
        two; confirm against LibSVM.getWeights.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2] ,[3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        y = numpy.array([[-1], [-1], [-1], [1], [1] ,[1]])

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b  = svm.getWeights()

        #Let's see if we can compute the decision values 
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.zeros(numExamples)
        decisions2 = numpy.dot(X, weights) - b

        # Exact float equality -- fragile, but kept as in the original.
        self.assertTrue((decisions == decisions2).all())
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        y = numpy.sign(numpy.random.rand(numExamples)-0.5)

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b  = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.dot(X, weights) + b

        tol = 10**-6

        self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

    def testSetTermination(self):
        """Training must still work after tightening the termination tolerance."""
        try:
            import sklearn
        except ImportError as error:
            return

        # Fit with defaults, adjust the tolerance, then fit again.
        self.svm.learnModel(self.X, self.y)
        self.svm.setTermination(0.1)
        self.svm.learnModel(self.X, self.y)

    def testSetSvmType(self):
        """Switch between Epsilon_SVR and C_SVC and check both can be trained.

        Fix: ``numpy.float`` was deprecated and removed (NumPy 1.24); the
        builtin ``float`` is used as the dtype instead.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        X = Standardiser().standardiseArray(X)
        c = numpy.random.randn(numFeatures)

        y = numpy.dot(X, numpy.array([c]).T).ravel() + 1
        # Binary labels in {-1, +1} derived from the sign of y.
        y2 = numpy.array(y > 0, numpy.int32)*2 -1

        svm = LibSVM()

        svm.setSvmType("Epsilon_SVR")

        self.assertEqual(svm.getType(), "Epsilon_SVR")

        #Try to get a good error
        Cs = 2**numpy.arange(-6, 4, dtype=float)
        epsilons = 2**numpy.arange(-6, 4, dtype=float)

        bestError = 10
        for C in Cs:
            for epsilon in epsilons:
                svm.setEpsilon(epsilon)
                svm.setC(C)
                svm.learnModel(X, y)
                yp = svm.predict(X)

                if Evaluator.rootMeanSqError(y, yp) < bestError:
                    bestError = Evaluator.rootMeanSqError(y, yp)

        # The tuned SVR must outperform predicting all zeros.
        self.assertTrue(bestError < Evaluator.rootMeanSqError(y, numpy.zeros(y.shape[0])))

        svm.setSvmType("C_SVC")
        svm.learnModel(X, y2)
        yp2 = svm.predict(X)

        # A binary error is a proportion, hence within [0, 1].
        self.assertTrue(0 <= Evaluator.binaryError(y2, yp2) <= 1)

    @apgl.skip("")
    def testSaveParams(self):
        """
        Round-trip SVM parameters through saveParams/loadParams.

        Bug fix: the original asserted on ``svm`` (the object whose
        parameters were just saved), so a broken ``loadParams`` would never
        have been detected.  Assert on the freshly loaded ``svm2`` instead.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.5)
        svm.setEpsilon(12.1)
        svm.setErrorCost(1.8)
        svm.setSvmType("Epsilon_SVR")
        svm.setTermination(0.12)
        svm.setKernel("gaussian", 0.43)

        outputDir = PathDefaults.getOutputDir()
        fileName = outputDir + "test/testSvmParams"
        svm.saveParams(fileName)

        # Load into a brand-new object and verify every saved setting.
        svm2 = LibSVM()
        svm2.loadParams(fileName)

        self.assertEqual(svm2.getC(), 10.5)
        self.assertEqual(svm2.getEpsilon(), 12.1)
        self.assertEqual(svm2.getErrorCost(), 1.8)
        self.assertEqual(svm2.getSvmType(), "Epsilon_SVR")
        self.assertEqual(svm2.getTermination(), 0.12)
        self.assertEqual(svm2.getKernel(), "gaussian")
        self.assertEqual(svm2.getKernelParams(), 0.43)

    def testStr(self):
        """Smoke test: constructing a LibSVM must not raise."""
        try:
            import sklearn
        except ImportError as error:
            return

        learner = LibSVM()

        # String representation check currently disabled:
        #logging.debug(learner)

    def testSetEpsilon(self):
        """
        Test out the parameter for the regressive SVM, vary epsilon and look at
        number of support vectors.

        Fix: the original compared the ``.shape`` tuples of the support
        arrays; comparing 1-tuples lexicographically happens to work but is
        accidental.  Compare the actual support-vector counts (``shape[0]``).
        """
        try:
            import sklearn
        except ImportError as error:
            return

        svm = LibSVM()
        svm.setC(10.0)
        svm.setEpsilon(0.1)
        svm.setSvmType("Epsilon_SVR")

        numExamples = 100
        numFeatures = 10
        X = numpy.random.randn(numExamples, numFeatures)
        c = numpy.random.randn(numFeatures)

        # Linear targets plus Gaussian noise.
        y = numpy.dot(X, numpy.array([c]).T).ravel() + numpy.random.randn(100)
        
        svm.setEpsilon(1.0)
        svm.learnModel(X, y)
        numSV = svm.getModel().support_.shape[0]
        
        svm.setEpsilon(0.5)
        svm.learnModel(X, y)
        numSV2 = svm.getModel().support_.shape[0]

        svm.setEpsilon(0.01)
        svm.learnModel(X, y)
        numSV3 = svm.getModel().support_.shape[0]

        #There should be fewer SVs as epsilon increases
        self.assertTrue(numSV < numSV2)
        self.assertTrue(numSV2 < numSV3)

    def testPredict(self):
        """predict(X, True) runs and returns labels plus decision values."""
        try:
            import sklearn
        except ImportError as error:
            return

        n, p = 100, 10
        X = numpy.random.randn(n, p)
        coefs = numpy.random.randn(p)

        labels = numpy.dot(X, numpy.array([coefs]).T).ravel()
        labels = numpy.array(labels > 0, numpy.int32)*2 - 1

        model = LibSVM()
        model.learnModel(X, labels)
        predLabels, decisionVals = model.predict(X, True)

        # Disabled sanity check on the decision-value signs:
        #self.assertTrue((numpy.sign(decisionVals) == predLabels).all())

    #@unittest.skip("")
    def testParallelVfcvRbf(self):
        """
        parallelVfcvRbf should reproduce an explicit V-fold CV grid search
        over (C, gamma) for the RBF kernel.
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx)

        tol = 10**-6 
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0])) 

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                # Mean CV error for this (C, gamma) cell.
                meanErrors2[i, j] = error/len(idx)

                # NOTE(review): selection compares the *summed* fold error
                # against bestError initialised to 1; if every cell sums to
                # >= 1, bestC/bestGamma stay unbound -- confirm intended.
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        # parallelVfcvRbf returns the grid transposed relative to our (C, gamma) layout.
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    def testParallelVfcvRbf2(self):
        """
        As testParallelVfcvRbf but for Epsilon_SVR: the CV grid gains an
        epsilon axis and fold errors come from the learner's metric method.
        """
        #In this test we try SVM regression 
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        bestSVM, meanErrors = svm.parallelVfcvRbf(self.X, self.y, idx, type="Epsilon_SVR")

        tol = 10**-6
        bestError = 100
        # Grid is indexed (gamma, epsilon, C) to match parallelVfcvRbf's output layout.
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]

                    error = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]
                        testX = self.X[testInds, :]
                        testY = self.y[testInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(testX)
                        error += svm.getMetricMethod()(predY, testY)

                    meanErrors2[j, k, i] = error/len(idx)

                    # Best model tracked on the summed fold error; the grid
                    # stores the mean -- both rank (C, gamma, epsilon) identically.
                    if error < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestError = error
                        bestEpsilon = epsilon

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)

    def testParallelVfPenRbf(self):
        """
        parallelVfPenRbf should match a manually computed penalised error
        grid: full-data training error plus a Cv-scaled CV penalty per
        (C, gamma) cell.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv)

        tol = 10**-6
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Penalty term: gap between full-set error and fold-train error.
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        # One result tuple per Cv value; reconstruct the penalised grid from it.
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)

    #@unittest.skip("")
    def testParallelVfPenRbf2(self):
        """
        As testParallelVfPenRbf but for Epsilon_SVR: penalised grid over
        (gamma, epsilon, C) using the learner's metric method.
        """
        #Test support vector regression 
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        resultsList = svm.parallelVfPenRbf(self.X, self.y, idx, Cv, type="Epsilon_SVR")

        tol = 10**-6 
        bestError = 100
        # Grid indexed (gamma, epsilon, C) to match parallelVfPenRbf's output layout.
        meanErrors2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    
                    penalty = 0
                    for trainInds, testInds in idx:
                        trainX = self.X[trainInds, :]
                        trainY = self.y[trainInds]

                        svm.setGamma(gamma)
                        svm.setC(C)
                        svm.setEpsilon(epsilon)
                        svm.learnModel(trainX, trainY)
                        predY = svm.predict(self.X)
                        predTrainY = svm.predict(trainX)
                        # Penalty: gap between full-set and fold-train error.
                        penalty += svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)

                    penalty = penalty*Cv[0]/len(idx)
                    svm.learnModel(self.X, self.y)
                    predY = svm.predict(self.X)
                    meanErrors2[j, k, i] = svm.getMetricMethod()(predY, self.y) + penalty

                    if meanErrors2[j, k, i] < bestError:
                        bestC = C
                        bestGamma = gamma
                        bestEpsilon = epsilon 
                        bestError = meanErrors2[j, k, i]

        # One result tuple per Cv value; reconstruct the penalised grid from it.
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        self.assertEquals(bestEpsilon, bestSVM.getEpsilon())
        self.assertTrue(numpy.linalg.norm(meanErrors2 - meanErrors) < tol)


    def testGetC(self):
        """getC returns the value previously set with setC."""
        learner = LibSVM()
        learner.setC(10.0)
        self.assertTrue(learner.getC() == 10.0)

    def testGetGamma(self):
        """The kernel parameter set via setKernel is retrievable."""
        learner = LibSVM()
        learner.setKernel("gaussian", 12.0)
        self.assertTrue(learner.getKernelParams() == 12.0)

    def testComputeTestError(self):
        """
        computeTestError should equal a direct train/predict/binaryError run.

        Fix: ``self.X.shape[0]*0.5`` produces a float, which is invalid as a
        slice index on Python 3 / modern NumPy; use integer floor division.
        """
        C = 10.0
        gamma = 0.5

        numTrainExamples = self.X.shape[0]//2

        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]

        svm = LibSVM('gaussian', gamma, C)
        args = (trainX, trainY, testX, testY, svm)
        error = computeTestError(args)

        # Reference computation with an identical, freshly constructed SVM.
        svm = LibSVM('gaussian', gamma, C)
        svm.learnModel(trainX, trainY)
        predY = svm.predict(testX)
        self.assertEquals(Evaluator.binaryError(predY, testY), error)

    def testComputeBootstrapError(self):
        """
        Smoke test: computeBootstrapError runs on a half/half data split.

        Fix: ``self.X.shape[0]*0.5`` produces a float, which is invalid as a
        slice index on Python 3 / modern NumPy; use integer floor division.
        """
        C = 10.0
        gamma = 0.5

        numTrainExamples = self.X.shape[0]//2

        trainX, trainY = self.X[0:numTrainExamples, :], self.y[0:numTrainExamples]
        testX, testY = self.X[numTrainExamples:, :], self.y[numTrainExamples:]
        
        svm = LibSVM('gaussian', gamma, C)

        args = (trainX, trainY, testX, testY, svm)
        error = computeBootstrapError(args)



    def testComputeIdealPenalty(self):
        """Smoke test: computeIdealPenalty runs with train == full set."""
        gamma, C = 0.5, 10.0
        model = LibSVM("gaussian", gamma, C)
        penaltyArgs = (self.X, self.y, self.X, self.y, model)
        error = computeIdealPenalty(penaltyArgs)

    def testParallelPenaltyGridRbf(self):
        """
        parallelPenaltyGridRbf should equal the ideal penalty (full-set
        error minus training error) computed explicitly per (C, gamma).
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                idealPenalties2[i, j] = penalty

                # NOTE(review): idealPenalties3 is filled via the standalone
                # helper but never asserted against -- confirm if intentional.
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)

        tol = 10**-6 
        # Returned grid is transposed relative to our (C, gamma) layout.
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)


    def testParallelPenaltyGridRbf2(self):
        """
        As testParallelPenaltyGridRbf but for Epsilon_SVR: penalty grid over
        (gamma, epsilon, C) using the learner's metric method.
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]

        idealPenalties = svm.parallelPenaltyGridRbf(trainX, trainY, self.X, self.y, type="Epsilon_SVR")
        # Grid indexed (gamma, epsilon, C) to match the returned layout.
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Ideal penalty: full-set error minus training error.
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)

                    idealPenalties2[j, k, i] = penalty

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)


    def testParallelModelSelect(self):
        """
        parallelModelSelect with a (setC, setGamma) parameter grid should
        reproduce an explicit V-fold CV grid search.
        """
        folds = 3
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        # Parameter grid keyed by the learner's setter-method names.
        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()    
        
        bestSVM, meanErrors = svm.parallelModelSelect(self.X, self.y, idx, paramDict)
        
        tol = 10**-6 
        bestError = 1
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0])) 
        print("Computing real grid")

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                error = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]
                    testX = self.X[testInds, :]
                    testY = self.y[testInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(testX)
                    error += Evaluator.binaryError(predY, testY)

                meanErrors2[i, j] = error/len(idx)

                # Best model tracked on the summed fold error; ranks the same
                # as the stored mean.
                if error < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = error
            
        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        # Returned grid is transposed relative to our (C, gamma) layout.
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)


    def testParallelPenaltyGrid2(self):
        """
        parallelPenaltyGrid with a (setC, setGamma, setEpsilon) parameter
        dict should equal the explicitly computed SVR penalty grid.
        """
        #Test with SVM regression
        svm = self.svm
        svm.setKernel("gaussian")
        svm.setSvmType("Epsilon_SVR")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        
        # Parameter grid keyed by the learner's setter-method names.
        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()  
        paramDict["setEpsilon"] = svm.getEpsilons()
        
        #print(paramDict.keys())

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        # Grid indexed (gamma, epsilon, C) to match the returned layout.
        idealPenalties2 = numpy.zeros((svm.gammas.shape[0], svm.epsilons.shape[0], svm.Cs.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                for k in range(svm.epsilons.shape[0]):
                    epsilon = svm.epsilons[k]
                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.setEpsilon(epsilon)

                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Ideal penalty: full-set error minus training error.
                    penalty = svm.getMetricMethod()(predY, self.y) - svm.getMetricMethod()(predTrainY, trainY)

                    idealPenalties2[j, k, i] = penalty

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(idealPenalties2 - idealPenalties) < tol)

    def testParallelPen(self): 
        """
        parallelPen should return (bestSVM, trainErrors, penalties) matching
        an explicitly computed penalised grid: full-data training error plus
        a Cv-scaled CV penalty per (C, gamma) cell.
        """
        folds = 3
        Cv = numpy.array([4.0])
        idx = Sampling.crossValidation(folds, self.X.shape[0])
        svm = self.svm
        svm.setKernel("gaussian")

        # Parameter grid keyed by the learner's setter-method names.
        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()            
        
        resultsList = svm.parallelPen(self.X, self.y, idx, paramDict, Cv)
        
        tol = 10**-6
        bestError = 1
        trainErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        penalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        meanErrors2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]
                penalty = 0
                for trainInds, testInds in idx:
                    trainX = self.X[trainInds, :]
                    trainY = self.y[trainInds]

                    svm.setGamma(gamma)
                    svm.setC(C)
                    svm.learnModel(trainX, trainY)
                    predY = svm.predict(self.X)
                    predTrainY = svm.predict(trainX)
                    # Penalty: gap between full-set error and fold-train error.
                    penalty += Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                penalty = penalty*Cv[0]/len(idx)
                svm.learnModel(self.X, self.y)
                predY = svm.predict(self.X)
                trainErrors2[i, j] = Evaluator.binaryError(predY, self.y)
                penalties2[i, j] = penalty
                meanErrors2[i, j] = Evaluator.binaryError(predY, self.y) + penalty

                if meanErrors2[i, j] < bestError:
                    bestC = C
                    bestGamma = gamma
                    bestError = meanErrors2[i, j]

        # One result tuple per Cv value; reconstruct the penalised grid from it.
        bestSVM, trainErrors, currentPenalties = resultsList[0]
        meanErrors = trainErrors + currentPenalties

        self.assertEquals(bestC, bestSVM.getC())
        self.assertEquals(bestGamma, bestSVM.getGamma())
        # Returned grids are transposed relative to our (C, gamma) layout.
        self.assertTrue(numpy.linalg.norm(meanErrors2.T - meanErrors) < tol)
        self.assertTrue(numpy.linalg.norm(trainErrors2.T - trainErrors) < tol)
        self.assertTrue(numpy.linalg.norm(penalties2.T - currentPenalties) < tol)

    def testParallelPenaltyGrid(self):
        """
        parallelPenaltyGrid with a (setC, setGamma) parameter dict should
        equal the explicitly computed ideal penalty grid.
        """
        svm = self.svm
        svm.setKernel("gaussian")
        trainX = self.X[0:40, :]
        trainY = self.y[0:40]
        
        # Parameter grid keyed by the learner's setter-method names.
        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()      

        idealPenalties = svm.parallelPenaltyGrid(trainX, trainY, self.X, self.y, paramDict)
        idealPenalties2 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))
        idealPenalties3 = numpy.zeros((svm.Cs.shape[0], svm.gammas.shape[0]))

        for i in range(svm.Cs.shape[0]):
            C = svm.Cs[i]
            for j in range(svm.gammas.shape[0]):
                gamma = svm.gammas[j]

                svm.setGamma(gamma)
                svm.setC(C)
                svm.learnModel(trainX, trainY)
                predY = svm.predict(self.X)
                predTrainY = svm.predict(trainX)
                penalty = Evaluator.binaryError(predY, self.y) - Evaluator.binaryError(predTrainY, trainY)

                idealPenalties2[i, j] = penalty

                # NOTE(review): idealPenalties3 is filled via the standalone
                # helper but never asserted against -- confirm if intentional.
                args = (trainX, trainY, self.X, self.y, svm)
                idealPenalties3[i, j] = computeIdealPenalty(args)

        tol = 10**-6 
        # Returned grid is transposed relative to our (C, gamma) layout.
        self.assertTrue(numpy.linalg.norm(idealPenalties2.T - idealPenalties) < tol)

    def testGetBestLearner(self):
        """
        With normModelSelect enabled, getBestLearner should pick the C whose
        full-data weight norm is closest to the mean CV-fold weight norm at
        the error-minimising grid cell.
        """
        svm = self.svm
        paramDict = {} 
        paramDict["setC"] = svm.getCs()
        paramDict["setGamma"] = svm.getGammas()      

        # Random error grid: the test only checks the norm-matching logic.
        errors = numpy.random.rand(svm.getCs().shape[0], svm.getGammas().shape[0])

        folds = 5 
        idx = Sampling.crossValidation(folds, self.X.shape[0])

        svm.normModelSelect = True 
        svm.setKernel("gaussian")
        learner = svm.getBestLearner(errors, paramDict, self.X, self.y, idx)
        
        bestC = learner.getC()
        
        #Find the best norm 
        bestInds = numpy.unravel_index(numpy.argmin(errors), errors.shape)
        learner.setC(svm.getCs()[bestInds[0]])
        learner.setGamma(svm.getGammas()[bestInds[1]])              
        
        # Mean weight norm across CV training folds at the best (C, gamma).
        norms = []
        for trainInds, testInds in idx: 
            validX = self.X[trainInds, :]
            validY = self.y[trainInds]
            learner.learnModel(validX, validY)
            
            norms.append(learner.weightNorm())  
        
        bestNorm = numpy.array(norms).mean()
        
        # Re-fit on the full data for each C; expect the C with the closest norm.
        norms = numpy.zeros(paramDict["setC"].shape[0]) 
        for i, C in enumerate(paramDict["setC"]): 
            learner.setC(C)
            learner.learnModel(self.X, self.y)
            norms[i] = learner.weightNorm()            
            
        bestC2 = paramDict["setC"][numpy.abs(norms-bestNorm).argmin()]
        
        self.assertEquals(bestC, bestC2)
Ejemplo n.º 36
0
 def testGetC(self):
     """getC returns the value previously set with setC."""
     learner = LibSVM()
     learner.setC(10.0)
     self.assertTrue(learner.getC() == 10.0)
Ejemplo n.º 37
0
def runToyExp(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, numProcesses, fileNameSuffix):
    """
    Run the toy model-penalisation experiment over every dataset and sampling
    method, comparing ideal penalties, V-fold cross validation and V-fold
    penalisation for an RBF SVM, and save the resulting error/parameter grids.

    :param datasetNames: list of (name, numRealisations) pairs; each name maps
        to a ``.npz`` file in the toy data directory.
    :param sampleSizes: training-set sizes to subsample per realisation.
    :param foldsSet: numbers of CV folds to evaluate.
    :param cvScalings: penalty scalings applied on top of the BIC penalty.
    :param sampleMethods: list of (label, callable) sampling strategies.
    :param numProcesses: worker processes for the parallel SVM routines.
    :param fileNameSuffix: appended to each output file name.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    svm = LibSVM()
    numCs = svm.getCs().shape[0]
    numGammas = svm.getGammas().shape[0]
    # One method slot for CV plus one per penalty scaling (incl. the BIC one).
    numMethods = 1 + (1 + cvScalings.shape[0])
    numParams = 2

    runIdeal = True
    runCv = True
    runVfpen = True

    for i in range(len(datasetNames)):
        datasetName = datasetNames[i][0]
        numRealisations = datasetNames[i][1]
        logging.debug("Learning using dataset " + datasetName)

        for s in range(len(sampleMethods)):
            sampleMethod = sampleMethods[s][1]
            outfileName = outputDir + datasetName + sampleMethods[s][0] + fileNameSuffix

            # File lock guards against concurrent runs recomputing the same results.
            fileLock = FileLock(outfileName + ".npz")
            if not fileLock.isLocked() and not fileLock.fileExists():
                fileLock.lock()
                errors = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods))
                params = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numParams))
                errorGrids = numpy.zeros(
                    (numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas)
                )
                approxGrids = numpy.zeros(
                    (numRealisations, len(sampleSizes), foldsSet.shape[0], numMethods, numCs, numGammas)
                )
                idealGrids = numpy.zeros((numRealisations, len(sampleSizes), foldsSet.shape[0], numCs, numGammas))

                # arr_0..arr_5: grid points, training data and the density
                # functions used for Bayes-error computation.
                data = numpy.load(dataDir + datasetName + ".npz")
                gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = (
                    data["arr_0"],
                    data["arr_1"],
                    data["arr_2"],
                    data["arr_3"],
                    data["arr_4"],
                    data["arr_5"],
                )

                # We form a test set from the grid points
                testX = numpy.zeros((gridPoints.shape[0] ** 2, 2))
                for m in range(gridPoints.shape[0]):
                    testX[m * gridPoints.shape[0] : (m + 1) * gridPoints.shape[0], 0] = gridPoints
                    testX[m * gridPoints.shape[0] : (m + 1) * gridPoints.shape[0], 1] = gridPoints[m]

                for j in range(numRealisations):
                    Util.printIteration(j, 1, numRealisations, "Realisation: ")

                    for k in range(sampleSizes.shape[0]):
                        sampleSize = sampleSizes[k]
                        for m in range(foldsSet.shape[0]):
                            folds = foldsSet[m]
                            logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")
                            # Fresh random subsample of the training data.
                            perm = numpy.random.permutation(trainX.shape[0])
                            trainInds = perm[0:sampleSize]
                            validX = trainX[trainInds, :]
                            validY = trainY[trainInds]

                            svm = LibSVM(processes=numProcesses)
                            # Find ideal penalties
                            if runIdeal:
                                logging.debug("Finding ideal grid of penalties")
                                idealGrids[j, k, m, :, :] = parallelPenaltyGridRbf(
                                    svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X
                                )

                            # Cross validation
                            if runCv:
                                logging.debug("Running V-fold cross validation")
                                methodInd = 0
                                idx = sampleMethod(folds, validY.shape[0])
                                if sampleMethod == Sampling.bootstrap:
                                    bootstrap = True
                                else:
                                    bootstrap = False

                                bestSVM, cvGrid = svm.parallelVfcvRbf(validX, validY, idx, True, bootstrap)
                                predY, decisionsY = bestSVM.predict(testX, True)
                                # Reshape the flat decision values back onto the 2-D grid.
                                decisionGrid = numpy.reshape(
                                    decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F"
                                )
                                errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                    gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                )
                                params[j, k, m, methodInd, :] = numpy.array([bestSVM.getC(), bestSVM.getKernelParams()])
                                errorGrids[j, k, m, methodInd, :, :] = cvGrid

                            # v fold penalisation
                            if runVfpen:
                                logging.debug("Running penalisation")
                                # BIC penalisation
                                Cv = float((folds - 1) * numpy.log(validX.shape[0]) / 2)
                                tempCvScalings = cvScalings * (folds - 1)
                                tempCvScalings = numpy.insert(tempCvScalings, 0, Cv)

                                # Use cross validation
                                idx = sampleMethod(folds, validY.shape[0])
                                svmGridResults = svm.parallelVfPenRbf(validX, validY, idx, tempCvScalings)

                                # Method slots 1..n hold one result per penalty scaling.
                                for n in range(len(tempCvScalings)):
                                    bestSVM, trainErrors, approxGrid = svmGridResults[n]
                                    methodInd = n + 1
                                    predY, decisionsY = bestSVM.predict(testX, True)
                                    decisionGrid = numpy.reshape(
                                        decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F"
                                    )
                                    errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                        gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                    )
                                    params[j, k, m, methodInd, :] = numpy.array(
                                        [bestSVM.getC(), bestSVM.getKernelParams()]
                                    )
                                    errorGrids[j, k, m, methodInd, :, :] = trainErrors + approxGrid
                                    approxGrids[j, k, m, methodInd, :, :] = approxGrid

                # Aggregate over realisations before saving.
                meanErrors = numpy.mean(errors, 0)
                print(meanErrors)

                meanParams = numpy.mean(params, 0)
                print(meanParams)

                meanErrorGrids = numpy.mean(errorGrids, 0)
                stdErrorGrids = numpy.std(errorGrids, 0)

                meanIdealGrids = numpy.mean(idealGrids, 0)
                stdIdealGrids = numpy.std(idealGrids, 0)

                meanApproxGrids = numpy.mean(approxGrids, 0)
                stdApproxGrids = numpy.std(approxGrids, 0)

                numpy.savez(
                    outfileName,
                    errors,
                    params,
                    meanErrorGrids,
                    stdErrorGrids,
                    meanIdealGrids,
                    stdIdealGrids,
                    meanApproxGrids,
                    stdApproxGrids,
                )
                logging.debug("Saved results as file " + outfileName + ".npz")
                fileLock.unlock()
            else:
                logging.debug("Results already computed")

    logging.debug("All done!")
Ejemplo n.º 38
0
 def testGetGamma(self):
     """The kernel parameter set via setKernel is retrievable."""
     learner = LibSVM()
     learner.setKernel("gaussian", 12.0)
     self.assertTrue(learner.getKernelParams() == 12.0)
Ejemplo n.º 39
0
from apgl.util import Util 

# Script setup for SVR model-selection experiments on the regression datasets.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.seterr(all="raise")
numpy.random.seed(21)
dataDir = PathDefaults.getDataDir() 
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0 

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)

numProcesses = multiprocessing.cpu_count()
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# Fix: numpy.float was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin float is the documented replacement.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=float)
epsilons = learner.getEpsilons()

numCs = Cs.shape[0]
numGammas = gammas.shape[0]

# Parameter grid keyed by learner setter names, consumed by the parallel
# model-selection routines.
paramDict = {} 
paramDict["setC"] = Cs 
paramDict["setGamma"] = gammas
paramDict["setEpsilon"] = epsilons 

print(learner)
Ejemplo n.º 40
0
    def testGetWeights(self):
        """
        getWeights should return (w, b) that reproduce the SVM decision
        values for a linear kernel, on separable 1-D data and on random data.
        """
        try:
            import sklearn
        except ImportError as error:
            return

        numExamples = 6
        X = numpy.array([[-3], [-2], [-1], [1], [2], [3]], numpy.float64)
        #X = numpy.random.rand(numExamples, 10)
        y = numpy.array([[-1], [-1], [-1], [1], [1], [1]])

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.zeros(numExamples)
        # NOTE(review): this branch uses X.w - b while the random-data check
        # below uses X.w + b -- confirm which sign convention getWeights returns.
        decisions2 = numpy.dot(X, weights) - b

        self.assertTrue((decisions == decisions2).all())
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())

        #Do the same test on a random datasets
        numExamples = 50
        numFeatures = 10

        X = numpy.random.rand(numExamples, numFeatures)
        y = numpy.sign(numpy.random.rand(numExamples) - 0.5)

        svm = LibSVM()
        svm.learnModel(X, y.ravel())
        weights, b = svm.getWeights()

        #Let's see if we can compute the decision values
        y, decisions = svm.predict(X, True)
        decisions2 = numpy.dot(X, weights) + b

        tol = 10**-6

        # Random data is noisy, so compare up to a tolerance rather than exactly.
        self.assertTrue(numpy.linalg.norm(decisions - decisions2) < tol)
        predY = numpy.sign(decisions2)
        self.assertTrue((y.ravel() == predY).all())
Ejemplo n.º 41
0
import logging
import numpy
import os


# Regression experiment set-up: load one benchmark dataset and define the
# SVR parameter grids used for model selection.
datasets = ModelSelectUtils.getRegressionDatasets()

numProcesses = 8
dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
datasetName = datasets[9]
print(datasetName)

# j selects the first realisation (train/test split) of the dataset.
j = 0 
trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j)

learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) 


# Candidate hyperparameter grids on log2 scales.
# numpy.float was deprecated in NumPy 1.20 and removed in 1.24; use the
# equivalent concrete dtype numpy.float64 instead.
paramDict = {} 
paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float64)
paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float64)
paramDict["setEpsilon"] = learner.getEpsilons()

foldsSet = numpy.arange(2, 31, 2)
Cvs = numpy.array([1.0])
sampleMethod = Sampling.crossValidation

# Draw a random subsample of the training set to use for validation.
sampleSize = 100
trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
validX = trainX[trainInds,:]
validY = trainY[trainInds]
Ejemplo n.º 42
0
 def testGetC(self):
     """A value set with setC should be returned unchanged by getC."""
     svm = LibSVM()
     expectedC = 10.0
     svm.setC(expectedC)
     self.assertTrue(svm.getC() == expectedC)
Ejemplo n.º 43
0
class SvmEgoSimulator(AbstractDiffusionSimulator):
    """
    A class which combines SVM classification with the EgoSimulation. There are methods
    to run modelSelection, train the SVM and then run the simulation. The simulation itself
    is run using EgoSimulator. 
    """
    def __init__(self, examplesFileName):
        """
        Create the class by reading examples from a Matlab file. Instantiate the SVM
        and create a preprocesor to standarise examples to have zero mean and unit variance. 

        :param examplesFileName: path to a Matlab file containing the "X"
            examples and "y" labels, read via ExamplesList.readFromFile.
        """
        self.examplesList = ExamplesList.readFromFile(examplesFileName)
        self.examplesList.setDefaultExamplesName("X")
        self.examplesList.setLabelsName("y")

        # Log the label distribution and the error rate of always predicting
        # the majority class, as a baseline for the classifier.
        (freqs, items) = Util.histogram(self.examplesList.getSampledDataField("y").ravel())
        logging.info("Distribution of labels: " + str((freqs, items)))
        logging.info("The base error rate is " + str(float(min(freqs))/self.examplesList.getNumExamples()))
        
        self.classifier = LibSVM()
        self.errorMethod = Evaluator.balancedError

        # Standardise examples (zero mean, unit variance) and write the
        # transformed data back into the examples list.
        self.preprocessor = Standardiser()
        X = self.preprocessor.standardiseArray(self.examplesList.getDataField(self.examplesList.getDefaultExamplesName()))
        self.examplesList.overwriteDataField(self.examplesList.getDefaultExamplesName(), X)

    def getPreprocessor(self):
        """
        Returns the preprocessor

        NOTE(review): getPreProcessor below returns the same object; the two
        accessors appear to be duplicates.
        """
        return self.preprocessor

    def sampleExamples(self, sampleSize):
        """
        This function exists so that we can sample the same examples used in model
        selection and exclude them when running evaluateClassifier. 

        :param sampleSize: number of examples to draw from the examples list.
        """
        self.examplesList.randomSubData(sampleSize)

    def modelSelection(self, Cs, kernel, kernelParams, errorCosts, folds, sampleSize):
        """
        Perform model selection using an SVM

        :param Cs: list of candidate penalty values C (floats in [0, inf)).
        :param kernel: one of "linear", "polynomial" or "gaussian".
        :param kernelParams: candidate kernel parameter values.
        :param errorCosts: candidate error-cost values (floats in [0, inf)).
        :param folds: number of cross-validation folds.
        :param sampleSize: number of examples to use for model selection.
        :returns: the chosen (C, kernel parameter, error cost) and its error.
        """
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())
        Parameter.checkInt(folds, 0, sampleSize)
        Parameter.checkString(kernel, ["linear", "polynomial", "gaussian"])
        Parameter.checkList(Cs, Parameter.checkFloat, [0.0, float("inf")])
        Parameter.checkList(errorCosts, Parameter.checkFloat, [0.0, float("inf")])

        #Perform model selection
        self.examplesList.randomSubData(sampleSize)
        (freqs, items) = Util.histogram(self.examplesList.getSampledDataField("y").ravel())
        logging.info("Using "  + str(sampleSize) + " examples for model selection")
        logging.info("Distribution of labels: " + str((freqs, items)))
        logging.info("List of Cs " + str(Cs))
        logging.info("List of kernels " + str(kernel))
        logging.info("List of kernelParams " + str(kernelParams))
        logging.info("List of errorCosts " + str(errorCosts))

        # Grid search over all parameter combinations via cross-validation.
        CVal, kernelParamVal, errorCost, error = self.classifier.cvModelSelection(self.examplesList, Cs, kernelParams, kernel, folds, errorCosts, self.errorMethod)
        logging.info("Model selection returned C = " + str(CVal) + " kernelParam = " + str(kernelParamVal) + " errorCost = " + str(errorCost)  + " with error " + str(error))
        return CVal, kernelParamVal, errorCost, error

    def evaluateClassifier(self, CVal, kernel, kernelParamVal, errorCost, folds, sampleSize, invert=True):
        """
        Evaluate the SVM with the given parameters. Often model selection is done before this step
        and in that case, invert=True uses a sample excluding those used for model selection. 

        :param CVal: SVM penalty parameter C.
        :param kernel: one of "linear", "polynomial" or "gaussian".
        :param kernelParamVal: kernel parameter (float for gaussian, int >= 2
            for polynomial).
        :param errorCost: misclassification cost passed to the SVM.
        :param folds: number of cross-validation folds.
        :param sampleSize: number of examples to evaluate on.
        :param invert: if True, evaluate on examples NOT used in the last
            sampling (i.e. held out from model selection).
        :returns: (means, vars) of the cross-validated evaluation metrics.
        """
        Parameter.checkFloat(CVal, 0.0, float('inf'))
        Parameter.checkFloat(errorCost, 0.0, float('inf'))
        Parameter.checkString(kernel, ["linear", "polynomial", "gaussian"])
        
        # The kernel parameter's valid range depends on the kernel type.
        if kernel == "gaussian":
            Parameter.checkFloat(kernelParamVal, 0.0, float('inf'))
        elif kernel == "polynomial":
            Parameter.checkInt(kernelParamVal, 2, float('inf'))

        Parameter.checkInt(folds, 0, sampleSize)
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())

        if invert:
            # Evaluate on a random subset of the examples that were excluded
            # from the previous sampling (e.g. those not used in model selection).
            allIndices = numpy.array(list(range(0, self.examplesList.getNumExamples())))
            testIndices = numpy.setdiff1d(allIndices, self.examplesList.getPermutationIndices())
            testIndices = numpy.random.permutation(testIndices)[0:sampleSize]
        else:
            testIndices = Util.sampleWithoutReplacement(sampleSize, self.examplesList.getNumExamples())

        logging.info("Using " + str(testIndices.shape[0]) + " examples for SVM evaluation")

        self.examplesList.setPermutationIndices(testIndices)
        self.classifier.setParams(C=CVal, kernel=kernel, kernelParam=kernelParamVal)
        self.classifier.setErrorCost(errorCost)
        
        # NOTE: "vars" shadows the builtin of the same name.
        (means, vars) = self.classifier.evaluateCv(self.examplesList, folds)

        logging.info("--- Classification evaluation ---")
        logging.info("Error on " + str(testIndices.shape[0]) + " examples is " + str(means[0]) + "(" + str(vars[0]) + ")")
        logging.info("Sensitivity (recall = TP/(TP+FN)): " + str(means[1])  + "(" + str(vars[1]) + ")")
        logging.info("Specificity (TN/TN+FP): "  + str(means[2])  + "(" + str(vars[2]) + ")")
        logging.info("Error on positives: "  + str(means[3])  + "(" + str(vars[3]) + ")")
        logging.info("Error on negatives: "  + str(means[4])  + "(" + str(vars[4]) + ")")
        logging.info("Balanced error: "  + str(means[5])  + "(" + str(vars[5]) + ")")

        return (means, vars)

    def trainClassifier(self, CVal, kernel, kernelParamVal, errorCost, sampleSize):
        """
        Train the SVM on a random subsample of the examples with the given
        parameters and return the trained classifier.

        :param CVal: SVM penalty parameter C.
        :param kernel: one of "linear", "gaussian" or "polynomial".
        :param kernelParamVal: kernel parameter value.
        :param errorCost: misclassification cost passed to the SVM.
        :param sampleSize: number of examples to train on.
        :returns: the trained classifier (also stored on self.classifier).
        """
        Parameter.checkFloat(CVal, 0.0, float('inf'))
        Parameter.checkString(kernel, ["linear", "gaussian", "polynomial"])
        Parameter.checkFloat(kernelParamVal, 0.0, float('inf'))
        Parameter.checkFloat(errorCost, 0.0, float('inf'))
        Parameter.checkInt(sampleSize, 0, self.examplesList.getNumExamples())

        logging.info("Training SVM with C=" + str(CVal) + ", " + kernel + " kernel" + ", param=" + str(kernelParamVal) + ", sampleSize=" + str(sampleSize) + ", errorCost=" + str(errorCost))

        self.examplesList.randomSubData(sampleSize)
        self.classifier.setC(C=CVal)
        self.classifier.setKernel(kernel=kernel, kernelParam=kernelParamVal)
        self.classifier.setErrorCost(errorCost)

        X = self.examplesList.getSampledDataField(self.examplesList.getDefaultExamplesName())
        y = self.examplesList.getSampledDataField(self.examplesList.getLabelsName())
        y = y.ravel()
        self.classifier.learnModel(X, y)

        return self.classifier

    def getWeights(self):
        """
        Return the weights of the underlying classifier.
        """
        return self.classifier.getWeights()


    def runSimulation(self, maxIterations):
        """
        Run the ego simulation for maxIterations iterations, logging the total
        information in the graph and the age/gender distribution of alters at
        each step.

        NOTE(review): self.graph and self.egoQuestionIds are read here but never
        assigned in this class -- presumably set by a subclass or externally;
        verify before calling.

        :param maxIterations: number of simulation iterations (int >= 1).
        :returns: (totalInfo array of length maxIterations+1, transmissions).
        """
        Parameter.checkInt(maxIterations, 1, float('inf'))

        #Notice that the data is preprocessed in the same way as the survey data
        egoSimulator = EgoSimulator(self.graph, self.classifier, self.preprocessor)

        totalInfo = numpy.zeros(maxIterations+1)
        totalInfo[0] = EgoUtils.getTotalInformation(self.graph)
        logging.info("Total number of people with information: " + str(totalInfo[0]))

        logging.info("--- Simulation Started ---")

        for i in range(0, maxIterations):
            logging.info("--- Iteration " + str(i) + " ---")

            self.graph = egoSimulator.advanceGraph()
            totalInfo[i+1] = EgoUtils.getTotalInformation(self.graph)
            logging.info("Total number of people with information: " + str(totalInfo[i+1]))

            #Compute distribution of ages etc. in alters
            alterIndices = egoSimulator.getAlters(i)
            alterAges = numpy.zeros(len(alterIndices))
            alterGenders = numpy.zeros(len(alterIndices))

            # Look up each alter's age and gender from the vertex features,
            # indexed by the survey question identifiers.
            for j in range(0, len(alterIndices)):
                currentVertex = self.graph.getVertex(alterIndices[j])
                alterAges[j] = currentVertex[self.egoQuestionIds.index(("Q5X", 0))]
                alterGenders[j] = currentVertex[self.egoQuestionIds.index(("Q4", 0))]

            (freqs, items) = Util.histogram(alterAges)
            logging.info("Distribution of ages " + str(freqs) + " " + str(items))
            (freqs, items) = Util.histogram(alterGenders)
            logging.info("Distribution of genders " + str(freqs) + " " + str(items))
            
        logging.info("--- Simulation Finished ---")

        return totalInfo, egoSimulator.getTransmissions()

    def getVertexFeatureDistribution(self, fIndex, vIndices=None):
        """
        Delegate to the graph's vertex feature distribution computation.
        """
        return self.graph.getVertexFeatureDistribution(fIndex, vIndices)

    def getPreProcessor(self):
        """
        Returns the preprocessor (duplicate of getPreprocessor above).
        """
        return self.preprocessor

    def getClassifier(self):
        """
        Returns the underlying SVM classifier.
        """
        return self.classifier 

    # Class-level defaults, overwritten per instance in __init__ (except
    # graph, which is expected to be set externally -- see runSimulation).
    preprocessor = None
    examplesList = None
    classifier = None
    graph = None
    edgeWeight = 1 
Ejemplo n.º 44
0
 def testGetGamma(self):
     """The kernel parameter set via setKernel should be returned by getKernelParams."""
     svm = LibSVM()
     expectedGamma = 12.0
     svm.setKernel("gaussian", expectedGamma)
     self.assertTrue(svm.getKernelParams() == expectedGamma)