Python ModelSelectUtils 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: apgl.modelselect.ModelSelectUtils

클래스/타입: ModelSelectUtils

hotexamples.com에서의 예제들: 3

Python ModelSelectUtils - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 apgl.modelselect.ModelSelectUtils.ModelSelectUtils에 대한 가장 높은 평가를 받은 실제 사용 예제입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

bayesError(2)

getRegressionDatasets(1)

loadRegressDataset(1)

예제 #1

파일 보기

파일: ModelSelectUtilsTest.py 프로젝트: malcolmreynolds/APGL

    def testBayesError(self):
        """
        Grid-search a Gaussian-kernel LibSVM on the toy dataset, score every
        (C, gamma) pair with ModelSelectUtils.bayesError, then retrain the best
        pair and plot its decision surface together with the labelled data.

        This is an exploratory test: it prints the errors and shows figures
        rather than asserting on values.
        """
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        # First sampleSize examples train the SVM, the remainder is held out
        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        # We form a test set from the grid points: block m holds every grid
        # value in column 0 paired with gridPoints[m] in column 1
        numGridPoints = gridPoints.shape[0]
        gridShape = (numGridPoints, numGridPoints)
        gridX = numpy.zeros((numGridPoints**2, 2))
        for m in range(numGridPoints):
            rows = slice(m*numGridPoints, (m+1)*numGridPoints)
            gridX[rows, 0] = gridPoints
            gridX[rows, 1] = gridPoints[m]

        # The numpy.float alias was removed in NumPy 1.24; it was the builtin float
        Cs = 2**numpy.arange(-5, 5, dtype=float)
        gammas = 2**numpy.arange(-5, 5, dtype=float)

        # Start from +inf (and a defined parameter pair) so that bestC and
        # bestGamma are always bound after the search
        bestError = numpy.inf
        bestC, bestGamma = Cs[0], gammas[0]

        for C in Cs:
            for gamma in gammas:
                svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
                svm.learnModel(trainX, trainY)
                predY, decisionsY = svm.predict(gridX, True)
                decisionGrid = numpy.reshape(decisionsY, gridShape, order="F")
                error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

                predY, decisionsY = svm.predict(testX, True)
                error2 = Evaluator.binaryError(testY, predY)
                print(error, error2)

                if error < bestError:
                    # Record the new minimum (the assignment used to be
                    # reversed, which left bestError unchanged forever)
                    bestError = error
                    bestC = C
                    bestGamma = gamma

        svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
        svm.learnModel(trainX, trainY)
        predY, decisionsY = svm.predict(gridX, True)
        # Plot the surface of the retrained best model, not the grid left
        # over from the last iteration of the search
        decisionGrid = numpy.reshape(decisionsY, gridShape, order="F")

        plt.figure(0)
        plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
        plt.colorbar()

        plt.figure(1)
        plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
        plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
        plt.legend()
        plt.show()

예제 #2

파일 보기

파일: PenaltyTest.py 프로젝트: charanpald/wallhack

import multiprocessing
import sys
from apgl.predictors.LibSVM import LibSVM, computeTestError
from apgl.predictors.DecisionTree import DecisionTree
from sandbox.util.FileLock import FileLock
from sandbox.util.PathDefaults import PathDefaults
from sandbox.util.Sampling import Sampling
from sandbox.util.Evaluator import Evaluator
from sandbox.util.Util import Util
from apgl.modelselect.ModelSelectUtils import ModelSelectUtils
import logging
import numpy
import os


# Script set-up: pick one regression dataset, load a single train/test
# split, and define the SVR learner with its hyper-parameter grid.
datasets = ModelSelectUtils.getRegressionDatasets()

numProcesses = 8
dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
datasetName = datasets[9]
print(datasetName)

# j is passed as the third argument of loadRegressDataset
j = 0
trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j)

learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses)


# Candidate values keyed by the learner setter they belong to. The
# numpy.float alias was removed in NumPy 1.24; it was the builtin float.
paramDict = {}
paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=float)
paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=float)

예제 #3

파일 보기

파일: ToyDataExp.py 프로젝트: pierrebo/wallhack

def runToyExp(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, numProcesses, fileNameSuffix):
    """
    Run the toy-data model selection experiment and save one ``.npz`` results
    file per (dataset, sampling method) pair.

    For every realisation, sample size and fold count, a random subsample of
    the training data is drawn and model selection is run with V-fold cross
    validation (method index 0) and with V-fold penalisation (method indices
    1 onwards: first the BIC-style constant, then each entry of cvScalings
    multiplied by ``folds - 1``). Each selected SVM is scored with
    ModelSelectUtils.bayesError on a grid of test points.

    A FileLock on the output file guards each pair: the pair is skipped when
    the lock is held or the results file already exists. The lock is always
    released, even when the computation raises.

    :param datasetNames: sequence of entries whose item 0 is the dataset name
        (loaded from ``<dataDir>/<name>.npz``) and item 1 the number of realisations.
    :param sampleSizes: sequence of subsample sizes.
    :param foldsSet: numpy array of fold counts.
    :param cvScalings: numpy array of penalisation scalings.
    :param sampleMethods: sequence of entries whose item 0 is a string used in
        the output file name and item 1 a callable invoked as
        ``sampleMethod(folds, numExamples)``.
    :param numProcesses: passed to ``LibSVM(processes=...)``.
    :param fileNameSuffix: string appended to the output file name.
    """
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
    outputDir = PathDefaults.getOutputDir() + "modelPenalisation/"

    # A default SVM is only used here to size the (C, gamma) result grids
    svm = LibSVM()
    numCs = svm.getCs().shape[0]
    numGammas = svm.getGammas().shape[0]
    # One cross validation method, plus BIC and one per penalisation scaling
    numMethods = 1 + (1 + cvScalings.shape[0])
    numParams = 2

    numSampleSizes = len(sampleSizes)
    numFoldsSets = foldsSet.shape[0]

    runIdeal = True
    runCv = True
    runVfpen = True

    for datasetEntry in datasetNames:
        datasetName = datasetEntry[0]
        numRealisations = datasetEntry[1]
        logging.debug("Learning using dataset " + datasetName)

        for sampleEntry in sampleMethods:
            sampleMethod = sampleEntry[1]
            outfileName = outputDir + datasetName + sampleEntry[0] + fileNameSuffix

            fileLock = FileLock(outfileName + ".npz")
            if not fileLock.isLocked() and not fileLock.fileExists():
                fileLock.lock()
                try:
                    errors = numpy.zeros((numRealisations, numSampleSizes, numFoldsSets, numMethods))
                    params = numpy.zeros((numRealisations, numSampleSizes, numFoldsSets, numMethods, numParams))
                    errorGrids = numpy.zeros(
                        (numRealisations, numSampleSizes, numFoldsSets, numMethods, numCs, numGammas)
                    )
                    approxGrids = numpy.zeros(
                        (numRealisations, numSampleSizes, numFoldsSets, numMethods, numCs, numGammas)
                    )
                    idealGrids = numpy.zeros((numRealisations, numSampleSizes, numFoldsSets, numCs, numGammas))

                    data = numpy.load(dataDir + datasetName + ".npz")
                    gridPoints, trainX, trainY, pdfX, pdfY1X, pdfYminus1X = (
                        data["arr_0"],
                        data["arr_1"],
                        data["arr_2"],
                        data["arr_3"],
                        data["arr_4"],
                        data["arr_5"],
                    )

                    # We form a test set from the grid points: block m holds every
                    # grid value in column 0 paired with gridPoints[m] in column 1
                    numGridPoints = gridPoints.shape[0]
                    gridShape = (numGridPoints, numGridPoints)
                    testX = numpy.zeros((numGridPoints ** 2, 2))
                    for m in range(numGridPoints):
                        rows = slice(m * numGridPoints, (m + 1) * numGridPoints)
                        testX[rows, 0] = gridPoints
                        testX[rows, 1] = gridPoints[m]

                    for j in range(numRealisations):
                        Util.printIteration(j, 1, numRealisations, "Realisation: ")

                        for k in range(numSampleSizes):
                            sampleSize = sampleSizes[k]
                            for m in range(numFoldsSets):
                                folds = foldsSet[m]
                                logging.debug("Using sample size " + str(sampleSize) + " and " + str(folds) + " folds")
                                # Fresh random subsample for every (j, k, m) cell
                                perm = numpy.random.permutation(trainX.shape[0])
                                trainInds = perm[0:sampleSize]
                                validX = trainX[trainInds, :]
                                validY = trainY[trainInds]

                                svm = LibSVM(processes=numProcesses)
                                # Find ideal penalties
                                if runIdeal:
                                    logging.debug("Finding ideal grid of penalties")
                                    idealGrids[j, k, m, :, :] = parallelPenaltyGridRbf(
                                        svm, validX, validY, testX, gridPoints, pdfX, pdfY1X, pdfYminus1X
                                    )

                                # Cross validation
                                if runCv:
                                    logging.debug("Running V-fold cross validation")
                                    methodInd = 0
                                    idx = sampleMethod(folds, validY.shape[0])
                                    bootstrap = sampleMethod == Sampling.bootstrap

                                    bestSVM, cvGrid = svm.parallelVfcvRbf(validX, validY, idx, True, bootstrap)
                                    predY, decisionsY = bestSVM.predict(testX, True)
                                    decisionGrid = numpy.reshape(decisionsY, gridShape, order="F")
                                    errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                        gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                    )
                                    params[j, k, m, methodInd, :] = numpy.array(
                                        [bestSVM.getC(), bestSVM.getKernelParams()]
                                    )
                                    errorGrids[j, k, m, methodInd, :, :] = cvGrid

                                # v fold penalisation
                                if runVfpen:
                                    logging.debug("Running penalisation")
                                    # BIC penalisation constant goes first, followed by
                                    # the user supplied scalings times (folds - 1)
                                    Cv = float((folds - 1) * numpy.log(validX.shape[0]) / 2)
                                    tempCvScalings = cvScalings * (folds - 1)
                                    tempCvScalings = numpy.insert(tempCvScalings, 0, Cv)

                                    # Use cross validation
                                    idx = sampleMethod(folds, validY.shape[0])
                                    svmGridResults = svm.parallelVfPenRbf(validX, validY, idx, tempCvScalings)

                                    for n in range(len(tempCvScalings)):
                                        bestSVM, trainErrors, approxGrid = svmGridResults[n]
                                        # Index 0 is reserved for cross validation
                                        methodInd = n + 1
                                        predY, decisionsY = bestSVM.predict(testX, True)
                                        decisionGrid = numpy.reshape(decisionsY, gridShape, order="F")
                                        errors[j, k, m, methodInd] = ModelSelectUtils.bayesError(
                                            gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X
                                        )
                                        params[j, k, m, methodInd, :] = numpy.array(
                                            [bestSVM.getC(), bestSVM.getKernelParams()]
                                        )
                                        errorGrids[j, k, m, methodInd, :, :] = trainErrors + approxGrid
                                        approxGrids[j, k, m, methodInd, :, :] = approxGrid

                    # Aggregate over realisations (axis 0)
                    meanErrors = numpy.mean(errors, 0)
                    print(meanErrors)

                    meanParams = numpy.mean(params, 0)
                    print(meanParams)

                    meanErrorGrids = numpy.mean(errorGrids, 0)
                    stdErrorGrids = numpy.std(errorGrids, 0)

                    meanIdealGrids = numpy.mean(idealGrids, 0)
                    stdIdealGrids = numpy.std(idealGrids, 0)

                    meanApproxGrids = numpy.mean(approxGrids, 0)
                    stdApproxGrids = numpy.std(approxGrids, 0)

                    # Positional arrays are stored as arr_0 ... arr_7 in this order
                    numpy.savez(
                        outfileName,
                        errors,
                        params,
                        meanErrorGrids,
                        stdErrorGrids,
                        meanIdealGrids,
                        stdIdealGrids,
                        meanApproxGrids,
                        stdApproxGrids,
                    )
                    logging.debug("Saved results as file " + outfileName + ".npz")
                finally:
                    # Release the lock even on failure so that the pair can be
                    # retried instead of staying locked
                    fileLock.unlock()
            else:
                logging.debug("Results already computed")

    logging.debug("All done!")