def getSetup(learnerName, dataDir, outputDir, numProcesses):
    """
    Build the learner, dataset loader, directories and parameter grid for a
    named learner.

    :param learnerName: one of "SVM", "SVR" or "CART".
    :param dataDir: base data directory; a learner-specific sub-directory
        ("benchmark/" or "regression/") is appended to it.
    :param outputDir: base output directory; "classification/<learnerName>/"
        or "regression/<learnerName>/" is appended to it.
    :param numProcesses: passed to the learner constructor as ``processes``.
    :return: the tuple (learner, loadMethod, dataDir, outputDir, paramDict),
        where paramDict maps learner setter names to arrays of candidate values.
    :raises ValueError: if learnerName is not one of the supported names.
    """
    if learnerName == "SVM":
        learner = LibSVM(kernel='gaussian', type="C_SVC", processes=numProcesses)
        loadMethod = ModelSelectUtils.loadRatschDataset
        dataDir += "benchmark/"
        outputDir += "classification/" + learnerName + "/"

        paramDict = {}
        paramDict["setC"] = learner.getCs()
        paramDict["setGamma"] = learner.getGammas()
    elif learnerName == "SVR":
        learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses)
        learner.normModelSelect = True
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        # The numpy.float alias was removed in NumPy 1.24; the builtin float
        # is the type it referred to, so the resulting arrays are unchanged.
        paramDict = {}
        paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=float)
        paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=float)
        paramDict["setEpsilon"] = learner.getEpsilons()
    elif learnerName == "CART":
        learner = DecisionTreeLearner(criterion="mse", maxDepth=30, minSplit=1, pruneType="CART", processes=numProcesses)
        learner.setChunkSize(2)
        loadMethod = ModelSelectUtils.loadRegressDataset
        dataDir += "regression/"
        outputDir += "regression/" + learnerName + "/"

        # Integer grid round(2**k - 1) for k = 1, 1.5, ..., 7. The numpy.int
        # alias was removed in NumPy 1.24; the builtin int is equivalent.
        paramDict = {}
        paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 7.5, 0.5)-1), dtype=int)
    else:
        # str() keeps the message identical for string names while making sure
        # a non-string argument still yields ValueError rather than TypeError.
        raise ValueError("Unknown learnerName: " + str(learnerName))

    return learner, loadMethod, dataDir, outputDir, paramDict
# Set-up for a model-penalisation experiment on the regression datasets using
# an epsilon-SVR learner: gamma and epsilon are fixed, and only C is put into
# the parameter grid.
dataDir = PathDefaults.getDataDir()
dataDir += "modelPenalisation/regression/"
outputDir = PathDefaults.getOutputDir() + "modelPenalisation/regression/SVR/"

figInd = 0

loadMethod = ModelSelectUtils.loadRegressDataset
datasets = ModelSelectUtils.getRegressionDatasets(True)

numProcesses = multiprocessing.cpu_count()
learner = LibSVM(kernel="rbf", processes=numProcesses, type="Epsilon_SVR")
learner.setChunkSize(3)

# The numpy.float alias was removed in NumPy 1.24; the builtin float is the
# type it referred to, so these grids are unchanged.
Cs = 2.0**numpy.arange(-10, 14, 2, dtype=float)
gammas = 2.0**numpy.arange(-10, 4, 2, dtype=float)
epsilons = learner.getEpsilons()

# Fix gamma at gammas[3], i.e. 2.0**-4.
gammaInd = 3
gamma = gammas[gammaInd]
learner.setGamma(gamma)

# Fix epsilon at the first value returned by the learner.
epsilonInd = 0
epsilon = epsilons[epsilonInd]
learner.setEpsilon(epsilon)

learner.normModelSelect = True

# Only C is varied during model selection.
paramDict = {}
paramDict["setC"] = Cs
numParams = Cs.shape[0]

#datasets = [datasets[1]]
numProcesses = 8 dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" datasetName = datasets[9] print(datasetName) j = 0 trainX, trainY, testX, testY = ModelSelectUtils.loadRegressDataset(dataDir, datasetName, j) learner = LibSVM(kernel='gaussian', type="Epsilon_SVR", processes=numProcesses) paramDict = {} paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=numpy.float) paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=numpy.float) paramDict["setEpsilon"] = learner.getEpsilons() foldsSet = numpy.arange(2, 31, 2) Cvs = numpy.array([1.0]) sampleMethod = Sampling.crossValidation sampleSize = 100 trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize] validX = trainX[trainInds,:] validY = trainY[trainInds] """ for i in range(foldsSet.shape[0]): folds = foldsSet[i] Cvs = numpy.array([folds-1.0])
def testModelSelect(self):
    """
    Run cross-validated model selection for pruned decision trees on
    synthetic regression data and print the test RMSE of the selected trees.

    Two configurations are exercised: "REP-CV" pruning with a grid over gamma
    and the pruning CV value, then "CART" pruning with a grid over gamma only.
    A comparison against an epsilon-SVR follows but is currently unreachable
    because of the early return. The errors are only printed; nothing is
    asserted.
    """
    numExamples = 200
    X, y = data.make_regression(numExamples, noise=0.5)
    X = Standardiser().standardiseArray(X)
    y = Standardiser().standardiseArray(y)

    # First 100 examples for training, the remainder for testing.
    trainX = X[0:100, :]
    trainY = y[0:100]
    testX = X[100:, :]
    testY = y[100:]

    learner = DecisionTreeLearner(maxDepth=20, minSplit=10, pruneType="REP-CV")
    learner.setPruneCV(8)

    # The numpy.int alias was removed in NumPy 1.24; the builtin int is the
    # type it referred to, so the grid is still [6, 8, 10].
    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 10)
    paramDict["setPruneCV"] = numpy.arange(6, 11, 2, dtype=int)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)

    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    learner = DecisionTreeLearner(maxDepth=20, minSplit=5, pruneType="CART")

    paramDict = {}
    paramDict["setGamma"] = numpy.linspace(0.0, 1.0, 50)

    folds = 5
    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestTree, cvGrid = learner.parallelModelSelect(trainX, trainY, idx, paramDict)

    predY = bestTree.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)

    # NOTE(review): this early return disables the SVM comparison below; it is
    # kept so that the test does exactly what it did before.
    return

    #Let's compare to the SVM
    learner2 = LibSVM(kernel='gaussian', type="Epsilon_SVR")

    # The numpy.float alias was removed in NumPy 1.24; use the builtin float.
    paramDict = {}
    paramDict["setC"] = 2.0**numpy.arange(-10, 14, 2, dtype=float)
    paramDict["setGamma"] = 2.0**numpy.arange(-10, 4, 2, dtype=float)
    paramDict["setEpsilon"] = learner2.getEpsilons()

    idx = Sampling.crossValidation(folds, trainX.shape[0])
    bestSVM, cvGrid = learner2.parallelModelSelect(trainX, trainY, idx, paramDict)

    predY = bestSVM.predict(testX)
    error = Evaluator.rootMeanSqError(testY, predY)
    print(error)