def testLearningRate(self):
    """Check learner.learningRate against a direct computation of beta.

    The direct computation collects, for every fold count in foldsSet, the
    penalties returned by learner.parallelPen, and then for every gamma fits
    a linear regression of log(penalty) + log(folds) on
    log((folds - 1) / folds * sampleSize). The expected beta for that gamma
    is the negated slope of the fit.
    """
    numExamples = 100
    trainX, trainY = data.make_regression(numExamples)
    trainX = Standardiser().normaliseArray(trainX)
    trainY = Standardiser().normaliseArray(trainY)

    learner = DecisionTreeLearner(pruneType="CART", maxDepth=20, minSplit=1)

    foldsSet = numpy.arange(2, 7, 2)

    # numpy.int / numpy.float were plain aliases of the builtins and have been
    # removed from NumPy (1.24+); the builtins give the same dtypes.
    gammas = numpy.array(numpy.round(2**numpy.arange(1, 8, 1) - 1), dtype=int)
    paramDict = {"setGamma": gammas}

    betaGrid = learner.learningRate(trainX, trainY, foldsSet, paramDict)

    #Compute beta more directly
    numParams = gammas.shape[0]
    sampleSize = trainX.shape[0]
    sampleMethod = Sampling.crossValidation
    Cvs = numpy.array([1])
    penalties = numpy.zeros((foldsSet.shape[0], numParams))
    betas = numpy.zeros(numParams)

    for k, folds in enumerate(foldsSet):
        logging.debug("Folds %s", folds)

        idx = sampleMethod(folds, trainX.shape[0])

        #Now try penalisation
        resultsList = learner.parallelPen(trainX, trainY, idx, paramDict, Cvs)
        # Only the penalties of the first (and only) Cv entry are needed here.
        _, _, currentPenalties = resultsList[0]
        penalties[k, :] = currentPenalties

    for i in range(numParams):
        # Keep only fold counts whose penalty is finite and strictly positive,
        # since the logarithm is taken below.
        inds = numpy.logical_and(numpy.isfinite(penalties[:, i]), penalties[:, i] > 0)
        tempPenalties = penalties[:, i][inds]
        tempfoldsSet = numpy.array(foldsSet, float)[inds]

        # A slope needs at least two points; otherwise beta stays at zero.
        if tempPenalties.shape[0] > 1:
            x = numpy.log((tempfoldsSet - 1) / tempfoldsSet * sampleSize)
            y = numpy.log(tempPenalties) + numpy.log(tempfoldsSet)

            clf = linear_model.LinearRegression()
            clf.fit(numpy.array([x]).T, y)
            betas[i] = clf.coef_[0]

    betas = -betas

    nptst.assert_array_equal(betaGrid, betas)
# For the current dataset: build a CART-pruned decision tree learner, then for
# each sample size and realisation draw a random subsample of the training
# data, compute learner.learningRate on it, print the result and plot it
# against gammas.
# NOTE(review): datasetName, sampleSizes, foldsSet, numParams, gammas,
# numRealisations, loadMethod, dataDir, outputDir, paramDict and numProcesses
# are defined outside this fragment.
logging.debug("Dataset " + datasetName)
learner = DecisionTreeLearner(criterion="mse", maxDepth=100, minSplit=1, pruneType="CART", processes=numProcesses)
learner.setChunkSize(3)

# Built here but not used anywhere in the visible fragment.
outfileName = outputDir + datasetName + "Beta"

for m in range(sampleSizes.shape[0]):
    sampleSize = sampleSizes[m]
    logging.debug("Sample size " + str(sampleSize))

    # penalties is not read in the visible fragment, and this zero-filled
    # betas is replaced by the learningRate result below.
    penalties = numpy.zeros((foldsSet.shape[0], numParams))
    betas = numpy.zeros((gammas.shape[0], sampleSizes.shape[0]))

    for j in range(numRealisations):
        logging.debug("Realisation: " + str(j))

        trainX, trainY, testX, testY = loadMethod(dataDir, datasetName, j)

        # Re-seeding before every permutation means the same row indices are
        # selected whenever trainX has the same number of rows.
        numpy.random.seed(21)
        trainInds = numpy.random.permutation(trainX.shape[0])[0:sampleSize]
        validX = trainX[trainInds,:]
        validY = trainY[trainInds]

        betas = learner.learningRate(validX, validY, foldsSet, paramDict)

        print(betas)
        plt.plot(gammas, betas)
        plt.show()
        # NOTE(review): the original indentation was lost, so the nesting level
        # of this break is a guess (innermost loop: stop after the first
        # realisation of each sample size) -- confirm against the original file.
        break