Ejemplo n.º 1
0
 def testParallelPen(self):
     """Check that parallelPen reports infinite penalties once gamma exceeds the tree size.

     Runs ``learner.parallelPen`` over a grid of ``setGamma`` values with
     3-fold cross validation, then measures the mean size of the trees learnt
     on each training fold with gamma set to 2**10. Penalties for gamma values
     above that mean size must all be infinite; those below it must not all be
     infinite.
     """
     numExamples = 100
     X, y = data.make_regression(numExamples)
     learner = DecisionTreeLearner(pruneType="CART", maxDepth=10, minSplit=2)

     # Gamma grid: 2**e - 1 for e = 1, 1.5, ..., 9.5, rounded to integers.
     # The builtin int is used because the numpy.int alias was removed in
     # NumPy 1.24 (it was only ever an alias for the builtin).
     paramDict = {}
     paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 10, 0.5)-1), dtype=int)

     folds = 3
     alpha = 1.0
     Cvs = numpy.array([(folds-1)*alpha])

     idx = Sampling.crossValidation(folds, X.shape[0])

     resultsList = learner.parallelPen(X, y, idx, paramDict, Cvs)

     # One result per entry of Cvs; only a single Cv value was supplied.
     learner, trainErrors, currentPenalties = resultsList[0]

     # Gamma of 2**10 is larger than every value in the grid above.
     # NOTE(review): presumably this leaves the tree unpruned -- confirm
     # against DecisionTreeLearner.setGamma.
     learner.setGamma(2**10)
     treeSize = 0
     # Work out the mean size of the tree over the training folds
     for trainInds, testInds in idx:
         trainX = X[trainInds, :]
         trainY = y[trainInds]

         learner.learnModel(trainX, trainY)
         treeSize += learner.tree.size

     treeSize /= float(folds)

     gammas = paramDict["setGamma"]
     self.assertTrue(numpy.isinf(currentPenalties[gammas > treeSize]).all())
     # NOTE(review): this evaluates as not (isinf(...).all()), i.e. "at least
     # one penalty is finite", and fails on an empty selection. Kept as in the
     # original; confirm whether "no penalty is infinite" was intended.
     self.assertTrue(not numpy.isinf(currentPenalties[gammas < treeSize]).all())
Ejemplo n.º 2
0
 def testLearningRate(self):
     """Check learner.learningRate against a direct computation of the betas.

     For each gamma, the reference value is minus the slope of a linear
     regression of ``log(penalty) + log(folds)`` on
     ``log((folds - 1) / folds * sampleSize)``, using the penalties returned
     by ``learner.parallelPen`` for every fold count in ``foldsSet``. Gammas
     with fewer than two finite, positive penalties keep a beta of zero.
     """
     numExamples = 100
     trainX, trainY = data.make_regression(numExamples)
     trainX = Standardiser().normaliseArray(trainX)
     trainY = Standardiser().normaliseArray(trainY)
     learner = DecisionTreeLearner(pruneType="CART", maxDepth=20, minSplit=1)

     foldsSet = numpy.arange(2, 7, 2)

     # Builtin int/float are used throughout: the numpy.int and numpy.float
     # aliases were removed in NumPy 1.24.
     gammas = numpy.array(numpy.round(2**numpy.arange(1, 8, 1)-1), dtype=int)
     paramDict = {}
     paramDict["setGamma"] = gammas

     betaGrid = learner.learningRate(trainX, trainY, foldsSet, paramDict)

     # Compute beta more directly
     numParams = gammas.shape[0]
     sampleSize = trainX.shape[0]
     sampleMethod = Sampling.crossValidation
     Cvs = numpy.array([1])
     penalties = numpy.zeros((foldsSet.shape[0], numParams))
     betas = numpy.zeros(numParams)

     # One row of penalties per fold count
     for k, folds in enumerate(foldsSet):
         logging.debug("Folds %s", folds)

         idx = sampleMethod(folds, sampleSize)

         # Now try penalisation
         resultsList = learner.parallelPen(trainX, trainY, idx, paramDict, Cvs)
         bestLearner, trainErrors, currentPenalties = resultsList[0]
         penalties[k, :] = currentPenalties

     foldsAsFloat = numpy.array(foldsSet, dtype=float)

     for i in range(numParams):
         # Only finite, strictly positive penalties have a usable logarithm
         inds = numpy.logical_and(numpy.isfinite(penalties[:, i]), penalties[:, i] > 0)
         tempPenalties = penalties[:, i][inds]
         tempfoldsSet = foldsAsFloat[inds]

         # A slope needs at least two points
         if tempPenalties.shape[0] > 1:
             x = numpy.log((tempfoldsSet-1)/tempfoldsSet*sampleSize)
             y = numpy.log(tempPenalties)+numpy.log(tempfoldsSet)

             clf = linear_model.LinearRegression()
             clf.fit(numpy.array([x]).T, y)
             betas[i] = clf.coef_[0]

     betas = -betas

     nptst.assert_array_equal(betaGrid, betas)