Beispiel #1
0
class StackedDenoisingAutoencoder(object):
    """Stack of sigmoid layers, each paired with a denoising autoencoder.

    Every hidden layer is built twice over the same input: once as a sigmoid
    ``Layer`` and once as a ``DenoisingAutoEncoder`` that is handed the sigmoid
    layer's ``W`` and ``b``, so both objects refer to the same weights.  A
    ``LogisticRegression`` layer is placed on top of the last sigmoid layer.

    Attributes set by the constructor:
        x, y            symbolic input matrix and integer label vector
        sigmoidLayers   tuple of sigmoid layers, input side first
        dALayers        tuple of denoising autoencoders, same order
        logLayer        the logistic regression output layer
        params          flat list of the parameters used for fine-tuning
        fineTuneCost    negative log likelihood of ``logLayer`` given ``y``
        errors          ``logLayer.errors(y)``
    """

    def __init__(self,
                 numpyRng,
                 theanoRng=None,
                 nIn=28*28,
                 hiddenLayerSizes=(500, 500),
                 nOut=10):
        """Build the symbolic graph of the whole network.

        numpyRng          random generator; must provide ``randint`` and is
                          forwarded to every layer
        theanoRng         symbolic random stream; created from ``numpyRng``
                          when omitted
        nIn               size of the input vectors
        hiddenLayerSizes  sizes of the hidden layers, input side first
        nOut              number of output classes

        Raises ValueError when ``hiddenLayerSizes`` is empty.
        """
        # Work on a private copy so any iterable is accepted and the caller's
        # object is never touched.
        hiddenLayerSizes = list(hiddenLayerSizes)
        if not hiddenLayerSizes:
            raise ValueError("hiddenLayerSizes must contain at least one layer size")
        self.nLayers = len(hiddenLayerSizes)
        if theanoRng is None:
            theanoRng = theano.tensor.shared_randomstreams.RandomStreams(numpyRng.randint(2 ** 30))
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        sigmoidLayers = []
        dALayers = []
        layerInput = self.x
        inputSize = nIn
        for size in hiddenLayerSizes:
            sigmoidLayer = Layer(rng=numpyRng,
                                 input=layerInput,
                                 nIn=inputSize,
                                 nOut=size,
                                 activation=T.nnet.sigmoid)
            # The autoencoder reads the same input and reuses the sigmoid
            # layer's weight matrix and bias.
            daLayer = DenoisingAutoEncoder(
                numpyRng=numpyRng, theanoRng=theanoRng, input=layerInput,
                nVisible=inputSize,
                nHidden=size,
                W=sigmoidLayer.W,
                bHidden=sigmoidLayer.b)
            sigmoidLayers.append(sigmoidLayer)
            dALayers.append(daLayer)
            # The next layer is stacked on this layer's output.
            layerInput = sigmoidLayer.output
            inputSize = size
        self.sigmoidLayers = tuple(sigmoidLayers)
        self.dALayers = tuple(dALayers)

        print("created sda with layer shapes below.")
        for da in self.dALayers:
            print("layersize: %s" % (da.W.get_value().shape,))

        self.logLayer = LogisticRegression(self.sigmoidLayers[-1].output,hiddenLayerSizes[-1],nOut)
        # T.grad needs a flat list of variables: flatten the per-layer lists
        # and append the output layer's weights (not its cost expression).
        # NOTE(review): assumes each Layer.params is an iterable of shared
        # variables and that LogisticRegression exposes W and b - confirm.
        self.params = [param
                       for layer in self.sigmoidLayers
                       for param in layer.params]
        self.params += [self.logLayer.W, self.logLayer.b]
        self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def _compilePretrainer(self, index, extraInputs, cost, updates, trainSetX, batchSize):
        """Compile one pretraining step that runs on minibatch ``index``."""
        batchBegin = batchSize * index
        batchEnd = batchBegin + batchSize
        return theano.function(
            inputs=[index] + list(extraInputs),
            outputs=cost,
            updates=updates,
            givens={self.x:trainSetX[batchBegin:batchEnd]},
        )

    def pretrainingFunctions(self,trainSetX,batchSize):
        """Yield one compiled pretraining function per autoencoder layer.

        Each function is called as ``f(index, corruption=0.2, learning=0.1)``
        and returns the layer's cost on minibatch ``index`` of ``trainSetX``
        while applying the updates from ``costFunctionAndUpdates``.
        """
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        learningRate = T.scalar("learning")
        for dA in self.dALayers:
            cost,updates = dA.costFunctionAndUpdates(corruptionLevel,learningRate)
            # NOTE(review): theano.Param is reported as deprecated in newer
            # Theano releases - confirm against the installed version.
            extraInputs = [
                theano.Param(corruptionLevel,default=0.2),
                theano.Param(learningRate,default=0.1),
            ]
            yield self._compilePretrainer(index,extraInputs,cost,updates,trainSetX,batchSize)

    def pretrainingFunctionsWithOptimizer(self,trainSetX,batchSize,optimizer):
        """Like ``pretrainingFunctions`` but with caller-supplied updates.

        ``optimizer(params, grads)`` receives the parameters and gradients
        returned by each layer's ``costParamGrads`` and must return the
        updates passed to ``theano.function``.  The yielded functions are
        called as ``f(index, corruption=0.2)``.
        """
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        for dA in self.dALayers:
            cost, param, grads = dA.costParamGrads(corruptionLevel)
            updates = optimizer(param,grads)
            extraInputs = [theano.Param(corruptionLevel,default=0.2)]
            yield self._compilePretrainer(index,extraInputs,cost,updates,trainSetX,batchSize)

    def fineTuneFunctions(self,datasets,batchSize,learningRate):
        """Compile the supervised fine-tuning functions.

        datasets      three ``(x, y)`` pairs: train, validation, test
        batchSize     number of rows per minibatch
        learningRate  step size of the plain gradient descent update

        Returns ``(trainer, validationScore, testScore)``: ``trainer(i)`` does
        one update on train minibatch ``i`` and returns the cost; the other
        two take no arguments and return the list of per-minibatch errors on
        their own dataset.
        """
        index = T.lscalar('i')
        trainSetX,trainSetY = datasets[0]
        validSetX,validSetY = datasets[1]
        testSetX,testSetY = datasets[2]
        gparams = T.grad(self.fineTuneCost,self.params)
        updates = [
            (param,param-gparam*learningRate)
            for param,gparam in zip(self.params,gparams)
        ]
        def makeGivens(x,y):
            batch = slice(index*batchSize,(index+1)*batchSize)
            return {self.x:x[batch],
                    self.y:y[batch]}
        def numBatches(setX):
            # Floor division: an incomplete trailing minibatch is skipped.
            return setX.get_value(borrow=True).shape[0] // batchSize
        trainer = theano.function(
            inputs=[index],
            outputs=self.fineTuneCost,
            updates=updates,
            givens=makeGivens(trainSetX,trainSetY),
            name='train'
        )
        testScoreI=theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(testSetX,testSetY),
            name='test'
        )
        validScoreI=theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(validSetX,validSetY),
            name='valid'
        )

        def validationScore():
            return [validScoreI(i) for i in range(numBatches(validSetX))]

        def testScore():
            # The batch count comes from the test set itself, not the
            # validation set.
            return [testScoreI(i) for i in range(numBatches(testSetX))]

        return trainer,validationScore,testScore

    def preTrain(self,
                 data,
                 batchSize=20,
                 preLearningRate=0.1,
                 corruptionLevels=(.1,.2,.3),
                 epochs=15):
        """Run layer-wise pretraining and print the mean cost per epoch.

        data              training inputs; must provide ``get_value`` and be
                          accepted by ``pretrainingFunctions``
        batchSize         number of rows per minibatch
        preLearningRate   learning rate passed to every pretraining function
        corruptionLevels  one corruption level per autoencoder layer; the
                          length must equal the number of layers
        epochs            number of passes over the data for each layer
        """
        import numpy
        preTrainer = list(self.pretrainingFunctions(data,batchSize=batchSize))
        assert len(corruptionLevels) == len(preTrainer) , "given corruption levels do not correspond to the layers!!!"
        # Loop-invariant, and get_value may be costly, so compute it once.
        nBatches = data.get_value(borrow=True).shape[0] // batchSize
        for i,(trainer,corruptionLevel) in enumerate(zip(preTrainer,corruptionLevels)):
            for epoch in range(epochs):
                print('Pre-training layer %i, epoch %d start' % (i,epoch))
                trainScores = [trainer(batchIndex,corruptionLevel,preLearningRate)
                               for batchIndex in range(nBatches)]
                print('Pre-training layer %i, epoch %d, cost  %s' % (i, epoch, numpy.mean(trainScores)))
Beispiel #2
0
class LRTest(object):
    """Minibatch gradient-descent training of a logistic regression model.

    The constructor loads the dataset through ``util.loadMnistData`` and
    compiles three Theano functions (``trainModel``, ``validationModel``,
    ``testModel``), each taking a minibatch index.  ``train`` runs the
    early-stopping loop as a generator and ``doTrain`` prints its progress.
    """

    def __init__(self, dataPath="mnist.pkl.gz", batchSize=100, learningRate=0.12):
        """Load the data and compile the model functions.

        dataPath      file handed to ``util.loadMnistData``
        batchSize     number of rows per minibatch (stored as BATCH_SIZE)
        learningRate  gradient descent step size (stored as LEARNING_RATE)
        """
        import theano
        import util
        from theano import tensor as T
        from logistic_regression import LogisticRegression
        self.BATCH_SIZE = batchSize
        self.LEARNING_RATE = learningRate
        self.dataSets = util.loadMnistData(dataPath)
        self.x = T.dmatrix('x')
        self.y = T.ivector('y')
        self.index = T.iscalar('index')
        self.classifier = LogisticRegression(input=self.x, nIn=28 * 28, nOut=10)
        self.cost = self.classifier.negativeLogLikelihood(self.y)
        self.gW = T.grad(cost=self.cost, wrt=self.classifier.W)
        self.gB = T.grad(cost=self.cost, wrt=self.classifier.b)
        self.trainSet, self.validSet, self.testSet = self.dataSets
        self.nTrainSet, self.nValidSet, self.nTestSet = [
            self.numBatches(dataSet) for dataSet in self.dataSets
        ]
        updates = [
            (self.classifier.W, self.classifier.W - self.LEARNING_RATE * self.gW),
            (self.classifier.b, self.classifier.b - self.LEARNING_RATE * self.gB)
        ]

        def makeGivens(data):
            # Substitute minibatch number `index` of (inputs, labels).
            batch = slice(self.index * self.BATCH_SIZE,
                          (self.index + 1) * self.BATCH_SIZE)
            return {
                self.x: data[0][batch],
                self.y: data[1][batch]
            }

        self.testModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[2])
        )
        self.validationModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[1])
        )
        self.trainModel = theano.function(
            inputs=[self.index],
            outputs=self.cost,
            updates=updates,
            givens=makeGivens(self.dataSets[0])
        )

    def numBatches(self, dataSet):
        """Return the number of complete minibatches in ``dataSet``.

        ``dataSet[0]`` must provide ``get_value``; a trailing partial
        minibatch is not counted.
        """
        return dataSet[0].get_value(borrow=True).shape[0] // self.BATCH_SIZE

    def printValid(self, epoch, batchIndex, loss):
        """Format the validation error (``loss`` as a fraction) as a percentage."""
        return 'epoch %i, minibatch %i/%i, validation error %f %%' % (
            epoch,
            batchIndex + 1,
            self.nTrainSet,
            loss * 100.
        )

    def printTestScore(self, epoch, batchIndex, score):
        """Format the test error of the best model as a percentage."""
        return (
                   '     epoch %i, minibatch %i/%i, test error of'
                   ' best model %f %%'
               ) % (
                   epoch,
                   batchIndex + 1,
                   self.nTrainSet,
                   score * 100.
               )

    def resultString(self, best, test):
        """Format the final summary line from two error fractions."""
        return ('Optimization complete with best validation score of %f %%,'
                'with test performance %f %%') % (best * 100., test * 100.)

    def train(self, patience=5000, patienceIncrease=2, maxEpochs=1000, improveThresh=0.995):
        """Run early-stopping training, yielding after every validation pass.

        patience          minimum number of minibatch iterations to run
        patienceIncrease  factor applied to the current iteration to extend
                          ``patience`` after a significant improvement
        maxEpochs         upper bound on the number of passes over the data
        improveThresh     a new loss below ``best * improveThresh`` counts as
                          significant

        Yields ``(epoch, batchIndex, loss, testLoss, bestValidationLoss)``.
        ``testLoss`` is only recomputed when the validation loss improves.
        """
        import numpy

        if self.nTrainSet <= 0:
            # Without training minibatches no validation can ever happen.
            return
        # At least 1 so the modulo below cannot divide by zero.
        validationFreq = max(1, min(self.nTrainSet, patience // 2))
        bestValidationLoss = numpy.inf
        epoch = 0
        done = False
        testLoss = 0

        while (epoch < maxEpochs) and not done:
            epoch += 1
            for batchIndex in range(self.nTrainSet):
                self.trainModel(batchIndex)
                iteration = (epoch - 1) * self.nTrainSet + batchIndex
                if (iteration + 1) % validationFreq == 0:
                    loss = numpy.mean(
                        [self.validationModel(i) for i in range(self.nValidSet)])
                    if loss < bestValidationLoss:
                        if loss < bestValidationLoss * improveThresh:
                            patience = max(patience, iteration * patienceIncrease)
                        bestValidationLoss = loss
                        testLoss = numpy.mean(
                            [self.testModel(i) for i in range(self.nTestSet)])
                    yield epoch,batchIndex,loss, testLoss, bestValidationLoss
                if patience <= iteration:
                    done = True
                    break

    def doTrain(self):
        """Train with the default settings and print progress and a summary."""
        lastResult = None
        for lastResult in self.train():
            epoch, batchIndex, loss, testScore, _ = lastResult
            message = self.printValid(epoch,batchIndex,loss)
            message += self.printTestScore(epoch,batchIndex,testScore)
            print(message)

        if lastResult is None:
            # train() never validated, so there are no scores to summarise.
            print('No validation step was run; nothing to report.')
            return
        bestScore, testScore = lastResult[4], lastResult[3]
        print(self.resultString(bestScore,testScore))