class StackedDenoisingAutoencoder:
    """Stack of sigmoid layers, each paired with a denoising autoencoder.

    Every hidden layer is built twice over the same weights: once as a
    ``Layer`` with a sigmoid activation (used for the supervised forward
    pass) and once as a ``DenoisingAutoEncoder`` that receives the sigmoid
    layer's ``W`` and ``b`` (used for layer-wise pre-training).  A
    ``LogisticRegression`` layer is placed on top of the last sigmoid layer.

    Relies on the names ``theano``, ``T``, ``Layer``, ``DenoisingAutoEncoder``
    and ``LogisticRegression`` being available at module level.
    """

    def __init__(self, numpyRng, theanoRng=None, nIn=28 * 28,
                 hiddenLayerSizes=(500, 500), nOut=10):
        """Build the symbolic graph for the whole stack.

        Args:
            numpyRng: random generator handed to every layer; also used to
                seed ``theanoRng`` (via ``randint``) when none is given.
            theanoRng: random stream passed to the denoising autoencoders;
                created from ``numpyRng`` when ``None``.
            nIn: size of the input fed to the first layer.
            hiddenLayerSizes: iterable of hidden layer sizes, first layer
                first.  Must not be empty.
            nOut: output size given to the logistic regression layer.

        Raises:
            ValueError: if ``hiddenLayerSizes`` is empty.
        """
        hiddenLayerSizes = list(hiddenLayerSizes)
        if not hiddenLayerSizes:
            raise ValueError("hiddenLayerSizes must contain at least one layer size")

        self.nLayers = len(hiddenLayerSizes)
        if theanoRng is None:
            theanoRng = theano.tensor.shared_randomstreams.RandomStreams(
                numpyRng.randint(2 ** 30))

        self.x = T.matrix('x')
        self.y = T.ivector('y')

        sigmoidLayers = []
        dALayers = []
        layerInput, layerInputSize = self.x, nIn
        for size in hiddenLayerSizes:
            sigmoidLayer = Layer(
                rng=numpyRng,
                input=layerInput,
                nIn=layerInputSize,
                nOut=size,
                activation=T.nnet.sigmoid)
            # The autoencoder is handed the sigmoid layer's W and b, so
            # pre-training it is what initialises the sigmoid layer.
            dALayer = DenoisingAutoEncoder(
                numpyRng=numpyRng,
                theanoRng=theanoRng,
                input=layerInput,
                nVisible=layerInputSize,
                nHidden=size,
                W=sigmoidLayer.W,
                bHidden=sigmoidLayer.b)
            sigmoidLayers.append(sigmoidLayer)
            dALayers.append(dALayer)
            layerInput, layerInputSize = sigmoidLayer.output, size

        self.sigmoidLayers = tuple(sigmoidLayers)
        self.dALayers = tuple(dALayers)

        print("created sda with layer shapes below.")
        for da in self.dALayers:
            print("layersize: %s" % (da.W.get_value().shape,))

        self.logLayer = LogisticRegression(
            self.sigmoidLayers[-1].output, hiddenLayerSizes[-1], nOut)

        # Flat list of everything fine-tuning differentiates with respect to.
        # NOTE(review): assumes Layer.params is an iterable of shared
        # variables and that LogisticRegression exposes W and b - confirm.
        self.params = [param
                       for layer in self.sigmoidLayers
                       for param in layer.params]
        self.params += [self.logLayer.W, self.logLayer.b]

        self.fineTuneCost = self.logLayer.negativeLogLikelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def pretrainingFunctions(self, trainSetX, batchSize):
        """Yield one compiled pre-training function per autoencoder layer.

        Each yielded function takes ``(index, corruption=0.2, learning=0.1)``,
        feeds rows ``[index * batchSize, (index + 1) * batchSize)`` of
        ``trainSetX`` in as ``self.x``, applies the updates returned by the
        layer's ``costFunctionAndUpdates`` and returns that layer's cost.
        """
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        learningRate = T.scalar("learning")
        batchBegin = batchSize * index
        batchEnd = batchBegin + batchSize

        for dA in self.dALayers:
            cost, updates = dA.costFunctionAndUpdates(corruptionLevel, learningRate)
            yield theano.function(
                inputs=[
                    index,
                    theano.Param(corruptionLevel, default=0.2),
                    theano.Param(learningRate, default=0.1),
                ],
                outputs=cost,
                updates=updates,
                givens={self.x: trainSetX[batchBegin:batchEnd]},
            )

    def pretrainingFunctionsWithOptimizer(self, trainSetX, batchSize, optimizer):
        """Like ``pretrainingFunctions`` but with caller-supplied updates.

        ``optimizer`` is called as ``optimizer(params, grads)`` with the
        values returned by each layer's ``costParamGrads`` and must return
        the updates passed to ``theano.function``.  Each yielded function
        takes ``(index, corruption=0.2)`` and returns the layer's cost.
        """
        index = T.lscalar("index")
        corruptionLevel = T.scalar('corruption')
        batchBegin = batchSize * index
        batchEnd = batchBegin + batchSize

        for dA in self.dALayers:
            cost, param, grads = dA.costParamGrads(corruptionLevel)
            yield theano.function(
                inputs=[
                    index,
                    theano.Param(corruptionLevel, default=0.2),
                ],
                outputs=cost,
                updates=optimizer(param, grads),
                givens={self.x: trainSetX[batchBegin:batchEnd]},
            )

    def fineTuneFunctions(self, datasets, batchSize, learningRate):
        """Compile the supervised training and scoring functions.

        Args:
            datasets: three ``(x, y)`` pairs in the order train, validation,
                test.  The validation and test ``x`` must offer
                ``get_value(borrow=True)``.
            batchSize: number of rows per minibatch.
            learningRate: step size of the plain gradient-descent update.

        Returns:
            ``(trainer, validationScore, testScore)``: ``trainer(index)``
            runs one update on the given training minibatch and returns the
            fine-tune cost; the two score callables take no arguments and
            return the list of per-minibatch errors over their full set.
        """
        index = T.lscalar('i')
        trainSetX, trainSetY = datasets[0]
        validSetX, validSetY = datasets[1]
        testSetX, testSetY = datasets[2]

        gparams = T.grad(self.fineTuneCost, self.params)
        updates = [
            (param, param - gparam * learningRate)
            for param, gparam in zip(self.params, gparams)
        ]

        batchBegin = index * batchSize
        batchEnd = (index + 1) * batchSize

        def makeGivens(x, y):
            return {self.x: x[batchBegin:batchEnd],
                    self.y: y[batchBegin:batchEnd]}

        trainer = theano.function(
            inputs=[index],
            outputs=self.fineTuneCost,
            updates=updates,
            givens=makeGivens(trainSetX, trainSetY),
            name='train'
        )
        testScoreI = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(testSetX, testSetY),
            name='test'
        )
        validScoreI = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens=makeGivens(validSetX, validSetY),
            name='valid'
        )

        def batchCount(sharedX):
            # Evaluated on every call so it follows the current stored value.
            return sharedX.get_value(borrow=True).shape[0] // batchSize

        def validationScore():
            return [validScoreI(i) for i in range(batchCount(validSetX))]

        def testScore():
            # Sized from the test set; this used to read the validation set.
            return [testScoreI(i) for i in range(batchCount(testSetX))]

        return trainer, validationScore, testScore

    def preTrain(self, data, batchSize=20, preLearningRate=0.1,
                 corruptionLevels=(.1, .2, .3), epochs=15):
        """Run layer-wise pre-training and print the mean cost per epoch.

        Args:
            data: training inputs; must offer ``get_value(borrow=True)``
                and is passed on to ``pretrainingFunctions``.
            batchSize: number of rows per minibatch.
            preLearningRate: learning rate passed to every layer's trainer.
            corruptionLevels: one corruption level per autoencoder layer.
            epochs: number of passes over ``data`` for each layer.

        Raises:
            AssertionError: if the number of corruption levels differs from
                the number of pre-training functions.
        """
        import numpy

        preTrainer = list(self.pretrainingFunctions(data, batchSize=batchSize))
        assert len(corruptionLevels) == len(preTrainer), \
            "given corruption levels do not correspond to the layers!!!"

        nBatches = data.get_value(borrow=True).shape[0] // batchSize
        for i, (trainer, corruptionLevel) in enumerate(zip(preTrainer, corruptionLevels)):
            for epoch in range(epochs):
                print('Pre-training layer %i, epoch %d start' % (i, epoch))
                trainScores = [trainer(batchIndex, corruptionLevel, preLearningRate)
                               for batchIndex in range(nBatches)]
                # Two spaces after "cost" keep the text identical to the
                # old two-argument print statement.
                print('Pre-training layer %i, epoch %d, cost  %s'
                      % (i, epoch, numpy.mean(trainScores)))
class LRTest:
    """Minibatch gradient-descent trainer for a logistic regression model.

    Builds the symbolic graph and the compiled train / validation / test
    functions in ``__init__``; ``train`` runs the early-stopping loop as a
    generator and ``doTrain`` prints its progress.
    """

    def __init__(self, dataPath="mnist.pkl.gz", batchSize=100, learningRate=0.12):
        """Load the data and compile the three Theano functions.

        Args:
            dataPath: path handed to ``util.loadMnistData``.
            batchSize: rows per minibatch, stored as ``self.BATCH_SIZE``.
            learningRate: gradient-descent step size, stored as
                ``self.LEARNING_RATE``.
        """
        import theano
        import util
        from theano import tensor as T
        from logistic_regression import LogisticRegression

        self.BATCH_SIZE = batchSize
        self.LEARNING_RATE = learningRate
        # Unpacked below as (train, validation, test); each entry is indexed
        # as [0] for inputs and [1] for labels.
        self.dataSets = util.loadMnistData(dataPath)

        self.x = T.dmatrix('x')
        self.y = T.ivector('y')
        self.index = T.iscalar('index')

        self.classifier = LogisticRegression(input=self.x, nIn=28 * 28, nOut=10)
        self.cost = self.classifier.negativeLogLikelihood(self.y)
        self.gW = T.grad(cost=self.cost, wrt=self.classifier.W)
        self.gB = T.grad(cost=self.cost, wrt=self.classifier.b)

        self.trainSet, self.validSet, self.testSet = self.dataSets
        self.nTrainSet, self.nValidSet, self.nTestSet = [
            self.numBatches(dataSet) for dataSet in self.dataSets
        ]

        updates = [
            (self.classifier.W, self.classifier.W - self.LEARNING_RATE * self.gW),
            (self.classifier.b, self.classifier.b - self.LEARNING_RATE * self.gB),
        ]

        batchBegin = self.index * self.BATCH_SIZE
        batchEnd = (self.index + 1) * self.BATCH_SIZE

        def makeGivens(data):
            return {
                self.x: data[0][batchBegin:batchEnd],
                self.y: data[1][batchBegin:batchEnd],
            }

        self.testModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[2])
        )
        self.validationModel = theano.function(
            inputs=[self.index],
            outputs=self.classifier.errors(self.y),
            givens=makeGivens(self.dataSets[1])
        )
        self.trainModel = theano.function(
            inputs=[self.index],
            outputs=self.cost,
            updates=updates,
            givens=makeGivens(self.dataSets[0])
        )

    def numBatches(self, dataSet):
        """Return how many whole minibatches fit into ``dataSet[0]``."""
        # Floor division keeps this an int on Python 3 as well.
        return dataSet[0].get_value(borrow=True).shape[0] // self.BATCH_SIZE

    def printValid(self, epoch, batchIndex, loss):
        """Format the validation-error progress text (``loss`` as a fraction)."""
        return 'epoch %i, minibatch %i/%i, validation error %f %%' % (
            epoch, batchIndex + 1, self.nTrainSet, loss * 100.
        )

    def printTestScore(self, epoch, batchIndex, score):
        """Format the test-error progress text (``score`` as a fraction)."""
        return (
            ' epoch %i, minibatch %i/%i, test error of'
            ' best model %f %%'
        ) % (
            epoch, batchIndex + 1, self.nTrainSet, score * 100.
        )

    def resultString(self, best, test):
        """Format the final summary from two error fractions."""
        return ('Optimization complete with best validation score of %f %%,'
                'with test performance %f %%') % (best * 100., test * 100.)

    def train(self, maxEpochs=1000, patience=5000, patienceIncrease=2,
              improveThresh=0.995):
        """Run early-stopping training, yielding after every validation pass.

        Args:
            maxEpochs: upper bound on passes over the training batches.
            patience: iteration count after which training stops; raised
                to ``iteration * patienceIncrease`` whenever the validation
                loss drops below ``improveThresh`` times the best so far.
            patienceIncrease: multiplier used when extending ``patience``.
            improveThresh: relative improvement that counts as significant.

        Yields:
            ``(epoch, batchIndex, loss, testLoss, bestValidationLoss)`` where
            ``testLoss`` is the test error measured at the most recent
            validation improvement (``0`` before the first one).
        """
        import numpy

        if self.nTrainSet <= 0:
            # No training batches: nothing to train or validate on.
            return

        # At least 1 so the modulo below can never divide by zero.
        validationFreq = max(1, min(self.nTrainSet, patience // 2))
        bestValidationLoss = numpy.inf
        testLoss = 0
        epoch = 0
        done = False

        while epoch < maxEpochs and not done:
            epoch += 1
            for batchIndex in range(self.nTrainSet):
                self.trainModel(batchIndex)
                iteration = (epoch - 1) * self.nTrainSet + batchIndex

                if (iteration + 1) % validationFreq == 0:
                    loss = numpy.mean(
                        [self.validationModel(i) for i in range(self.nValidSet)])
                    if loss < bestValidationLoss:
                        if loss < bestValidationLoss * improveThresh:
                            patience = max(patience, iteration * patienceIncrease)
                        bestValidationLoss = loss
                        testLoss = numpy.mean(
                            [self.testModel(i) for i in range(self.nTestSet)])
                    yield epoch, batchIndex, loss, testLoss, bestValidationLoss

                if patience <= iteration:
                    done = True
                    break

    def doTrain(self):
        """Train with the default settings and print progress and a summary."""
        bestScore = testScore = None
        for epoch, batchIndex, loss, testScore, bestScore in self.train():
            message = self.printValid(epoch, batchIndex, loss)
            message += self.printTestScore(epoch, batchIndex, testScore)
            print(message)

        if bestScore is None:
            # train() produced nothing, so there is no score to summarise.
            print('No validation pass was run; nothing to report.')
        else:
            print(self.resultString(bestScore, testScore))