def sgd_optimization(learning_rate=0.1, n_epochs=1000, batch_size=200):
    datasets = LF.load_cifar()

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute the number of minibatches for training and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data, presented as rasterized images
    y = T.ivector('y')   # the labels, presented as a 1D vector of [int] labels

    # each CIFAR image is flattened to 1024 values; 10 output classes
    classifier = LogisticRegression(input=x, n_in=1024, n_out=10)

    # compiled function that computes the misclassification rate on a test minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # the cost to minimise is the negative log-likelihood of the model
    cost = classifier.neg_log_hood(y)

    # gradients of the cost with respect to the model parameters
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # one step of gradient descent on W and b
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'

    test_score = 0.
    test_score_opt = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        # one pass over the training set
        train_cost_vector = [train_model(i) for i in xrange(n_train_batches)]
        train_cost = numpy.mean(train_cost_vector) / batch_size

        # evaluate on the test set
        test_losses = [test_model(i) for i in xrange(n_test_batches)]
        test_score = numpy.mean(test_losses)

        print 'epoch %d with train cost %.2f, test error %.2f%%' % \
            (epoch, train_cost, test_score * 100.)

        # stop once the training cost drops below the chosen threshold
        if train_cost < 1920:
            test_losses_opt = [test_model(i) for i in xrange(n_test_batches)]
            test_score_opt = numpy.mean(test_losses_opt)

            # save the best model
            with open('best_model_logistic.pkl', 'wb') as f:
                cPickle.dump(classifier, f, protocol=cPickle.HIGHEST_PROTOCOL)
            break

    end_time = timeit.default_timer()

    print 'Optimization complete with test performance %.2f %%' % (test_score_opt * 100.)
    print 'The code runs for %.1fs' % (end_time - start_time)
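
# Minimal sketch of reloading the pickled classifier for prediction; this is
# not part of the original code. It relies only on classifier.W and
# classifier.b (the parameters updated above) and assumes the usual
# softmax(x.W + b) parameterisation of logistic regression; the file name
# matches the one written by sgd_optimization.
def predict_from_saved_model(n_examples=10):
    with open('best_model_logistic.pkl', 'rb') as f:
        classifier = cPickle.load(f)

    # rebuild the prediction graph from the saved parameters
    x_new = T.matrix('x_new')
    p_y_given_x = T.nnet.softmax(T.dot(x_new, classifier.W) + classifier.b)
    y_pred = T.argmax(p_y_given_x, axis=1)
    predict_model = theano.function(inputs=[x_new], outputs=y_pred)

    datasets = LF.load_cifar()
    test_set_x, test_set_y = datasets[1]

    predicted = predict_model(test_set_x.get_value()[:n_examples])
    print 'Predicted labels for the first %d test examples:' % n_examples
    print predicted
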
class DBN(object):
    def __init__(self, n_ins=1024, hidden_layers_sizes=[500, 500], n_outs=10):

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        # random number generator used to initialise the layer weights
        numpy_rng = numpy.random.RandomState(123)

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector
                                 # of [int] labels

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # construct an RBM that shares weights with this layer
            rbm_layer = RBM(input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # we now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.neg_log_hood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, one per layer, for performing one
        step of gradient descent at that layer. Each function requires the
        minibatch index as input, and to train an RBM you just need to
        iterate, calling the corresponding function on all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k
        '''
        index = T.lscalar('index')      # index to a [mini]batch
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # get the cost and the parameter updates for this RBM
            # (the cost used here is the reconstruction error)
            cost, updates = rbm.cost_updates(learning_rate, k=k)

            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.01)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[index * batch_size:
                                            (index + 1) * batch_size]}
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a batch
        from the validation set, and a function `test` that computes the
        error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: a list that contains all the datasets; it has to
                         contain three pairs, `train`, `valid`, `test`, in
                         this order, where each pair is formed of two Theano
                         variables, one for the datapoints, the other for
                         the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during the finetune stage
        '''
        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute the number of minibatches for validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: test_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: test_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        return train_fn, valid_fn, test_fn
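
# Minimal usage sketch for the DBN class above, not taken from the original
# code. The epoch counts, CD steps and learning rates are illustrative
# assumptions, and it assumes LF.load_cifar() returns the three
# (train, valid, test) pairs that build_finetune_functions expects.
def train_dbn_sketch(pretrain_epochs=10, finetune_epochs=30, batch_size=20):
    datasets = LF.load_cifar()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

    dbn = DBN(n_ins=1024, hidden_layers_sizes=[500, 500], n_outs=10)

    # greedy layer-wise pre-training: for each RBM, iterate over all
    # minibatch indexes, as described in the pretraining_functions docstring
    pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                             batch_size=batch_size, k=1)
    for layer_idx, layer_fn in enumerate(pretrain_fns):
        for epoch in xrange(pretrain_epochs):
            costs = [layer_fn(i) for i in xrange(n_train_batches)]
            print 'layer %d, epoch %d, pre-training cost %.2f' % \
                (layer_idx, epoch, numpy.mean(costs))

    # supervised fine-tuning of the whole stack with plain minibatch SGD
    train_fn, valid_fn, test_fn = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=0.1)
    for epoch in xrange(finetune_epochs):
        for i in xrange(n_train_batches):
            train_fn(i)
        valid_err = numpy.mean([valid_fn(i) for i in xrange(n_valid_batches)])
        print 'fine-tuning epoch %d, validation error %.2f%%' % \
            (epoch, valid_err * 100.)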