def __init__(self, rng, input, n_in, n_hidden, n_out):
    """Initialize the parameters for the multilayer perceptron

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_hidden: int
    :param n_hidden: number of hidden units

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """

    # Since we are dealing with a one-hidden-layer MLP, this translates
    # into a HiddenLayer with a tanh activation function connected to the
    # LogisticRegression layer; the activation function can be replaced by
    # sigmoid or any other nonlinear function
    self.hiddenLayer = hlv_layers.HiddenLayer(rng=rng, input=input,
                                              n_in=n_in, n_out=n_hidden,
                                              activation=T.tanh)

    # The logistic regression layer gets as input the hidden units
    # of the hidden layer
    self.logRegressionLayer = hlv_layers.LogisticRegression(
        input=self.hiddenLayer.output,
        n_in=n_hidden,
        n_out=n_out)

    # L1 norm; one regularization option is to enforce the L1 norm to
    # be small
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer.W).sum()

    # square of the L2 norm; one regularization option is to enforce the
    # square of the L2 norm to be small
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer.W ** 2).sum()

    # the negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood

    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors

    # the parameters of the model are the parameters of the two layers it
    # is made out of
    self.params = self.hiddenLayer.params + self.logRegressionLayer.params
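
# ---------------------------------------------------------------------------
# A minimal, self-contained sketch of the L1 / L2_sqr terms built above,
# written against plain Theano shared variables.  `_demo_regularizers` and
# the stand-in weight matrices are hypothetical names used only to make the
# formulas explicit; they are not part of hlv_layers.
def _demo_regularizers():
    import numpy as np
    import theano

    # stand-ins for hiddenLayer.W (n_in x n_hidden) and
    # logRegressionLayer.W (n_hidden x n_out)
    W_hidden = theano.shared(np.zeros((784, 500), dtype=theano.config.floatX))
    W_out = theano.shared(np.zeros((500, 10), dtype=theano.config.floatX))

    L1 = abs(W_hidden).sum() + abs(W_out).sum()          # sum of |w|
    L2_sqr = (W_hidden ** 2).sum() + (W_out ** 2).sum()  # sum of w^2
    return L1, L2_sqr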
def __init__(self, n_in, n_out, data, batch_size, rng, n_hidden,
             learning_rate, activation=T.tanh, L1_reg=0., L2_reg=0.0001):
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.ivector('y')
    X = T.matrix('X')
    Y = T.ivector('Y')

    if rng is None:
        rng = np.random.RandomState(1234)

    self.batch_size = batch_size
    self.n_train_batches = data.train.x.get_value(borrow=True).shape[0] / batch_size
    self.n_valid_batches = data.valid.x.get_value(borrow=True).shape[0] / batch_size
    self.n_test_batches = data.test.x.get_value(borrow=True).shape[0] / batch_size
    self.x = x
    self.y = y
    self.data = data

    # Define the layers
    layer_input = x
    layer_n_in = n_in
    self.hiddenLayers = []
    for i in range(len(n_hidden) - 1):
        layer_n_out = n_hidden[i + 1]
        hiddenLayer = hlv_layers.HiddenLayer(rng=rng, input=layer_input,
                                             n_in=layer_n_in,
                                             n_out=layer_n_out,
                                             activation=activation)
        self.hiddenLayers.append(hiddenLayer)
        layer_input = hiddenLayer.output
        layer_n_in = layer_n_out

    self.logRegressionLayer = hlv_layers.LogisticRegression(
        input=layer_input, n_in=layer_n_in, n_out=n_out)

    # Define regularization
    self.L1 = 0
    self.L2_sqr = 0

    # define parameters, accumulating the regularization terms on the way
    self.params = self.logRegressionLayer.params
    for HL in self.hiddenLayers:
        self.L1 = self.L1 + abs(HL.W).sum()
        self.L2_sqr = self.L2_sqr + (HL.W ** 2).sum()
        self.params = self.params + HL.params
    self.L1 = self.L1 + abs(self.logRegressionLayer.W).sum()
    self.L2_sqr = self.L2_sqr + (self.logRegressionLayer.W ** 2).sum()

    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    self.errors = self.logRegressionLayer.errors

    # define the cost
    self.cost = self.negative_log_likelihood(y) \
        + L1_reg * self.L1 \
        + L2_reg * self.L2_sqr

    self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]
    self.updates = [(param, param - learning_rate * grad)
                    for param, grad in zip(self.params, self.grads)]

    # compiling a Theano function `train_model` that returns the
    # classification error on the given batch and at the same time updates
    # the parameters of the model based on the rules defined in `updates`
    self.train_model = theano.function(inputs=[X, Y],
                                       outputs=self.errors(y),
                                       updates=self.updates,
                                       givens={x: X, y: Y})
    self.test_model = theano.function(inputs=[],
                                      outputs=self.errors(y),
                                      givens={x: self.data.test.x,
                                              y: self.data.test.y})
    self.validate_model = theano.function(inputs=[],
                                          outputs=self.errors(y),
                                          givens={x: self.data.valid.x,
                                                  y: self.data.valid.y})
    self.train_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.cost,
        updates=self.updates,
        givens={
            x: self.data.train.x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            y: self.data.train.y[index * self.batch_size:
                                 (index + 1) * self.batch_size]})
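
# ---------------------------------------------------------------------------
# Layer-sizing sketch.  The constructor above reads `n_hidden` as a list
# whose first entry is skipped (it appears to describe the input) and whose
# remaining entries give the widths of the successive hidden layers.  The
# helper below is hypothetical and only reproduces that wiring so the
# convention is explicit:
def _demo_layer_sizes(n_in, n_hidden, n_out):
    """Return the (fan_in, fan_out) pairs the constructor above creates."""
    sizes = []
    layer_n_in = n_in
    for i in range(len(n_hidden) - 1):
        layer_n_out = n_hidden[i + 1]
        sizes.append((layer_n_in, layer_n_out))   # one HiddenLayer
        layer_n_in = layer_n_out
    sizes.append((layer_n_in, n_out))             # final LogisticRegression
    return sizes

# e.g. _demo_layer_sizes(784, [784, 500, 200], 10)
#      -> [(784, 500), (500, 200), (200, 10)]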
def __init__(self, n_in, n_out, data, batch_size, rng, n_hidden,
             learning_rate, activation=T.tanh, L1_reg=0., L2_reg=0.0001,
             prob_drop=0.2):
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.ivector('y')
    X = T.matrix('X')
    Y = T.ivector('Y')

    if rng is None:
        rng = np.random.RandomState(1234)

    self.batch_size = batch_size
    self.n_train_batches = data.train.x.get_value(borrow=True).shape[0] / batch_size
    self.n_valid_batches = data.valid.x.get_value(borrow=True).shape[0] / batch_size
    self.n_test_batches = data.test.x.get_value(borrow=True).shape[0] / batch_size
    self.x = x
    self.y = y
    self.data = data

    # Define the layers
    self.hiddenLayer = hlv_layers.HiddenLayer(rng=rng, input=x,
                                              n_in=n_in, n_out=n_hidden,
                                              activation=activation)
    self.dropOutLayer = hlv_layers.DropOutLayer(self.hiddenLayer.output,
                                                n_hidden)
    self.logRegressionLayer_do = hlv_layers.LogisticRegression(
        input=self.dropOutLayer.output, n_in=n_hidden, n_out=n_out)
    self.logRegressionLayer = hlv_layers.LogisticRegression(
        input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)

    # Make the vanilla logistic regression layer share its parameters
    # with the dropout layer
    self.logRegressionLayer.W = self.logRegressionLayer_do.W
    self.logRegressionLayer.b = self.logRegressionLayer_do.b
    self.logRegressionLayer.p_y_given_x = T.nnet.softmax(
        T.dot(self.hiddenLayer.output, self.logRegressionLayer.W)
        + self.logRegressionLayer.b)
    self.logRegressionLayer.y_pred = T.argmax(
        self.logRegressionLayer.p_y_given_x, axis=1)

    # Define regularization
    self.L1 = abs(self.hiddenLayer.W).sum() \
        + abs(self.logRegressionLayer_do.W).sum()
    self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
        + (self.logRegressionLayer_do.W ** 2).sum()

    # train on the dropout path, report errors on the vanilla path
    self.negative_log_likelihood = self.logRegressionLayer_do.negative_log_likelihood
    self.errors = self.logRegressionLayer.errors

    # define the cost
    self.cost = self.negative_log_likelihood(y) \
        + L1_reg * self.L1 \
        + L2_reg * self.L2_sqr

    # define parameters
    self.params = self.hiddenLayer.params + self.logRegressionLayer_do.params

    self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]
    self.updates = [(param, param - learning_rate * grad)
                    for param, grad in zip(self.params, self.grads)]

    # Dropout-specific machinery
    ############################
    self.prob_drop = prob_drop
    self.srng = theano.tensor.shared_randomstreams.RandomStreams(0)

    def random_drop_mask():
        # p = 1 - prob_drop because 1's indicate units to keep and
        # prob_drop is the probability of dropping
        mask = self.srng.binomial(n=1, p=1 - prob_drop,
                                  size=self.dropOutLayer.drop_mask.shape)
        # The cast is important because int * float32 = float64,
        # which pulls things off the GPU
        output = T.cast(mask, theano.config.floatX)
        return output

    self.dropOutLayer.drop_mask.set_value(random_drop_mask().eval())
    self.updates.append((self.dropOutLayer.drop_mask, random_drop_mask()))

    # compiling a Theano function `train_model` that returns the
    # classification error on the given batch and at the same time updates
    # the parameters of the model based on the rules defined in `updates`
    self.train_model = theano.function(inputs=[X, Y],
                                       outputs=self.errors(y),
                                       updates=self.updates,
                                       givens={x: X, y: Y})
    self.test_model = theano.function(inputs=[],
                                      outputs=self.errors(y),
                                      givens={x: self.data.test.x,
                                              y: self.data.test.y})
    self.validate_model = theano.function(inputs=[],
                                          outputs=self.errors(y),
                                          givens={x: self.data.valid.x,
                                                  y: self.data.valid.y})
    self.train_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.cost,
        updates=self.updates,
        givens={
            x: self.data.train.x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            y: self.data.train.y[index * self.batch_size:
                                 (index + 1) * self.batch_size]})
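
# ---------------------------------------------------------------------------
# Self-contained dropout-mask sketch mirroring random_drop_mask() above
# (`_demo_dropout_mask` is a hypothetical helper; only RandomStreams and
# T.cast are standard Theano API).  A 0/1 mask is sampled with keep
# probability 1 - prob_drop and cast to floatX so that multiplying
# activations by it does not promote them to float64 and push the
# computation off the GPU.
def _demo_dropout_mask(shape, prob_drop=0.2, seed=0):
    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams

    srng = RandomStreams(seed)
    # p = 1 - prob_drop because 1's mark the units that are kept
    mask = srng.binomial(n=1, p=1 - prob_drop, size=shape)
    return T.cast(mask, theano.config.floatX)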
def __init__(self, n_in, n_out, data, learning_rate=0.001, batch_size=1):
    index = T.lscalar()  # index to a [mini]batch
    X = T.matrix()
    Y = T.ivector()
    x = T.matrix('x')
    y = T.ivector('y')

    self.batch_size = batch_size
    self.n_train_batches = data.train.x.get_value(borrow=True).shape[0] / batch_size
    self.n_valid_batches = data.valid.x.get_value(borrow=True).shape[0] / batch_size
    self.n_test_batches = data.test.x.get_value(borrow=True).shape[0] / batch_size
    self.x = x
    self.y = y
    self.data = data

    self.classifier = hlv_layers.LogisticRegression(input=self.x,
                                                    n_in=n_in, n_out=n_out)
    self.cost = self.classifier.negative_log_likelihood(y)
    self.params = [self.classifier.W, self.classifier.b]
    self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]
    self.updates = [(param, param - learning_rate * grad)
                    for param, grad in zip(self.params, self.grads)]

    # Model operations (training/testing)
    self.train_model = theano.function(inputs=[X, Y],
                                       outputs=self.cost,
                                       updates=self.updates,
                                       givens={x: X, y: Y})
    self.test_model = theano.function(inputs=[],
                                      outputs=self.classifier.errors(y),
                                      givens={x: self.data.test.x,
                                              y: self.data.test.y})
    self.validate_model = theano.function(inputs=[],
                                          outputs=self.classifier.errors(y),
                                          givens={x: self.data.valid.x,
                                                  y: self.data.valid.y})
    self.train_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.cost,
        updates=self.updates,
        givens={
            x: self.data.train.x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            y: self.data.train.y[index * self.batch_size:
                                 (index + 1) * self.batch_size]})
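
# ---------------------------------------------------------------------------
# Training-loop sketch showing how the compiled functions above are meant to
# be driven.  `_demo_train` is a hypothetical helper; `model` is assumed to
# expose the attributes defined in the __init__ above.
def _demo_train(model, n_epochs=10):
    for epoch in xrange(n_epochs):
        for i in xrange(model.n_train_batches):
            model.train_model_minibatch(i)        # one SGD step on batch i
        # full-set validation error, evaluated through the `givens` mapping
        print 'epoch %d, validation error %f %%' % (
            epoch, model.validate_model() * 100.)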
def __init__(self, n_in, n_out, data, n_hidden, batch_size, rng,
             learning_rate, activation=T.tanh, L1_reg=0., L2_reg=0.0001):
    # Define classifier-independent stuff
    #####################################
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.ivector('y')
    X = T.matrix('X')
    Y = T.ivector('Y')

    if rng is None:
        rng = np.random.RandomState(23455)

    self.batch_size = batch_size
    self.n_train_batches = data.train.x.get_value(borrow=True).shape[0] / batch_size
    self.n_valid_batches = data.valid.x.get_value(borrow=True).shape[0] / batch_size
    self.n_test_batches = data.test.x.get_value(borrow=True).shape[0] / batch_size
    self.x = x
    self.y = y
    self.data = data

    # Define the model structure
    ############################
    # (sketch of a generic, spec-driven constructor; not active yet)
    """
    layerClasses = {
        'conv': hlv_layers.ConvLayer,
        'pool': hlv_layers.PoolingLayer,
        'hidden': hlv_layers.HiddenLayer,
        'logistic': hlv_layers.LogisticRegression,
        'flatten': hlv_layers.FlattenLayer,
        'reshape': hlv_layers.ReshapeLayer
    }

    Layers = []
    layer_input = x
    layer_n_in = n_in
    for layer_idx in range(len(layerSpecs)):
        layerType, layerConfig = layerSpecs[layer_idx]
        layerClass = layerClasses[layerType]
        new_layer = layerClass(input=layer_input,
                               shape_in=layer_n_in,
                               data=data,
                               rng=rng,
                               **layerConfig)
    """

    self.convLayer1 = hlv_layers.ConvLayer(
        rng=rng,
        input=x.reshape((-1, 1, 64, 64)),
        filter_shape=(n_hidden[0], 1, 13, 13),
        image_shape=(batch_size, 1, 64, 64),
        activation=activation,
        poolsize=(4, 4))
    self.poolingLayer1 = hlv_layers.PoolingLayer(
        rng=rng,
        input=self.convLayer1.output,
        input_shape=(0),
        poolsize=(4, 4))
    self.convLayer2 = hlv_layers.ConvLayer(
        rng=rng,
        input=self.poolingLayer1.output.reshape((-1, n_hidden[0], 13, 13)),
        filter_shape=(n_hidden[1], n_hidden[0], 4, 4),
        image_shape=(batch_size, n_hidden[0], 13, 13),
        activation=activation,
        poolsize=(2, 2))
    self.poolingLayer2 = hlv_layers.PoolingLayer(
        rng=rng,
        input=self.convLayer2.output,
        input_shape=(0),
        poolsize=(2, 2))
    self.hiddenLayer = hlv_layers.HiddenLayer(
        rng=rng,
        input=self.poolingLayer2.output.flatten(2),
        n_in=n_hidden[1] * 5 * 5,
        n_out=500,
        activation=activation)
    self.logRegressionLayer = hlv_layers.LogisticRegression(
        input=self.hiddenLayer.output, n_in=500, n_out=n_out)

    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    self.errors = self.logRegressionLayer.errors

    # define the cost (regularization terms currently disabled)
    self.cost = self.negative_log_likelihood(y)
    #     + L1_reg * self.L1 \
    #     + L2_reg * self.L2_sqr

    # define parameters
    self.params = self.convLayer1.params + self.convLayer2.params \
        + self.hiddenLayer.params + self.logRegressionLayer.params
    # self.params = self.logRegressionLayer.params

    self.grads = [T.grad(cost=self.cost, wrt=param) for param in self.params]
    self.updates = [(param, param - learning_rate * grad)
                    for param, grad in zip(self.params, self.grads)]

    # compiling a Theano function `train_model` that returns the cost, but
    # at the same time updates the parameters of the model based on the
    # rules defined in `updates` (full-batch variants currently disabled in
    # favour of the minibatch functions below)
    """
    self.train_model = theano.function(inputs=[X, Y],
                                       outputs=self.errors(y),
                                       updates=self.updates,
                                       givens={x: X, y: Y})
    self.test_model = theano.function(inputs=[],
                                      outputs=self.errors(y),
                                      givens={x: self.data.test.x,
                                              y: self.data.test.y})
    self.validate_model = theano.function(inputs=[],
                                          outputs=self.errors(y),
                                          givens={x: self.data.valid.x,
                                                  y: self.data.valid.y})
    """

    self.test_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.errors(y),
        givens={
            x: self.data.test.x[index * self.batch_size:
                                (index + 1) * self.batch_size],
            y: self.data.test.y[index * self.batch_size:
                                (index + 1) * self.batch_size]})
    self.validate_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.errors(y),
        givens={
            x: self.data.valid.x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            y: self.data.valid.y[index * self.batch_size:
                                 (index + 1) * self.batch_size]})
    self.train_model_minibatch = theano.function(
        inputs=[index],
        outputs=self.cost,
        updates=self.updates,
        givens={
            x: self.data.train.x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            y: self.data.train.y[index * self.batch_size:
                                 (index + 1) * self.batch_size]})
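
# ---------------------------------------------------------------------------
# Shape-arithmetic sketch explaining the hard-coded sizes above.
# `_demo_conv_shapes` is a hypothetical helper; it assumes 'valid'
# convolutions and non-overlapping pooling, which is what the image/filter
# shapes passed to hlv_layers imply.
def _demo_conv_shapes():
    size = 64              # input images are 64 x 64
    size = size - 13 + 1   # conv, 13 x 13 filters  -> 52
    size = size // 4       # 4 x 4 pooling          -> 13
    size = size - 4 + 1    # conv, 4 x 4 filters    -> 10
    size = size // 2       # 2 x 2 pooling          -> 5
    return size            # hence n_hidden[1] * 5 * 5 inputs to hiddenLayer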
def logistic_sgd():
    learning_rate = 0.13
    n_epochs = 100
    dataset = 'mnist.pkl.gz'
    batch_size = 600

    # get data ready
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    classifier = hlv_layers.LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'

    # early-stopping parameters
    patience = 5000                 # look at this many examples regardless
    patience_increase = 2           # wait this much longer when a new best
                                    # is found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_params = None
    best_validation_loss = np.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print 'The code ran for %.1fs' % (end_time - start_time)

    if test_score * 100 < 7.5:
        print >> sys.stderr, 'Test OK (logistic_sgd)'
    else:
        print >> sys.stderr, 'Test FAILED!'
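
# ---------------------------------------------------------------------------
# Early-stopping sketch distilling the patience rule used above
# (`_demo_update_patience` is a hypothetical helper, for illustration only):
# patience is extended whenever the validation loss improves by more than the
# relative improvement_threshold, and training stops once `iter` exceeds it.
def _demo_update_patience(patience, iter, this_loss, best_loss,
                          improvement_threshold=0.995, patience_increase=2):
    if this_loss < best_loss * improvement_threshold:
        patience = max(patience, iter * patience_increase)
    return patience, min(best_loss, this_loss)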