import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# HiddenLayer, DenoisingAutoEncoder, LogisticRegression and the util
# module are assumed to be defined in the earlier sections.


class StackedDenoisingAutoEncoders(object):
    """Stacked denoising autoencoders (SdA): a stack of dA layers whose
    encoders double as the sigmoid layers of an MLP, topped by a logistic
    regression layer for classification. (The corruption_levels argument
    is accepted for symmetry with hidden_layer_sizes but is unused here;
    corruption is supplied per layer when the pretraining functions are
    called.)"""

    def __init__(self, random_generator, theano_random_generator=None,
                 x_dim=28 * 28, y_dim=10,
                 hidden_layer_sizes=[500, 500],
                 corruption_levels=[0.1, 0.1]):
        # Declare an empty sigmoid layer array for the MLP
        self.sigmoid_layers = []

        # Declare an empty array of DenoisingAutoEncoder
        self.autoencoder_layers = []

        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        if theano_random_generator is None:
            self.theano_random_generator = RandomStreams(
                random_generator.randint(2 ** 30))
        else:
            self.theano_random_generator = theano_random_generator

        # Symbolic inputs
        self.x = T.matrix("x")
        self.y = T.ivector("y")

        # Initialize all layers and collect their parameters
        for i in range(self.n_layers):
            # Input dimension: the raw input size for the first layer,
            # the previous hidden layer's size afterwards
            if i == 0:
                internal_x_dim = x_dim
            else:
                internal_x_dim = hidden_layer_sizes[i - 1]
            internal_y_dim = hidden_layer_sizes[i]

            # Input variable: the raw input for the first layer, the
            # previous sigmoid layer's output afterwards
            if i == 0:
                internal_input = self.x
            else:
                internal_input = self.sigmoid_layers[i - 1].output

            # Define the sigmoid layer
            self.sigmoid_layers.append(
                HiddenLayer(internal_input, internal_x_dim, internal_y_dim,
                            random_generator, activation=T.nnet.sigmoid))

            # Define the denoising autoencoder, sharing W and b with the
            # sigmoid layer just created
            self.autoencoder_layers.append(
                DenoisingAutoEncoder(random_generator,
                                     self.theano_random_generator,
                                     internal_x_dim, internal_y_dim,
                                     internal_input,
                                     W=self.sigmoid_layers[i].W,
                                     b=self.sigmoid_layers[i].b))

            # Update parameters
            self.params.extend(self.sigmoid_layers[i].params)

        # Finally, add the logistic regression layer on top
        self.logistic_layer = LogisticRegression(
            self.sigmoid_layers[-1].output, hidden_layer_sizes[-1], y_dim)
        self.params.extend(self.logistic_layer.params)

        # These are the two important costs:
        # finetuning cost, minimized after pretraining the individual
        # autoencoders
        self.finetune_cost = self.logistic_layer.negative_log_likelihood(
            self.y)
        # error from prediction
        self.error = self.logistic_layer.error(self.y)

    def pretrain(self, train_x, batch_size):
        """Generates a list of functions, each implementing one step of
        training for the dA at the corresponding layer index. Each
        function takes a minibatch index as input, so to train a dA you
        just iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_x: theano.tensor.TensorType
        :param train_x: shared variable containing all datapoints used
                        for training the dAs

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        The corruption level and learning rate of each dA are exposed
        as optional arguments of the compiled functions, with defaults
        of 0.2 and 0.1 respectively.
        """
        index = T.iscalar("index")
        corruption_level = T.scalar("corruption_level")
        learning_rate = T.scalar("learning_rate")

        pretrain_functions = []
        for autoencoder in self.autoencoder_layers:
            # Cost and updates for this layer
            cost, updates = autoencoder.cost_updates(corruption_level,
                                                     learning_rate)
            f = theano.function(
                inputs=[index,
                        theano.Param(corruption_level, default=0.2),
                        theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_x[index * batch_size:
                                    (index + 1) * batch_size]
                })
            pretrain_functions.append(f)

        return pretrain_functions

    def finetune(self, train_x, train_y, valid_x, valid_y, test_x, test_y,
                 batch_size, learning_rate):
        """Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on a
        batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set.

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during the finetune stage
        """
        # Define index
        index = T.iscalar("index")

        # Cost and updates for one step of SGD
        grad = T.grad(self.finetune_cost, wrt=self.params)
        updates = [(param, param - learning_rate * g)
                   for param, g in zip(self.params, grad)]

        # Define train, valid and test models
        train_model = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: valid_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_y[index * batch_size:(index + 1) * batch_size]
            })

        test_model = theano.function(
            inputs=[index],
            outputs=self.error,
            givens={
                self.x: test_x[index * batch_size:(index + 1) * batch_size],
                self.y: test_y[index * batch_size:(index + 1) * batch_size]
            })

        return (train_model, valid_model, test_model)
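
# A minimal sketch of how pretrain() and finetune() fit together. The
# driver name and hyperparameter values are illustrative, and it assumes
# the util.load / util.create_theano_shared / util.train_test_model
# helpers used by sgd_optimize below; adapt as needed.
def sda_optimize(pretrain_epochs=15, pretrain_lr=0.001,
                 finetune_lr=0.1, n_epochs=200, batch_size=20):
    train, valid, test = util.load()
    train_x, train_y = util.create_theano_shared(train)
    valid_x, valid_y = util.create_theano_shared(valid)
    test_x, test_y = util.create_theano_shared(test)

    n_train_batches = train[0].shape[0] / batch_size
    n_valid_batches = valid[0].shape[0] / batch_size
    n_test_batches = test[0].shape[0] / batch_size

    random_generator = numpy.random.RandomState(1)
    sda = StackedDenoisingAutoEncoders(random_generator)

    # Layer-wise pretraining: one compiled function per dA, each taking a
    # minibatch index plus the optional corruption_level/learning_rate
    # arguments exposed through theano.Param.
    pretrain_functions = sda.pretrain(train_x, batch_size)
    corruption_levels = [0.1, 0.1]
    for i, pretrain_fn in enumerate(pretrain_functions):
        for epoch in range(pretrain_epochs):
            costs = [pretrain_fn(batch_index,
                                 corruption_level=corruption_levels[i],
                                 learning_rate=pretrain_lr)
                     for batch_index in range(n_train_batches)]
            print "Pretraining layer %d, epoch %d, cost %f" \
                % (i, epoch, numpy.mean(costs))

    # Supervised finetuning of the whole stack
    train_model, valid_model, test_model = sda.finetune(
        train_x, train_y, valid_x, valid_y, test_x, test_y,
        batch_size, finetune_lr)
    util.train_test_model(n_epochs, train_model, valid_model, test_model,
                          n_train_batches, n_valid_batches, n_test_batches)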
def sgd_optimize(learning_rate=0.1, n_epochs=200, batch_size=500,
                 nkerns=[20, 50]):
    # Load input
    train, valid, test = util.load()

    print "loading 0 -", train[0].shape[0], "train inputs in gpu memory"
    train_x, train_y = util.create_theano_shared(train)

    print "loading 0 -", valid[0].shape[0], "validation inputs in gpu memory"
    valid_x, valid_y = util.create_theano_shared(valid)

    print "loading 0 -", test[0].shape[0], "test inputs in gpu memory"
    test_x, test_y = util.create_theano_shared(test)

    # Define symbolic inputs
    print "Building Model..."
    index = T.iscalar()
    x = T.matrix("x")
    y = T.ivector("y")
    random_generator = numpy.random.RandomState(1)

    # Create layer 0 of the LeNet model
    layer0_input = x.reshape((batch_size, 1, 28, 28))
    filter_shape0 = (nkerns[0], 1, 5, 5)
    image_shape0 = (batch_size, 1, 28, 28)
    layer0 = LeNetConvPoolLayer(layer0_input, filter_shape0, image_shape0,
                                random_generator)

    # Create layer 1 of the LeNet model; after a 5x5 convolution and 2x2
    # pooling, each 28x28 image has become a 12x12 feature map
    filter_shape1 = (nkerns[1], nkerns[0], 5, 5)
    image_shape1 = (batch_size, nkerns[0], 12, 12)
    layer1 = LeNetConvPoolLayer(layer0.output, filter_shape1, image_shape1,
                                random_generator)

    # Create layer 2, a simple MLP hidden layer; the 4x4 feature maps are
    # flattened into one vector per example
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(layer2_input, nkerns[1] * 4 * 4, 500,
                         random_generator)

    # Finally, layer 3 is a LogisticRegression layer
    layer3 = LogisticRegression(layer2.output, 500, 10)

    # Define error
    error = layer3.error(y)

    # Create cost function
    cost = layer3.negative_log_likelihood(y)

    # Gradients and SGD update functions
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, wrt=params)
    updates = [(param, param - learning_rate * grad)
               for param, grad in zip(params, grads)]

    # Train model
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[index * batch_size:(index + 1) * batch_size],
            y: train_y[index * batch_size:(index + 1) * batch_size]
        })

    # Valid model
    valid_model = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: valid_x[index * batch_size:(index + 1) * batch_size],
            y: valid_y[index * batch_size:(index + 1) * batch_size]
        })

    # Test model
    test_model = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: test_x[index * batch_size:(index + 1) * batch_size],
            y: test_y[index * batch_size:(index + 1) * batch_size]
        })

    # Compute the number of minibatches
    n_train_batches = train[0].shape[0] / batch_size
    n_valid_batches = valid[0].shape[0] / batch_size
    n_test_batches = test[0].shape[0] / batch_size

    # Finally, the main training loop
    util.train_test_model(n_epochs, train_model, valid_model, test_model,
                          n_train_batches, n_valid_batches, n_test_batches)
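
# util.create_theano_shared is assumed to follow the standard Theano
# idiom for keeping a whole dataset in GPU memory; a sketch of one
# possible implementation (the actual helper lives in the util module,
# so treat this as illustrative):
def create_theano_shared(data_xy, borrow=True):
    data_x, data_y = data_xy
    # Store both arrays as floatX so they can live on the GPU, avoiding
    # a host-to-device copy for every minibatch
    shared_x = theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y,
                                           dtype=theano.config.floatX),
                             borrow=borrow)
    # Labels are used as integer indices, so hand back an int32 view
    return shared_x, T.cast(shared_y, "int32")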