def __init__(
    self,
    numpy_rng,
    theano_rng=None,
    n_ins=784,
    hidden_layers_sizes=None,
    corruption_levels=None
):
    '''Build the stacked sigmoid layers, their paired dAs and the value head.

    For every entry of ``hidden_layers_sizes`` a ``HiddenLayer`` with a
    sigmoid activation is created, together with a ``dA`` that is handed
    the same ``W`` and hidden bias ``b``.  A ``ValueFunction`` layer is
    then attached to the output of the last hidden layer and its ``cost``
    is stored as the fine-tuning objective.

    :param numpy_rng: random generator passed to every layer; its
                      ``randint`` seeds the Theano stream when
                      ``theano_rng`` is not supplied

    :param theano_rng: random stream handed to each ``dA``; a
                       ``RandomStreams`` is created when this is falsy

    :param n_ins: width of the input fed to the first layer

    :param hidden_layers_sizes: number of units of each hidden layer, at
                                least one entry; ``None`` means
                                ``[500, 500]``

    :param corruption_levels: accepted for interface compatibility but
                              never read by this constructor
    '''
    # A ``None`` sentinel replaces the former list default so that no
    # mutable object is shared between calls.
    if hidden_layers_sizes is None:
        hidden_layers_sizes = [500, 500]

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0, 'hidden_layers_sizes must not be empty'

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # symbolic input matrix
    # the targets live in a shared vector that starts as a single zero
    self.y = theano.shared(
        value=numpy.zeros((1,), dtype=theano.config.floatX),
        name='y',
        borrow=True
    )

    # The SdA is an MLP in which every intermediate layer shares its
    # weights with a denoising autoencoder.  Each sigmoid layer is built
    # together with the dA that shares its weights, so pretraining the
    # dAs also changes the weights of the MLP; fine-tuning then finishes
    # training by stochastic gradient descent on the MLP.
    layer_input = self.x   # the first layer reads the network input
    input_size = n_ins
    for n_hidden in hidden_layers_sizes:
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=n_hidden,
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # Only the sigmoid layers' parameters are registered as
        # parameters of the stack; the dA's own parameters (e.g. its
        # visible bias) stay with the dA.
        self.params.extend(sigmoid_layer.params)

        # denoising autoencoder sharing W and the hidden bias with the
        # sigmoid layer built just above
        dA_layer = dA(numpy_rng=numpy_rng,
                      theano_rng=theano_rng,
                      input=layer_input,
                      n_visible=input_size,
                      n_hidden=n_hidden,
                      W=sigmoid_layer.W,
                      bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

        # the next layer consumes this layer's activation
        layer_input = sigmoid_layer.output
        input_size = n_hidden

    # value-function head on top of the last hidden layer
    self.valueLayer = ValueFunction(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
    )
    self.params.extend(self.valueLayer.params)

    # Fine-tuning objective: the value layer's cost against ``self.y``
    # (presumably a squared error -- confirm in ValueFunction.cost).
    self.finetune_cost = self.valueLayer.cost(self.y)
    # ``error`` is produced by a second, identical call to ``cost``.
    self.error = self.valueLayer.cost(self.y)
def __init__(self,
             numpy_rng,
             theano_rng=None,
             n_ins=784,
             hidden_layers_sizes=None,
             corruption_levels=None):
    '''Assemble the sigmoid stack, one dA per layer, and the value layer.

    Each size in ``hidden_layers_sizes`` yields a sigmoid ``HiddenLayer``
    and a ``dA`` that receives that layer's ``W`` and ``b``.  The output
    of the last hidden layer feeds a ``ValueFunction`` whose ``cost``
    becomes ``finetune_cost`` (and ``error``).

    :param numpy_rng: random generator given to every layer; its
                      ``randint`` seeds the Theano stream when
                      ``theano_rng`` is not supplied

    :param theano_rng: random stream given to each ``dA``; when falsy a
                       ``RandomStreams`` is created

    :param n_ins: input width of the first layer

    :param hidden_layers_sizes: units per hidden layer, at least one
                                entry; ``None`` means ``[500, 500]``

    :param corruption_levels: kept for interface compatibility; this
                              constructor never reads it
    '''
    # The list default was replaced by a ``None`` sentinel so that no
    # mutable object is shared across calls.
    if hidden_layers_sizes is None:
        hidden_layers_sizes = [500, 500]

    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0, 'hidden_layers_sizes must not be empty'

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2**30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')  # symbolic input matrix
    # targets are held in a shared vector initialised to a single zero
    self.y = theano.shared(
        value=numpy.zeros((1, ), dtype=theano.config.floatX),
        name='y',
        borrow=True)

    # The SdA is an MLP whose intermediate layers each share weights with
    # a denoising autoencoder.  Pretraining the dAs therefore also moves
    # the MLP weights; fine-tuning then completes training by stochastic
    # gradient descent on the MLP.
    layer_input = self.x   # the first layer reads the network input
    input_size = n_ins
    for n_hidden in hidden_layers_sizes:
        sigmoid_layer = HiddenLayer(
            rng=numpy_rng,
            input=layer_input,
            n_in=input_size,
            n_out=n_hidden,
            activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # Only the sigmoid layers' parameters count as parameters of
        # the stack; dA-specific parameters (e.g. the visible bias)
        # remain with the dA.
        self.params.extend(sigmoid_layer.params)

        # dA that shares W and the hidden bias with this sigmoid layer
        dA_layer = dA(
            numpy_rng=numpy_rng,
            theano_rng=theano_rng,
            input=layer_input,
            n_visible=input_size,
            n_hidden=n_hidden,
            W=sigmoid_layer.W,
            bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

        # feed this layer's activation and width to the next iteration
        layer_input = sigmoid_layer.output
        input_size = n_hidden

    # value-function head on top of the last hidden layer
    self.valueLayer = ValueFunction(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
    )
    self.params.extend(self.valueLayer.params)

    # Fine-tuning objective: the value layer's cost against ``self.y``
    # (presumably a squared error -- confirm in ValueFunction.cost).
    self.finetune_cost = self.valueLayer.cost(self.y)
    # ``error`` comes from a second, identical call to ``cost``.
    self.error = self.valueLayer.cost(self.y)
class SdA(object):
    '''Stacked denoising autoencoder topped with a value-function layer.

    The network is a stack of sigmoid ``HiddenLayer`` objects.  Every
    hidden layer is paired with a ``dA`` that is given the same weight
    matrix and hidden bias, so pretraining the dAs also updates the
    stack.  A ``ValueFunction`` layer sits on the last hidden layer and
    provides the fine-tuning cost.
    '''

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=None,
        corruption_levels=None
    ):
        '''Build the sigmoid layers, their paired dAs and the value head.

        :param numpy_rng: random generator passed to every layer; its
                          ``randint`` seeds the Theano stream when
                          ``theano_rng`` is not supplied

        :param theano_rng: random stream handed to each ``dA``; a
                           ``RandomStreams`` is created when this is falsy

        :param n_ins: width of the input fed to the first layer

        :param hidden_layers_sizes: number of units of each hidden layer,
                                    at least one entry; ``None`` means
                                    ``[500, 500]``

        :param corruption_levels: accepted for interface compatibility
                                  but never read by this constructor
        '''
        # A ``None`` sentinel replaces the former list default so that no
        # mutable object is shared between calls.
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [500, 500]

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0, 'hidden_layers_sizes must not be empty'

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # symbolic input matrix
        # The targets live in a shared vector that starts as a single
        # zero; build_finetune_functions substitutes minibatch targets
        # for it through ``givens``.
        self.y = theano.shared(
            value=numpy.zeros((1,), dtype=theano.config.floatX),
            name='y',
            borrow=True
        )

        # The SdA is an MLP in which every intermediate layer shares its
        # weights with a denoising autoencoder.  Each sigmoid layer is
        # built together with the dA that shares its weights, so
        # pretraining the dAs also changes the weights of the MLP;
        # fine-tuning then finishes training by stochastic gradient
        # descent on the MLP.
        layer_input = self.x   # the first layer reads the network input
        input_size = n_ins
        for n_hidden in hidden_layers_sizes:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden,
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # Only the sigmoid layers' parameters are registered as
            # parameters of the stack; the dA's own parameters (e.g. its
            # visible bias) stay with the dA.
            self.params.extend(sigmoid_layer.params)

            # denoising autoencoder sharing W and the hidden bias with
            # the sigmoid layer built just above
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=n_hidden,
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

            # the next layer consumes this layer's activation
            layer_input = sigmoid_layer.output
            input_size = n_hidden

        # value-function head on top of the last hidden layer
        self.valueLayer = ValueFunction(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
        )
        self.params.extend(self.valueLayer.params)

        # Fine-tuning objective: the value layer's cost against ``self.y``
        # (presumably a squared error -- confirm in ValueFunction.cost).
        self.finetune_cost = self.valueLayer.cost(self.y)
        # ``error`` is produced by a second, identical call to ``cost``.
        self.error = self.valueLayer.cost(self.y)

    # NOTE: no __getstate__/__setstate__ hooks are defined, so the class
    # relies on default pickling behaviour.

    def compute_val(self, inp):
        '''Push ``inp`` through every layer's ``compute_val`` in order.

        :param inp: input values; they are copied with ``numpy.copy``
                    first, so the caller's object is not the one handed
                    to the layers

        :return: whatever ``self.valueLayer.compute_val`` returns for the
                 output of the last sigmoid layer
        '''
        curr = numpy.copy(inp)
        for level in self.sigmoid_layers:
            curr = level.compute_val(curr)
        return self.valueLayer.compute_val(curr)

    def pretraining_functions(self, train_set_x, batch_size):
        '''Compile one pretraining function per denoising autoencoder.

        Every compiled function takes a minibatch ``index`` and two
        optional inputs, ``corruption`` (default 0.2) and ``lr``
        (default 0.1).  It returns the dA's cost and applies the updates
        given by ``get_cost_updates`` on rows
        ``[index * batch_size, (index + 1) * batch_size)`` of
        ``train_set_x``.

        :param train_set_x: data that is sliced per minibatch and
                            substituted for ``self.x``

        :param batch_size: number of rows in a minibatch

        :return: list of compiled functions, in the order of
                 ``self.dA_layers``
        '''
        index = T.lscalar('index')               # minibatch index
        corruption_level = T.scalar('corruption')  # amount of corruption
        learning_rate = T.scalar('lr')           # learning rate

        # first and one-past-last row of the minibatch selected by index
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        # ``dA_layer`` rather than ``dA`` so the loop variable does not
        # shadow the dA class name.
        for dA_layer in self.dA_layers:
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            # theano.In(..., value=...) declares an input with a default
            # value; it replaces the deprecated
            # theano.Param(..., default=...).
            fn = theano.function(
                inputs=[
                    index,
                    theano.In(corruption_level, value=0.2),
                    theano.In(learning_rate, value=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Compile the function that performs one step of fine-tuning.

        Only the training function is built.  Validation and test scoring
        are not implemented, and two ``0`` placeholders are returned in
        their place so that the result is still a 3-tuple.

        :param datasets: a single pair ``(train_set_x, train_set_y)``;
                         both members are sliced per minibatch and
                         substituted for ``self.x`` and ``self.y``

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: ``(train_fn, 0, 0)`` where ``train_fn(index)`` returns
                 ``self.finetune_cost`` on minibatch ``index`` and applies
                 one gradient step to ``self.params``
        '''
        (train_set_x, train_set_y) = datasets

        index = T.lscalar('index')  # index to a [mini]batch

        # gradients of the cost with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # plain gradient-descent step for every parameter
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        batch_begin = index * batch_size
        batch_end = (index + 1) * batch_size
        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch_begin: batch_end],
                self.y: train_set_y[batch_begin: batch_end]
            },
            name='train',
        )

        # placeholders stand in for the missing validation/test scorers
        return train_fn, 0, 0
class SdA(object):
    '''Stack of sigmoid layers, each paired with a dA, plus a value layer.

    Every ``HiddenLayer`` in the stack has a companion ``dA`` that is
    given the same weight matrix and hidden bias, so pretraining the dAs
    also moves the weights of the stack.  A ``ValueFunction`` layer on
    top of the last hidden layer supplies the fine-tuning cost.
    '''

    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=None,
                 corruption_levels=None):
        '''Assemble the sigmoid stack, one dA per layer, and the value layer.

        :param numpy_rng: random generator given to every layer; its
                          ``randint`` seeds the Theano stream when
                          ``theano_rng`` is not supplied

        :param theano_rng: random stream given to each ``dA``; when falsy
                           a ``RandomStreams`` is created

        :param n_ins: input width of the first layer

        :param hidden_layers_sizes: units per hidden layer, at least one
                                    entry; ``None`` means ``[500, 500]``

        :param corruption_levels: kept for interface compatibility; this
                                  constructor never reads it
        '''
        # The list default was replaced by a ``None`` sentinel so that no
        # mutable object is shared across calls.
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [500, 500]

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0, 'hidden_layers_sizes must not be empty'

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # symbolic input matrix
        # Targets are held in a shared vector initialised to a single
        # zero; build_finetune_functions swaps in minibatch targets for
        # it through ``givens``.
        self.y = theano.shared(
            value=numpy.zeros((1, ), dtype=theano.config.floatX),
            name='y',
            borrow=True)

        # The SdA is an MLP whose intermediate layers each share weights
        # with a denoising autoencoder.  Pretraining the dAs therefore
        # also moves the MLP weights; fine-tuning then completes training
        # by stochastic gradient descent on the MLP.
        layer_input = self.x   # the first layer reads the network input
        input_size = n_ins
        for n_hidden in hidden_layers_sizes:
            sigmoid_layer = HiddenLayer(
                rng=numpy_rng,
                input=layer_input,
                n_in=input_size,
                n_out=n_hidden,
                activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # Only the sigmoid layers' parameters count as parameters of
            # the stack; dA-specific parameters (e.g. the visible bias)
            # remain with the dA.
            self.params.extend(sigmoid_layer.params)

            # dA that shares W and the hidden bias with this sigmoid layer
            dA_layer = dA(
                numpy_rng=numpy_rng,
                theano_rng=theano_rng,
                input=layer_input,
                n_visible=input_size,
                n_hidden=n_hidden,
                W=sigmoid_layer.W,
                bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

            # feed this layer's activation and width to the next iteration
            layer_input = sigmoid_layer.output
            input_size = n_hidden

        # value-function head on top of the last hidden layer
        self.valueLayer = ValueFunction(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
        )
        self.params.extend(self.valueLayer.params)

        # Fine-tuning objective: the value layer's cost against ``self.y``
        # (presumably a squared error -- confirm in ValueFunction.cost).
        self.finetune_cost = self.valueLayer.cost(self.y)
        # ``error`` comes from a second, identical call to ``cost``.
        self.error = self.valueLayer.cost(self.y)

    # NOTE: the class defines no __getstate__/__setstate__ hooks, so it
    # relies on default pickling behaviour.

    def compute_val(self, inp):
        '''Run ``inp`` through each layer's ``compute_val`` in sequence.

        :param inp: input values; a ``numpy.copy`` of them is what the
                    first layer receives

        :return: the result of ``self.valueLayer.compute_val`` applied to
                 the output of the last sigmoid layer
        '''
        curr = numpy.copy(inp)
        for level in self.sigmoid_layers:
            curr = level.compute_val(curr)
        return self.valueLayer.compute_val(curr)

    def pretraining_functions(self, train_set_x, batch_size):
        '''Return one compiled pretraining function for each dA.

        Each function accepts a minibatch ``index`` and the optional
        inputs ``corruption`` (default 0.2) and ``lr`` (default 0.1).
        Calling it returns the dA's cost and applies the updates from
        ``get_cost_updates`` on rows
        ``[index * batch_size, (index + 1) * batch_size)`` of
        ``train_set_x``.

        :param train_set_x: data sliced per minibatch and substituted for
                            ``self.x``

        :param batch_size: number of rows in a minibatch

        :return: list of compiled functions ordered like
                 ``self.dA_layers``
        '''
        index = T.lscalar('index')               # minibatch index
        corruption_level = T.scalar('corruption')  # amount of corruption
        learning_rate = T.scalar('lr')           # learning rate

        # first and one-past-last row of the minibatch selected by index
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        # The loop variable is not called ``dA`` so that it does not
        # shadow the dA class name.
        for dA_layer in self.dA_layers:
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            # theano.In(..., value=...) declares an input with a default
            # value; it replaces the deprecated
            # theano.Param(..., default=...).
            fn = theano.function(
                inputs=[
                    index,
                    theano.In(corruption_level, value=0.2),
                    theano.In(learning_rate, value=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Compile the function that performs one step of fine-tuning.

        Only the training function is produced.  Validation and test
        scoring are not implemented; two ``0`` placeholders take their
        place so that the return value remains a 3-tuple.

        :param datasets: a single pair ``(train_set_x, train_set_y)``;
                         both members are sliced per minibatch and
                         substituted for ``self.x`` and ``self.y``

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: ``(train_fn, 0, 0)`` where ``train_fn(index)`` returns
                 ``self.finetune_cost`` on minibatch ``index`` and applies
                 one gradient step to ``self.params``
        '''
        (train_set_x, train_set_y) = datasets

        index = T.lscalar('index')  # index to a [mini]batch

        # gradients of the cost with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # plain gradient-descent step for every parameter
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(self.params, gparams)]

        batch_begin = index * batch_size
        batch_end = (index + 1) * batch_size
        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch_begin:batch_end],
                self.y: train_set_y[batch_begin:batch_end]
            },
            name='train',
        )

        # placeholders stand in for the missing validation/test scorers
        return train_fn, 0, 0