def addLayer(self, file_name=None, neurons=None, output=False, trainable=True):
    """Add a layer to the network."""
    if len(self.layers) == 0:
        input_size = self.input_size
    else:
        input_size = self.layers[-1].num_neurons
    if file_name:
        if output:
            self.layers.append(
                OutputLayer(weight_file=file_name, trainable=trainable))
        else:
            self.layers.append(
                HiddenLayer(weight_file=file_name, trainable=trainable))
    else:
        if output:
            self.layers.append(
                OutputLayer(num_neurons=neurons, inputs=input_size,
                            trainable=trainable))
        else:
            self.layers.append(
                HiddenLayer(num_neurons=neurons, inputs=input_size,
                            trainable=trainable))

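# addLayer above chains layer widths: each new layer's input size is the
# previous layer's num_neurons (or the network's input_size for the first
# layer). A self-contained trace of that chaining rule, using a hypothetical
# minimal stand-in (not the real HiddenLayer/OutputLayer classes):


class _Layer(object):
    def __init__(self, num_neurons, inputs):
        self.num_neurons = num_neurons
        self.inputs = inputs


layers, input_size = [], 784
for width in (500, 100, 10):
    prev = layers[-1].num_neurons if layers else input_size
    layers.append(_Layer(width, prev))
assert [(l.inputs, l.num_neurons) for l in layers] == \
    [(784, 500), (500, 100), (100, 10)]
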
class MLP(object):
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        # hidden layer, defined in HiddenLayer.py
        self.hiddenLayer = HiddenLayer(rng=rng, input=input, n_in=n_in,
                                       n_out=n_hidden, activation=T.tanh)

        # output layer, logistic regression
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)

        # Regularization of params
        # option 1: L1 regularization of params
        self.L1 = abs(self.hiddenLayer.W).sum() \
            + abs(self.logRegressionLayer.W).sum()
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
            + (self.logRegressionLayer.W ** 2).sum()

        # Define the log likelihood, errors based on component models
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors
        self.params = self.hiddenLayer.params + \
            self.logRegressionLayer.params

    def __getstate__(self):
        """ Return the hidden layer and logistic regression layer that
        make up the MLP. """
        return (self.hiddenLayer, self.logRegressionLayer)

    def __setstate__(self, state):
        """ Re-establish the hidden layer and logistic regression layer
        objects. """
        (hiddenLayer, logRegressionLayer) = state
        self.hiddenLayer = hiddenLayer
        self.logRegressionLayer = logRegressionLayer

    def reconstruct_state(self, input, activation):
        """ Re-establish the inputs for each layer of the MLP. """
        self.hiddenLayer.reconstruct_state(input, activation)
        self.logRegressionLayer.reconstruct_state(self.hiddenLayer.output)
        self.L1 = abs(self.hiddenLayer.W).sum() \
            + abs(self.logRegressionLayer.W).sum()
        self.L2_sqr = (self.hiddenLayer.W ** 2).sum() \
            + (self.logRegressionLayer.W ** 2).sum()

        # Define the log likelihood, errors based on component models
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.errors
        self.params = self.hiddenLayer.params + \
            self.logRegressionLayer.params

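# The __getstate__/__setstate__ pair above lets the MLP be pickled without its
# symbolic input graph; reconstruct_state() re-attaches a fresh input after
# loading. A minimal, self-contained sketch of the same pattern with plain
# Python stand-ins (no Theano; the names here are illustrative only):
import pickle


class PicklableModel(object):
    def __init__(self, weights, input):
        self.weights = weights  # cheap to serialize
        self.input = input      # stand-in for an unpicklable symbolic input

    def __getstate__(self):
        return (self.weights,)  # drop the input from the pickle

    def __setstate__(self, state):
        (self.weights,) = state

    def reconstruct_state(self, input):
        self.input = input      # re-attach after unpickling


m = pickle.loads(pickle.dumps(PicklableModel([0.1, 0.2], input=object())))
m.reconstruct_state(input=object())
assert m.weights == [0.1, 0.2]
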
def __init__(self, batch_size, nkerns=[20, 50]):
    """ """
    super(ConvolutionalMultilayerPerceptronClassifier, self).__init__()
    self.batch_size = batch_size
    rng = numpy.random.RandomState(23455)

    # Reshape matrix of rasterized images of shape (self.batch_size, 28*28)
    # to a 4D tensor, compatible with our PoolingLayer

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (self.batch_size, nkerns[0], 12, 12)
    self.layer0 = PoolingLayer(
        rng,
        image_shape=(self.batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (self.batch_size, nkerns[1], 4, 4)
    self.layer1 = PoolingLayer(
        rng,
        image_shape=(self.batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (self.batch_size, num_pixels) (i.e. a matrix of rasterized
    # images). This will generate a matrix of shape
    # (self.batch_size, nkerns[1] * 4 * 4).

    # construct a fully-connected sigmoidal layer
    self.layer2 = HiddenLayer(
        rng,
        input_units=nkerns[1] * 4 * 4,
        output_units=500,
        nonlinear_function=Tensor.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    self.layer3 = LogisticClassifier(input_units=500, output_units=10)

    # create a list of all model parameters to be fit by gradient descent
    self.parameters = (
        self.layer3.parameters
        + self.layer2.parameters
        + self.layer1.parameters
        + self.layer0.parameters
    )

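# The shape comments above follow from 'valid' convolution (out = in - filter + 1)
# followed by 2x2 max pooling (halving). A quick self-contained check of the
# 28 -> 12 -> 4 feature-map chain assumed by layer2's input size:
def conv_pool_out(in_dim, filter_dim=5, pool=2):
    return (in_dim - filter_dim + 1) // pool


dim = conv_pool_out(28)   # (28 - 5 + 1) // 2 = 12
dim = conv_pool_out(dim)  # (12 - 5 + 1) // 2 = 4
assert dim == 4           # hence input_units = nkerns[1] * 4 * 4 in layer2
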
def init(self, layers_data):
    self.layers_data = layers_data
    for i in range(1, len(layers_data) - 1):
        self.hiddenLayers.append(HiddenLayer())
        self.hiddenLayers[-1].init(layers_data[i - 1] + 1, layers_data[i])
    self.outputLayer.init(layers_data[-2] + 1, layers_data[-1])
    """self.hiddenLayers[0].neurons[0].weights = [0.5, 0.4, -0.8]
    self.hiddenLayers[0].neurons[1].weights = [0.9, 1.0, 0.1]
    self.outputLayer.neurons[0].weights = [-1.2, 1.1, -0.3]"""
    self.outputs = [0 for i in range(layers_data[-1])]

class MultilayerPerceptronClassifier(Classifier):
    """ """

    def initialize_l1(self, L1_reg):
        """ L1 norm; one regularization option is to enforce the L1 norm to
        be small """
        self.L1 = (
            abs(self.hiddenLayer.weights).sum()
            + abs(self.logRegressionLayer.weights).sum()
        )
        self.L1_reg = L1_reg

    def initialize_l2(self, L2_reg):
        """ square of the L2 norm; one regularization option is to enforce
        the square of the L2 norm to be small """
        self.L2_sqr = (
            (self.hiddenLayer.weights ** 2).sum()
            + (self.logRegressionLayer.weights ** 2).sum()
        )
        self.L2_reg = L2_reg

    def __init__(self, rng, n_in, n_hidden, n_out, L1_reg=0.00,
                 L2_reg=0.0001):
        """ """
        super(MultilayerPerceptronClassifier, self).__init__()
        self.hiddenLayer = HiddenLayer(
            rng=rng,
            input_units=n_in,
            output_units=n_hidden,
            nonlinear_function=Tensor.tanh
        )
        self.logRegressionLayer = LogisticClassifier(
            input_units=n_hidden,
            output_units=n_out
        )
        self.initialize_l1(L1_reg)
        self.initialize_l2(L2_reg)
        self.parameters = (
            self.hiddenLayer.parameters
            + self.logRegressionLayer.parameters
        )

    def cost_function(self, inputs, outputs):
        """Regularized cost: NLL of the log-regression layer plus the
        weighted L1 and squared-L2 penalties."""
        hidden_outputs = self.hiddenLayer.output_probabilities_function(inputs)
        return (
            self.logRegressionLayer.cost_function(
                hidden_outputs, outputs
            )
            + self.L1_reg * self.L1
            + self.L2_reg * self.L2_sqr
        )

    def evaluation_function(self, inputs, outputs):
        """Classification error of the model on the given inputs/outputs."""
        return self.logRegressionLayer.evaluation_function(
            self.hiddenLayer.output_probabilities_function(inputs),
            outputs
        )

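# cost_function above combines the data-fit term with both penalties:
# cost = NLL + L1_reg * L1 + L2_reg * L2_sqr. A self-contained NumPy sketch of
# how the two penalty terms are accumulated over both weight matrices (toy
# values; the Theano graph computes the same sums symbolically):
import numpy as np

W_hidden = np.array([[0.5, -1.0], [2.0, 0.0]])
W_out = np.array([[1.5], [-0.5]])

L1 = np.abs(W_hidden).sum() + np.abs(W_out).sum()    # 3.5 + 2.0
L2_sqr = (W_hidden ** 2).sum() + (W_out ** 2).sum()  # 5.25 + 2.5

nll = 0.73  # placeholder data-fit term
cost = nll + 0.00 * L1 + 0.0001 * L2_sqr
print(cost)
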
def evaluate_lenet5(learning_rate=0.1, momentum=0.9, n_epochs=500,
                    dataset='mnist', depth=1, augment_data=False,
                    nkerns=[20, 50, 100], batch_size=500):
    """ Demonstrates lenet on the MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing (MNIST here)

    :type depth: int
    :param depth: number of feature maps in the input, i.e. 3 for an RGB image

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, augment_data)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Set the initial dimensions of the input images
    if dataset == 'mnist':
        in_dim = 28
    elif dataset == 'cifar-10':
        in_dim = 32

    # Reshape matrix of rasterized images of shape (batch_size, in_dim * in_dim)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # ((28, 28) is the size of MNIST images)
    layer0_input = x.reshape((batch_size, depth, in_dim, in_dim))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (in_dim-5+1, in_dim-5+1),
    # e.g. (24, 24) for MNIST; with poolsize (1, 1) no downsampling occurs,
    # so the 4D output tensor is of shape (batch_size, nkerns[0], 24, 24)
    filter_dim = 5
    pool_dim = 1
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, depth, in_dim, in_dim),
        filter_shape=(nkerns[0], depth, filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (24-5+1, 24-5+1) = (20, 20)
    # for MNIST; again poolsize (1, 1) applies no downsampling,
    # so the 4D output tensor is of shape (batch_size, nkerns[1], 20, 20)
    in_dim = (in_dim - filter_dim + 1) // pool_dim  # keep shapes integral
    filter_dim = 5
    pool_dim = 1
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], in_dim, in_dim),
        filter_shape=(nkerns[1], nkerns[0], filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # Construct the third convolutional layer with no pooling
    # filtering reduces the image size to (20-3+1, 20-3+1) = (18, 18)
    # for MNIST; the 4D output tensor is of shape
    # (batch_size, nkerns[2], 18, 18)
    in_dim = (in_dim - filter_dim + 1) // pool_dim
    filter_dim = 3
    pool_dim = 1
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], in_dim, in_dim),
        filter_shape=(nkerns[2], nkerns[1], filter_dim, filter_dim),
        poolsize=(pool_dim, pool_dim)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape
    # (batch_size, nkerns[2] * in_dim * in_dim), with in_dim updated below.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected rectifier layer
    in_dim = (in_dim - filter_dim + 1) // pool_dim
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * in_dim * in_dim,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected rectifier layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter, so we create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    # (plain SGD updates; unused here, since train_model below is compiled
    # with the momentum updates from gradient_updates_momentum instead)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=gradient_updates_momentum(cost, params, learning_rate, momentum),
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        learning_rate = set_learning_rate(learning_rate, epoch, dataset)
        momentum = set_momentum(momentum, epoch)
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # average training cost (note that train_model also performs
                # parameter updates, so this pass trains as it measures)
                train_losses = [train_model(i) for i in range(n_train_batches)]
                this_train_loss = numpy.mean(train_losses)
                print('  epoch %i, minibatch %i/%i, train error %f %%\n' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_train_loss * 100.))

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('  epoch %i, minibatch %i/%i, validation error %f %%\n' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%\n') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

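# evaluate_lenet5 above relies on a gradient_updates_momentum helper that is
# not shown. A minimal sketch of what such a helper typically looks like,
# assuming classic momentum with one shared velocity per parameter (the
# actual helper may differ):
import theano
import theano.tensor as T


def gradient_updates_momentum(cost, params, learning_rate, momentum):
    updates = []
    for param in params:
        # velocity accumulator with the parameter's shape/broadcast pattern
        velocity = theano.shared(param.get_value() * 0.,
                                 broadcastable=param.broadcastable)
        step = momentum * velocity - learning_rate * T.grad(cost, param)
        updates.append((velocity, step))
        updates.append((param, param + step))
    return updates
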
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10):
    """ """
    super(DBN, self).__init__()

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.parameters = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # The DBN is an MLP, for which all weights of intermediate layers are
    # shared with a different RBM. We will first construct the DBN as a
    # deep multilayer perceptron, and when constructing each sigmoidal
    # layer we also construct an RBM that shares weights with that layer.
    # During pretraining we will train these RBMs (which will lead to
    # changing the weights of the MLP as well). During finetuning we will
    # finish training the DBN by doing stochastic gradient descent on the
    # MLP.
    for i in xrange(self.n_layers):
        # construct the sigmoidal layer

        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input_units=input_size,
                                    output_units=hidden_layers_sizes[i],
                                    nonlinear_function=T.nnet.sigmoid)

        # add the layer to our list of layers
        self.sigmoid_layers.append(sigmoid_layer)

        # it's arguably a philosophical question... but we are going to
        # only declare that the parameters of the sigmoid_layers are
        # parameters of the DBN. The visible biases in the RBM are
        # parameters of those RBMs, but not of the DBN.
        self.parameters.extend(sigmoid_layer.parameters)

        # Construct an RBM that shares weights with this layer
        rbm_layer = RestrictedBoltzmannMachine(
            numpy_rng=numpy_rng,
            theano_rng=theano_rng,
            n_visible=input_size,
            n_hidden=hidden_layers_sizes[i],
            W=sigmoid_layer.weights,
            hbias=sigmoid_layer.biases)
        self.rbm_layers.append(rbm_layer)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticClassifier(input_units=hidden_layers_sizes[-1],
                                       output_units=n_outs)
    self.parameters.extend(self.logLayer.parameters)

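# Because each RBM above is built with W=sigmoid_layer.weights and
# hbias=sigmoid_layer.biases, pretraining the RBM moves the MLP's weights
# too. A self-contained illustration of that aliasing, with a NumPy array
# standing in for the Theano shared variable:
import numpy as np

mlp_W = np.zeros((4, 3))
rbm_W = mlp_W               # same object, not a copy
rbm_W += 0.5                # in-place "pretraining" update
assert mlp_W[0, 0] == 0.5   # the MLP sees the pretrained weights
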
def train_CNN_mini_batch(learning_rate, n_epochs, num_kernels, batch_size,
                         filter_size, is_multi_scale, num_of_classes, height,
                         width, use_interpolation, use_hidden_layer):
    train_set_x_by_1, train_set_y, valid_set_x_by_1, valid_set_y, \
        test_set_x_by_1, test_set_y, train_set_x_by_2, train_set_x_by_4, \
        valid_set_x_by_2, valid_set_x_by_4, test_set_x_by_2, test_set_x_by_4 \
        = load_processed_img_data()

    n_train_batches = train_set_x_by_1.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x_by_1.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x_by_1.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    index = theano.tensor.lscalar()
    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    y = theano.tensor.ivector('y')

    print '... initialize the model'

    # encode the configuration flags into the model directory name:
    # multi-scale/single ('M'/'S'), hidden/logistic-only ('H'/'L'),
    # interpolation/nearest ('I'/'N')
    cnn_dir = 'models/CNN_'
    if is_multi_scale is True:
        cnn_dir += 'M_'
    else:
        cnn_dir += 'S_'
    if use_hidden_layer is True:
        cnn_dir += 'H_'
    else:
        cnn_dir += 'L_'
    if use_interpolation is True:
        cnn_dir += 'I_'
    else:
        cnn_dir += 'N_'
    cnn_dir = cnn_dir + str(num_kernels[0]) + '_' + str(num_kernels[1]) + \
        '_' + str(num_kernels[2]) + '_' + str(batch_size) + '_'
    curr_date = str(datetime.date.today())
    curr_date = curr_date.replace('-', '_')
    cnn_dir = cnn_dir + curr_date + str(time.strftime('_%H_%M_%S'))

    print 'CNN model is ', cnn_dir

    if not os.path.exists(cnn_dir):
        os.makedirs(cnn_dir)

    class Logger(object):
        def __init__(self):
            self.terminal = sys.stdout
            self.log = open(cnn_dir + '/log.txt', 'w')

        def write(self, message):
            self.terminal.write(message)
            self.log.write(message)

    sys.stdout = Logger()

    layer0 = CNN_Layer(
        name='Layer_0',
        W=None,
        b=None,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )

    layer1 = CNN_Layer(
        name='Layer_1',
        W=None,
        b=None,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size, filter_size),
    )

    layer2 = CNN_Layer(
        name='Layer_2',
        W=None,
        b=None,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size, filter_size),
    )

    layer3 = HiddenLayer(
        name='Layer_3',
        W=None,
        b=None,
        n_in=num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2],
        n_out=num_kernels[2] * 4 if is_multi_scale is True else num_kernels[2] * 2,
        activation=theano.tensor.tanh)

    if is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 4
    elif is_multi_scale and not use_hidden_layer:
        layer4_in = num_kernels[2] * 3
    elif not is_multi_scale and use_hidden_layer:
        layer4_in = num_kernels[2] * 2
    else:
        layer4_in = num_kernels[2]

    layer4 = LogisticRegression(
        name='Layer_4',
        W=None,
        b=None,
        n_in=layer4_in,
        n_out=num_of_classes,
    )

    forward_propagation(layer0=layer0, layer1=layer1, layer2=layer2,
                        layer3=layer3, layer4=layer4, x_by_1=x_by_1,
                        x_by_2=x_by_2, x_by_4=x_by_4, num_kernels=num_kernels,
                        batch_size=batch_size, filter_size=filter_size,
                        is_multi_scale=is_multi_scale, height=height,
                        width=width, use_interpolation=use_interpolation,
                        use_hidden_layer=use_hidden_layer)

    if use_hidden_layer is True:
        L2_norm = (layer4.W ** 2).sum() + (layer3.W ** 2).sum() + \
            (layer2.W ** 2).sum() + (layer1.W ** 2).sum() + \
            (layer0.W ** 2).sum()
    else:
        L2_norm = (layer4.W ** 2).sum() + (layer2.W ** 2).sum() + \
            (layer1.W ** 2).sum() + (layer0.W ** 2).sum()

    regularization = 0.00001
    cost = layer4.negative_log_likelihood(y) + (regularization * L2_norm)

    if is_multi_scale is True:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2: test_set_x_by_2[index *
                                        batch_size:(index + 1) * batch_size],
                x_by_4: test_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size * height * width:
                              (index + 1) * batch_size * height * width]
            })
    else:
        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: test_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size * height * width:
                              (index + 1) * batch_size * height * width]
            })

    if is_multi_scale is True:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2: valid_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4: valid_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size * height * width:
                               (index + 1) * batch_size * height * width]
            })
    else:
        validate_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x_by_1: valid_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size * height * width:
                               (index + 1) * batch_size * height * width]
            })

    if use_hidden_layer is True:
        params = layer4.params + layer3.params + layer2.params + \
            layer1.params + layer0.params
    else:
        params = layer4.params + layer2.params + layer1.params + layer0.params

    grads = theano.tensor.grad(cost, params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    if is_multi_scale is True:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1: train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                x_by_2: train_set_x_by_2[index * batch_size:(index + 1) * batch_size],
                x_by_4: train_set_x_by_4[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size * width * height:
                               (index + 1) * batch_size * width * height]
            })
    else:
        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x_by_1: train_set_x_by_1[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size * width * height:
                               (index + 1) * batch_size * width * height]
            })

    print '... training the model'

    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_layer_0_W = numpy.zeros_like(layer0.W.get_value())
    best_layer_0_b = numpy.zeros_like(layer0.b.get_value())
    best_layer_1_W = numpy.zeros_like(layer1.W.get_value())
    best_layer_1_b = numpy.zeros_like(layer1.b.get_value())
    best_layer_2_W = numpy.zeros_like(layer2.W.get_value())
    best_layer_2_b = numpy.zeros_like(layer2.b.get_value())
    best_layer_3_W = numpy.zeros_like(layer3.W.get_value())
    best_layer_3_b = numpy.zeros_like(layer3.b.get_value())
    best_layer_4_W = numpy.zeros_like(layer4.W.get_value())
    best_layer_4_b = numpy.zeros_like(layer4.b.get_value())

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for mini_batch_index in xrange(n_train_batches):

            start = time.clock()
            iter = (epoch - 1) * n_train_batches + mini_batch_index
            cost_ij = train_model(mini_batch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, mini-batch %i/%i, validation error %f %%' %
                      (epoch, mini_batch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # save best filters
                    best_layer_0_W = layer0.W.get_value()
                    best_layer_0_b = layer0.b.get_value()
                    best_layer_1_W = layer1.W.get_value()
                    best_layer_1_b = layer1.b.get_value()
                    best_layer_2_W = layer2.W.get_value()
                    best_layer_2_b = layer2.b.get_value()
                    best_layer_3_W = layer3.W.get_value()
                    best_layer_3_b = layer3.b.get_value()
                    best_layer_4_W = layer4.W.get_value()
                    best_layer_4_b = layer4.b.get_value()

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, mini-batch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, mini_batch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

            print 'training @ iter = %d, time taken = %f' % \
                (iter, (time.clock() - start))

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if not os.path.exists(cnn_dir + '/params'):
        os.makedirs(cnn_dir + '/params')

    numpy.save(cnn_dir + '/params/layer_0_W.npy', best_layer_0_W)
    numpy.save(cnn_dir + '/params/layer_0_b.npy', best_layer_0_b)
    numpy.save(cnn_dir + '/params/layer_1_W.npy', best_layer_1_W)
    numpy.save(cnn_dir + '/params/layer_1_b.npy', best_layer_1_b)
    numpy.save(cnn_dir + '/params/layer_2_W.npy', best_layer_2_W)
    numpy.save(cnn_dir + '/params/layer_2_b.npy', best_layer_2_b)
    numpy.save(cnn_dir + '/params/layer_3_W.npy', best_layer_3_W)
    numpy.save(cnn_dir + '/params/layer_3_b.npy', best_layer_3_b)
    numpy.save(cnn_dir + '/params/layer_4_W.npy', best_layer_4_W)
    numpy.save(cnn_dir + '/params/layer_4_b.npy', best_layer_4_b)
    numpy.save(cnn_dir + '/params/filer_kernels.npy', num_kernels)
    numpy.save(cnn_dir + '/params/filter_size.npy', filter_size)

    return cnn_dir

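# The early-stopping rule above (and in the other training loops in this
# section) only extends patience when validation improves by more than
# improvement_threshold. A tiny self-contained trace of that rule on a
# made-up loss sequence:
patience, patience_increase, improvement_threshold = 6, 2, 0.995
best = float('inf')
for it, loss in enumerate([0.9, 0.7, 0.69, 0.695, 0.694, 0.693, 0.6929]):
    if loss < best:
        if loss < best * improvement_threshold:
            patience = max(patience, it * patience_increase)
        best = loss
    if patience <= it:
        print('stopping at iteration %d' % it)  # triggers at iteration 6
        break
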
from neural_network import NeuralNetwork
from input_layer import InputLayer
from output_layer import OutputLayer
from hidden_layer import HiddenLayer

# test: train the XOR function
model = NeuralNetwork(InputLayer(2),
                      HiddenLayer(2, "sigmoid"),
                      OutputLayer(1, "sigmoid"))

train_input = [[1, 1], [1, 0], [0, 1], [0, 0]]
train_output = [0, 1, 1, 0]

model.train(train_input, train_output, 1, 0.1, 20)

model.predict([1, 1])
model.predict([1, 0])
model.predict([0, 1])
model.predict([0, 0])

class WriteLayer(object):
    def __init__(self, batch_size, channels, N, image_width, image_height,
                 input_hidden_size, use_dx_dy=False, name=''):
        """
        Write layer from the DRAW paper
        """
        self.batch_size = batch_size
        self.use_dx_dy = use_dx_dy
        self.N = N
        self.width = image_width
        self.height = image_height
        self.name = name
        self.input_hidden_size = input_hidden_size
        self.channels = channels
        self.test = False

        self.init_params()

    def init_params(self):
        self.transform_hidden = HiddenLayer(input_size=self.input_hidden_size,
                                            hidden_size=5 + self.use_dx_dy,
                                            activation=act.Identity,
                                            name='Writer.Params.' + self.name)
        self.w_transform = HiddenLayer(input_size=self.input_hidden_size,
                                       hidden_size=self.channels * self.N * self.N,
                                       activation=act.Identity,
                                       name='Writer.Write.' + self.name)

    def batched_dot(self, A, B):
        C = A.dimshuffle([0, 1, 2, 'x']) * B.dimshuffle([0, 'x', 1, 2])
        return C.sum(axis=-2)

    def get_params(self, h):
        hidden = self.transform_hidden.run(h)
        gx = (hidden[:, 0] + 1) * 0.5 * self.width
        gy = (hidden[:, 1] + 1) * 0.5 * self.height
        s2 = T.exp(hidden[:, 3] / 2.0)
        g = T.exp(hidden[:, 4]).dimshuffle(0, 'x')
        if self.use_dx_dy:
            dx = (self.width - 1.0) / (self.N - 1.0) * T.exp(hidden[:, 2])
            dy = (self.height - 1.0) / (self.N - 1.0) * T.exp(hidden[:, 5])
        else:
            dx = dy = ((max(self.width, self.height) - 1.0) / (self.N - 1.0)
                       * T.exp(hidden[:, 2]))
        return gx, gy, dx, dy, s2, g

    def get_params_test(self, h):
        return h[:, 0], h[:, 1], h[:, 2], h[:, 5], h[:, 3], \
            h[:, 4].dimshuffle(0, 'x')

    def run(self, h):
        channels = self.channels  # images.shape[1]
        if not self.test:
            gx, gy, dx, dy, s2, g = self.get_params(h)
        else:
            gx, gy, dx, dy, s2, g = self.get_params_test(h)

        w = self.w_transform.run(h)
        w = w.reshape((self.batch_size * self.channels, self.N, self.N))

        muX = gx.dimshuffle([0, 'x']) + dx.dimshuffle([0, 'x']) * (
            T.arange(self.N).astype(theano.config.floatX) - self.N / 2 - 0.5)
        muY = gy.dimshuffle([0, 'x']) + dy.dimshuffle([0, 'x']) * (
            T.arange(self.N).astype(theano.config.floatX) - self.N / 2 - 0.5)

        a = T.arange(self.width).astype(theano.config.floatX)
        b = T.arange(self.height).astype(theano.config.floatX)

        Fx = T.exp(-(a - muX.dimshuffle([0, 1, 'x'])) ** 2 / 2.
                   / s2.dimshuffle([0, 'x', 'x']) ** 2)
        Fy = T.exp(-(b - muY.dimshuffle([0, 1, 'x'])) ** 2 / 2.
                   / s2.dimshuffle([0, 'x', 'x']) ** 2)

        # normalise the filterbank rows
        Fx = Fx / (Fx.sum(axis=-1).dimshuffle([0, 1, 'x']) + 1e-4)
        Fy = Fy / (Fy.sum(axis=-1).dimshuffle([0, 1, 'x']) + 1e-4)

        self.Fx = T.repeat(Fx, channels, axis=0)
        self.Fy = T.repeat(Fy, channels, axis=0)

        self.fint = self.batched_dot(self.Fy.transpose((0, 2, 1)), w)
        self.fim = self.batched_dot(self.fint, self.Fx).reshape(
            (self.batch_size, self.channels * self.width * self.height))

        return 1. / g * self.fim, (gx, gy, dx, dy, self.fint)

    @property
    def params(self):
        # flat list, matching the half-and-half split in the setter below
        return self.transform_hidden.params + self.w_transform.params

    @params.setter
    def params(self, params):
        # integer division so the split index stays an int
        self.transform_hidden.params = params[:len(params) // 2]
        self.w_transform.params = params[len(params) // 2:]

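# The write head above positions an N x N patch through Gaussian filterbanks
# Fx/Fy built from a centre (gx, gy), stride (dx, dy) and width s2. A
# self-contained NumPy sketch of the Fx construction for one sample,
# mirroring the muX / Fx lines in run() (gx, dx, s2 are toy values):
import numpy as np

N, width = 3, 8
gx, dx, s2 = 4.0, 2.0, 1.0

# grid centres: mu_i = gx + dx * (i - N/2 - 0.5), as in run()
mu = gx + dx * (np.arange(N) - N / 2.0 - 0.5)
a = np.arange(width, dtype=float)

Fx = np.exp(-(a[None, :] - mu[:, None]) ** 2 / 2.0 / s2 ** 2)
Fx = Fx / (Fx.sum(axis=1, keepdims=True) + 1e-4)  # rows sum to ~1
print(Fx.shape)  # (N, width): one normalised Gaussian row per patch column
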
def stack(self):
    if self.__stacked == 0:
        self.layers = []
        self.rbm_layers = []
        self.downpass_layers = []
        for i in range(self.n_layers):
            # let the output of each layer become the input of
            # the next layer.
            if i == 0:
                input = self.input
                n_visible = self.n_visible
            else:
                input = self.layers[-1].output
                n_visible = self.n_hidden[i - 1]
            n_hidden = self.n_hidden[i]
            self.rbm_layers.append(
                ParallelTempering(input=input,
                                  n_visible=n_visible,
                                  n_hidden=n_hidden))
            self.layers.append(
                HiddenLayer(input=input,
                            n_in=n_visible,
                            n_out=n_hidden,
                            W=self.rbm_layers[i].W,
                            vbias=self.rbm_layers[i].vbias,
                            hbias=self.rbm_layers[i].hbias))
            print "\rForwardPropagation Layer: %i Done..." % (i)

        # building the downpass network.
        for i in reversed(xrange(self.n_layers)):
            if i == (self.n_layers - 1):
                # the last layer is the same as in the feedforward pass
                self.downpass_layers.append(self.layers[i])
            elif i == (self.n_layers - 2):
                # Change the layer.
                # The layer configuration is a flipped hidden layer object.
                # Take the output from the last layer (except for the
                # last 2 layers).
                self.downpass_layers.append(
                    HiddenLayer(
                        input=self.downpass_layers[-1].feedbackward,
                        n_in=self.downpass_layers[-1].n_visible,
                        n_out=self.rbm_layers[i].n_visible,
                        W=self.rbm_layers[i].W.T,
                        vbias=self.rbm_layers[i].hbias,
                        hbias=self.rbm_layers[i].vbias))
            else:
                self.downpass_layers.append(
                    HiddenLayer(input=self.downpass_layers[-1].output,
                                n_in=self.downpass_layers[-1].n_hidden,
                                n_out=self.rbm_layers[i].n_visible,
                                W=self.rbm_layers[i].W.T,
                                vbias=self.rbm_layers[i].hbias,
                                hbias=self.rbm_layers[i].vbias))
            print 'BackPropagation Layer: %i Done...' % (i)
        print 'Stacking Done'
        self.__stacked = 1
    else:
        print "The network has already been built"
    return self

def __init__(self, corpus, n_emb, n_hidden, pooling, rng=None, th_rng=None,
             load_from=None, gensim_w2v=None):
    self.corpus = corpus
    self.n_emb = n_emb
    self.n_hidden = n_hidden
    self.pooling = pooling
    assert pooling in ('mean', 'max')

    if rng is None:
        rng = np.random.RandomState(1226)
    if th_rng is None:
        th_rng = RandomStreams(1226)

    # x/mask: (batch size, nsteps)
    x = T.matrix('x', dtype='int32')
    mask = T.matrix('mask', dtype=theano.config.floatX)
    y = T.vector('y', dtype='int32')
    batch_idx_seq = T.vector('index', dtype='int32')
    use_noise = theano.shared(th_floatX(0.))
    self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = \
        x, mask, y, batch_idx_seq, use_noise

    # TRANSPOSE THE AXES!
    trans_x, trans_mask = x.T, mask.T
    # truncate the useless (all-padding) steps
    trunc_x, trunc_mask = RNNModel.trunc_inputs_mask(trans_x, trans_mask)
    n_steps, n_samples = trunc_x.shape

    # list of model layers
    model_layers = []
    model_layers.append(
        EmbLayer(trunc_x,
                 load_from=load_from,
                 rand_init_params=(rng, (corpus.dic.size, n_emb)),
                 gensim_w2v=gensim_w2v,
                 dic=corpus.dic))
    model_layers.append(
        RNNLayer(model_layers[-1].outputs,
                 trunc_mask,
                 load_from=load_from,
                 rand_init_params=(rng, (n_emb, n_hidden))))
    if pooling == 'mean':
        model_layers.append(
            MeanPoolingLayer(model_layers[-1].outputs, trunc_mask))
    else:
        model_layers.append(
            MaxPoolingLayer(model_layers[-1].outputs, trunc_mask))
    model_layers.append(
        DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
    model_layers.append(
        HiddenLayer(model_layers[-1].outputs,
                    activation=T.nnet.softmax,
                    load_from=load_from,
                    rand_init_params=(rng, (n_hidden, corpus.n_type))))
    self.model_layers = model_layers

    model_params = []
    for layer in model_layers:
        model_params += layer.params

    self.pred_prob = model_layers[-1].outputs
    self.pred = T.argmax(self.pred_prob, axis=1)
    off = 1e-8
    self.cost = -T.mean(T.log(self.pred_prob[T.arange(n_samples), y] + off))

    # attributes with the `func` suffix are compiled functions
    self.predict_func = theano.function(inputs=[x, mask], outputs=self.pred)
    self.predict_prob_func = theano.function(inputs=[x, mask],
                                             outputs=self.pred_prob)

    grads = T.grad(self.cost, model_params)
    self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, \
        self.param_updates = ada_updates(model_params, grads)

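# The cost above selects each sample's predicted probability of its true
# class with pred_prob[T.arange(n_samples), y] before taking the log. The
# same fancy indexing in plain NumPy:
import numpy as np

pred_prob = np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1]])
y = np.array([0, 1])
nll = -np.mean(np.log(pred_prob[np.arange(2), y] + 1e-8))
print(nll)  # mean of -log(0.7) and -log(0.8)
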
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10,
             corruption_levels=[0.1, 0.1]):
    """ This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the sdA

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network

    :type corruption_levels: list of float
    :param corruption_levels: amount of corruption to use for each layer
    """
    self.sigmoid_layers = []
    self.dA_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector
                             # of [int] labels

    for i in range(self.n_layers):
        # n sigmoid layers and n dA layers

        # the size of the input is either the number of hidden units of
        # the layer below, or the input size for the first layer
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        # the input to this layer is either:
        # the activation of the hidden layer below,
        # or the input to the SdA if you are the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)

        # construct a denoising autoencoder that shares weights with
        # this sigmoid layer
        dA_layer = DenoisingAutoEncoder(numpy_rng=numpy_rng,
                                        theano_rng=theano_rng,
                                        input=layer_input,
                                        n_visible=input_size,
                                        n_hidden=hidden_layers_sizes[i],
                                        W=sigmoid_layer.W,
                                        bhid=sigmoid_layer.b)
        self.dA_layers.append(dA_layer)

    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)

def __init__(self, data):
    """Builds the network.

    Args:
        data: An instance of the class Dataset. The constructor actually
            only needs to know the input and output number of neurons from
            the dataset. However, we directly pass the whole Dataset
            instance, such that we don't have to pass it anymore in future
            function calls.

    Returns:
    """
    self._data = data

    # Determine number of hidden layers.
    num_hlayers = config.num_hidden_layers
    if isinstance(config.hidden_layer_sizes, list):
        num_hlayers = len(config.hidden_layer_sizes)

    # Check if configurations are consistent.
    if isinstance(config.lateral_inhibition_window, list) \
            and len(config.lateral_inhibition_window) != num_hlayers + 1:
        raise ConfigException('The length of the option list '
                              + '\'lateral_inhibition_window\' does not'
                              + ' match the number of layers specified.')

    # Determine size of each layer.
    self._layer_sizes = []
    self._layer_sizes.append(data.input_size)
    if isinstance(config.hidden_layer_sizes, list):
        self._layer_sizes.extend(config.hidden_layer_sizes)
    else:
        self._layer_sizes.extend([config.hidden_layer_sizes] * num_hlayers)
    # Output layer size. The output layer has either as size the number of
    # classes or the user-defined size (if specified).
    if config.output_size is None:
        self._layer_sizes.append(data.output_size)
    else:
        self._layer_sizes.append(config.output_size)

    # Check and prepare equations
    equation_preparation.prepare_equations(num_hlayers + 1)

    # To compute firing rates of neurons, we have to store the spike counts
    # of neurons before the input rates have been changed (see above
    # comment). The input layer can be omitted.
    self._exc_prev_spike_count = []
    self._inh_prev_spike_count = [None]
    for i in range(len(self._layer_sizes)):
        self._exc_prev_spike_count.append(np.zeros(self._layer_sizes[i],
                                                   dtype=np.int64))
        if i > 0:
            self._inh_prev_spike_count.append(
                np.zeros(self._layer_sizes[i], dtype=np.int64))
    # In order to compute firing rates, we need to know the time difference
    # between the current time and a reference time.
    self._prev_simulation_time = np.float32(b2.defaultclock.t_)
    assert(self._prev_simulation_time == 0)

    # Layer-wise SpikeMonitors for exc. neurons.
    self._exc_spike_monitors = []
    # Layer-wise SpikeMonitors for inh. neurons (None for input layer).
    self._inh_spike_monitors = []

    # Excitatory NeuronGroup of each layer.
    self._exc_layer = []
    # Inhibitory NeuronGroup of each layer (will be None for input layer).
    self._inh_layer = []
    # Feed-forward connections from excitatory neurons of one layer to the
    # next one (fully-connected).
    self._ee_synapses = []
    # Excitatory to inhibitory connections within layer.
    self._ei_synapses = []
    # Inhibitory to excitatory connections within layer.
    self._ie_synapses = []

    ### Input Layer
    # The input of the network will be a Poisson layer.
    self._input_group = b2.NeuronGroup(self._layer_sizes[0], 'rates : Hz',
                                       threshold='rand()<rates*dt',
                                       name='neurons_poisson_0')
    self._exc_layer.append(self._input_group)
    exc_sm_args, _ = Recordings.get_spike_monitor_args(0)
    self._exc_spike_monitors.append(
        b2.SpikeMonitor(self._input_group, variables=exc_sm_args[0],
                        record=exc_sm_args[1]))
    self._inh_layer.append(None)
    self._inh_spike_monitors.append(None)
    # There are no recurrent connections within the input layer.
    self._ei_synapses.append(None)
    self._ie_synapses.append(None)

    ### Hidden Layers + Output Layer
    # We can spawn a separate thread to set up each layer, as the setup
    # can be done independently.
    threads = []
    for i in range(num_hlayers + 1):
        if isinstance(config.lateral_inhibition_window, list):
            k = config.lateral_inhibition_window[i]
        else:
            k = config.lateral_inhibition_window
        threads.append(HiddenLayer(self._layer_sizes[i + 1], i + 1, k,
                                   str(i + 1)))

    if config.num_threads > 1:
        logger.warning('Multithreading during Network Initialization' +
                       ' has been disabled due to known issues.')
    thread_chunks = utils.yield_chunks(threads, 1)
    #thread_chunks = utils.yield_chunks(threads, config.num_threads)
    for tc in thread_chunks:
        logger.debug('Starting threads to create %d layer/s in parallel.'
                     % (len(tc)))
        for thread in tc:
            thread.start()

        for thread in tc:
            thread.join()

            exn = thread.exc_neurons
            inn = thread.inh_neurons
            eis = thread.ei_synapses
            ies = thread.ie_synapses

            l = len(self._exc_spike_monitors)
            exc_sm_args, inh_sm_args = Recordings.get_spike_monitor_args(l)

            self._exc_layer.append(exn)
            self._exc_spike_monitors.append(
                b2.SpikeMonitor(exn, variables=exc_sm_args[0],
                                record=exc_sm_args[1]))
            self._inh_layer.append(inn)
            self._inh_spike_monitors.append(
                b2.SpikeMonitor(inn, variables=inh_sm_args[0],
                                record=inh_sm_args[1]))
            self._ei_synapses.append(eis)
            self._ie_synapses.append(ies)

    ### Connect layers.
    for i in range(self.num_layers - 1):
        # Connect excitatory neurons of layer i with those of layer i+1.
        eq = config._equation_module
        ees = bw.synapses(self._exc_layer[i], self._exc_layer[i + 1],
                          eq.ee_model[i], eq.ee_method[i], eq.ee_on_pre[i],
                          eq.ee_on_post[i], eq.ee_delay[i],
                          eq.ee_namespace[i], eq.ee_initialization[i],
                          name='synapses_ee_' + str(i + 1),
                          connections=None,  # Fully-connected
                          layer=i + 1)
        self._ee_synapses.append(ees)

    ### Create the Brian simulation control center (Network)
    self._network = b2.Network()
    # Add all components to the network.
    self._network.add(self._exc_layer)
    self._network.add(self._inh_layer[1:])
    self._network.add(self._exc_spike_monitors)
    self._network.add(self._inh_spike_monitors[1:])
    self._network.add(self._ee_synapses)
    self._network.add(self._ei_synapses[1:])
    self._network.add(self._ie_synapses[1:])
    # Double-check correctness if one changes the code!
    #print(self._network.objects)

    # FIXME delete assertions
    assert(len(self._exc_layer) == self.num_layers)
    assert(len(self._inh_layer) == self.num_layers)
    assert(len(self._exc_spike_monitors) == self.num_layers)
    assert(len(self._inh_spike_monitors) == self.num_layers)
    assert(len(self._ei_synapses) == self.num_layers)
    assert(len(self._ie_synapses) == self.num_layers)

    self._eq_state = EqStateVars()
    self._eq_state.register(self)

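# The input layer above is a plain NeuronGroup driven through its 'rates'
# variable (a Poisson-like spiker). A minimal standalone Brian2 sketch of
# that input mechanism, independent of the Network class here (the rates
# are toy values):
import brian2 as b2

G = b2.NeuronGroup(4, 'rates : Hz', threshold='rand()<rates*dt')
mon = b2.SpikeMonitor(G)
G.rates = [5, 10, 20, 40] * b2.Hz
b2.run(200 * b2.ms)
print(mon.count[:])  # spike counts roughly proportional to the rates
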
def evaluate_cifar(learning_rate=0.001, n_epochs=100,
                   dataset_folder='cifar-10-batches-py',
                   nkerns=[16, 20, 20], batch_size=32):
    """ Network for classification of the CIFAR-10 dataset

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                          (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset_folder: string
    :param dataset_folder: the folder containing the batch files for cifar

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """
    rng = numpy.random.RandomState(23455)

    # loading the cifar data
    datasets = load_cifar_data(dataset_folder)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    # Reshape matrix of rasterized images of shape (batch_size, 32 * 32)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    # ((32, 32) is the size of CIFAR-10 images)
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 3, 32, 32),
                             p1=2, p2=2,
                             filter_shape=(nkerns[0], 3, 5, 5),
                             poolsize=(2, 2))

    # Construct the second convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 16, 16),
                             p1=2, p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2))

    # Construct the third convolutional pooling layer
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 8, 8),
                             p1=2, p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
    # or (32, 20 * 4 * 4) = (32, 320) with the default values.
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=5)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)
    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # the learning rate for batch SGD (adaptive learning rate)
    l_rate = T.scalar('l_rate', dtype=theano.config.floatX)

    # the momentum for SGD
    momentum = T.scalar('momentum', dtype=theano.config.floatX)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter, so we build the
    # updates list by looping over all (param, grad) pairs; each parameter
    # also carries a shared 'previous_step' for the momentum term.
    updates = []
    for param, grad in zip(params, grads):
        previous_step = theano.shared(param.get_value() * 0.,
                                      broadcastable=param.broadcastable)
        step = momentum * previous_step - l_rate * grad
        updates.append((previous_step, step))
        updates.append((param, param + step))

    train_model = theano.function(
        [index, l_rate, momentum],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('Training...')
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # initializing the adaptive learning rate
    adaptive_learning_rate = learning_rate
    # initializing the momentum
    momentum = 0.9

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        if epoch % 10 == 0:
            # decreasing the learning rate after every 10 epochs
            adaptive_learning_rate = 0.95 * adaptive_learning_rate
            # increasing the momentum after every 10 epochs
            momentum = 1.05 * momentum

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, adaptive_learning_rate,
                                  momentum)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # increase the learning rate by a small amount (adaptive)
                    adaptive_learning_rate = 1.01 * adaptive_learning_rate

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                else:
                    # decrease the learning rate by a small amount (adaptive)
                    adaptive_learning_rate = 0.5 * adaptive_learning_rate

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

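# The loop above decays the learning rate by 0.95 every 10 epochs, bumps it
# by 1.01 on each validation improvement, and halves it on each regression.
# A self-contained trace of the periodic decay component alone:
lr = 0.001
for epoch in range(1, 31):
    if epoch % 10 == 0:
        lr *= 0.95
print('%.6f' % lr)  # 0.001 * 0.95 ** 3
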
    def __init__(self, corpus, n_emb, n_hidden, batch_size, conv_size,
                 pooling, rng=None, th_rng=None, load_from=None,
                 gensim_w2v=None):
        '''
        n_hidden: output conv stack size
        conv_size: filter height size
        '''
        self.corpus = corpus
        self.n_emb = n_emb
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.conv_size = conv_size
        self.pooling = pooling
        assert pooling in ('mean', 'max')

        if rng is None:
            rng = np.random.RandomState(1226)
        if th_rng is None:
            th_rng = RandomStreams(1226)

        # x/mask: (batch size, nsteps)
        x = T.matrix('x', dtype='int32')
        mask = T.matrix('mask', dtype=theano.config.floatX)
        y = T.vector('y', dtype='int32')
        batch_idx_seq = T.vector('index', dtype='int32')
        use_noise = theano.shared(th_floatX(0.))
        self.x, self.mask, self.y, self.batch_idx_seq, self.use_noise = x, mask, y, batch_idx_seq, use_noise

        # no need to transpose x/mask for the CNN
        n_samples, n_steps = x.shape
        # transpose the mask matrix to be consistent with pooling-layer inputs
        trans_mask = mask.T
        # truncate the mask matrix to be consistent with the conv outputs
        trunc_mask = trans_mask[(conv_size - 1):]

        # list of model layers
        model_layers = []
        model_layers.append(
            EmbLayer(x,
                     load_from=load_from,
                     rand_init_params=(rng, (corpus.dic.size, n_emb)),
                     gensim_w2v=gensim_w2v,
                     dic=corpus.dic))
        # emb-out: (batch size, n_words/steps, emb_dim)
        # conv-in: (batch size, 1 (input stack size), n_words/steps, emb_dim)
        # conv-out: (batch size, n_hidden (output stack size),
        #            output feature map height, 1 (output feature map width))
        # pooling-in: (output feature map height, batch size, output stack size)
        conv_in = model_layers[-1].outputs[:, None, :, :]
        model_layers.append(
            ConvLayer(conv_in,
                      image_shape=(batch_size, 1, corpus.maxlen, n_emb),
                      load_from=load_from,
                      rand_init_params=(rng, (n_hidden, 1, conv_size, n_emb))))
        pooling_in = T.transpose(model_layers[-1].outputs.flatten(3),
                                 axes=(2, 0, 1))
        if pooling == 'mean':
            model_layers.append(MeanPoolingLayer(pooling_in, trunc_mask))
        else:
            model_layers.append(MaxPoolingLayer(pooling_in, trunc_mask))
        model_layers.append(
            DropOutLayer(model_layers[-1].outputs, use_noise, th_rng))
        model_layers.append(
            HiddenLayer(model_layers[-1].outputs,
                        activation=T.nnet.softmax,
                        load_from=load_from,
                        rand_init_params=(rng, (n_hidden, corpus.n_type))))
        self.model_layers = model_layers

        model_params = []
        for layer in model_layers:
            model_params += layer.params

        self.pred_prob = model_layers[-1].outputs
        self.pred = T.argmax(self.pred_prob, axis=1)
        off = 1e-8
        self.cost = -T.mean(
            T.log(self.pred_prob[T.arange(n_samples), y] + off))

        # attributes with the `func` suffix are compiled functions
        self.predict_func = theano.function(inputs=[x, mask],
                                            outputs=self.pred)
        self.predict_prob_func = theano.function(inputs=[x, mask],
                                                 outputs=self.pred_prob)

        grads = T.grad(self.cost, model_params)
        self.gr_updates, self.gr_sqr_updates, self.dp_sqr_updates, self.param_updates = ada_updates(
            model_params, grads)
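# A minimal NumPy sketch (hypothetical helper, inferred semantics) of what a
# mean-pooling layer with a truncated mask computes: an average over valid
# timesteps only, so padded positions do not dilute the sentence vector.
import numpy as np

def masked_mean_pool(seq_feats, mask):
    # seq_feats: (n_steps, batch, n_hidden); mask: (n_steps, batch) of 0/1
    weighted = seq_feats * mask[:, :, None]
    return weighted.sum(axis=0) / mask.sum(axis=0)[:, None]

feats = np.ones((4, 2, 3))
mask = np.array([[1, 1], [1, 1], [1, 0], [1, 0]], dtype=float)
print(masked_mean_pool(feats, mask))  # all ones: padding is ignored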
def generate_segmented_image_tensors(img_by_1, img_by_2, img_by_4, model_dir,
                                     batch_size, height, width,
                                     num_of_classes):
    layer_0_W = numpy.load(model_dir + '/params/layer_0_W.npy')
    layer_0_b = numpy.load(model_dir + '/params/layer_0_b.npy')
    layer_1_W = numpy.load(model_dir + '/params/layer_1_W.npy')
    layer_1_b = numpy.load(model_dir + '/params/layer_1_b.npy')
    layer_2_W = numpy.load(model_dir + '/params/layer_2_W.npy')
    layer_2_b = numpy.load(model_dir + '/params/layer_2_b.npy')
    layer_3_W = numpy.load(model_dir + '/params/layer_3_W.npy')
    layer_3_b = numpy.load(model_dir + '/params/layer_3_b.npy')
    layer_4_W = numpy.load(model_dir + '/params/layer_4_W.npy')
    layer_4_b = numpy.load(model_dir + '/params/layer_4_b.npy')
    num_kernels = numpy.load(model_dir + '/params/filer_kernels.npy')
    filter_size = numpy.load(model_dir + '/params/filter_size.npy')

    # characters at fixed offsets in the model directory name encode the
    # configuration: multi-/single-scale, hidden/logistic-only,
    # interpolation on/off
    if model_dir[11] == 'M':
        is_multi_scale = True
    elif model_dir[11] == 'S':
        is_multi_scale = False
    else:
        return NotImplemented

    if model_dir[13] == 'H':
        use_hidden_layer = True
    elif model_dir[13] == 'L':
        use_hidden_layer = False
    else:
        return NotImplemented

    if model_dir[15] == 'I':
        use_interpolation = True
    elif model_dir[15] == 'L':
        use_interpolation = False
    else:
        return NotImplemented

    layer0 = CNN_Layer(
        name='Layer_0',
        W=layer_0_W,
        b=layer_0_b,
        filter_shape=(num_kernels[0], 3, filter_size, filter_size),
    )
    layer1 = CNN_Layer(
        name='Layer_1',
        W=layer_1_W,
        b=layer_1_b,
        filter_shape=(num_kernels[1], num_kernels[0], filter_size,
                      filter_size),
    )
    layer2 = CNN_Layer(
        name='Layer_2',
        W=layer_2_W,
        b=layer_2_b,
        filter_shape=(num_kernels[2], num_kernels[1], filter_size,
                      filter_size),
    )
    layer3 = HiddenLayer(
        name='Layer_3',
        W=layer_3_W,
        b=layer_3_b,
        n_in=num_kernels[2] * 3 if is_multi_scale is True else num_kernels[2],
        n_out=num_kernels[2] * 4
        if is_multi_scale is True else num_kernels[2] * 2,
        activation=theano.tensor.tanh)
    layer4 = LogisticRegression(
        name='Layer_4',
        W=layer_4_W,
        b=layer_4_b,
        n_in=num_kernels[2] * 4
        if is_multi_scale is True else num_kernels[2] * 2,
        n_out=num_of_classes,
    )

    x_by_1 = theano.tensor.ftensor4('x_by_1')
    x_by_2 = theano.tensor.ftensor4('x_by_2')
    x_by_4 = theano.tensor.ftensor4('x_by_4')

    forward_propagation(
        layer0=layer0,
        layer1=layer1,
        layer2=layer2,
        layer3=layer3,
        layer4=layer4,
        x_by_1=x_by_1,
        x_by_2=x_by_2,
        x_by_4=x_by_4,
        num_kernels=num_kernels,
        batch_size=batch_size,
        filter_size=filter_size,
        is_multi_scale=is_multi_scale,
        height=height,
        width=width,
        use_interpolation=use_interpolation,
        use_hidden_layer=use_hidden_layer,
    )

    # create a function to compute the predictions made by the model
    if is_multi_scale is True:
        test_model = theano.function([x_by_1, x_by_2, x_by_4],
                                     layer4.y_prediction)
    else:
        test_model = theano.function([x_by_1], layer4.y_prediction)

    if is_multi_scale is True:
        op = test_model(img_by_1, img_by_2, img_by_4)
    else:
        op = test_model(img_by_1)

    y = theano.tensor.reshape(op, (batch_size, height, width))
    return y.eval()
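# The parameter files loaded above are plain .npy arrays; a minimal sketch of
# the save/load round trip (hypothetical path and shape):
import numpy as np

W = np.random.randn(16, 3, 5, 5).astype('float32')
np.save('/tmp/layer_0_W.npy', W)
assert np.array_equal(W, np.load('/tmp/layer_0_W.npy'))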
def evaluate_model(learning_rate=0.001,
                   n_epochs=100,
                   nkerns=[16, 40, 50, 60],
                   batch_size=20):
    """ Network for classification

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                          (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """
    print("Evaluating model")
    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_test_data()
    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    # load the saved parameters and hand them to the layers below, so the
    # model is evaluated with the trained weights instead of random ones
    loaded_params = numpy.load('../saved_models/model.npy')
    layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, \
        layer1_W, layer1_b, layer0_W, layer0_b = loaded_params

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    chosen_height = 64
    chosen_width = 64

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 64 * 64)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, chosen_height, chosen_width))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (64/2, 64/2) = (32, 32)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 32, 32)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 3, chosen_height,
                                          chosen_width),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 3, 5, 5),
                             poolsize=(2, 2),
                             W=layer0_W,
                             b=layer0_b)

    # Construct the second convolutional pooling layer:
    # maxpooling reduces the size to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0],
                                          chosen_height // 2,
                                          chosen_width // 2),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2),
                             W=layer1_W,
                             b=layer1_b)

    # Construct the third convolutional pooling layer
    # maxpooling reduces the size to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 8, 8)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1],
                                          chosen_height // 4,
                                          chosen_width // 4),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2),
                             W=layer2_W,
                             b=layer2_b)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 8 * 8).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * (chosen_height // 8) *
                         (chosen_width // 8),
                         n_out=800,
                         activation=T.tanh,
                         W=layer3_W,
                         b=layer3_b)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
                                n_in=800,
                                n_out=6,
                                W=layer4_W,
                                b=layer4_b)

    cost = layer4.negative_log_likelihood(y)
    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # evaluate the loaded model on the test and validation sets
    test_losses = [test_model(i) for i in range(n_test_batches)]
    validation_losses = [validate_model(i) for i in range(n_valid_batches)]
    test_score = numpy.mean(test_losses)
    validation_score = numpy.mean(validation_losses)
    print(('    Validation error is %f %%') % (validation_score * 100.))
    print(('    Test error is %f %%') % (test_score * 100.))
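# Why n_in above is nkerns[2] * (chosen_height // 8) * (chosen_width // 8):
# padding keeps each conv output at the input size, and each of the three
# 2x2 max-poolings halves both spatial dimensions (a quick check):
h, w = 64, 64
for _ in range(3):
    h, w = h // 2, w // 2
print(h, w)  # 8 8 -> flattened per-image size is nkerns[2] * 8 * 8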
def evaluate_model(learning_rate=0.001,
                   n_epochs=100,
                   nkerns=[16, 40, 50, 60],
                   batch_size=20):
    """ Network for classification

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                          (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """
    print("Evaluating model")
    rng = numpy.random.RandomState(23455)

    # loading dataset 1
    datasets = load_test_data(1)
    valid_set_x, valid_set_y = datasets[0]
    test_set_x, test_set_y = datasets[1]

    # compute number of minibatches for validation and testing
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    loaded_params = numpy.load('../saved_models/model1.npy')
    layer4_W, layer4_b, layer3_W, layer3_b, layer2_W, layer2_b, \
        layer1_W, layer1_b, layer0_W, layer0_b = loaded_params

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    # Reshape matrix of rasterized images of shape (batch_size, 64 * 88)
    # to a 4D tensor, compatible with our MyConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 64, 88))

    # Construct the first convolutional pooling layer:
    # filtering does not reduce the layer size because we use padding
    # maxpooling reduces the size to (64/2, 88/2) = (32, 44)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 32, 44)
    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 1, 64, 88),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 1, 5, 5),
                             poolsize=(2, 2),
                             W=layer0_W,
                             b=layer0_b)

    # Construct the second convolutional pooling layer:
    # maxpooling reduces the size to (32/2, 44/2) = (16, 22)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 22)
    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 32, 44),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2),
                             W=layer1_W,
                             b=layer1_b)

    # Construct the third convolutional pooling layer
    # maxpooling reduces the size to (16/2, 22/2) = (8, 11)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 8, 11)
    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 16, 22),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2),
                             W=layer2_W,
                             b=layer2_b)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 8 * 11).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 8 * 11,
                         n_out=800,
                         activation=T.tanh,
                         W=layer3_W,
                         b=layer3_b)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
                                n_in=800,
                                n_out=6,
                                W=layer4_W,
                                b=layer4_b)

    cost = layer4.negative_log_likelihood(y)
    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    val_model_preds = theano.function(
        [index],
        layer4.prediction(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # flatten the per-batch predictions into a single list
    val_preds = [val_model_preds(i) for i in range(n_valid_batches)]
    preds = []
    for pred in val_preds:
        for p in pred:
            preds.append(p)

    actual_labels = load_test_data(1, 2)
    n = len(actual_labels)

    # rows index the actual class, columns the predicted class
    confusion_matrix = numpy.zeros((6, 6))
    for i in range(n):
        confusion_matrix[int(actual_labels[i])][preds[i]] += 1
    print(confusion_matrix)

    correct = 0.0
    for i in range(n):
        if preds[i] == int(actual_labels[i]):
            correct += 1.0
    accuracy = correct / n
    print("Number of correctly classified:", correct)
    print("Test accuracy is", accuracy * 100)
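# The confusion-matrix/accuracy logic above in isolation, as a minimal sketch
# with toy labels (rows index the actual class, columns the prediction):
import numpy as np

def confusion_and_accuracy(y_true, y_pred, n_classes):
    cm = np.zeros((n_classes, n_classes), dtype=int)
    for t, p in zip(y_true, y_pred):
        cm[int(t)][int(p)] += 1
    return cm, np.trace(cm) / float(len(y_true))

cm, acc = confusion_and_accuracy([0, 1, 1, 2], [0, 1, 2, 2], n_classes=3)
print(cm)
print(acc * 100)  # 75.0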
def main():
    rng = np.random.RandomState(23455)
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = 500
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    nkerns = [20, 50]

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    # reshape expects a single tuple argument
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(rng,
                                layer0_input,
                                filter_shape=(nkerns[0], 1, 5, 5),
                                image_shape=(batch_size, 1, 28, 28),
                                poolsize=(2, 2))
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                poolsize=(2, 2))
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500)
    layer3 = LogisticRegression(layer2.output, n_in=500, n_out=10)
    cost = layer3.negative_log_likelihood(y)

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)
    learning_rate = 0.1
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print('Start training...')
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    n_epochs = 200
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_score = 0.
epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) # NOQA if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break
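# The patience-based early stopping above, reduced to its core rule (a
# minimal sketch with toy numbers): patience is extended only when the
# validation loss beats the previous best by the improvement threshold.
patience, patience_increase, improvement_threshold = 10000, 2, 0.995
best_loss = float('inf')

def update_patience(iter_, loss, best_loss, patience):
    if loss < best_loss * improvement_threshold:
        patience = max(patience, iter_ * patience_increase)
    return min(loss, best_loss), patience

best_loss, patience = update_patience(6000, 0.20, best_loss, patience)
best_loss, patience = update_patience(9000, 0.21, best_loss, patience)  # worse
print(patience <= 9000)  # False: keep training, patience grew to 12000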
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.parameters = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with different denoising autoencoders.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During fine-tuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input_units=input_size,
                                        output_units=hidden_layers_sizes[i],
                                        nonlinear_function=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA;
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.parameters.extend(sigmoid_layer.parameters)

            # Construct a denoising autoencoder that shares weights with
            # this layer
            dA_layer = DenoisingAutoencoder(numpy_rng=numpy_rng,
                                            theano_rng=theano_rng,
                                            n_visible=input_size,
                                            n_hidden=hidden_layers_sizes[i],
                                            W=sigmoid_layer.weights,
                                            bhid=sigmoid_layer.biases)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticClassifier(
            input_units=hidden_layers_sizes[-1], output_units=n_outs)
        self.parameters.extend(self.logLayer.parameters)
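# The key idea of the SdA construction above, sketched in NumPy (hypothetical
# names): the autoencoder and the sigmoid layer reference the same weight
# array, so pretraining updates are immediately visible to the supervised MLP.
import numpy as np

W = np.zeros((4, 3))
hidden_layer_W = W   # the MLP layer's weights
dA_W = W             # the autoencoder's weights: same object, not a copy
dA_W += 0.5          # an in-place pretraining update
print(hidden_layer_W[0, 0])  # 0.5 -- the MLP sees the change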
def __init__(self, rng, input, n_in, n_hidden, n_out): """Initialize the parameters for the multilayer perceptron :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type input: theano.tensor.TensorType :param input: symbolic variable that describes the input of the architecture (one minibatch) :type n_in: int :param n_in: number of input units, the dimension of the space in which the datapoints lie :type n_hidden: int :param n_hidden: number of hidden units :type n_out: int :param n_out: number of output units, the dimension of the space in which the labels lie """ # Since we are dealing with a one hidden layer MLP, this will translate # into a HiddenLayer with a tanh activation function connected to the # LogisticRegression layer; the activation function can be replaced by # sigmoid or any other nonlinear function self.hiddenLayer = HiddenLayer( rng=rng, input=input, n_in=n_in, n_out=n_hidden, activation=T.tanh ) # The logistic regression layer gets as input the hidden units # of the hidden layer self.logRegressionLayer = LogisticRegression( input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out ) # Enforce L1 norm to be small self.L1 = ( abs(self.hiddenLayer.W).sum() + abs(self.logRegressionLayer.W).sum() ) # Enforce square of L2 norm to be small self.L2_sqr = ( (self.hiddenLayer.W ** 2).sum() + (self.logRegressionLayer.W ** 2).sum() ) # negative log likelihood of MLP is negative log likelihood of model # which is NLL of LR layer self.negative_log_likelihood = ( self.logRegressionLayer.negative_log_likelihood ) self.errors = self.logRegressionLayer.errors self.params = self.hiddenLayer.params + self.logRegressionLayer.params self.input = input
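# A minimal usage sketch for the constructor above, assuming the enclosing
# class is named MLP and that rng, x and y are defined as in the surrounding
# snippets; the regularization coefficients are illustrative. The fine-tuning
# cost combines the NLL with the L1/L2 terms defined above.
classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=500, n_out=10)
cost = (classifier.negative_log_likelihood(y)
        + 0.00 * classifier.L1
        + 0.0001 * classifier.L2_sqr)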
def evaluate_model(learning_rate=0.005,
                   n_epochs=50,
                   nkerns=[16, 40, 50, 60],
                   batch_size=32):
    """ Network for classification

    :type learning_rate: float
    :param learning_rate: this is the initial learning rate used
                          (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: the batch size for training
    """
    print("Evaluating model")
    rng = numpy.random.RandomState(23455)

    # loading the data
    datasets = load_data(3)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('Building the model...')

    layer0_input = x.reshape((batch_size, 1, 64, 88))

    layer0 = MyConvPoolLayer(rng,
                             input=layer0_input,
                             image_shape=(batch_size, 1, 64, 88),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[0], 1, 5, 5),
                             poolsize=(2, 2))

    layer1 = MyConvPoolLayer(rng,
                             input=layer0.output,
                             image_shape=(batch_size, nkerns[0], 32, 44),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[1], nkerns[0], 5, 5),
                             poolsize=(2, 2))

    layer2 = MyConvPoolLayer(rng,
                             input=layer1.output,
                             image_shape=(batch_size, nkerns[1], 16, 22),
                             p1=2,
                             p2=2,
                             filter_shape=(nkerns[2], nkerns[1], 5, 5),
                             poolsize=(2, 2))

    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * 8 * 11,
                         n_out=800,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=800, n_out=6)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)
    predicted_output = layer4.y_pred

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # the learning rate for batch SGD (adaptive learning rate)
    l_rate = T.scalar('l_rate', dtype=theano.config.floatX)

    # the momentum for SGD
    momentum = T.scalar('momentum', dtype=theano.config.floatX)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter.
    # We thus create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs, reusing the gradients computed above.
    updates = []
    for param, grad in zip(params, grads):
        previous_step = theano.shared(param.get_value() * 0.,
                                      broadcastable=param.broadcastable)
        step = momentum * previous_step - l_rate * grad
        updates.append((previous_step, step))
        updates.append((param, param + step))

    train_model = theano.function(
        [index, l_rate, momentum],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('Training...')
    # early-stopping parameters
    patience = 50000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # initializing the adaptive learning rate
    adaptive_learning_rate = learning_rate
    # initializing the momentum and the schedule constants
    momentum = 0.1
    a = 0.0001
    b = 0.3

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch % 5 == 0:
            # decrease the learning rate after every 5 epochs
            adaptive_learning_rate = 0.95 * adaptive_learning_rate
            # increasing the momentum on the same schedule is disabled:
            # momentum = 1.005 * momentum
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, adaptive_learning_rate,
                                  momentum)
            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # increase the learning rate by a small amount (adaptive)
                    adaptive_learning_rate += a
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # save the model parameters
                    print("Saving model")
                    save_filename = "../saved_models/model3"
                    saved_params = numpy.array([
                        layer4.W.get_value(), layer4.b.get_value(),
                        layer3.W.get_value(), layer3.b.get_value(),
                        layer2.W.get_value(), layer2.b.get_value(),
                        layer1.W.get_value(), layer1.b.get_value(),
                        layer0.W.get_value(), layer0.b.get_value()
                    ])
                    numpy.save(save_filename, saved_params)
                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1,
                           n_train_batches, test_score * 100.))
                else:
                    # cut the learning rate sharply when validation worsens
                    # (adaptive)
                    adaptive_learning_rate = adaptive_learning_rate - (
                        b * adaptive_learning_rate) + (0.01 * a)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
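# The asymmetric learning-rate schedule above in isolation (a minimal sketch):
# a tiny additive bump on improvement, a sharp multiplicative cut otherwise.
a, b = 0.0001, 0.3
lr = 0.005
for improved in (True, True, False, True):
    lr = lr + a if improved else lr - b * lr + 0.01 * a
    print('%.6f' % lr)  # 0.005100, 0.005200, 0.003641, 0.003741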
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM. We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for the second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
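# How layer sizes chain in the DBN above (a minimal sketch): the first hidden
# layer consumes the raw input, every later one consumes its predecessor.
n_ins, hidden_layers_sizes = 784, [500, 500]
sizes = [(n_ins if i == 0 else hidden_layers_sizes[i - 1], n_out)
         for i, n_out in enumerate(hidden_layers_sizes)]
print(sizes)  # [(784, 500), (500, 500)]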