class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first-layer RBM gets the input of the network as
    its input, and the hidden layer of the last RBM represents the output.
    When used for classification, the DBN is treated as an MLP by adding a
    logistic regression layer on top.
    """

    def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
                 lambda_reg=0.001, alpha_reg=0.001):
        """This class supports a variable number of layers.

        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw initial weights
            (a Theano random generator is created internally for the RBMs)

        :type n_in: int
        :param n_in: dimension of the input to the DBN

        :type n_hidden: list of ints
        :param n_hidden: sizes of the intermediate layers; must contain at
            least one value

        :type n_out: int
        :param n_out: dimension of the output of the network

        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
            The regularization term is
            lambda_reg*( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
            Thus, the larger lambda_reg is, the sparser the weights are.

        :type alpha_reg: float
        :param alpha_reg: parameter in the interval [0,1] to control the smoothness
            of weights by the squared l_2 norm.
            The regularization term is
            lambda_reg*( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
            Thus, the smaller alpha_reg is, the smoother the weights are.
        """
        self.hidden_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(n_hidden)
        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        for i in range(self.n_layers):
            # construct the sigmoidal layer;
            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]
            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if we are on the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            # only the parameters of the sigmoid layers are declared parameters
            # of the DBN; the visible biases of the RBMs are parameters of
            # those RBMs, but not of the DBN
            self.params.extend(sigmoid_layer.params)

            # construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=rng,
                            theano_rng=None,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=n_hidden[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # add a logistic regression layer on top of the MLP
        if self.n_layers > 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1], n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.x, n_in=input_size, n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization
        L1s = []
        L2_sqrs = []
        for i in range(self.n_layers):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # the cost for the second phase of training is the negative log
        # likelihood plus the elastic-net regularization term
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.cost = self.negative_log_likelihood + \
            lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred

    def pretraining_functions(self, train_set_x, batch_size, persistent_k=15):
        '''
        Build the symbolic pretraining functions that update the parameters of
        one RBM layer in one iteration.
        '''
        index = T.lscalar('index')                 # index to a minibatch
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # end of a batch, given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm_layer in self.rbm_layers:
            # get the cost and the updates list
            cost, updates = rbm_layer.get_cost_updates(learning_rate,
                                                       persistent=None,
                                                       k=persistent_k)
            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)
        return pretrain_fns

    def build_finetune_functions(self, train_set_x, train_set_y,
                                 valid_set_x, valid_set_y,
                                 batch_size, learning_rate_shared):
        '''
        Build the symbolic fine-tuning function that updates the parameters in
        one iteration. A validation function is also defined.
        '''
        # compute the number of minibatches for validation
        n_valid_batches = int(math.ceil(
            valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)

        # compute the list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]},
            name='train')

        # test_score_i = theano.function([index], self.errors,
        #     givens={
        #         self.x: test_set_x[index * batch_size:(index + 1) * batch_size],
        #         self.y: test_set_y[index * batch_size:(index + 1) * batch_size]},
        #     name='test')

        valid_score_i = theano.function(
            [index], self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        # def test_score():
        #     return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score

    def build_test_function(self, test_set_x, batch_size):
        """
        Build the symbolic test function.
        """
        n_test_batches = int(math.ceil(
            test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index], self.y_pred,
            givens={self.x: test_set_x[index * batch_size:(index + 1) * batch_size]},
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        # overwrite any existing file; pickle needs a binary stream
        with open(filename, 'wb') as f:
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
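# -----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original API). It shows the usual
# two-phase DBN recipe: greedy layer-wise RBM pretraining followed by
# supervised fine-tuning. It assumes the module-level imports above (numpy,
# theano, theano.tensor as T, math) and that the data sets are Theano shared
# variables (floatX features, int32 labels); `_example_train_dbn` and its
# hyperparameter values are hypothetical choices, not the authors' settings.
def _example_train_dbn(train_set_x, train_set_y, valid_set_x, valid_set_y,
                       batch_size=100, pretrain_epochs=10, finetune_epochs=100):
    rng = numpy.random.RandomState(1000)
    n_in = train_set_x.get_value(borrow=True).shape[1]
    model = DBN(rng=rng, n_in=n_in, n_hidden=[500, 500], n_out=10,
                lambda_reg=0.001, alpha_reg=0.001)

    n_train_batches = int(math.ceil(
        train_set_x.get_value(borrow=True).shape[0] / batch_size))

    # phase 1: greedy layer-wise pretraining of the stacked RBMs
    for fn in model.pretraining_functions(train_set_x, batch_size, persistent_k=15):
        for epoch in range(pretrain_epochs):
            for batch_index in range(n_train_batches):
                fn(batch_index, 0.1)  # second argument is the learning rate

    # phase 2: supervised fine-tuning of the whole network
    learning_rate_shared = theano.shared(
        numpy.asarray(0.1, dtype=theano.config.floatX))
    train_fn, valid_score = model.build_finetune_functions(
        train_set_x, train_set_y, valid_set_x, valid_set_y,
        batch_size, learning_rate_shared)
    for epoch in range(finetune_epochs):
        for batch_index in range(n_train_batches):
            train_fn(batch_index)
        print('epoch %d, validation error %f' % (epoch, numpy.mean(valid_score())))
    return model
# -----------------------------------------------------------------------------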
class DFS(object):
    """Deep feature selection class: a one-to-one input layer followed by an MLP.
    """

    def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None,
                 activation=T.tanh, lambda1=0.001, lambda2=1.0,
                 alpha1=0.001, alpha2=0.0):
        """Initialize the parameters for the DFS class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
            which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
            which the labels lie

        activation: activation function, from {T.tanh, T.nnet.sigmoid}

        lambda1: float scalar, controls the sparsity of the input weights.
            The regularization term is
            lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
            Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar in [0,1], controls the smoothness of the input weights
            within the same term. Thus, the smaller lambda2 is, the smoother
            the input weights are.

        alpha1: float scalar, controls the sparsity of the weight matrices of the MLP.
            The regularization term is
            alpha1*( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 * \sum||W_i||_1 ).
            Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar in [0,1], controls the smoothness of the MLP weights
            within the same term. Thus, the smaller alpha2 is, the smoother
            the MLP weights are.
        """
        if not x:
            x = T.matrix('x')
        self.x = x
        if not y:
            y = T.ivector('y')
        self.y = y

        self.hidden_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        # one-to-one input (feature selection) layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer

        # hidden layers
        for i in range(len(n_hidden)):
            if i == 0:  # first hidden layer
                hd = HiddenLayer(rng=rng, input=self.input_layer.output,
                                 n_in=n_in, n_out=n_hidden[i],
                                 activation=activation)
            else:
                hd = HiddenLayer(rng=rng, input=self.hidden_layers[i - 1].output,
                                 n_in=n_hidden[i - 1], n_out=n_hidden[i],
                                 activation=activation)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)

        # the logistic regression layer gets as input the hidden units
        # of the last hidden layer
        if len(n_hidden) <= 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output, n_in=n_in, n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization terms on the input layer
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w ** 2).sum()
        self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
        self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

        # regularization terms on the weights of the hidden and output layers
        L1s = []
        L2_sqrs = []
        #L1s.append(abs(self.hidden_layers[0].W).sum())
        for i in range(len(n_hidden)):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)

        # self.cost = self.negative_log_likelihood(self.y) \
        #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
        #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
        #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
        #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
        self.cost = self.negative_log_likelihood(self.y) \
            + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
            + lambda1 * lambda2 * self.L1_input \
            + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
            + alpha1 * alpha2 * self.L1
        self.y_pred = self.logRegressionLayer.y_pred

    def build_train_function(self, train_set_x, train_set_y, batch_size,
                             alpha, learning_rate_shared):
        """
        Build the symbolic training function that updates the parameters
        (with momentum coefficient `alpha`) and returns the minibatch cost.
        """
        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        grads = T.grad(self.cost, self.params)

        # add momentum: initialize delta_{t-1}
        delta_before = []
        for param_i in self.params:
            delta_before_i = theano.shared(value=numpy.zeros(param_i.get_value().shape))
            delta_before.append(delta_before_i)

        updates = []
        for param_i, grad_i, delta_before_i in zip(self.params, grads, delta_before):
            delta_i = -learning_rate_shared * grad_i + alpha * delta_before_i
            updates.append((param_i, param_i + delta_i))
            updates.append((delta_before_i, delta_i))

        train_model_cost = theano.function(
            [index], self.cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]},
            name='train')
        return train_model_cost

    def build_valid_function(self, valid_set_x, valid_set_y, batch_size):
        """
        Build the symbolic validation function.
        """
        n_valid_batches = int(math.ceil(
            valid_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch

        valid_error_i = theano.function(
            [index], self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
            name='valid')

        # Create a function that scans the entire validation set
        def valid_error():
            return [valid_error_i(i) for i in range(n_valid_batches)]

        return valid_error

    def build_test_function(self, test_set_x, batch_size):
        """
        Build the symbolic test function.
        """
        n_test_batches = int(math.ceil(
            test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_pred_i = theano.function(
            [index], self.y_pred,
            givens={self.x: test_set_x[index * batch_size:(index + 1) * batch_size]},
            name='test')

        # Create a function that scans the entire test set
        def test_pred():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_pred_i(i))
            return y_pred

        return test_pred

    def get_predicted(self, data):
        # propagate `data` through the hidden layers and the softmax output
        # layer (the one-to-one input layer is not applied here)
        for i in range(len(self.hidden_layers)):
            data = self.hidden_layers[i].get_predicted(data)
        p_y_given_x = T.nnet.softmax(T.dot(data, self.logRegressionLayer.W)
                                     + self.logRegressionLayer.b)
        y_pred = T.argmax(p_y_given_x, axis=1)
        return y_pred

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        # overwrite any existing file; pickle needs a binary stream
        with open(filename, 'wb') as f:
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
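# -----------------------------------------------------------------------------
# Illustrative usage sketch (hedged, not part of the original API) for the
# MLP-based DFS defined directly above: momentum training plus ranking of the
# input features by the magnitude of the learned one-to-one input weights.
# It assumes shared-variable data sets and the module-level imports, and that
# InputLayer.w is a Theano shared variable (as its use in self.params
# suggests). Note that a later class in this listing reuses the name DFS.
# `_example_train_dfs` and its hyperparameters are hypothetical.
def _example_train_dfs(train_set_x, train_set_y, valid_set_x, valid_set_y,
                       batch_size=100, n_epochs=100):
    rng = numpy.random.RandomState(1000)
    n_in = train_set_x.get_value(borrow=True).shape[1]
    model = DFS(rng=rng, n_in=n_in, n_hidden=[128], n_out=2,
                lambda1=0.01, lambda2=1.0, alpha1=0.001, alpha2=0.0)

    learning_rate_shared = theano.shared(
        numpy.asarray(0.1, dtype=theano.config.floatX))
    train_fn = model.build_train_function(train_set_x, train_set_y, batch_size,
                                          alpha=0.1,  # momentum coefficient
                                          learning_rate_shared=learning_rate_shared)
    valid_fn = model.build_valid_function(valid_set_x, valid_set_y, batch_size)

    n_train_batches = int(math.ceil(
        train_set_x.get_value(borrow=True).shape[0] / batch_size))
    for epoch in range(n_epochs):
        for batch_index in range(n_train_batches):
            train_fn(batch_index)
        print('epoch %d, validation error %f' % (epoch, numpy.mean(valid_fn())))

    # the one-to-one input weights serve as feature-importance scores
    w = model.input_layer.w.get_value(borrow=True)
    ranked_features = numpy.argsort(-numpy.abs(w).ravel())
    return model, ranked_features
# -----------------------------------------------------------------------------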
class DFS(object): """Deep feature selection class. This structure is input_layer + stacked RBM. """ def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10, lambda1=0, lambda2=0, alpha1=0, alpha2=0): """This class is made to support a variable number of layers. :type rng: numpy.random.RandomState :param rng: numpy random number generator used to draw initial weights :type n_in: int :param n_in: dimension of the input to the DFS :type n_hidden: list of ints :param n_hidden: intermediate layers size, must contain at least one value :type n_out: int :param n_out: dimension of the output of the network lambda1: float scalar, control the sparsity of the input weights. The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ). Thus, the larger lambda1 is, the sparser the input weights are. lambda2: float scalar, control the smoothness of the input weights. The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ). Thus, the larger lambda2 is, the smoother the input weights are. alpha1: float scalar, control the sparsity of the weight matrices in MLP. The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ). Thus, the larger alpha1 is, the sparser the MLP weights are. alpha2: float scalar, control the smoothness of the weight matrices in MLP. The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ). Thus, the larger alpha2 is, the smoother the MLP weights are. """ self.hidden_layers = [] self.rbm_layers = [] self.params = [] self.n_layers = len(n_hidden) assert self.n_layers > 0 # allocate symbolic variables for the data self.x = T.matrix('x') # the data is presented as rasterized images self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels # input layer input_layer = InputLayer(input=self.x, n_in=n_in) self.params.extend(input_layer.params) self.input_layer = input_layer # hidden layers for i in range(len(n_hidden)): if i == 0: input_hidden = self.input_layer.output n_in_hidden = n_in else: input_hidden = self.hidden_layers[i - 1].output n_in_hidden = n_hidden[i - 1] hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden, n_out=n_hidden[i], activation=T.nnet.sigmoid) self.hidden_layers.append(hd) self.params.extend(hd.params) # Construct an RBM that shared weights with this layer rbm_layer = RBM(numpy_rng=rng, theano_rng=None, input=input_hidden, n_visible=n_in_hidden, n_hidden=n_hidden[i], W=hd.W, hbias=hd.b) self.rbm_layers.append(rbm_layer) # The logistic regression layer gets as input the hidden units # of the hidden layer if len(n_hidden) <= 0: self.logRegressionLayer = LogisticRegression( input=self.input_layer.output, n_in=n_in, n_out=n_out) else: self.logRegressionLayer = LogisticRegression( input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out) self.params.extend(self.logRegressionLayer.params) # regularization terms on coefficients of input layer self.L1_input = abs(self.input_layer.w).sum() self.L2_input = (self.input_layer.w**2).sum() #self.hinge_loss_neg=(T.maximum(0,-self.input_layer.w)).sum() # penalize negative values #self.hinge_loss_pos=(T.maximum(0,self.input_layer.w)).sum() # # penalize positive values # regularization terms on weights of hidden layers L1s = [] L2_sqrs = [] for i in range(len(n_hidden)): L1s.append(abs(self.hidden_layers[i].W).sum()) L2_sqrs.append((self.hidden_layers[i].W**2).sum()) L1s.append(abs(self.logRegressionLayer.W).sum()) L2_sqrs.append((self.logRegressionLayer.W**2).sum()) 
self.L1 = T.sum(L1s) self.L2_sqr = T.sum(L2_sqrs) # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood # same holds for the function computing the number of errors self.errors = self.logRegressionLayer.errors(self.y) # self.cost = self.negative_log_likelihood(self.y) \ # + lambda1*(1.0-lambda2)*0.5*self.L2_input \ # + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \ # + lambda1*lambda2*lambda3*self.hinge_loss_neg \ # + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1 self.cost = self.negative_log_likelihood(self.y) \ + lambda1*(1.0-lambda2)*0.5*self.L2_input \ + lambda1*lambda2*self.L1_input \ + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1 self.y_pred = self.logRegressionLayer.y_pred def get_params(self): return copy.deepcopy(self.params) def set_params(self, given_params): self.params = given_params def print_params(self): for param in self.params: print(param.get_value(borrow=True)) def pretraining_functions(self, train_set_x, batch_size, persistent_k=15): ''' Build the symbolic pretraining functions to update the parameter in one iteration. ''' # index to a [mini]batch index = T.lscalar('index') # index to a minibatch learning_rate = T.scalar('learning_rate') # learning rate to use # number of batches #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size)) # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size pretrain_fns = [] for rbm_layer in self.rbm_layers: # get the cost and the updates list cost, updates = rbm_layer.get_cost_updates(learning_rate, persistent=None, k=persistent_k) # compile the theano function fn = theano.function( inputs=[index, theano.Param(learning_rate, default=0.1)], outputs=cost, updates=updates, givens={self.x: train_set_x[batch_begin:batch_end]}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x, valid_set_y, batch_size, learning_rate_shared): ''' Build symbolic funetuning functions for training and validating. 
''' # compute number of minibatches for training, validation and testing n_valid_batches = int( math.ceil( valid_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters gparams = T.grad(self.cost, self.params) # compute list of fine-tuning updates updates = [] for param, gparam in zip(self.params, gparams): updates.append((param, param - gparam * learning_rate_shared)) train_fn = theano.function( inputs=[index], outputs=self.cost, updates=updates, givens={ self.x: train_set_x[index * batch_size:(index + 1) * batch_size], self.y: train_set_y[index * batch_size:(index + 1) * batch_size] }, name='train') # test_score_i = theano.function([index], self.errors, # givens={ # self.x: test_set_x[index * batch_size: # (index + 1) * batch_size], # self.y: test_set_y[index * batch_size: # (index + 1) * batch_size]}, # name='test') valid_score_i = theano.function( [index], self.errors, givens={ self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size] }, name='valid') # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set # def test_score(): # return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score def build_test_function(self, test_set_x, batch_size): """ Build a symbolic test function. """ n_test_batches = int( math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch test_score_i = theano.function( [index], self.y_pred, givens={ self.x: test_set_x[index * batch_size:(index + 1) * batch_size] }, name='test') # Create a function that scans the entire test set def test_score(): y_pred = [] for i in range(n_test_batches): y_pred.extend(test_score_i(i)) return y_pred return test_score
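# -----------------------------------------------------------------------------
# Illustrative usage sketch (hedged): the RBM-based DFS above is driven like
# the DBN class, i.e. layer-wise RBM pretraining followed by supervised
# fine-tuning, with the one-to-one input layer learned during fine-tuning.
# The helper and its hyperparameters are hypothetical; data sets are assumed
# to be Theano shared variables and the module-level imports are reused.
def _example_pretrain_finetune_dfs(train_set_x, train_set_y,
                                   valid_set_x, valid_set_y,
                                   batch_size=100, pretrain_epochs=10,
                                   finetune_epochs=100):
    rng = numpy.random.RandomState(1000)
    n_in = train_set_x.get_value(borrow=True).shape[1]
    model = DFS(rng=rng, n_in=n_in, n_hidden=[256, 64], n_out=2,
                lambda1=0.01, lambda2=1.0, alpha1=0.001, alpha2=0.0)

    n_train_batches = int(math.ceil(
        train_set_x.get_value(borrow=True).shape[0] / batch_size))
    # unsupervised pretraining of the stacked RBMs
    for fn in model.pretraining_functions(train_set_x, batch_size, persistent_k=15):
        for epoch in range(pretrain_epochs):
            for batch_index in range(n_train_batches):
                fn(batch_index, 0.1)  # second argument is the learning rate

    # supervised fine-tuning of the whole network, including the input layer
    learning_rate_shared = theano.shared(
        numpy.asarray(0.1, dtype=theano.config.floatX))
    train_fn, valid_score = model.build_finetune_functions(
        train_set_x, train_set_y, valid_set_x, valid_set_y,
        batch_size, learning_rate_shared)
    for epoch in range(finetune_epochs):
        for batch_index in range(n_train_batches):
            train_fn(batch_index)
    return model, numpy.mean(valid_score())
# -----------------------------------------------------------------------------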
class cnn(object):
    """
    The CNN class: a stack of LeNet-style convolution/pooling layers, a
    fully connected hidden layer, and a logistic regression output layer.
    """

    def __init__(self, rng, batch_size=100, input_size=None,
                 nkerns=[4, 4, 4], receptive_fields=((2, 8), (2, 8), (2, 8)),
                 poolsizes=((1, 8), (1, 8), (1, 4)), full_hidden=16, n_out=10):
        """
        rng: numpy.random.RandomState, random number generator for weight initialization.
        batch_size: int, minibatch size (fixed at graph-compilation time).
        input_size: tuple (rows, columns) of each input feature map.
        nkerns: list of ints, number of feature maps in each convolutional layer.
        receptive_fields: tuple of 2-tuples, filter shape of each convolutional layer.
        poolsizes: tuple of 2-tuples, max-pooling shape of each layer.
        full_hidden: int, number of units in the fully connected layer.
        n_out: int, number of classes.
        """
        self.x = T.matrix(name='x', dtype=theano.config.floatX)  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
        self.batch_size = theano.shared(value=batch_size, name='batch_size')  # T.lscalar('batch_size')

        self.layers = []
        self.params = []
        for i in range(len(nkerns)):
            receptive_field = receptive_fields[i]
            if i == 0:
                featmap_size_after_downsample = input_size
                layeri_input = self.x.reshape(
                    (batch_size, 1, featmap_size_after_downsample[0],
                     featmap_size_after_downsample[1]))
                image_shape = (batch_size, 1, featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], 1, receptive_field[0], receptive_field[1])
            else:
                layeri_input = self.layers[i - 1].output
                image_shape = (batch_size, nkerns[i - 1],
                               featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], nkerns[i - 1],
                                receptive_field[0], receptive_field[1])

            layeri = LeNetConvPoolLayer(rng=rng, input=layeri_input,
                                        image_shape=image_shape,
                                        filter_shape=filter_shape,
                                        poolsize=poolsizes[i])
            featmap_size_after_conv = get_featmap_size_after_conv(
                featmap_size_after_downsample, receptive_fields[i])
            featmap_size_after_downsample = get_featmap_size_after_downsample(
                featmap_size_after_conv, poolsizes[i])
            self.layers.append(layeri)
            self.params.extend(layeri.params)

        # fully connected layer
        print('going to fully connected layer')
        layer_full_input = self.layers[-1].output.flatten(2)
        # construct a fully-connected sigmoidal layer
        layer_full = HiddenLayer(rng=rng, input=layer_full_input,
                                 n_in=nkerns[-1] * featmap_size_after_downsample[0]
                                 * featmap_size_after_downsample[1],
                                 n_out=full_hidden, activation=T.tanh)
        self.layers.append(layer_full)
        self.params.extend(layer_full.params)

        # classify the values of the fully-connected sigmoidal layer
        print('going to output layer')
        self.logRegressionLayer = LogisticRegression(
            input=self.layers[-1].output, n_in=full_hidden, n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # the cost we minimize during training is the NLL of the model
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.cost = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred

    def build_train_function(self, train_set_x, train_set_y, batch_size,
                             alpha, learning_rate_shared):
        """
        Build the symbolic training function that updates the parameters
        (with momentum coefficient `alpha`) in one iteration.
        """
        index = T.lscalar('index')  # index to a [mini]batch
        #batch_size_var = T.lscalar('batch_size_var')  # batch_size

        # compute the gradients with respect to the model parameters
        grads = T.grad(self.cost, self.params)

        # add momentum: initialize delta_{t-1}
        delta_before = []
        for param_i in self.params:
            delta_before_i = theano.shared(value=numpy.zeros(param_i.get_value().shape))
            delta_before.append(delta_before_i)

        updates = []
        for param_i, grad_i, delta_before_i in zip(self.params, grads, delta_before):
            delta_i = -learning_rate_shared * grad_i + alpha * delta_before_i
            updates.append((param_i, param_i + delta_i))
            updates.append((delta_before_i, delta_i))

        train_model_cost = theano.function(
            [index], self.cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]})
        return train_model_cost

    def build_valid_function(self, valid_set_x, valid_set_y, batch_size):
        """
        Build the symbolic validation function that returns the validation error.
        """
        n_valid = valid_set_x.get_value(borrow=True).shape[0]  # number of validation samples
        n_valid_batches = n_valid // batch_size  # int(math.ceil(n_valid / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch
        #batch_size_var = T.lscalar('batch_size_var')  # batch_size
        valid_error_i = theano.function(
            [index], self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
            name='valid')

        # Create a function that scans the entire validation set
        def valid_error():
            return [valid_error_i(i) for i in range(n_valid_batches)]
            # errors = []
            # for i in range(n_valid_batches):
            #     if i == n_valid_batches - 1:
            #         batch_size_current = n_valid - i * batch_size
            #     else:
            #         batch_size_current = batch_size
            #     errors.extend(valid_error_i(i, batch_size_current))
            # return errors

        return valid_error

    def build_test_function(self, test_set_x):
        """
        Build the symbolic test function that returns predicted class labels.
        """
        n_test = test_set_x.get_value(borrow=True).shape[0]
        batch_size = self.batch_size.get_value(borrow=True)
        n_test_batches = n_test // batch_size  # int(math.ceil(n_test / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        # batch_size_var = T.lscalar('batch_size_var')  # batch_size
        # test_pred_i = theano.function([index, batch_size_var], self.y_pred,
        #     givens={self.x: test_set_x[index * batch_size_var:(index + 1) * batch_size_var],
        #             self.batch_size: batch_size_var},
        #     name='test')
        test_pred_i = theano.function(
            [index], self.y_pred,
            givens={self.x: test_set_x[index * batch_size:(index + 1) * batch_size]},
            name='test')
        # predictions for the last (possibly partial) batch: the network always
        # needs a full batch, so predict on the trailing batch_size samples
        test_pred_last = theano.function(
            [], self.y_pred,
            givens={self.x: test_set_x[-batch_size:]},
            name='test')

        # Create a function that scans the entire test set
        def test_pred():
            y_pred = []
            y_pred = numpy.array(y_pred)
            for i in range(n_test_batches):
                # if i == n_test_batches - 1:
                #     batch_size_current = n_test - i * batch_size
                # else:
                #     batch_size_current = batch_size
                # y_pred.extend(test_pred_i(i, batch_size_current))
                y_pred = numpy.append(y_pred, test_pred_i(i))
            left_over = n_test % batch_size
            if left_over > 0:
                left_over_pred = test_pred_last()
                y_pred = numpy.append(y_pred, left_over_pred[-left_over:])
            return y_pred

        return test_pred

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        # overwrite any existing file; pickle needs a binary stream
        with open(filename, 'wb') as f:
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
class ScA(object): """ Stacked contractive auto-encoder class (ScA) """ def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10, lambda_reg=0.001, alpha_reg=0.001, batch_size=100): """ This class is made to support a variable number of layers. :type rng: numpy.random.RandomState :param rng: numpy random number generator used to draw initial weights :type n_in: int :param n_in: dimension of the input to the sdA :type n_layers_sizes: list of ints :param n_layers_sizes: intermediate layers size, must contain at least one value :type n_out: int :param n_out: dimension of the output of the network :type lambda_reg: float :param lambda_reg: paramter to control the sparsity of weights by l_1 norm. The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ). Thus, the larger lambda_reg is, the sparser the weights are. :type alpha_reg: float :param alpha_reg: paramter from interval [0,1] to control the smoothness of weights by squared l_2 norm. The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ), Thus, the smaller alpha_reg is, the smoother the weights are. :type batch_size: int :param batch_size: minibatch size """ self.hidden_layers = [] self.cA_layers = [] self.params = [] self.n_layers = len(n_hidden) assert self.n_layers > 0 # allocate symbolic variables for the data self.x = T.matrix('x') # the data, each row of which is a sample self.y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels for i in range(self.n_layers): # construct the sigmoidal layer # the size of the input is either the number of hidden units of # the layer below or the input size if we are on the first layer if i == 0: input_size = n_in else: input_size = n_hidden[i - 1] # the input to this layer is either the activation of the hidden # layer below or the input of the ScA if you are on the first # layer if i == 0: layer_input = self.x else: layer_input = self.hidden_layers[-1].output sigmoid_layer = HiddenLayer(rng=rng, input=layer_input, n_in=input_size, n_out=n_hidden[i], activation=T.nnet.sigmoid) # add the layer to our list of layers self.hidden_layers.append(sigmoid_layer) self.params.extend(sigmoid_layer.params) # Construct a contractive autoencoder that shared weights with this # layer cA_layer = cA(numpy_rng=rng, input=layer_input, n_visible=input_size, n_hidden=n_hidden[i], n_batchsize=batch_size, W=sigmoid_layer.W, bhid=sigmoid_layer.b) self.cA_layers.append(cA_layer) # We now need to add a logistic layer on top of the MLP if self.n_layers > 0: self.logRegressionLayer = LogisticRegression( input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out) else: self.logRegressionLayer = LogisticRegression(input=self.x, n_in=input_size, n_out=n_out) self.params.extend(self.logRegressionLayer.params) # regularization L1s = [] L2_sqrs = [] for i in range(self.n_layers): L1s.append(abs(self.hidden_layers[i].W).sum()) L2_sqrs.append((self.hidden_layers[i].W**2).sum()) L1s.append(abs(self.logRegressionLayer.W).sum()) L2_sqrs.append((self.logRegressionLayer.W**2).sum()) self.L1 = T.sum(L1s) self.L2_sqr = T.sum(L2_sqrs) # compute the cost for second phase of training, # defined as the negative log likelihood self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood( self.y) self.cost=self.negative_log_likelihood + \ lambda_reg * ( (1.0-alpha_reg)*0.5* self.L2_sqr + alpha_reg*self.L1) # compute the gradients with respect to the model parameters # symbolic variable that points to the number of errors made 
on the # minibatch given by self.x and self.y self.errors = self.logRegressionLayer.errors(self.y) self.y_pred = self.logRegressionLayer.y_pred def pretraining_functions(self, train_set_x, batch_size): ''' Generates a list of functions, each of them implementing one step in trainnig the cA corresponding to the layer with same index. The function will require as input the minibatch index, and to train a cA you just need to iterate, calling the corresponding function on all minibatch indexes. :type train_set_x: theano.tensor.TensorType :param train_set_x: Shared variable that contains all datapoints used for training the cA :type batch_size: int :param batch_size: size of a [mini]batch :type learning_rate: float :param learning_rate: learning rate used during training for any of the cA layers ''' index = T.lscalar('index') # index to a minibatch contraction_level = T.scalar( 'contraction_level') # % of corruption to use learning_rate = T.scalar('learning_rate') # learning rate to use # number of batches #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size)) # begining of a batch, given `index` batch_begin = index * batch_size # ending of a batch given `index` batch_end = batch_begin + batch_size pretrain_fns = [] for cA_layer in self.cA_layers: # get the cost and the updates list cost, updates = cA_layer.get_cost_updates(contraction_level, learning_rate) # compile the theano function fn = theano.function( inputs=[ index, theano.Param(contraction_level, default=0.1), theano.Param(learning_rate, default=0.1) ], outputs=[T.mean(cA_layer.L_rec), cA_layer.L_jacob], updates=updates, givens={self.x: train_set_x[batch_begin:batch_end]}) # append `fn` to the list of functions pretrain_fns.append(fn) return pretrain_fns def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x, valid_set_y, batch_size, learning_rate_shared): ''' Build symbolic funetuning functions for training and validating. ''' # compute number of minibatches for training, validation and testing n_valid_batches = int( math.ceil( valid_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters gparams = T.grad(self.cost, self.params) # compute list of fine-tuning updates updates = [] for param, gparam in zip(self.params, gparams): updates.append((param, param - gparam * learning_rate_shared)) train_fn = theano.function( inputs=[index], outputs=self.cost, updates=updates, givens={ self.x: train_set_x[index * batch_size:(index + 1) * batch_size], self.y: train_set_y[index * batch_size:(index + 1) * batch_size] }, name='train') # test_score_i = theano.function([index], self.errors, # givens={ # self.x: test_set_x[index * batch_size: # (index + 1) * batch_size], # self.y: test_set_y[index * batch_size: # (index + 1) * batch_size]}, # name='test') valid_score_i = theano.function( [index], self.errors, givens={ self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size] }, name='valid') # Create a function that scans the entire validation set def valid_score(): return [valid_score_i(i) for i in range(n_valid_batches)] # Create a function that scans the entire test set # def test_score(): # return [test_score_i(i) for i in xrange(n_test_batches)] return train_fn, valid_score def build_test_function(self, test_set_x, batch_size): """ Build symbolic test function. 
""" n_test_batches = int( math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch test_score_i = theano.function( [index], self.y_pred, givens={ self.x: test_set_x[index * batch_size:(index + 1) * batch_size] }, name='test') # Create a function that scans the entire test set def test_score(): y_pred = [] for i in range(n_test_batches): y_pred.extend(test_score_i(i)) return y_pred return test_score def get_params(self): return copy.deepcopy(self.params) def set_params(self, given_params): self.params = given_params def print_params(self): for param in self.params: print(param.get_value(borrow=True)) def save_params(self, filename): f = open(filename, 'w') # remove existing file f.close() f = open(filename, 'a') for param in self.params: pickle.dump(param.get_value(borrow=True), f) f.close()
class MLP(object): """Multi-Layer Perceptron Class A multilayer perceptron is a feedforward artificial neural network model that has one layer or more of hidden units and nonlinear activations. Intermediate layers usually have as activation function tanh or the sigmoid function (defined here by a ``HiddenLayer`` class) while the top layer is a softamx layer (defined here by a ``LogisticRegression`` class). """ def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh, lambda_reg=0.001, alpha_reg=0.0): """Initialize the parameters for the multilayer perceptron :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type n_in: int :param n_in: number of input units, the dimension of the space in which the datapoints lie :type n_hidden: int :param n_hidden: number of hidden units :type n_out: int :param n_out: number of output units, the dimension of the space in which the labels lie :type lambda_reg: float :param lambda_reg: paramter to control the sparsity of weights by l_1 norm. The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ). Thus, the larger lambda_reg is, the sparser the weights are. :type alpha_reg: float :param alpha_reg: paramter from interval [0,1] to control the smoothness of weights by squared l_2 norm. The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ), Thus, the smaller alpha_reg is, the smoother the weights are. """ self.hidden_layers=[] self.params=[] self.n_layers=len(n_hidden) if not x: x=T.matrix('x') self.x=x if not y: y=T.ivector('y') self.y=y for i in range(len(n_hidden)): if i==0: # first hidden layer hd=HiddenLayer(rng=rng, input=self.x, n_in=n_in, n_out=n_hidden[i], activation=activation) else: hd=HiddenLayer(rng=rng, input=self.hidden_layers[i-1].output, n_in=n_hidden[i-1], n_out=n_hidden[i], activation=activation) self.hidden_layers.append(hd) self.params.extend(hd.params) # The logistic regression layer gets as input the hidden units # of the hidden layer if self.n_layers>0: self.logRegressionLayer = LogisticRegression(input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out) else: self.logRegressionLayer = LogisticRegression(input=self.x, n_in=n_in, n_out=n_out) self.params.extend(self.logRegressionLayer.params) # regularization terms L1s=[] L2_sqrs=[] #L1s.append(abs(self.hidden_layers[0].W).sum()) for i in range(len(n_hidden)): L1s.append (abs(self.hidden_layers[i].W).sum()) L2_sqrs.append((self.hidden_layers[i].W ** 2).sum()) L1s.append(abs(self.logRegressionLayer.W).sum()) L2_sqrs.append((self.logRegressionLayer.W ** 2).sum()) self.L1 = T.sum(L1s) self.L2_sqr = T.sum(L2_sqrs) # negative log likelihood of the MLP is given by the negative # log likelihood of the output of the model, computed in the # logistic regression layer self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood # same holds for the function computing the number of errors self.errors = self.logRegressionLayer.errors(self.y) # cost function to be minimized self.cost = self.negative_log_likelihood(self.y) \ + lambda_reg * ( (1.0-alpha_reg)*0.5* self.L2_sqr + alpha_reg*self.L1) self.y_pred=self.logRegressionLayer.y_pred def build_train_function(self, train_set_x, train_set_y, batch_size, alpha, learning_rate_shared): """ Create a function to compute the cost of model being trained. 
""" index = T.lscalar('index') # index to a [mini]batch # compute the gradients with respect to the model parameters grads = T.grad(self.cost, self.params) # add momentum # initialize the delta_i-1 delta_before=[] for param_i in self.params: delta_before_i=theano.shared(value=numpy.zeros(param_i.get_value().shape)) delta_before.append(delta_before_i) updates = [] for param_i, grad_i, delta_before_i in zip(self.params, grads, delta_before): delta_i=-learning_rate_shared * grad_i + alpha*delta_before_i updates.append((param_i, param_i + delta_i )) updates.append((delta_before_i,delta_i)) train_model_cost = theano.function([index], self.cost, updates=updates, givens={ self.x: train_set_x[index * batch_size: (index + 1) * batch_size], self.y: train_set_y[index * batch_size: (index + 1) * batch_size]}) return train_model_cost def build_valid_function(self,valid_set_x, valid_set_y, batch_size): """ Build symbolic function to calculate the validation error of a validation set. """ n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch valid_error_i = theano.function([index], self.errors, givens={self.x: valid_set_x[index * batch_size:(index + 1) * batch_size], self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]}, name='valid') # Create a function that scans the entire validation set def valid_error(): return [valid_error_i(i) for i in range(n_valid_batches)] return valid_error def build_test_function(self, test_set_x, batch_size): """ Build the symbolic test function to predict class labels. """ n_test_batches = int(math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size)) index = T.lscalar('index') # index to a [mini]batch test_pred_i = theano.function([index], self.y_pred, givens={self.x: test_set_x[index * batch_size : (index + 1) * batch_size]}, name='test') # Create a function that scans the entire test set def test_pred(): y_pred=[] for i in range(n_test_batches): y_pred.extend(test_pred_i(i)) return y_pred return test_pred def get_predicted(self,data): """ Predict the class labels of given data. """ for i in range(len(self.hidden_layers)): data=self.hidden_layers[i].get_predicted(data) p_y_given_x = T.nnet.softmax(T.dot(data, self.logRegressionLayer.W) + self.logRegressionLayer.b) y_pred = T.argmax(p_y_given_x, axis=1) return y_pred def get_params(self): return copy.deepcopy(self.params) def set_params(self, given_params): self.params=given_params def print_params(self): for param in self.params: print(param.get_value(borrow=True)) def save_params(self,filename): f=open(filename,'w') # remove existing file f.close() f=open(filename,'a') for param in self.params: pickle.dump(param.get_value(borrow=True),f) f.close()
class DFS(object):
    """Deep feature selection class.

    This structure is an input layer followed by stacked contractive
    autoencoders; each hidden layer shares its weights with the corresponding
    cA layer. An illustrative usage sketch follows the class definition.
    """

    def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
                 activation=T.nnet.sigmoid, lambda1=0, lambda2=0,
                 alpha1=0, alpha2=0, batch_size=100):
        """
        Initialize the parameters of the DFS class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}

        lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar, controls the smoothness of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar, controls the smoothness of the weight matrices in the MLP.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.

        batch_size: int, minibatch size.
        """
        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)
        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')   # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

        # input layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer

        # hidden layers; the sigmoid activation is used so each hidden layer
        # matches the sigmoid units of the cA layer it shares weights with
        for i in range(len(n_hidden)):
            if i == 0:
                input_hidden = self.input_layer.output
                n_in_hidden = n_in
            else:
                input_hidden = self.hidden_layers[i - 1].output
                n_in_hidden = n_hidden[i - 1]
            hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                             n_out=n_hidden[i], activation=T.nnet.sigmoid)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            cA_layer = cA(numpy_rng=rng, input=input_hidden,
                          n_visible=n_in_hidden, n_hidden=n_hidden[i],
                          n_batchsize=batch_size, W=hd.W, bhid=hd.b)
            self.cA_layers.append(cA_layer)

        # The logistic regression layer gets as input the hidden units of the
        # last hidden layer (or the input layer's output if there is none)
        if len(n_hidden) <= 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output, n_in=n_in, n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization terms on the coefficients of the input layer
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w ** 2).sum()
        # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
        # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

        # regularization terms on the weights of the hidden layers
        L1s = []
        L2_sqrs = []
        for i in range(len(n_hidden)):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)

        # an alternative cost with hinge losses on the input weights:
        # self.cost = self.negative_log_likelihood(self.y) \
        #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
        #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
        #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
        #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
        self.cost = self.negative_log_likelihood(self.y) \
            + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
            + lambda1 * lambda2 * self.L1_input \
            + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr + alpha1 * alpha2 * self.L1
        self.y_pred = self.logRegressionLayer.y_pred

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def pretraining_functions(self, train_set_x, batch_size):
        '''
        Generate a list of functions, each implementing one step of training
        for the cA corresponding to the layer with the same index. Each
        function takes the minibatch index as input, plus optional
        `contraction_level` and `learning_rate` arguments (both default to
        0.1); to pretrain a cA, simply iterate over all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the cA

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''
        index = T.lscalar('index')  # index to a minibatch
        contraction_level = T.scalar('contraction_level')  # weight of the contraction (Jacobian) term
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        # n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # end of a batch, given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for cA_layer in self.cA_layers:
            # get the cost and the updates list
            cost, updates = cA_layer.get_cost_updates(contraction_level, learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[index,
                        theano.Param(contraction_level, default=0.1),
                        theano.Param(learning_rate, default=0.1)],
                outputs=[T.mean(cA_layer.L_rec), cA_layer.L_jacob],
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)
        return pretrain_fns

    def build_finetune_functions(self, train_set_x, train_set_y,
                                 valid_set_x, valid_set_y,
                                 batch_size, learning_rate_shared):
        '''
        Build symbolic fine-tuning functions for training and validation.
        '''
        # compute the number of minibatches for validation
        n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)

        # compute the list of fine-tuning updates (plain gradient descent)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x: train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:(index + 1) * batch_size]},
            name='train')

        # test_score_i = theano.function(
        #     [index], self.errors,
        #     givens={
        #         self.x: test_set_x[index * batch_size:(index + 1) * batch_size],
        #         self.y: test_set_y[index * batch_size:(index + 1) * batch_size]},
        #     name='test')

        valid_score_i = theano.function(
            [index], self.errors,
            givens={
                self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set
        # def test_score():
        #     return [test_score_i(i) for i in range(n_test_batches)]

        return train_fn, valid_score

    def build_test_function(self, test_set_x, batch_size):
        """
        Build a symbolic test function that predicts class labels.
        """
        n_test_batches = int(math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index], self.y_pred,
            givens={self.x: test_set_x[index * batch_size:(index + 1) * batch_size]},
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score
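# ---------------------------------------------------------------------------
# Illustrative usage sketch for the DFS class above (layer-wise cA
# pretraining followed by supervised fine-tuning). The function name and the
# hyper-parameter values are illustrative placeholders; the data sets are
# assumed to be Theano shared variables (floatX matrices for inputs, int32
# vectors for labels), and the number of samples is assumed to be a multiple
# of `batch_size` because the cA Jacobian is computed per full minibatch.
def example_train_dfs(train_set_x, train_set_y, valid_set_x, valid_set_y,
                      n_in, n_out, batch_size=100,
                      pretraining_epochs=5, finetune_epochs=10):
    rng = numpy.random.RandomState(1000)
    model = DFS(rng=rng, n_in=n_in, n_hidden=[128, 64], n_out=n_out,
                lambda1=0.01, lambda2=0.5, alpha1=0.001, alpha2=0.5,
                batch_size=batch_size)
    n_train_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))

    # layer-wise pretraining of the contractive autoencoders
    pretrain_fns = model.pretraining_functions(train_set_x, batch_size)
    for layer_index, pretrain in enumerate(pretrain_fns):
        for epoch in range(pretraining_epochs):
            rec_costs = [pretrain(i, contraction_level=0.1, learning_rate=0.01)[0]
                         for i in range(n_train_batches)]
            print('pretraining layer %d, epoch %d: mean reconstruction cost %f'
                  % (layer_index, epoch, numpy.mean(rec_costs)))

    # supervised fine-tuning of the whole network
    learning_rate_shared = theano.shared(numpy.asarray(0.1, dtype=theano.config.floatX))
    train_fn, valid_score = model.build_finetune_functions(
        train_set_x, train_set_y, valid_set_x, valid_set_y,
        batch_size, learning_rate_shared)
    for epoch in range(finetune_epochs):
        costs = [train_fn(i) for i in range(n_train_batches)]
        print('finetuning epoch %d: mean cost %f, validation error %f'
              % (epoch, numpy.mean(costs), numpy.mean(valid_score())))
    return model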