def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             lambda_reg=0.001, alpha_reg=0.001):
    """This class is made to support a variable number of layers.

    :type rng: numpy.random.RandomState
    :param rng: numpy random number generator used to draw initial weights;
        the Theano random generator of each RBM is seeded from it

    :type n_in: int
    :param n_in: dimension of the input to the DBN

    :type n_hidden: list of ints
    :param n_hidden: sizes of the intermediate layers, must contain at least one value

    :type n_out: int
    :param n_out: dimension of the output of the network

    :type lambda_reg: float
    :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg*( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

    :type alpha_reg: float
    :param alpha_reg: parameter from the interval [0,1] to balance the l_1 norm against the
        squared l_2 norm in the same regularization term.
        Thus, the smaller alpha_reg is, the smoother the weights are.
    """
    self.hidden_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data, each row is a sample
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    for i in range(self.n_layers):
        # construct the sigmoidal layer.
        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_in
        else:
            input_size = n_hidden[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the DBN if we are on the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=n_hidden[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.hidden_layers.append(sigmoid_layer)

        # only the parameters of the sigmoid layers are declared parameters
        # of the network; the visible biases belong to the RBMs and are only
        # used during pretraining
        self.params.extend(sigmoid_layer.params)

        # construct an RBM that shares weights with this layer
        rbm_layer = RBM(numpy_rng=rng,
                        theano_rng=None,
                        input=layer_input,
                        n_visible=input_size,
                        n_hidden=n_hidden[i],
                        W=sigmoid_layer.W,
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # we now need to add a logistic layer on top of the MLP
    if self.n_layers > 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output,
            n_in=n_hidden[-1], n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.x, n_in=n_in, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization
    L1s = []
    L2_sqrs = []
    for i in range(self.n_layers):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # the cost for the second phase of training is the negative log likelihood
    # plus the elastic-net penalty on the weights
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.cost = self.negative_log_likelihood \
        + lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logRegressionLayer.errors(self.y)
    self.y_pred = self.logRegressionLayer.y_pred
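# The cost above adds an elastic-net penalty to the negative log likelihood. Below is a
# minimal standalone NumPy sketch of that penalty on a toy weight matrix, only to make the
# lambda_reg / alpha_reg trade-off concrete; it is not part of the class above and the
# variable names are illustrative.

import numpy as np

W_toy = np.array([[0.5, -0.2, 0.0],
                  [0.0,  1.0, -0.7]])
lambda_reg_toy, alpha_reg_toy = 0.001, 0.5
l1 = np.abs(W_toy).sum()        # ||W||_1, encourages sparsity
l2_sqr = (W_toy ** 2).sum()     # ||W||_2^2, encourages small/smooth weights
penalty = lambda_reg_toy * ((1.0 - alpha_reg_toy) * 0.5 * l2_sqr + alpha_reg_toy * l1)
# larger lambda_reg -> stronger penalty overall; alpha_reg in [0,1] shifts it from L2 to L1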
def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
             lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0):
    """Initialize the parameters for the DFL class.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    activation: activation function, from {T.tanh, T.nnet.sigmoid}

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
    """
    if not x:
        x = T.matrix('x')
    self.x = x
    if not y:
        y = T.ivector('y')
    self.y = y

    self.hidden_layers = []
    self.params = []
    self.n_layers = len(n_hidden)

    # one-to-one input layer whose weights perform feature selection
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    for i in range(len(n_hidden)):
        if i == 0:  # first hidden layer
            hd = HiddenLayer(rng=rng, input=self.input_layer.output,
                             n_in=n_in, n_out=n_hidden[i],
                             activation=activation)
        else:
            hd = HiddenLayer(rng=rng, input=self.hidden_layers[i - 1].output,
                             n_in=n_hidden[i - 1], n_out=n_hidden[i],
                             activation=activation)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
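# The feature-selection behaviour above hinges on `InputLayer`, which is not defined in this
# section. Below is a minimal sketch of what such a layer could look like, assuming it simply
# rescales each input feature by a learnable weight vector `w`; the class name, initialization,
# and attribute layout are assumptions for illustration, not the repository's implementation.

import numpy
import theano


class InputLayerSketch(object):
    """Hypothetical one-to-one input layer: output_j = w_j * x_j for each feature j."""

    def __init__(self, input, n_in):
        # start with all features fully "on"; the elastic-net penalty on w then
        # shrinks the weights of uninformative features toward zero
        w_values = numpy.ones((n_in,), dtype=theano.config.floatX)
        self.w = theano.shared(value=w_values, name='w', borrow=True)
        self.output = input * self.w   # elementwise scaling of each feature, row-wise
        self.params = [self.w]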
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             lambda1=0, lambda2=0, alpha1=0, alpha2=0):
    """This class is made to support a variable number of layers.

    :type rng: numpy.random.RandomState
    :param rng: numpy random number generator used to draw initial weights

    :type n_in: int
    :param n_in: dimension of the input to the DFS

    :type n_hidden: list of ints
    :param n_hidden: sizes of the intermediate layers, must contain at least one value

    :type n_out: int
    :param n_out: dimension of the output of the network

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
    """
    self.hidden_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    # input layer
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    # hidden layers
    for i in range(len(n_hidden)):
        if i == 0:
            input_hidden = self.input_layer.output
            n_in_hidden = n_in
        else:
            input_hidden = self.hidden_layers[i - 1].output
            n_in_hidden = n_hidden[i - 1]
        hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                         n_out=n_hidden[i], activation=T.nnet.sigmoid)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)
        # construct an RBM that shares weights with this layer
        rbm_layer = RBM(numpy_rng=rng,
                        theano_rng=None,
                        input=input_hidden,
                        n_visible=n_in_hidden,
                        n_hidden=n_hidden[i],
                        W=hd.W,
                        hbias=hd.b)
        self.rbm_layers.append(rbm_layer)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
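# A hedged sketch of how the RBM layers collected above might be pretrained greedily. It
# assumes the RBM class exposes a get_cost_updates(lr, persistent, k) method, as in the
# Theano deep-learning tutorial this code appears to follow; the function and variable
# names below are illustrative and not part of this file.

import theano
import theano.tensor as T


def pretraining_functions_sketch(model, train_set_x, batch_size, k=1):
    """Compile one CD-k update function per RBM layer of `model` (illustrative only)."""
    index = T.lscalar('index')      # minibatch index
    learning_rate = T.scalar('lr')  # learning rate, supplied at call time
    batch_begin = index * batch_size
    batch_end = batch_begin + batch_size
    fns = []
    for rbm in model.rbm_layers:
        # assumed API: returns the approximation cost and the CD-k parameter updates
        cost, updates = rbm.get_cost_updates(learning_rate, persistent=None, k=k)
        fn = theano.function(
            inputs=[index, theano.In(learning_rate, value=0.1)],
            outputs=cost,
            updates=updates,
            givens={model.x: train_set_x[batch_begin:batch_end]})
        fns.append(fn)
    return fns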
def __init__(self, rng, batch_size=100, input_size=None, nkerns=[4, 4, 4],
             receptive_fields=((2, 8), (2, 8), (2, 8)),
             poolsizes=((1, 8), (1, 8), (1, 4)), full_hidden=16, n_out=10):
    """Convolutional network: a stack of LeNet-style convolution-pooling layers,
    one fully connected hidden layer, and a logistic regression output layer."""
    self.x = T.matrix(name='x', dtype=theano.config.floatX)  # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
    self.batch_size = theano.shared(value=batch_size, name='batch_size')  # alternatively: T.lscalar('batch_size')

    self.layers = []
    self.params = []
    for i in range(len(nkerns)):
        receptive_field = receptive_fields[i]
        if i == 0:
            featmap_size_after_downsample = input_size
            layeri_input = self.x.reshape((batch_size, 1,
                                           featmap_size_after_downsample[0],
                                           featmap_size_after_downsample[1]))
            image_shape = (batch_size, 1,
                           featmap_size_after_downsample[0],
                           featmap_size_after_downsample[1])
            filter_shape = (nkerns[i], 1, receptive_field[0], receptive_field[1])
        else:
            layeri_input = self.layers[i - 1].output
            image_shape = (batch_size, nkerns[i - 1],
                           featmap_size_after_downsample[0],
                           featmap_size_after_downsample[1])
            filter_shape = (nkerns[i], nkerns[i - 1],
                            receptive_field[0], receptive_field[1])

        layeri = LeNetConvPoolLayer(rng=rng, input=layeri_input,
                                    image_shape=image_shape,
                                    filter_shape=filter_shape,
                                    poolsize=poolsizes[i])
        featmap_size_after_conv = get_featmap_size_after_conv(
            featmap_size_after_downsample, receptive_fields[i])
        featmap_size_after_downsample = get_featmap_size_after_downsample(
            featmap_size_after_conv, poolsizes[i])

        self.layers.append(layeri)
        self.params.extend(layeri.params)

    # fully connected layer
    print('going to fully connected layer')
    layer_full_input = self.layers[-1].output.flatten(2)
    # construct a fully-connected layer with tanh activation
    layer_full = HiddenLayer(rng=rng, input=layer_full_input,
                             n_in=nkerns[-1] * featmap_size_after_downsample[0] * featmap_size_after_downsample[1],
                             n_out=full_hidden, activation=T.tanh)
    self.layers.append(layer_full)
    self.params.extend(layer_full.params)

    # classify the values of the fully-connected layer
    print('going to output layer')
    self.logRegressionLayer = LogisticRegression(
        input=self.layers[-1].output, n_in=full_hidden, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # the cost we minimize during training is the NLL of the model
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.cost = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.errors = self.logRegressionLayer.errors(self.y)
    self.y_pred = self.logRegressionLayer.y_pred
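# The helpers get_featmap_size_after_conv and get_featmap_size_after_downsample are called
# above but not defined in this section. Below is a minimal sketch of what they plausibly
# compute, assuming 'valid'-mode convolution and non-overlapping pooling with ignored
# borders; the names are suffixed with _sketch to mark them as illustrative, not the
# repository's implementations.

def get_featmap_size_after_conv_sketch(featmap_size, receptive_field):
    # valid convolution: output dimension = input dimension - filter dimension + 1
    return (featmap_size[0] - receptive_field[0] + 1,
            featmap_size[1] - receptive_field[1] + 1)


def get_featmap_size_after_downsample_sketch(featmap_size, poolsize):
    # non-overlapping pooling with ignore_border: integer division by the pool size
    return (featmap_size[0] // poolsize[0],
            featmap_size[1] // poolsize[1])

# Under these assumptions, a (28, 28) input with a (2, 8) receptive field and (1, 8)
# pooling would give (27, 21) after convolution and (27, 2) after downsampling.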
def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
             lambda_reg=0.001, alpha_reg=0.0):
    """Initialize the parameters for the multilayer perceptron.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    :type lambda_reg: float
    :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg*( (1-alpha_reg)/2 * sum_i ||W_i||_2^2 + alpha_reg * sum_i ||W_i||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

    :type alpha_reg: float
    :param alpha_reg: parameter from the interval [0,1] to balance the l_1 norm against
        the squared l_2 norm in the same regularization term.
        Thus, the smaller alpha_reg is, the smoother the weights are.
    """
    self.hidden_layers = []
    self.params = []
    self.n_layers = len(n_hidden)

    if not x:
        x = T.matrix('x')
    self.x = x
    if not y:
        y = T.ivector('y')
    self.y = y

    for i in range(len(n_hidden)):
        if i == 0:  # first hidden layer
            hd = HiddenLayer(rng=rng, input=self.x, n_in=n_in,
                             n_out=n_hidden[i], activation=activation)
        else:
            hd = HiddenLayer(rng=rng, input=self.hidden_layers[i - 1].output,
                             n_in=n_hidden[i - 1], n_out=n_hidden[i],
                             activation=activation)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the raw input if there is none)
    if self.n_layers > 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.x, n_in=n_in, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # cost function to be minimized
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

    self.y_pred = self.logRegressionLayer.y_pred
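# A hedged usage sketch: compiling a plain minibatch SGD training step for the model built
# above. The enclosing class is assumed to be named MLP (the class statement is not shown in
# this section), and the toy data below is purely illustrative; only T.grad and
# theano.function are standard Theano API.

import numpy
import theano
import theano.tensor as T

rng = numpy.random.RandomState(123)
# toy data, just to make the example self-contained
train_x = theano.shared(numpy.asarray(rng.rand(100, 20), dtype=theano.config.floatX))
train_y = theano.shared(numpy.asarray(rng.randint(0, 2, size=100), dtype='int32'))

model = MLP(rng=rng, n_in=20, n_hidden=[10], n_out=2,
            lambda_reg=0.001, alpha_reg=0.5)          # assumed class name
gparams = T.grad(model.cost, model.params)            # symbolic gradients of the regularized cost
learning_rate = 0.1
updates = [(p, p - learning_rate * g) for p, g in zip(model.params, gparams)]

index = T.lscalar('index')  # minibatch index
batch_size = 20
train_fn = theano.function(
    inputs=[index],
    outputs=model.cost,
    updates=updates,
    givens={model.x: train_x[index * batch_size:(index + 1) * batch_size],
            model.y: train_y[index * batch_size:(index + 1) * batch_size]})
# one SGD step on minibatch 0: train_fn(0)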
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10, activation=T.nnet.sigmoid,
             lambda1=0, lambda2=0, alpha1=0, alpha2=0, batch_size=100):
    """Initialize the parameters for the DFL class.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.

    batch_size: int, minibatch size.
    """
    self.hidden_layers = []
    self.cA_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data, each row is a sample
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    # input layer
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    # hidden layers
    for i in range(len(n_hidden)):
        if i == 0:
            input_hidden = self.input_layer.output
            n_in_hidden = n_in
        else:
            input_hidden = self.hidden_layers[i - 1].output
            n_in_hidden = n_hidden[i - 1]
        hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                         n_out=n_hidden[i], activation=T.nnet.sigmoid)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)
        # construct a contractive autoencoder that shares weights with this layer
        cA_layer = cA(numpy_rng=rng, input=input_hidden,
                      n_visible=n_in_hidden, n_hidden=n_hidden[i],
                      n_batchsize=batch_size, W=hd.W, bhid=hd.b)
        self.cA_layers.append(cA_layer)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
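# A hedged sketch of pretraining one of the cA layers collected above. It assumes the cA
# class exposes a get_cost_updates(contraction_level, learning_rate) method returning
# (cost, updates), as in the Theano contractive-autoencoder tutorial this code appears to
# follow; the function name and data variables below are illustrative only.

import theano
import theano.tensor as T


def ca_pretraining_function_sketch(model, ca_layer, train_set_x, batch_size,
                                   contraction_level=0.1, learning_rate=0.01):
    """Compile a single-layer cA pretraining step for `model` (illustrative only)."""
    index = T.lscalar('index')  # minibatch index
    # assumed API: reconstruction cost plus Jacobian penalty, and the parameter updates
    cost, updates = ca_layer.get_cost_updates(contraction_level=contraction_level,
                                              learning_rate=learning_rate)
    return theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={model.x: train_set_x[index * batch_size:(index + 1) * batch_size]})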