def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             lambda_reg=0.001, alpha_reg=0.001):
    """This class is made to support a variable number of layers.

    :type rng: numpy.random.RandomState
    :param rng: numpy random number generator used to draw initial weights;
        the Theano random generator of each RBM is seeded from it

    :type n_in: int
    :param n_in: dimension of the input to the DBN

    :type n_hidden: list of ints
    :param n_hidden: sizes of the intermediate layers, must contain at least one value

    :type n_out: int
    :param n_out: dimension of the output of the network

    :type lambda_reg: float
    :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg*( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

    :type alpha_reg: float
    :param alpha_reg: parameter from the interval [0,1] to balance the l_1 norm against the
        squared l_2 norm in the same regularization term.
        Thus, the smaller alpha_reg is, the smoother the weights are.
    """
    self.hidden_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data, each row is a sample
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    for i in range(self.n_layers):
        # construct the sigmoidal layer.
        # the size of the input is either the number of hidden units of
        # the layer below or the input size if we are on the first layer
        if i == 0:
            input_size = n_in
        else:
            input_size = n_hidden[i - 1]

        # the input to this layer is either the activation of the hidden
        # layer below or the input of the DBN if we are on the first layer
        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.hidden_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=n_hidden[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.hidden_layers.append(sigmoid_layer)

        # only the parameters of the sigmoid layers are declared parameters
        # of the network; the visible biases belong to the RBMs and are only
        # used during pretraining
        self.params.extend(sigmoid_layer.params)

        # construct an RBM that shares weights with this layer
        rbm_layer = RBM(numpy_rng=rng,
                        theano_rng=None,
                        input=layer_input,
                        n_visible=input_size,
                        n_hidden=n_hidden[i],
                        W=sigmoid_layer.W,
                        hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # we now need to add a logistic layer on top of the MLP
    if self.n_layers > 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output,
            n_in=n_hidden[-1], n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.x, n_in=n_in, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization
    L1s = []
    L2_sqrs = []
    for i in range(self.n_layers):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # the cost for the second phase of training is the negative log likelihood
    # plus the elastic-net penalty on the weights
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.cost = self.negative_log_likelihood \
        + lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

    # symbolic variable that points to the number of errors made on the
    # minibatch given by self.x and self.y
    self.errors = self.logRegressionLayer.errors(self.y)
    self.y_pred = self.logRegressionLayer.y_pred
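# The cost above adds an elastic-net penalty to the negative log likelihood. Below is a
# minimal standalone NumPy sketch of that penalty on a toy weight matrix, only to make the
# lambda_reg / alpha_reg trade-off concrete; it is not part of the class above and the
# variable names are illustrative.

import numpy as np

W_toy = np.array([[0.5, -0.2, 0.0],
                  [0.0,  1.0, -0.7]])
lambda_reg_toy, alpha_reg_toy = 0.001, 0.5
l1 = np.abs(W_toy).sum()        # ||W||_1, encourages sparsity
l2_sqr = (W_toy ** 2).sum()     # ||W||_2^2, encourages small/smooth weights
penalty = lambda_reg_toy * ((1.0 - alpha_reg_toy) * 0.5 * l2_sqr + alpha_reg_toy * l1)
# larger lambda_reg -> stronger penalty overall; alpha_reg in [0,1] shifts it from L2 to L1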
def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
             lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0):
    """Initialize the parameters for the DFL class.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    activation: activation function, from {T.tanh, T.nnet.sigmoid}

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
    """
    if not x:
        x = T.matrix('x')
    self.x = x
    if not y:
        y = T.ivector('y')
    self.y = y

    self.hidden_layers = []
    self.params = []
    self.n_layers = len(n_hidden)

    # one-to-one input layer whose weights perform feature selection
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    for i in range(len(n_hidden)):
        if i == 0:  # first hidden layer
            hd = HiddenLayer(rng=rng, input=self.input_layer.output,
                             n_in=n_in, n_out=n_hidden[i],
                             activation=activation)
        else:
            hd = HiddenLayer(rng=rng, input=self.hidden_layers[i - 1].output,
                             n_in=n_hidden[i - 1], n_out=n_hidden[i],
                             activation=activation)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
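# The feature-selection behaviour above hinges on `InputLayer`, which is not defined in this
# section. Below is a minimal sketch of what such a layer could look like, assuming it simply
# rescales each input feature by a learnable weight vector `w`; the class name, initialization,
# and attribute layout are assumptions for illustration, not the repository's implementation.

import numpy
import theano


class InputLayerSketch(object):
    """Hypothetical one-to-one input layer: output_j = w_j * x_j for each feature j."""

    def __init__(self, input, n_in):
        # start with all features fully "on"; the elastic-net penalty on w then
        # shrinks the weights of uninformative features toward zero
        w_values = numpy.ones((n_in,), dtype=theano.config.floatX)
        self.w = theano.shared(value=w_values, name='w', borrow=True)
        self.output = input * self.w   # elementwise scaling of each feature, row-wise
        self.params = [self.w]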
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10,
             lambda1=0, lambda2=0, alpha1=0, alpha2=0):
    """This class is made to support a variable number of layers.

    :type rng: numpy.random.RandomState
    :param rng: numpy random number generator used to draw initial weights

    :type n_in: int
    :param n_in: dimension of the input to the DFS

    :type n_hidden: list of ints
    :param n_hidden: sizes of the intermediate layers, must contain at least one value

    :type n_out: int
    :param n_out: dimension of the output of the network

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
    """
    self.hidden_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    # input layer
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    # hidden layers
    for i in range(len(n_hidden)):
        if i == 0:
            input_hidden = self.input_layer.output
            n_in_hidden = n_in
        else:
            input_hidden = self.hidden_layers[i - 1].output
            n_in_hidden = n_hidden[i - 1]
        hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                         n_out=n_hidden[i], activation=T.nnet.sigmoid)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)
        # construct an RBM that shares weights with this layer
        rbm_layer = RBM(numpy_rng=rng,
                        theano_rng=None,
                        input=input_hidden,
                        n_visible=n_in_hidden,
                        n_hidden=n_hidden[i],
                        W=hd.W,
                        hbias=hd.b)
        self.rbm_layers.append(rbm_layer)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
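# A hedged sketch of how the RBM layers collected above might be pretrained greedily. It
# assumes the RBM class exposes a get_cost_updates(lr, persistent, k) method, as in the
# Theano deep-learning tutorial this code appears to follow; the function and variable
# names below are illustrative and not part of this file.

import theano
import theano.tensor as T


def pretraining_functions_sketch(model, train_set_x, batch_size, k=1):
    """Compile one CD-k update function per RBM layer of `model` (illustrative only)."""
    index = T.lscalar('index')      # minibatch index
    learning_rate = T.scalar('lr')  # learning rate, supplied at call time
    batch_begin = index * batch_size
    batch_end = batch_begin + batch_size
    fns = []
    for rbm in model.rbm_layers:
        # assumed API: returns the approximation cost and the CD-k parameter updates
        cost, updates = rbm.get_cost_updates(learning_rate, persistent=None, k=k)
        fn = theano.function(
            inputs=[index, theano.In(learning_rate, value=0.1)],
            outputs=cost,
            updates=updates,
            givens={model.x: train_set_x[batch_begin:batch_end]})
        fns.append(fn)
    return fns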
def __init__(self, rng, batch_size=100, input_size=None, nkerns=[4, 4, 4],
             receptive_fields=((2, 8), (2, 8), (2, 8)),
             poolsizes=((1, 8), (1, 8), (1, 4)), full_hidden=16, n_out=10):
    """Convolutional network: a stack of LeNet-style convolution-pooling layers,
    one fully connected hidden layer, and a logistic regression output layer."""
    self.x = T.matrix(name='x', dtype=theano.config.floatX)  # the data is presented as rasterized images
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
    self.batch_size = theano.shared(value=batch_size, name='batch_size')  # alternatively: T.lscalar('batch_size')

    self.layers = []
    self.params = []
    for i in range(len(nkerns)):
        receptive_field = receptive_fields[i]
        if i == 0:
            featmap_size_after_downsample = input_size
            layeri_input = self.x.reshape((batch_size, 1,
                                           featmap_size_after_downsample[0],
                                           featmap_size_after_downsample[1]))
            image_shape = (batch_size, 1,
                           featmap_size_after_downsample[0],
                           featmap_size_after_downsample[1])
            filter_shape = (nkerns[i], 1, receptive_field[0], receptive_field[1])
        else:
            layeri_input = self.layers[i - 1].output
            image_shape = (batch_size, nkerns[i - 1],
                           featmap_size_after_downsample[0],
                           featmap_size_after_downsample[1])
            filter_shape = (nkerns[i], nkerns[i - 1],
                            receptive_field[0], receptive_field[1])

        layeri = LeNetConvPoolLayer(rng=rng, input=layeri_input,
                                    image_shape=image_shape,
                                    filter_shape=filter_shape,
                                    poolsize=poolsizes[i])
        featmap_size_after_conv = get_featmap_size_after_conv(
            featmap_size_after_downsample, receptive_fields[i])
        featmap_size_after_downsample = get_featmap_size_after_downsample(
            featmap_size_after_conv, poolsizes[i])

        self.layers.append(layeri)
        self.params.extend(layeri.params)

    # fully connected layer
    print('going to fully connected layer')
    layer_full_input = self.layers[-1].output.flatten(2)
    # construct a fully-connected layer with tanh activation
    layer_full = HiddenLayer(rng=rng, input=layer_full_input,
                             n_in=nkerns[-1] * featmap_size_after_downsample[0] * featmap_size_after_downsample[1],
                             n_out=full_hidden, activation=T.tanh)
    self.layers.append(layer_full)
    self.params.extend(layer_full.params)

    # classify the values of the fully-connected layer
    print('going to output layer')
    self.logRegressionLayer = LogisticRegression(
        input=self.layers[-1].output, n_in=full_hidden, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # the cost we minimize during training is the NLL of the model
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.cost = self.logRegressionLayer.negative_log_likelihood(self.y)
    self.errors = self.logRegressionLayer.errors(self.y)
    self.y_pred = self.logRegressionLayer.y_pred
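# The helpers get_featmap_size_after_conv and get_featmap_size_after_downsample are called
# above but not defined in this section. Below is a minimal sketch of what they plausibly
# compute, assuming 'valid'-mode convolution and non-overlapping pooling with ignored
# borders; the names are suffixed with _sketch to mark them as illustrative, not the
# repository's implementations.

def get_featmap_size_after_conv_sketch(featmap_size, receptive_field):
    # valid convolution: output dimension = input dimension - filter dimension + 1
    return (featmap_size[0] - receptive_field[0] + 1,
            featmap_size[1] - receptive_field[1] + 1)


def get_featmap_size_after_downsample_sketch(featmap_size, poolsize):
    # non-overlapping pooling with ignore_border: integer division by the pool size
    return (featmap_size[0] // poolsize[0],
            featmap_size[1] // poolsize[1])

# Under these assumptions, a (28, 28) input with a (2, 8) receptive field and (1, 8)
# pooling would give (27, 21) after convolution and (27, 2) after downsampling.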
def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
             lambda_reg=0.001, alpha_reg=0.0):
    """Initialize the parameters for the multilayer perceptron.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    :type lambda_reg: float
    :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg*( (1-alpha_reg)/2 * sum_i ||W_i||_2^2 + alpha_reg * sum_i ||W_i||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

    :type alpha_reg: float
    :param alpha_reg: parameter from the interval [0,1] to balance the l_1 norm against
        the squared l_2 norm in the same regularization term.
        Thus, the smaller alpha_reg is, the smoother the weights are.
    """
    self.hidden_layers = []
    self.params = []
    self.n_layers = len(n_hidden)

    if not x:
        x = T.matrix('x')
    self.x = x
    if not y:
        y = T.ivector('y')
    self.y = y

    for i in range(len(n_hidden)):
        if i == 0:  # first hidden layer
            hd = HiddenLayer(rng=rng, input=self.x, n_in=n_in,
                             n_out=n_hidden[i], activation=activation)
        else:
            hd = HiddenLayer(rng=rng, input=self.hidden_layers[i - 1].output,
                             n_in=n_hidden[i - 1], n_out=n_hidden[i],
                             activation=activation)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the raw input if there is none)
    if self.n_layers > 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.x, n_in=n_in, n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # cost function to be minimized
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)

    self.y_pred = self.logRegressionLayer.y_pred
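# A hedged usage sketch: compiling a plain minibatch SGD training step for the model built
# above. The enclosing class is assumed to be named MLP (the class statement is not shown in
# this section), and the toy data below is purely illustrative; only T.grad and
# theano.function are standard Theano API.

import numpy
import theano
import theano.tensor as T

rng = numpy.random.RandomState(123)
# toy data, just to make the example self-contained
train_x = theano.shared(numpy.asarray(rng.rand(100, 20), dtype=theano.config.floatX))
train_y = theano.shared(numpy.asarray(rng.randint(0, 2, size=100), dtype='int32'))

model = MLP(rng=rng, n_in=20, n_hidden=[10], n_out=2,
            lambda_reg=0.001, alpha_reg=0.5)          # assumed class name
gparams = T.grad(model.cost, model.params)            # symbolic gradients of the regularized cost
learning_rate = 0.1
updates = [(p, p - learning_rate * g) for p, g in zip(model.params, gparams)]

index = T.lscalar('index')  # minibatch index
batch_size = 20
train_fn = theano.function(
    inputs=[index],
    outputs=model.cost,
    updates=updates,
    givens={model.x: train_x[index * batch_size:(index + 1) * batch_size],
            model.y: train_y[index * batch_size:(index + 1) * batch_size]})
# one SGD step on minibatch 0: train_fn(0)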
def __init__(self, rng, n_in=784, n_hidden=[500, 500], n_out=10, activation=T.nnet.sigmoid,
             lambda1=0, lambda2=0, alpha1=0, alpha2=0, batch_size=100):
    """Initialize the parameters for the DFL class.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type n_in: int
    :param n_in: number of input units, the dimension of the space
        in which the datapoints lie

    :type n_hidden: list of ints
    :param n_hidden: sizes of the hidden layers

    :type n_out: int
    :param n_out: number of output units, the dimension of the space
        in which the labels lie

    activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}

    lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

    lambda2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the input weights in the same term.
        Thus, the smaller lambda2 is, the smoother the input weights are.

    alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1*( (1-alpha2)/2 * sum_i ||W_i||_2^2 + alpha2 * sum_i ||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

    alpha2: float scalar from [0,1], balances the l_1 norm against the squared l_2 norm
        of the MLP weights in the same term.
        Thus, the smaller alpha2 is, the smoother the MLP weights are.

    batch_size: int, minibatch size.
    """
    self.hidden_layers = []
    self.cA_layers = []
    self.params = []
    self.n_layers = len(n_hidden)
    assert self.n_layers > 0

    # allocate symbolic variables for the data
    self.x = T.matrix('x')   # the data, each row is a sample
    self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    # input layer
    input_layer = InputLayer(input=self.x, n_in=n_in)
    self.params.extend(input_layer.params)
    self.input_layer = input_layer

    # hidden layers
    for i in range(len(n_hidden)):
        if i == 0:
            input_hidden = self.input_layer.output
            n_in_hidden = n_in
        else:
            input_hidden = self.hidden_layers[i - 1].output
            n_in_hidden = n_hidden[i - 1]
        hd = HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden,
                         n_out=n_hidden[i], activation=T.nnet.sigmoid)
        self.hidden_layers.append(hd)
        self.params.extend(hd.params)
        # construct a contractive autoencoder that shares weights with this layer
        cA_layer = cA(numpy_rng=rng, input=input_hidden,
                      n_visible=n_in_hidden, n_hidden=n_hidden[i],
                      n_batchsize=batch_size, W=hd.W, bhid=hd.b)
        self.cA_layers.append(cA_layer)

    # the logistic regression layer gets as input the hidden units
    # of the last hidden layer (or the input layer if there is none)
    if len(n_hidden) <= 0:
        self.logRegressionLayer = LogisticRegression(
            input=self.input_layer.output, n_in=n_in, n_out=n_out)
    else:
        self.logRegressionLayer = LogisticRegression(
            input=self.hidden_layers[-1].output, n_in=n_hidden[-1], n_out=n_out)
    self.params.extend(self.logRegressionLayer.params)

    # regularization terms on the coefficients of the input layer
    self.L1_input = abs(self.input_layer.w).sum()
    self.L2_input = (self.input_layer.w ** 2).sum()
    # self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
    # self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values

    # regularization terms on the weights of the hidden and output layers
    L1s = []
    L2_sqrs = []
    for i in range(len(n_hidden)):
        L1s.append(abs(self.hidden_layers[i].W).sum())
        L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
    L1s.append(abs(self.logRegressionLayer.W).sum())
    L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
    self.L1 = T.sum(L1s)
    self.L2_sqr = T.sum(L2_sqrs)

    # negative log likelihood of the MLP is given by the negative
    # log likelihood of the output of the model, computed in the
    # logistic regression layer
    self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
    # same holds for the function computing the number of errors
    self.errors = self.logRegressionLayer.errors(self.y)

    # alternative cost using hinge losses on the sign of the input weights:
    # self.cost = self.negative_log_likelihood(self.y) \
    #     + lambda1*(1.0-lambda2)*0.5*self.L2_input \
    #     + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
    #     + lambda1*lambda2*lambda3*self.hinge_loss_neg \
    #     + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
    self.cost = self.negative_log_likelihood(self.y) \
        + lambda1 * (1.0 - lambda2) * 0.5 * self.L2_input \
        + lambda1 * lambda2 * self.L1_input \
        + alpha1 * (1.0 - alpha2) * 0.5 * self.L2_sqr \
        + alpha1 * alpha2 * self.L1

    self.y_pred = self.logRegressionLayer.y_pred
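# A hedged sketch of pretraining one of the cA layers collected above. It assumes the cA
# class exposes a get_cost_updates(contraction_level, learning_rate) method returning
# (cost, updates), as in the Theano contractive-autoencoder tutorial this code appears to
# follow; the function name and data variables below are illustrative only.

import theano
import theano.tensor as T


def ca_pretraining_function_sketch(model, ca_layer, train_set_x, batch_size,
                                   contraction_level=0.1, learning_rate=0.01):
    """Compile a single-layer cA pretraining step for `model` (illustrative only)."""
    index = T.lscalar('index')  # minibatch index
    # assumed API: reconstruction cost plus Jacobian penalty, and the parameter updates
    cost, updates = ca_layer.get_cost_updates(contraction_level=contraction_level,
                                              learning_rate=learning_rate)
    return theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={model.x: train_set_x[index * batch_size:(index + 1) * batch_size]})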