Example #1
    def __init__(
        self, numpy_rng,
        n_in, hidden_layer_sizes, n_out
    ):
        self.sigmoid_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(hidden_layer_sizes)

        assert self.n_layers > 0

        self.input = T.matrix('input')

        for i in xrange(self.n_layers):

            # the size of this layer's input: n_in for the first layer,
            # otherwise the size of the hidden layer below
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_sizes[i - 1]

            # the input to this layer: the network input for the first layer,
            # otherwise the output of the hidden layer below
            if i == 0:
                layer_input = self.input
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(
                rng=numpy_rng,
                input=layer_input,
                n_in=input_size,
                n_out=hidden_layer_sizes[i],
                activation=T.nnet.sigmoid
            )

            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # contractive autoencoder sharing W and b with the sigmoid layer above
            cA_layer = cA(
                numpy_rng=numpy_rng,
                input=layer_input,
                W=sigmoid_layer.W,
                b=sigmoid_layer.b,
                n_visible=input_size,
                n_hidden=hidden_layer_sizes[i],
                n_class=n_out
            )

            self.cA_layers.append(cA_layer)

        self.logLayer = OneSidedCostRegressor(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layer_sizes[-1],
            n_out=n_out
        )

        self.params.extend(self.logLayer.params)
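
Each contractive autoencoder above shares W and b with its sigmoid layer, so pretraining the autoencoders also initializes the supervised path that ends in OneSidedCostRegressor. As a rough illustration of the stacking logic alone (layer sizes, weight shapes, and the chained forward pass), here is a self-contained NumPy sketch; it is not the class above, and all names and sizes are illustrative:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

rng = np.random.RandomState(0)
n_in, hidden_layer_sizes = 784, [500, 500]

# one (W, b) pair per hidden layer; layer i reads from layer i - 1
params = []
for i, n_out_i in enumerate(hidden_layer_sizes):
    n_in_i = n_in if i == 0 else hidden_layer_sizes[i - 1]
    W = rng.uniform(-0.01, 0.01, size=(n_in_i, n_out_i))
    b = np.zeros(n_out_i)
    params.append((W, b))

x = rng.rand(10, n_in)        # a minibatch of 10 samples
h = x
for W, b in params:           # same chaining as layer_input above
    h = sigmoid(h.dot(W) + b)
print(h.shape)                # (10, 500)
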
Example #2
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 lambda_reg=0.001,
                 alpha_reg=0.001,
                 batch_size=100):
        """ This class is made to support a variable number of layers.

        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw initial
                    weights

        :type n_in: int
        :param n_in: dimension of the input to the ScA

        :type n_hidden: list of ints
        :param n_hidden: sizes of the intermediate layers; must contain
                         at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network

        :type lambda_reg: float
        :param lambda_reg: parameter controlling the sparsity of the weights via the l_1 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

        :type alpha_reg: float
        :param alpha_reg: parameter in the interval [0,1] controlling the smoothness of the weights via the squared l_2 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
        Thus, the smaller alpha_reg is, the smoother the weights are.

        :type batch_size: int
        :param batch_size: minibatch size
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row of which is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the ScA if we are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # Construct a contractive autoencoder that shares weights with this
            # layer
            cA_layer = cA(numpy_rng=rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.cA_layers.append(cA_layer)

        # We now need to add a logistic layer on top of the MLP
        if self.n_layers > 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                         n_in=input_size,
                                                         n_out=n_out)

        self.params.extend(self.logRegressionLayer.params)

        # regularization
        L1s = []
        L2_sqrs = []
        for i in range(self.n_layers):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost = self.negative_log_likelihood + \
            lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred
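
The fine-tuning cost assembled at the end is the negative log likelihood plus an elastic-net penalty over every weight matrix (hidden layers and the logistic layer). A self-contained NumPy sketch of just that penalty, with lambda_reg and alpha_reg playing the same roles as in the docstring (shapes and values illustrative):

import numpy as np

def elastic_net_penalty(weights, lambda_reg=0.001, alpha_reg=0.001):
    # lambda_reg * ( (1 - alpha_reg)/2 * sum ||W||_2^2 + alpha_reg * sum ||W||_1 )
    l1 = sum(np.abs(W).sum() for W in weights)
    l2_sqr = sum((W ** 2).sum() for W in weights)
    return lambda_reg * ((1.0 - alpha_reg) * 0.5 * l2_sqr + alpha_reg * l1)

rng = np.random.RandomState(0)
weights = [rng.randn(784, 500), rng.randn(500, 500), rng.randn(500, 10)]
print(elastic_net_penalty(weights))
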
Example #3
    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1]
    ):

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        self.x = T.matrix('x')
        self.y = T.ivector('y')
        
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            self.sigmoid_layers.append(sigmoid_layer)

            self.params.extend(sigmoid_layer.params)

            # despite the dA_* name, this is a contractive autoencoder (cA)
            # sharing W and bhid with the sigmoid layer above
            dA_layer = cA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)
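
The layers stored in dA_layers are contractive autoencoders, whose distinguishing cost term is the squared Frobenius norm of the Jacobian of the hidden code with respect to the input. Assuming the usual sigmoid-encoder formulation of that penalty (the reconstruction part of the cost is omitted here), a self-contained NumPy sketch looks like this; shapes and values are illustrative:

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def contraction_penalty(x, W, b):
    # For h = sigmoid(x.dot(W) + b), dh_i/dx_j = h_i * (1 - h_i) * W[j, i],
    # so the squared Frobenius norm of the Jacobian factorizes as below.
    h = sigmoid(x.dot(W) + b)            # (batch, n_hidden)
    dh_sq = (h * (1.0 - h)) ** 2         # (batch, n_hidden)
    w_sq = (W ** 2).sum(axis=0)          # (n_hidden,)
    return (dh_sq * w_sq).sum() / x.shape[0]

rng = np.random.RandomState(0)
x = rng.rand(20, 784)
W = rng.uniform(-0.05, 0.05, size=(784, 500))
b = np.zeros(500)
print(contraction_penalty(x, W, b))
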
Example #4
    def __init__(self, rng, n_in=784,
                 n_hidden=[500, 500], 
                 n_out=10, activation=T.nnet.sigmoid,
                 lambda1=0,lambda2=0,alpha1=0,alpha2=0,batch_size=100):
        """ Initialize the parameters for the DFL class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: number of hidden units in each hidden layer

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}

        lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar in [0,1], controls the balance between smoothness and sparsity of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar in [0,1], controls the balance between smoothness and sparsity of the MLP weights.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.

        batch_size: int, minibatch size.
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # input layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer
        # hidden layers
        for i in range(len(n_hidden)):
            if i==0:
                input_hidden=self.input_layer.output
                n_in_hidden=n_in
            else:
                input_hidden=self.hidden_layers[i-1].output
                n_in_hidden=n_hidden[i-1]
            hd=HiddenLayer(rng=rng, input=input_hidden, n_in=n_in_hidden, n_out=n_hidden[i],
                           activation=T.nnet.sigmoid)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            cA_layer = cA(numpy_rng=rng,
                          input=input_hidden,
                          n_visible=n_in_hidden,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=hd.W,
                          bhid=hd.b)
            self.cA_layers.append(cA_layer)
            
        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if len(n_hidden)<=0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output,
                n_in=n_in,
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)
        
        # regularization terms on coefficients of input layer 
        self.L1_input=abs(self.input_layer.w).sum()
        self.L2_input=(self.input_layer.w **2).sum()
        #self.hinge_loss_neg=(T.maximum(0,-self.input_layer.w)).sum() # penalize negative values
        #self.hinge_loss_pos=(T.maximum(0,self.input_layer.w)).sum()  # # penalize positive values
        # regularization terms on weights of hidden layers        
        L1s=[]
        L2_sqrs=[]
        for i in range(len(n_hidden)):
            L1s.append (abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())        
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
#        self.cost = self.negative_log_likelihood(self.y) \
#         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
#         + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
#         + lambda1*lambda2*lambda3*self.hinge_loss_neg \
#         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.cost = self.negative_log_likelihood(self.y) \
            + lambda1*(1.0-lambda2)*0.5*self.L2_input \
            + lambda1*lambda2*self.L1_input \
            + alpha1*(1.0-alpha2)*0.5*self.L2_sqr + alpha1*alpha2*self.L1
        self.y_pred=self.logRegressionLayer.y_pred
        self.y_pred_prob=self.logRegressionLayer.y_pred_prob
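
The DFL cost combines the classification loss with two elastic-net terms: one on the per-feature input weights w of InputLayer (which drives feature selection) and one on the weight matrices of the hidden and logistic layers. A self-contained NumPy sketch of that regularizer, mirroring the expression assigned to self.cost (all sizes and coefficient values illustrative):

import numpy as np

def dfl_regularizer(w_input, hidden_Ws,
                    lambda1=0.0, lambda2=0.0, alpha1=0.0, alpha2=0.0):
    # lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 )               (input weights)
    # + alpha1*( (1-alpha2)/2 * sum ||W_i||_2^2 + alpha2 * sum ||W_i||_1 )    (MLP weights)
    l1_input = np.abs(w_input).sum()
    l2_input = (w_input ** 2).sum()
    l1 = sum(np.abs(W).sum() for W in hidden_Ws)
    l2_sqr = sum((W ** 2).sum() for W in hidden_Ws)
    return (lambda1 * ((1.0 - lambda2) * 0.5 * l2_input + lambda2 * l1_input)
            + alpha1 * ((1.0 - alpha2) * 0.5 * l2_sqr + alpha2 * l1))

rng = np.random.RandomState(0)
w_input = rng.randn(784)                   # per-feature input weights
hidden_Ws = [rng.randn(784, 500), rng.randn(500, 500), rng.randn(500, 10)]
print(dfl_regularizer(w_input, hidden_Ws,
                      lambda1=0.01, lambda2=0.5, alpha1=0.001, alpha2=0.1))
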
Example #5
    def __init__(self, rng, n_in=784, n_hidden=[500, 500], 
                 n_out=10, lambda_reg=0.001, alpha_reg=0.001, batch_size=100):
        """ This class is made to support a variable number of layers.

        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw initial
                    weights

        :type n_in: int
        :param n_in: dimension of the input to the ScA

        :type n_hidden: list of ints
        :param n_hidden: sizes of the intermediate layers; must contain
                         at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network

        :type lambda_reg: float
        :param lambda_reg: parameter controlling the sparsity of the weights via the l_1 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.

        :type alpha_reg: float
        :param alpha_reg: parameter in the interval [0,1] controlling the smoothness of the weights via the squared l_2 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
        Thus, the smaller alpha_reg is, the smoother the weights are.

        :type batch_size: int
        :param batch_size: minibatch size
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row of which is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels


        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the ScA if we are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # Construct a contractive autoencoder that shares weights with this
            # layer
            cA_layer = cA(numpy_rng=rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.cA_layers.append(cA_layer)

        # We now need to add a logistic layer on top of the MLP
        if self.n_layers>0:
            self.logRegressionLayer = LogisticRegression(
                         input=self.hidden_layers[-1].output,
                         n_in=n_hidden[-1], n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                         input=self.x,
                         n_in=input_size, n_out=n_out)

        self.params.extend(self.logRegressionLayer.params)
        
        # regularization        
        L1s=[]
        L2_sqrs=[]
        for i in range(self.n_layers):
            L1s.append (abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.cost = self.negative_log_likelihood + \
            lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred
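
This constructor is structurally the same as Example #2; the quantities it exposes for fine-tuning (negative_log_likelihood, errors, y_pred) all come from the top LogisticRegression layer. Assuming that layer is the usual softmax classifier, a NumPy sketch of what those quantities reduce to (shapes illustrative):

import numpy as np

def softmax(a):
    e = np.exp(a - a.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def predict_and_errors(h_last, W, b, y):
    # what y_pred, errors, and the negative log likelihood reduce to
    p_y_given_x = softmax(h_last.dot(W) + b)   # class probabilities
    y_pred = p_y_given_x.argmax(axis=1)        # predicted labels
    errors = np.mean(y_pred != y)              # fraction misclassified
    nll = -np.mean(np.log(p_y_given_x[np.arange(len(y)), y]))
    return y_pred, errors, nll

rng = np.random.RandomState(0)
h_last = rng.rand(32, 500)                     # output of the last hidden layer
W, b = rng.randn(500, 10) * 0.01, np.zeros(10)
y = rng.randint(0, 10, size=32)
print(predict_and_errors(h_last, W, b, y)[1:])
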
Example #6
print "#############"
print "## configs ##"
print "#############"
print configs

print "instantiating and training model"

numpy_rng  = np.random.RandomState(2355)
theano_rng = RandomStreams(2355)

models = []
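# h_in/h_out: input and output sizes for layer i of the stack, paired from
# consecutive entries of [input dimension] + configs['hid']; the layer index i
# comes from an enclosing per-layer loop that is not shown in this snippet.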
h_in, h_out = zip([trtrainset.shape[1]] + configs['hid'], configs['hid'])[i]
print h_in, h_out

model = cA.cA(numpy_rng=numpy_rng, theano_rng=theano_rng, 
              numvis=h_in, numhid=h_out, 
              activation=T.tanh,
              vistype="real", contraction=configs['contract'][i])

sgd.train(trtrainset,trvalidset,model,
          batch_size=configs['batch_size'],
          wait_for=20,
          learning_rate=configs['lr'][i],
          epochs=training_epochs,
          epsylon=configs['epsylon'][i],
          aug=1.01)

X = T.matrix()
encoding = model.hiddens(X)
f = theano.function([X],encoding)
trtrainset = np.vstack(sgd.iterate([trtrainset],[f],configs['batch_size'])[0])
trvalidset = np.vstack(sgd.iterate([trvalidset],[f],configs['batch_size'])[0])
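
The last two lines replace the training and validation sets with their encodings under the layer just trained, so the next pass of the enclosing per-layer loop works on the new representation. A NumPy sketch of that dataset-rewriting step, assuming only a tanh encoder standing in for the compiled Theano function f (names and sizes illustrative):

import numpy as np

def encode_in_minibatches(data, encode, batch_size):
    # map a dataset through a trained encoder one minibatch at a time,
    # producing the training set for the next layer in the stack
    chunks = []
    for start in range(0, data.shape[0], batch_size):
        chunks.append(encode(data[start:start + batch_size]))
    return np.vstack(chunks)

rng = np.random.RandomState(0)
W, b = rng.uniform(-0.05, 0.05, size=(784, 500)), np.zeros(500)

def encode(x):
    # illustrative stand-in for f = theano.function([X], model.hiddens(X))
    return np.tanh(x.dot(W) + b)

trainset = rng.rand(1000, 784)
trainset = encode_in_minibatches(trainset, encode, batch_size=100)
print(trainset.shape)   # (1000, 500)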