Example #1
class DBN(object):
    """Deep Belief Network
    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 lambda_reg=0.001,
                 alpha_reg=0.001):
        """This class is made to support a variable number of layers.
    
        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw the initial
                    weights

        :type n_in: int
        :param n_in: dimension of the input to the DBN
    
        :type n_hidden: list of ints
        :param n_hidden: intermediate layers size, must contain
                               at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network
       
        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of the weights via the l_1 norm.
         The regularization term is lambda_reg * ( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
         Thus, the larger lambda_reg is, the sparser the weights are.
        
        :type alpha_reg: float
        :param alpha_reg: parameter in the interval [0,1] to control the trade-off between the
         smooth (squared l_2) and sparse (l_1) penalties.
         The regularization term is lambda_reg * ( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
         Thus, the smaller alpha_reg is, the smoother the weights are.
        """

        self.hidden_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if we are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            # note that only the parameters of the sigmoid layers (and of the
            # logistic regression layer added below) are declared parameters
            # of the DBN; the visible biases of the RBMs belong to the RBMs
            # themselves, not to the DBN
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=rng,
                            theano_rng=None,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=n_hidden[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if self.n_layers > 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                         n_in=input_size,
                                                         n_out=n_out)

        self.params.extend(self.logRegressionLayer.params)

        # regularization
        L1s = []
        L2_sqrs = []
        for i in range(self.n_layers):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost = self.negative_log_likelihood + \
            lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred

    def pretraining_functions(self, train_set_x, batch_size, persistent_k=15):
        '''
        Build the symbolic pretraining functions; each one updates the corresponding RBM's parameters for one minibatch.
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm_layer in self.rbm_layers:
            # get the cost and the updates list
            cost, updates = rbm_layer.get_cost_updates(learning_rate,
                                                       persistent=None,
                                                       k=persistent_k)
            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x,
                                 valid_set_y, batch_size,
                                 learning_rate_shared):
        '''
        Build the symbolic fine-tuning function that updates the parameters for one minibatch.
        A validation function is also defined.
        '''
        # compute number of minibatches for training, validation and testing
        n_valid_batches = int(
            math.ceil(
                valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)
        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='train')

        #        test_score_i = theano.function([index], self.errors,
        #                 givens={
        #                   self.x: test_set_x[index * batch_size:
        #                                      (index + 1) * batch_size],
        #                   self.y: test_set_y[index * batch_size:
        #                                      (index + 1) * batch_size]},
        #                      name='test')

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set


#        def test_score():
#            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score

    def build_test_function(self, test_set_x, batch_size):
        """
        Build the symbolic test function.
        """
        n_test_batches = int(
            math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index],
            self.y_pred,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size]
            },
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        # overwrite any existing file; pickle requires a binary file handle
        with open(filename, 'wb') as f:
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
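Usage note (not part of the original example): the DBN class above is typically driven by a small script that first pretrains the RBMs layer by layer and then fine-tunes the whole stack. The sketch below is illustrative only; it assumes the class and its dependencies (HiddenLayer, RBM, LogisticRegression) are importable from the original module, and `load_shared_data` together with all hyperparameter values are hypothetical placeholders.

import numpy
import theano

# hypothetical helper returning Theano shared variables (not part of the original module)
train_set_x, train_set_y, valid_set_x, valid_set_y = load_shared_data()

rng = numpy.random.RandomState(1234)
dbn = DBN(rng=rng, n_in=784, n_hidden=[500, 500], n_out=10,
          lambda_reg=0.001, alpha_reg=0.001)

batch_size = 100
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

# unsupervised, layer-wise pretraining: one compiled function per RBM
pretrain_fns = dbn.pretraining_functions(train_set_x, batch_size, persistent_k=15)
for layer_fn in pretrain_fns:
    for epoch in range(10):
        costs = [layer_fn(i, 0.1) for i in range(n_train_batches)]  # 0.1 = learning rate

# supervised fine-tuning of the whole stack
learning_rate_shared = theano.shared(numpy.asarray(0.1, dtype=theano.config.floatX))
train_fn, valid_score = dbn.build_finetune_functions(
    train_set_x, train_set_y, valid_set_x, valid_set_y, batch_size, learning_rate_shared)
for epoch in range(30):
    for i in range(n_train_batches):
        train_fn(i)
    print('epoch %d, mean validation error %f' % (epoch, numpy.mean(valid_score())))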
Example #2
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 lambda_reg=0.001,
                 alpha_reg=0.001):
        """This class is made to support a variable number of layers.
    
        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw the initial
                    weights

        :type n_in: int
        :param n_in: dimension of the input to the DBN
    
        :type n_hidden: list of ints
        :param n_hidden: intermediate layers size, must contain
                               at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network
       
        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of the weights via the l_1 norm.
         The regularization term is lambda_reg * ( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
         Thus, the larger lambda_reg is, the sparser the weights are.
        
        :type alpha_reg: float
        :param alpha_reg: parameter in the interval [0,1] to control the trade-off between the
         smooth (squared l_2) and sparse (l_1) penalties.
         The regularization term is lambda_reg * ( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 ).
         Thus, the smaller alpha_reg is, the smoother the weights are.
        """

        self.hidden_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the DBN if we are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            # note that only the parameters of the sigmoid layers (and of the
            # logistic regression layer added below) are declared parameters
            # of the DBN; the visible biases of the RBMs belong to the RBMs
            # themselves, not to the DBN
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=rng,
                            theano_rng=None,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=n_hidden[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        if self.n_layers > 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                         n_in=input_size,
                                                         n_out=n_out)

        self.params.extend(self.logRegressionLayer.params)

        # regularization
        L1s = []
        L2_sqrs = []
        for i in range(self.n_layers):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost = self.negative_log_likelihood + \
            lambda_reg * ((1.0 - alpha_reg) * 0.5 * self.L2_sqr + alpha_reg * self.L1)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred
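As a side note, the elastic-net style penalty described in the docstring and added to `self.cost` can be reproduced in plain numpy. The helper below is an illustrative sketch (not part of the original module) showing how `lambda_reg` scales the overall penalty while `alpha_reg` shifts it between the smooth l_2 term and the sparsifying l_1 term.

import numpy

def elastic_net_penalty(W, lambda_reg=0.001, alpha_reg=0.001):
    """lambda_reg * ( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg * ||W||_1 )"""
    l1 = numpy.abs(W).sum()
    l2_sqr = (W ** 2).sum()
    return lambda_reg * ((1.0 - alpha_reg) * 0.5 * l2_sqr + alpha_reg * l1)

W = numpy.array([[0.5, -0.2], [0.0, 1.0]])
# alpha_reg=0 gives a pure (smooth) l_2 penalty, alpha_reg=1 a pure (sparsifying) l_1 penalty
print(elastic_net_penalty(W, lambda_reg=0.01, alpha_reg=0.0))
print(elastic_net_penalty(W, lambda_reg=0.01, alpha_reg=1.0))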
Example #3
class DFS(object):
    """
    Deep feature selection (DFS) class: a one-to-one input layer followed by an MLP.
    """

    def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
                 lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0):
        """Initialize the parameters for the DFL class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers; must contain at least one value

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        
        activation: activation function, from {T.tanh, T.nnet.sigmoid}
        
        lambda1: float scalar, controls the overall strength of the regularization on the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar in [0,1], trades off smoothness (l_2) against sparsity (l_1) of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the overall strength of the regularization on the MLP weight matrices.
        The regularization term is alpha1*( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 * \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar in [0,1], trades off smoothness (l_2) against sparsity (l_1) of the MLP weights.
        The regularization term is alpha1*( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 * \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
        """
        
        # use `is None` to avoid truth-testing a symbolic Theano variable
        if x is None:
            x = T.matrix('x')
        self.x = x
        if y is None:
            y = T.ivector('y')
        self.y = y
        
        self.hidden_layers=[]
        self.params=[]
        self.n_layers=len(n_hidden)        
        
        input_layer=InputLayer(input=self.x,n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer=input_layer
        for i in range(len(n_hidden)):
            if i==0: # first hidden layer
                hd=HiddenLayer(rng=rng, input=self.input_layer.output, n_in=n_in, n_out=n_hidden[i],
                               activation=activation)
            else:
                hd=HiddenLayer(rng=rng, input=self.hidden_layers[i-1].output, n_in=n_hidden[i-1], n_out=n_hidden[i],
                               activation=activation)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            
        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if len(n_hidden)<=0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output,
                n_in=n_in,
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)
        
        # regularization terms
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w ** 2).sum()
        self.hinge_loss_neg = (T.maximum(0, -self.input_layer.w)).sum()  # penalize negative values
        self.hinge_loss_pos = (T.maximum(0, self.input_layer.w)).sum()   # penalize positive values
        L1s=[]
        L2_sqrs=[]
        #L1s.append(abs(self.hidden_layers[0].W).sum())
        for i in range(len(n_hidden)):
            L1s.append (abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())        
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
#        self.cost = self.negative_log_likelihood(self.y) \
#         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
#         + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
#         + lambda1*lambda2*lambda3*self.hinge_loss_neg \
#         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.cost = self.negative_log_likelihood(self.y) \
         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
         + lambda1*lambda2*self.L1_input \
         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.y_pred=self.logRegressionLayer.y_pred
    
    def build_train_function(self, train_set_x, train_set_y, batch_size, alpha, learning_rate_shared):
        """
        Build the symbolic training function that updates the parameters (with momentum) for one minibatch.
        """
        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        grads = T.grad(self.cost, self.params)
        
        # add momentum: keep, for every parameter, a shared variable holding
        # the previous update delta_{t-1}, matching the parameter's dtype
        delta_before = []
        for param_i in self.params:
            delta_before_i = theano.shared(value=numpy.zeros_like(param_i.get_value()))
            delta_before.append(delta_before_i)
        
        updates = []
        for param_i, grad_i, delta_before_i in zip(self.params, grads, delta_before):
            delta_i=-learning_rate_shared * grad_i + alpha*delta_before_i
            updates.append((param_i, param_i + delta_i ))
            updates.append((delta_before_i,delta_i))
            
        train_model_cost = theano.function([index], self.cost, updates=updates,
                                      givens={
                                      self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                      self.y: train_set_y[index * batch_size: (index + 1) * batch_size]},
                                      name='train')
        return train_model_cost

    def build_valid_function(self,valid_set_x, valid_set_y, batch_size):
        """
        Build symbolic validation function.
        """
        n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))
        
        index = T.lscalar('index')  # index to a [mini]batch
        valid_error_i = theano.function([index], self.errors,
                                        givens={self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
                                        name='valid')

        # Create a function that scans the entire validation set
        def valid_error():
            return [valid_error_i(i) for i in range(n_valid_batches)]
        return valid_error
        
    def build_test_function(self, test_set_x, batch_size):
        """
        Build symbolic test function.
        
        """
        n_test_batches = int(math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_pred_i = theano.function([index], self.y_pred,
                                       givens={self.x: test_set_x[index * batch_size : (index + 1) * batch_size]},
                                       name='test')

        # Create a function that scans the entire test set
        def test_pred():
            y_pred=[]
            for i in range(n_test_batches):
                y_pred.extend(test_pred_i(i))
            return y_pred
        return test_pred     
    
    def get_predicted(self,data):
        for i in range(len(self.hidden_layers)):
            data=self.hidden_layers[i].get_predicted(data)
        p_y_given_x = T.nnet.softmax(T.dot(data, self.logRegressionLayer.W) + self.logRegressionLayer.b)
        y_pred = T.argmax(p_y_given_x, axis=1)
        return y_pred
        
    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params=given_params          
        
    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))
            
    def save_params(self, filename):
        # overwrite any existing file; pickle requires a binary file handle
        with open(filename, 'wb') as f:
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
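For reference, `build_train_function` above implements plain gradient descent with momentum. Stripped of the Theano plumbing, the update rule it encodes is sketched below in numpy (illustrative only; the function and toy values are not from the original module).

import numpy

def momentum_step(param, grad, delta_prev, learning_rate=0.1, alpha=0.9):
    """delta_t = -learning_rate * grad + alpha * delta_{t-1};  param <- param + delta_t"""
    delta = -learning_rate * grad + alpha * delta_prev
    return param + delta, delta

# toy example on a single parameter vector
param = numpy.array([1.0, -2.0])
delta_prev = numpy.zeros_like(param)
for step in range(3):
    grad = 2.0 * param          # gradient of a toy quadratic loss ||param||^2
    param, delta_prev = momentum_step(param, grad, delta_prev)
    print(step, param)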
Example #4
class DFS(object):
    """Deep feature selection class. 
    This structure is input_layer + stacked RBM.
    """
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 lambda1=0,
                 lambda2=0,
                 alpha1=0,
                 alpha2=0):
        """This class is made to support a variable number of layers.
    
        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw initial
                   weights

        :type n_in: int
        :param n_in: dimension of the input to the DFS
    
        :type n_hidden: list of ints
        :param n_hidden: intermediate layers size, must contain
                               at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network
       
        lambda1: float scalar, controls the overall strength of the regularization on the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar in [0,1], trades off smoothness (l_2) against sparsity (l_1) of the input weights.
        The regularization term is lambda1*( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the overall strength of the regularization on the MLP weight matrices.
        The regularization term is alpha1*( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 * \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar in [0,1], trades off smoothness (l_2) against sparsity (l_1) of the MLP weights.
        The regularization term is alpha1*( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 * \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
        """
        self.hidden_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        # input layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer
        # hidden layers
        for i in range(len(n_hidden)):
            if i == 0:
                input_hidden = self.input_layer.output
                n_in_hidden = n_in
            else:
                input_hidden = self.hidden_layers[i - 1].output
                n_in_hidden = n_hidden[i - 1]
            hd = HiddenLayer(rng=rng,
                             input=input_hidden,
                             n_in=n_in_hidden,
                             n_out=n_hidden[i],
                             activation=T.nnet.sigmoid)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=rng,
                            theano_rng=None,
                            input=input_hidden,
                            n_visible=n_in_hidden,
                            n_hidden=n_hidden[i],
                            W=hd.W,
                            hbias=hd.b)
            self.rbm_layers.append(rbm_layer)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if len(n_hidden) <= 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output, n_in=n_in, n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization terms on coefficients of input layer
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w**2).sum()
        #self.hinge_loss_neg=(T.maximum(0,-self.input_layer.w)).sum() # penalize negative values
        #self.hinge_loss_pos=(T.maximum(0,self.input_layer.w)).sum()  # # penalize positive values
        # regularization terms on weights of hidden layers
        L1s = []
        L2_sqrs = []
        for i in range(len(n_hidden)):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
        #        self.cost = self.negative_log_likelihood(self.y) \
        #         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
        #         + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
        #         + lambda1*lambda2*lambda3*self.hinge_loss_neg \
        #         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.cost = self.negative_log_likelihood(self.y) \
          + lambda1*(1.0-lambda2)*0.5*self.L2_input \
          + lambda1*lambda2*self.L1_input \
          + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.y_pred = self.logRegressionLayer.y_pred

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def pretraining_functions(self, train_set_x, batch_size, persistent_k=15):
        '''
        Build the symbolic pretraining functions; each one updates the corresponding RBM's parameters for one minibatch.
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm_layer in self.rbm_layers:
            # get the cost and the updates list
            cost, updates = rbm_layer.get_cost_updates(learning_rate,
                                                       persistent=None,
                                                       k=persistent_k)
            # compile the theano function
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x,
                                 valid_set_y, batch_size,
                                 learning_rate_shared):
        '''
        Build the symbolic fine-tuning functions for training and validation.
        '''
        # compute number of minibatches for training, validation and testing
        n_valid_batches = int(
            math.ceil(
                valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)
        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='train')

        #        test_score_i = theano.function([index], self.errors,
        #                 givens={
        #                   self.x: test_set_x[index * batch_size:
        #                                      (index + 1) * batch_size],
        #                   self.y: test_set_y[index * batch_size:
        #                                      (index + 1) * batch_size]},
        #                      name='test')

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set


#        def test_score():
#            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score

    def build_test_function(self, test_set_x, batch_size):
        """
        Build a symbolic test function.
        """
        n_test_batches = int(
            math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index],
            self.y_pred,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size]
            },
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score
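Usage note (not part of the original example): after pretraining and fine-tuning this DFS variant (driven exactly like the DBN functions above), the magnitudes of the one-to-one input weights rank the input features. The sketch below is hypothetical; it assumes the class and its dependencies are importable and that InputLayer stores its one-to-one weights in a Theano shared vector `w`, as the regularization code above suggests.

import numpy

rng = numpy.random.RandomState(1234)
dfs = DFS(rng=rng, n_in=784, n_hidden=[500, 500], n_out=10,
          lambda1=0.01, lambda2=1.0, alpha1=0.001, alpha2=0.0)

# ... pretraining_functions / build_finetune_functions are driven as for the DBN above ...

# rank input features by the magnitude of the one-to-one input weights
w = dfs.input_layer.w.get_value(borrow=True)   # assumed to be a shared 1-D vector
selected = numpy.argsort(-numpy.abs(w))[:20]   # indices of the 20 largest-magnitude weights
print('top-ranked input features:', selected)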
Example #7
    def __init__(self,
                 rng,
                 batch_size=100,
                 input_size=None,
                 nkerns=[4, 4, 4],
                 receptive_fields=((2, 8), (2, 8), (2, 8)),
                 poolsizes=((1, 8), (1, 8), (1, 4)),
                 full_hidden=16,
                 n_out=10):
        """
        
        """
        self.x = T.matrix(name='x', dtype=theano.config.floatX
                          )  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
        self.batch_size = theano.shared(
            value=batch_size, name='batch_size')  #T.lscalar('batch_size')

        self.layers = []
        self.params = []
        for i in range(len(nkerns)):
            receptive_field = receptive_fields[i]
            if i == 0:
                featmap_size_after_downsample = input_size
                layeri_input = self.x.reshape(
                    (batch_size, 1, featmap_size_after_downsample[0],
                     featmap_size_after_downsample[1]))
                image_shape = (batch_size, 1, featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], 1, receptive_field[0],
                                receptive_field[1])
            else:
                layeri_input = self.layers[i - 1].output
                image_shape = (batch_size, nkerns[i - 1],
                               featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], nkerns[i - 1], receptive_field[0],
                                receptive_field[1])

            layeri = LeNetConvPoolLayer(rng=rng,
                                        input=layeri_input,
                                        image_shape=image_shape,
                                        filter_shape=filter_shape,
                                        poolsize=poolsizes[i])
            featmap_size_after_conv = get_featmap_size_after_conv(
                featmap_size_after_downsample, receptive_fields[i])
            featmap_size_after_downsample = get_featmap_size_after_downsample(
                featmap_size_after_conv, poolsizes[i])
            self.layers.append(layeri)
            self.params.extend(layeri.params)

        # fully connected layer
        print('going to fully connected layer')
        layer_full_input = self.layers[-1].output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer_full = HiddenLayer(rng=rng,
                                 input=layer_full_input,
                                 n_in=nkerns[-1] *
                                 featmap_size_after_downsample[0] *
                                 featmap_size_after_downsample[1],
                                 n_out=full_hidden,
                                 activation=T.tanh)
        self.layers.append(layer_full)
        self.params.extend(layer_full.params)

        # classify the values of the fully-connected sigmoidal layer
        print('going to output layer')
        self.logRegressionLayer = LogisticRegression(
            input=self.layers[-1].output, n_in=full_hidden, n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # the cost we minimize during training is the NLL of the model
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred
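The shape bookkeeping in the constructor relies on two helpers, `get_featmap_size_after_conv` and `get_featmap_size_after_downsample`, that are not shown in this snippet. Under the usual assumptions of 'valid' convolution and non-overlapping pooling they would look roughly like the sketch below; this is an assumption, and the real definitions in the source module may differ.

def get_featmap_size_after_conv(featmap_size, receptive_field):
    # a 'valid' convolution shrinks each dimension by (filter size - 1)
    return (featmap_size[0] - receptive_field[0] + 1,
            featmap_size[1] - receptive_field[1] + 1)

def get_featmap_size_after_downsample(featmap_size, poolsize):
    # non-overlapping pooling divides each dimension by the pool size (border ignored)
    return (featmap_size[0] // poolsize[0],
            featmap_size[1] // poolsize[1])

# e.g. a (4, 512) feature map with a (2, 8) filter and (1, 8) pooling:
print(get_featmap_size_after_downsample(get_featmap_size_after_conv((4, 512), (2, 8)), (1, 8)))
# -> (3, 63)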
Example #8
class cnn(object):
    """
    The CNN class.
    """
    def __init__(self,
                 rng,
                 batch_size=100,
                 input_size=None,
                 nkerns=[4, 4, 4],
                 receptive_fields=((2, 8), (2, 8), (2, 8)),
                 poolsizes=((1, 8), (1, 8), (1, 4)),
                 full_hidden=16,
                 n_out=10):
        """
        
        """
        self.x = T.matrix(name='x', dtype=theano.config.floatX
                          )  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels
        self.batch_size = theano.shared(
            value=batch_size, name='batch_size')  #T.lscalar('batch_size')

        self.layers = []
        self.params = []
        for i in range(len(nkerns)):
            receptive_field = receptive_fields[i]
            if i == 0:
                featmap_size_after_downsample = input_size
                layeri_input = self.x.reshape(
                    (batch_size, 1, featmap_size_after_downsample[0],
                     featmap_size_after_downsample[1]))
                image_shape = (batch_size, 1, featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], 1, receptive_field[0],
                                receptive_field[1])
            else:
                layeri_input = self.layers[i - 1].output
                image_shape = (batch_size, nkerns[i - 1],
                               featmap_size_after_downsample[0],
                               featmap_size_after_downsample[1])
                filter_shape = (nkerns[i], nkerns[i - 1], receptive_field[0],
                                receptive_field[1])

            layeri = LeNetConvPoolLayer(rng=rng,
                                        input=layeri_input,
                                        image_shape=image_shape,
                                        filter_shape=filter_shape,
                                        poolsize=poolsizes[i])
            featmap_size_after_conv = get_featmap_size_after_conv(
                featmap_size_after_downsample, receptive_fields[i])
            featmap_size_after_downsample = get_featmap_size_after_downsample(
                featmap_size_after_conv, poolsizes[i])
            self.layers.append(layeri)
            self.params.extend(layeri.params)

        # fully connected layer
        print('going to fully connected layer')
        layer_full_input = self.layers[-1].output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer_full = HiddenLayer(rng=rng,
                                 input=layer_full_input,
                                 n_in=nkerns[-1] *
                                 featmap_size_after_downsample[0] *
                                 featmap_size_after_downsample[1],
                                 n_out=full_hidden,
                                 activation=T.tanh)
        self.layers.append(layer_full)
        self.params.extend(layer_full.params)

        # classify the values of the fully-connected sigmoidal layer
        print('going to output layer')
        self.logRegressionLayer = LogisticRegression(
            input=self.layers[-1].output, n_in=full_hidden, n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # the cost we minimize during training is the NLL of the model
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost = self.logRegressionLayer.negative_log_likelihood(self.y)
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred

    def build_train_function(self, train_set_x, train_set_y, batch_size, alpha,
                             learning_rate_shared):
        """
        Build the symbolic training function that updates the parameters (with momentum) for one minibatch.
        """
        # create a function to compute the mistakes that are made by the model
        index = T.lscalar('index')  # index to a [mini]batch
        #batch_size_var = T.lscalar('batch_size_var')  # batch_size
        # compute the gradients with respect to the model parameters
        grads = T.grad(self.cost, self.params)

        # add momentum: keep, for every parameter, a shared variable holding
        # the previous update delta_{t-1}, matching the parameter's dtype
        delta_before = []
        for param_i in self.params:
            delta_before_i = theano.shared(
                value=numpy.zeros_like(param_i.get_value()))
            delta_before.append(delta_before_i)

        updates = []
        for param_i, grad_i, delta_before_i in zip(self.params, grads,
                                                   delta_before):
            delta_i = -learning_rate_shared * grad_i + alpha * delta_before_i
            updates.append((param_i, param_i + delta_i))
            updates.append((delta_before_i, delta_i))

        train_model_cost = theano.function(
            [index],
            self.cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })
        return train_model_cost

    def build_valid_function(self, valid_set_x, valid_set_y, batch_size):
        """
        Build the symbolic validation function to get the validation error.
        """
        n_valid = valid_set_x.get_value(
            borrow=True).shape[0]  # number of validation samples
        n_valid_batches = n_valid // batch_size  #int(math.ceil( n_valid/ batch_size))

        index = T.lscalar('index')  # index to a [mini]batch
        #batch_size_var = T.lscalar('batch_size_var')  # batch_size
        valid_error_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_error():
            return [valid_error_i(i) for i in range(n_valid_batches)]


#            errors=[]
#            for i in xrange(n_valid_batches):
#                if i==n_valid_batches-1:
#                    batch_size_current= n_valid - i*batch_size
#                else:
#                    batch_size_current=batch_size
#                errors.extend(valid_error_i(i,batch_size_current))
#            return errors

        return valid_error
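    # Usage sketch (assumed, not from the source): the returned callable
    # evaluates every validation minibatch, so the overall error is simply
    # its mean:
    #
    #     valid_error = model.build_valid_function(valid_set_x, valid_set_y,
    #                                              batch_size=100)
    #     mean_valid_error = numpy.mean(valid_error())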

    def build_test_function(self, test_set_x):
        """
        Build the symbolic test function to get predicted class labels.
        """
        n_test = test_set_x.get_value(borrow=True).shape[0]
        batch_size = self.batch_size.get_value(borrow=True)
        n_test_batches = n_test // batch_size  #int(math.ceil(n_test / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        #        batch_size_var = T.lscalar('batch_size_var')  # batch_size
        #        test_pred_i = theano.function([index,batch_size_var], self.y_pred,
        #                                       givens={self.x: test_set_x[index * batch_size_var : (index + 1) * batch_size_var],
        #                                               self.batch_size: batch_size_var},
        #                                       name='test')

        test_pred_i = theano.function(
            [index],
            self.y_pred,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size]
            },
            name='test')
        test_pred_last = theano.function(
            [],
            self.y_pred,
            givens={self.x: test_set_x[-batch_size:]},
            name='test')

        # Create a function that scans the entire test set
        def test_pred():
            y_pred = numpy.array([])
            for i in range(n_test_batches):
                #                if i==n_test_batches-1:
                #                    batch_size_current=n_test - i*batch_size
                #                else:
                #                    batch_size_current=batch_size
                #                y_pred.extend(test_pred_i(i,batch_size_current))
                y_pred = numpy.append(y_pred, test_pred_i(i))
            left_over = n_test % batch_size
            if left_over > 0:
                left_over_pred = test_pred_last()
                y_pred = numpy.append(y_pred, left_over_pred[-left_over:])
            return y_pred

        return test_pred
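    # Usage sketch (assumed, not from the source): the compiled graph expects
    # full minibatches, so the last n_test % batch_size samples are predicted
    # from the trailing batch-sized slice and only their tail is appended:
    #
    #     test_pred = model.build_test_function(test_set_x)
    #     y_predicted = test_pred()   # 1D numpy array of length n_test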

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        with open(filename, 'wb') as f:  # overwrite any existing file
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
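The save_params method above pickles each parameter value in order. A matching loader is not part of this example; the sketch below is a hedged counterpart (the name `load_params` and its arguments are hypothetical, and pickle is assumed to be imported as elsewhere in the listing):

def load_params(model, filename):
    # Hypothetical helper: restore the parameters in the same order that
    # save_params pickled them, writing the values back into the Theano
    # shared variables held in model.params.
    with open(filename, 'rb') as f:
        for param in model.params:
            param.set_value(pickle.load(f))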
Example #9
class ScA(object):
    """
    Stacked contractive auto-encoder class (ScA)
    """
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 lambda_reg=0.001,
                 alpha_reg=0.001,
                 batch_size=100):
        """ This class is made to support a variable number of layers.

        :type rng: numpy.random.RandomState
        :param rng: numpy random number generator used to draw initial
                    weights

        :type n_in: int
        :param n_in: dimension of the input to the ScA

        :type n_hidden: list of ints
        :param n_hidden: intermediate layers size, must contain
                               at least one value

        :type n_out: int
        :param n_out: dimension of the output of the network
        
        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.
        
        :type alpha_reg: float
        :param alpha_reg: parameter from the interval [0,1] to control the smoothness of weights by the squared l_2 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * ||W||_2^2 + alpha_reg ||W||_1 ),
        Thus, the smaller alpha_reg is, the smoother the weights are.

        :type batch_size: int
        :param batch_size: minibatch size
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row of which is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_in
            else:
                input_size = n_hidden[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the ScA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.hidden_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=n_hidden[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.hidden_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)

            # Construct a contractive autoencoder that shared weights with this
            # layer
            cA_layer = cA(numpy_rng=rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.cA_layers.append(cA_layer)

        # We now need to add a logistic layer on top of the MLP
        if self.n_layers > 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                         n_in=input_size,
                                                         n_out=n_out)

        self.params.extend(self.logRegressionLayer.params)

        # regularization
        L1s = []
        L2_sqrs = []
        for i in range(self.n_layers):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood(
            self.y)
        self.cost=self.negative_log_likelihood + \
        lambda_reg * ( (1.0-alpha_reg)*0.5* self.L2_sqr +  alpha_reg*self.L1)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logRegressionLayer.errors(self.y)
        self.y_pred = self.logRegressionLayer.y_pred

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the cA corresponding to the layer with the same index.
        The function will require as input the minibatch index, and to train
        a cA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the cA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :type learning_rate: float
        :param learning_rate: learning rate used during training for any of
                              the cA layers
        '''

        index = T.lscalar('index')  # index to a minibatch
        contraction_level = T.scalar(
            'contraction_level')  # contraction level to use
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for cA_layer in self.cA_layers:
            # get the cost and the updates list
            cost, updates = cA_layer.get_cost_updates(contraction_level,
                                                      learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(contraction_level, default=0.1),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=[T.mean(cA_layer.L_rec), cA_layer.L_jacob],
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
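    # Usage sketch (assumed, not from the source): greedy layer-wise
    # pretraining iterates each compiled function over all minibatches:
    #
    #     pretrain_fns = sca.pretraining_functions(train_set_x, batch_size=100)
    #     n_batches = train_set_x.get_value(borrow=True).shape[0] // 100
    #     for layer_fn in pretrain_fns:
    #         for epoch in range(15):
    #             costs = [layer_fn(i, contraction_level=0.1, learning_rate=0.1)
    #                      for i in range(n_batches)]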

    def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x,
                                 valid_set_y, batch_size,
                                 learning_rate_shared):
        '''
        Build symbolic fine-tuning functions for training and validation.
        '''
        # compute number of minibatches for training, validation and testing
        n_valid_batches = int(
            math.ceil(
                valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='train')

        #        test_score_i = theano.function([index], self.errors,
        #                 givens={
        #                   self.x: test_set_x[index * batch_size:
        #                                      (index + 1) * batch_size],
        #                   self.y: test_set_y[index * batch_size:
        #                                      (index + 1) * batch_size]},
        #                      name='test')

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set


#        def test_score():
#            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score
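    # Usage sketch (assumed, not from the source): plain-SGD fine-tuning with
    # a validation check after every epoch:
    #
    #     train_fn, valid_score = sca.build_finetune_functions(
    #         train_set_x, train_set_y, valid_set_x, valid_set_y,
    #         batch_size=100, learning_rate_shared=lr_shared)
    #     for epoch in range(100):
    #         for i in range(n_train_batches):
    #             train_fn(i)
    #         print('mean validation error:', numpy.mean(valid_score()))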

    def build_test_function(self, test_set_x, batch_size):
        """
        Build symbolic test function.
        """
        n_test_batches = int(
            math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index],
            self.y_pred,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size]
            },
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def save_params(self, filename):
        with open(filename, 'wb') as f:  # overwrite any existing file
            for param in self.params:
                pickle.dump(param.get_value(borrow=True), f)
Example #10
    def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
                 lambda_reg=0.001, alpha_reg=0.0):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        
        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.
        
        :type alpha_reg: float
        :param alpha_reg: parameter from the interval [0,1] to control the smoothness of weights by the squared l_2 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ),
        Thus, the smaller alpha_reg is, the smoother the weights are.
        """
        self.hidden_layers=[]
        self.params=[]
        self.n_layers=len(n_hidden)
        if not x:
            x=T.matrix('x')
        self.x=x
        if not y:
            y=T.ivector('y')
        self.y=y
        for i in range(len(n_hidden)):
            if i==0: # first hidden layer
                hd=HiddenLayer(rng=rng, input=self.x, n_in=n_in, n_out=n_hidden[i],
                               activation=activation)
            else:
                hd=HiddenLayer(rng=rng, input=self.hidden_layers[i-1].output, n_in=n_hidden[i-1], n_out=n_hidden[i],
                               activation=activation)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            
        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if self.n_layers>0:
            self.logRegressionLayer = LogisticRegression(input=self.hidden_layers[-1].output,
                                                     n_in=n_hidden[-1], n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                     n_in=n_in, n_out=n_out)

            
        self.params.extend(self.logRegressionLayer.params)
        
        # regularization terms
        L1s=[]
        L2_sqrs=[]
        #L1s.append(abs(self.hidden_layers[0].W).sum())
        for i in range(len(n_hidden)):
            L1s.append (abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
        # cost function to be minimized
        self.cost =  self.negative_log_likelihood(self.y) \
         + lambda_reg * ( (1.0-alpha_reg)*0.5* self.L2_sqr +  alpha_reg*self.L1)
        self.y_pred=self.logRegressionLayer.y_pred
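Every class in this listing applies the same elastic-net penalty,
lambda_reg*( (1-alpha_reg)/2 * \sum||W_i||_2^2 + alpha_reg * \sum||W_i||_1 ),
to its weight matrices when building self.cost. Purely as an illustration (not code from the source; it assumes numpy is imported as elsewhere in the listing), the term could be written in plain NumPy as:

def elastic_net_penalty(weights, lambda_reg=0.001, alpha_reg=0.0):
    # weights: a list of numpy arrays, one per layer; the default values
    # mirror the constructors above.
    l1 = sum(numpy.abs(W).sum() for W in weights)
    l2_sqr = sum((W ** 2).sum() for W in weights)
    return lambda_reg * ((1.0 - alpha_reg) * 0.5 * l2_sqr + alpha_reg * l1)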
Example #11
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one or more layers of hidden units and nonlinear activations.
    Intermediate layers usually have as activation function tanh or the
    sigmoid function (defined here by a ``HiddenLayer`` class)  while the
    top layer is a softmax layer (defined here by a ``LogisticRegression``
    class).
    """

    def __init__(self, rng, n_in, n_hidden, n_out, x=None, y=None, activation=T.tanh,
                 lambda_reg=0.001, alpha_reg=0.0):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        
        :type lambda_reg: float
        :param lambda_reg: parameter to control the sparsity of weights by the l_1 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ).
        Thus, the larger lambda_reg is, the sparser the weights are.
        
        :type alpha_reg: float
        :param alpha_reg: parameter from the interval [0,1] to control the smoothness of weights by the squared l_2 norm.
        The regularization term is lambda_reg( (1-alpha_reg)/2 * \sum||W||_2^2 + alpha_reg \sum||W||_1 ),
        Thus, the smaller alpha_reg is, the smoother the weights are.
        """
        self.hidden_layers=[]
        self.params=[]
        self.n_layers=len(n_hidden)
        if not x:
            x=T.matrix('x')
        self.x=x
        if not y:
            y=T.ivector('y')
        self.y=y
        for i in range(len(n_hidden)):
            if i==0: # first hidden layer
                hd=HiddenLayer(rng=rng, input=self.x, n_in=n_in, n_out=n_hidden[i],
                               activation=activation)
            else:
                hd=HiddenLayer(rng=rng, input=self.hidden_layers[i-1].output, n_in=n_hidden[i-1], n_out=n_hidden[i],
                               activation=activation)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            
        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if self.n_layers>0:
            self.logRegressionLayer = LogisticRegression(input=self.hidden_layers[-1].output,
                                                     n_in=n_hidden[-1], n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(input=self.x,
                                                     n_in=n_in, n_out=n_out)

            
        self.params.extend(self.logRegressionLayer.params)
        
        # regularization terms
        L1s=[]
        L2_sqrs=[]
        #L1s.append(abs(self.hidden_layers[0].W).sum())
        for i in range(len(n_hidden)):
            L1s.append (abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W ** 2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W ** 2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
        # cost function to be minimized
        self.cost =  self.negative_log_likelihood(self.y) \
         + lambda_reg * ( (1.0-alpha_reg)*0.5* self.L2_sqr +  alpha_reg*self.L1)
        self.y_pred=self.logRegressionLayer.y_pred
    
    def build_train_function(self, train_set_x, train_set_y, batch_size, alpha, learning_rate_shared):
        """
        Build the symbolic training function to update the parameters and return the minibatch cost.
        """
        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        grads = T.grad(self.cost, self.params)
        
        # add momentum
        # initialize the delta_i-1
        delta_before=[]
        for param_i in self.params:
            delta_before_i=theano.shared(value=numpy.zeros(param_i.get_value().shape))
            delta_before.append(delta_before_i)
        
        updates = []
        for param_i, grad_i, delta_before_i in zip(self.params, grads, delta_before):
            delta_i=-learning_rate_shared * grad_i + alpha*delta_before_i
            updates.append((param_i, param_i + delta_i ))
            updates.append((delta_before_i,delta_i))
            
        train_model_cost = theano.function([index], self.cost, updates=updates,
                                      givens={
                                      self.x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                      self.y: train_set_y[index * batch_size: (index + 1) * batch_size]})
        return train_model_cost

    def build_valid_function(self,valid_set_x, valid_set_y, batch_size):
        """
        Build the symbolic function to calculate the validation error on a validation set.
        """
        n_valid_batches = int(math.ceil(valid_set_x.get_value(borrow=True).shape[0] / batch_size))
        
        index = T.lscalar('index')  # index to a [mini]batch
        valid_error_i = theano.function([index], self.errors,
                                        givens={self.x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                                self.y: valid_set_y[index * batch_size:(index + 1) * batch_size]},
                                        name='valid')

        # Create a function that scans the entire validation set
        def valid_error():
            return [valid_error_i(i) for i in range(n_valid_batches)]
        return valid_error
        
    def build_test_function(self, test_set_x, batch_size):
        """
        Build the symbolic test function to predict class labels.
        """
        n_test_batches = int(math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_pred_i = theano.function([index], self.y_pred,
                                       givens={self.x: test_set_x[index * batch_size : (index + 1) * batch_size]},
                                       name='test')

        # Create a function that scans the entire test set
        def test_pred():
            y_pred=[]
            for i in range(n_test_batches):
                y_pred.extend(test_pred_i(i))
            return y_pred
        return test_pred     
    
    def get_predicted(self,data):
        """
        Predict the class labels of given data.
        """
        for i in range(len(self.hidden_layers)):
            data=self.hidden_layers[i].get_predicted(data)
        p_y_given_x = T.nnet.softmax(T.dot(data, self.logRegressionLayer.W) + self.logRegressionLayer.b)
        y_pred = T.argmax(p_y_given_x, axis=1)
        return y_pred
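    # Usage sketch (assumed, not from the source; it relies on HiddenLayer
    # also exposing get_predicted): the method returns a symbolic expression,
    # so it still has to be compiled before it can label numpy data:
    #
    #     data = T.matrix('data')
    #     predict = theano.function([data], mlp.get_predicted(data))
    #     labels = predict(new_samples)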
        
    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params=given_params  
        
    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))
            
    def save_params(self,filename):
        with open(filename,'wb') as f: # overwrite any existing file
            for param in self.params:
                pickle.dump(param.get_value(borrow=True),f)
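A minimal usage sketch for the MLP class above (an assumption, not part of the original listing: numpy and theano are imported, the shared datasets train_set_x/train_set_y/valid_set_x/valid_set_y and n_train_batches exist, and the HiddenLayer/LogisticRegression helpers are importable):

rng = numpy.random.RandomState(89677)
lr_shared = theano.shared(numpy.asarray(0.1, dtype=theano.config.floatX))
mlp = MLP(rng=rng, n_in=784, n_hidden=[500, 200], n_out=10,
          lambda_reg=0.001, alpha_reg=0.5)
train = mlp.build_train_function(train_set_x, train_set_y, batch_size=100,
                                 alpha=0.9, learning_rate_shared=lr_shared)
valid_error = mlp.build_valid_function(valid_set_x, valid_set_y, batch_size=100)
for epoch in range(50):
    for i in range(n_train_batches):
        train(i)
    print('epoch %d, mean validation error %f' % (epoch, numpy.mean(valid_error())))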
Example #12
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 activation=T.nnet.sigmoid,
                 lambda1=0,
                 lambda2=0,
                 alpha1=0,
                 alpha2=0,
                 batch_size=100):
        """ Initialize the parameters for the DFL class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        
        activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}
        
        lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar from the interval [0,1], controls the smoothness of the input weights by the squared l_2 norm.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar from the interval [0,1], controls the smoothness of the weight matrices in the MLP by the squared l_2 norm.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
        
        batch_size: int, minibatch size.
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        # input layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer
        # hidden layers
        for i in range(len(n_hidden)):
            if i == 0:
                input_hidden = self.input_layer.output
                n_in_hidden = n_in
            else:
                input_hidden = self.hidden_layers[i - 1].output
                n_in_hidden = n_hidden[i - 1]
            hd = HiddenLayer(rng=rng,
                             input=input_hidden,
                             n_in=n_in_hidden,
                             n_out=n_hidden[i],
                             activation=T.nnet.sigmoid)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            cA_layer = cA(numpy_rng=rng,
                          input=input_hidden,
                          n_visible=n_in_hidden,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=hd.W,
                          bhid=hd.b)
            self.cA_layers.append(cA_layer)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if len(n_hidden) <= 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output, n_in=n_in, n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization terms on coefficients of input layer
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w**2).sum()
        #self.hinge_loss_neg=(T.maximum(0,-self.input_layer.w)).sum() # penalize negative values
        #self.hinge_loss_pos=(T.maximum(0,self.input_layer.w)).sum()  # # penalize positive values
        # regularization terms on weights of hidden layers
        L1s = []
        L2_sqrs = []
        for i in range(len(n_hidden)):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
        #        self.cost = self.negative_log_likelihood(self.y) \
        #         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
        #         + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
        #         + lambda1*lambda2*lambda3*self.hinge_loss_neg \
        #         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.cost = self.negative_log_likelihood(self.y) \
         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
         + lambda1*lambda2*self.L1_input \
         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.y_pred = self.logRegressionLayer.y_pred
Example #13
class DFS(object):
    """Deep feature selection class. 
    This structure is input_layer + stacked contractive autoencoder.
    """
    def __init__(self,
                 rng,
                 n_in=784,
                 n_hidden=[500, 500],
                 n_out=10,
                 activation=T.nnet.sigmoid,
                 lambda1=0,
                 lambda2=0,
                 alpha1=0,
                 alpha2=0,
                 batch_size=100):
        """ Initialize the parameters for the DFL class.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: list of ints
        :param n_hidden: sizes of the hidden layers

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie
        
        activation: activation function, from {T.tanh, T.nnet.sigmoid (default)}
        
        lambda1: float scalar, controls the sparsity of the input weights.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the larger lambda1 is, the sparser the input weights are.

        lambda2: float scalar from the interval [0,1], controls the smoothness of the input weights by the squared l_2 norm.
        The regularization term is lambda1( (1-lambda2)/2 * ||w||_2^2 + lambda2 * ||w||_1 ).
        Thus, the smaller lambda2 is, the smoother the input weights are.

        alpha1: float scalar, controls the sparsity of the weight matrices in the MLP.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the larger alpha1 is, the sparser the MLP weights are.

        alpha2: float scalar from the interval [0,1], controls the smoothness of the weight matrices in the MLP by the squared l_2 norm.
        The regularization term is alpha1( (1-alpha2)/2 * \sum||W_i||_2^2 + alpha2 \sum||W_i||_1 ).
        Thus, the smaller alpha2 is, the smoother the MLP weights are.
        
        batch_size: int, minibatch size.
        """

        self.hidden_layers = []
        self.cA_layers = []
        self.params = []
        self.n_layers = len(n_hidden)

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data, each row is a sample
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
        # [int] labels
        # input layer
        input_layer = InputLayer(input=self.x, n_in=n_in)
        self.params.extend(input_layer.params)
        self.input_layer = input_layer
        # hidden layers
        for i in range(len(n_hidden)):
            if i == 0:
                input_hidden = self.input_layer.output
                n_in_hidden = n_in
            else:
                input_hidden = self.hidden_layers[i - 1].output
                n_in_hidden = n_hidden[i - 1]
            hd = HiddenLayer(rng=rng,
                             input=input_hidden,
                             n_in=n_in_hidden,
                             n_out=n_hidden[i],
                             activation=T.nnet.sigmoid)
            self.hidden_layers.append(hd)
            self.params.extend(hd.params)
            cA_layer = cA(numpy_rng=rng,
                          input=input_hidden,
                          n_visible=n_in_hidden,
                          n_hidden=n_hidden[i],
                          n_batchsize=batch_size,
                          W=hd.W,
                          bhid=hd.b)
            self.cA_layers.append(cA_layer)

        # The logistic regression layer gets as input the hidden units
        # of the hidden layer
        if len(n_hidden) <= 0:
            self.logRegressionLayer = LogisticRegression(
                input=self.input_layer.output, n_in=n_in, n_out=n_out)
        else:
            self.logRegressionLayer = LogisticRegression(
                input=self.hidden_layers[-1].output,
                n_in=n_hidden[-1],
                n_out=n_out)
        self.params.extend(self.logRegressionLayer.params)

        # regularization terms on coefficients of input layer
        self.L1_input = abs(self.input_layer.w).sum()
        self.L2_input = (self.input_layer.w**2).sum()
        #self.hinge_loss_neg=(T.maximum(0,-self.input_layer.w)).sum() # penalize negative values
        #self.hinge_loss_pos=(T.maximum(0,self.input_layer.w)).sum()  # # penalize positive values
        # regularization terms on weights of hidden layers
        L1s = []
        L2_sqrs = []
        for i in range(len(n_hidden)):
            L1s.append(abs(self.hidden_layers[i].W).sum())
            L2_sqrs.append((self.hidden_layers[i].W**2).sum())
        L1s.append(abs(self.logRegressionLayer.W).sum())
        L2_sqrs.append((self.logRegressionLayer.W**2).sum())
        self.L1 = T.sum(L1s)
        self.L2_sqr = T.sum(L2_sqrs)

        # negative log likelihood of the MLP is given by the negative
        # log likelihood of the output of the model, computed in the
        # logistic regression layer
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        # same holds for the function computing the number of errors
        self.errors = self.logRegressionLayer.errors(self.y)
        #        self.cost = self.negative_log_likelihood(self.y) \
        #         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
        #         + lambda1*lambda2*(1.0-lambda3)*self.hinge_loss_pos \
        #         + lambda1*lambda2*lambda3*self.hinge_loss_neg \
        #         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.cost = self.negative_log_likelihood(self.y) \
         + lambda1*(1.0-lambda2)*0.5*self.L2_input \
         + lambda1*lambda2*self.L1_input \
         + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2 * self.L1
        self.y_pred = self.logRegressionLayer.y_pred

    def get_params(self):
        return copy.deepcopy(self.params)

    def set_params(self, given_params):
        self.params = given_params

    def print_params(self):
        for param in self.params:
            print(param.get_value(borrow=True))

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the cA corresponding to the layer with the same index.
        The function will require as input the minibatch index, and to train
        a cA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the cA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :type learning_rate: float
        :param learning_rate: learning rate used during training for any of
                              the cA layers
        '''

        index = T.lscalar('index')  # index to a minibatch
        contraction_level = T.scalar(
            'contraction_level')  # contraction level to use
        learning_rate = T.scalar('learning_rate')  # learning rate to use
        # number of batches
        #n_batches = int(math.ceil(train_set_x.get_value(borrow=True).shape[0] / batch_size))
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for cA_layer in self.cA_layers:
            # get the cost and the updates list
            cost, updates = cA_layer.get_cost_updates(contraction_level,
                                                      learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(contraction_level, default=0.1),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=[T.mean(cA_layer.L_rec), cA_layer.L_jacob],
                updates=updates,
                givens={self.x: train_set_x[batch_begin:batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, train_set_x, train_set_y, valid_set_x,
                                 valid_set_y, batch_size,
                                 learning_rate_shared):
        '''
        Build symbolic fine-tuning functions for training and validation.
        '''
        # compute number of minibatches for training, validation and testing
        n_valid_batches = int(
            math.ceil(
                valid_set_x.get_value(borrow=True).shape[0] / batch_size))

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate_shared))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.cost,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='train')

        #        test_score_i = theano.function([index], self.errors,
        #                 givens={
        #                   self.x: test_set_x[index * batch_size:
        #                                      (index + 1) * batch_size],
        #                   self.y: test_set_y[index * batch_size:
        #                                      (index + 1) * batch_size]},
        #                      name='test')

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            },
            name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in range(n_valid_batches)]

        # Create a function that scans the entire test set


#        def test_score():
#            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score

    def build_test_function(self, test_set_x, batch_size):
        """
        Build a symbolic test function.
        
        """
        n_test_batches = int(
            math.ceil(test_set_x.get_value(borrow=True).shape[0] / batch_size))
        index = T.lscalar('index')  # index to a [mini]batch
        test_score_i = theano.function(
            [index],
            self.y_pred,
            givens={
                self.x: test_set_x[index * batch_size:(index + 1) * batch_size]
            },
            name='test')

        # Create a function that scans the entire test set
        def test_score():
            y_pred = []
            for i in range(n_test_batches):
                y_pred.extend(test_score_i(i))
            return y_pred

        return test_score
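A plausible end-to-end sketch for the DFS class above (assumptions, not code from the source: numpy and theano are imported, the shared datasets and the cA/InputLayer/HiddenLayer/LogisticRegression helpers exist, and the feature-ranking step at the end is only an illustration of how the sparsity-regularized input-layer weights might be inspected):

rng = numpy.random.RandomState(1000)
lr_shared = theano.shared(numpy.asarray(0.1, dtype=theano.config.floatX))
n_train_batches = train_set_x.get_value(borrow=True).shape[0] // 100

dfs = DFS(rng=rng, n_in=784, n_hidden=[500], n_out=10,
          lambda1=0.01, lambda2=1.0, alpha1=0.001, alpha2=0.0, batch_size=100)

# unsupervised pretraining of the stacked contractive autoencoder layers
for layer_fn in dfs.pretraining_functions(train_set_x, batch_size=100):
    for epoch in range(15):
        pretrain_costs = [layer_fn(i) for i in range(n_train_batches)]

# supervised fine-tuning with plain SGD
train_fn, valid_score = dfs.build_finetune_functions(
    train_set_x, train_set_y, valid_set_x, valid_set_y,
    batch_size=100, learning_rate_shared=lr_shared)
for epoch in range(100):
    for i in range(n_train_batches):
        train_fn(i)

# one plausible way to read off the selected features: rank the inputs by the
# magnitude of the regularized input-layer weights
w = dfs.input_layer.w.get_value(borrow=True)
ranked_features = numpy.argsort(-numpy.abs(w).ravel())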