Example #1
    def update_weights(self, weight_updates, restriction, restriction_typ):
        ''' This function updates the weight parameters.
            
        :Parameters:
            weight_updates:     Update for the weight parameter.
                               -type: numpy array [input dim, output dim]
                              
            restriction:        If a scalar is given, the weights are forced
                                after an update not to exceed this value;
                                restriction_typ controls how the values are
                                restricted.
                               -type: scalar or None

            restriction_typ:    If a value for the restriction is given, this parameter
                                determines the restriction type: 'Cols', 'Rows', 'Mat'
                                or 'Val' to restrict the column norms, row norms, matrix
                                norm, or the matrix's absolute values.
                               -type: string
           
        '''
        # Update weights
        self.orginalW += weight_updates

        # Restrict the weights if a scalar restriction is given
        if numx.isscalar(restriction):
            if restriction > 0:
                if restriction_typ == 'Cols':
                    self.orginalW = numxExt.restrict_norms(
                        self.orginalW,
                        restriction, 0)
                if restriction_typ == 'Rows':
                    self.orginalW = numxExt.restrict_norms(
                        self.orginalW,
                        restriction, 1)
                if restriction_typ == 'Mat':
                    self.orginalW = numxExt.restrict_norms(
                        self.orginalW,
                        restriction, None)

                if restriction_typ == 'Val':
                    numx.clip(self.orginalW, -restriction, restriction, self.orginalW)

        self.weights = self._convolve(self.orginalW, self.mask)
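
A note on the norm restriction used above: numxExt.restrict_norms keeps the Euclidean norms of the weights below the given limit, per column (axis 0), per row (axis 1), or for the whole matrix (axis None), while the 'Val' branch simply clips individual entries. The plain-NumPy sketch below only illustrates that idea; the helper name and the exact scaling rule are assumptions, not pydeep's implementation.

    import numpy as np

    def restrict_norms_sketch(matrix, max_norm, axis=None):
        """Rescale columns (axis=0), rows (axis=1) or the whole matrix
        (axis=None) so that no Euclidean norm exceeds max_norm."""
        result = matrix.copy()
        if axis is None:
            norm = np.sqrt(np.sum(result ** 2))
            if norm > max_norm:
                result *= max_norm / norm
        else:
            norms = np.sqrt(np.sum(result ** 2, axis=axis, keepdims=True))
            scale = np.where(norms > max_norm, max_norm / np.maximum(norms, 1e-12), 1.0)
            result *= scale
        return result

    w = np.random.randn(4, 3) * 10.0
    print(np.linalg.norm(restrict_norms_sketch(w, 1.0, axis=0), axis=0))  # column norms <= 1.0
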
Example #2
    def update_biases(self, bias_updates, restriction, restriction_typ):
        ''' This function updates the bias parameter.

        :Parameters:
            bias_updates:       Update for the bias parameter.
                               -type: numpy array [1, input dim]

            restriction:        If a scalar is given, the bias update is restricted not to exceed this value.
                               -type: scalar or None

            restriction_typ:    'Cols', 'Rows' or 'Mat' restrict the norm; 'Val' clips the values.
                               -type: string

        '''
        # Restrict the bias update
        if numx.isscalar(restriction):
            if restriction > 0:
                if restriction_typ in ('Cols', 'Rows', 'Mat'):
                    bias_updates = numxExt.restrict_norms(
                        bias_updates, restriction)

                if restriction_typ == 'Val':
                    numx.clip(bias_updates, -restriction, restriction,
                              bias_updates)
        self.bias += bias_updates
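
The 'Val' branch relies on in-place clipping: passing the array itself as the output argument of clip modifies it directly, so no reassignment is needed. A tiny illustration, with plain NumPy standing in for numx:

    import numpy as np

    bias_updates = np.array([[-3.0, 0.5, 2.5]])
    np.clip(bias_updates, -1.0, 1.0, out=bias_updates)  # clip in place, no reassignment needed
    print(bias_updates)  # [[-1.   0.5  1. ]]
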
Example #3
    def _adapt_gradient(self,
                        pos_gradients,
                        neg_gradients,
                        batch_size,
                        epsilon,
                        momentum,
                        regL1Norm,
                        regL2Norm,
                        regSparseness,
                        desired_sparseness,
                        mean_hidden_activity,
                        visible_offsets,
                        hidden_offsets,
                        use_centered_gradient,
                        restrict_gradient,
                        restriction_norm):
        ''' This function updates the parameter gradients.

        :Parameters:

            pos_gradients:            Positive Gradients.
                                     -type: numpy array[parameter index,
                                                        parameter shape]

            neg_gradients:            Negative Gradients.
                                     -type: numpy array[parameter index,
                                                        parameter shape]

            batch_size:               The batch_size of the data.
                                     -type: float

            epsilon:                  The learning rate.
                                     -type: numpy array[num parameters]

            momentum:                 The momentum term.
                                     -type: numpy array[num parameters]

            regL1Norm:                The parameter for the L1 regularization
                                     -type: float

            regL2Norm:                The parameter for the L2 regularization,
                                      also known as weight decay.
                                     -type: float

            regSparseness:            The parameter for the sparseness
                                      regularization.
                                     -type: None or float

            desired_sparseness:       Desired average hidden activation or
                                      None for no regularization.
                                     -type: None or float

            mean_hidden_activity:     Average hidden activation
                                      <P(h_i=1|x)>_h_i
                                     -type: numpy array [num samples]

            visible_offsets:          If not zero the gradient is centered
                                      around this value.
                                     -type: float

            hidden_offsets:           If not zero the gradient is centered
                                      around this value.
                                     -type: float

            use_centered_gradient:    Uses the centered gradient instead of
                                      centering.
                                     -type: bool

            restrict_gradient:        If a scalar is given the norm of the
                                      weight gradient (along the input dim) is
                                      restricted to stay below this value.
                                     -type: None, float

            restriction_norm:         Restricts the column norm, row norm or
                                      matrix norm.
                                     -type: string: 'Cols','Rows', 'Mat'



        '''
        # calculate normal gradient
        gradients = []
        for i in range(self.num_parameters):
            gradients.append((pos_gradients[i] - neg_gradients[i])/batch_size)

        # adapt to centered gradient
        if use_centered_gradient:
            gradients = self._calculate_centered_gradient(gradients,
                                                          visible_offsets,
                                                          hidden_offsets)

        # adapt parameters
        for i in range(self.num_parameters):
            self.parameter_updates[i] *= momentum[i]
            self.parameter_updates[i] += epsilon[i] * gradients[i]

        # Add sparse penalty
        if regSparseness != 0:
            if desired_sparseness is not None:
                self.parameter_updates[2] += (epsilon[2] * regSparseness * (
                                              desired_sparseness
                                              - mean_hidden_activity))
                #st = numx.clip(mean_hidden_activity,0.001,0.999)
                #st = -desired_sparseness/st+(1.0-desired_sparseness)/(1.0-st)
                #self.parameter_updates[2] -= epsilon[2] * regSparseness * st

        # add weight decay
        if regL1Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * regL1Norm
                                          * numx.sign(self.model.w))

        if regL2Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * regL2Norm
                                          * self.model.w)

        # Restricts the gradient
        if numx.isscalar(restrict_gradient):
            if restrict_gradient > 0:
                if restriction_norm == 'Cols':
                    self.parameter_updates[0] = npExt.restrict_norms(
                                                    self.parameter_updates[0],
                                                    restrict_gradient, 0 )
                if restriction_norm == 'Rows':
                    self.parameter_updates[0] = npExt.restrict_norms(
                                                    self.parameter_updates[0],
                                                    restrict_gradient, 1 )
                if restriction_norm == 'Mat':
                    self.parameter_updates[0] = npExt.restrict_norms(
                                                    self.parameter_updates[0],
                                                    restrict_gradient, None)
    def _adapt_gradient(self,
                        pos_gradients,
                        neg_gradients,
                        batch_size,
                        epsilon,
                        momentum,
                        reg_l1norm,
                        reg_l2norm,
                        reg_sparseness,
                        desired_sparseness,
                        mean_hidden_activity,
                        visible_offsets,
                        hidden_offsets,
                        use_centered_gradient,
                        restrict_gradient,
                        restriction_norm):
        """ This function updates the parameter gradients.

        :param pos_gradients: Positive Gradients.
        :type pos_gradients: numpy array[parameter index, parameter shape]

        :param neg_gradients: Negative Gradients.
        :type neg_gradients: numpy array[parameter index, parameter shape]

        :param batch_size: The batch_size of the data.
        :type batch_size: float

        :param epsilon: The learning rate.
        :type epsilon: numpy array[num parameters]

        :param momentum: The momentum term.
        :type momentum: numpy array[num parameters]

        :param reg_l1norm: The parameter for the L1 regularization
        :type reg_l1norm: float

        :param reg_l2norm: The parameter for the L2 regularization, also known as weight decay.
        :type reg_l2norm: float

        :param reg_sparseness: The parameter for the sparseness regularization.
        :type reg_sparseness: None or float

        :param desired_sparseness: Desired average hidden activation or None for no regularization.
        :type desired_sparseness: None or float

        :param mean_hidden_activity: Average hidden activation <P(h_i=1|x)>_h_i
        :type mean_hidden_activity: numpy array [num samples]

        :param visible_offsets: If not zero the gradient is centered around this value.
        :type visible_offsets: float

        :param hidden_offsets: If not zero the gradient is centered around this value.
        :type hidden_offsets: float

        :param use_centered_gradient: Uses the centered gradient instead of centering.
        :type use_centered_gradient: bool

        :param restrict_gradient: If a scalar is given the norm of the weight gradient (along the input dim) is \
                                  restricted to stay below this value.
        :type restrict_gradient: None, float

        :param restriction_norm: Restricts the column norm, row norm or Matrix norm.
        :type restriction_norm: string, 'Cols','Rows', 'Mat'
        """
        # calculate normal gradient
        gradients = []
        for i in range(self.num_parameters):
            gradients.append((pos_gradients[i] - neg_gradients[i]) / batch_size)

        # adapt to centered gradient
        if use_centered_gradient:
            gradients = self._calculate_centered_gradient(gradients, visible_offsets, hidden_offsets)

        # adapt parameters
        for i in range(self.num_parameters):
            self.parameter_updates[i] *= momentum[i]
            self.parameter_updates[i] += epsilon[i] * gradients[i]

        # Add sparse penalty
        if reg_sparseness != 0:
            if desired_sparseness is not None:
                self.parameter_updates[2] += (epsilon[2] * reg_sparseness * (desired_sparseness - mean_hidden_activity))
                # st = numx.clip(mean_hidden_activity,0.001,0.999)
                # st = -desired_sparseness/st+(1.0-desired_sparseness)/(1.0-st)
                # self.parameter_updates[2] -= epsilon[2] * reg_sparseness * st

        # add weight decay
        if reg_l1norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_l1norm * numx.sign(self.model.w))

        if reg_l2norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_l2norm * self.model.w)

        # Restricts the gradient
        if numx.isscalar(restrict_gradient):
            if restrict_gradient > 0:
                if restriction_norm == 'Cols':
                    self.parameter_updates[0] = numxext.restrict_norms(self.parameter_updates[0], restrict_gradient, 0)
                if restriction_norm == 'Rows':
                    self.parameter_updates[0] = numxext.restrict_norms(self.parameter_updates[0], restrict_gradient, 1)
                if restriction_norm == 'Mat':
                    self.parameter_updates[0] = numxext.restrict_norms(self.parameter_updates[0], restrict_gradient,
                                                                       None)
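
Condensed to a single parameter matrix, the update performed by _adapt_gradient is a momentum step plus the scaled gradient, followed by optional L1/L2 weight decay. The stand-alone sketch below restates that logic in plain NumPy; the function name and explicit arguments are illustrative only.

    import numpy as np

    def adapt_single_update(update, gradient, weights, epsilon, momentum,
                            reg_l1norm=0.0, reg_l2norm=0.0):
        """One momentum/weight-decay step for a single parameter matrix."""
        update *= momentum                     # keep a fraction of the previous step
        update += epsilon * gradient           # add the scaled new gradient
        if reg_l1norm != 0.0:
            update -= epsilon * reg_l1norm * np.sign(weights)  # L1 weight decay
        if reg_l2norm != 0.0:
            update -= epsilon * reg_l2norm * weights           # L2 weight decay
        return update

    w = np.zeros((3, 2))
    step = np.zeros_like(w)
    grad = np.ones_like(w)
    step = adapt_single_update(step, grad, w, epsilon=0.1, momentum=0.9, reg_l2norm=0.0002)
    w += step
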
    def _train(self,
               data,
               epsilon,
               momentum,
               update_visible_offsets,
               update_hidden_offsets,
               corruptor,
               reg_L1Norm,
               reg_L2Norm,
               reg_sparseness,
               desired_sparseness,
               reg_contractive,
               reg_slowness,
               data_next,
               restrict_gradient,
               restriction_norm):

        ''' The training for one batch is performed using gradient descent.

        :Parameters:
            data:                     The training data
                                     -type: numpy array [num samples, input dim]

            epsilon:                  The learning rate.
                                     -type: numpy array[num parameters]

            momentum:                 The momentum term.
                                     -type: numpy array[num parameters]


            update_visible_offsets:  The update step size for the model's
                                     visible offsets.
                                     Good value if functionality is used: 0.001
                                    -type: float

            update_hidden_offsets:   The update step size for the model's hidden
                                     offsets.
                                     Good value if functionality is used: 0.001
                                    -type: float

            corruptor:                Defines if and how the data gets corrupted
                                      (e.g. Gaussian noise, dropout, maxout).
                                     -type: corruptor

            reg_L1Norm:                The parameter for the L1 regularization
                                     -type: float

            reg_L2Norm:                The parameter for the L2 regularization,
                                      also known as weight decay.
                                     -type: float

            reg_sparseness:           The parameter (epsilon) for the sparseness regularization.
                                    -type: float

            desired_sparseness:      Desired average hidden activation.
                                    -type: float

            reg_contractive:          The parameter (epsilon) for the contractive regularization.
                                    -type: float

            reg_slowness:             The parameter (epsilon) for the slowness regularization.
                                    -type: float

            data_next:               The next training data in the sequence.
                                    -type: numpy array [num samples, input dim]

            restrict_gradient:       If a scalar is given the norm of the
                                     weight gradient is restricted to stay
                                     below this value.
                                    -type: None, float

            restriction_norm:        Restricts the column norm, row norm or
                                     matrix norm.
                                    -type: string: 'Cols','Rows', 'Mat'

        '''
        x_next = None
        h_next = None
        a_h_next = None
        #orginal_h = None
        # Forward propagation, if corruptor is given the data is corrupted
        if corruptor is None:
            x = data
            x_next = data_next
            a_h,h = self.model._encode(x)
            #orginal_h = h
            a_y,y = self.model._decode(h)
            if reg_slowness > 0.0 and data_next is not None:
                a_h_next,h_next = self.model._encode(x_next)
        else:
            #_,orginal_h = self.model._encode(data)
            if isinstance(corruptor, list):
                x = corruptor[0].corrupt(data)
                a_h,h = self.model._encode(x)
                h = corruptor[1].corrupt(h)
                a_y,y = self.model._decode(h)
                y = corruptor[2].corrupt(y)
                if reg_slowness > 0.0 and data_next is not None:
                    x_next = corruptor[0].corrupt(data_next)
                    a_h_next,h_next = self.model._encode(x_next)
            else:
                x = corruptor.corrupt(data)
                a_h,h = self.model._encode(x)
                h = corruptor.corrupt(h)
                a_y,y = self.model._decode(h)
                y = corruptor.corrupt(y)
                if reg_slowness > 0.0 and data_next is not None:
                    x_next = corruptor.corrupt(data_next)
                    a_h_next,h_next = self.model._encode(x_next)

        # Update offsets
        mean_h = 0.0
        mean_x = 0.0
        if update_visible_offsets > 0.0:
            mean_x = numx.mean(x,axis=0).reshape(1,self.model.input_dim)
        if update_hidden_offsets > 0.0:
            mean_h = numx.mean(h,axis=0).reshape(1,self.model.output_dim)

        self.model.update_offsets(mean_x,
                                  mean_h,
                                  update_visible_offsets,
                                  update_hidden_offsets)

        # Get the gradients for the model
        gradients = self.model._get_gradients(data, a_h, h, a_y, y, reg_contractive, reg_sparseness, desired_sparseness,
                                              reg_slowness, x_next, a_h_next, h_next)

        # adapt parameters
        for i in range(self.num_parameters):
            self.parameter_updates[i] *= momentum[i]
            self.parameter_updates[i] -= epsilon[i] * gradients[i]

        # add weight decay L1 norm
        if reg_L1Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_L1Norm
                                          * numx.sign(self.model.w))
        # add weight decay L2 norm
        if reg_L2Norm != 0:
            self.parameter_updates[0] -= (epsilon[0] * reg_L2Norm
                                          * self.model.w)

        # Restricts the gradient
        if numx.isscalar(restrict_gradient):
            if restrict_gradient > 0:
                if restriction_norm == 'Cols':
                    typ = 0
                if restriction_norm == 'Rows':
                    typ = 1
                if restriction_norm == 'Mat':
                    typ = None
                self.parameter_updates[0] = npExt.restrict_norms(self.parameter_updates[0], restrict_gradient, typ)

        # update the parameters with the calculated gradient
        self.model.update_parameters(self.parameter_updates)
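
update_offsets itself is not shown in this snippet; judging from the docstring ("update step size ... good value: 0.001"), the centering presumably blends the previous offsets with the new batch means. A purely illustrative exponential-moving-average sketch of that idea, not pydeep's actual implementation:

    import numpy as np

    def update_offsets_sketch(offsets, new_mean, shift):
        """Illustrative moving average of the centering offsets."""
        return (1.0 - shift) * offsets + shift * new_mean

    hidden_offsets = np.full((1, 4), 0.5)
    mean_h = np.array([[0.1, 0.9, 0.4, 0.6]])
    hidden_offsets = update_offsets_sketch(hidden_offsets, mean_h, shift=0.001)
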
Example #6
    def train(self, data, labels, costs, reg_costs, epsilon, update_offsets,
              corruptor, reg_L1Norm, reg_L2Norm, reg_sparseness,
              desired_sparseness, costs_sparseness, restrict_gradient,
              restriction_norm):
        ''' Train function which performs one step of gradient descent.
            Use check_setup() to check whether your training setup is valid.

        :Parameters:
            data:               Training data as numpy array.
                               -type: numpy arrays [batchsize, input dim]

            labels:             List of numpy arrays, or None for layers
                                without a cost. The last layer has to have a
                                cost, so the last item in labels has to be an
                                array.
                               -type: List of numpy arrays and/or Nones

            costs:              List of cost functions. The last layer has to have a cost.
                               -type: list of pydeep.base.costfunction

            reg_costs:          List of scalars controlling the strength of the cost functions.
                                The last entry is typically 1.
                               -type: list of scalars

            epsilon:            List of learning rates.
                               -type: list of scalars

            update_offsets:     List of shifting factors for centering.
                               -type: list of scalars

            corruptor:          List of corruptor objects, e.g. dropout.
                               -type: list of pydeep.base.corruptors

            reg_L1Norm:         List of L1 norm regularization terms.
                               -type: list of scalars

            reg_L2Norm:         List of L2 norm regularization terms.
                               -type: list of scalars

            reg_sparseness:     List of scalars controlling the strength of the sparseness regularization.
                               -type: list of scalars

            desired_sparseness: List of scalars / target sparseness.
                               -type: list of scalars

            costs_sparseness:   List of sparseness cost functions and/or None values.
                               -type: list of pydeep.base.costfunction and/or None

            restrict_gradient:  Maximal norm for the gradient or None.
                               -type: scalar or None

            restriction_norm:   Defines how the weights will be restricted: 'Cols', 'Rows' or 'Mat'.
                               -type: string, 'Cols', 'Rows' or 'Mat'

        '''
        # Forward propagate through the entire network, possibly using corrupted states
        output = self.model.forward_propagate(data=data, corruptor=corruptor)

        # Reparameterize the network to the new mean - update all offsets and biases
        for l in range(len(self.model.layers)):
            self.model.layers[l].update_offsets(shift=update_offsets[l],
                                                new_mean=None)

        deltas = None
        # Go from top layer to last layer
        for l in range(self.model.num_layers - 1, -1, -1):
            # calculate the delta values
            deltas = self.model.layers[l]._get_deltas(
                deltas=deltas,
                labels=labels[l],
                cost=costs[l],
                reg_cost=reg_costs[l],
                desired_sparseness=desired_sparseness[l],
                cost_sparseness=costs_sparseness[l],
                reg_sparseness=reg_sparseness[l])

            # backprop the error if it is not the first/bottommost layer.
            if l > 0:
                deltas = self.model.layers[l]._backward_propagate()

            # Now we are ready to calculate the gradient
            grad = self.model.layers[l]._calculate_gradient()
            # Possibly add weight decay terms
            if reg_L1Norm[l] > 0.0:
                grad[0] += (reg_L1Norm[l] *
                            numx.sign(self.model.layers[l].weights))
            if reg_L2Norm[l] > 0.0:
                grad[0] += (reg_L2Norm[l] * self.model.layers[l].weights)

            # Apply learning rate by the ADA (AdaGrad-style) rule
            self._old_grad[l][0] += grad[0]**2
            self._old_grad[l][1] += grad[1]**2
            grad[0] /= (self._numerical_stabilty +
                        numx.sqrt(self._old_grad[l][0]))
            grad[1] /= (self._numerical_stabilty +
                        numx.sqrt(self._old_grad[l][1]))
            grad[0] *= epsilon[l]
            grad[1] *= epsilon[l]

            # Restrict the gradient if desired
            if numx.isscalar(restrict_gradient):
                if restrict_gradient > 0:
                    if restriction_norm == 'Cols':
                        grad[0] = numxExt.restrict_norms(
                            grad[0], restrict_gradient, 0)
                    if restriction_norm == 'Rows':
                        grad[0] = numxExt.restrict_norms(
                            grad[0], restrict_gradient, 1)
                    if restriction_norm == 'Mat':
                        grad[0] = numxExt.restrict_norms(
                            grad[0], restrict_gradient, None)
            # Update the model parameters
            self.model.layers[l].update_parameters([grad[0], grad[1]])
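
The learning-rate handling in the loop above is an AdaGrad-style rule: squared gradients are accumulated per layer and each new gradient is divided by the square root of that running sum (plus a small stability constant), so frequently updated parameters get progressively smaller steps. A minimal self-contained sketch of that rule follows; the names are illustrative, not pydeep's API.

    import numpy as np

    def adagrad_step(grad, grad_sq_sum, epsilon, stability=1e-8):
        """Scale a gradient by the accumulated squared-gradient history."""
        grad_sq_sum += grad ** 2
        scaled = epsilon * grad / (stability + np.sqrt(grad_sq_sum))
        return scaled, grad_sq_sum

    w = np.zeros(3)
    acc = np.zeros_like(w)
    for _ in range(5):
        g = np.array([1.0, 0.1, 0.01])        # toy gradient
        step, acc = adagrad_step(g, acc, epsilon=0.5)
        w -= step                             # effective step size shrinks over time
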