Example #1
    def test_partition_function_factorize_h(self):
        sys.stdout.write(
            'RBM Estimator -> Performing partition_function_factorize_h test ...'
        )
        sys.stdout.flush()
        # All batch size exponents must yield the same log partition function.
        LogZ = Estimator.partition_function_factorize_h(
            self.bbrbm, beta=None, batchsize_exponent='AUTO', status=False)
        assert numx.all(numx.abs(LogZ - self.bbrbmTruelogZ) < self.epsilon)
        LogZ = Estimator.partition_function_factorize_h(
            self.bbrbm, beta=None, batchsize_exponent=0, status=False)
        assert numx.all(numx.abs(LogZ - self.bbrbmTruelogZ) < self.epsilon)
        LogZ = Estimator.partition_function_factorize_h(
            self.bbrbm, beta=None, batchsize_exponent=3, status=False)
        assert numx.all(numx.abs(LogZ - self.bbrbmTruelogZ) < self.epsilon)
        # An oversized exponent must be handled internally and give the same result.
        LogZ = Estimator.partition_function_factorize_h(
            self.bbrbm, beta=None, batchsize_exponent=555, status=False)
        assert numx.all(numx.abs(LogZ - self.bbrbmTruelogZ) < self.epsilon)
        print(' successfully passed!')
        sys.stdout.flush()
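
The test only varies batchsize_exponent, so every chunking of the enumeration has to yield the same log partition function (the oversized value 555 evidently gets clipped internally, since the assertion still passes). Below is a minimal, self-contained NumPy sketch of the underlying factorization idea, enumerating one layer in chunks of 2**exponent while summing the other layer out analytically. It is not PyDeep's implementation; all names and sizes (n_vis, n_hid, w, bv, bh, exponent) are made up for illustration.

# Sketch only: enumerate all hidden states in chunks of 2**exponent and
# log-sum-exp their unnormalized log-probabilities (visibles summed out analytically).
import itertools
import numpy as np
from scipy.special import logsumexp

rng = np.random.RandomState(0)
n_vis, n_hid, exponent = 6, 4, 2          # toy sizes, chosen for illustration
w = rng.randn(n_vis, n_hid) * 0.1         # toy weights
bv = np.zeros(n_vis)                      # visible bias
bh = np.zeros(n_hid)                      # hidden bias

def ulog_p_h(h):
    # Unnormalized log p(h) of a Bernoulli-Bernoulli RBM: the sum over all
    # visible states factorizes into a product over visible units.
    return h.dot(bh) + np.sum(np.log1p(np.exp(bv + h.dot(w.T))), axis=1)

all_h = np.array(list(itertools.product([0, 1], repeat=n_hid)), dtype=float)
chunk = 2 ** exponent
log_z = logsumexp([logsumexp(ulog_p_h(all_h[i:i + chunk]))
                   for i in range(0, len(all_h), chunk)])
print(log_z)                              # identical for any valid chunk size

The chunk size only trades memory against the number of loop iterations, which is why every exponent in the test above agrees with the reference value.
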
Example #2
def partition_function_exact(model, batchsize_exponent='AUTO'):
    ''' Computes the true partition function for the given model by factoring
        over the visible and hidden2 units.

    :Parameters:
        model:              The model
                           -type: Valid DBM model

        batchsize_exponent: 2^batchsize_exponent will be the batch size.
                           -type: int

    :Returns:
        Log Partition function for the model.
       -type: float

    '''
    # We transform the DBM to an RBM with restricted connections.
    rbm = RBM_MODEL.BinaryBinaryRBM(
        number_visibles=model.input_dim + model.hidden2_dim,
        number_hiddens=model.hidden1_dim,
        data=None,
        initial_weights=numx.vstack((model.W1, model.W2.T)),
        initial_visible_bias=numx.hstack((model.b1, model.b3)),
        initial_hidden_bias=model.b2,
        initial_visible_offsets=numx.hstack((model.o1, model.o3)),
        initial_hidden_offsets=model.o2)
    return RBM_ESTIMATOR.partition_function_factorize_h(rbm)
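
The construction turns the DBM into an RBM with restricted connections: the DBM's input and hidden2 layers together form the surrogate RBM's visible layer, hidden1 becomes its hidden layer, and partition_function_factorize_h can then factor out the visible and hidden2 units as the docstring says. A quick shape check with made-up layer sizes (illustrative only, not library code):

# Hypothetical layer sizes, used only to illustrate the weight stacking above.
import numpy as np

input_dim, hidden1_dim, hidden2_dim = 4, 3, 2
W1 = np.zeros((input_dim, hidden1_dim))    # DBM weights: input   <-> hidden1
W2 = np.zeros((hidden1_dim, hidden2_dim))  # DBM weights: hidden1 <-> hidden2

# Surrogate RBM: visibles = input + hidden2 units, hiddens = hidden1 units.
weights = np.vstack((W1, W2.T))
assert weights.shape == (input_dim + hidden2_dim, hidden1_dim)
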
Example #3
    def _train(self,
               data,
               epsilon,
               k,
               momentum,
               reg_l1norm,
               reg_l2norm,
               reg_sparseness,
               desired_sparseness,
               update_visible_offsets,
               update_hidden_offsets,
               offset_typ,
               use_centered_gradient,
               restrict_gradient,
               restriction_norm,
               use_hidden_states):
        """ The training for one batch is performed using True Gradient (GD) for k Gibbs-sampling steps.

        :param data: The data used for training.
        :type data: numpy array [batch_size, input dimension]

        :param epsilon: The learning rate.
        :type epsilon: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

        :param k: Number of sampling steps.
        :type k: int

        :param momentum: The momentum term.
        :type momentum: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

        :param reg_l1norm: The parameter for the L1 regularization
        :type reg_l1norm: float

        :param reg_l2norm: The parameter for the L2 regularization, also known as weight decay.
        :type reg_l2norm: float

        :param reg_sparseness: The parameter for the desired_sparseness regularization.
        :type reg_sparseness: None or float

        :param desired_sparseness: Desired average hidden activation or None for no regularization.
        :type desired_sparseness: None or float

        :param update_visible_offsets: The update step size for the model's visible offsets.
        :type update_visible_offsets: float

        :param update_hidden_offsets: The update step size for the model's hidden offsets.
        :type update_hidden_offsets: float

        :param offset_typ: | Different offsets can be used to center the gradient.
                           | Example: 'DM' uses the positive phase visible mean and the negative phase hidden mean.
                           | 'A0' uses the average of positive and negative phase mean for visible, zero for the
                           | hiddens. Possible values are out of {A,D,M,0}x{A,D,M,0}.
        :type offset_typ: string

        :param use_centered_gradient: If True, the centered gradient is used instead of centering the model.
        :type use_centered_gradient: bool

        :param restrict_gradient: If a scalar is given the norm of the weight gradient (along the input dim) is \
                                  restricted to stay below this value.
        :type restrict_gradient: None, float

        :param restriction_norm: Restricts the column norm, row norm or Matrix norm.
        :type restriction_norm: string, 'Cols','Rows', 'Mat'

        :param use_hidden_states: If True, the hidden states are used for the gradient calculations, the hidden \
                                  probabilities otherwise.
        :type use_hidden_states: bool
        """
        # Sample the first time
        hid_probs_pos = self.model.probability_h_given_v(data)

        if update_visible_offsets != 0.0:
            xmean_pos = numx.mean(data, axis=0).reshape(1, self.model.input_dim)
        hmean_pos = 0.0
        if update_hidden_offsets != 0.0 or reg_sparseness != 0.0:
            if use_hidden_states:
                hid_states_pos = self.model.sample_h(hid_probs_pos)
                hmean_pos = numx.mean(hid_states_pos, axis=0).reshape(1, self.model.output_dim)
            else:
                hmean_pos = numx.mean(hid_probs_pos, axis=0).reshape(1, self.model.output_dim)

        # Calculate the partition function
        if self.model.input_dim < self.model.output_dim:
            batch_size = numx.min([self.model.input_dim, 12])
            ln_z = estimator.partition_function_factorize_v(self.model, beta=1.0, batchsize_exponent=batch_size,
                                                            status=False)
        else:
            batch_size = numx.min([self.model.output_dim, 12])
            ln_z = estimator.partition_function_factorize_h(self.model, beta=1.0, batchsize_exponent=batch_size,
                                                            status=False)

        # empty negative phase parts
        neg_gradients = [numx.zeros(self.model.w.shape),
                         numx.zeros(self.model.bv.shape),
                         numx.zeros(self.model.bh.shape)]

        # Calculate gradient stepwise in batches
        bit_length = self.model.input_dim

        # batch_size holds an exponent here: chunks of 2**batch_size states are processed at a time.
        batchsize = numx.power(2, batch_size)
        num_combinations = numx.power(2, bit_length)
        num_batches = num_combinations // batchsize

        for batch in range(0, num_batches):
            # Generate current batch
            bit_combinations = numxext.generate_binary_code(bit_length, batch_size, batch)
            # P(x)
            prob_x = numx.exp(
                self.model.log_probability_v(ln_z, bit_combinations))
            # P(h|x)
            prob_h_x = self.model.probability_h_given_v(bit_combinations)
            # Calculate gradient
            neg_gradients[1] += numx.sum(numx.tile(prob_x, (1, self.model.input_dim)) * (bit_combinations -
                                                                                         self.model.ov), axis=0)
            prob_x = (numx.tile(prob_x, (1, self.model.output_dim)) * (prob_h_x - self.model.oh))
            neg_gradients[0] += numx.dot((bit_combinations - self.model.ov).T, prob_x)
            neg_gradients[2] += numx.sum(prob_x, axis=0)

        if update_visible_offsets != 0.0 and (offset_typ[0] == 'A' or offset_typ[0] == 'M'):
            bit_combinations = numxext.generate_binary_code(self.model.input_dim, None, 0)
            prob_x = numx.exp(self.model.log_probability_v(ln_z, bit_combinations))
            xmean_neg = numx.sum(prob_x * bit_combinations, axis=0).reshape(1, self.model.input_dim)

        if update_hidden_offsets != 0.0 and (offset_typ[1] == 'A' or offset_typ[1] == 'M'):
            bit_combinations = numxext.generate_binary_code(self.model.output_dim, None, 0)
            prob_h = numx.exp(self.model.log_probability_h(ln_z, bit_combinations))
            hmean_neg = numx.sum(prob_h * bit_combinations, axis=0).reshape(1, self.model.output_dim)

        new_visible_offsets = 0.0
        if update_visible_offsets != 0.0:
            if offset_typ[0] == 'A':
                new_visible_offsets = (xmean_pos + xmean_neg) * 0.5
            elif offset_typ[0] == 'D':
                new_visible_offsets = xmean_pos
            elif offset_typ[0] == 'M':
                new_visible_offsets = xmean_neg
            elif offset_typ[0] == '0':
                new_visible_offsets = 0.0 * xmean_pos
        new_hidden_offsets = 0.0
        if update_hidden_offsets != 0.0:
            if offset_typ[1] == 'A':
                new_hidden_offsets = (hmean_pos + hmean_neg) * 0.5
            elif offset_typ[1] == 'D':
                new_hidden_offsets = hmean_pos
            elif offset_typ[1] == 'M':
                new_hidden_offsets = hmean_neg
            elif offset_typ[1] == '0':
                new_hidden_offsets = 0.0 * hmean_pos

        if use_centered_gradient is False:
            # update the centers
            self.model.update_offsets(new_visible_offsets, new_hidden_offsets, update_visible_offsets,
                                      update_hidden_offsets)
            self.visible_offsets = 0.0
            self.hidden_offsets = 0.0
        else:
            self.hidden_offsets = ((1.0 - update_hidden_offsets) * self.hidden_offsets + update_hidden_offsets
                                   * new_hidden_offsets)
            self.visible_offsets = ((1.0 - update_visible_offsets) * self.visible_offsets + update_visible_offsets
                                    * new_visible_offsets)

        # Calculate positive phase gradient using states or probabilities
        if use_hidden_states:
            pos_gradients = self.model.calculate_gradients(data, hid_states_pos)
        else:
            pos_gradients = self.model.calculate_gradients(data, hid_probs_pos)

        # Multiply by the batch size since _adapt_gradient divides by the batch size
        neg_gradients[0] *= data.shape[0]
        neg_gradients[1] *= data.shape[0]
        neg_gradients[2] *= data.shape[0]

        # Adapt the gradients by weight decay momentum and learning rate
        self._adapt_gradient(pos_gradients=pos_gradients,
                             neg_gradients=neg_gradients,
                             batch_size=data.shape[0],
                             epsilon=epsilon,
                             momentum=momentum,
                             reg_l1norm=reg_l1norm,
                             reg_l2norm=reg_l2norm,
                             reg_sparseness=reg_sparseness,
                             desired_sparseness=desired_sparseness,
                             mean_hidden_activity=hmean_pos,
                             visible_offsets=self.visible_offsets,
                             hidden_offsets=self.hidden_offsets,
                             use_centered_gradient=use_centered_gradient,
                             restrict_gradient=restrict_gradient,
                             restriction_norm=restriction_norm)

        # update the parameters with the calculated gradient
        self.model.update_parameters(self.parameter_updates)
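
The negative phase in _train is exact rather than sampled: every visible configuration is enumerated, weighted by its model probability exp(log p(v)), and the weighted statistics are accumulated, which is what makes this a true-gradient trainer. Below is a compact NumPy sketch of the same expectation for a small, non-centered Bernoulli-Bernoulli RBM; all names and sizes are made up for illustration and none of this is the trainer's actual code.

import itertools
import numpy as np
from scipy.special import logsumexp, expit

rng = np.random.RandomState(0)
n_vis, n_hid = 5, 3                          # toy sizes
w = rng.randn(n_vis, n_hid) * 0.1            # toy weights
bv = np.zeros(n_vis)                         # visible bias
bh = np.zeros(n_hid)                         # hidden bias

# Enumerate all visible states; log p(v) = -F(v) - log Z for a BB-RBM.
all_v = np.array(list(itertools.product([0, 1], repeat=n_vis)), dtype=float)
neg_free_energy = all_v.dot(bv) + np.sum(np.log1p(np.exp(bh + all_v.dot(w))), axis=1)
log_z = logsumexp(neg_free_energy)
p_v = np.exp(neg_free_energy - log_z)        # exact model distribution over v

# Exact negative-phase statistics: E_model[v h^T], E_model[v] and E_model[h].
p_h_given_v = expit(bh + all_v.dot(w))
neg_grad_w = (all_v * p_v[:, None]).T.dot(p_h_given_v)
neg_grad_bv = p_v.dot(all_v)
neg_grad_bh = p_v.dot(p_h_given_v)

In the trainer the same sums run in chunks of 2**batch_size states and subtract the offsets ov/oh, but the expectation being computed is identical.
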
Example #4
print(
    'Epoch\tRecon. Error\tLog likelihood train\tLog likelihood test\tExpected End-Time'
)
for epoch in range(epochs):

    # Loop over all batches
    for b in range(0, train_data.shape[0], batch_size):
        batch = train_data[b:b + batch_size, :]
        trainer_pcd.train(data=batch,
                          epsilon=0.01,
                          update_visible_offsets=update_offsets,
                          update_hidden_offsets=update_offsets)

    # Calculate Log-Likelihood, reconstruction error and expected end time after the first and every 5th epoch
    if epoch == 0 or (epoch + 1) % 5 == 0:
        logZ = estimator.partition_function_factorize_h(rbm)
        ll_train = numx.mean(estimator.log_likelihood_v(rbm, logZ, train_data))
        ll_test = numx.mean(estimator.log_likelihood_v(rbm, logZ, test_data))
        re = numx.mean(estimator.reconstruction_error(rbm, train_data))
        print('{}\t\t{:.4f}\t\t\t{:.4f}\t\t\t\t{:.4f}\t\t\t{}'.format(
            epoch + 1, re, ll_train, ll_test,
            measurer.get_expected_end_time(epoch + 1, epochs)))
    else:
        print(epoch + 1)

measurer.end()

# Print end/training time
print("End-time: \t{}".format(measurer.get_end_time()))
print("Training time:\t{}".format(measurer.get_interval()))
Example #5
                         momentum=0.0,
                         reg_l1norm=0.0,
                         reg_l2norm=0.0,
                         reg_sparseness=0.0,
                         desired_sparseness=0.0,
                         update_visible_offsets=0.0,
                         update_hidden_offsets=0.0,
                         restrict_gradient=False,
                         restriction_norm='Cols',
                         use_hidden_states=False,
                         use_centered_gradient=False)

    # Calculate Log likelihood and reconstruction error
    RE_train = numx.mean(estimator.reconstruction_error(rbm, train_data))
    RE_test = numx.mean(estimator.reconstruction_error(rbm, test_data))
    logZ = estimator.partition_function_factorize_h(rbm, batchsize_exponent=h1)
    LL_train = numx.mean(estimator.log_likelihood_v(rbm, logZ, train_data))
    LL_test = numx.mean(estimator.log_likelihood_v(rbm, logZ, test_data))
    print('%5d \t%0.5f \t%0.5f \t%0.5f \t%0.5f' % (epoch, RE_train, RE_test,
                                                   LL_train, LL_test))

# Calculate partition function and its AIS approximation
logZ = estimator.partition_function_factorize_h(rbm, batchsize_exponent=h1)
logZ_AIS = estimator.annealed_importance_sampling(rbm,
                                                  num_chains=100,
                                                  k=1,
                                                  betas=1000,
                                                  status=False)[0]
logZ_rAIS = estimator.reverse_annealed_importance_sampling(rbm,
                                                           num_chains=100,
                                                           k=1,
                                                           betas=1000,
                                                           status=False)[0]
Example #6
# Measuring time
measurer = MEASURE.Stopwatch()

# Train model
print "Training"
print "Epoch\tRecon. Error\tLog likelihood \tExpected End-Time"
for epoch in range(1, epochs + 1):
    train_data = numx.random.permutation(train_data)
    for b in range(0, train_data.shape[0], batch_size):
        batch = train_data[b : b + batch_size, :]
        trainer.train(data=batch, epsilon=0.1, regL2Norm=0.001)

    # Calculate Log-Likelihood, reconstruction error and expected end time every 10th epoch
    if epoch % 10 == 0:
        Z = ESTIMATOR.partition_function_factorize_h(rbm)
        LL = numx.mean(ESTIMATOR.log_likelihood_v(rbm, Z, train_data))
        RE = numx.mean(ESTIMATOR.reconstruction_error(rbm, train_data))
        print "%d\t\t%8.6f\t\t%8.4f\t\t" % (epoch, RE, LL),
        print measurer.get_expected_end_time(epoch, epochs),
        print

measurer.end()

# Print end time
print()
print("End-time: \t{}".format(measurer.get_end_time()))
print("Training time:\t{}".format(measurer.get_interval()))

# Calculate and approximate partition function
Z = ESTIMATOR.partition_function_factorize_h(rbm, batchsize_exponent=h1, status=False)
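
Examples #5 and #6 end by computing the exact log partition function and comparing it with a sampled approximation (AIS in Example #5). As a much cruder illustration of the same idea, the sketch below estimates log Z of a toy Bernoulli-Bernoulli RBM with plain importance sampling from a uniform proposal and compares it with exact enumeration. It is not AIS and not PyDeep code; every name and size in it is made up.

import itertools
import numpy as np
from scipy.special import logsumexp

rng = np.random.RandomState(0)
n_vis, n_hid = 6, 4                          # toy sizes
w = rng.randn(n_vis, n_hid) * 0.1
bv = np.zeros(n_vis)
bh = np.zeros(n_hid)

def neg_free_energy(v):
    # -F(v) for a Bernoulli-Bernoulli RBM (hidden units summed out analytically).
    return v.dot(bv) + np.sum(np.log1p(np.exp(bh + v.dot(w))), axis=1)

# Exact log Z by enumerating all visible states.
all_v = np.array(list(itertools.product([0, 1], repeat=n_vis)), dtype=float)
log_z_exact = logsumexp(neg_free_energy(all_v))

# Naive importance sampling with a uniform proposal: Z = 2**n * E_uniform[exp(-F(v))].
samples = rng.randint(0, 2, size=(10000, n_vis)).astype(float)
log_z_est = (logsumexp(neg_free_energy(samples)) - np.log(len(samples))
             + n_vis * np.log(2.0))
print(log_z_exact, log_z_est)                # the two values should be close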