def random_sample_bas(length, num_samples):
    """ Generates the distribution corresponding to num_samples samples drawn from the
        (length x length) BAS dataset, showing bars or stripes.

    Args:
        length (int): Length of the bars/stripes.
        num_samples (int): Number of samples.

    Returns:
        (1darray): Generated probability distribution.
    """
    # Stripes: each binary code repeated row-wise gives images with constant columns
    stripes = npext.generate_binary_code(length)
    stripes = np.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)
    # Bars: each bit repeated column-wise gives images with constant rows
    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = np.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    # Stack both blocks, dropping one copy of the duplicated constant images
    data = np.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))
    # Draw patterns uniformly and accumulate the empirical distribution
    distrib = np.zeros(2 ** (length * length))
    for _ in range(num_samples):
        i = np.random.randint(0, len(data))
        # Interpret the sampled pattern as a binary number indexing its state
        bin_string = ''.join(str(int(data[i, j])) for j in range(length ** 2))
        distrib[int(bin_string, 2)] += 1.0 / num_samples
    return distrib

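# Usage sketch (illustrative, not part of the library API): for length=2 the
# empirical distribution lives on 2**(2*2) = 16 states, sums to one, and is
# supported on at most the 2**(2+1) - 2 = 6 bars-and-stripes patterns.
def _demo_random_sample_bas(num_samples=1000):
    distrib = random_sample_bas(2, num_samples)
    assert distrib.shape == (16,)
    assert np.isclose(distrib.sum(), 1.0)
    assert np.count_nonzero(distrib) <= 6
    return distrib
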
def generate_bas_complete(length):
    """ Creates the true, exact distribution of the (length x length) BAS dataset.

    Args:
        length (int): Length of the bars/stripes.

    Returns:
        (1darray): Generated probability distribution.
    """
    # Construct all stripe and bar patterns (same construction as random_sample_bas)
    stripes = npext.generate_binary_code(length)
    stripes = np.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)
    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = np.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    data = np.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))
    # Generate the distribution: uniform mass on every BAS pattern
    distrib = np.zeros(2 ** (length * length))
    for i in range(len(data)):
        bin_string = ''.join(str(int(data[i, j])) for j in range(length ** 2))
        distrib[int(bin_string, 2)] = 1.0 / len(data)
    return distrib

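# Consistency sketch (illustrative): the empirical distribution should approach
# the exact one as num_samples grows; the total variation distance shrinks
# roughly like 1/sqrt(num_samples).
def _check_bas_distributions(length=2, num_samples=10000):
    exact = generate_bas_complete(length)
    empirical = random_sample_bas(length, num_samples)
    return 0.5 * np.abs(exact - empirical).sum()
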
def _LL_exact_check(model, x, lnZ):
    ''' Computes the exact log likelihood for x by summing over all possible states
        for h1, h2. Only possible for small hidden layers! This is just a proof of
        concept; use LL_exact() instead, it is much faster!

    :Parameters:
        model: The model.
               -type: Valid DBM model

        x:     Input states.
               -type: numpy array [batch size, input dim]

        lnZ:   Logarithm of the partition function.
               -type: float

    :Returns:
        Exact log likelihood for x.
        -type: numpy array [batch size, 1]
    '''
    # Generate all binary codes for both hidden layers
    all_h1 = npExt.generate_binary_code(model.W2.shape[0])
    all_h2 = npExt.generate_binary_code(model.W2.shape[1])
    result = numx.zeros(x.shape[0])
    # Brute force: 2^|h1| * 2^|h2| energy evaluations per data point
    for i in range(x.shape[0]):
        for j in range(all_h1.shape[0]):
            for k in range(all_h2.shape[0]):
                result[i] += numx.exp(-model.energy(
                    x[i].reshape(1, x.shape[1]),
                    all_h1[j].reshape(1, all_h1.shape[1]),
                    all_h2[k].reshape(1, all_h2.shape[1])))
    return numx.log(result) - lnZ

def unnormalized_log_probability_x(self, x):
    ''' Computes the unnormalized log probabilities of x.

    :Parameters:
        x: Input layer states.
           -type: numpy array [batch size, input dim]

    :Returns:
        Unnormalized log probability of x.
        -type: numpy array [batch size, 1]
    '''
    # Generate all possible binary codes for h1 and h2
    all_h1 = npExt.generate_binary_code(self.W2.shape[0])
    all_h2 = npExt.generate_binary_code(self.W2.shape[1])
    # Center variables
    xtemp = x - self.o1
    h1temp = all_h1 - self.o2
    h2temp = all_h2 - self.o3
    # Bias term
    bias = numx.dot(xtemp, self.b1.T)
    # Both quadratic terms
    part1 = numx.exp(numx.dot(numx.dot(xtemp, self.W1) + self.b2, h1temp.T))
    part2 = numx.exp(numx.dot(numx.dot(h1temp, self.W2) + self.b3, h2temp.T))
    # Dot product of all combinations of the quadratic terms + bias
    return bias + numx.log(
        numx.sum(numx.dot(part1, part2), axis=1).reshape(x.shape[0], 1))

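# Explanatory note on the factorization used in unnormalized_log_probability_x
# above (attribute names as in the method; xc, h1c, h2c denote the centered
# variables x - o1, h1 - o2, h2 - o3):
#
#   p*(x) = exp(xc b1^T) * sum_{h1} [ exp((xc W1 + b2) h1c^T)
#                                     * sum_{h2} exp((h1c W2 + b3) h2c^T) ]
#
# Since the inner sum depends only on h1, the double sum over all (h1, h2)
# configurations collapses into the matrix product numx.dot(part1, part2),
# whose row sums give the marginal up to the normalization constant.
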
def partition_function_factorize_v(model, beta=None, batchsize_exponent='AUTO', status=False):
    ''' Computes the true partition function for the given model by factoring
        over the visible units.

    :Info: The number of computations increases exponentially with the number
           of visible units (16 visible units usually take ~ 20 seconds).

    :Parameters:
        model:              The model.
                            -type: Valid RBM model

        beta:               Inverse temperature(s) for the model's energy.
                            -type: None, float, numpy array [batchsize, 1]

        batchsize_exponent: 2^batchsize_exponent will be the batch size.
                            -type: int

        status:             If True, prints the progress to the console.
                            -type: bool

    :Returns:
        Log partition function for the model.
        -type: float
    '''
    if status is True:
        print("Calculating the partition function by factoring over v: ")
        print('%3.2f' % 0.0, '%')

    bit_length = model.input_dim
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.input_dim, 12])
    batchSize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)
    num_batches = num_combinations // batchSize
    log_prob_vv_all = numx.zeros(num_combinations)

    for batch in range(1, num_batches + 1):
        # Generate current batch of visible states
        bitCombinations = npExt.generate_binary_code(bit_length,
                                                     batchsize_exponent,
                                                     batch - 1)
        # Calculate the unnormalized log probabilities
        log_prob_vv_all[(batch - 1) * batchSize:batch * batchSize] = \
            model.unnormalized_log_probability_v(
                bitCombinations, beta).reshape(bitCombinations.shape[0])
        # Print status if wanted
        if status is True:
            print('%3.2f' % (100 * numx.double(batch) / numx.double(num_batches)), '%')

    # Return the log sum of the values
    return npExt.log_sum_exp(log_prob_vv_all)

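# Sanity-check sketch for the factorization above. The tiny stand-in model
# below is hypothetical (not part of the library); any object exposing
# `input_dim` and `unnormalized_log_probability_v(v, beta)` works, which lets
# us compare the factorized result against brute-force enumeration.
class _TinyBinaryRBM(object):
    def __init__(self, w, bv, bh):
        self.w = w    # weights      [input_dim, output_dim]
        self.bv = bv  # visible bias [1, input_dim]
        self.bh = bh  # hidden bias  [1, output_dim]
        self.input_dim = w.shape[0]

    def unnormalized_log_probability_v(self, v, beta=None):
        # Standard binary RBM: log p*(v) = v bv^T + sum_j log(1 + exp(v W_:,j + bh_j))
        pre = numx.dot(v, self.w) + self.bh
        return numx.dot(v, self.bv.T) + numx.sum(
            numx.log(1.0 + numx.exp(pre)), axis=1).reshape(v.shape[0], 1)


def _check_partition_function_factorize_v():
    numx.random.seed(42)
    model = _TinyBinaryRBM(w=0.1 * numx.random.randn(4, 3),
                           bv=numx.zeros((1, 4)),
                           bh=numx.zeros((1, 3)))
    ln_z = partition_function_factorize_v(model)
    # Brute force over all 2**4 visible states must give the same value
    all_v = npExt.generate_binary_code(model.input_dim)
    ln_z_brute = npExt.log_sum_exp(
        model.unnormalized_log_probability_v(all_v).reshape(all_v.shape[0]))
    return numx.allclose(ln_z, ln_z_brute)
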
def generate_bars_and_stripes_complete(length):
    """ Creates a dataset containing all possible samples showing bars or stripes.

    :param length: Length of the bars/stripes.
    :type length: int

    :return: Samples.
    :rtype: numpy array [num_samples, length*length]
    """
    stripes = npext.generate_binary_code(length)
    stripes = numx.repeat(stripes, length, 0)
    stripes = stripes.reshape(2 ** length, length * length)
    bars = npext.generate_binary_code(length)
    bars = bars.reshape(2 ** length * length, 1)
    bars = numx.repeat(bars, length, 1)
    bars = bars.reshape(2 ** length, length * length)
    return numx.vstack((stripes[0:stripes.shape[0] - 1], bars[1:bars.shape[0]]))

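# Size check for the complete dataset (a quick illustrative assertion): the two
# blocks contribute (2**length - 1) rows each, so the dataset holds
# 2**(length + 1) - 2 patterns of length*length pixels. Assuming
# generate_binary_code lists the all-zero code first and the all-one code last,
# each constant image appears exactly once.
def _demo_bars_and_stripes_complete(length=3):
    data = generate_bars_and_stripes_complete(length)
    assert data.shape == (2 ** (length + 1) - 2, length * length)
    return data
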
def partition_function_factorize_h(model, beta=None, batchsize_exponent='AUTO', status=False):
    """ Computes the true partition function for the given model by factoring
        over the hidden units.

    :Info: The number of computations increases exponentially with the number
           of hidden units (16 hidden units usually take ~ 20 seconds).

    :param model: The model.
    :type model: Valid RBM model.

    :param beta: Inverse temperature(s) for the model's energy.
    :type beta: None, float, numpy array [batchsize, 1]

    :param batchsize_exponent: 2^batchsize_exponent will be the batch size.
    :type batchsize_exponent: int

    :param status: If True, prints the progress to the console.
    :type status: bool

    :return: Log partition function for the model.
    :rtype: float
    """
    if status is True:
        print("Calculating the partition function by factoring over h: ")
        print('%3.2f' % 0.0, '%')

    bit_length = model.output_dim
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.output_dim, 12])
    batchsize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)
    num_batches = num_combinations // batchsize
    log_prob_vv_all = numx.zeros(num_combinations)

    for batch in range(1, num_batches + 1):
        # Generate current batch of hidden states
        bitcombinations = numxext.generate_binary_code(bit_length,
                                                       batchsize_exponent,
                                                       batch - 1)
        # Calculate the unnormalized log probabilities
        log_prob_vv_all[(batch - 1) * batchsize:batch * batchsize] = \
            model.unnormalized_log_probability_h(
                bitcombinations, beta).reshape(bitcombinations.shape[0])
        # Print status if wanted
        if status is True:
            print('%3.2f' % (100 * numx.double(batch) / numx.double(num_batches)), '%')

    # Return the log sum of the values
    return numxext.log_sum_exp(log_prob_vv_all)

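# Convenience sketch (the name `partition_function_exact` is a suggestion, not
# a library function): enumeration cost grows as 2**input_dim when factoring
# over v and 2**output_dim when factoring over h, so pick the smaller layer.
def partition_function_exact(model, beta=None):
    if model.input_dim < model.output_dim:
        return partition_function_factorize_v(model, beta=beta)
    return partition_function_factorize_h(model, beta=beta)
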
def _partition_function_exact_check(model, batchsize_exponent='AUTO'):
    ''' Computes the true partition function for the given model by factoring
        over the visible and hidden2 units. This is just a proof of concept;
        use _partition_function_exact() instead, it is much faster!

    :Parameters:
        model:              The model.
                            -type: Valid DBM model

        batchsize_exponent: 2^batchsize_exponent will be the batch size.
                            -type: int

    :Returns:
        Log partition function for the model.
        -type: float
    '''
    bit_length = model.W1.shape[1]
    if batchsize_exponent == 'AUTO' or batchsize_exponent > 20:
        batchsize_exponent = numx.min([model.W1.shape[1], 12])
    batchSize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)
    num_batches = num_combinations // batchSize
    log_prob_vv_all = numx.zeros(num_combinations)

    for batch in range(1, num_batches + 1):
        # Generate current batch of h1 states
        bitCombinations = npExt.generate_binary_code(bit_length,
                                                     batchsize_exponent,
                                                     batch - 1)
        # Calculate the unnormalized log probabilities of h1
        log_prob_vv_all[(batch - 1) * batchSize:batch * batchSize] = \
            model.unnormalized_log_probability_h1(
                bitCombinations).reshape(bitCombinations.shape[0])

    # Return the log sum of the values
    return npExt.log_sum_exp(log_prob_vv_all)

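# Consistency sketch (illustrative): on a small DBM the slow enumeration above
# should agree with the faster `_partition_function_exact` recommended in its
# docstring; `model` is any valid small DBM.
def _compare_partition_estimates(model, tol=1e-8):
    return abs(_partition_function_exact_check(model) -
               _partition_function_exact(model)) < tol
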
def _train(self, data, epsilon, k, momentum, reg_l1norm, reg_l2norm, reg_sparseness, desired_sparseness,
           update_visible_offsets, update_hidden_offsets, offset_typ, use_centered_gradient,
           restrict_gradient, restriction_norm, use_hidden_states):
    """ The training for one batch is performed using the true gradient (GD).

    :param data: The data used for training.
    :type data: numpy array [batch_size, input dimension]

    :param epsilon: The learning rate.
    :type epsilon: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

    :param k: Number of sampling steps.
    :type k: int

    :param momentum: The momentum term.
    :type momentum: scalar or numpy array[num parameters] or numpy array[num parameters, parameter shape]

    :param reg_l1norm: The parameter for the L1 regularization.
    :type reg_l1norm: float

    :param reg_l2norm: The parameter for the L2 regularization, also known as weight decay.
    :type reg_l2norm: float

    :param reg_sparseness: The parameter for the desired_sparseness regularization.
    :type reg_sparseness: None or float

    :param desired_sparseness: Desired average hidden activation or None for no regularization.
    :type desired_sparseness: None or float

    :param update_visible_offsets: The update step size for the model's visible offsets.
    :type update_visible_offsets: float

    :param update_hidden_offsets: The update step size for the model's hidden offsets.
    :type update_hidden_offsets: float

    :param offset_typ: | Different offsets can be used to center the gradient.
                       | Example: 'DM' uses the positive phase visible mean and the negative phase hidden mean.
                       | 'A0' uses the average of positive and negative phase mean for the visibles and zero
                       | for the hiddens. Possible values are out of {A,D,M,0}x{A,D,M,0}.
    :type offset_typ: string

    :param use_centered_gradient: Uses the centered gradient instead of centering.
    :type use_centered_gradient: bool

    :param restrict_gradient: If a scalar is given, the norm of the weight gradient (along the input dim) is \
                              restricted to stay below this value.
    :type restrict_gradient: None, float

    :param restriction_norm: Restricts the column norm, row norm or matrix norm.
    :type restriction_norm: string, 'Cols', 'Rows', 'Mat'

    :param use_hidden_states: If True, the hidden states are used for the gradient calculations, the hidden \
                              probabilities otherwise.
    :type use_hidden_states: bool
    """
    # Positive phase: sample the hidden layer once, upfront, so the states are
    # also available for the gradient calculation below
    hid_probs_pos = self.model.probability_h_given_v(data)
    hid_states_pos = None
    if use_hidden_states:
        hid_states_pos = self.model.sample_h(hid_probs_pos)

    if update_visible_offsets != 0.0:
        xmean_pos = numx.mean(data, axis=0).reshape(1, self.model.input_dim)
    hmean_pos = 0.0
    if update_hidden_offsets != 0.0 or reg_sparseness != 0.0:
        if use_hidden_states:
            hmean_pos = numx.mean(hid_states_pos, axis=0).reshape(1, self.model.output_dim)
        else:
            hmean_pos = numx.mean(hid_probs_pos, axis=0).reshape(1, self.model.output_dim)

    # Calculate the partition function by factoring over the smaller layer
    if self.model.input_dim < self.model.output_dim:
        batchsize_exponent = numx.min([self.model.input_dim, 12])
        ln_z = estimator.partition_function_factorize_v(self.model,
                                                        beta=1.0,
                                                        batchsize_exponent=batchsize_exponent,
                                                        status=False)
    else:
        batchsize_exponent = numx.min([self.model.output_dim, 12])
        ln_z = estimator.partition_function_factorize_h(self.model,
                                                        beta=1.0,
                                                        batchsize_exponent=batchsize_exponent,
                                                        status=False)

    # Empty negative phase parts
    neg_gradients = [numx.zeros(self.model.w.shape),
                     numx.zeros(self.model.bv.shape),
                     numx.zeros(self.model.bh.shape)]

    # Calculate the exact negative phase stepwise, in batches over all visible states
    bit_length = self.model.input_dim
    batchsize = numx.power(2, batchsize_exponent)
    num_combinations = numx.power(2, bit_length)
    num_batches = num_combinations // batchsize
    for batch in range(0, num_batches):
        # Generate current batch
        bit_combinations = numxext.generate_binary_code(bit_length, batchsize_exponent, batch)
        # P(x)
        prob_x = numx.exp(self.model.log_probability_v(ln_z, bit_combinations))
        # P(h|x)
        prob_h_x = self.model.probability_h_given_v(bit_combinations)
        # Accumulate the gradient contributions
        neg_gradients[1] += numx.sum(numx.tile(prob_x, (1, self.model.input_dim))
                                     * (bit_combinations - self.model.ov), axis=0)
        prob_x = (numx.tile(prob_x, (1, self.model.output_dim))
                  * (prob_h_x - self.model.oh))
        neg_gradients[0] += numx.dot((bit_combinations - self.model.ov).T, prob_x)
        neg_gradients[2] += numx.sum(prob_x, axis=0)

    # Exact model means for the offset updates, if required
    if update_visible_offsets != 0.0 and (offset_typ[0] == 'A' or offset_typ[0] == 'M'):
        bit_combinations = numxext.generate_binary_code(self.model.input_dim, None, 0)
        prob_x = numx.exp(self.model.log_probability_v(ln_z, bit_combinations))
        xmean_neg = numx.sum(prob_x * bit_combinations, axis=0).reshape(1, self.model.input_dim)
    if update_hidden_offsets != 0.0 and (offset_typ[1] == 'A' or offset_typ[1] == 'M'):
        bit_combinations = numxext.generate_binary_code(self.model.output_dim, None, 0)
        prob_h = numx.exp(self.model.log_probability_h(ln_z, bit_combinations))
        hmean_neg = numx.sum(prob_h * bit_combinations, axis=0).reshape(1, self.model.output_dim)

    new_visible_offsets = 0.0
    if update_visible_offsets != 0.0:
        if offset_typ[0] == 'A':
            new_visible_offsets = (xmean_pos + xmean_neg) * 0.5
        if offset_typ[0] == 'D':
            new_visible_offsets = xmean_pos
        if offset_typ[0] == 'M':
            new_visible_offsets = xmean_neg
        if offset_typ[0] == '0':
            new_visible_offsets = 0.0 * xmean_pos
    new_hidden_offsets = 0.0
    if update_hidden_offsets != 0.0:
        if offset_typ[1] == 'A':
            new_hidden_offsets = (hmean_pos + hmean_neg) * 0.5
        if offset_typ[1] == 'D':
            new_hidden_offsets = hmean_pos
        if offset_typ[1] == 'M':
            new_hidden_offsets = hmean_neg
        if offset_typ[1] == '0':
            new_hidden_offsets = 0.0 * hmean_pos

    if use_centered_gradient is False:
        # Update the centers
        self.model.update_offsets(new_visible_offsets, new_hidden_offsets,
                                  update_visible_offsets, update_hidden_offsets)
        self.visible_offsets = 0.0
        self.hidden_offsets = 0.0
    else:
        self.hidden_offsets = ((1.0 - update_hidden_offsets) * self.hidden_offsets
                               + update_hidden_offsets * new_hidden_offsets)
        self.visible_offsets = ((1.0 - update_visible_offsets) * self.visible_offsets
                                + update_visible_offsets * new_visible_offsets)

    # Calculate the positive phase gradient using states or probabilities
    if use_hidden_states:
        pos_gradients = self.model.calculate_gradients(data, hid_states_pos)
    else:
        pos_gradients = self.model.calculate_gradients(data, hid_probs_pos)

    # Scale by the batch size since _adapt_gradient divides by it
    neg_gradients[0] *= data.shape[0]
    neg_gradients[1] *= data.shape[0]
    neg_gradients[2] *= data.shape[0]

    # Adapt the gradients by weight decay, momentum and learning rate
    self._adapt_gradient(pos_gradients=pos_gradients,
                         neg_gradients=neg_gradients,
                         batch_size=data.shape[0],
                         epsilon=epsilon,
                         momentum=momentum,
                         reg_l1norm=reg_l1norm,
                         reg_l2norm=reg_l2norm,
                         reg_sparseness=reg_sparseness,
                         desired_sparseness=desired_sparseness,
                         mean_hidden_activity=hmean_pos,
                         visible_offsets=self.visible_offsets,
                         hidden_offsets=self.hidden_offsets,
                         use_centered_gradient=use_centered_gradient,
                         restrict_gradient=restrict_gradient,
                         restriction_norm=restriction_norm)

    # Update the parameters with the calculated gradient
    self.model.update_parameters(self.parameter_updates)

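# Explanatory note on the exact negative phase computed in _train above (the
# symbols follow the method: ov/oh are the offsets, P(h=1|x) the hidden
# posterior):
#
#   dlnZ/dW  = sum_x p(x) * (x - ov)^T (P(h=1|x) - oh)
#   dlnZ/dbv = sum_x p(x) * (x - ov)
#   dlnZ/dbh = sum_x p(x) * (P(h=1|x) - oh)
#
# The loop enumerates all 2**input_dim visible states to evaluate these sums
# exactly, which is why this true-gradient trainer only scales to small models.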