import math

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions


def get_auxiliary_ratio(self, index):
    if self.extrapolate_auxiliary_ratios:
        return np.power(index + 1., AUX_RATIO_POWER_LAW)
    else:
        if not self._initialized:
            raise CodingError("Coder has not been initialized yet, please call "
                              "update_auxiliary_variance_ratios() first "
                              "or use extrapolation")

        if index >= tf.shape(self.aux_variable_variance_ratios)[0]:
            raise CodingError("KL divergence higher than auxiliary variables can account for. "
                              "Update auxiliary variable ratios with high-enough KL divergence. "
                              "Maximum possible number of partitions is {}. "
                              "Requested {}.".format(self.aux_variable_variance_ratios.shape[0],
                                                     index + 1))

        return self.aux_variable_variance_ratios[index]
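
# Illustrative sketch (not part of the coder): a quick look at the extrapolated
# power-law ratios used above. The exponent here is a hypothetical stand-in for
# AUX_RATIO_POWER_LAW, whose real value is defined elsewhere in the module.
def _example_extrapolated_ratios(power_law=-0.7, num_partitions=8):
    ratios = np.power(np.arange(num_partitions) + 1., power_law)
    # ratios[0] is always 1., and for a negative exponent the ratios decay
    # monotonically for higher partition indices.
    return ratios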
def get_r_pstar(log_ratios, t_mass, p_mass, r_buffer_size, dtype=tf.float32):
    t_mass = tf.cast(t_mass, dtype=tf.float64)
    p_mass = tf.cast(p_mass, dtype=tf.float64)

    ratios_np = tf.exp(log_ratios).numpy()
    t_cummass_np = tf.exp(tf.math.cumulative_logsumexp(t_mass)).numpy()
    p_cummass_np = tf.exp(tf.math.cumulative_logsumexp(p_mass)).numpy()
    p_zero = float(1. - np.exp(tf.reduce_logsumexp(p_mass)))

    pstar_buffer = tf.Variable(tf.zeros((r_buffer_size,), dtype=dtype), trainable=False)
    r_buffer = tf.Variable(tf.zeros((r_buffer_size,), dtype=dtype), trainable=False)

    r = 1.
    r_buffer[0].assign(r)
    i = 1
    for r_ind, r_next in enumerate(ratios_np):
        if r_next < r:
            continue

        p_cum = p_zero + (p_cummass_np[r_ind - 1] if r_ind > 0 else 0.)
        t_cum = t_cummass_np[r_ind - 1] if r_ind > 0 else 0.

        # For the final sample, r_next should equal (1 - t_cum) / (1 - p_cum),
        # in which case the logarithm below would be -infinity; that case is
        # handled separately by the if branch.
        assert (r_ind != ratios_np.shape[0] - 1
                or math.isclose(r_next, (1. - t_cum) / (1. - p_cum), rel_tol=1e-5))

        if r_ind == ratios_np.shape[0] - 1:
            interval = r_buffer_size - i
        else:
            interval = min(r_buffer_size - i,
                           int(math.ceil(np.log((r_next - (1. - t_cum) / (1. - p_cum)) /
                                                (r - (1. - t_cum) / (1. - p_cum))) /
                                         np.log(p_cum))))

        # Work in log space for numerical stability
        r_slice = -tf.exp(np.log(p_cum) * (1. + tf.range(interval, dtype=dtype))
                          + np.log((1. - t_cum) / (1. - p_cum) - r)) + (1. - t_cum) / (1. - p_cum)
        r_buffer[i:i + interval].assign(r_slice)
        pstar_buffer[i - 1:i + interval - 1].assign(
            (1. - p_cum) * r_buffer[i - 1:i + interval - 1] + t_cum)

        r = np.power(p_cum, interval) * (r - (1. - t_cum) / (1. - p_cum)) + (1. - t_cum) / (1. - p_cum)
        i += interval

        if i == r_buffer_size:
            pstar_buffer[r_buffer_size - 1].assign((1. - p_cum) * r + t_cum)
            break

        if r_ind == ratios_np.shape[0] - 1:
            raise CodingError('R buffer incomplete after processing all samples. This is a bug.')

    return r_buffer, pstar_buffer
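
# Hypothetical usage sketch for the buffers above. get_t_p_mass is assumed to return
# the sorted log density ratios together with the log target/proposal masses that
# get_r_pstar consumes (the call mirrors gaussian_rejection_sample_small below); the
# distributions and buffer size are arbitrary illustration values.
def _example_r_pstar_buffers():
    t_dist = tfd.Normal(loc=[0.5], scale=[0.8])
    p_dist = tfd.Normal(loc=[0.0], scale=[1.0])
    log_ratios, t_mass, p_mass = get_t_p_mass(t_dist, p_dist, n_samples=100, oversampling=100)
    # Both buffers are consumed by gaussian_rejection_sample_small below.
    r_buffer, pstar_buffer = get_r_pstar(log_ratios, t_mass, p_mass, r_buffer_size=1000)
    return r_buffer, pstar_buffer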
def merge(self, *args, shape=None, seed=42):
    """
    Inverse operation to split: concatenates the blocks, un-shuffles the dimensions
    and reshapes each tensor back to the given shape.

    :return: list of merged tensors, one per supplied list of blocks
    """
    if shape is None:
        raise CodingError("Shape cannot be None!")

    # We first merge the blocks back
    tensors = [tf.concat(blocks, axis=0) for blocks in args]

    # Check that all tensors have the same shape now
    num_dims = tensors[0].shape[0]

    for tensor in tensors:
        if tf.rank(tensor) != 1:
            raise CodingError("All supplied tensors to merge must be rank 1!")
        if tensor.shape[0] != num_dims:
            raise CodingError("All tensors must have the same number of dimensions!")

    # We will inverse permute the indices and gather using them
    # to ensure that every block is un-shuffled the same way
    tf.random.set_seed(seed)
    indices = tf.range(num_dims, dtype=tf.int64)
    indices = tf.random.shuffle(indices)
    indices = tf.math.invert_permutation(indices)[:, None]

    tensors = [tf.gather_nd(tensor, indices) for tensor in tensors]

    # Reshape each tensor appropriately
    tensors = [tf.reshape(tensor, shape) for tensor in tensors]

    return tensors
def split(self, *args, seed=42):
    """
    Splits the arguments into conformal blocks: flattens each tensor, shuffles the
    dimensions with a shared seed and cuts them into blocks of size self.block_size.

    :return: list containing a list of blocks for each supplied tensor
    """
    tensor_shape = args[0].shape
    num_tensors = len(args)

    flattened = []

    # Check if the shapes are alright
    for tensor in args:
        if tensor.shape != tensor_shape:
            raise CodingError("All tensor arguments supplied to split "
                              "must have the same batch dimensions!")
        flattened.append(tf.reshape(tensor, [-1]))

    # Total number of dimensions for each tensor
    num_dims = flattened[0].shape[0]

    # We will permute the indices and gather using them to ensure that every block is
    # shuffled the same way
    tf.random.set_seed(seed)
    indices = tf.range(num_dims, dtype=tf.int64)
    indices = tf.random.shuffle(indices)[:, None]

    # Shuffle each tensor the same way
    flattened = [tf.gather_nd(flat, indices) for flat in flattened]

    # Split tensors into blocks
    # Calculate the number of blocks
    num_blocks = num_dims // self.block_size
    num_blocks += (0 if num_dims % self.block_size == 0 else 1)

    all_blocks = []
    for tensor in flattened:
        blocks = []
        for i in range(0, num_dims, self.block_size):
            # The minimum ensures that we do not index out of bounds
            blocks.append(tensor[i:min(i + self.block_size, num_dims)])
        all_blocks.append(blocks)

    return all_blocks
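
# Hypothetical round-trip sketch: `coder` stands in for whatever object defines the
# split()/merge() methods above (all it needs is a block_size attribute); the tensor
# values and shapes are arbitrary.
def _example_split_merge_roundtrip(coder, seed=42):
    x = tf.reshape(tf.range(24, dtype=tf.float32), [2, 3, 4])
    y = tf.ones_like(x)
    blocks_x, blocks_y = coder.split(x, y, seed=seed)
    x_rec, y_rec = coder.merge(blocks_x, blocks_y, shape=x.shape, seed=seed)
    # With matching seeds, the inverse permutation in merge() undoes the shuffle in
    # split(), so x_rec should equal x and y_rec should equal y.
    return x_rec, y_rec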
def encode_block(self, target_dist, coding_dist, seed, update_sampler=False, numpy=True):
    if target_dist.loc.shape[0] != 1:
        raise CodingError("For encoding, batch size must be 1.")

    total_kl = tf.reduce_sum(tfd.kl_divergence(target_dist, coding_dist))
    print('Encoding latent variable with KL={}'.format(total_kl))

    num_aux_variables = tf.cast(tf.math.ceil(total_kl / self.kl_per_partition), tf.int32)

    # We iterate backwards over the ratios down to the first entry, which is 1.,
    # so the final iteration codes all of the remaining variance.
    n_dims = len(target_dist.loc.shape)
    cumulative_auxiliary_variance = 0.
    iteration = 0
    for i in range(num_aux_variables - 1, -1, -1):
        aux_variable_variance_ratio = self.get_auxiliary_ratio(i)
        auxiliary_var = aux_variable_variance_ratio * (tf.math.pow(coding_dist.scale, 2)
                                                       - cumulative_auxiliary_variance)

        auxiliary_coder = get_auxiliary_coder(coder=coding_dist,
                                              auxiliary_var=auxiliary_var)
        cumulative_auxiliary_coder = get_auxiliary_coder(
            coder=coding_dist,
            auxiliary_var=auxiliary_var + cumulative_auxiliary_variance)
        auxiliary_target = get_auxiliary_target(
            target=target_dist,
            coder=coding_dist,
            auxiliary_var=auxiliary_var + cumulative_auxiliary_variance)

        if iteration > 0:
            samples = self.get_pseudo_random_sample(auxiliary_coder,
                                                    self.n_samples,
                                                    beam_indices,
                                                    seed + iteration)
            combined_samples = beams + samples  # n_samples x n_beams x sample_shape

            log_probs = tf.reduce_sum(
                auxiliary_target.log_prob(combined_samples)
                - cumulative_auxiliary_coder.log_prob(combined_samples),
                axis=range(2, n_dims + 2))

            flat_log_probs = tf.reshape(log_probs, [-1])
            sorted_ind_1d = tf.argsort(flat_log_probs, direction='DESCENDING')

            n_current_beams = beams.shape[0]
            best_ind_beam = sorted_ind_1d[:self.n_beams] % n_current_beams
            best_ind_aux = sorted_ind_1d[:self.n_beams] // n_current_beams
            assert (log_probs[best_ind_aux[0], best_ind_beam[0]]
                    == flat_log_probs[sorted_ind_1d[0]])

            beam_ind = tf.stack((best_ind_aux, best_ind_beam), axis=1)
            beams = tf.gather_nd(combined_samples, beam_ind)
            beam_indices = tf.concat(
                (tf.gather_nd(beam_indices[:, :iteration], best_ind_beam[:, None]),
                 best_ind_aux[:, None]),
                axis=1)
        else:
            samples = self.get_pseudo_random_sample(auxiliary_coder,
                                                    self.n_samples,
                                                    tf.constant([[]], dtype=tf.int32),
                                                    seed + iteration)[:, 0]
            log_probs = tf.reduce_sum(
                auxiliary_target.log_prob(samples)
                - cumulative_auxiliary_coder.log_prob(samples),
                axis=range(1, n_dims + 1))

            sorted_ind = tf.argsort(log_probs, direction='DESCENDING')
            beams = tf.gather_nd(samples, sorted_ind[:self.n_beams, None])
            beam_indices = sorted_ind[:self.n_beams, None]

        iteration += 1
        cumulative_auxiliary_variance += auxiliary_var

    target_sample = target_dist.sample()
    target_entropy = tf.reduce_sum(target_dist.log_prob(target_sample)
                                   - coding_dist.log_prob(target_sample))
    print('Target entropy={}, log_density={}'.format(
        target_entropy,
        tf.reduce_sum(target_dist.log_prob(beams[0] + coding_dist.loc)
                      - coding_dist.log_prob(beams[0] + coding_dist.loc))))

    indices = beam_indices[0, :]
    if numpy:
        indices = indices.numpy()

    return list(indices), beams[0] + coding_dist.loc
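
# Worked arithmetic sketch (made-up numbers) for the partitioning used in the two
# encode_block variants: the number of auxiliary variables is
# ceil(total_kl / kl_per_partition), e.g. a latent with a total KL of 35 nats and
# kl_per_partition = 10 nats is coded with 4 auxiliary variables.
def _example_num_partitions(total_kl=35.0, kl_per_partition=10.0):
    return int(math.ceil(total_kl / kl_per_partition))  # -> 4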
def encode_block(self, target_dist, coding_dist, seed, update_sampler=False, verbose=False, numpy=True):
    if target_dist.loc.shape[0] != 1:
        raise CodingError("For encoding, batch size must be 1.")

    indices = []

    total_kl = tf.reduce_sum(tfd.kl_divergence(target_dist, coding_dist))
    print('Encoding latent variable with KL={}'.format(total_kl))

    num_aux_variables = tf.cast(tf.math.ceil(total_kl / self.kl_per_partition), tf.int32)

    # We iterate backward until the second entry in ratios. The first entry is 1.,
    # in which case we just draw the final sample.
    for i in range(num_aux_variables - 1, 0, -1):
        aux_variable_variance_ratio = self.get_auxiliary_ratio(i)
        auxiliary_var = aux_variable_variance_ratio * tf.math.pow(coding_dist.scale, 2)

        auxiliary_target = get_auxiliary_target(target=target_dist,
                                                coder=coding_dist,
                                                auxiliary_var=auxiliary_var)
        auxiliary_coder = get_auxiliary_coder(coder=coding_dist,
                                              auxiliary_var=auxiliary_var)

        if update_sampler:
            self.sampler.update(auxiliary_target, auxiliary_coder)
            auxiliary_sample = auxiliary_target.sample()
            print('Sampler updated')
        else:
            index, auxiliary_sample = self.sampler.coded_sample(target=auxiliary_target,
                                                                coder=auxiliary_coder,
                                                                seed=seed)
            if verbose:
                print(f'Auxiliary sample {i} found at index {index}')

            if numpy:
                index = index.numpy()
            indices.append(index)

        seed += 1

        target_dist = get_conditional_target(target=target_dist,
                                             coder=coding_dist,
                                             auxiliary_var=auxiliary_var,
                                             auxiliary_sample=auxiliary_sample)
        coding_dist = get_conditional_coder(coder=coding_dist,
                                            auxiliary_var=auxiliary_var,
                                            auxiliary_sample=auxiliary_sample)

    # Sample the last auxiliary variable
    if update_sampler:
        self.sampler.update(target_dist, coding_dist)
        sample = target_dist.sample()
        print('Sampler updated')
    else:
        index, sample = self.sampler.coded_sample(target=target_dist,
                                                  coder=coding_dist,
                                                  seed=seed)
        if verbose:
            print('Auxiliary sample found at index {}'.format(index))

        if numpy:
            index = index.numpy()
        indices.append(index)

    return indices, sample
def encode_gaussian_importance_sample(t_loc,
                                      t_scale,
                                      p_loc,
                                      p_scale,
                                      coding_bits,
                                      seed,
                                      log_weighting_fn=None,
                                      alpha=float('inf')):
    """
    Encodes a single sample from a Gaussian target distribution using another Gaussian
    coding distribution. Note that the runtime of this function is O(e^KL(q || p)), hence
    it is the job of the caller to potentially partition a larger Gaussian into smaller
    codable chunks.

    :param t_loc: location parameter of the target Gaussian
    :param t_scale: scale parameter of the target Gaussian
    :param p_loc: location parameter of the coding/proposal Gaussian
    :param p_scale: scale parameter of the coding/proposal Gaussian
    :param coding_bits: number of bits to use to code each sample
    :param seed: seed that defines the infinite string of random samples from the coding
        distribution.
    :param log_weighting_fn: optional function that overrides how the log importance
        weights are computed from the proposal samples
    :param alpha: draw the index according to the L_alpha norm. alpha=1 results in sampling
        the atomic distribution defined by the importance weights, and alpha=inf just
        selects the sample with the maximal importance weight. Must be in the range [1, inf)
    :return: (index, sample) - tuple containing the index of the chosen sample and the
        sample itself
    """
    if alpha < 1.:
        raise CodingError(f"Alpha must be in the range [1, inf), but {alpha} was given!")

    # Fix seed
    tf.random.set_seed(seed)

    # Standardize the target w.r.t. the coding distribution
    t_loc = (t_loc - p_loc) / p_scale
    t_scale = t_scale / p_scale

    target = tfd.Normal(loc=t_loc, scale=t_scale)
    proposal = tfd.Normal(loc=tf.zeros_like(p_loc), scale=tf.ones_like(p_scale))

    # We need to draw approximately e^KL (here 2^coding_bits) samples to be guaranteed
    # a low-bias sample
    num_samples = tf.cast(tf.math.ceil(tf.exp(coding_bits * tf.math.log(2.))), tf.int32)

    # Draw the 2^coding_bits samples
    samples = proposal.sample(num_samples)

    # Calculate the log-unnormalized importance weights
    if log_weighting_fn is None:
        log_importance_weights = tf.reduce_sum(
            target.log_prob(samples) - proposal.log_prob(samples),
            axis=range(1, tf.rank(t_loc) + 1))
    else:
        log_importance_weights = log_weighting_fn(samples)

    # If we are using the infinity norm, we can just take the argmax as a shortcut
    if tf.math.is_inf(alpha):
        index = tf.argmax(log_importance_weights)
    # If we are using any other alpha, we just calculate the atomic distribution
    else:
        # Sample index using the Gumbel-max trick
        perturbed = alpha * log_importance_weights + stateless_gumbel_sample(
            log_importance_weights.shape, seed + 1)
        index = tf.argmax(perturbed)

    chosen_sample = samples[index, ...]

    # Rescale the sample
    chosen_sample = p_scale * chosen_sample + p_loc

    return index, chosen_sample
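
# Hypothetical usage sketch: encodes a low-KL diagonal Gaussian target against a
# standard normal coder with a 12-bit budget. Shapes and values are arbitrary.
def _example_importance_sample_encoding(seed=0):
    t_loc = tf.constant([0.3, -0.1, 0.2])
    t_scale = tf.constant([0.9, 0.8, 1.1])
    p_loc = tf.zeros(3)
    p_scale = tf.ones(3)
    index, sample = encode_gaussian_importance_sample(t_loc, t_scale, p_loc, p_scale,
                                                      coding_bits=12., seed=seed)
    # Only `index` needs to be transmitted: a decoder with the same seed can redraw
    # the same stream of proposal samples and pick the entry at `index`.
    return index, sample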
def gaussian_rejection_sample_small(t_dist, p_dist, sample_buffer_size, r_buffer_size,
                                    sample_generator, seed=42069):
    """
    Encodes a single sample from a Gaussian target distribution using another Gaussian
    coding distribution. Note that the runtime of this function is O(e^KL(q || p)), hence
    it is the job of the caller to potentially partition a larger Gaussian into smaller
    codable chunks.

    :param t_dist: the target Gaussian
    :param p_dist: the coding/proposal Gaussian
    :param sample_buffer_size: buffer size of the samples
    :param r_buffer_size: buffer size of rejection sampling; samples beyond this index are
        treated as if they were drawn at the final buffer index
    :param sample_generator: generator that supplies the pseudo-random proposal samples and
        their log density ratios
    :param seed: seed that defines the infinite string of random samples from the coding
        distribution.
    :return: (index, sample) - tuple containing the index and the sample
    """
    assert (r_buffer_size % sample_buffer_size == 0)
    assert t_dist.loc.shape.as_list() == p_dist.loc.shape.as_list()

    log_ratios, t_mass, p_mass = get_t_p_mass(t_dist, p_dist, n_samples=100, oversampling=100)
    r_buffer, pstar_buffer = get_r_pstar(log_ratios, t_mass, p_mass, r_buffer_size=r_buffer_size)

    kl = tf.reduce_sum(tfp.distributions.kl_divergence(t_dist, p_dist))
    if kl >= 20.:
        raise CodingError('KL divergence={} is too high for rejection sampling'.format(kl))

    i = 0
    for _ in range(int(r_buffer_size // sample_buffer_size)):
        sample_ratios = sample_generator.get_ratios(t_dist, p_dist,
                                                    seed=seed + i // sample_buffer_size)
        accepted = (tf.exp(sample_ratios) - r_buffer[i:i + sample_buffer_size]) / \
                   (1. - pstar_buffer[i:i + sample_buffer_size]) \
                   + tf.random.uniform(shape=sample_ratios.shape)

        accepted_ind = tf.where(accepted > 0.)
        if accepted_ind.shape[0] > 0:
            index = int(accepted_ind[0, 0])
            return i + index, sample_generator.get_index(index)

        i += sample_buffer_size

    # If not accepted within the buffer, we accept anything above ratio r
    r = r_buffer[-1]
    while True:
        sample_ratios = sample_generator.get_ratios(t_dist, p_dist,
                                                    seed=seed + i // sample_buffer_size)
        accepted_ind = tf.where(sample_ratios > tf.math.log(r))
        if accepted_ind.shape[0] > 0:
            index = int(accepted_ind[0, 0])
            return i + index, sample_generator.get_index(index)
        else:
            i += sample_buffer_size
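
# Hypothetical sketch of the sample_generator interface assumed above. This is only a
# guess at the contract, based solely on how the generator is used in
# gaussian_rejection_sample_small: get_ratios() is taken to draw a buffer of proposal
# samples and return one summed log density ratio per sample, and get_index() to
# return the sample at a within-buffer index.
class _ExampleSampleGenerator:

    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self._samples = None

    def get_ratios(self, t_dist, p_dist, seed):
        tf.random.set_seed(seed)
        self._samples = p_dist.sample(self.buffer_size)
        log_ratios = t_dist.log_prob(self._samples) - p_dist.log_prob(self._samples)
        # One scalar log-ratio per buffered sample
        return tf.reduce_sum(log_ratios, axis=range(1, len(t_dist.loc.shape) + 1))

    def get_index(self, index):
        return self._samples[index]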