def _sample_n(self, n, seed=None):
  gamma1_seed, gamma2_seed, binomial_seed = samplers.split_seed(
      seed, n=3, salt='beta_binomial')
  total_count, concentration1, concentration0 = (
      self._params_list_as_tensors())
  batch_shape = self._batch_shape_tensor(
      total_count=total_count,
      concentration1=concentration1,
      concentration0=concentration0)
  expanded_concentration1 = tf.broadcast_to(concentration1, batch_shape)
  expanded_concentration0 = tf.broadcast_to(concentration0, batch_shape)

  # probs = g1 / (g1 + g2)
  # logits = log(probs) - log(1 - probs)
  #        = log(g1 / (g1 + g2)) - log(1 - g1 / (g1 + g2))
  #        = log(g1) - log(g1 + g2) - log(((g1 + g2) - g1) / (g1 + g2))
  #        = log(g1) - log(g1 + g2) - (log(g1 + g2 - g1) - log(g1 + g2))
  #        = log(g1) - log(g1 + g2) - log(g2) + log(g1 + g2)
  #        = log(g1) - log(g2)
  log_gamma1 = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration1, seed=gamma1_seed,
      log_space=True)
  log_gamma2 = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration0, seed=gamma2_seed,
      log_space=True)
  return binomial.Binomial(
      total_count,
      logits=log_gamma1 - log_gamma2,
      validate_args=self.validate_args).sample(seed=binomial_seed)
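# Hedged sanity check (demo only, not part of the library): the derivation
# above says logit(g1 / (g1 + g2)) == log(g1) - log(g2) for positive g1, g2.
import numpy as np

g1, g2 = 0.3, 1.7
probs = g1 / (g1 + g2)
logits = np.log(probs) - np.log1p(-probs)
assert np.isclose(logits, np.log(g1) - np.log(g2))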
def testSampleGammaLogRateLogSpaceDerivatives(self):
  conc = tf.constant(np.linspace(.8, 1.2, 5), tf.float64)
  rate = np.linspace(.5, 2, 5)
  np.random.shuffle(rate)
  rate = tf.constant(rate, tf.float64)
  n = int(1e5)
  seed = test_util.test_seed()
  # pylint: disable=g-long-lambda
  lambdas = [  # Each should sample the same distribution.
      lambda c, r: gamma_lib.random_gamma(
          [n], c, r, seed=seed, log_space=True),
      lambda c, r: gamma_lib.random_gamma(
          [n], c, log_rate=tf.math.log(r), seed=seed, log_space=True),
      lambda c, r: tf.math.log(gamma_lib.random_gamma(
          [n], c, r, seed=seed)),
      lambda c, r: tf.math.log(gamma_lib.random_gamma(
          [n], c, log_rate=tf.math.log(r), seed=seed)),
  ]
  # pylint: enable=g-long-lambda
  samps = []
  dconc = []
  drate = []
  for fn in lambdas:
    # Take samples without the nonlinearity.
    samps.append(fn(conc, rate))
    # We compute gradients through a nonlinearity to catch a class of errors.
    _, (dc_i, dr_i) = tfp.math.value_and_gradient(
        lambda c, r: tf.reduce_mean(tf.square(fn(c, r))),  # pylint: disable=cell-var-from-loop
        (conc, rate))
    dconc.append(dc_i)
    drate.append(dr_i)

  # Assert d rate correctness. Note that the non-logspace derivative for rate
  # depends on the realized sample, whereas the logspace one does not. Also,
  # comparing grads with differently-placed log/exp is numerically perilous.
  self.assertAllClose(drate[0], drate[1], rtol=0.06)
  self.assertAllClose(drate[0], drate[2], rtol=0.06)
  self.assertAllClose(drate[1], drate[3], rtol=0.06)

  # Assert sample correctness. If the samples are incorrect, dconc will be
  # incorrect as well.
  self.assertLess(
      self.evaluate(
          st.min_num_samples_for_dkwm_cdf_test(
              discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9)),
      n)
  equiv_dist = tfb.Log()(tfd.Gamma(conc, rate))
  for samp in samps:
    self.evaluate(st.assert_true_cdf_equal_by_dkwm(
        samp, equiv_dist.cdf, false_fail_rate=1e-9))

  # Assert d concentration correctness. These are sensitive to sample values,
  # which are more strongly affected by the log/exp, hence looser tolerances.
  self.assertAllClose(dconc[0], dconc[1], rtol=0.06)
  self.assertAllClose(dconc[0], dconc[2], rtol=0.06)
  self.assertAllClose(dconc[1], dconc[3], rtol=0.06)
def _sample_n(self, n, seed=None):
  seed1, seed2 = samplers.split_seed(seed, salt='beta')
  concentration1 = tf.convert_to_tensor(self.concentration1)
  concentration0 = tf.convert_to_tensor(self.concentration0)
  shape = self._batch_shape_tensor(concentration1, concentration0)
  expanded_concentration1 = tf.broadcast_to(concentration1, shape)
  expanded_concentration0 = tf.broadcast_to(concentration0, shape)
  gamma1_sample = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration1, seed=seed1)
  gamma2_sample = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration0, seed=seed2)
  beta_sample = gamma1_sample / (gamma1_sample + gamma2_sample)
  return beta_sample
def _sample_n(self, n, seed=None):
  seed1, seed2 = samplers.split_seed(seed, salt='beta')
  concentration1 = tf.convert_to_tensor(self.concentration1)
  concentration0 = tf.convert_to_tensor(self.concentration0)
  shape = self._batch_shape_tensor(concentration1, concentration0)
  expanded_concentration1 = tf.broadcast_to(concentration1, shape)
  expanded_concentration0 = tf.broadcast_to(concentration0, shape)
  log_gamma1 = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration1, seed=seed1,
      log_space=True)
  log_gamma2 = gamma_lib.random_gamma(
      shape=[n], concentration=expanded_concentration0, seed=seed2,
      log_space=True)
  return tf.math.sigmoid(log_gamma1 - log_gamma2)
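# Hedged aside (demo only): the log-space route above agrees with the direct
# ratio sampler, since sigmoid(log(g1) - log(g2)) == g1 / (g1 + g2), while
# staying accurate when the gamma draws underflow to 0 in linear space.
import numpy as np

g1, g2 = 2.5, 0.4
sigmoid = 1. / (1. + np.exp(-(np.log(g1) - np.log(g2))))
assert np.isclose(sigmoid, g1 / (g1 + g2))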
def _sample_n(self, n, seed=None):
  concentration = tf.convert_to_tensor(self.concentration)
  mixing_concentration = tf.convert_to_tensor(self.mixing_concentration)
  mixing_rate = tf.convert_to_tensor(self.mixing_rate)
  seed_rate, seed_samples = samplers.split_seed(seed, salt='gamma_gamma')
  rate = gamma_lib.random_gamma(
      shape=[n],
      # Be sure to draw enough rates for the fully-broadcasted gamma-gamma.
      concentration=mixing_concentration + tf.zeros_like(concentration),
      rate=mixing_rate,
      seed=seed_rate)
  return gamma_lib.random_gamma(
      shape=[], concentration=concentration, rate=rate, seed=seed_samples)
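# Hedged NumPy sketch (demo only) of the compound construction above: draw a
# rate from the mixing Gamma, then a Gamma sample given that rate; the result
# is marginally GammaGamma-distributed. NumPy's gamma takes scale = 1 / rate.
import numpy as np

rng = np.random.default_rng(0)
mixing_concentration, mixing_rate, concentration = 3., 2., 5.
rate = rng.gamma(shape=mixing_concentration, scale=1. / mixing_rate)
sample = rng.gamma(shape=concentration, scale=1. / rate)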
def sample_n(n, df, loc, scale, batch_shape, dtype, seed):
  """Draw n samples from a Student T distribution.

  Note that `scale` can be negative or zero.
  The sampling method comes from the fact that if:
    X ~ Normal(0, 1)
    Z ~ Chi2(df)
    Y = X / sqrt(Z / df)
  then:
    Y ~ StudentT(df)

  Args:
    n: int, number of samples
    df: Floating-point `Tensor`. The degrees of freedom of the
      distribution(s). `df` must contain only positive values.
    loc: Floating-point `Tensor`; the location(s) of the distribution(s).
    scale: Floating-point `Tensor`; the scale(s) of the distribution(s).
    batch_shape: Integer `Tensor`; the batch shape of the distribution(s).
    dtype: Return dtype.
    seed: Optional seed for random draw.

  Returns:
    samples: a `Tensor` with prepended dimensions `n`.
  """
  normal_seed, gamma_seed = samplers.split_seed(seed, salt='student_t')
  shape = ps.concat([[n], batch_shape], 0)

  normal_sample = samplers.normal(shape, dtype=dtype, seed=normal_seed)
  df = df * tf.ones(batch_shape, dtype=dtype)
  gamma_sample = gamma_lib.random_gamma(
      [n], concentration=0.5 * df, rate=0.5, seed=gamma_seed)
  samples = normal_sample * tf.math.rsqrt(gamma_sample / df)
  return samples * scale + loc
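# Hedged NumPy sketch (demo only) of the X / sqrt(Z / df) construction in the
# docstring above; the variance of StudentT(df) is df / (df - 2) for df > 2.
import numpy as np

rng = np.random.default_rng(1)
df, size = 5., 200000
x = rng.standard_normal(size)
z = rng.chisquare(df, size)
y = x / np.sqrt(z / df)
print(y.var(), df / (df - 2.))  # Both near 5/3.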
def testSampleGammaLogSpace(self):
  concentration = np.linspace(.1, 2., 10)
  rate = np.linspace(.5, 2, 10)
  np.random.shuffle(rate)
  num_samples = int(1e5)
  self.assertLess(
      self.evaluate(
          st.min_num_samples_for_dkwm_cdf_test(
              discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9)),
      num_samples)
  samples = gamma_lib.random_gamma(
      [num_samples], concentration, rate, seed=test_util.test_seed(),
      log_space=True)

  exp_gamma = tfb.Log()(tfd.Gamma(
      concentration=concentration, rate=rate, validate_args=True))
  self.evaluate(st.assert_true_cdf_equal_by_dkwm(
      samples, exp_gamma.cdf, false_fail_rate=1e-9))

  # The log of a Gamma(concentration, rate) sample has mean
  # digamma(concentration) - log(rate) and variance
  # polygamma(1, concentration).
  self.assertAllClose(
      self.evaluate(tf.math.reduce_mean(samples, axis=0)),
      tf.math.digamma(concentration) - tf.math.log(rate),
      rtol=0.02, atol=0.01)
  self.assertAllClose(
      self.evaluate(tf.math.reduce_variance(samples, axis=0)),
      tf.math.polygamma(1., concentration),
      rtol=0.05)
def _sample_n(self, n, seed=None):
  # Sample in log space, then negate and exponentiate: if
  # log(G) ~ ExpGamma(concentration, rate), then exp(-log(G)) = 1 / G is
  # inverse-gamma distributed.
  return tf.math.exp(-gamma_lib.random_gamma(
      shape=[n],
      concentration=self.concentration,
      rate=self.scale,
      seed=seed,
      log_space=True))
def testSampleXLA(self):
  self.skip_if_no_xla()
  if not tf.executing_eagerly():
    return  # experimental_compile is eager-only.
  concentration = np.exp(np.random.rand(4, 3).astype(np.float32))
  rate = np.exp(np.random.rand(4, 3).astype(np.float32))
  dist = tfd.Gamma(concentration=concentration, rate=rate, validate_args=True)
  # Verify the compile succeeds going all the way through the distribution.
  self.evaluate(
      tf.function(
          lambda: dist.sample(5, seed=test_util.test_seed()),
          experimental_compile=True)())
  # Also test the low-level sampler and verify the XLA-friendly variant.
  # TODO(bjp): functools.partial, after eliminating PY2, which breaks
  # tf_inspect in interesting ways:
  #   ValueError: Some arguments ['concentration', 'rate'] do not have default
  #   value, but they are positioned after those with default values. This can
  #   not be expressed with ArgSpec.
  scalar_gamma = tf.function(
      lambda **kwargs: gamma_lib.random_gamma(shape=[], **kwargs),
      experimental_compile=True)
  _, runtime = self.evaluate(
      scalar_gamma(
          concentration=tf.constant(1.),
          rate=tf.constant(1.),
          seed=test_util.test_seed()))
  self.assertEqual(implementation_selection._RUNTIME_DEFAULT, runtime)
def _sample_n(self, n, seed):
  df = tf.convert_to_tensor(self.df)
  batch_shape = self._batch_shape_tensor(df)
  event_shape = self._event_shape_tensor()
  batch_ndims = tf.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = tf.concat([[n], batch_shape, event_shape], 0)
  normal_seed, gamma_seed = samplers.split_seed(seed, salt='Wishart')

  # Complexity: O(nbk**2)
  x = samplers.normal(
      shape=shape, mean=0., stddev=1., dtype=self.dtype, seed=normal_seed)

  # Complexity: O(nbk)
  # This parameterization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  expanded_df = df * tf.ones(
      self._scale.batch_shape_tensor(),
      dtype=dtype_util.base_dtype(df.dtype))
  g = gamma_lib.random_gamma(
      shape=[n],
      concentration=self._multi_gamma_sequence(
          0.5 * expanded_df, self._dimension()),
      rate=0.5,
      seed=gamma_seed)

  # Complexity: O(nbk**2)
  x = tf.linalg.band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = tf.linalg.set_diag(x, tf.sqrt(g))

  # Make batch-op ready.
  # Complexity: O(nbk**2)
  perm = tf.concat([tf.range(1, ndims), [0]], 0)
  x = tf.transpose(a=x, perm=perm)
  shape = tf.concat([batch_shape, [event_shape[0]], [event_shape[1] * n]], 0)
  x = tf.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. For LinearOperatorLowerTriangular, each matmul is O(k^3)
  # so this step has complexity O(nbk^3).
  x = self._scale.matmul(x)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  shape = tf.concat([batch_shape, event_shape, [n]], 0)
  x = tf.reshape(x, shape)
  perm = tf.concat([[ndims - 1], tf.range(0, ndims - 1)], 0)
  x = tf.transpose(a=x, perm=perm)

  if not self.input_output_cholesky:
    # Complexity: O(nbk**3)
    x = tf.matmul(x, x, adjoint_b=True)

  return x
def testSampleCPU(self):
  with tf.device('CPU'):
    _, runtime = self.evaluate(
        gamma_lib.random_gamma(
            shape=tf.constant([], dtype=tf.int32),
            concentration=tf.constant(1.),
            rate=tf.constant(1.),
            seed=test_util.test_seed()))
  self.assertEqual(implementation_selection._RUNTIME_CPU, runtime)
def _sample_n(self, n, seed=None):
  # We use the log-space gamma sampler to avoid the bump-up-from-0 correction,
  # and to apply the concentration < 1 recurrence in log-space. This improves
  # accuracy for small concentrations.
  log_gamma_sample = gamma_lib.random_gamma(
      shape=[n], concentration=self.concentration, seed=seed, log_space=True)
  return tf.math.exp(
      log_gamma_sample -
      tf.math.reduce_logsumexp(log_gamma_sample, axis=-1, keepdims=True))
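# Hedged aside (demo only): exponentiating after the logsumexp shift is the
# softmax of the log-gamma draws, which equals normalizing the positive gamma
# draws directly, so this matches the classic gammas-over-their-sum sampler.
import numpy as np

rng = np.random.default_rng(2)
g = rng.gamma(shape=np.array([0.5, 1.5, 3.0]))
log_g = np.log(g)
softmax = np.exp(log_g - np.log(np.exp(log_g).sum()))
assert np.allclose(softmax, g / g.sum())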
def _sample_n(self, n, seed=None):
  broadcast_shape = prefer_static.broadcast_shape(
      prefer_static.shape(self.concentration),
      prefer_static.shape(self.scale))
  return 1. / gamma.random_gamma(
      sample_shape=tf.concat([[n], broadcast_shape], axis=0),
      alpha=self.concentration,
      beta=self.scale,
      seed=seed)
def testSampleGPU(self):
  if not tf.test.is_gpu_available():
    self.skipTest('no GPU')
  with tf.device('GPU'):
    _, runtime = self.evaluate(gamma_lib.random_gamma(
        shape=tf.constant([], dtype=tf.int32),
        concentration=tf.constant(1.),
        rate=tf.constant(1.),
        seed=test_util.test_seed()))
  self.assertEqual(implementation_selection._RUNTIME_DEFAULT, runtime)
def _sample_n(self, n, seed=None):
  seed = samplers.sanitize_seed(seed, salt='exp_gamma')
  return gamma_lib.random_gamma(
      shape=ps.convert_to_shape_tensor([n]),
      concentration=tf.convert_to_tensor(self.concentration),
      rate=None if self.rate is None else tf.convert_to_tensor(self.rate),
      log_rate=(None if self.log_rate is None
                else tf.convert_to_tensor(self.log_rate)),
      seed=seed,
      log_space=True)
def _sample_n(self, n, seed=None):
  gamma_seed, multinomial_seed = samplers.split_seed(
      seed, salt='dirichlet_multinomial')
  concentration = tf.convert_to_tensor(self._concentration)
  total_count = tf.convert_to_tensor(self._total_count)
  n_draws = tf.cast(total_count, dtype=tf.int32)
  k = self._event_shape_tensor(concentration)[0]
  alpha = tf.math.multiply(
      tf.ones_like(total_count[..., tf.newaxis]),
      concentration,
      name='alpha')
  unnormalized_logits = gamma_lib.random_gamma(
      shape=[n], concentration=alpha, seed=gamma_seed, log_space=True)
  x = multinomial.draw_sample(
      1, k, unnormalized_logits, n_draws, self.dtype, multinomial_seed)
  final_shape = ps.concat(
      [[n],
       self._batch_shape_tensor(
           concentration=concentration, total_count=total_count),
       [k]], 0)
  return tf.reshape(x, final_shape)
def _sample_n(self, n, seed=None):
  gamma_sample = gamma_lib.random_gamma(
      shape=[n], concentration=self.concentration, seed=seed)
  return gamma_sample / tf.reduce_sum(gamma_sample, axis=-1, keepdims=True)
def _sample_n(self, n, seed=None):
  return gamma_lib.random_gamma(
      shape=[n],
      concentration=0.5 * self.df,
      rate=tf.convert_to_tensor(0.5, dtype=self.dtype),
      seed=seed)
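# Hedged moment check (demo only) of the Chi2(df) == Gamma(df / 2, rate=1/2)
# identity this sampler relies on; NumPy's gamma takes scale = 1 / rate.
import numpy as np

rng = np.random.default_rng(3)
df, size = 7., 200000
chi2 = rng.chisquare(df, size)
gam = rng.gamma(shape=0.5 * df, scale=2., size=size)
print(chi2.mean(), gam.mean())  # Both near df.
print(chi2.var(), gam.var())    # Both near 2 * df.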
def _sample_n(self, n, seed=None):
  return 1. / gamma_lib.random_gamma(
      shape=[n], concentration=self.concentration, rate=self.scale, seed=seed)
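# Hedged aside (demo only): if G ~ Gamma(concentration, rate), then 1 / G is
# InverseGamma(concentration, scale=rate), the identity relied on above; for
# concentration > 1 the mean of 1 / G is scale / (concentration - 1).
import numpy as np

rng = np.random.default_rng(4)
concentration, scale = 4., 3.
recip = 1. / rng.gamma(shape=concentration, scale=1. / scale, size=200000)
print(recip.mean(), scale / (concentration - 1.))  # Both near 1.0.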
def slice_sampler_one_dim(target_log_prob, x_initial, step_size=0.01,
                          max_doublings=30, seed=None, name=None):
  """For a given x position in each Markov chain, returns the next x.

  Applies the one dimensional slice sampling algorithm as defined in Neal
  (2003) to an input tensor x of shape (num_chains,), where num_chains is the
  number of simultaneous Markov chains, and returns the next tensor x of
  shape (num_chains,) when these chains are evolved by the slice sampling
  algorithm.

  Args:
    target_log_prob: Callable accepting a tensor like `x_initial` and
      returning a tensor containing the log density at that point of the same
      shape.
    x_initial: A tensor of any shape. The initial positions of the chains.
      This function assumes that all the dimensions of `x_initial` are batch
      dimensions (i.e. the event shape is `[]`).
    step_size: A tensor of shape and dtype compatible with `x_initial`. The
      min interval size in the doubling algorithm.
    max_doublings: Scalar tensor of dtype `tf.int32`. The maximum number of
      doublings to try to find the slice bounds.
    seed: (Optional) positive int, or Tensor seed pair. The random seed.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'slice_sampler_one_dim').

  Returns:
    retval: A tensor of the same shape and dtype as `x_initial`. The next
      state of the Markov chain.
    next_target_log_prob: The target log density evaluated at `retval`.
    bounds_satisfied: A tensor of bool dtype and shape batch dimensions.
    upper_bounds: Tensor of the same shape and dtype as `x_initial`. The
      upper bounds for the slice found.
    lower_bounds: Tensor of the same shape and dtype as `x_initial`. The
      lower bounds for the slice found.
  """
  gamma_seed, bounds_seed, sample_seed = samplers.split_seed(
      seed, n=3, salt='ssu.slice_sampler_one_dim')
  with tf.name_scope(name or 'slice_sampler_one_dim'):
    # Obtain the input dtype of the array.
    dtype = dtype_util.common_dtype([x_initial, step_size],
                                    dtype_hint=tf.float32)
    x_initial = tf.convert_to_tensor(x_initial, dtype=dtype)
    step_size = tf.convert_to_tensor(step_size, dtype=dtype)
    # Select the height of the slice. Tensor of shape x_initial.shape.
    log_slice_heights = target_log_prob(x_initial) - gamma_lib.random_gamma(
        ps.shape(x_initial), concentration=tf.ones([], dtype=dtype),
        seed=gamma_seed)
    # Given the above x and slice heights, compute the bounds of the slice
    # for each chain.
    upper_bounds, lower_bounds, bounds_satisfied = slice_bounds_by_doubling(
        x_initial, target_log_prob, log_slice_heights, max_doublings,
        step_size, seed=bounds_seed)
    retval = _sample_with_shrinkage(
        x_initial,
        target_log_prob=target_log_prob,
        log_slice_heights=log_slice_heights,
        step_size=step_size,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        seed=sample_seed)
    return (retval, target_log_prob(retval), bounds_satisfied,
            upper_bounds, lower_bounds)
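# Hedged aside (demo only): the Gamma(concentration=1) draw above is just an
# Exponential(1) draw, so the slice height is log(p(x)) - E with E ~ Exp(1).
# That is the log of the usual u * p(x) slice height with u ~ Uniform(0, 1),
# because -log(U) ~ Exponential(1).
import numpy as np

rng = np.random.default_rng(5)
u = rng.uniform(size=200000)
print(np.mean(-np.log(u)), rng.exponential(size=200000).mean())  # Both ~1.0.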