def testLogits(self):
  """Checks that the `logits` and `probs` parameterizations agree."""
  raw_logits = [-42., 42.]
  from_logits = bernoulli.Bernoulli(logits=raw_logits)
  self.assertAllClose(raw_logits, self.evaluate(from_logits.logits))

  # The remaining checks rely on the optional `special` module; skip them
  # when it is unavailable (falsy).
  if not special:
    return

  self.assertAllClose(
      special.expit(raw_logits), self.evaluate(from_logits.probs))

  raw_probs = [0.01, 0.99, 0.42]
  from_probs = bernoulli.Bernoulli(probs=raw_probs)
  self.assertAllClose(
      special.logit(raw_probs), self.evaluate(from_probs.logits))
def testEntropyWithBatch(self):
  """Checks `entropy()` elementwise over a 2x2 batch of probabilities."""
  batch_probs = [[0.1, 0.7], [0.2, 0.6]]
  dist = bernoulli.Bernoulli(probs=batch_probs, validate_args=False)
  actual = self.evaluate(dist.entropy())
  # Reference values come from the file's `entropy` helper, one per entry.
  expected = [[entropy(q) for q in row] for row in batch_probs]
  self.assertAllClose(actual, expected)
def testBernoulliBernoulliKL(self):
  """Compares `kl_divergence` of two Bernoullis to the closed-form value."""
  batch_size = 6
  probs_a = np.array([0.5] * batch_size, dtype=np.float32)
  probs_b = np.array([0.4] * batch_size, dtype=np.float32)

  dist_a = bernoulli.Bernoulli(probs=probs_a)
  dist_b = bernoulli.Bernoulli(probs=probs_b)

  kl = kullback_leibler.kl_divergence(dist_a, dist_b)
  kl_actual = self.evaluate(kl)

  # KL(a || b) = a * log(a / b) + (1 - a) * log((1 - a) / (1 - b)).
  success_term = probs_a * np.log(probs_a / probs_b)
  failure_term = (1. - probs_a) * np.log((1. - probs_a) / (1. - probs_b))
  kl_expected = success_term + failure_term

  self.assertEqual(kl.shape, (batch_size,))
  self.assertAllClose(kl_actual, kl_expected)
def testPmfShapes(self):
  """Checks the rank and static shape of `log_prob` under broadcasting.

  The first two assertions look at the rank of the evaluated result (with
  `probs` fed through a shape-less placeholder, then with a scalar `probs`
  and a rank-2 event). The remaining assertions compare the static shape
  reported by the `log_prob` tensor against explicit tuples.
  """
  # `shape=None` hides the static shape of `probs` from the distribution.
  probs = lambda p: tf.placeholder_with_default(p, shape=None)
  dist = lambda p: bernoulli.Bernoulli(probs=probs(p))
  self.assertEqual(
      2, len(self.evaluate(dist([[0.5], [0.5]]).log_prob(1)).shape))

  dist = bernoulli.Bernoulli(probs=0.5)
  self.assertEqual(2, len(self.evaluate(dist.log_prob([[1], [1]])).shape))

  dist = bernoulli.Bernoulli(probs=0.5)
  self.assertEqual((), dist.log_prob(1).shape)
  # `(1)` is just the int 1; the expected rank-1 shape must be spelled as a
  # real one-element tuple, like the other expected shapes in this test.
  self.assertEqual((1,), dist.log_prob([1]).shape)
  self.assertEqual((2, 1), dist.log_prob([[1], [1]]).shape)

  dist = bernoulli.Bernoulli(probs=[[0.5], [0.5]])
  self.assertEqual((2, 1), dist.log_prob(1).shape)
def testPmfInvalid(self):
  """Checks that `prob` rejects out-of-range samples with `validate_args`."""
  probs = [0.1, 0.2, 0.7]
  dist = bernoulli.Bernoulli(probs=probs, validate_args=True)

  sample_with_negative = [1, 1, -1]
  with self.assertRaisesOpError("must be non-negative."):
    self.evaluate(dist.prob(sample_with_negative))

  sample_above_one = [2, 0, 1]
  with self.assertRaisesOpError("Elements cannot exceed 1."):
    self.evaluate(dist.prob(sample_above_one))
def testPmfShapes(self):
  """Checks the rank and static shape of `log_prob` under broadcasting.

  Session-based variant: `probs` is fed through a `[None, 1]` placeholder
  for the first assertion; the remaining assertions use constant `probs`
  and compare `get_shape()` of the `log_prob` tensor to explicit tuples.
  """
  with self.cached_session():
    p = tf.placeholder(tf.float32, shape=[None, 1])
    dist = bernoulli.Bernoulli(probs=p)
    self.assertEqual(2, len(dist.log_prob(1).eval({p: [[0.5], [0.5]]}).shape))

    dist = bernoulli.Bernoulli(probs=0.5)
    self.assertEqual(2, len(self.evaluate(dist.log_prob([[1], [1]])).shape))

    dist = bernoulli.Bernoulli(probs=0.5)
    self.assertEqual((), dist.log_prob(1).get_shape())
    # `(1)` is just the int 1; the expected rank-1 shape must be spelled as
    # a real one-element tuple, like the other expected shapes in this test.
    self.assertEqual((1,), dist.log_prob([1]).get_shape())
    self.assertEqual((2, 1), dist.log_prob([[1], [1]]).get_shape())

    dist = bernoulli.Bernoulli(probs=[[0.5], [0.5]])
    self.assertEqual((2, 1), dist.log_prob(1).get_shape())
def _sample_n(self, n, seed=None):
  """Draws `n` samples by picking an elementary DPP and sampling from it.

  Args:
    n: number of samples to draw.
    seed: PRNG seed; it is split into one seed for choosing the elementary
      DPP and one for sampling from the chosen elementary DPP.

  Returns:
    The output of `_sample_from_edpp`, annotated in the code as having shape
    `[n, batch_shape, ground_set_size]`.
  """
  selection_seed, sampling_seed = samplers.split_seed(seed)
  eigenvalues = tf.convert_to_tensor(self.eigenvalues)
  eigenvectors = tf.convert_to_tensor(self.eigenvectors)
  batch_shape = self._batch_shape_tensor(
      eigenvalues=eigenvalues, eigenvectors=eigenvectors)
  ground_set_size = ps.shape(eigenvectors)[-2]
  vecs_size = ps.shape(eigenvectors)[-1]

  # Step 1: choose an elementary DPP (E-DPP), i.e. a DPP whose kernel has
  # eigenvalues in `{0, 1}`. Every DPP is a mixture of E-DPPs, so the usual
  # sampling scheme first picks an E-DPP (done here) and then samples from
  # that E-DPP (done in `_sample_from_edpp`).
  selection_logits_shape = ps.concat([batch_shape, [vecs_size]], axis=0)
  selection_logits = tf.broadcast_to(
      tf.math.log(eigenvalues), selection_logits_shape)
  # Shape: [n, batch_shape, vecs_size]
  selected_indices = bernoulli.Bernoulli(logits=selection_logits).sample(
      n, seed=selection_seed)

  # Step 2: give every sample its own copy of the eigenvectors.
  # Shape: [n, batch_shape, ground_set_size, vecs_size]
  tiled_shape = ps.concat(
      [[n], batch_shape, [ground_set_size, vecs_size]], axis=0)
  tiled_eigenvectors = tf.broadcast_to(eigenvectors, tiled_shape)

  # Shape: [n, batch_shape, ground_set_size]
  return _sample_from_edpp(
      tiled_eigenvectors, selected_indices, seed=sampling_seed)
def testNotReparameterized(self):
  """Checks that no gradient flows from Bernoulli samples back to `probs`."""
  probs = tf.constant([0.2, 0.6])
  with tf.GradientTape() as tape:
    tape.watch(probs)
    draws = bernoulli.Bernoulli(probs=probs).sample(100)
  self.assertIsNone(tape.gradient(draws, probs))
def testBroadcasting(self):
  """Checks `log_prob` broadcasting between `probs` and the event."""

  def make_dist(value):
    # `shape=None` hides the static shape of `probs` from the distribution.
    dynamic_probs = tf.placeholder_with_default(value, shape=None)
    return bernoulli.Bernoulli(probs=dynamic_probs)

  # Scalar probs, scalar event.
  self.assertAllClose(
      np.log(0.5), self.evaluate(make_dist(0.5).log_prob(1)))
  # Scalar probs, vector event.
  self.assertAllClose(
      np.log([0.5, 0.5, 0.5]),
      self.evaluate(make_dist(0.5).log_prob([1, 1, 1])))
  # Vector probs, scalar event.
  self.assertAllClose(
      np.log([0.5, 0.5, 0.5]),
      self.evaluate(make_dist([0.5, 0.5, 0.5]).log_prob(1)))
def testPmfWithFloatArgReturnsXEntropy(self):
  """Checks `log_prob` on fractional inputs against x*log(p)+(1-x)*log(1-p)."""
  probs = [[0.2], [0.4], [0.3], [0.6]]
  samples = [0, 0.1, 0.8]

  expected = (
      np.float32(samples) * np.log(np.float32(probs)) +
      (1 - np.float32(samples)) * np.log(1 - np.float32(probs)))

  dist = bernoulli.Bernoulli(probs=probs, validate_args=False)
  self.assertAllClose(expected, self.evaluate(dist.log_prob(samples)))
def testInvalidP(self):
  """Checks `probs` range validation and that in-range values pass through."""
  # Each entry pairs out-of-range values with the error they must trigger.
  invalid_cases = [
      ([1.01, 2.], "probs has components greater than 1"),
      ([-0.01, -3.], "Condition x >= 0"),
  ]
  for bad_values, message in invalid_cases:
    for bad_p in bad_values:
      with self.assertRaisesOpError(message):
        dist = bernoulli.Bernoulli(probs=bad_p, validate_args=True)
        self.evaluate(dist.probs)

  for good_p in [0.0, 0.5, 1.0]:
    dist = bernoulli.Bernoulli(probs=good_p)
    self.assertEqual(good_p, self.evaluate(dist.probs))  # Should not fail
def testPmfCorrectBroadcastDynamicShape(self):
  """Checks `prob` broadcasting when `probs` has no static shape."""
  dynamic_probs = tf.placeholder_with_default([0.2, 0.3, 0.4], shape=None)
  dist = bernoulli.Bernoulli(probs=dynamic_probs)

  rank1_event = [1, 0, 1]
  self.assertAllClose(
      [0.2, 0.7, 0.4], self.evaluate(dist.prob(rank1_event)))

  rank2_event = [[1, 0, 1]]
  self.assertAllClose(
      [[0.2, 0.7, 0.4]], self.evaluate(dist.prob(rank2_event)))
def testBroadcasting(self):
  """Checks `log_prob` broadcasting with `probs` fed through a placeholder."""
  with self.cached_session():
    probs_ph = tf.placeholder(tf.float32)
    dist = bernoulli.Bernoulli(probs=probs_ph)

    # Scalar probs, scalar event.
    scalar_result = dist.log_prob(1).eval({probs_ph: 0.5})
    self.assertAllClose(np.log(0.5), scalar_result)

    # Scalar probs, vector event.
    vector_event_result = dist.log_prob([1, 1, 1]).eval({probs_ph: 0.5})
    self.assertAllClose(np.log([0.5, 0.5, 0.5]), vector_event_result)

    # Vector probs, scalar event.
    vector_probs_result = dist.log_prob(1).eval({probs_ph: [0.5, 0.5, 0.5]})
    self.assertAllClose(np.log([0.5, 0.5, 0.5]), vector_probs_result)
def testSampleN(self):
  """Checks dtype, support and empirical mean of a large sample."""
  probs = [0.2, 0.6]
  num_samples = 100000
  dist = bernoulli.Bernoulli(probs=probs)

  samples = dist.sample(num_samples)
  samples.set_shape([num_samples, 2])
  self.assertEqual(samples.dtype, tf.int32)

  sample_values = self.evaluate(samples)
  self.assertTrue(np.all(sample_values >= 0))
  self.assertTrue(np.all(sample_values <= 1))
  # The standard error of the sample mean is ~ sqrt(p * (1 - p) / n), so the
  # tolerance below is very sensitive to both `probs` and `num_samples`.
  self.assertAllClose(probs, np.mean(sample_values, axis=0), atol=1e-2)
  self.assertEqual({0, 1}, set(sample_values.flatten()))

  # Here we only verify that mismatched types do not cause a crash.
  # b/30940152
  dist = bernoulli.Bernoulli(np.log([.2, .4]))
  sampled_shape = dist.sample(1, seed=42).shape.as_list()
  self.assertAllEqual((1, 2), sampled_shape)
def testPmfCorrectBroadcastDynamicShape(self):
  """Checks `prob` broadcasting with `probs` fed through a placeholder."""
  with self.cached_session():
    probs_ph = tf.placeholder(dtype=tf.float32)
    dist = bernoulli.Bernoulli(probs=probs_ph)
    feed = {probs_ph: [0.2, 0.3, 0.4]}

    rank1_event = [1, 0, 1]
    self.assertAllClose(dist.prob(rank1_event).eval(feed), [0.2, 0.7, 0.4])

    rank2_event = [[1, 0, 1]]
    self.assertAllClose(dist.prob(rank2_event).eval(feed), [[0.2, 0.7, 0.4]])
def testSampleActsLikeSampleN(self):
  """Checks that equal seeds give equal draws for static and fed counts."""
  with self.cached_session() as sess:
    probs = [0.2, 0.6]
    dist = bernoulli.Bernoulli(probs=probs)
    seed = 42

    # Sample count given as a Python int.
    n = 1000
    first_static = self.evaluate(dist.sample(n, seed))
    second_static = self.evaluate(dist.sample(n, seed))
    self.assertAllEqual(first_static, second_static)

    # Sample count fed at run time through a placeholder.
    n = tf.placeholder(tf.int32)
    sample_ops = [dist.sample(n, seed), dist.sample(n, seed)]
    sample1, sample2 = sess.run(sample_ops, feed_dict={n: 1000})
    self.assertAllEqual(sample1, sample2)
def resample_one_feature(step, seed, sampler_state):
  """Proposes flipping one feature's sparsity indicator and maybe accepts it.

  The feature is looked up in the enclosing scope's `feature_permutation` at
  position `step`. The flip decision is a Bernoulli draw whose logits are
  the difference in `unnormalized_log_prob` between the proposed and the
  current state.

  Args:
    step: index into `feature_permutation`.
    seed: PRNG seed; split into a seed for this draw and a seed to hand on.
    sampler_state: current sampler state.

  Returns:
    A tuple `(step + 1, next_seed, state)`, where `state` is selected by
    `mcmc_util.choose` between the proposed and the current state.
  """
  flip_seed, carried_seed = samplers.split_seed(seed, n=2)
  feature_idx = tf.gather(feature_permutation, step)

  # Maybe flip this weight's sparsity indicator.
  candidate_state = self._flip_feature(sampler_state, idx=feature_idx)
  flip_logits = (candidate_state.unnormalized_log_prob -
                 sampler_state.unnormalized_log_prob)
  accept_flip = bernoulli.Bernoulli(
      logits=flip_logits, dtype=tf.bool).sample(seed=flip_seed)

  next_step = step + 1
  chosen_state = mcmc_util.choose(accept_flip, candidate_state, sampler_state)
  return next_step, carried_seed, chosen_state
def testVarianceAndStd(self):
  """Checks `variance()` and `stddev()` against p * (1 - p) and its root."""

  def bernoulli_variance(q):
    return q * (1. - q)

  probs = [[0.2, 0.7], [0.5, 0.4]]
  dist = bernoulli.Bernoulli(probs=probs)

  actual_variance = self.evaluate(dist.variance())
  expected_variance = np.array(
      [[bernoulli_variance(q) for q in row] for row in probs],
      dtype=np.float32)
  self.assertAllClose(actual_variance, expected_variance)

  actual_stddev = self.evaluate(dist.stddev())
  expected_stddev = np.array(
      [[np.sqrt(bernoulli_variance(q)) for q in row] for row in probs],
      dtype=np.float32)
  self.assertAllClose(actual_stddev, expected_stddev)
def testSampleActsLikeSampleN(self):
  """Checks that equal seeding gives equal draws, in graph and eager modes."""
  probs = [0.2, 0.6]
  dist = bernoulli.Bernoulli(probs=probs)
  seed = 42

  # Sample count given as a Python int.
  n = 1000
  self.assertAllEqual(
      self.evaluate(dist.sample(n, seed)),
      self.evaluate(dist.sample(n, seed)))

  def seeded_sample(num):
    # When executing eagerly, reset the global seed and pass no op seed;
    # otherwise pass the op seed directly.
    if tf.executing_eagerly():
      tf.set_random_seed(42)
    return dist.sample(num, None if tf.executing_eagerly() else 42)

  # Sample count with no static value.
  n = tf.placeholder_with_default(np.int32(1000), shape=None)
  sample1 = seeded_sample(n)
  sample2 = seeded_sample(n)
  sample1, sample2 = self.evaluate([sample1, sample2])
  self.assertAllEqual(sample1, sample2)
def testSampleDeterministicScalarVsVector(self):
  """Checks that `sample(n)` and `sample([n])` give identical draws."""
  probs = [0.2, 0.6]
  dist = bernoulli.Bernoulli(probs=probs)

  def _maybe_seed():
    # Graph mode: hand back an op seed. Eager mode: set the global seed
    # and hand back no op seed.
    if not tf.executing_eagerly():
      return 42
    tf.set_random_seed(42)
    return None

  # Sample count given as a Python int.
  n = 1000
  scalar_draw = self.evaluate(dist.sample(n, _maybe_seed()))
  vector_draw = self.evaluate(dist.sample([n], _maybe_seed()))
  self.assertAllEqual(scalar_draw, vector_draw)

  # Sample count with no static value.
  n = tf.placeholder_with_default(np.int32(1000), shape=None)
  sample1 = dist.sample(n, _maybe_seed())
  sample2 = dist.sample([n], _maybe_seed())
  sample1, sample2 = self.evaluate([sample1, sample2])
  self.assertAllEqual(sample1, sample2)
def _testPmf(self, **kwargs):
  """Checks `prob` and `log_prob` on a fixed table of events.

  Args:
    **kwargs: forwarded unchanged to the `bernoulli.Bernoulli` constructor.
  """
  dist = bernoulli.Bernoulli(**kwargs)

  # Each entry is (event, expected pmf); events cover several ranks.
  cases = [
      (0, [[0.8, 0.6], [0.7, 0.4]]),
      ([1], [[0.2, 0.4], [0.3, 0.6]]),
      ([1, 0], [[0.2, 0.6], [0.3, 0.4]]),
      ([[1, 0]], [[0.2, 0.6], [0.3, 0.4]]),
      ([[1, 0], [1, 1]], [[0.2, 0.6], [0.3, 0.6]]),
  ]

  for event, expected_pmf in cases:
    self.assertAllClose(self.evaluate(dist.prob(event)), expected_pmf)
    self.assertAllClose(
        self.evaluate(dist.log_prob(event)), np.log(expected_pmf))
def testEntropyNoBatch(self):
  """Checks `entropy()` for a single scalar probability."""
  prob = 0.2
  actual = self.evaluate(bernoulli.Bernoulli(probs=prob).entropy())
  self.assertAllClose(actual, entropy(prob))
def __init__(self,
             design_matrix,
             nonzero_prior_prob=0.5,
             weights_prior_precision=None,
             default_pseudo_observations=1.,
             observation_noise_variance_prior_concentration=0.005,
             observation_noise_variance_prior_scale=0.0025,
             observation_noise_variance_upper_bound=None,
             num_missing=0.):
  """Sets up the priors used by the spike and slab sampler.

  Args:
    design_matrix: (batch of) float `Tensor`(s), the regression design
      matrix (`X` in [1]), with shape `[num_outputs, num_features]`.
    nonzero_prior_prob: scalar float `Tensor`, the prior probability of the
      'slab', i.e. the prior probability that a given feature has nonzero
      weight (`pi` in [1]).
      Default value: `0.5`.
    weights_prior_precision: (batch of) float `Tensor`, the complete prior
      precision matrix(s) over the weights, with shape
      `[num_features, num_features]`. When omitted, the Zellner g-prior from
      `[1]` is used:
      `Omega^{-1} = kappa * (X'X + diag(X'X)) / (2 * num_outputs)`,
      which plugs in the suggested default `w = 0.5`. `kappa` is set by the
      `default_pseudo_observations` argument.
      Default value: `None`.
    default_pseudo_observations: scalar float `Tensor`, the number of
      pseudo-observations for the prior precision matrix over the weights
      (`kappa` in [1]). See also `weights_prior_precision`.
      Default value: `1.`.
    observation_noise_variance_prior_concentration: scalar float `Tensor`,
      the concentration parameter of the inverse gamma prior on the noise
      variance (`nu / 2` in [1]).
      Default value: 0.005.
    observation_noise_variance_prior_scale: scalar float `Tensor`, the scale
      parameter of the inverse gamma prior on the noise variance (`ss / 2`
      in [1]).
      Default value: 0.0025.
    observation_noise_variance_upper_bound: optional scalar float `Tensor`,
      the maximum value of the sampled observation noise variance. Setting a
      bound can help avoid divergence when the sampler is initialized far
      from the posterior.
      Default value: `None`.
    num_missing: optional scalar float `Tensor`. Corrects for how many
      missing values are coded as zero in the design matrix.
      Default value: `0.`.
  """
  with tf.name_scope('spike_slab_sampler'):
    dtype = dtype_util.common_dtype(
        [
            design_matrix,
            nonzero_prior_prob,
            weights_prior_precision,
            observation_noise_variance_prior_concentration,
            observation_noise_variance_prior_scale,
            observation_noise_variance_upper_bound,
            num_missing,
        ],
        dtype_hint=tf.float32)

    def _as_tensor(value):
      # All converted arguments share the common dtype computed above.
      return tf.convert_to_tensor(value, dtype=dtype)

    design_matrix = _as_tensor(design_matrix)
    nonzero_prior_prob = _as_tensor(nonzero_prior_prob)
    observation_noise_variance_prior_concentration = _as_tensor(
        observation_noise_variance_prior_concentration)
    observation_noise_variance_prior_scale = _as_tensor(
        observation_noise_variance_prior_scale)
    num_missing = _as_tensor(num_missing)
    # The upper bound is optional; convert it only when it was supplied.
    if observation_noise_variance_upper_bound is not None:
      observation_noise_variance_upper_bound = _as_tensor(
          observation_noise_variance_upper_bound)

    matrix_shape = ps.shape(design_matrix)
    # Rows coded as missing do not count towards the number of outputs.
    num_outputs = tf.cast(matrix_shape[-2], dtype=dtype) - num_missing
    num_features = matrix_shape[-1]

    x_transpose_x = tf.matmul(design_matrix, design_matrix, adjoint_a=True)
    if weights_prior_precision is None:
      # Default prior: Zellner's g-prior from section 3.2.1 of [1]:
      #   `omega^{-1} = kappa * (w X'X + (1 - w) diag(X'X)) / n`
      # with default `w = 0.5`. Halving `X'X` and then restoring its full
      # diagonal gives `0.5 * X'X + 0.5 * diag(X'X)`.
      expanded_num_outputs = broadcast_util.left_justified_expand_dims_like(
          num_outputs, x_transpose_x)
      blended_gram = tf.linalg.set_diag(
          0.5 * x_transpose_x, tf.linalg.diag_part(x_transpose_x))
      weights_prior_precision = (
          default_pseudo_observations * blended_gram / expanded_num_outputs)

    observation_noise_variance_posterior_concentration = (
        observation_noise_variance_prior_concentration +
        _as_tensor(num_outputs / 2.))

    self.num_outputs = num_outputs
    self.num_features = num_features
    self.design_matrix = design_matrix
    self.x_transpose_x = x_transpose_x
    self.dtype = dtype
    # One Bernoulli indicator per feature.
    self.nonzeros_prior = sample_dist.Sample(
        bernoulli.Bernoulli(probs=nonzero_prior_prob),
        sample_shape=[num_features])
    self.weights_prior_precision = weights_prior_precision
    self.observation_noise_variance_prior_concentration = (
        observation_noise_variance_prior_concentration)
    self.observation_noise_variance_prior_scale = (
        observation_noise_variance_prior_scale)
    self.observation_noise_variance_upper_bound = (
        observation_noise_variance_upper_bound)
    self.observation_noise_variance_posterior_concentration = (
        observation_noise_variance_posterior_concentration)
def _left_doubling_increments(batch_shape, max_doublings, step_size, seed=None,
                              name=None):
  """Computes the doubling increments for the left end point.

  The doubling procedure expands an initial interval to find a superset of the
  true slice. At each doubling iteration, the interval width is doubled to
  either the left or the right hand side with equal probability.
  If, initially, the left end point is at `L(0)` and the width of the
  interval is `w(0)`, then the left end point and the width at the
  k-th iteration (denoted L(k) and w(k) respectively) are given by the
  following recursions:

  ```none
  w(k) = 2 * w(k-1)
  L(k) = L(k-1) - w(k-1) * X_k, X_k ~ Bernoulli(0.5)
  or, L(0) - L(k) = w(0) Sum(2^i * X(i+1), 0 <= i < k)
  ```

  This function computes the sequence of `L(0)-L(k)` and `w(k)` for k between 0
  and `max_doublings` independently for each chain.

  Args:
    batch_shape: Positive int32 `tf.Tensor`. The batch shape.
    max_doublings: Scalar positive int32 `tf.Tensor`. The maximum number of
      doublings to consider.
    step_size: A real `tf.Tensor` with shape compatible with [num_chains].
      The size of the initial interval.
    seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
    name: Python `str` name prefixed to Ops created by this function.
      Default value: `None` (i.e., 'left_doubling_increments').

  Returns:
    left_increments: A tensor of shape (max_doublings+1, batch_shape). The
      relative position of the left end point after the doublings.
    widths: A tensor of shape (max_doublings+1, ones_like(batch_shape)). The
      widths of the intervals at each stage of the doubling.
  """
  with tf.name_scope(name or 'left_doubling_increments'):
    step_size = tf.convert_to_tensor(value=step_size)
    dtype = dtype_util.base_dtype(step_size.dtype)

    # Output shape of the left increments tensor.
    output_shape = ps.concat(([max_doublings + 1], batch_shape), axis=0)

    # A sample realization of X_k.
    # `Bernoulli`'s first positional parameter is `logits`, so the success
    # probability has to be passed as `probs=`; a positional 0.5 would give
    # sigmoid(0.5) ~= 0.62 instead of the fair coin the recursion requires.
    expand_left = bernoulli_lib.Bernoulli(probs=0.5, dtype=dtype).sample(
        sample_shape=output_shape, seed=seed)

    # The widths of the successive intervals. Starts with 1.0 and ends with
    # 2^max_doublings.
    width_multipliers = tf.cast(2**tf.range(0, max_doublings + 1), dtype=dtype)
    # Output shape of the `widths` tensor.
    widths_shape = ps.concat(
        ([max_doublings + 1], ps.ones_like(batch_shape)), axis=0)
    width_multipliers = tf.reshape(width_multipliers, shape=widths_shape)
    # Widths shape is [max_doublings + 1, 1, 1, 1...].
    widths = width_multipliers * step_size

    # Take the cumulative sum of the left side increments in slice width to
    # give the resulting distance from the initial lower bound. The sum is
    # exclusive, so the first entry (k = 0) is zero.
    left_increments = tf.cumsum(widths * expand_left, exclusive=True, axis=0)
    return left_increments, widths
def testP(self):
  """Checks that `probs` passed to the constructor is returned unchanged."""
  expected_probs = [0.2, 0.4]
  dist = bernoulli.Bernoulli(probs=expected_probs)
  actual_probs = self.evaluate(dist.probs)
  self.assertAllClose(expected_probs, actual_probs)
def make_bernoulli(batch_shape, dtype=tf.int32):
  """Builds a Bernoulli with uniformly random `probs` of the given shape.

  Args:
    batch_shape: iterable of ints; the shape of the random `probs` array.
    dtype: forwarded as the `dtype` argument of `bernoulli.Bernoulli`.

  Returns:
    A `bernoulli.Bernoulli` whose `probs` is a float32 constant drawn with
    `np.random.uniform`.
  """
  random_probs = np.random.uniform(size=list(batch_shape))
  probs_tensor = tf.constant(random_probs, dtype=tf.float32)
  return bernoulli.Bernoulli(probs=probs_tensor, dtype=dtype)
def testBoundaryConditions(self):
  """Checks `log_prob` at both outcomes when `probs` is exactly 1."""
  dist = bernoulli.Bernoulli(probs=1.0)

  log_prob_of_zero = self.evaluate(dist.log_prob(0))
  self.assertAllClose(np.nan, log_prob_of_zero)

  log_prob_of_one = self.evaluate(dist.log_prob(1))
  self.assertAllClose([np.nan], [log_prob_of_one])
def testMean(self):
  """Checks that `mean()` equals `probs` for a 2x2 batch."""
  probs = np.array([[0.2, 0.7], [0.5, 0.4]], dtype=np.float32)
  actual_mean = self.evaluate(bernoulli.Bernoulli(probs=probs).mean())
  self.assertAllEqual(actual_mean, probs)