def testAssertsPositiveScale(self):
  """Expects an error when a variable `scale` holds a negative entry."""
  bad_scale = tf.Variable([1., 2., -3.])
  expected_message = 'Argument `scale` must be positive.'
  with self.assertRaisesError(expected_message):
    half_normal = tfd.HalfNormal(scale=bad_scale, validate_args=True)
    # Variables must be initialized before the sample can be evaluated.
    initializers = [var.initializer for var in half_normal.variables]
    self.evaluate(initializers)
    draw = half_normal.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def get_model_ready(self, dist, successes):
  """Forwards `dist` to `windowed_sampling._setup_mcmc` with the test seed.

  Args:
    dist: Passed through as the first positional argument.
    successes: Passed through as the `successes` keyword argument.

  Returns:
    Whatever `windowed_sampling._setup_mcmc` returns.
  """
  setup_fn = windowed_sampling._setup_mcmc
  n_chains = self.n_chains
  seed = test_util.test_seed()
  return setup_fn(dist, n_chains, seed, successes=successes)
def test_ordereddict_sample_log_prob(self):
  """Checks a `JointDistributionNamed` built from an `OrderedDict` model.

  Verifies the resolved dependency graph, the number and types of the
  component distributions, the dtype structure, the static and tensor-valued
  batch/event shapes, and that the joint log-prob equals the sum of the
  component log-probs.
  """
  build_ordereddict = lambda e, scale, loc, m, x: collections.OrderedDict([  # pylint: disable=g-long-lambda
      ('e', e),
      ('scale', scale),
      ('loc', loc),
      ('m', m),
      ('x', x)
  ])

  # pylint: disable=bad-whitespace
  model = build_ordereddict(
      e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
      scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
      loc=tfd.Normal(loc=0, scale=2.),
      m=tfd.Normal,
      x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12))
  # pylint: enable=bad-whitespace
  d = tfd.JointDistributionNamed(model, validate_args=True)

  self.assertEqual(
      (
          ('e', ()),
          ('scale', ('e',)),
          ('loc', ()),
          ('m', ('loc', 'scale')),
          ('x', ('m',)),
      ),
      d.resolve_graph())

  xs = d.sample(seed=test_util.test_seed())
  self.assertLen(xs, 5)
  # We'll verify the shapes work as intended when we plumb these back into the
  # respective log_probs.

  ds, _ = d.sample_distributions(value=xs, seed=test_util.test_seed())
  self.assertLen(ds, 5)
  values = tuple(ds.values())
  self.assertIsInstance(values[0], tfd.Independent)
  self.assertIsInstance(values[1], tfd.Gamma)
  self.assertIsInstance(values[2], tfd.Normal)
  self.assertIsInstance(values[3], tfd.Normal)
  self.assertIsInstance(values[4], tfd.Sample)

  # Static properties.
  self.assertAllEqual(
      build_ordereddict(
          e=tf.float32,
          scale=tf.float32,
          loc=tf.float32,
          m=tf.float32,
          x=tf.int32),
      d.dtype)

  batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
      [d.batch_shape_tensor(), d.event_shape_tensor()])

  # Shapes are looked up by key. Zipping the mappings directly would iterate
  # over their keys and compare key strings with each other, so no shape would
  # ever actually be checked.
  expected_batch_shape = build_ordereddict(e=[], scale=[], loc=[], m=[], x=[])
  batch_tensorshape = d.batch_shape
  for k in expected_batch_shape:
    self.assertAllEqual(expected_batch_shape[k], batch_tensorshape[k])
    self.assertAllEqual(expected_batch_shape[k], batch_shape_tensor_[k])

  expected_event_shape = build_ordereddict(
      e=[2], scale=[], loc=[], m=[], x=[12])
  event_tensorshape = d.event_shape
  for k in expected_event_shape:
    self.assertAllEqual(expected_event_shape[k], event_tensorshape[k])
    self.assertAllEqual(expected_event_shape[k], event_shape_tensor_[k])

  # `ds` and `xs` share the model's key order, so pairing by position is safe.
  # The loop variable is named `dist` to avoid shadowing the joint `d`.
  expected_jlp = sum(
      dist.log_prob(x) for dist, x in zip(ds.values(), xs.values()))
  actual_jlp = d.log_prob(xs)
  self.assertAllClose(*self.evaluate([expected_jlp, actual_jlp]),
                      atol=0., rtol=1e-4)
def test_latent_dirichlet_allocation(self):
  """Exercises a Latent Dirichlet Allocation joint model.

  The LDA generative process can be written as:

  ```none
  N[i] ~ Poisson(xi)
  theta[i] ~ Dirichlet(alpha)
  Z[i] ~ Multinomial(N[i], theta[i])
  for k in 1...K:
    X[i,k] ~ Multinomial(Z[i, k], beta[k])
  ```

  Typically `xi` is specified and `alpha`, `beta` are fit using type-II
  maximum likelihood estimators.

  Reference: http://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf
  """
  # Hyperparameters.
  num_topics = 3
  num_words = 10
  avg_doc_length = 5

  # Both parameters start from uniform draws; `alpha` goes through Softplus.
  init_dist = tfd.Uniform(low=-1., high=1.)
  alpha_init = init_dist.sample([num_topics], seed=test_util.test_seed())
  alpha = tfp.util.TransformedVariable(
      alpha_init, tfb.Softplus(), name='alpha')
  beta_init = init_dist.sample(
      [num_topics, num_words], seed=test_util.test_seed())
  beta = tf.Variable(beta_init, name='beta')

  # LDA Model.
  # Note near 1:1 with mathematical specification. The main distinction is the
  # use of Independent--this lets us easily aggregate multinomials across
  # topics (and in any "shape" of documents).
  components = [
      tfd.Poisson(rate=avg_doc_length),  # n
      tfd.Dirichlet(concentration=alpha),  # theta
      lambda theta, n: tfd.Multinomial(total_count=n, probs=theta),  # z
      lambda z: tfd.Independent(  # x  pylint: disable=g-long-lambda
          tfd.Multinomial(total_count=z, logits=beta),
          reinterpreted_batch_ndims=1),
  ]
  lda = tfd.JointDistributionSequential(components, validate_args=True)

  # Now, let's sample some "documents" and compute the log-prob of each.
  docs_shape = [2, 4]  # That is, 8 docs in the shape of [2, 4].
  draws = lda.sample(docs_shape, seed=test_util.test_seed())
  [n, theta, z, x] = draws
  log_probs = lda.log_prob([n, theta, z, x])
  self.assertEqual(docs_shape, log_probs.shape)

  # Verify we correctly track trainable variables.
  trainable = lda.trainable_variables
  self.assertLen(trainable, 2)
  self.assertIs(alpha.pretransformed_input, trainable[0])
  self.assertIs(beta, trainable[1])

  # Ensure we can compute gradients.
  with tf.GradientTape() as tape:
    # Note: The samples are not taped, hence implicitly "stop_gradient."
    negloglik = -lda.log_prob([n, theta, z, x])
  grads = tape.gradient(negloglik, lda.trainable_variables)

  self.assertLen(grads, 2)
  expected_shapes = (alpha.pretransformed_input.shape, beta.shape)
  actual_shapes = (grads[0].shape, grads[1].shape)
  self.assertAllEqual(expected_shapes, actual_shapes)
  self.assertAllNotNone(grads)
def testZipfSample_AvoidsInfiniteLoop(self):
  """Evaluates 1000 Zipf draws at `power=1.` with validation disabled.

  The test passes as long as evaluation completes; per its name it guards
  against the sampler looping forever.
  """
  num_draws = 1000
  dist = tfd.Zipf(power=1., validate_args=False)
  draws = dist.sample(num_draws, seed=test_util.test_seed())
  self.evaluate(draws)
def tf_exp_gamma(a, b):
  """Returns the log of `[num_samples]` draws from `tf.random.gamma(a, b)`.

  `num_samples` is read from the enclosing scope.
  """
  gamma_draws = tf.random.gamma(
      shape=[num_samples], alpha=a, beta=b, seed=test_util.test_seed())
  return tf.math.log(gamma_draws)
def testAssertsPositiveRate(self):
  """Expects an op error when `ExpGamma` samples with a negative rate entry."""
  bad_rate = tf.Variable([1., 2., -3.])
  self.evaluate(bad_rate.initializer)
  expected_message = 'Argument `rate` must be positive.'
  with self.assertRaisesOpError(expected_message):
    exp_gamma = tfd.ExpGamma(
        concentration=[5.], rate=bad_rate, validate_args=True)
    draw = exp_gamma.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def _testMVN(self,
             base_distribution_class,
             base_distribution_kwargs,
             event_shape=()):
  """Compares a transformed base distribution against scipy's multivariate normal.

  Builds two distributions via `self._cls()`: one from plain Python values
  ("static") and one whose base parameters and sample shape are
  `tf.Variable`s declared with unknown shape ("dynamic"). Both apply
  `Shift(self._shift)` after `ScaleMatvecTriL(self._tril)`. For each, the
  sample mean/covariance, event/batch shapes, log_prob, prob, mean and entropy
  are checked against values computed with `scipy.stats.multivariate_normal`.

  Args:
    base_distribution_class: Callable invoked with `validate_args=True` plus
      `base_distribution_kwargs` to build the base distribution.
    base_distribution_kwargs: Dict of keyword arguments for the base
      distribution; each value is also wrapped in a `tf.Variable` for the
      dynamic variant.
    event_shape: Passed as `sample_shape` to `tfd.Sample`.
  """
  # Base distribution shapes must be compatible w/bijector; most bijectors are
  # batch_shape agnostic and only care about event_ndims.
  # In the case of `ScaleMatvecTriL`, if we got it wrong then it would fire an
  # exception due to incompatible dimensions.
  event_shape_var = tf.Variable(
      np.int32(event_shape),
      shape=tf.TensorShape(None),
      name='dynamic_event_shape')

  base_distribution_dynamic_kwargs = {
      k: tf.Variable(v,
                     shape=tf.TensorShape(None),
                     name='dynamic_{}'.format(k))
      for k, v in base_distribution_kwargs.items()
  }

  fake_mvn_dynamic = self._cls()(
      distribution=tfd.Sample(
          base_distribution_class(
              validate_args=True, **base_distribution_dynamic_kwargs),
          sample_shape=event_shape_var),
      bijector=tfb.Chain([
          tfb.Shift(shift=self._shift),
          tfb.ScaleMatvecTriL(scale_tril=self._tril)
      ]),
      validate_args=True)

  fake_mvn_static = self._cls()(
      distribution=tfd.Sample(
          base_distribution_class(
              validate_args=True, **base_distribution_kwargs),
          sample_shape=event_shape),
      bijector=tfb.Chain([
          tfb.Shift(shift=self._shift),
          tfb.ScaleMatvecTriL(scale_tril=self._tril)
      ]),
      validate_args=True)

  actual_mean = np.tile(self._shift, [2, 1])  # ScaleMatvecTriL elided tile.
  # Covariance of each batch member is tril @ tril^T.
  actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

  def actual_mvn_log_prob(x):
    # Reference log-density: one scipy MVN per batch member, evaluated on that
    # member's slice of `x`, then transposed so the batch axis comes last.
    return np.concatenate([
        [  # pylint: disable=g-complex-comprehension
            stats.multivariate_normal(actual_mean[i],
                                      actual_cov[i]).logpdf(x[:, i, :])
        ] for i in range(len(actual_cov))
    ]).T

  actual_mvn_entropy = np.concatenate([[
      stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()
  ] for i in range(len(actual_cov))])

  self.assertAllEqual([3], fake_mvn_static.event_shape)
  self.assertAllEqual([2], fake_mvn_static.batch_shape)

  # Static shapes of the dynamic variant are only asserted unknown in graph
  # mode.
  if not tf.executing_eagerly():
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

  num_samples = 7e3
  for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
    # Ensure sample works by checking first, second moments.
    y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
    x = y[0:5, ...]
    sample_mean = tf.reduce_mean(y, axis=0)
    centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
    sample_cov = tf.matmul(
        centered_y, centered_y, transpose_b=True) / num_samples
    # Variables are initialized before the fetches below are evaluated.
    self.evaluate([
        v.initializer for v in base_distribution_dynamic_kwargs.values()
    ] + [event_shape_var.initializer])
    [
        sample_mean_,
        sample_cov_,
        x_,
        fake_event_shape_,
        fake_batch_shape_,
        fake_log_prob_,
        fake_prob_,
        fake_mean_,
        fake_entropy_,
    ] = self.evaluate([
        sample_mean,
        sample_cov,
        x,
        fake_mvn.event_shape_tensor(),
        fake_mvn.batch_shape_tensor(),
        fake_mvn.log_prob(x),
        fake_mvn.prob(x),
        fake_mvn.mean(),
        fake_mvn.entropy(),
    ])

    self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
    self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

    # Ensure all other functions work as intended.
    self.assertAllEqual([5, 2, 3], x_.shape)
    self.assertAllEqual([3], fake_event_shape_)
    self.assertAllEqual([2], fake_batch_shape_)
    self.assertAllClose(
        actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
    self.assertAllClose(
        np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=0., rtol=1e-5)
    self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
    self.assertAllClose(
        actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
def testMatrixEvent(self):
  """Checks shapes, log_prob and prob for a matrix-valued event.

  Builds a static and a dynamic (unknown-shape `tf.Variable`) variant of a
  distribution from `self._cls()` over `tfd.Sample(tfd.Normal(...))` with
  event shape `[2, 3, 3]` and a `DummyMatrixTransform` bijector, then compares
  results against a numpy/scipy reference.
  """
  loc = 0.
  batched_loc = [loc] * 2
  batched_loc_var = tf.Variable(
      batched_loc, shape=tf.TensorShape(None), name='dynamic_batch_shape')
  event_shape = [2, 3, 3]
  event_shape_var = tf.Variable(
      np.int32(event_shape),
      shape=tf.TensorShape(None),
      name='dynamic_event_shape')

  scale = 2.
  fake_mvn_dynamic = self._cls()(
      distribution=tfd.Sample(
          tfd.Normal(loc=batched_loc_var, scale=scale),
          sample_shape=event_shape_var),
      bijector=DummyMatrixTransform(),
      validate_args=True)

  fake_mvn_static = self._cls()(
      distribution=tfd.Sample(
          tfd.Normal(loc=batched_loc, scale=scale),
          sample_shape=event_shape),
      bijector=DummyMatrixTransform(),
      validate_args=True)

  def actual_mvn_log_prob(x):
    # This distribution is the normal PDF, reduced over the
    # last 3 dimensions + a jacobian term which corresponds
    # to the determinant of x.
    return (
        np.sum(stats.norm(loc, scale).logpdf(x), axis=(-1, -2, -3)) +
        np.sum(np.linalg.det(x), axis=-1))

  self.assertAllEqual([2, 3, 3], fake_mvn_static.event_shape)
  self.assertAllEqual([2], fake_mvn_static.batch_shape)

  # Static shapes of the dynamic variant are only asserted unknown in graph
  # mode.
  if not tf.executing_eagerly():
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
    self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

  num_samples = 5e3
  self.evaluate(
      [event_shape_var.initializer, batched_loc_var.initializer])
  for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
    # Ensure sample works by checking first, second moments.
    # NOTE(review): no moments are computed here; only the first five draws
    # are used below.
    y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
    x = y[0:5, ...]
    [
        x_,
        fake_event_shape_,
        fake_batch_shape_,
        fake_log_prob_,
        fake_prob_,
    ] = self.evaluate([
        x,
        fake_mvn.event_shape_tensor(),
        fake_mvn.batch_shape_tensor(),
        fake_mvn.log_prob(x),
        fake_mvn.prob(x),
    ])

    # Ensure all other functions work as intended.
    self.assertAllEqual([5, 2, 2, 3, 3], x_.shape)
    self.assertAllEqual([2, 3, 3], fake_event_shape_)
    self.assertAllEqual([2], fake_batch_shape_)
    self.assertAllClose(
        actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
    # With this many dimensions and samples, the direct space probability
    # may underflow.
    self.assertAllClose(
        np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=1e-12, rtol=1e-5)
def testSampleGammaLogRateLogSpaceDerivatives(self):
  """Cross-checks four equivalent ways of drawing log-gamma samples.

  The four samplers differ in whether the rate is given directly or as
  `log_rate`, and in whether the log is taken inside `random_gamma`
  (`log_space=True`) or applied afterwards. Their samples are tested against
  the CDF of `Log()(Gamma(conc, rate))`, and their gradients with respect to
  concentration and rate are compared pairwise.
  """
  conc = tf.constant(np.linspace(.8, 1.2, 5), tf.float64)
  shuffled_rate = np.linspace(.5, 2, 5)
  np.random.shuffle(shuffled_rate)
  rate = tf.constant(shuffled_rate, tf.float64)
  n = int(1e5)
  seed = test_util.test_seed()

  # Each sampler should sample the same distribution.
  def log_space_with_rate(c, r):
    return gamma_lib.random_gamma([n], c, r, seed=seed, log_space=True)

  def log_space_with_log_rate(c, r):
    return gamma_lib.random_gamma(
        [n], c, log_rate=tf.math.log(r), seed=seed, log_space=True)

  def log_of_sample_with_rate(c, r):
    return tf.math.log(gamma_lib.random_gamma([n], c, r, seed=seed))

  def log_of_sample_with_log_rate(c, r):
    return tf.math.log(
        gamma_lib.random_gamma([n], c, log_rate=tf.math.log(r), seed=seed))

  samplers = [
      log_space_with_rate,
      log_space_with_log_rate,
      log_of_sample_with_rate,
      log_of_sample_with_log_rate,
  ]

  samps = []
  dconc = []
  drate = []
  for sampler in samplers:
    # Take samples without the nonlinearity.
    samps.append(sampler(conc, rate))
    # We compute gradient through a nonlinearity to catch a class of errors.
    _, (dc_i, dr_i) = tfp.math.value_and_gradient(
        lambda c, r: tf.reduce_mean(tf.square(sampler(c, r))),  # pylint: disable=cell-var-from-loop
        (conc, rate))
    dconc.append(dc_i)
    drate.append(dr_i)

  compared_pairs = ((0, 1), (0, 2), (1, 3))

  # Assert d rate correctness. Note that the non-logspace derivative for rate
  # depends on the realized sample whereas the logspace one does not. Also,
  # comparing grads with differently-placed log/exp is numerically perilous.
  for first, second in compared_pairs:
    self.assertAllClose(drate[first], drate[second], rtol=0.06)

  # Assert sample correctness. If incorrect, dconc will be incorrect.
  min_samples = self.evaluate(
      st.min_num_samples_for_dkwm_cdf_test(
          discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
  self.assertLess(min_samples, n)
  equiv_dist = tfb.Log()(tfd.Gamma(conc, rate))
  for samp in samps:
    self.evaluate(
        st.assert_true_cdf_equal_by_dkwm(
            samp, equiv_dist.cdf, false_fail_rate=1e-9))

  # Assert d concentration correctness. These are sensitive to sample values,
  # which are more strongly affected by the log/exp, thus looser tolerances.
  for first, second in compared_pairs:
    self.assertAllClose(dconc[first], dconc[second], rtol=0.06)
def gen_samples(concentration, rate):
  """Draws `num_samples` samples from `Gamma(concentration, rate)`.

  `num_samples` is read from the enclosing scope.
  """
  gamma = tfd.Gamma(concentration, rate)
  return gamma.sample(num_samples, seed=test_util.test_seed())
def tfp_gamma(a, b):
  """Draws `num_samples` samples from a validated `Gamma(a, b)`.

  `num_samples` is read from the enclosing scope.
  """
  gamma = tfd.Gamma(concentration=a, rate=b, validate_args=True)
  return gamma.sample(num_samples, seed=test_util.test_seed())
def test_batch_of_filters(self):
  """Runs a particle filter over a `[3, 2]` batch of position/velocity models.

  Checks the shapes of the filter outputs, that weighted particle means track
  the observed positions and true velocities within tolerance, that the
  per-step velocity spread shrinks between the first and last step, and that
  `reconstruct_trajectories` and `infer_trajectories` return batched results
  of the expected shapes.
  """
  batch_shape = [3, 2]
  num_particles = 1000
  num_timesteps = 40

  # Batch of priors on object 1D positions and velocities.
  initial_state_prior = tfd.JointDistributionNamed({
      'position': tfd.Normal(loc=0., scale=tf.ones(batch_shape)),
      'velocity': tfd.Normal(loc=0., scale=tf.ones(batch_shape) * 0.1)
  })

  def transition_fn(_, previous_state):
    # Position advances by the previous velocity; velocity drifts slightly.
    return tfd.JointDistributionNamed({
        'position':
            tfd.Normal(
                loc=previous_state['position'] + previous_state['velocity'],
                scale=0.1),
        'velocity':
            tfd.Normal(loc=previous_state['velocity'], scale=0.01)
    })

  def observation_fn(_, state):
    # Only the position is observed, with Normal noise.
    return tfd.Normal(loc=state['position'], scale=0.1)

  # Batch of synthetic observations.
  # NOTE(review): these numpy draws are not seeded in this block.
  true_initial_positions = np.random.randn(*batch_shape).astype(
      self.dtype)
  true_velocities = 0.1 * np.random.randn(*batch_shape).astype(
      self.dtype)
  observed_positions = (
      true_velocities *
      np.arange(num_timesteps).astype(
          self.dtype)[..., tf.newaxis, tf.newaxis] +
      true_initial_positions)

  (particles,
   log_weights,
   parent_indices,
   incremental_log_marginal_likelihoods) = self.evaluate(
       tfp.experimental.mcmc.particle_filter(
           observations=observed_positions,
           initial_state_prior=initial_state_prior,
           transition_fn=transition_fn,
           observation_fn=observation_fn,
           num_particles=num_particles,
           seed=test_util.test_seed()))

  self.assertAllEqual(particles['position'].shape,
                      [num_timesteps, num_particles] + batch_shape)
  self.assertAllEqual(particles['velocity'].shape,
                      [num_timesteps, num_particles] + batch_shape)
  self.assertAllEqual(parent_indices.shape,
                      [num_timesteps, num_particles] + batch_shape)
  self.assertAllEqual(incremental_log_marginal_likelihoods.shape,
                      [num_timesteps] + batch_shape)

  # Weighted mean over the particle axis (axis 1) should track observations.
  self.assertAllClose(
      self.evaluate(
          tf.reduce_sum(tf.exp(log_weights) * particles['position'],
                        axis=1)),
      observed_positions,
      atol=0.1)

  velocity_means = tf.reduce_sum(
      tf.exp(log_weights) * particles['velocity'], axis=1)
  self.assertAllClose(
      self.evaluate(tf.reduce_mean(velocity_means, axis=0)),
      true_velocities,
      atol=0.05)

  # Uncertainty in velocity should decrease over time.
  velocity_stddev = self.evaluate(
      tf.math.reduce_std(particles['velocity'], axis=1))
  self.assertAllLess((velocity_stddev[-1] - velocity_stddev[0]), 0.)

  trajectories = self.evaluate(
      tfp.experimental.mcmc.reconstruct_trajectories(
          particles, parent_indices))
  self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                      trajectories['position'].shape)
  self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                      trajectories['velocity'].shape)

  # Verify that `infer_trajectories` also works on batches.
  trajectories, incremental_log_marginal_likelihoods = self.evaluate(
      tfp.experimental.mcmc.infer_trajectories(
          observations=observed_positions,
          initial_state_prior=initial_state_prior,
          transition_fn=transition_fn,
          observation_fn=observation_fn,
          num_particles=num_particles,
          seed=test_util.test_seed()))
  self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                      trajectories['position'].shape)
  self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                      trajectories['velocity'].shape)
  self.assertAllEqual(incremental_log_marginal_likelihoods.shape,
                      [num_timesteps] + batch_shape)
def test_epidemiological_model(self):
  """Infers trajectories for a toy SIR model and checks susceptibles never rise."""
  # A toy, discrete version of an SIR (Susceptible, Infected, Recovered)
  # model (https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology)

  population_size = 1000
  infection_rate = tf.convert_to_tensor(1.1)
  infectious_period = tf.convert_to_tensor(8.0)

  initial_state_prior = tfd.JointDistributionNamed({
      'susceptible': tfd.Deterministic(999.),
      'infected': tfd.Deterministic(1.),
      'new_infections': tfd.Deterministic(1.),
      'new_recoveries': tfd.Deterministic(0.)
  })

  # Dynamics model: new infections and recoveries are given by the SIR
  # model with Poisson noise.
  def infection_dynamics(_, previous_state):
    new_infections = tfd.Poisson(
        infection_rate * previous_state['infected'] *
        previous_state['susceptible'] / population_size)
    new_recoveries = tfd.Poisson(previous_state['infected'] /
                                 infectious_period)

    # The two callables below are handed to `JointDistributionNamed`; their
    # argument names match keys of the dict built at the end of this function.
    def susceptible(new_infections):
      # Clamped at zero so the count cannot go negative.
      return tfd.Deterministic(
          ps.maximum(0., previous_state['susceptible'] - new_infections))

    def infected(new_infections, new_recoveries):
      # Clamped at zero so the count cannot go negative.
      return tfd.Deterministic(
          ps.maximum(
              0.,
              previous_state['infected'] + new_infections - new_recoveries))

    return tfd.JointDistributionNamed({
        'new_infections': new_infections,
        'new_recoveries': new_recoveries,
        'susceptible': susceptible,
        'infected': infected
    })

  # Observation model: each day we detect new cases, noisily.
  def infection_observations(_, state):
    return tfd.Poisson(state['infected'])

  # pylint: disable=bad-whitespace
  observations = tf.convert_to_tensor([
      0.,     4.,   1.,   5.,  23.,  27.,  75., 127., 248., 384., 540., 683.,
      714., 611., 561., 493., 385., 348., 300., 277., 249., 219., 216., 174.,
      132., 122., 115.,  99.,  76.,  84.,  77.,  56.,  42.,  56.,  46.,  38.,
      34.,   44.,  25.,  27.
  ])
  # pylint: enable=bad-whitespace

  trajectories, _ = self.evaluate(
      tfp.experimental.mcmc.infer_trajectories(
          observations=observations,
          initial_state_prior=initial_state_prior,
          transition_fn=infection_dynamics,
          observation_fn=infection_observations,
          num_particles=100,
          seed=test_util.test_seed()))

  # The susceptible population should decrease over time.
  # The assertion is on consecutive-step differences being <= 0.
  self.assertAllLessEqual(
      trajectories['susceptible'][1:, ...] -
      trajectories['susceptible'][:-1, ...],
      0.0)
def testSingularScaleRaises(self):
  """Expects a 'Singular' op error when the scale diagonal contains a zero."""
  loc = [-1., 1]
  singular_diag = [1., 0]
  with self.assertRaisesOpError('Singular'):
    mvn = tfd.MultivariateNormalDiag(loc, singular_diag, validate_args=True)
    draw = mvn.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def testAssertsPositiveRate(self):
  """Expects an op error when sampling a Poisson with a negative rate entry."""
  bad_rate = tf.Variable([1., 2., -3.])
  self.evaluate(bad_rate.initializer)
  expected_message = 'Argument `rate` must be positive.'
  with self.assertRaisesOpError(expected_message):
    poisson = self._make_poisson(rate=bad_rate, validate_args=True)
    draw = poisson.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def testVectorParams(self):
  """Checks the sample shape for length-1 `loc` and scale-diagonal vectors."""
  loc = [-1.]
  scale_diag = [-5.]
  mvn = tfd.MultivariateNormalDiag(loc, scale_diag, validate_args=True)
  draws = mvn.sample(3, seed=test_util.test_seed())
  self.assertAllEqual([3, 1], draws.shape)
def testDistribution(self, dist_name, data):
  """Checks variable handling of a drawn distribution backed by Variables.

  For a distribution drawn with `enable_vars=True` this verifies that:
    * Variable parameters are returned unchanged by the accessor properties;
    * statistics, `sample` and the probability evaluators stay within a bound
      on how often each variable is read;
    * `sample` (when fully reparameterized), `kl_divergence` and `log_prob`
      yield non-None gradients with respect to the variables, except for
      distributions listed in the corresponding exemption collections.

  Args:
    dist_name: Name of the distribution to draw, forwarded to
      `dhps.distributions`.
    data: Object providing `draw(...)` for the hypothesis strategies used
      here.
  """
  seed = test_util.test_seed()
  # Explicitly draw event_dim here to avoid relying on _params_event_ndims
  # later, so this test can support distributions that do not implement the
  # slicing protocol.
  event_dim = data.draw(hps.integers(min_value=2, max_value=6))
  dist = data.draw(
      dhps.distributions(
          dist_name=dist_name, event_dim=event_dim, enable_vars=True))
  batch_shape = dist.batch_shape
  batch_shape2 = data.draw(
      tfp_hps.broadcast_compatible_shape(batch_shape))
  dist2 = data.draw(
      dhps.distributions(
          dist_name=dist_name,
          batch_shape=batch_shape2,
          event_dim=event_dim,
          enable_vars=True))
  # NOTE(review): only `dist`'s variables are initialized here, not `dist2`'s
  # -- confirm that is intended.
  self.evaluate([var.initializer for var in dist.variables])

  # Check that the distribution passes Variables through to the accessor
  # properties (without converting them to Tensor or anything like that).
  for k, v in six.iteritems(dist.parameters):
    if not tensor_util.is_ref(v):
      continue
    self.assertIs(getattr(dist, k), v)

  # Check that standard statistics do not read distribution parameters more
  # than twice (once in the stat itself and up to once in any validation
  # assertions).
  max_permissible = 2 + extra_tensor_conversions_allowed(dist)
  # Three of the six statistics are drawn per example; sorted for a stable
  # iteration order.
  for stat in sorted(
      data.draw(
          hps.sets(
              hps.one_of(
                  map(hps.just, [
                      'covariance', 'entropy', 'mean', 'mode', 'stddev',
                      'variance'
                  ])),
              min_size=3,
              max_size=3))):
    hp.note('Testing excessive var usage in {}.{}'.format(
        dist_name, stat))
    try:
      with tfp_hps.assert_no_excessive_var_usage(
          'statistic `{}` of `{}`'.format(stat, dist),
          max_permissible=max_permissible):
        getattr(dist, stat)()

    except NotImplementedError:
      # Statistics a distribution does not implement are skipped.
      pass

  # Check that `sample` doesn't read distribution parameters more than twice,
  # and that it produces non-None gradients (if the distribution is fully
  # reparameterized).
  with tf.GradientTape() as tape:
    # TDs do bijector assertions twice (once by distribution.sample, and once
    # by bijector.forward).
    max_permissible = 2 + extra_tensor_conversions_allowed(dist)
    with tfp_hps.assert_no_excessive_var_usage(
        'method `sample` of `{}`'.format(dist),
        max_permissible=max_permissible):
      sample = dist.sample(seed=seed)
  if dist.reparameterization_type == tfd.FULLY_REPARAMETERIZED:
    grads = tape.gradient(sample, dist.variables)
    for grad, var in zip(grads, dist.variables):
      # Strip trailing digits, underscores and colons from the variable name
      # before looking it up in the exemption table.
      var_name = var.name.rstrip('_0123456789:')
      if var_name in NO_SAMPLE_PARAM_GRADS.get(dist_name, ()):
        continue
      if grad is None:
        raise AssertionError(
            'Missing sample -> {} grad for distribution {}'.format(
                var_name, dist_name))

  # Turn off validations, since TODO(b/129271256) log_prob can choke on dist's
  # own samples.  Also, to relax conversion counts for KL (might do >2 w/
  # validate_args).
  dist = dist.copy(validate_args=False)
  dist2 = dist2.copy(validate_args=False)

  # Test that KL divergence reads distribution parameters at most once, and
  # that it produces non-None gradients.
  try:
    # Both directions are checked: KL(dist || dist2) and KL(dist2 || dist).
    for d1, d2 in (dist, dist2), (dist2, dist):
      if dist_name in SKIP_KL_CHECK_DIST_VAR_GRADS:
        continue
      with tf.GradientTape() as tape:
        with tfp_hps.assert_no_excessive_var_usage(
            '`kl_divergence` of (`{}` (vars {}), `{}` (vars {}))'
            .format(d1, d1.variables, d2, d2.variables),
            max_permissible=1
        ):  # No validation => 1 convert per var.
          kl = d1.kl_divergence(d2)
      wrt_vars = list(d1.variables) + list(d2.variables)
      grads = tape.gradient(kl, wrt_vars)
      for grad, var in zip(grads, wrt_vars):
        if grad is None and dist_name not in NO_KL_PARAM_GRADS:
          raise AssertionError(
              'Missing KL({} || {}) -> {} grad:\n'  # pylint: disable=duplicate-string-formatting-argument
              '{} vars: {}\n{} vars: {}'.format(
                  d1, d2, var, d1, d1.variables, d2, d2.variables))
  except NotImplementedError:
    # Raised by kl_divergence if no registered KL is found.
    pass

  # Test that log_prob produces non-None gradients, except for distributions
  # on the NO_LOG_PROB_PARAM_GRADS blocklist.
  if dist_name not in NO_LOG_PROB_PARAM_GRADS:
    with tf.GradientTape() as tape:
      lp = dist.log_prob(tf.stop_gradient(sample))
    grads = tape.gradient(lp, dist.variables)
    for grad, var in zip(grads, dist.variables):
      if grad is None:
        raise AssertionError(
            'Missing log_prob -> {} grad for distribution {}'.
            format(var, dist_name))

  # Test that all forms of probability evaluation avoid reading distribution
  # parameters more than once.
  # NOTE(review): the bound enforced below is 2 plus the per-distribution
  # allowance, not 1.
  for evaluative in sorted(
      data.draw(
          hps.sets(
              hps.one_of(
                  map(hps.just, [
                      'log_prob', 'prob', 'log_cdf', 'cdf',
                      'log_survival_function', 'survival_function'
                  ])),
              min_size=3,
              max_size=3))):
    hp.note('Testing excessive var usage in {}.{}'.format(
        dist_name, evaluative))
    try:
      # No validation => 1 convert. But for TD we allow 2:
      # dist.log_prob(bijector.inverse(samp)) + bijector.ildj(samp)
      max_permissible = 2 + extra_tensor_conversions_allowed(dist)
      with tfp_hps.assert_no_excessive_var_usage(
          'evaluative `{}` of `{}`'.format(evaluative, dist),
          max_permissible=max_permissible):
        getattr(dist, evaluative)(sample)
    except NotImplementedError:
      # Evaluators a distribution does not implement are skipped.
      pass
def tfp_exp_gamma(a, b):
  """Returns the square of `num_samples` draws from a validated `ExpGamma(a, b)`.

  `num_samples` is read from the enclosing scope.
  """
  exp_gamma = tfd.ExpGamma(concentration=a, rate=b, validate_args=True)
  draws = exp_gamma.sample(num_samples, seed=test_util.test_seed())
  return tf.math.square(draws)
def testReparameterized(self):
  """Checks that samples have a non-None gradient with respect to `probs`."""
  prob = tf.constant([0.2, 0.6])

  def draw_samples(x):
    dist = tfd.ContinuousBernoulli(probs=x, validate_args=True)
    return dist.sample(100, seed=test_util.test_seed())

  _, grad_prob = tfp.math.value_and_gradient(draw_samples, prob)
  self.assertIsNotNone(grad_prob)
def gen_samples(concentration, rate):
  """Returns the exponential of `num_samples` draws from `ExpGamma`.

  `num_samples` is read from the enclosing scope.
  """
  exp_gamma = tfd.ExpGamma(concentration, rate)
  log_draws = exp_gamma.sample(num_samples, seed=test_util.test_seed())
  return tf.math.exp(log_draws)
def testMeanNonInfNaN(self):
  """Checks that the mean is neither inf nor NaN for 1e4 uniform `probs`."""
  num_probs = int(1e4)
  prob = tf.random.uniform([num_probs], seed=test_util.test_seed())
  dist = tfd.ContinuousBernoulli(probs=prob, validate_args=True)
  mean_ = self.evaluate(dist.mean())
  has_inf = np.any(np.isinf(mean_))
  self.assertFalse(has_inf)
  has_nan = np.any(np.isnan(mean_))
  self.assertFalse(has_nan)
def testUniformSamplePdf(self):
  """Checks that every drawn sample has strictly positive density."""
  low = 10.0
  high = [11.0, 100.0]
  uniform = tfd.Uniform(low, high, validate_args=True)
  samps = uniform.sample(10, seed=test_util.test_seed())
  all_positive = tf.reduce_all(uniform.prob(samps) > 0)
  self.assertTrue(self.evaluate(all_positive))
def f(n, c1, c0):
  """Draws 100 samples from a validated `BetaBinomial(n, c1, c0)`."""
  beta_binomial = tfd.BetaBinomial(n, c1, c0, validate_args=True)
  return beta_binomial.sample(100, seed=test_util.test_seed())
def testInvalidEventDtype(self):
  """Expects a `TypeError` about `power.dtype` for a float16 `power`."""
  expected_pattern = "power.dtype .* not a supported .* type"
  with self.assertRaisesWithPredicateMatch(TypeError, expected_pattern):
    half_precision_power = tf.constant(5., dtype=tf.float16)
    zipf = tfd.Zipf(
        power=half_precision_power, dtype=tf.int32, validate_args=True)
    draw = zipf.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def testAssertionProbsLessThanZero(self):
  """Expects an op error when sampling with a negative entry in `probs`."""
  bad_probs = tf.Variable([-0.1, 0.7, 0.0])
  dist = tfd.NegativeBinomial(
      total_count=8., probs=bad_probs, validate_args=True)
  self.evaluate(bad_probs.initializer)
  expected_message = '`probs` has components less than 0.'
  with self.assertRaisesOpError(expected_message):
    draw = dist.sample(seed=test_util.test_seed())
    self.evaluate(draw)
def run_hmc_on_model(
    model,
    num_chains,
    num_steps,
    num_leapfrog_steps,
    step_size,
    target_accept_prob=0.9,
    seed=None,
    dtype=tf.float32,
    use_xla=False,
):
  """Runs step-size-adapted HMC on a model and summarizes the chains.

  Args:
    model: The model to validate.
    num_chains: Number of chains to run in parallel.
    num_steps: Total number of steps to take. The first half are used to warm
      up the sampler.
    num_leapfrog_steps: Number of leapfrog steps to take.
    step_size: Step size to use.
    target_accept_prob: Target acceptance probability.
    seed: Optional seed to use. By default, `test_util.test_seed()` is used.
    dtype: DType to use for the algorithm.
    use_xla: Whether to use XLA.

  Returns:
    mcmc_results: `MCMCResults`.
  """
  step_size = tf.convert_to_tensor(step_size, dtype)

  def target_log_prob_fn(*x):
    # The kernel passes flat state parts; restore the model's structure.
    structured = tf.nest.pack_sequence_as(model.dtype, x)
    return model.unnormalized_log_prob(structured)

  if seed is None:
    seed = test_util.test_seed()
  if tf.executing_eagerly():
    # TODO(b/68017812,b/141368747): remove once eager correctly supports seed.
    tf.random.set_seed(seed)
    seed = None

  def initial_part(bijector, event_shape):
    # Each chain starts at the bijector's image of zero.
    zeros = tf.zeros([num_chains] + list(event_shape), dtype=dtype)
    return bijector(zeros)

  current_state = tf.nest.map_structure(
      initial_part, model.default_event_space_bijector, model.event_shape)

  # tfp.mcmc only works well with lists.
  current_state = tf.nest.flatten(current_state)

  per_part_step_sizes = [
      tf.fill(part.shape, step_size) for part in current_state
  ]
  base_kernel = tfp.mcmc.HamiltonianMonteCarlo(
      target_log_prob_fn=target_log_prob_fn,
      num_leapfrog_steps=num_leapfrog_steps,
      step_size=per_part_step_sizes,
      seed=seed)
  transformed_kernel = tfp.mcmc.TransformedTransitionKernel(
      base_kernel, tf.nest.flatten(model.default_event_space_bijector))

  # The first half of the steps is burn-in, and adaptation covers 80% of it.
  half_steps = num_steps // 2
  adaptive_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
      transformed_kernel,
      num_adaptation_steps=int(half_steps * 0.8),
      target_accept_prob=target_accept_prob)

  def trace_is_accepted(_, pkr):
    return pkr.inner_results.inner_results.is_accepted

  def run_chain():
    return tfp.mcmc.sample_chain(
        current_state=current_state,
        kernel=adaptive_kernel,
        num_results=half_steps,
        num_burnin_steps=half_steps,
        trace_fn=trace_is_accepted,
        parallel_iterations=1)

  compiled_run = tf.function(
      run_chain, autograph=False, experimental_compile=use_xla)
  chain, is_accepted = compiled_run()

  accept_rate = tf.reduce_mean(tf.cast(is_accepted, dtype))

  def chain_ess(c):
    return tfp.mcmc.effective_sample_size(
        c, cross_chain_dims=1, filter_beyond_positive_pairs=True)

  ess = tf.nest.map_structure(chain_ess, chain)
  r_hat = tf.nest.map_structure(tfp.mcmc.potential_scale_reduction, chain)

  structured_chain = tf.nest.pack_sequence_as(
      model.default_event_space_bijector, chain)
  return MCMCResults(
      chain=structured_chain,
      accept_rate=accept_rate,
      ess=ess,
      r_hat=r_hat,
  )
def _seed(seed=None):
  """Returns `seed`, defaulting to the test seed, and seeds TF when eager.

  Args:
    seed: Optional seed; when `None`, `test_util.test_seed()` is used.

  Returns:
    The seed that was selected.
  """
  if seed is None:
    seed = test_util.test_seed()
  if tf.executing_eagerly():
    tf.random.set_seed(seed)
  return seed
def test_dict_sample_log_prob(self):
  """Checks a `JointDistributionNamed` built from a plain `dict` model.

  Verifies the resolved dependency graph, the component distribution types,
  the dtype structure, static and tensor-valued batch/event shapes, and that
  the joint log-prob matches the sum of the component log-probs.
  """
  # pylint: disable=bad-whitespace
  model = dict(
      e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
      scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
      loc=tfd.Normal(loc=0, scale=2.),
      m=tfd.Normal,
      x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12))
  d = tfd.JointDistributionNamed(model, validate_args=True)
  # pylint: enable=bad-whitespace

  expected_graph = (
      ('e', ()),
      ('scale', ('e',)),
      ('loc', ()),
      ('m', ('loc', 'scale')),
      ('x', ('m',)),
  )
  self.assertEqual(expected_graph, d.resolve_graph())

  xs = d.sample(seed=test_util.test_seed())
  self.assertLen(xs, 5)
  # We'll verify the shapes work as intended when we plumb these back into the
  # respective log_probs.

  ds, _ = d.sample_distributions(value=xs, seed=test_util.test_seed())
  self.assertLen(ds, 5)
  expected_types = (
      ('e', tfd.Independent),
      ('scale', tfd.Gamma),
      ('loc', tfd.Normal),
      ('m', tfd.Normal),
      ('x', tfd.Sample),
  )
  for name, dist_type in expected_types:
    self.assertIsInstance(ds[name], dist_type)

  # Static properties.
  expected_dtype = {
      'e': tf.float32,
      'scale': tf.float32,
      'loc': tf.float32,
      'm': tf.float32,
      'x': tf.int32
  }
  self.assertAllEqual(expected_dtype, d.dtype)

  batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
      [d.batch_shape_tensor(), d.event_shape_tensor()])

  expected_batch_shape = {'e': [], 'scale': [], 'loc': [], 'm': [], 'x': []}
  batch_tensorshape = d.batch_shape
  for name, shape in expected_batch_shape.items():
    self.assertAllEqual(shape, batch_tensorshape[name])
    self.assertAllEqual(shape, batch_shape_tensor_[name])

  expected_event_shape = {
      'e': [2],
      'scale': [],
      'loc': [],
      'm': [],
      'x': [12]
  }
  event_tensorshape = d.event_shape
  for name, shape in expected_event_shape.items():
    self.assertAllEqual(shape, event_tensorshape[name])
    self.assertAllEqual(shape, event_shape_tensor_[name])

  expected_jlp = sum(ds[name].log_prob(xs[name]) for name in ds)
  actual_jlp = d.log_prob(xs)
  expected_jlp_, actual_jlp_ = self.evaluate([expected_jlp, actual_jlp])
  self.assertAllClose(expected_jlp_, actual_jlp_, atol=0., rtol=1e-4)
def _verifySampleAndPdfConsistency(self, vmf, rtol=0.075):
  """Verifies samples are consistent with the PDF using importance sampling.

  In particular, we verify an estimate of the surface area of the
  n-dimensional hypersphere, and of the surface areas of the spherical caps
  demarcated by a handful of survival rates.

  Args:
    vmf: A `VonMisesFisher` distribution instance.
    rtol: Relative difference tolerable.
  """
  dim = tf.compat.dimension_value(vmf.event_shape[-1])
  nsamples = 50000
  draws = vmf.sample(
      sample_shape=[nsamples], seed=tfp_test_util.test_seed())
  draws = tf.debugging.check_numerics(draws, 'samples')
  log_prob = tf.debugging.check_numerics(vmf.log_prob(draws), 'log_prob')
  # Importance weights are 1 / p(x), so their mean estimates the area.
  log_importance = -log_prob
  log_mean_importance = (
      tf.reduce_logsumexp(input_tensor=log_importance, axis=0) -
      tf.math.log(tf.cast(nsamples, dtype=tf.float32)))
  sphere_surface_area_estimate, samples, importance, conc = self.evaluate([
      tf.exp(log_mean_importance),
      draws,
      tf.exp(log_importance),
      vmf.concentration
  ])
  inverse_gamma = self.evaluate(tf.exp(-tf.math.lgamma(dim / 2)))
  true_sphere_surface_area = 2 * (np.pi)**(dim / 2) * inverse_gamma
  # Broadcast to correct size
  true_sphere_surface_area += np.zeros_like(sphere_surface_area_estimate)
  # Highly concentrated distributions do not get enough coverage to provide
  # a reasonable full-sphere surface area estimate. These are covered below
  # by CDF-based hypersphere cap surface area estimates.
  low_concentration = np.where(conc < 3)
  self.assertAllClose(
      true_sphere_surface_area[low_concentration],
      sphere_surface_area_estimate[low_concentration],
      rtol=rtol)

  # Assert surface area of hyperspherical cap For some CDFs in [.05,.45],
  # (h must be greater than 0 for the hypersphere cap surface area
  # calculation to hold).
  for survival_rate in (0.95, .9, .75, .6):
    cdf = (1 - survival_rate)
    mean_dir = self.evaluate(vmf.mean_direction)
    dotprods = np.sum(samples * mean_dir, -1)
    # Empirical estimate of the effective dot-product of the threshold that
    # selects for a given CDF level, that is the cosine of the largest
    # passable angle, or the minimum cosine for a within-CDF sample.
    dotprod_thresh = np.percentile(
        dotprods, 100 * survival_rate, axis=0, keepdims=True)
    in_cap = np.float32(dotprods > dotprod_thresh)
    sphere_cap_surface_area_ests = (
        cdf * (importance * in_cap).sum(0) / in_cap.sum(0))
    h = (1 - dotprod_thresh)
    self.assertGreaterEqual(h.min(), 0)  # h must be >= 0 for the eqn below
    cap_fraction = self.evaluate(
        tf.math.betainc((dim - 1) / 2, 0.5, 2 * h - h**2))
    true_sphere_cap_surface_area = (
        0.5 * true_sphere_surface_area * cap_fraction)
    if dim == 3:  # For 3-d we have a simpler form we can double-check.
      self.assertAllClose(2 * np.pi * h, true_sphere_cap_surface_area)

    broadcast_ests = (
        sphere_cap_surface_area_ests +
        np.zeros_like(true_sphere_cap_surface_area))
    self.assertAllClose(
        true_sphere_cap_surface_area, broadcast_ests, rtol=rtol)