def testAssertsPositiveScale(self):
     scale = tf.Variable([1., 2., -3.])
     with self.assertRaisesError('Argument `scale` must be positive.'):
         d = tfd.HalfNormal(scale=scale, validate_args=True)
         self.evaluate([v.initializer for v in d.variables])
         self.evaluate(d.sample(seed=test_util.test_seed()))
 def get_model_ready(self, dist, successes):
     return windowed_sampling._setup_mcmc(dist,
                                          self.n_chains,
                                          test_util.test_seed(),
                                          successes=successes)
    def test_ordereddict_sample_log_prob(self):
        build_ordereddict = lambda e, scale, loc, m, x: collections.OrderedDict(
            [  # pylint: disable=g-long-lambda
                ('e', e), ('scale', scale), ('loc', loc), ('m', m), ('x', x)
            ])

        # pylint: disable=bad-whitespace
        model = build_ordereddict(
            e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
            scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
            loc=tfd.Normal(loc=0, scale=2.),
            m=tfd.Normal,
            x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12))
        # pylint: enable=bad-whitespace
        d = tfd.JointDistributionNamed(model, validate_args=True)

        self.assertEqual((
            ('e', ()),
            ('scale', ('e', )),
            ('loc', ()),
            ('m', ('loc', 'scale')),
            ('x', ('m', )),
        ), d.resolve_graph())

        xs = d.sample(seed=test_util.test_seed())
        self.assertLen(xs, 5)
        # We'll verify the shapes work as intended when we plumb these back into the
        # respective log_probs.

        ds, _ = d.sample_distributions(value=xs, seed=test_util.test_seed())
        self.assertLen(ds, 5)
        values = tuple(ds.values())
        self.assertIsInstance(values[0], tfd.Independent)
        self.assertIsInstance(values[1], tfd.Gamma)
        self.assertIsInstance(values[2], tfd.Normal)
        self.assertIsInstance(values[3], tfd.Normal)
        self.assertIsInstance(values[4], tfd.Sample)

        # Static properties.
        self.assertAllEqual(
            build_ordereddict(e=tf.float32,
                              scale=tf.float32,
                              loc=tf.float32,
                              m=tf.float32,
                              x=tf.int32), d.dtype)

        batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
            [d.batch_shape_tensor(),
             d.event_shape_tensor()])

        expected_batch_shape = build_ordereddict(e=[],
                                                 scale=[],
                                                 loc=[],
                                                 m=[],
                                                 x=[])
        for (expected, actual_tensorshape,
             actual_shape_tensor_) in zip(expected_batch_shape, d.batch_shape,
                                          batch_shape_tensor_):
            self.assertAllEqual(expected, actual_tensorshape)
            self.assertAllEqual(expected, actual_shape_tensor_)

        expected_event_shape = build_ordereddict(e=[2],
                                                 scale=[],
                                                 loc=[],
                                                 m=[],
                                                 x=[12])
        for (expected, actual_tensorshape,
             actual_shape_tensor_) in zip(expected_event_shape, d.event_shape,
                                          event_shape_tensor_):
            self.assertAllEqual(expected, actual_tensorshape)
            self.assertAllEqual(expected, actual_shape_tensor_)

        expected_jlp = sum(
            d.log_prob(x) for d, x in zip(ds.values(), xs.values()))
        actual_jlp = d.log_prob(xs)
        self.assertAllClose(*self.evaluate([expected_jlp, actual_jlp]),
                            atol=0.,
                            rtol=1e-4)
    def test_latent_dirichlet_allocation(self):
        """Tests Latent Dirichlet Allocation joint model.

    The LDA generative process can be written as:

    ```none
    N[i] ~ Poisson(xi)
    theta[i] ~ Dirichlet(alpha)
    Z[i] ~ Multinomial(N[i], theta[i])
    for k in 1...K:
      X[i,k] ~ Multinomial(Z[i, k], beta[k])
    ```

    Typically `xi` is specified and `alpha`, `beta` are fit using type-II
    maximum likelihood estimators.

    Reference: http://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf
    """

        # Hyperparameters.
        num_topics = 3
        num_words = 10
        avg_doc_length = 5
        u = tfd.Uniform(low=-1., high=1.)
        alpha = tfp.util.TransformedVariable(u.sample(
            [num_topics], seed=test_util.test_seed()),
                                             tfb.Softplus(),
                                             name='alpha')
        beta = tf.Variable(u.sample([num_topics, num_words],
                                    seed=test_util.test_seed()),
                           name='beta')

        # LDA Model.
        # Note near 1:1 with mathematical specification. The main distinction is the
        # use of Independent--this lets us easily aggregate multinomials across
        # topics (and in any "shape" of documents).
        lda = tfd.JointDistributionSequential(
            [
                tfd.Poisson(rate=avg_doc_length),  # n
                tfd.Dirichlet(concentration=alpha),  # theta
                lambda theta, n: tfd.Multinomial(total_count=n, probs=theta
                                                 ),  # z
                lambda z: tfd.Independent(  # x  pylint: disable=g-long-lambda
                    tfd.Multinomial(total_count=z, logits=beta),
                    reinterpreted_batch_ndims=1),
            ],
            validate_args=True)

        # Now, let's sample some "documents" and compute the log-prob of each.
        docs_shape = [2, 4]  # That is, 8 docs in the shape of [2, 4].
        [n, theta, z, x] = lda.sample(docs_shape, seed=test_util.test_seed())
        log_probs = lda.log_prob([n, theta, z, x])
        self.assertEqual(docs_shape, log_probs.shape)

        # Verify we correctly track trainable variables.
        self.assertLen(lda.trainable_variables, 2)
        self.assertIs(alpha.pretransformed_input, lda.trainable_variables[0])
        self.assertIs(beta, lda.trainable_variables[1])

        # Ensure we can compute gradients.
        with tf.GradientTape() as tape:
            # Note: The samples are not taped, hence implicitly "stop_gradient."
            negloglik = -lda.log_prob([n, theta, z, x])
        grads = tape.gradient(negloglik, lda.trainable_variables)

        self.assertLen(grads, 2)
        self.assertAllEqual((alpha.pretransformed_input.shape, beta.shape),
                            (grads[0].shape, grads[1].shape))
        self.assertAllNotNone(grads)
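The docstring above notes that `alpha` and `beta` are typically fit by type-II maximum likelihood. A minimal sketch of a single fitting step, reusing the `lda`, `n`, `theta`, `z`, `x` objects from this test, is shown below; the optimizer and learning rate are illustrative choices (not part of the original test), and a full type-II fit would marginalize the latent variables rather than condition on one sample of them.

        # Minimal sketch (not part of the original test): one gradient step on
        # the negative joint log-prob with respect to alpha and beta.
        optimizer = tf.optimizers.Adam(learning_rate=0.05)
        with tf.GradientTape() as tape:
            negloglik = -tf.reduce_mean(lda.log_prob([n, theta, z, x]))
        grads = tape.gradient(negloglik, lda.trainable_variables)
        optimizer.apply_gradients(zip(grads, lda.trainable_variables))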
Example #5
 def testZipfSample_AvoidsInfiniteLoop(self):
   zipf = tfd.Zipf(power=1., validate_args=False)
   n = 1000
   self.evaluate(zipf.sample(n, seed=test_util.test_seed()))
Example #6
 def tf_exp_gamma(a, b):
     return tf.math.log(
         tf.random.gamma(shape=[num_samples],
                         alpha=a,
                         beta=b,
                         seed=test_util.test_seed()))
Example #7
 def testAssertsPositiveRate(self):
     rate = tf.Variable([1., 2., -3.])
     self.evaluate(rate.initializer)
     with self.assertRaisesOpError('Argument `rate` must be positive.'):
         d = tfd.ExpGamma(concentration=[5.], rate=rate, validate_args=True)
         self.evaluate(d.sample(seed=test_util.test_seed()))
    def _testMVN(self,
                 base_distribution_class,
                 base_distribution_kwargs,
                 event_shape=()):
        # Base distribution shapes must be compatible w/bijector; most bijectors are
        # batch_shape agnostic and only care about event_ndims.
        # In the case of `ScaleMatvecTriL`, if we got it wrong then it would fire an
        # exception due to incompatible dimensions.
        event_shape_var = tf.Variable(np.int32(event_shape),
                                      shape=tf.TensorShape(None),
                                      name='dynamic_event_shape')

        base_distribution_dynamic_kwargs = {
            k: tf.Variable(v,
                           shape=tf.TensorShape(None),
                           name='dynamic_{}'.format(k))
            for k, v in base_distribution_kwargs.items()
        }
        fake_mvn_dynamic = self._cls()(
            distribution=tfd.Sample(base_distribution_class(
                validate_args=True, **base_distribution_dynamic_kwargs),
                                    sample_shape=event_shape_var),
            bijector=tfb.Chain([
                tfb.Shift(shift=self._shift),
                tfb.ScaleMatvecTriL(scale_tril=self._tril)
            ]),
            validate_args=True)

        fake_mvn_static = self._cls()(
            distribution=tfd.Sample(base_distribution_class(
                validate_args=True, **base_distribution_kwargs),
                                    sample_shape=event_shape),
            bijector=tfb.Chain([
                tfb.Shift(shift=self._shift),
                tfb.ScaleMatvecTriL(scale_tril=self._tril)
            ]),
            validate_args=True)

        actual_mean = np.tile(self._shift,
                              [2, 1])  # ScaleMatvecTriL elided tile.
        actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

        def actual_mvn_log_prob(x):
            return np.concatenate([
                [  # pylint: disable=g-complex-comprehension
                    stats.multivariate_normal(actual_mean[i],
                                              actual_cov[i]).logpdf(x[:, i, :])
                ] for i in range(len(actual_cov))
            ]).T

        actual_mvn_entropy = np.concatenate([[
            stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()
        ] for i in range(len(actual_cov))])

        self.assertAllEqual([3], fake_mvn_static.event_shape)
        self.assertAllEqual([2], fake_mvn_static.batch_shape)

        if not tf.executing_eagerly():
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.event_shape)
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.batch_shape)

        num_samples = 7e3
        for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
            # Ensure sample works by checking first, second moments.
            y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
            x = y[0:5, ...]
            sample_mean = tf.reduce_mean(y, axis=0)
            centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
            sample_cov = tf.matmul(centered_y, centered_y,
                                   transpose_b=True) / num_samples
            self.evaluate([
                v.initializer
                for v in base_distribution_dynamic_kwargs.values()
            ] + [event_shape_var.initializer])
            [
                sample_mean_,
                sample_cov_,
                x_,
                fake_event_shape_,
                fake_batch_shape_,
                fake_log_prob_,
                fake_prob_,
                fake_mean_,
                fake_entropy_,
            ] = self.evaluate([
                sample_mean,
                sample_cov,
                x,
                fake_mvn.event_shape_tensor(),
                fake_mvn.batch_shape_tensor(),
                fake_mvn.log_prob(x),
                fake_mvn.prob(x),
                fake_mvn.mean(),
                fake_mvn.entropy(),
            ])

            self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
            self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

            # Ensure all other functions work as intended.
            self.assertAllEqual([5, 2, 3], x_.shape)
            self.assertAllEqual([3], fake_event_shape_)
            self.assertAllEqual([2], fake_batch_shape_)
            self.assertAllClose(actual_mvn_log_prob(x_),
                                fake_log_prob_,
                                atol=0.,
                                rtol=1e-6)
            self.assertAllClose(np.exp(actual_mvn_log_prob(x_)),
                                fake_prob_,
                                atol=0.,
                                rtol=1e-5)
            self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
            self.assertAllClose(actual_mvn_entropy,
                                fake_entropy_,
                                atol=0.,
                                rtol=1e-6)
    def testMatrixEvent(self):
        loc = 0.
        batched_loc = [loc] * 2
        batched_loc_var = tf.Variable(batched_loc,
                                      shape=tf.TensorShape(None),
                                      name='dynamic_batch_shape')

        event_shape = [2, 3, 3]
        event_shape_var = tf.Variable(np.int32(event_shape),
                                      shape=tf.TensorShape(None),
                                      name='dynamic_event_shape')

        scale = 2.
        fake_mvn_dynamic = self._cls()(distribution=tfd.Sample(
            tfd.Normal(loc=batched_loc_var, scale=scale),
            sample_shape=event_shape_var),
                                       bijector=DummyMatrixTransform(),
                                       validate_args=True)

        fake_mvn_static = self._cls()(
            distribution=tfd.Sample(tfd.Normal(loc=batched_loc, scale=scale),
                                    sample_shape=event_shape),
            bijector=DummyMatrixTransform(),
            validate_args=True)

        def actual_mvn_log_prob(x):
            # This is the normal log-PDF summed over the last 3 dimensions,
            # plus a Jacobian term corresponding to the determinant of x.
            return (
                np.sum(stats.norm(loc, scale).logpdf(x), axis=(-1, -2, -3)) +
                np.sum(np.linalg.det(x), axis=-1))

        self.assertAllEqual([2, 3, 3], fake_mvn_static.event_shape)
        self.assertAllEqual([2], fake_mvn_static.batch_shape)

        if not tf.executing_eagerly():
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.event_shape)
            self.assertAllEqual(tf.TensorShape(None),
                                fake_mvn_dynamic.batch_shape)

        num_samples = 5e3
        self.evaluate(
            [event_shape_var.initializer, batched_loc_var.initializer])
        for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
            # Ensure sample works by checking first, second moments.
            y = fake_mvn.sample(int(num_samples), seed=test_util.test_seed())
            x = y[0:5, ...]
            [
                x_,
                fake_event_shape_,
                fake_batch_shape_,
                fake_log_prob_,
                fake_prob_,
            ] = self.evaluate([
                x,
                fake_mvn.event_shape_tensor(),
                fake_mvn.batch_shape_tensor(),
                fake_mvn.log_prob(x),
                fake_mvn.prob(x),
            ])

            # Ensure all other functions work as intended.
            self.assertAllEqual([5, 2, 2, 3, 3], x_.shape)
            self.assertAllEqual([2, 3, 3], fake_event_shape_)
            self.assertAllEqual([2], fake_batch_shape_)
            self.assertAllClose(actual_mvn_log_prob(x_),
                                fake_log_prob_,
                                atol=0.,
                                rtol=1e-6)
            # With this many dimensions and samples, the direct space probability
            # may underflow.
            self.assertAllClose(np.exp(actual_mvn_log_prob(x_)),
                                fake_prob_,
                                atol=1e-12,
                                rtol=1e-5)
Example #10
    def testSampleGammaLogRateLogSpaceDerivatives(self):
        conc = tf.constant(np.linspace(.8, 1.2, 5), tf.float64)
        rate = np.linspace(.5, 2, 5)
        np.random.shuffle(rate)
        rate = tf.constant(rate, tf.float64)
        n = int(1e5)

        seed = test_util.test_seed()
        # pylint: disable=g-long-lambda
        lambdas = [  # Each should sample the same distribution.
            lambda c, r: gamma_lib.random_gamma(
                [n], c, r, seed=seed, log_space=True),
            lambda c, r: gamma_lib.random_gamma(
                [n], c, log_rate=tf.math.log(r), seed=seed, log_space=True),
            lambda c, r: tf.math.log(
                gamma_lib.random_gamma([n], c, r, seed=seed)),
            lambda c, r: tf.math.log(
                gamma_lib.random_gamma(
                    [n], c, log_rate=tf.math.log(r), seed=seed)),
        ]
        # pylint: enable=g-long-lambda
        samps = []
        dconc = []
        drate = []
        for fn in lambdas:
            # Take samples without the nonlinearity.
            samps.append(fn(conc, rate))
            # We compute gradient through a nonlinearity to catch a class of errors.
            _, (dc_i, dr_i) = tfp.math.value_and_gradient(
                lambda c, r: tf.reduce_mean(tf.square(fn(c, r))), (conc, rate))  # pylint: disable=cell-var-from-loop
            dconc.append(dc_i)
            drate.append(dr_i)

        # Assert d rate correctness. Note that the non-logspace derivative for rate
        # depends on the realized sample whereas the logspace one does not. Also,
        # comparing grads with differently-placed log/exp is numerically perilous.
        self.assertAllClose(drate[0], drate[1], rtol=0.06)
        self.assertAllClose(drate[0], drate[2], rtol=0.06)
        self.assertAllClose(drate[1], drate[3], rtol=0.06)

        # Assert sample correctness. If incorrect, dconc will be incorrect.
        self.assertLess(
            self.evaluate(
                st.min_num_samples_for_dkwm_cdf_test(discrepancy=0.04,
                                                     false_fail_rate=1e-9,
                                                     false_pass_rate=1e-9)), n)
        equiv_dist = tfb.Log()(tfd.Gamma(conc, rate))
        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm(samps[0],
                                             equiv_dist.cdf,
                                             false_fail_rate=1e-9))
        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm(samps[1],
                                             equiv_dist.cdf,
                                             false_fail_rate=1e-9))
        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm(samps[2],
                                             equiv_dist.cdf,
                                             false_fail_rate=1e-9))
        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm(samps[3],
                                             equiv_dist.cdf,
                                             false_fail_rate=1e-9))

        # Assert d concentration correctness. These are sensitive to sample values,
        # which are more strongly affected by the log/exp, thus looser tolerances.
        self.assertAllClose(dconc[0], dconc[1], rtol=0.06)
        self.assertAllClose(dconc[0], dconc[2], rtol=0.06)
        self.assertAllClose(dconc[1], dconc[3], rtol=0.06)
Example #11
 def gen_samples(concentration, rate):
     return tfd.Gamma(concentration,
                      rate).sample(num_samples,
                                   seed=test_util.test_seed())
Example #12
 def tfp_gamma(a, b):
     return tfd.Gamma(concentration=a, rate=b,
                      validate_args=True).sample(
                          num_samples, seed=test_util.test_seed())
Example #13
    def test_batch_of_filters(self):

        batch_shape = [3, 2]
        num_particles = 1000
        num_timesteps = 40

        # Batch of priors on object 1D positions and velocities.
        initial_state_prior = tfd.JointDistributionNamed({
            'position':
            tfd.Normal(loc=0., scale=tf.ones(batch_shape)),
            'velocity':
            tfd.Normal(loc=0., scale=tf.ones(batch_shape) * 0.1)
        })

        def transition_fn(_, previous_state):
            return tfd.JointDistributionNamed({
                'position':
                tfd.Normal(loc=previous_state['position'] +
                           previous_state['velocity'],
                           scale=0.1),
                'velocity':
                tfd.Normal(loc=previous_state['velocity'], scale=0.01)
            })

        def observation_fn(_, state):
            return tfd.Normal(loc=state['position'], scale=0.1)

        # Batch of synthetic observations.
        true_initial_positions = np.random.randn(*batch_shape).astype(
            self.dtype)
        true_velocities = 0.1 * np.random.randn(*batch_shape).astype(
            self.dtype)
        observed_positions = (
            true_velocities * np.arange(num_timesteps).astype(
                self.dtype)[..., tf.newaxis, tf.newaxis] +
            true_initial_positions)

        (particles, log_weights, parent_indices,
         incremental_log_marginal_likelihoods) = self.evaluate(
             tfp.experimental.mcmc.particle_filter(
                 observations=observed_positions,
                 initial_state_prior=initial_state_prior,
                 transition_fn=transition_fn,
                 observation_fn=observation_fn,
                 num_particles=num_particles,
                 seed=test_util.test_seed()))

        self.assertAllEqual(particles['position'].shape,
                            [num_timesteps, num_particles] + batch_shape)
        self.assertAllEqual(particles['velocity'].shape,
                            [num_timesteps, num_particles] + batch_shape)
        self.assertAllEqual(parent_indices.shape,
                            [num_timesteps, num_particles] + batch_shape)
        self.assertAllEqual(incremental_log_marginal_likelihoods.shape,
                            [num_timesteps] + batch_shape)

        self.assertAllClose(self.evaluate(
            tf.reduce_sum(tf.exp(log_weights) * particles['position'],
                          axis=1)),
                            observed_positions,
                            atol=0.1)

        velocity_means = tf.reduce_sum(tf.exp(log_weights) *
                                       particles['velocity'],
                                       axis=1)
        self.assertAllClose(self.evaluate(
            tf.reduce_mean(velocity_means, axis=0)),
                            true_velocities,
                            atol=0.05)

        # Uncertainty in velocity should decrease over time.
        velocity_stddev = self.evaluate(
            tf.math.reduce_std(particles['velocity'], axis=1))
        self.assertAllLess((velocity_stddev[-1] - velocity_stddev[0]), 0.)

        trajectories = self.evaluate(
            tfp.experimental.mcmc.reconstruct_trajectories(
                particles, parent_indices))
        self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                            trajectories['position'].shape)
        self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                            trajectories['velocity'].shape)

        # Verify that `infer_trajectories` also works on batches.
        trajectories, incremental_log_marginal_likelihoods = self.evaluate(
            tfp.experimental.mcmc.infer_trajectories(
                observations=observed_positions,
                initial_state_prior=initial_state_prior,
                transition_fn=transition_fn,
                observation_fn=observation_fn,
                num_particles=num_particles,
                seed=test_util.test_seed()))
        self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                            trajectories['position'].shape)
        self.assertAllEqual([num_timesteps, num_particles] + batch_shape,
                            trajectories['velocity'].shape)
        self.assertAllEqual(incremental_log_marginal_likelihoods.shape,
                            [num_timesteps] + batch_shape)
Example #14
    def test_epidemiological_model(self):
        # A toy, discrete version of an SIR (Susceptible, Infected, Recovered)
        # model (https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology)

        population_size = 1000
        infection_rate = tf.convert_to_tensor(1.1)
        infectious_period = tf.convert_to_tensor(8.0)

        initial_state_prior = tfd.JointDistributionNamed({
            'susceptible':
            tfd.Deterministic(999.),
            'infected':
            tfd.Deterministic(1.),
            'new_infections':
            tfd.Deterministic(1.),
            'new_recoveries':
            tfd.Deterministic(0.)
        })

        # Dynamics model: new infections and recoveries are given by the SIR
        # model with Poisson noise.
        def infection_dynamics(_, previous_state):
            new_infections = tfd.Poisson(
                infection_rate * previous_state['infected'] *
                previous_state['susceptible'] / population_size)
            new_recoveries = tfd.Poisson(previous_state['infected'] /
                                         infectious_period)

            def susceptible(new_infections):
                return tfd.Deterministic(
                    ps.maximum(0.,
                               previous_state['susceptible'] - new_infections))

            def infected(new_infections, new_recoveries):
                return tfd.Deterministic(
                    ps.maximum(
                        0., previous_state['infected'] + new_infections -
                        new_recoveries))

            return tfd.JointDistributionNamed({
                'new_infections': new_infections,
                'new_recoveries': new_recoveries,
                'susceptible': susceptible,
                'infected': infected
            })

        # Observation model: each day we detect new cases, noisily.
        def infection_observations(_, state):
            return tfd.Poisson(state['infected'])

        # pylint: disable=bad-whitespace
        observations = tf.convert_to_tensor([
            0., 4., 1., 5., 23., 27., 75., 127., 248., 384., 540., 683., 714.,
            611., 561., 493., 385., 348., 300., 277., 249., 219., 216., 174.,
            132., 122., 115., 99., 76., 84., 77., 56., 42., 56., 46., 38., 34.,
            44., 25., 27.
        ])
        # pylint: enable=bad-whitespace

        trajectories, _ = self.evaluate(
            tfp.experimental.mcmc.infer_trajectories(
                observations=observations,
                initial_state_prior=initial_state_prior,
                transition_fn=infection_dynamics,
                observation_fn=infection_observations,
                num_particles=100,
                seed=test_util.test_seed()))

        # The susceptible population should decrease over time.
        self.assertAllLessEqual(
            trajectories['susceptible'][1:, ...] -
            trajectories['susceptible'][:-1, ...], 0.0)
Example #15
 def testSingularScaleRaises(self):
   mu = [-1., 1]
   diag = [1., 0]
   with self.assertRaisesOpError('Singular'):
     dist = tfd.MultivariateNormalDiag(mu, diag, validate_args=True)
     self.evaluate(dist.sample(seed=test_util.test_seed()))
Example #16
 def testAssertsPositiveRate(self):
     rate = tf.Variable([1., 2., -3.])
     self.evaluate(rate.initializer)
     with self.assertRaisesOpError('Argument `rate` must be positive.'):
         dist = self._make_poisson(rate=rate, validate_args=True)
         self.evaluate(dist.sample(seed=test_util.test_seed()))
Example #17
 def testVectorParams(self):
   mu = [-1.]
   diag = [-5.]
   dist = tfd.MultivariateNormalDiag(mu, diag, validate_args=True)
   self.assertAllEqual([3, 1], dist.sample(
       3, seed=test_util.test_seed()).shape)
    def testDistribution(self, dist_name, data):
        seed = test_util.test_seed()
        # Explicitly draw event_dim here to avoid relying on _params_event_ndims
        # later, so this test can support distributions that do not implement the
        # slicing protocol.
        event_dim = data.draw(hps.integers(min_value=2, max_value=6))
        dist = data.draw(
            dhps.distributions(dist_name=dist_name,
                               event_dim=event_dim,
                               enable_vars=True))
        batch_shape = dist.batch_shape
        batch_shape2 = data.draw(
            tfp_hps.broadcast_compatible_shape(batch_shape))
        dist2 = data.draw(
            dhps.distributions(dist_name=dist_name,
                               batch_shape=batch_shape2,
                               event_dim=event_dim,
                               enable_vars=True))
        self.evaluate([var.initializer for var in dist.variables])

        # Check that the distribution passes Variables through to the accessor
        # properties (without converting them to Tensor or anything like that).
        for k, v in six.iteritems(dist.parameters):
            if not tensor_util.is_ref(v):
                continue
            self.assertIs(getattr(dist, k), v)

        # Check that standard statistics do not read distribution parameters more
        # than twice (once in the stat itself and up to once in any validation
        # assertions).
        max_permissible = 2 + extra_tensor_conversions_allowed(dist)
        for stat in sorted(
                data.draw(
                    hps.sets(hps.one_of(
                        map(hps.just, [
                            'covariance', 'entropy', 'mean', 'mode', 'stddev',
                            'variance'
                        ])),
                             min_size=3,
                             max_size=3))):
            hp.note('Testing excessive var usage in {}.{}'.format(
                dist_name, stat))
            try:
                with tfp_hps.assert_no_excessive_var_usage(
                        'statistic `{}` of `{}`'.format(stat, dist),
                        max_permissible=max_permissible):
                    getattr(dist, stat)()

            except NotImplementedError:
                pass

        # Check that `sample` doesn't read distribution parameters more than twice,
        # and that it produces non-None gradients (if the distribution is fully
        # reparameterized).
        with tf.GradientTape() as tape:
            # TDs do bijector assertions twice (once by distribution.sample, and once
            # by bijector.forward).
            max_permissible = 2 + extra_tensor_conversions_allowed(dist)
            with tfp_hps.assert_no_excessive_var_usage(
                    'method `sample` of `{}`'.format(dist),
                    max_permissible=max_permissible):
                sample = dist.sample(seed=seed)
        if dist.reparameterization_type == tfd.FULLY_REPARAMETERIZED:
            grads = tape.gradient(sample, dist.variables)
            for grad, var in zip(grads, dist.variables):
                var_name = var.name.rstrip('_0123456789:')
                if var_name in NO_SAMPLE_PARAM_GRADS.get(dist_name, ()):
                    continue
                if grad is None:
                    raise AssertionError(
                        'Missing sample -> {} grad for distribution {}'.format(
                            var_name, dist_name))

        # Turn off validations, since TODO(b/129271256) log_prob can choke on dist's
        # own samples.  Also, to relax conversion counts for KL (might do >2 w/
        # validate_args).
        dist = dist.copy(validate_args=False)
        dist2 = dist2.copy(validate_args=False)

        # Test that KL divergence reads distribution parameters at most once, and
        # that it produces non-None gradients.
        try:
            for d1, d2 in (dist, dist2), (dist2, dist):
                if dist_name in SKIP_KL_CHECK_DIST_VAR_GRADS:
                    continue
                with tf.GradientTape() as tape:
                    with tfp_hps.assert_no_excessive_var_usage(
                            '`kl_divergence` of (`{}` (vars {}), `{}` (vars {}))'
                            .format(d1, d1.variables, d2, d2.variables),
                            max_permissible=1
                    ):  # No validation => 1 convert per var.
                        kl = d1.kl_divergence(d2)
                wrt_vars = list(d1.variables) + list(d2.variables)
                grads = tape.gradient(kl, wrt_vars)
                for grad, var in zip(grads, wrt_vars):
                    if grad is None and dist_name not in NO_KL_PARAM_GRADS:
                        raise AssertionError(
                            'Missing KL({} || {}) -> {} grad:\n'  # pylint: disable=duplicate-string-formatting-argument
                            '{} vars: {}\n{} vars: {}'.format(
                                d1, d2, var, d1, d1.variables, d2,
                                d2.variables))
        except NotImplementedError:
            # Raised by kl_divergence if no registered KL is found.
            pass

        # Test that log_prob produces non-None gradients, except for distributions
        # on the NO_LOG_PROB_PARAM_GRADS blocklist.
        if dist_name not in NO_LOG_PROB_PARAM_GRADS:
            with tf.GradientTape() as tape:
                lp = dist.log_prob(tf.stop_gradient(sample))
            grads = tape.gradient(lp, dist.variables)
            for grad, var in zip(grads, dist.variables):
                if grad is None:
                    raise AssertionError(
                        'Missing log_prob -> {} grad for distribution {}'.
                        format(var, dist_name))

        # Test that all forms of probability evaluation avoid reading distribution
        # parameters more than once.
        for evaluative in sorted(
                data.draw(
                    hps.sets(hps.one_of(
                        map(hps.just, [
                            'log_prob', 'prob', 'log_cdf', 'cdf',
                            'log_survival_function', 'survival_function'
                        ])),
                             min_size=3,
                             max_size=3))):
            hp.note('Testing excessive var usage in {}.{}'.format(
                dist_name, evaluative))
            try:
                # No validation => 1 convert. But for TD we allow 2:
                # dist.log_prob(bijector.inverse(samp)) + bijector.ildj(samp)
                max_permissible = 2 + extra_tensor_conversions_allowed(dist)
                with tfp_hps.assert_no_excessive_var_usage(
                        'evaluative `{}` of `{}`'.format(evaluative, dist),
                        max_permissible=max_permissible):
                    getattr(dist, evaluative)(sample)
            except NotImplementedError:
                pass
Example #19
 def tfp_exp_gamma(a, b):
     return tf.math.square(
         tfd.ExpGamma(concentration=a, rate=b,
                      validate_args=True).sample(
                          num_samples, seed=test_util.test_seed()))
 def testReparameterized(self):
   prob = tf.constant([0.2, 0.6])
   _, grad_prob = tfp.math.value_and_gradient(
       lambda x: tfd.ContinuousBernoulli(probs=x, validate_args=True).sample(  # pylint: disable=g-long-lambda
           100, seed=test_util.test_seed()), prob)
   self.assertIsNotNone(grad_prob)
Example #21
 def gen_samples(concentration, rate):
     return tf.math.exp(
         tfd.ExpGamma(concentration,
                      rate).sample(num_samples,
                                   seed=test_util.test_seed()))
 def testMeanNonInfNaN(self):
   prob = tf.random.uniform([int(1e4)], seed=test_util.test_seed())
   dist = tfd.ContinuousBernoulli(probs=prob, validate_args=True)
   mean_ = self.evaluate(dist.mean())
   self.assertFalse(np.any(np.isinf(mean_)))
   self.assertFalse(np.any(np.isnan(mean_)))
Example #23
 def testUniformSamplePdf(self):
     a = 10.0
     b = [11.0, 100.0]
     uniform = tfd.Uniform(a, b, validate_args=True)
     samps = uniform.sample(10, seed=test_util.test_seed())
     self.assertTrue(self.evaluate(tf.reduce_all(uniform.prob(samps) > 0)))
 def f(n, c1, c0):
     dist = tfd.BetaBinomial(n, c1, c0, validate_args=True)
     return dist.sample(100, seed=test_util.test_seed())
Example #25
 def testInvalidEventDtype(self):
   with self.assertRaisesWithPredicateMatch(
       TypeError, "power.dtype .* not a supported .* type"):
     power = tf.constant(5., dtype=tf.float16)
     zipf = tfd.Zipf(power=power, dtype=tf.int32, validate_args=True)
     self.evaluate(zipf.sample(seed=test_util.test_seed()))
Example #26
 def testAssertionProbsLessThanZero(self):
     x = tf.Variable([-0.1, 0.7, 0.0])
     d = tfd.NegativeBinomial(total_count=8., probs=x, validate_args=True)
     self.evaluate(x.initializer)
     with self.assertRaisesOpError('`probs` has components less than 0.'):
         self.evaluate(d.sample(seed=test_util.test_seed()))
Example #27
def run_hmc_on_model(
    model,
    num_chains,
    num_steps,
    num_leapfrog_steps,
    step_size,
    target_accept_prob=0.9,
    seed=None,
    dtype=tf.float32,
    use_xla=False,
):
    """Runs HMC on a target.

  Args:
    model: The model to validate.
    num_chains: Number of chains to run in parallel.
    num_steps: Total number of steps to take. The first half are used to warm up
      the sampler.
    num_leapfrog_steps: Number of leapfrog steps to take.
    step_size: Step size to use.
    target_accept_prob: Target acceptance probability.
    seed: Optional seed to use. By default, `test_util.test_seed()` is used.
    dtype: DType to use for the algorithm.
    use_xla: Whether to use XLA.

  Returns:
    mcmc_results: `MCMCResults`.
  """
    step_size = tf.convert_to_tensor(step_size, dtype)

    def target_log_prob_fn(*x):
        x = tf.nest.pack_sequence_as(model.dtype, x)
        return model.unnormalized_log_prob(x)

    if seed is None:
        seed = test_util.test_seed()
    if tf.executing_eagerly():
        # TODO(b/68017812,b/141368747): remove once eager correctly supports seed.
        tf.random.set_seed(seed)
        seed = None
    current_state = tf.nest.map_structure(
        lambda b, e: b(  # pylint: disable=g-long-lambda
            tf.zeros([num_chains] + list(e), dtype=dtype)),
        model.default_event_space_bijector,
        model.event_shape)

    # tfp.mcmc only works well with lists.
    current_state = tf.nest.flatten(current_state)

    hmc = tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        num_leapfrog_steps=num_leapfrog_steps,
        step_size=[tf.fill(s.shape, step_size) for s in current_state],
        seed=seed)
    hmc = tfp.mcmc.TransformedTransitionKernel(
        hmc, tf.nest.flatten(model.default_event_space_bijector))
    hmc = tfp.mcmc.DualAveragingStepSizeAdaptation(
        hmc,
        num_adaptation_steps=int(num_steps // 2 * 0.8),
        target_accept_prob=target_accept_prob)

    chain, is_accepted = tf.function(
        lambda: tfp.mcmc.sample_chain(  # pylint: disable=g-long-lambda
            current_state=current_state,
            kernel=hmc,
            num_results=num_steps // 2,
            num_burnin_steps=num_steps // 2,
            trace_fn=lambda _, pkr:  # pylint: disable=g-long-lambda
            (pkr.inner_results.inner_results.is_accepted),
            parallel_iterations=1),
        autograph=False,
        experimental_compile=use_xla)()

    accept_rate = tf.reduce_mean(tf.cast(is_accepted, dtype))
    ess = tf.nest.map_structure(
        lambda c: tfp.mcmc.effective_sample_size(  # pylint: disable=g-long-lambda
            c,
            cross_chain_dims=1,
            filter_beyond_positive_pairs=True),
        chain)
    r_hat = tf.nest.map_structure(tfp.mcmc.potential_scale_reduction, chain)

    mcmc_results = MCMCResults(
        chain=tf.nest.pack_sequence_as(model.default_event_space_bijector,
                                       chain),
        accept_rate=accept_rate,
        ess=ess,
        r_hat=r_hat,
    )
    return mcmc_results
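A hypothetical call site for the helper above (argument values are illustrative; `model` is assumed to expose the `dtype`, `event_shape`, `default_event_space_bijector`, and `unnormalized_log_prob` attributes the helper relies on):

mcmc_results = run_hmc_on_model(
    model,
    num_chains=64,
    num_steps=1000,
    num_leapfrog_steps=8,
    step_size=0.1)
# After a successful run, `mcmc_results.accept_rate` should sit near
# `target_accept_prob` and each entry of `mcmc_results.r_hat` near 1.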
Example #28
 def _seed(seed=None):
     seed = test_util.test_seed() if seed is None else seed
     if tf.executing_eagerly():
         tf.random.set_seed(seed)
     return seed
    def test_dict_sample_log_prob(self):
        # pylint: disable=bad-whitespace
        d = tfd.JointDistributionNamed(dict(
            e=tfd.Independent(tfd.Exponential(rate=[100, 120]), 1),
            scale=lambda e: tfd.Gamma(concentration=e[..., 0], rate=e[..., 1]),
            loc=tfd.Normal(loc=0, scale=2.),
            m=tfd.Normal,
            x=lambda m: tfd.Sample(tfd.Bernoulli(logits=m), 12)),
                                       validate_args=True)
        # pylint: enable=bad-whitespace

        self.assertEqual((
            ('e', ()),
            ('scale', ('e', )),
            ('loc', ()),
            ('m', ('loc', 'scale')),
            ('x', ('m', )),
        ), d.resolve_graph())

        xs = d.sample(seed=test_util.test_seed())
        self.assertLen(xs, 5)
        # We'll verify the shapes work as intended when we plumb these back into the
        # respective log_probs.

        ds, _ = d.sample_distributions(value=xs, seed=test_util.test_seed())
        self.assertLen(ds, 5)
        self.assertIsInstance(ds['e'], tfd.Independent)
        self.assertIsInstance(ds['scale'], tfd.Gamma)
        self.assertIsInstance(ds['loc'], tfd.Normal)
        self.assertIsInstance(ds['m'], tfd.Normal)
        self.assertIsInstance(ds['x'], tfd.Sample)

        # Static properties.
        self.assertAllEqual(
            {
                'e': tf.float32,
                'scale': tf.float32,
                'loc': tf.float32,
                'm': tf.float32,
                'x': tf.int32
            }, d.dtype)

        batch_shape_tensor_, event_shape_tensor_ = self.evaluate(
            [d.batch_shape_tensor(),
             d.event_shape_tensor()])

        expected_batch_shape = {
            'e': [],
            'scale': [],
            'loc': [],
            'm': [],
            'x': []
        }
        batch_tensorshape = d.batch_shape
        for k in expected_batch_shape:
            self.assertAllEqual(expected_batch_shape[k], batch_tensorshape[k])
            self.assertAllEqual(expected_batch_shape[k],
                                batch_shape_tensor_[k])

        expected_event_shape = {
            'e': [2],
            'scale': [],
            'loc': [],
            'm': [],
            'x': [12]
        }
        event_tensorshape = d.event_shape
        for k in expected_event_shape:
            self.assertAllEqual(expected_event_shape[k], event_tensorshape[k])
            self.assertAllEqual(expected_event_shape[k],
                                event_shape_tensor_[k])

        expected_jlp = sum(ds[k].log_prob(xs[k]) for k in ds.keys())
        actual_jlp = d.log_prob(xs)
        self.assertAllClose(*self.evaluate([expected_jlp, actual_jlp]),
                            atol=0.,
                            rtol=1e-4)
Example #30
    def _verifySampleAndPdfConsistency(self, vmf, rtol=0.075):
        """Verifies samples are consistent with the PDF using importance sampling.

    In particular, we verify an estimate of the surface area of the n-dimensional
    hypersphere, and the surface areas of the spherical caps demarcated by
    a handful of survival rates.

    Args:
      vmf: A `VonMisesFisher` distribution instance.
      rtol: Relative difference tolerable.
    """
        dim = tf.compat.dimension_value(vmf.event_shape[-1])
        nsamples = 50000
        samples = vmf.sample(sample_shape=[nsamples],
                             seed=tfp_test_util.test_seed())
        samples = tf.debugging.check_numerics(samples, 'samples')
        log_prob = vmf.log_prob(samples)
        log_prob = tf.debugging.check_numerics(log_prob, 'log_prob')
        log_importance = -log_prob
        sphere_surface_area_estimate, samples, importance, conc = self.evaluate(
            [
                tf.exp(
                    tf.reduce_logsumexp(input_tensor=log_importance, axis=0) -
                    tf.math.log(tf.cast(nsamples, dtype=tf.float32))), samples,
                tf.exp(log_importance), vmf.concentration
            ])
        true_sphere_surface_area = 2 * (np.pi)**(dim / 2) * self.evaluate(
            tf.exp(-tf.math.lgamma(dim / 2)))
        # Broadcast to correct size
        true_sphere_surface_area += np.zeros_like(sphere_surface_area_estimate)
        # Highly concentrated distributions do not get enough coverage to provide
        # a reasonable full-sphere surface area estimate. These are covered below
        # by CDF-based hypersphere cap surface area estimates.
        self.assertAllClose(true_sphere_surface_area[np.where(conc < 3)],
                            sphere_surface_area_estimate[np.where(conc < 3)],
                            rtol=rtol)

        # Assert the surface area of the hyperspherical cap for some CDFs in
        # [.05, .45] (h must be greater than 0 for the hypersphere cap surface
        # area calculation to hold).
        for survival_rate in 0.95, .9, .75, .6:
            cdf = (1 - survival_rate)
            mean_dir = self.evaluate(vmf.mean_direction)
            dotprods = np.sum(samples * mean_dir, -1)
            # Empirical estimate of the effective dot-product of the threshold that
            # selects for a given CDF level, that is the cosine of the largest
            # passable angle, or the minimum cosine for a within-CDF sample.
            dotprod_thresh = np.percentile(dotprods,
                                           100 * survival_rate,
                                           axis=0,
                                           keepdims=True)
            dotprod_above_thresh = np.float32(dotprods > dotprod_thresh)
            sphere_cap_surface_area_ests = (
                cdf * (importance * dotprod_above_thresh).sum(0) /
                dotprod_above_thresh.sum(0))
            h = (1 - dotprod_thresh)
            self.assertGreaterEqual(h.min(),
                                    0)  # h must be >= 0 for the eqn below
            true_sphere_cap_surface_area = (
                0.5 * true_sphere_surface_area *
                self.evaluate(tf.math.betainc(
                    (dim - 1) / 2, 0.5, 2 * h - h**2)))
            if dim == 3:  # For 3-d we have a simpler form we can double-check.
                self.assertAllClose(2 * np.pi * h,
                                    true_sphere_cap_surface_area)

            self.assertAllClose(true_sphere_cap_surface_area,
                                sphere_cap_surface_area_ests +
                                np.zeros_like(true_sphere_cap_surface_area),
                                rtol=rtol)
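The full-sphere estimate above relies on the importance-sampling identity E_{x ~ vmf}[1 / p(x)] = integral over the sphere of dA, i.e. averaging the reciprocal density of vMF samples integrates the constant 1 over the hypersphere. A hypothetical driver for the check, modeled on how the helper expects to be called (the test name, mean directions, and concentration values are illustrative, not taken from the original suite):

    def testSampleAndPdfConsistency3d(self):
        # Illustrative driver (not from the original suite): a batch of 3-d vMF
        # distributions with unit mean directions and a spread of
        # concentrations, fed to the consistency check above.
        mean_dirs = tf.math.l2_normalize(
            [[1., 2., 3.], [-2., -3., -1.]], axis=-1)
        vmf = tfd.VonMisesFisher(
            mean_direction=mean_dirs, concentration=[0., 4.],
            validate_args=True)
        self._verifySampleAndPdfConsistency(vmf)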