Ejemplo n.º 1
0
  def testSampleGammaLogRateLogSpaceDerivatives(self):
    """Compares log-space gamma samples and gradients across samplers.

    Four samplers are built from `gamma_lib.random_gamma`: `rate` versus
    `log_rate` parameterization, each with either `log_space=True` or an
    explicit `tf.math.log` applied to the draw. All four share one seed. Their
    samples are tested against `tfb.Log()(tfd.Gamma(conc, rate))`, and their
    gradients with respect to `conc` and `rate` are compared pairwise.
    """
    conc = tf.constant(np.linspace(.8, 1.2, 5), tf.float64)
    shuffled_rate = np.linspace(.5, 2, 5)
    np.random.shuffle(shuffled_rate)
    rate = tf.constant(shuffled_rate, tf.float64)
    num_draws = int(1e5)

    seed = test_util.test_seed()

    # Each sampler below should draw from the same distribution.
    def rate_native_log(c, r):
      return gamma_lib.random_gamma(
          [num_draws], c, r, seed=seed, log_space=True)

    def log_rate_native_log(c, r):
      return gamma_lib.random_gamma(
          [num_draws], c, log_rate=tf.math.log(r), seed=seed, log_space=True)

    def rate_explicit_log(c, r):
      return tf.math.log(gamma_lib.random_gamma(
          [num_draws], c, r, seed=seed))

    def log_rate_explicit_log(c, r):
      return tf.math.log(gamma_lib.random_gamma(
          [num_draws], c, log_rate=tf.math.log(r), seed=seed))

    samplers = (rate_native_log, log_rate_native_log,
                rate_explicit_log, log_rate_explicit_log)

    samps, dconc, drate = [], [], []
    for sampler in samplers:
      # Plain draws, without the nonlinearity used for the gradients.
      samps.append(sampler(conc, rate))

      # Differentiating through a square catches a class of gradient errors.
      def mean_square(c, r):
        return tf.reduce_mean(tf.square(sampler(c, r)))  # pylint: disable=cell-var-from-loop

      _, (grad_conc, grad_rate) = tfp.math.value_and_gradient(
          mean_square, (conc, rate))
      dconc.append(grad_conc)
      drate.append(grad_rate)

    # Index pairs of samplers whose gradients are compared with each other.
    compared_pairs = ((0, 1), (0, 2), (1, 3))

    # Rate gradients. The non-logspace derivative for rate depends on the
    # realized sample whereas the logspace one does not, and comparing grads
    # with differently-placed log/exp is numerically perilous.
    for i, j in compared_pairs:
      self.assertAllClose(drate[i], drate[j], rtol=0.06)

    # Sample correctness. If the samples are wrong, dconc will be wrong too.
    min_samples = self.evaluate(
        st.min_num_samples_for_dkwm_cdf_test(
            discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
    self.assertLess(min_samples, num_draws)
    equiv_dist = tfb.Log()(tfd.Gamma(conc, rate))
    for samp in samps:
      self.evaluate(st.assert_true_cdf_equal_by_dkwm(
          samp, equiv_dist.cdf, false_fail_rate=1e-9))

    # Concentration gradients. These are sensitive to the sample values, which
    # are more strongly affected by the log/exp placement.
    for i, j in compared_pairs:
      self.assertAllClose(dconc[i], dconc[j], rtol=0.06)
Ejemplo n.º 2
0
  def test_dkwm_cdf_one_sample_assertion(self, dtype):
    """Exercises the one-sample DKWM CDF assertion on uniform draws.

    Standard-uniform samples must pass against the identity CDF, while scaled
    and shifted uniform samples must trip the assertion.

    Args:
      dtype: Dtype the NumPy samples are cast to before testing.
    """
    rng = np.random.RandomState(seed=0)
    num_samples = 13000
    error_rate = 1e-6

    def draw(**bounds):
      # Uniform draws over the given bounds (NumPy defaults when omitted).
      return rng.uniform(size=num_samples, **bounds).astype(dtype=dtype)

    def check_against_identity(samples):
      self.evaluate(st.assert_true_cdf_equal_by_dkwm(
          samples, lambda x: x, false_fail_rate=error_rate))

    min_discrepancy = self.evaluate(
        st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
            num_samples,
            false_fail_rate=error_rate,
            false_pass_rate=error_rate))
    self.assertLess(min_discrepancy, 0.05)

    # The cdf of the standard uniform distribution is the identity, so the
    # assertion must agree.
    check_against_identity(draw())

    # A scaled and then a shifted uniform distribution do not have the
    # identity as their cdf, so the assertion must fire for each.
    mismatched_bounds = (
        {'low': 0., 'high': 0.9},
        {'low': 0.1, 'high': 1.1},
    )
    for bounds in mismatched_bounds:
      with self.assertRaisesOpError('Empirical CDF outside K-S envelope'):
        check_against_identity(draw(**bounds))
Ejemplo n.º 3
0
 def propSampleCorrectMarginals(self,
                                dist,
                                special_class,
                                under_hypothesis=False):
     """Property: one class of a multinomial sample is binomially distributed.

     Draws samples from `dist`, keeps the counts of `special_class`, and tests
     them against `tfd.Binomial(dist.total_count, probs=p)`, where `p` is the
     probability `dist` assigns to that class.

     Args:
       dist: Distribution under test; must expose `sample`, `total_count` and
         `_probs_parameter_no_checks`.
       special_class: Index of the class (last axis) to project onto.
       under_hypothesis: If True, report the expected probability and the
         observed successes through `hp.note`.
     """
     seed = test_util.test_seed()
     num_samples = 120000
     min_samples = st.min_num_samples_for_dkwm_cdf_test(
         0.02, false_fail_rate=1e-9, false_pass_rate=1e-9)
     self.assertGreater(num_samples, self.evaluate(min_samples))

     draws = dist.sample(num_samples, seed=seed)
     successes = draws[..., special_class]
     prob_success = dist._probs_parameter_no_checks()[..., special_class]
     if under_hypothesis:
         hp.note('Expected probability of success {}'.format(prob_success))
         hp.note('Successes obtained {}'.format(successes))

     expected_dist = tfd.Binomial(dist.total_count, probs=prob_success)
     left_cdf = st.left_continuous_cdf_discrete_distribution(expected_dist)
     cdf_check = st.assert_true_cdf_equal_by_dkwm(
         successes, expected_dist.cdf, left_cdf, false_fail_rate=1e-9)
     self.evaluate(cdf_check)
Ejemplo n.º 4
0
    def testSamplePoissonLowAndHighRates(self):
        """Checks rejection-sampled Poisson draws over a mix of rates.

        The samples are tested against the Poisson CDF with a DKWM test, and
        their per-rate mean and variance are compared with SciPy's values.
        """
        rate = [1., 3., 5., 6., 7., 10., 13.0, 14., 15., 18.]
        log_rate = np.log(rate)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        draw_op = poisson_dist.random_poisson_rejection_sampler(
            [num_samples, 10], log_rate, seed=test_util.test_seed())
        samples = self.evaluate(draw_op)

        poisson = tfd.Poisson(log_rate=log_rate, validate_args=True)
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples,
            poisson.cdf,
            st.left_continuous_cdf_discrete_distribution(poisson),
            false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(sample_mean, stats.poisson.mean(rate), rtol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(sample_var, stats.poisson.var(rate), rtol=0.05)
Ejemplo n.º 5
0
    def testSampleGammaLogSpace(self):
        """Checks `random_gamma(..., log_space=True)` samples.

        The draws are tested against `tfb.Log()(tfd.Gamma(...))` with a DKWM
        test; their mean is compared with `digamma(concentration) - log(rate)`
        and their variance with `polygamma(1, concentration)`.
        """
        concentration = np.linspace(.1, 2., 10)
        rate = np.linspace(.5, 2, 10)
        np.random.shuffle(rate)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        samples = gamma_lib.random_gamma(
            [num_samples],
            concentration,
            rate,
            seed=test_util.test_seed(),
            log_space=True)

        exp_gamma = tfb.Log()(
            tfd.Gamma(
                concentration=concentration, rate=rate, validate_args=True))
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples, exp_gamma.cdf, false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(
            sample_mean,
            tf.math.digamma(concentration) - tf.math.log(rate),
            rtol=0.02,
            atol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(
            sample_var, tf.math.polygamma(1., concentration), rtol=0.05)
  def test_dkwm_cdf_one_sample_batch_discrete_assertion(self, dtype):
    """Checks the batched DKWM CDF assertion on a discrete distribution.

    Categorical draws over {0, 1, 2, 3} with probabilities
    [0.1, 0.2, 0.3, 0.4] are tested against the matching step-function CDF,
    and the detectable discrepancy is required to stay below 0.05 for every
    batch member.

    Args:
      dtype: Dtype the NumPy samples are cast to before testing.
    """
    rng = np.random.RandomState(seed=0)
    num_samples = 13000
    batch_shape = [3, 2]
    shape = [num_samples] + batch_shape

    probs = [0.1, 0.2, 0.3, 0.4]
    samples = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)

    def cdf(x):
      # Cumulative probabilities of `probs`, stepping at 0, 1, 2 and 3 with
      # the step included (strict `<` comparisons).
      ones = tf.ones_like(x)
      answer = tf.where(x < 3, 0.6 * ones, ones)
      answer = tf.where(x < 2, 0.3 * ones, answer)
      answer = tf.where(x < 1, 0.1 * ones, answer)
      return tf.where(x < 0, 0 * ones, answer)

    def left_continuous_cdf(x):
      # Same steps as `cdf`, but the value at each step point excludes the
      # mass at that point (`<=` comparisons).
      ones = tf.ones_like(x)
      answer = tf.where(x <= 3, 0.6 * ones, ones)
      answer = tf.where(x <= 2, 0.3 * ones, answer)
      answer = tf.where(x <= 1, 0.1 * ones, answer)
      return tf.where(x <= 0, 0 * ones, answer)

    self.evaluate(st.assert_true_cdf_equal_by_dkwm(
        samples, cdf, left_continuous_cdf=left_continuous_cdf,
        false_fail_rate=1e-6))
    d = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
        tf.ones(batch_shape) * num_samples,
        false_fail_rate=1e-6, false_pass_rate=1e-6)
    # Assert the power bound for every batch member. Evaluating `d < 0.05`
    # without asserting on the result would check nothing.
    self.assertTrue(np.all(self.evaluate(d) < 0.05))
Ejemplo n.º 7
0
    def testSamplePoissonHighRates(self):
        """Checks `_random_poisson_noncpu` draws for rates from 10 to 14.5.

        The samples are tested against the Poisson CDF with a DKWM test, and
        their per-rate mean and variance are compared with SciPy's values.
        """
        # High rate (>= log(10.)) samples would use rejection sampling.
        rate = [10., 10.5, 11., 11.5, 12.0, 12.5, 13.0, 13.5, 14.0, 14.5]
        log_rate = np.log(rate)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        samples = poisson_lib._random_poisson_noncpu(
            shape=[num_samples],
            log_rates=log_rate,
            output_dtype=tf.float64,
            seed=test_util.test_seed())

        poisson = tfd.Poisson(log_rate=log_rate, validate_args=True)
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples,
            poisson.cdf,
            st.left_continuous_cdf_discrete_distribution(poisson),
            false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(sample_mean, stats.poisson.mean(rate), rtol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(sample_var, stats.poisson.var(rate), rtol=0.05)
Ejemplo n.º 8
0
    def testSampleGammaHighConcentration(self):
        """Checks `_random_gamma_noncpu` draws for concentrations in [10, 20].

        The samples are tested against the Gamma CDF with a DKWM test, and
        their mean and variance are compared with SciPy's gamma moments.
        """
        concentration = np.linspace(10., 20., 10)
        rate = np.float64(1.)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        samples = gamma_lib._random_gamma_noncpu(
            shape=[num_samples, 10],
            concentration=concentration,
            rate=rate,
            seed=test_util.test_seed())

        gamma = tfd.Gamma(
            concentration=concentration, rate=rate, validate_args=True)
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples, gamma.cdf, false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(
            sample_mean,
            sp_stats.gamma.mean(concentration, scale=1 / rate),
            rtol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(
            sample_var,
            sp_stats.gamma.var(concentration, scale=1 / rate),
            rtol=0.05)
Ejemplo n.º 9
0
def assert_univariate_target_conservation(test, mk_target, step_size,
                                          stackless):
    """Builds checks that one NUTS step conserves a univariate target.

    Initial states are drawn from `mk_target()`, each is advanced by a single
    `NoUTurnSampler` step, and the resulting states are tested against the
    target's CDF. Further checks cover test power, leapfrog counts and how far
    the states moved.

    Args:
      test: Test case providing `assertAllEqual`.
      mk_target: Zero-argument callable returning the target distribution,
        which must expose `sample`, `log_prob` and `cdf`.
      step_size: Step size handed to the sampler; also the lower bound
        required of the mean movement.
      stackless: Forwarded to `NoUTurnSampler(stackless=...)`.

    Returns:
      The tuple `(check_cdf_agrees, check_enough_power, check_leapfrogs_vary,
      check_leapfrogs, check_movement)`.
    """
    # Sample count limited partly by memory reliably available on Forge.  The
    # test remains reasonable even if the nuts recursion limit is severely
    # curtailed (e.g., 3 or 4 levels), so use that to recover some memory
    # footprint and bump the sample count.
    num_samples = int(5e4)
    num_steps = 1
    target_d = mk_target()
    strm = tfp.util.SeedStream(salt='univariate_nuts_test', seed=1)
    # We wrap the initial values in `tf.identity` to avoid broken gradients
    # resulting from a bijector cache hit, since bijectors of the same
    # type/parameterization now share a cache.
    # TODO(b/72831017): Fix broken gradients caused by bijector caching.
    initial_draws = target_d.sample([num_samples], seed=strm())
    initialization = tf.identity(initial_draws)

    def target(*args):
        # TODO(axch): Just use target_d.log_prob directly, and accept target_d
        # itself as an argument instead of a maker function.  Blocked by
        # b/128932888.  It would then also be nice not to eta-expand
        # target_d.log_prob; that was blocked by b/122414321, but maybe tfp's
        # port of value_and_gradients_function fixed that bug.
        return mk_target().log_prob(*args)

    operator = tfp.experimental.mcmc.NoUTurnSampler(
        target,
        step_size=step_size,
        max_tree_depth=3,
        use_auto_batching=True,
        stackless=stackless,
        unrolled_leapfrog_steps=2,
        seed=strm())
    result, extra = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=0,
        current_state=initialization,
        kernel=operator)

    # Note: sample_chain puts the chain history on top, not the (independent)
    # chains.
    test.assertAllEqual([num_steps, num_samples], result.shape)
    answer = result[0]
    check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
        answer, target_d.cdf, false_fail_rate=1e-6)

    detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
        num_samples, false_fail_rate=1e-6, false_pass_rate=1e-6)
    check_enough_power = tf1.assert_less(detectable, 0.025)

    test.assertAllEqual([num_samples], extra.leapfrogs_taken[0].shape)
    distinct_counts, _ = tf.unique(extra.leapfrogs_taken[0])
    num_distinct = tf.shape(input=distinct_counts)[0]
    check_leapfrogs_vary = tf1.assert_greater_equal(num_distinct, 3)

    avg_leapfrogs = tf.math.reduce_mean(input_tensor=extra.leapfrogs_taken[0])
    min_avg_leapfrogs = tf.constant(4, dtype=avg_leapfrogs.dtype)
    check_leapfrogs = tf1.assert_greater_equal(avg_leapfrogs,
                                               min_avg_leapfrogs)

    movement = tf.abs(answer - initialization)
    test.assertAllEqual([num_samples], movement.shape)
    # This movement distance (1 * step_size) was selected by reducing until 100
    # runs with independent seeds all passed.
    mean_movement = tf.reduce_mean(input_tensor=movement)
    check_movement = tf1.assert_greater_equal(mean_movement, 1 * step_size)

    return (check_cdf_agrees, check_enough_power, check_leapfrogs_vary,
            check_leapfrogs, check_movement)
Ejemplo n.º 10
0
 def testXLAFriendlySampler(self):
     """Checks XLA-compiled `BetaBinomial` sampling with tiny concentrations.

     The draws must contain no NaNs, must each equal either 0 or 50, and the
     indicator of "equals 50" must pass a DKWM test against
     `Bernoulli(probs=100 / 101)`. The test is skipped when executing eagerly.
     """
     if tf.executing_eagerly():
         self.skipTest(
             'XLA requires tf.function, mode switching is meaningless.')

     dist = tfd.BetaBinomial(
         total_count=50, concentration0=1e-7, concentration1=1e-5)
     seed = test_util.test_seed(sampler_type='stateless')
     num_samples = 20000
     compiled_sample = tf.function(jit_compile=True)(dist.sample)
     sample = self.evaluate(compiled_sample(num_samples, seed=seed))
     self.assertAllEqual(np.zeros_like(sample), np.isnan(sample))

     # Beta(1e-7, 1e-5) should basically always be either 1 or 0, and 1 should
     # occur with probability 100/101. Ergo, the beta binomial samples should
     # basically always be either 50 or 0, and 50 should occur with
     # probability 100/101.
     is_high = sample == 50
     is_low = sample == 0
     self.assertAllEqual(np.ones_like(sample), is_high | is_low)

     expect = tfd.Bernoulli(probs=100.0 / 101.0)
     high_indicator = tf.cast(is_high, tf.float32)
     left_cdf = st.left_continuous_cdf_discrete_distribution(expect)
     cdf_check = st.assert_true_cdf_equal_by_dkwm(
         samples=high_indicator,
         cdf=expect.cdf,
         left_continuous_cdf=left_cdf,
         false_fail_rate=1e-9)
     self.evaluate(cdf_check)

     # The DKWM test must have enough power at this sample count.
     min_samples = st.min_num_samples_for_dkwm_cdf_test(
         0.05, false_fail_rate=1e-9, false_pass_rate=1e-9)
     self.assertGreater(num_samples, self.evaluate(min_samples))
Ejemplo n.º 11
0
    def testSampleHighConcentration(self):
        """Checks `tfd.ExpGamma` samples for concentrations in [10, 20].

        The samples are tested against the distribution's own CDF with a DKWM
        test, and their mean and variance are compared with `d.mean()` and
        `d.variance()`.
        """
        concentration = np.linspace(10., 20., 10)
        rate = np.float64(1.)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        d = tfd.ExpGamma(
            concentration=concentration, rate=rate, validate_args=True)
        samples = d.sample(num_samples, seed=test_util.test_seed())
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples, d.cdf, false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(sample_mean, d.mean(), rtol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(sample_var, d.variance(), rtol=0.05)
Ejemplo n.º 12
0
    def testSamplePoissonLowRates(self):
        """Checks rejection-sampled Poisson draws for rates from 1 to 5.5.

        The samples are tested against the Poisson CDF with a DKWM test, and
        their per-rate mean and variance are compared with SciPy's values.
        """
        # Low log rate (< log(10.)) samples would use Knuth's algorithm.
        rate = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5]
        log_rate = np.log(rate)
        num_samples = int(1e5)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, num_samples)

        draw_op = poisson_dist.random_poisson_rejection_sampler(
            [num_samples, 10], log_rate, seed=test_util.test_seed())
        samples = self.evaluate(draw_op)

        poisson = tfd.Poisson(log_rate=log_rate, validate_args=True)
        cdf_check = st.assert_true_cdf_equal_by_dkwm(
            samples,
            poisson.cdf,
            st.left_continuous_cdf_discrete_distribution(poisson),
            false_fail_rate=1e-9)
        self.evaluate(cdf_check)

        sample_mean = self.evaluate(tf.math.reduce_mean(samples, axis=0))
        self.assertAllClose(sample_mean, stats.poisson.mean(rate), rtol=0.01)

        sample_var = self.evaluate(tf.math.reduce_variance(samples, axis=0))
        self.assertAllClose(sample_var, stats.poisson.var(rate), rtol=0.05)
Ejemplo n.º 13
0
 def check_catches_mistake(wrong_probs):
   """Expects the DKWM assertion to fail for draws from `wrong_probs`.

   Reads `rng`, `shape`, `dtype`, `cdf`, `left_continuous_cdf` and `self` from
   the enclosing scope.

   Args:
     wrong_probs: Category probabilities used to draw the samples.
   """
   num_categories = len(wrong_probs)
   raw_draws = rng.choice(num_categories, size=shape, p=wrong_probs)
   wrong_samples = raw_draws.astype(dtype=dtype)
   with self.assertRaisesOpError('Empirical CDF outside K-S envelope'):
     failing_check = st.assert_true_cdf_equal_by_dkwm(
         wrong_samples,
         cdf,
         left_continuous_cdf=left_continuous_cdf,
         false_fail_rate=1e-6)
     self.evaluate(failing_check)
Ejemplo n.º 14
0
    def testSampleEmpiricalCDF(self):
        """Tests `tfd.HalfStudentT` samples against its CDF with a DKWM test.

        Also requires the detectable CDF discrepancy at this sample count to
        be below 0.01.
        """
        num_samples = 300000
        error_rate = 1e-6
        dist = tfd.HalfStudentT(df=5., loc=10., scale=2., validate_args=True)
        samples = dist.sample(num_samples, seed=test_util.test_seed())

        check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
            samples, dist.cdf, false_fail_rate=error_rate)
        detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
            num_samples,
            false_fail_rate=error_rate,
            false_pass_rate=error_rate)
        check_enough_power = assert_util.assert_less(detectable, 0.01)
        self.evaluate([check_cdf_agrees, check_enough_power])
Ejemplo n.º 15
0
    def testSampleEmpiricalCDF(self):
        """Tests `tfd.PERT` samples against its CDF with a DKWM test.

        Also requires the detectable CDF discrepancy at this sample count to
        be below 0.01.
        """
        num_samples = 300000
        error_rate = 1e-6
        low, peak, high = 1., 7., 10.
        temperature = 2.
        dist = tfd.PERT(low, peak, high, temperature, validate_args=True)
        samples = dist.sample(num_samples, seed=test_util.test_seed())

        check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
            samples, dist.cdf, false_fail_rate=error_rate)
        detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
            num_samples,
            false_fail_rate=error_rate,
            false_pass_rate=error_rate)
        check_enough_power = assert_util.assert_less(detectable, 0.01)
        self.evaluate([check_cdf_agrees, check_enough_power])
    def test_dkwm_cdf_one_sample_batch_discrete_assertion(self, dtype):
        """Checks the batched DKWM CDF assertion on a discrete distribution.

        Categorical draws over {0, 1, 2, 3} with probabilities
        [0.1, 0.2, 0.3, 0.4] must pass against the matching step-function
        CDF, the detectable discrepancy must be below 0.05 for every batch
        member, and draws from two other probability vectors must trip the
        assertion.

        Args:
          dtype: Dtype the NumPy samples are cast to before testing.
        """
        rng = np.random.RandomState(seed=0)
        num_samples = 13000
        batch_shape = [3, 2]
        shape = [num_samples] + batch_shape

        probs = [0.1, 0.2, 0.3, 0.4]
        samples = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)

        # (threshold, value taken below it), applied from the top step down.
        steps = ((3, 0.6), (2, 0.3), (1, 0.1), (0, 0))

        def step_function(x, is_below):
            ones = tf.ones_like(x)
            answer = ones
            for threshold, level in steps:
                answer = tf1.where(
                    is_below(x, threshold), level * ones, answer)
            return answer

        def cdf(x):
            return step_function(x, lambda v, t: v < t)

        def left_continuous_cdf(x):
            return step_function(x, lambda v, t: v <= t)

        passing_check = st.assert_true_cdf_equal_by_dkwm(
            samples,
            cdf,
            left_continuous_cdf=left_continuous_cdf,
            false_fail_rate=1e-6)
        self.evaluate(passing_check)

        d = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
            tf.ones(batch_shape) * num_samples,
            false_fail_rate=1e-6,
            false_pass_rate=1e-6)
        self.assertTrue(np.all(self.evaluate(d) < 0.05))

        def check_catches_mistake(wrong_probs):
            # Draws from `wrong_probs` must be rejected by the assertion.
            raw_draws = rng.choice(
                len(wrong_probs), size=shape, p=wrong_probs)
            wrong_samples = raw_draws.astype(dtype=dtype)
            with self.assertRaisesOpError(
                    'Empirical CDF outside K-S envelope'):
                self.evaluate(
                    st.assert_true_cdf_equal_by_dkwm(
                        wrong_samples,
                        cdf,
                        left_continuous_cdf=left_continuous_cdf,
                        false_fail_rate=1e-6))

        for wrong_probs in ([0.1, 0.2, 0.3, 0.3, 0.1],
                            [0.2, 0.2, 0.3, 0.3]):
            check_catches_mistake(wrong_probs)
Ejemplo n.º 17
0
def assert_univariate_target_conservation(test, target_d, step_size):
    """Builds checks that one preconditioned NUTS step conserves a target.

    Initial states are drawn from `target_d`, each is advanced by a single
    `PreconditionedNoUTurnSampler` step, and the resulting states are tested
    against `target_d.cdf`. Further checks cover test power and how far the
    states moved.

    Args:
      test: Test case providing `assertAllEqual`.
      target_d: Target distribution exposing `sample`, `log_prob` and `cdf`.
      step_size: Step size handed to the sampler; also the lower bound
        required of the mean movement.

    Returns:
      The tuple `(check_cdf_agrees, check_enough_power, check_movement)`.
    """
    # Sample count limited partly by memory reliably available on Forge.  The
    # test remains reasonable even if the nuts recursion limit is severely
    # curtailed (e.g., 3 or 4 levels), so use that to recover some memory
    # footprint and bump the sample count.
    num_samples = int(5e4)
    num_steps = 1
    strm = test_util.test_seed_stream()
    # We wrap the initial values in `tf.identity` to avoid broken gradients
    # resulting from a bijector cache hit, since bijectors of the same
    # type/parameterization now share a cache.
    # TODO(b/72831017): Fix broken gradients caused by bijector caching.
    initial_draws = target_d.sample([num_samples], seed=strm())
    initialization = tf.identity(initial_draws)

    @tf.function(autograph=False)
    def run_chain():
        nuts = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
            target_d.log_prob,
            step_size=step_size,
            max_tree_depth=3,
            unrolled_leapfrog_steps=2)
        return tfp.mcmc.sample_chain(
            num_results=num_steps,
            num_burnin_steps=0,
            current_state=initialization,
            trace_fn=None,
            kernel=nuts,
            seed=strm())

    result = run_chain()
    test.assertAllEqual([num_steps, num_samples], result.shape)
    answer = result[0]
    check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
        answer, target_d.cdf, false_fail_rate=1e-6)

    detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
        num_samples, false_fail_rate=1e-6, false_pass_rate=1e-6)
    check_enough_power = assert_util.assert_less(detectable, 0.025)

    movement = tf.abs(answer - initialization)
    test.assertAllEqual([num_samples], movement.shape)
    # This movement distance (1 * step_size) was selected by reducing until 100
    # runs with independent seeds all passed.
    mean_movement = tf.reduce_mean(movement)
    check_movement = assert_util.assert_greater_equal(mean_movement,
                                                      1 * step_size)
    return (check_cdf_agrees, check_enough_power, check_movement)
Ejemplo n.º 18
0
  def testSample(self):
    """Checks `tfd.SigmoidBeta` sample shapes and distribution.

    The draws must have shape `(n,)`, must satisfy `self._kstest`, and must
    pass a DKWM test against the distribution's CDF with a detectable
    discrepancy below 0.01.
    """
    n = 500000
    error_rate = 1e-6
    a = tf.constant(1.0)
    b = tf.constant(2.0)
    d = tfd.SigmoidBeta(concentration0=a, concentration1=b, validate_args=True)
    samples = d.sample(n, seed=test_util.test_seed())
    sample_values = self.evaluate(samples)

    expected_shape = (n,)
    self.assertEqual(samples.shape, expected_shape)
    self.assertEqual(sample_values.shape, expected_shape)
    self.assertTrue(self._kstest(a, b, sample_values))

    check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
        samples, d.cdf, false_fail_rate=error_rate)
    self.evaluate(check_cdf_agrees)

    detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
        n, false_fail_rate=error_rate, false_pass_rate=error_rate)
    check_enough_power = assert_util.assert_less(detectable, 0.01)
    self.evaluate(check_enough_power)
Ejemplo n.º 19
0
def assert_univariate_target_conservation(test, target_d, step_size):
  """Builds checks that one NUTS step conserves a univariate target.

  Initial states are drawn from `target_d`, each is advanced by a single
  `tfp.mcmc.NoUTurnSampler` step, and the resulting states are tested against
  `target_d.cdf`. Further checks cover test power and how far the states
  moved.

  Args:
    test: Test case providing `assertAllEqual`.
    target_d: Target distribution exposing `sample`, `log_prob` and `cdf`.
    step_size: Step size handed to the sampler; also the lower bound required
      of the mean movement.

  Returns:
    The tuple `(check_cdf_agrees, check_enough_power, check_movement)`.
  """
  # Sample count limited partly by memory reliably available on Forge.  The
  # test remains reasonable even if the nuts recursion limit is severely
  # curtailed (e.g., 3 or 4 levels), so use that to recover some memory
  # footprint and bump the sample count.
  num_samples = int(5e4)
  num_steps = 1
  strm = tfp.util.SeedStream(salt='univariate_nuts_test', seed=1)
  initialization = target_d.sample([num_samples], seed=strm())

  @tf.function(autograph=False)
  def run_chain():
    nuts = tfp.mcmc.NoUTurnSampler(
        target_d.log_prob,
        step_size=step_size,
        max_tree_depth=3,
        unrolled_leapfrog_steps=2,
        seed=strm())
    # Only the chain states are needed; the second output is dropped.
    chain_states, _ = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=0,
        current_state=initialization,
        kernel=nuts)
    return chain_states

  result = run_chain()
  test.assertAllEqual([num_steps, num_samples], result.shape)
  answer = result[0]
  check_cdf_agrees = st.assert_true_cdf_equal_by_dkwm(
      answer, target_d.cdf, false_fail_rate=1e-6)

  detectable = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm(
      num_samples, false_fail_rate=1e-6, false_pass_rate=1e-6)
  check_enough_power = assert_util.assert_less(detectable, 0.025)

  movement = tf.abs(answer - initialization)
  test.assertAllEqual([num_samples], movement.shape)
  # This movement distance (1 * step_size) was selected by reducing until 100
  # runs with independent seeds all passed.
  mean_movement = tf.reduce_mean(movement)
  check_movement = assert_util.assert_greater_equal(
      mean_movement, 1 * step_size)
  return (check_cdf_agrees, check_enough_power, check_movement)
    def VerifySampleAndPdfConsistency(self, uniform):
        """Verifies that samples and log-probabilities of `uniform` agree.

        Two properties are checked: the density at every sample equals the
        reciprocal of the hypersphere's surface area, and the square of each
        coordinate passes a DKWM test against `Beta(0.5, (dim - 1) / 2)`.

        Args:
          uniform: A `SphericalUniform` distribution instance.
        """
        dim = tf.compat.dimension_value(uniform.event_shape[-1])
        nsamples = int(6e4)

        # The DKWM test must have enough power at this sample count.
        min_samples = self.evaluate(
            st.min_num_samples_for_dkwm_cdf_test(
                discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
        self.assertLess(min_samples, nsamples)

        samples = uniform.sample(
            sample_shape=[nsamples], seed=test_util.test_seed())
        samples = tf.debugging.check_numerics(samples, 'samples')
        log_prob = uniform.log_prob(samples)
        log_prob = self.evaluate(
            tf.debugging.check_numerics(log_prob, 'log_prob'))

        # Surface area 2 * pi**(dim / 2) / Gamma(dim / 2), with the reciprocal
        # of the gamma function obtained through lgamma.
        reciprocal_gamma = self.evaluate(tf.exp(-tf.math.lgamma(dim / 2)))
        true_sphere_surface_area = 2 * (np.pi)**(dim / 2) * reciprocal_gamma
        # Match the shape of `log_prob` for the comparison below.
        true_sphere_surface_area += np.zeros_like(log_prob)

        # The log prob must be constant and equal to the log of the reciprocal
        # of the surface area.
        self.assertAllClose(np.exp(log_prob), 1. / true_sphere_surface_area)

        # For sampling, check the marginals: x_i**2 ~ Beta(0.5, (d - 1) / 2).
        beta_dist = tfp.distributions.Beta(
            self.dtype(0.5), self.dtype((dim - 1.) / 2.))
        for i in range(dim):
            squared_coord = samples[..., i]**2
            self.evaluate(
                st.assert_true_cdf_equal_by_dkwm(
                    squared_coord, cdf=beta_dist.cdf, false_fail_rate=1e-9))
Ejemplo n.º 21
0
  def testSamplePoissonLowAndHighRates(self):
    """Checks `_random_poisson_noncpu` draws over a mix of rates.

    The samples are tested against the Poisson CDF with a DKWM test after
    confirming the test has enough power at this sample count.
    """
    rate = [1., 3., 5., 6., 7., 10., 13.0, 14., 15., 18.]
    log_rate = np.log(rate)
    num_samples = int(1e5)
    poisson = tfd.Poisson(log_rate=log_rate, validate_args=True)

    min_samples = self.evaluate(
        st.min_num_samples_for_dkwm_cdf_test(
            discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
    self.assertLess(min_samples, num_samples)

    samples = poisson_lib._random_poisson_noncpu(
        shape=[num_samples],
        log_rates=log_rate,
        output_dtype=tf.float64,
        seed=test_util.test_seed())

    cdf_check = st.assert_true_cdf_equal_by_dkwm(
        samples,
        poisson.cdf,
        st.left_continuous_cdf_discrete_distribution(poisson),
        false_fail_rate=1e-9)
    self.evaluate(cdf_check)
  def testSampleMarginals(self):
    """Checks LKJ off-diagonal marginals against a (scaled) Beta distribution.

    LKJ samples are obtained by running HMC on a `CholeskyLKJ` target through
    the `CorrelationCholesky` bijector and multiplying each Cholesky factor by
    its adjoint. Each strictly-lower-triangular entry, rescaled from [-1, 1]
    to [0, 1], must pass a DKWM test against the Beta CDF.
    """
    dim = 4
    num_chains = 10
    num_total_samples = 30000
    concentration = np.array(2.5, dtype=np.float64)

    beta_concentration = np.array(.5 * dim + concentration - 1, np.float64)
    beta_dist = beta.Beta(
        concentration0=beta_concentration, concentration1=beta_concentration)

    target = cholesky_lkj.CholeskyLKJ(
        dimension=dim, concentration=concentration)
    inner_kernel = hmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target.log_prob,
        num_leapfrog_steps=3,
        step_size=0.3)
    kernel = transformed_kernel.TransformedTransitionKernel(
        inner_kernel=inner_kernel, bijector=tfb.CorrelationCholesky())

    # Make sure that we have enough samples to catch a wrong sampler to within
    # a small enough discrepancy.
    min_samples = self.evaluate(
        st.min_num_samples_for_dkwm_cdf_test(
            discrepancy=0.04, false_fail_rate=1e-9, false_pass_rate=1e-9))
    self.assertLess(min_samples, num_total_samples)

    @tf.function  # Ensure that MCMC sampling is done efficiently.
    def sample_mcmc_chain():
      # Every chain starts from the identity matrix.
      initial_state = tf.eye(
          dim, batch_shape=[num_chains], dtype=tf.float64)
      return sample.sample_chain(
          num_results=num_total_samples // num_chains,
          num_burnin_steps=1000,
          current_state=initial_state,
          trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
          kernel=kernel,
          seed=test_util.test_seed())

    # Draw samples from the HMC chains.
    chol_lkj_samples, is_accepted = self.evaluate(sample_mcmc_chain())

    # Ensure that the per-chain acceptance rate is high enough.
    acceptance_rate = np.mean(is_accepted, axis=0)
    self.assertAllGreater(acceptance_rate, 0.8)

    # Transform from Cholesky LKJ samples to LKJ samples.
    lkj_samples = tf.matmul(chol_lkj_samples, chol_lkj_samples, adjoint_b=True)
    lkj_samples = tf.reshape(lkj_samples, shape=[num_total_samples, dim, dim])

    # Only look at the entries strictly below the diagonal which is achieved by
    # the OutputToUnconstrained bijector. Also scale the marginals from the
    # range [-1,1] to [0,1].
    below_diagonal = OutputToUnconstrained().forward(lkj_samples)
    scaled_lkj_samples = .5 * (below_diagonal + 1)

    # Each of the off-diagonal marginals should be distributed according to a
    # Beta distribution.
    num_marginals = dim * (dim - 1) // 2
    for i in range(num_marginals):
      marginal_check = st.assert_true_cdf_equal_by_dkwm(
          scaled_lkj_samples[..., i],
          cdf=beta_dist.cdf,
          false_fail_rate=1e-9)
      self.evaluate(marginal_check)