def testEmpiricalCdfAgainstDirichletMultinomial(self):
        # This test is too slow for Eager mode.
        if tf.executing_eagerly():
            return

        seed_stream = test_util.test_seed_stream()

        n = 10
        c1 = self.evaluate(
            1. + 2. *
            tf.random.uniform(shape=[3], dtype=tf.float32, seed=seed_stream()))
        c0 = self.evaluate(
            1. + 2. *
            tf.random.uniform(shape=[3], dtype=tf.float32, seed=seed_stream()))

        beta_binomial = tfd.BetaBinomial(n, c1, c0, validate_args=True)
        dirichlet_multinomial = tfd.DirichletMultinomial(n,
                                                         tf.stack([c1, c0],
                                                                  axis=-1),
                                                         validate_args=True)

        num_samples_to_draw = tf.math.floor(
            1 + st.min_num_samples_for_dkwm_cdf_two_sample_test(.02)[0])

        beta_binomial_samples = beta_binomial.sample(num_samples_to_draw)

        dirichlet_multinomial_samples = dirichlet_multinomial.sample(
            num_samples_to_draw)
        dirichlet_multinomial_samples = tf.squeeze(
            dirichlet_multinomial_samples[..., 0])

        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm_two_sample(
                beta_binomial_samples, dirichlet_multinomial_samples))
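A minimal standalone sketch of the same two-sample check, outside any test harness. It assumes `st` is TensorFlow Probability's internal statistical-testing helper module used throughout these examples; the import path below is an assumption, not confirmed by this page:

import tensorflow as tf
import tensorflow_probability as tfp
# Assumed import path for the `st` helpers used in these examples.
from tensorflow_probability.python.distributions.internal import (
    statistical_testing as st)

# Size the sample sets so the DKWM test can resolve a CDF discrepancy of 0.02,
# mirroring the sizing logic in the example above.
num_samples = tf.cast(
    tf.math.floor(1 + st.min_num_samples_for_dkwm_cdf_two_sample_test(.02)[0]),
    tf.int32)

dist = tfp.distributions.Normal(0., 1.)
s1 = dist.sample(num_samples, seed=1)
s2 = dist.sample(num_samples, seed=2)

# Builds the assertion; it passes because both sample sets share a true CDF.
# In eager mode a failure raises immediately; in graph mode, evaluate the
# returned op as the tests on this page do.
st.assert_true_cdf_equal_by_dkwm_two_sample(s1, s2)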
  def test_dkwm_cdf_two_sample_batch_discrete_assertion(self, dtype):
    rng = np.random.RandomState(seed=0)
    num_samples = 52000
    batch_shape = [3, 2]
    shape = [num_samples] + batch_shape

    probs = [0.1, 0.2, 0.3, 0.4]
    samples1 = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)
    samples2 = rng.choice(4, size=shape, p=probs).astype(dtype=dtype)
    self.evaluate(st.assert_true_cdf_equal_by_dkwm_two_sample(
        samples1, samples2, false_fail_rate=1e-6))

    def check_catches_mistake(wrong_probs):
      wrong_samples = rng.choice(
          len(wrong_probs), size=shape, p=wrong_probs).astype(dtype=dtype)
      with self.assertRaisesOpError(
          'Empirical CDFs outside joint K-S envelope'):
        self.evaluate(st.assert_true_cdf_equal_by_dkwm_two_sample(
            samples1, wrong_samples, false_fail_rate=1e-6))

    n = tf.ones(batch_shape) * num_samples
    d = st.min_discrepancy_of_true_cdfs_detectable_by_dkwm_two_sample(
        n, n, false_fail_rate=1e-6, false_pass_rate=1e-6)
    self.assertTrue(np.all(self.evaluate(d) < 0.05))

    check_catches_mistake([0.1, 0.2, 0.3, 0.3, 0.1])
    check_catches_mistake([0.2, 0.2, 0.3, 0.3])
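The assertion is a two-sample Dvoretzky-Kiefer-Wolfowitz-Massart (DKWM) test: it bounds the maximum gap between the two empirical CDFs. A rough NumPy illustration of that gap statistic (not the library's implementation):

import numpy as np

def empirical_cdf_gap(s1, s2):
  # Largest absolute gap between the two empirical CDFs -- the
  # Kolmogorov-Smirnov statistic that the DKWM envelope bounds.
  grid = np.sort(np.concatenate([s1, s2]))
  cdf1 = np.searchsorted(np.sort(s1), grid, side='right') / len(s1)
  cdf2 = np.searchsorted(np.sort(s2), grid, side='right') / len(s2)
  return np.max(np.abs(cdf1 - cdf2))

rng = np.random.RandomState(0)
ok = empirical_cdf_gap(rng.choice(4, 52000, p=[.1, .2, .3, .4]),
                       rng.choice(4, 52000, p=[.1, .2, .3, .4]))
bad = empirical_cdf_gap(rng.choice(4, 52000, p=[.1, .2, .3, .4]),
                        rng.choice(4, 52000, p=[.2, .2, .3, .3]))
# `ok` is sampling noise; `bad` is near 0.1, the probability mass moved
# between categories, which is why the mistakes above are caught.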
    def testMixtureTargetLogProb(self, make_kernel_fn, optimal_accept):
        seed = test_util.test_seed()
        n = 4
        mu = np.ones(n) * (1. / 2)
        w = 0.1

        proposal = tfd.Sample(tfd.Normal(0., 10.), sample_shape=n)
        init_state = proposal.sample(5000, seed=seed)

        likelihood_dist = tfd.MixtureSameFamily(
            mixture_distribution=tfd.Categorical(probs=[w, 1. - w]),
            components_distribution=tfd.MultivariateNormalDiag(
                loc=np.asarray([mu, -mu]).astype(np.float32),
                scale_identity_multiplier=[.1, .2]))

        # Uniform prior
        init_log_prob = tf.zeros_like(proposal.log_prob(init_state))

        [n_stage, final_state,
         _] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
             lambda x: init_log_prob,
             likelihood_dist.log_prob,
             init_state,
             make_kernel_fn=make_kernel_fn,
             tuning_fn=functools.partial(simple_heuristic_tuning,
                                         optimal_accept=optimal_accept),
             max_num_steps=50,
             parallel_iterations=1,
             seed=None if tf.executing_eagerly() else seed)

        assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
            final_state, likelihood_dist.sample(5000, seed=seed))

        n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
        self.assertLess(n_stage, 15)
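For reference, MixtureSameFamily computes log p(x) = logsumexp_k(log w_k + log N(x; mu_k, sigma_k^2 I)). A small sketch reusing the parameters above, checking the two-component target by hand:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

n, w = 4, 0.1
mu = np.ones(n, dtype=np.float32) / 2.
components = tfd.MultivariateNormalDiag(
    loc=np.asarray([mu, -mu]).astype(np.float32),
    scale_identity_multiplier=[.1, .2])
x = tf.zeros([n])

# Hand-rolled mixture log-prob: logsumexp over the two weighted components.
manual = tf.reduce_logsumexp(
    tf.math.log([w, 1. - w]) + components.log_prob(x), axis=-1)
mixture = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=[w, 1. - w]),
    components_distribution=components)
# `manual` and `mixture.log_prob(x)` agree up to float tolerance.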
Example #5
    def testLatentsOfMixedRank(self, batch_shape, num_steps):
        strm = test_util.test_seed_stream()

        init0 = [tf.ones(batch_shape + [6])]
        init1 = [
            tf.ones(batch_shape + []),
            tf.ones(batch_shape + [1]),
            tf.ones(batch_shape + [2, 2])
        ]

        @tf.function(autograph=False)
        def run_two_chains(init0, init1):
            def log_prob0(x):
                return tf.squeeze(
                    tfd.Independent(tfd.Normal(tf.range(6, dtype=tf.float32),
                                               tf.constant(1.)),
                                    reinterpreted_batch_ndims=1).log_prob(x))

            kernel0 = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
                log_prob0, step_size=0.3)
            [results0] = tfp.mcmc.sample_chain(num_results=num_steps,
                                               num_burnin_steps=10,
                                               current_state=init0,
                                               kernel=kernel0,
                                               trace_fn=None,
                                               seed=strm())

            def log_prob1(state0, state1, state2):
                return tf.squeeze(
                    tfd.Normal(tf.constant(0.), tf.constant(1.)).log_prob(
                        state0) + tfd.Independent(
                            tfd.Normal(tf.constant([1.]), tf.constant(1.)),
                            reinterpreted_batch_ndims=1).log_prob(state1) +
                    tfd.Independent(tfd.Normal(
                        tf.constant([[2., 3.], [4., 5.]]), tf.constant(1.)),
                                    reinterpreted_batch_ndims=2).log_prob(
                                        state2))

            kernel1 = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
                log_prob1, step_size=0.3)
            results1_ = tfp.mcmc.sample_chain(num_results=num_steps,
                                              num_burnin_steps=10,
                                              current_state=init1,
                                              kernel=kernel1,
                                              trace_fn=None,
                                              seed=strm())
            results1 = tf.concat(
                [tf.reshape(x, [num_steps] + batch_shape + [-1])
                 for x in results1_],
                axis=-1)

            return results0, results1

        results0, results1 = run_two_chains(init0, init1)

        self.evaluate(
            st.assert_true_cdf_equal_by_dkwm_two_sample(results0, results1))
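The reshape-and-concat at the end is what makes the mixed-rank latents comparable: every state part is flattened to [num_steps] + batch_shape + [-1] and joined on the last axis. A toy sketch of just that step:

import tensorflow as tf

num_steps, batch_shape = 3, [2]
parts = [tf.ones([num_steps] + batch_shape),            # rank-0 latent
         tf.ones([num_steps] + batch_shape + [1]),      # rank-1 latent
         tf.ones([num_steps] + batch_shape + [2, 2])]   # rank-2 latent
flat = tf.concat(
    [tf.reshape(x, [num_steps] + batch_shape + [-1]) for x in parts],
    axis=-1)
# flat.shape == [3, 2, 6]: the 1 + 1 + 4 flattened event dimensions.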
Example #6
  def testLatentsOfMixedRank(self, batch_shape, num_steps):
    strm = tfp.util.SeedStream(5, salt='LatentsOfMixedRankTest')

    init0 = [tf.ones(batch_shape + [6])]
    init1 = [tf.ones(batch_shape + []),
             tf.ones(batch_shape + [1]),
             tf.ones(batch_shape + [2, 2])]

    def log_prob0(x):
      return tf.squeeze(tfd.Independent(
          tfd.Normal(tf.range(6, dtype=tf.float32),
                     tf.constant(1.)),
          reinterpreted_batch_ndims=1).log_prob(x))
    kernel0 = tfp.mcmc.NoUTurnSampler(
        log_prob0,
        step_size=0.3,
        seed=strm())
    [results0] = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=10,
        current_state=init0,
        kernel=kernel0,
        trace_fn=None,
        parallel_iterations=1)

    def log_prob1(state0, state1, state2):
      return tf.squeeze(
          tfd.Normal(tf.constant(0.), tf.constant(1.)).log_prob(state0)
          + tfd.Independent(
              tfd.Normal(tf.constant([1.]), tf.constant(1.)),
              reinterpreted_batch_ndims=1).log_prob(state1)
          + tfd.Independent(
              tfd.Normal(tf.constant([[2., 3.], [4., 5.]]), tf.constant(1.)),
              reinterpreted_batch_ndims=2).log_prob(state2)
      )
    kernel1 = tfp.mcmc.NoUTurnSampler(
        log_prob1,
        step_size=0.3,
        seed=strm())
    results1_ = tfp.mcmc.sample_chain(
        num_results=num_steps,
        num_burnin_steps=10,
        current_state=init1,
        kernel=kernel1,
        trace_fn=None,
        parallel_iterations=1)
    results1 = tf.concat(
        [tf.reshape(x, [num_steps] + batch_shape + [-1]) for x in results1_],
        axis=-1)
    self.evaluate(
        st.assert_true_cdf_equal_by_dkwm_two_sample(results0, results1))
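Note the seeding difference from Example #5: this older variant seeds each NoUTurnSampler kernel at construction via a SeedStream, rather than passing seed to sample_chain as the PreconditionedNoUTurnSampler version does. A SeedStream deterministically derives a fresh seed on each call:

import tensorflow_probability as tfp

# Each call yields a distinct seed derived deterministically from (5, salt),
# so kernel0 and kernel1 above are reproducible but not identically seeded.
strm = tfp.util.SeedStream(5, salt='LatentsOfMixedRankTest')
seed0 = strm()
seed1 = strm()  # different from seed0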
  def testMixtureMultiBatch(self):
    seed = test_util.test_seed()
    # Generate 3 batches of a 2-component Gaussian mixture in 2 dimensions.
    nd = 2
    n_batch = 3
    w = tf.constant([0.1, .25, .5], tf.float64)
    mixture_weight = tf.transpose(tf.stack([w, 1. - w]))
    mu = np.ones(nd) * .5
    loc = tf.cast(np.asarray([mu, -mu]), tf.float64)
    component_loc = tf.repeat(loc[tf.newaxis, ...], n_batch, axis=0)

    likelihood_dist = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(
            probs=mixture_weight),
        components_distribution=tfd.MultivariateNormalDiag(
            loc=component_loc,
            scale_identity_multiplier=[.1, .2]))

    proposal = tfd.Sample(tfd.Normal(tf.constant(0., tf.float64), 10.),
                          sample_shape=nd)
    init_state = proposal.sample([5000, n_batch], seed=seed)
    log_prob_fn = likelihood_dist.log_prob
    # log_prob_fn(init_state) has shape [5000, n_batch].

    # Uniform prior
    init_log_prob = tf.zeros_like(log_prob_fn(init_state))

    [
        n_stage, final_state, _
    ] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
        lambda x: init_log_prob,
        log_prob_fn,
        init_state,
        make_kernel_fn=make_test_nuts_kernel_fn,
        tuning_fn=functools.partial(simple_heuristic_tuning,
                                    optimal_accept=0.8),
        max_num_steps=50,
        parallel_iterations=1,
        seed=None if tf.executing_eagerly() else seed)

    assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
        final_state, likelihood_dist.sample(5000, seed=seed), 1e-5)

    n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
    self.assertLess(n_stage, 15)
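A shape sketch of the batched target above may help: the mixture has batch_shape [3] (the three weight settings) and event_shape [2], so the log-prob of the [5000, 3, 2] particle tensor has shape [5000, 3], one log-density per particle per batch member. The zero locations below are placeholders, not the values used above:

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

w = tf.constant([0.1, .25, .5], tf.float64)           # [3]
mixture_weight = tf.transpose(tf.stack([w, 1. - w]))  # [3, 2]
component_loc = tf.zeros([3, 2, 2], tf.float64)       # [batch, component, dim]
dist = tfd.MixtureSameFamily(
    mixture_distribution=tfd.Categorical(probs=mixture_weight),
    components_distribution=tfd.MultivariateNormalDiag(
        loc=component_loc, scale_identity_multiplier=[.1, .2]))
x = tf.zeros([5000, 3, 2], tf.float64)
# dist.batch_shape == [3], dist.event_shape == [2],
# so dist.log_prob(x).shape == [5000, 3].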
  def testMixtureTargetLogProb(self, make_kernel_fn, optimal_accept):
    if tf.executing_eagerly():
      self.skipTest('Skipping eager-mode test to reduce test weight.')

    seed = test_util.test_seed()
    # Generate a 2-component Gaussian mixture in 3 dimensions.
    nd = 3
    w = 0.1
    mixture_weight = tf.constant([w, 1. - w], tf.float64)
    mu = np.ones(nd) * .5
    component_loc = tf.cast(np.asarray([mu, -mu]), tf.float64)

    proposal = tfd.Sample(tfd.Normal(tf.constant(0., tf.float64), 10.),
                          sample_shape=nd)
    init_state = proposal.sample(5000, seed=seed)

    likelihood_dist = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=mixture_weight),
        components_distribution=tfd.MultivariateNormalDiag(
            loc=component_loc,
            scale_identity_multiplier=[.1, .2]))

    # Uniform prior
    init_log_prob = tf.zeros_like(proposal.log_prob(init_state))

    [
        n_stage, final_state, _
    ] = tfp.experimental.mcmc.sample_sequential_monte_carlo(
        lambda x: init_log_prob,
        likelihood_dist.log_prob,
        init_state,
        make_kernel_fn=make_kernel_fn,
        tuning_fn=functools.partial(simple_heuristic_tuning,
                                    optimal_accept=optimal_accept),
        max_num_steps=50,
        seed=seed)

    assert_cdf_equal_sample = st.assert_true_cdf_equal_by_dkwm_two_sample(
        final_state, likelihood_dist.sample(5000, seed=seed), 1e-5)

    n_stage, _ = self.evaluate((n_stage, assert_cdf_equal_sample))
    self.assertLess(n_stage, 15)
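The trailing 1e-5 in the last two examples is presumably the false_fail_rate, passed positionally where earlier examples pass it by keyword. When the two CDFs genuinely differ, the assertion fails with the 'Empirical CDFs outside joint K-S envelope' message checked earlier; in eager mode that surfaces as an InvalidArgumentError. A sketch, again assuming the `st` import from the first sketch:

import numpy as np
import tensorflow as tf

rng = np.random.RandomState(0)
good = rng.choice(4, size=[52000], p=[0.1, 0.2, 0.3, 0.4]).astype(np.float32)
bad = rng.choice(4, size=[52000], p=[0.2, 0.2, 0.3, 0.3]).astype(np.float32)
try:
  st.assert_true_cdf_equal_by_dkwm_two_sample(good, bad, false_fail_rate=1e-6)
except tf.errors.InvalidArgumentError as e:
  print(e.message)  # expected to mention the joint K-S envelope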