def test_initialize_model_change_point(init_strategy):
    def model(data):
        alpha = 1 / np.mean(data)
        lambda1 = sample('lambda1', dist.Exponential(alpha))
        lambda2 = sample('lambda2', dist.Exponential(alpha))
        tau = sample('tau', dist.Uniform(0, 1))
        lambda12 = np.where(np.arange(len(data)) < tau * len(data), lambda1, lambda2)
        sample('obs', dist.Poisson(lambda12), obs=data)

    count_data = np.array([
        13, 24, 8, 24, 7, 35, 14, 11, 15, 11, 22, 22, 11, 57, 11, 19, 29, 6, 19,
        12, 22, 12, 18, 72, 32, 9, 7, 13, 19, 23, 27, 20, 6, 17, 13, 10, 14, 6,
        16, 15, 7, 2, 15, 15, 19, 70, 49, 7, 53, 22, 21, 31, 19, 11, 18, 20, 12,
        35, 17, 23, 17, 4, 2, 31, 30, 13, 27, 0, 39, 37, 5, 14, 13, 22,
    ])

    rngs = random.split(random.PRNGKey(1), 12)
    init_params, _, _ = initialize_model(rngs, model, count_data,
                                         init_strategy=init_strategy)
    for i in range(12):
        init_params_i, _, _ = initialize_model(rngs[i], model, count_data,
                                               init_strategy=init_strategy)
        for name, p in init_params.items():
            # XXX: the result is equal if we disable fast-math-mode
            assert_allclose(p[i], init_params_i[name], atol=1e-6)
def test_dirichlet_categorical(algo, dense_mass):
    warmup_steps, num_samples = 100, 20000

    def model(data):
        concentration = np.array([1.0, 1.0, 1.0])
        p_latent = numpyro.sample('p_latent', dist.Dirichlet(concentration))
        numpyro.sample('obs', dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = np.array([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(random.PRNGKey(1), (2000,))
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    samples = mcmc(warmup_steps, num_samples, init_params,
                   constrain_fn=constrain_fn, progbar=False, print_summary=False,
                   potential_fn=potential_fn, algo=algo,
                   trajectory_length=1., dense_mass=dense_mass)
    assert_allclose(np.mean(samples['p_latent'], 0), true_probs, atol=0.02)

    if 'JAX_ENABLE_x64' in os.environ:
        assert samples['p_latent'].dtype == np.float64
def test_binomial_stable(with_logits):
    # Ref: https://github.com/pyro-ppl/pyro/issues/1706
    warmup_steps, num_samples = 200, 200

    def model(data):
        p = numpyro.sample('p', dist.Beta(1., 1.))
        if with_logits:
            logits = logit(p)
            numpyro.sample('obs', dist.Binomial(data['n'], logits=logits), obs=data['x'])
        else:
            numpyro.sample('obs', dist.Binomial(data['n'], probs=p), obs=data['x'])

    data = {'n': 5000000, 'x': 3849}
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    init_kernel, sample_kernel = hmc(potential_fn)
    hmc_state = init_kernel(init_params, num_warmup=warmup_steps)
    samples = fori_collect(0, num_samples, sample_kernel, hmc_state,
                           transform=lambda x: constrain_fn(x.z))
    assert_allclose(np.mean(samples['p'], 0), data['x'] / data['n'], rtol=0.05)

    if 'JAX_ENABLE_x64' in os.environ:
        assert samples['p'].dtype == np.float64
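# A minimal sketch (added; not part of the test) of why the logits branch above
# is the numerically stable path from the referenced pyro issue: with `probs`,
# the Binomial log-density evaluates log(p) and log1p(-p) directly, which hit
# -inf once a leapfrog step pushes p to a boundary; with `logits`, both terms
# can be written via log-sigmoid and stay finite for any finite logits. Uses
# only scipy.special; the function name is illustrative.
from scipy.special import log_expit


def binomial_logpmf_terms(n, x, logits):
    # x * log(sigmoid(logits)) + (n - x) * log(sigmoid(-logits)); the binomial
    # coefficient is omitted since it does not depend on the parameter.
    return x * log_expit(logits) + (n - x) * log_expit(-logits)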
def test_chain():
    N, dim = 3000, 3
    num_warmup, num_samples = 5000, 5000
    data = random.normal(random.PRNGKey(0), (N, dim))
    true_coefs = np.arange(1., dim + 1.)
    logits = np.sum(true_coefs * data, axis=-1)
    labels = dist.Bernoulli(logits=logits).sample(random.PRNGKey(1))

    def model(labels):
        coefs = numpyro.sample('coefs', dist.Normal(np.zeros(dim), np.ones(dim)))
        logits = np.sum(coefs * data, axis=-1)
        return numpyro.sample('obs', dist.Bernoulli(logits=logits), obs=labels)

    rngs = random.split(random.PRNGKey(2), 2)
    init_params, potential_fn, constrain_fn = initialize_model(rngs, model, labels)
    samples = mcmc(num_warmup, num_samples, init_params, num_chains=2,
                   potential_fn=potential_fn, constrain_fn=constrain_fn)
    assert samples['coefs'].shape[0] == 2 * num_samples
    assert_allclose(np.mean(samples['coefs'], 0), true_coefs, atol=0.21)
def run_inference(model, args, rng):
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, constrain_fn = initialize_model(rng, model)
    samples = mcmc(args.num_warmup, args.num_samples, init_params,
                   num_chains=args.num_chains, potential_fn=potential_fn,
                   constrain_fn=constrain_fn)
    return samples
def main(args):
    jax_config.update('jax_platform_name', args.device)

    print('Simulating data...')
    (transition_prior, emission_prior, transition_prob, emission_prob,
     supervised_categories, supervised_words, unsupervised_words) = simulate_data(
        random.PRNGKey(1),
        num_categories=args.num_categories,
        num_words=args.num_words,
        num_supervised_data=args.num_supervised,
        num_unsupervised_data=args.num_unsupervised,
    )

    print('Starting inference...')
    rng = random.PRNGKey(2)
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, constrain_fn = initialize_model(
        rng, semi_supervised_hmm,
        transition_prior, emission_prior, supervised_categories,
        supervised_words, unsupervised_words,
    )
    start = time.time()
    samples = mcmc(args.num_warmup, args.num_samples, init_params,
                   num_chains=args.num_chains, potential_fn=potential_fn,
                   constrain_fn=constrain_fn, progbar=True)
    print('\nMCMC elapsed time:', time.time() - start)
    print_results(samples, transition_prob, emission_prob)
def test_change_point():
    # Ref: https://forum.pyro.ai/t/i-dont-understand-why-nuts-code-is-not-working-bayesian-hackers-mail/696
    warmup_steps, num_samples = 500, 3000

    def model(data):
        alpha = 1 / np.mean(data)
        lambda1 = sample('lambda1', dist.Exponential(alpha))
        lambda2 = sample('lambda2', dist.Exponential(alpha))
        tau = sample('tau', dist.Uniform(0, 1))
        lambda12 = np.where(np.arange(len(data)) < tau * len(data), lambda1, lambda2)
        sample('obs', dist.Poisson(lambda12), obs=data)

    count_data = np.array([
        13, 24, 8, 24, 7, 35, 14, 11, 15, 11, 22, 22, 11, 57, 11, 19, 29, 6, 19,
        12, 22, 12, 18, 72, 32, 9, 7, 13, 19, 23, 27, 20, 6, 17, 13, 10, 14, 6,
        16, 15, 7, 2, 15, 15, 19, 70, 49, 7, 53, 22, 21, 31, 19, 11, 18, 20, 12,
        35, 17, 23, 17, 4, 2, 31, 30, 13, 27, 0, 39, 37, 5, 14, 13, 22,
    ])

    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(4), model, count_data)
    init_kernel, sample_kernel = hmc(potential_fn)
    hmc_state = init_kernel(init_params, num_warmup=warmup_steps)
    samples = fori_collect(num_samples, sample_kernel, hmc_state,
                           transform=lambda x: constrain_fn(x.z))

    tau_posterior = (samples['tau'] * len(count_data)).astype("int")
    tau_values, counts = onp.unique(tau_posterior, return_counts=True)
    mode_ind = np.argmax(counts)
    mode = tau_values[mode_ind]
    assert mode == 44

    if 'JAX_ENABLE_x64' in os.environ:
        assert samples['lambda1'].dtype == np.float64
        assert samples['lambda2'].dtype == np.float64
        assert samples['tau'].dtype == np.float64
def test_dirichlet_categorical(algo, dense_mass):
    warmup_steps, num_samples = 100, 20000

    def model(data):
        concentration = np.array([1.0, 1.0, 1.0])
        p_latent = sample('p_latent', dist.Dirichlet(concentration))
        sample('obs', dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = np.array([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(random.PRNGKey(1), (2000,))
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    init_kernel, sample_kernel = hmc(potential_fn, algo=algo)
    hmc_state = init_kernel(init_params, trajectory_length=1.,
                            num_warmup=warmup_steps, progbar=False,
                            dense_mass=dense_mass)
    hmc_states = fori_collect(num_samples, sample_kernel, hmc_state,
                              transform=lambda x: constrain_fn(x.z), progbar=False)
    assert_allclose(np.mean(hmc_states['p_latent'], 0), true_probs, atol=0.02)

    if 'JAX_ENABLE_x64' in os.environ:
        assert hmc_states['p_latent'].dtype == np.float64
def test_logistic_regression(algo):
    N, dim = 3000, 3
    warmup_steps, num_samples = 1000, 8000
    data = random.normal(random.PRNGKey(0), (N, dim))
    true_coefs = np.arange(1., dim + 1.)
    logits = np.sum(true_coefs * data, axis=-1)
    labels = dist.Bernoulli(logits=logits).sample(random.PRNGKey(1))

    def model(labels):
        coefs = numpyro.sample('coefs', dist.Normal(np.zeros(dim), np.ones(dim)))
        logits = np.sum(coefs * data, axis=-1)
        return numpyro.sample('obs', dist.Bernoulli(logits=logits), obs=labels)

    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, labels)
    samples = mcmc(warmup_steps, num_samples, init_params, sampler='hmc',
                   algo=algo, potential_fn=potential_fn, trajectory_length=10,
                   constrain_fn=constrain_fn)
    assert_allclose(np.mean(samples['coefs'], 0), true_coefs, atol=0.21)

    if 'JAX_ENABLE_x64' in os.environ:
        assert samples['coefs'].dtype == np.float64
def init(self, rng, num_warmup, init_params=None, model_args=(), model_kwargs={}):
    constrain_fn = None
    if self.model is not None:
        rng, rng_init_model = random.split(rng)
        init_params_, self.potential_fn, constrain_fn = initialize_model(
            rng_init_model, self.model, *model_args, **model_kwargs)
        if init_params is None:
            init_params = init_params_
    else:
        # User needs to provide valid `init_params` if using `potential_fn`.
        if init_params is None:
            raise ValueError('Valid value of `init_params` must be provided with'
                             ' `potential_fn`.')
    hmc_init_fn, self._sample_fn = hmc(self.potential_fn, self.kinetic_fn,
                                       algo=self.algo)
    init_state = hmc_init_fn(init_params,
                             num_warmup=num_warmup,
                             step_size=self.step_size,
                             adapt_step_size=self.adapt_step_size,
                             adapt_mass_matrix=self.adapt_mass_matrix,
                             dense_mass=self.dense_mass,
                             target_accept_prob=self.target_accept_prob,
                             trajectory_length=self.trajectory_length,
                             max_tree_depth=self.max_tree_depth,
                             run_warmup=False,
                             rng=rng)
    return init_state, constrain_fn
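# A minimal end-to-end sketch (added) of the functional pipeline that `init`
# above wraps: initialize_model -> hmc -> init_kernel -> fori_collect, wired
# together the same way the tests in this file do it. `my_model`, `data`, and
# the step counts are placeholders; argument names follow the snippets in this
# file and may differ across numpyro versions.
def run_hmc_sketch(my_model, data, num_warmup=500, num_samples=1000):
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(0), my_model, data)
    init_kernel, sample_kernel = hmc(potential_fn, algo='NUTS')
    hmc_state = init_kernel(init_params, num_warmup=num_warmup)
    # collect constrained posterior samples from the Markov chain
    return fori_collect(num_samples, sample_kernel, hmc_state,
                        transform=lambda state: constrain_fn(state.z))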
def test_beta_bernoulli(algo):
    warmup_steps, num_samples = 500, 20000

    def model(data):
        alpha = np.array([1.1, 1.1])
        beta = np.array([1.1, 1.1])
        p_latent = numpyro.sample('p_latent', dist.Beta(alpha, beta))
        numpyro.sample('obs', dist.Bernoulli(p_latent), obs=data)
        return p_latent

    true_probs = np.array([0.9, 0.1])
    data = dist.Bernoulli(true_probs).sample(random.PRNGKey(1), (1000, 2))
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    init_kernel, sample_kernel = hmc(potential_fn, algo=algo)
    hmc_state = init_kernel(init_params, trajectory_length=1.,
                            num_warmup=warmup_steps, progbar=False)
    samples = fori_collect(0, num_samples, sample_kernel, hmc_state,
                           transform=lambda x: constrain_fn(x.z), progbar=False)
    assert_allclose(np.mean(samples['p_latent'], 0), true_probs, atol=0.05)

    if 'JAX_ENABLE_x64' in os.environ:
        assert samples['p_latent'].dtype == np.float64
def run_inference(model, at_bats, hits, rng, args):
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, constrain_fn = initialize_model(rng, model, at_bats, hits)
    hmc_states = mcmc(args.num_warmup, args.num_samples, init_params,
                      num_chains=args.num_chains, sampler='hmc',
                      potential_fn=potential_fn, constrain_fn=constrain_fn)
    return hmc_states
def run_inference(dept, male, applications, admit, rng, args):
    init_params, potential_fn, constrain_fn = initialize_model(
        rng, glmm, dept, male, applications, admit)
    init_kernel, sample_kernel = hmc(potential_fn, algo='NUTS')
    hmc_state = init_kernel(init_params, args.num_warmup_steps)
    hmc_states = fori_collect(args.num_samples, sample_kernel, hmc_state,
                              transform=lambda hmc_state: constrain_fn(hmc_state.z))
    return hmc_states
def run_inference(dept, male, applications, admit, rng, args):
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, constrain_fn = initialize_model(
        rng, glmm, dept, male, applications, admit)
    samples = mcmc(args.num_warmup, args.num_samples, init_params,
                   num_chains=args.num_chains, potential_fn=potential_fn,
                   constrain_fn=constrain_fn)
    return samples
def run_inference(model, args, rng, X, Y, hypers):
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, constrain_fn = initialize_model(rng, model, X, Y, hypers)
    start = time.time()
    samples = mcmc(args.num_warmup, args.num_samples, init_params,
                   num_chains=args.num_chains, sampler='hmc',
                   potential_fn=potential_fn, constrain_fn=constrain_fn)
    print('\nMCMC elapsed time:', time.time() - start)
    return samples
def run_inference(model, args, rng, X, Y, D_H):
    init_params, potential_fn, constrain_fn = initialize_model(
        rng, model, X, Y, D_H)
    samples = mcmc(args.num_warmup, args.num_samples, init_params, sampler='hmc',
                   potential_fn=potential_fn, constrain_fn=constrain_fn)
    return samples
def test_initialize_model_dirichlet_categorical(init_strategy):
    def model(data):
        concentration = np.array([1.0, 1.0, 1.0])
        p_latent = sample('p_latent', dist.Dirichlet(concentration))
        sample('obs', dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = np.array([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(random.PRNGKey(1), (2000,))

    rngs = random.split(random.PRNGKey(1), 12)
    init_params, _, _ = initialize_model(rngs, model, data, init_strategy=init_strategy)
    for i in range(12):
        init_params_i, _, _ = initialize_model(rngs[i], model, data,
                                               init_strategy=init_strategy)
        for name, p in init_params.items():
            # XXX: the result is equal if we disable fast-math-mode
            assert_allclose(p[i], init_params_i[name], atol=1e-6)
def main(args):
    jax_config.update('jax_platform_name', args.device)
    _, fetch = load_dataset(SP500, shuffle=False)
    dates, returns = fetch()
    init_rng, sample_rng = random.split(random.PRNGKey(args.rng))
    init_params, potential_fn, constrain_fn = initialize_model(init_rng, model, returns)
    init_kernel, sample_kernel = hmc(potential_fn, algo='NUTS')
    hmc_state = init_kernel(init_params, args.num_warmup, rng=sample_rng)
    hmc_states = fori_collect(0, args.num_samples, sample_kernel, hmc_state,
                              transform=lambda hmc_state: constrain_fn(hmc_state.z))
    print_results(hmc_states, dates)
def test_improper_prior():
    true_mean, true_std = 1., 2.
    num_warmup, num_samples = 1000, 8000

    def model(data):
        mean = param('mean', 0.)
        std = param('std', 1., constraint=constraints.positive)
        return sample('obs', dist.Normal(mean, std), obs=data)

    data = dist.Normal(true_mean, true_std).sample(random.PRNGKey(1), (2000,))
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    samples = mcmc(num_warmup, num_samples, init_params,
                   potential_fn=potential_fn, constrain_fn=constrain_fn)
    assert_allclose(np.mean(samples['mean']), true_mean, rtol=0.05)
    assert_allclose(np.mean(samples['std']), true_std, rtol=0.05)
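# Note (added): the test above relies on `param` sites being promoted to latent
# variables with improper flat priors when the model is handed to MCMC via
# `initialize_model`; `constraint=constraints.positive` puts the flat prior on
# the constrained (positive) space for `std`. This reading is inferred from the
# test's name and assertions rather than from library documentation.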
def benchmark_hmc(args, features, labels):
    step_size = np.sqrt(0.5 / features.shape[0])
    trajectory_length = step_size * args.num_steps
    rng = random.PRNGKey(1)
    if args.num_chains > 1:
        rng = random.split(rng, args.num_chains)
    init_params, potential_fn, _ = initialize_model(rng, model, features, labels)
    start = time.time()
    mcmc(0, args.num_samples, init_params, num_chains=args.num_chains,
         potential_fn=potential_fn, trajectory_length=trajectory_length)
    print('\nMCMC elapsed time:', time.time() - start)
def test_uniform_normal():
    true_coef = 0.9

    def model(data):
        alpha = numpyro.sample('alpha', dist.Uniform(0, 1))
        loc = numpyro.sample('loc', dist.Uniform(0, alpha))
        numpyro.sample('obs', dist.Normal(loc, 0.1), obs=data)

    data = true_coef + random.normal(random.PRNGKey(0), (1000,))
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), model, data)
    samples = mcmc(1000, 1000, init_params, potential_fn=potential_fn,
                   constrain_fn=constrain_fn)
    assert_allclose(np.mean(samples['loc'], 0), true_coef, atol=0.05)
def run_inference(transition_prior, emission_prior, supervised_categories,
                  supervised_words, unsupervised_words, rng, args):
    init_params, potential_fn, constrain_fn = initialize_model(
        rng, semi_supervised_hmm,
        transition_prior, emission_prior, supervised_categories,
        supervised_words, unsupervised_words,
    )
    init_kernel, sample_kernel = hmc(potential_fn, algo='NUTS')
    hmc_state = init_kernel(init_params, args.num_warmup)
    hmc_states = fori_collect(args.num_samples, sample_kernel, hmc_state,
                              transform=lambda state: constrain_fn(state.z))
    return hmc_states
def benchmark_hmc(args, features, labels):
    # `step_size` mirrors the heuristic used by the other benchmark in this file.
    step_size = np.sqrt(0.5 / features.shape[0])
    trajectory_length = step_size * args.num_steps
    _, potential_fn, _ = initialize_model(random.PRNGKey(1), model, features, labels)
    init_kernel, sample_kernel = hmc(potential_fn, algo=args.algo)
    t0 = time.time()
    # TODO: Use init_params from `initialize_model` instead of fixed params.
    hmc_state, _, _ = init_kernel(init_params, num_warmup=0, step_size=step_size,
                                  trajectory_length=trajectory_length,
                                  adapt_step_size=False, run_warmup=False)
    t1 = time.time()
    print("time for hmc_init: ", t1 - t0)

    def transform(state):
        return {'coefs': state.z['coefs'], 'num_steps': state.num_steps}

    hmc_states = fori_collect(args.num_samples, sample_kernel, hmc_state,
                              transform=transform)
    num_leapfrogs = np.sum(hmc_states['num_steps'])
    print('number of leapfrog steps: ', num_leapfrogs)
    print('avg. time for each step: ', (time.time() - t1) / num_leapfrogs)
def main(args):
    jax_config.update('jax_platform_name', args.device)

    print('Simulating data...')
    (transition_prior, emission_prior, transition_prob, emission_prob,
     supervised_categories, supervised_words, unsupervised_words) = simulate_data(
        random.PRNGKey(1),
        num_categories=args.num_categories,
        num_words=args.num_words,
        num_supervised_data=args.num_supervised,
        num_unsupervised_data=args.num_unsupervised,
    )

    print('Starting inference...')
    init_params, potential_fn, constrain_fn = initialize_model(
        random.PRNGKey(2), semi_supervised_hmm,
        transition_prior, emission_prior, supervised_categories,
        supervised_words, unsupervised_words,
    )
    samples = mcmc(args.num_warmup, args.num_samples, init_params,
                   potential_fn=potential_fn, constrain_fn=constrain_fn)
    print_results(samples, transition_prob, emission_prob)
def test_logistic_regression(algo):
    N, dim = 3000, 3
    warmup_steps, num_samples = 1000, 8000
    data = random.normal(random.PRNGKey(0), (N, dim))
    true_coefs = np.arange(1., dim + 1.)
    logits = np.sum(true_coefs * data, axis=-1)
    labels = dist.bernoulli(logits, is_logits=True).rvs(random_state=random.PRNGKey(1))

    with validation_disabled():
        def model(labels):
            coefs = sample('coefs', dist.norm(np.zeros(dim), np.ones(dim)))
            logits = np.sum(coefs * data, axis=-1)
            return sample('obs', dist.bernoulli(logits, is_logits=True), obs=labels)

        init_params, potential_fn = initialize_model(random.PRNGKey(2), model,
                                                     (labels,), {})
        init_kernel, sample_kernel = hmc_kernel(potential_fn, algo=algo)
        hmc_state = init_kernel(init_params,
                                step_size=0.1,
                                num_steps=15,
                                num_warmup_steps=warmup_steps)
        sample_kernel = jit(sample_kernel)
        hmc_states = tscan(lambda state, i: sample_kernel(state), hmc_state,
                           np.arange(num_samples))
        assert_allclose(np.mean(hmc_states.z['coefs'], 0), true_coefs, atol=0.2)
def main(args): jax_config.update('jax_platform_name', args.device) print("Start vanilla HMC...") vanilla_samples = mcmc(args.num_warmup, args.num_samples, init_params=np.array([2., 0.]), potential_fn=dual_moon_pe, progbar=True) opt_init, opt_update, get_params = optimizers.adam(0.001) rng_guide, rng_init, rng_train = random.split(random.PRNGKey(1), 3) guide = AutoIAFNormal(rng_guide, dual_moon_model, get_params, hidden_dims=[args.num_hidden]) svi_init, svi_update, _ = svi(dual_moon_model, guide, elbo, opt_init, opt_update, get_params) opt_state, _ = svi_init(rng_init) def body_fn(val, i): opt_state_, rng_ = val loss, opt_state_, rng_ = svi_update(i, rng_, opt_state_) return (opt_state_, rng_), loss print("Start training guide...") (last_state, _), losses = lax.scan(body_fn, (opt_state, rng_train), np.arange(args.num_iters)) print("Finish training guide. Extract samples...") guide_samples = guide.sample_posterior(random.PRNGKey(0), last_state, sample_shape=(args.num_samples,)) transform = guide.get_transform(last_state) unpack_fn = guide.unpack_latent _, potential_fn, constrain_fn = initialize_model(random.PRNGKey(0), dual_moon_model) transformed_potential_fn = make_transformed_pe(potential_fn, transform, unpack_fn) transformed_constrain_fn = lambda x: constrain_fn(unpack_fn(transform(x))) # noqa: E731 init_params = np.zeros(guide.latent_size) print("\nStart NeuTra HMC...") zs = mcmc(args.num_warmup, args.num_samples, init_params, potential_fn=transformed_potential_fn) print("Transform samples into unwarped space...") samples = vmap(transformed_constrain_fn)(zs) summary(tree_map(lambda x: x[None, ...], samples)) # make plots # IAF guide samples (for plotting) iaf_base_samples = dist.Normal(np.zeros(2), 1.).sample(random.PRNGKey(0), (1000,)) iaf_trans_samples = vmap(transformed_constrain_fn)(iaf_base_samples)['x'] x1 = np.linspace(-3, 3, 100) x2 = np.linspace(-3, 3, 100) X1, X2 = np.meshgrid(x1, x2) P = np.clip(np.exp(-dual_moon_pe(np.stack([X1, X2], axis=-1))), a_min=0.) fig = plt.figure(figsize=(12, 16), constrained_layout=True) gs = GridSpec(3, 2, figure=fig) ax1 = fig.add_subplot(gs[0, 0]) ax2 = fig.add_subplot(gs[0, 1]) ax3 = fig.add_subplot(gs[1, 0]) ax4 = fig.add_subplot(gs[1, 1]) ax5 = fig.add_subplot(gs[2, 0]) ax6 = fig.add_subplot(gs[2, 1]) ax1.plot(np.log(losses[1000:])) ax1.set_title('Autoguide training log loss (after 1000 steps)') ax2.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(guide_samples['x'][:, 0].copy(), guide_samples['x'][:, 1].copy(), n_levels=30, ax=ax2) ax2.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using AutoIAFNormal guide') sns.scatterplot(iaf_base_samples[:, 0], iaf_base_samples[:, 1], ax=ax3, hue=iaf_trans_samples[:, 0] < 0.) 
ax3.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='AutoIAFNormal base samples (True=left moon; False=right moon)') ax4.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(vanilla_samples[:, 0].copy(), vanilla_samples[:, 1].copy(), n_levels=30, ax=ax4) ax4.plot(vanilla_samples[-50:, 0], vanilla_samples[-50:, 1], 'bo-', alpha=0.5) ax4.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using vanilla HMC sampler') sns.scatterplot(zs[:, 0], zs[:, 1], ax=ax5, hue=samples['x'][:, 0] < 0., s=30, alpha=0.5, edgecolor="none") ax5.set(xlim=[-5, 5], ylim=[-5, 5], xlabel='x0', ylabel='x1', title='Samples from the warped posterior - p(z)') ax6.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(samples['x'][:, 0].copy(), samples['x'][:, 1].copy(), n_levels=30, ax=ax6) ax6.plot(samples['x'][-50:, 0], samples['x'][-50:, 1], 'bo-', alpha=0.2) ax6.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using NeuTra HMC sampler') plt.savefig("neutra.pdf") plt.close()
def main(args): jax_config.update('jax_platform_name', args.device) print("Start vanilla HMC...") nuts_kernel = NUTS(potential_fn=dual_moon_pe) mcmc = MCMC(nuts_kernel, args.num_warmup, args.num_samples) mcmc.run(random.PRNGKey(11), init_params=np.array([2., 0.])) vanilla_samples = mcmc.get_samples() adam = optim.Adam(0.001) rng_init, rng_train = random.split(random.PRNGKey(1), 2) guide = AutoIAFNormal(dual_moon_model, hidden_dims=[args.num_hidden], skip_connections=True) svi = SVI(dual_moon_model, guide, elbo, adam) svi_state = svi.init(rng_init) print("Start training guide...") last_state, losses = lax.scan(lambda state, i: svi.update(state), svi_state, np.zeros(args.num_iters)) params = svi.get_params(last_state) print("Finish training guide. Extract samples...") guide_samples = guide.sample_posterior(random.PRNGKey(0), params, sample_shape=(args.num_samples,)) transform = guide.get_transform(params) unpack_fn = guide.unpack_latent _, potential_fn, constrain_fn = initialize_model(random.PRNGKey(0), dual_moon_model) transformed_potential_fn = make_transformed_pe(potential_fn, transform, unpack_fn) transformed_constrain_fn = lambda x: constrain_fn(unpack_fn(transform(x))) # noqa: E731 init_params = np.zeros(guide.latent_size) print("\nStart NeuTra HMC...") # TODO: exlore why neutra samples are not good # Issue: https://github.com/pyro-ppl/numpyro/issues/256 nuts_kernel = NUTS(potential_fn=transformed_potential_fn) mcmc = MCMC(nuts_kernel, args.num_warmup, args.num_samples) mcmc.run(random.PRNGKey(10), init_params=init_params) zs = mcmc.get_samples() print("Transform samples into unwarped space...") samples = vmap(transformed_constrain_fn)(zs) summary(tree_map(lambda x: x[None, ...], samples)) # make plots # IAF guide samples (for plotting) iaf_base_samples = dist.Normal(np.zeros(2), 1.).sample(random.PRNGKey(0), (1000,)) iaf_trans_samples = vmap(transformed_constrain_fn)(iaf_base_samples)['x'] x1 = np.linspace(-3, 3, 100) x2 = np.linspace(-3, 3, 100) X1, X2 = np.meshgrid(x1, x2) P = np.clip(np.exp(-dual_moon_pe(np.stack([X1, X2], axis=-1))), a_min=0.) fig = plt.figure(figsize=(12, 16), constrained_layout=True) gs = GridSpec(3, 2, figure=fig) ax1 = fig.add_subplot(gs[0, 0]) ax2 = fig.add_subplot(gs[0, 1]) ax3 = fig.add_subplot(gs[1, 0]) ax4 = fig.add_subplot(gs[1, 1]) ax5 = fig.add_subplot(gs[2, 0]) ax6 = fig.add_subplot(gs[2, 1]) ax1.plot(np.log(losses[1000:])) ax1.set_title('Autoguide training log loss (after 1000 steps)') ax2.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(guide_samples['x'][:, 0].copy(), guide_samples['x'][:, 1].copy(), n_levels=30, ax=ax2) ax2.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using AutoIAFNormal guide') sns.scatterplot(iaf_base_samples[:, 0], iaf_base_samples[:, 1], ax=ax3, hue=iaf_trans_samples[:, 0] < 0.) 
ax3.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='AutoIAFNormal base samples (True=left moon; False=right moon)') ax4.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(vanilla_samples[:, 0].copy(), vanilla_samples[:, 1].copy(), n_levels=30, ax=ax4) ax4.plot(vanilla_samples[-50:, 0], vanilla_samples[-50:, 1], 'bo-', alpha=0.5) ax4.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using vanilla HMC sampler') sns.scatterplot(zs[:, 0], zs[:, 1], ax=ax5, hue=samples['x'][:, 0] < 0., s=30, alpha=0.5, edgecolor="none") ax5.set(xlim=[-5, 5], ylim=[-5, 5], xlabel='x0', ylabel='x1', title='Samples from the warped posterior - p(z)') ax6.contourf(X1, X2, P, cmap='OrRd') sns.kdeplot(samples['x'][:, 0].copy(), samples['x'][:, 1].copy(), n_levels=30, ax=ax6) ax6.plot(samples['x'][-50:, 0], samples['x'][-50:, 1], 'bo-', alpha=0.2) ax6.set(xlim=[-3, 3], ylim=[-3, 3], xlabel='x0', ylabel='x1', title='Posterior using NeuTra HMC sampler') plt.savefig("neutra.pdf") plt.close()
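# A sketch (added) of what `make_transformed_pe` has to compute for the NeuTra
# runs above: the potential of the pulled-back density. If z = transform(x),
# then -log q(x) = potential_fn(unpack_fn(z)) - log|det J_transform(x)|. The
# function below is an illustrative reimplementation under that assumption,
# not the library source; `log_abs_det_jacobian(x, z)` follows the standard
# pyro/numpyro transform interface.
def make_transformed_pe_sketch(potential_fn, transform, unpack_fn):
    def transformed_potential_fn(x):
        z = transform(x)
        # change-of-variables correction for the flow
        logdet = transform.log_abs_det_jacobian(x, z)
        return potential_fn(unpack_fn(z)) - logdet
    return transformed_potential_fn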