def main(args):
    """Run the baseball batting-average example for three pooling models.

    Fixes vs. original: ``pd.read_csv`` is given ``sep="\t"`` explicitly
    (the second positional argument of ``read_csv`` is not the separator in
    modern pandas), and the triplicated per-model evaluation is factored
    into a local helper.
    """
    baseball_dataset = pd.read_csv(DATA_URL, sep="\t")
    train, _, player_names = train_test_split(baseball_dataset)
    at_bats, hits = train[:, 0], train[:, 1]
    nuts_kernel = NUTS(conditioned_model, adapt_step_size=True)
    logging.info("Original Dataset:")
    logging.info(baseball_dataset)

    def run_and_evaluate(model, model_name, site_label, sites, transforms=None):
        # Sample the posterior, log a summary of the requested site(s),
        # then run posterior-predictive checks and log predictive density.
        posterior = MCMC(nuts_kernel, num_samples=args.num_samples,
                         warmup_steps=args.warmup_steps) \
            .run(model, at_bats, hits)
        header = "Model: " + model_name
        logging.info("\n" + header)
        logging.info("=" * len(header))
        logging.info("\n" + site_label + ":")
        # Only forward `transforms` when given, so models without one keep
        # the original call signature.
        extra = {} if transforms is None else {"transforms": transforms}
        logging.info(
            summary(posterior, sites=sites, player_names=player_names,
                    **extra)[sites[0]])
        posterior_predictive = TracePredictive(model, posterior,
                                               num_samples=args.num_samples)
        sample_posterior_predictive(posterior_predictive, baseball_dataset)
        evaluate_log_predictive_density(model, posterior, baseball_dataset)

    # (1) Full Pooling Model
    run_and_evaluate(fully_pooled, "Fully Pooled", "phi", ["phi"])
    # (2) No Pooling Model
    run_and_evaluate(not_pooled, "Not Pooled", "phi", ["phi"])
    # (3) Partially Pooled Model
    run_and_evaluate(partially_pooled, "Partially Pooled", "Sigmoid(alpha)",
                     ["alpha"],
                     transforms={"alpha": lambda x: 1. / (1 + np.exp(-x))})
def pyro_centered_schools(data, draws, chains):
    """Centered eight schools implementation in Pyro.

    Note there is not really a deterministic node in pyro, so I do not know
    how to do a non-centered implementation.
    """
    del chains
    obs = torch.Tensor(data['y']).type(torch.Tensor)
    scale = torch.Tensor(data['sigma']).type(torch.Tensor)

    kernel = NUTS(_pyro_conditioned_model, adapt_step_size=True)
    posterior = MCMC(  # pylint:disable=not-callable
        kernel,
        num_samples=draws,
        warmup_steps=500,
    ).run(_pyro_centered_model, scale, obs)

    # Strip unpicklable attributes so the posterior can be pickled.
    for trace in posterior.exec_traces:
        for node in trace.nodes.values():
            node.pop('fn', None)
    posterior.kernel = None
    posterior.run = None
    posterior.logger = None
    return posterior
def fit(self, inputs, targets, *args, **kwargs):
    """Run NUTS and cache the flattened posterior samples of site "t"."""
    self.mcmc_run = MCMC(self.kernel, num_samples=self.num_samples,
                         warmup_steps=100).run(inputs, targets)
    # Flatten the marginal support of "t" into a single sample tensor.
    support = self.mcmc_run.marginal(sites="t").support(flatten=True)
    self.all_samples = torch.cat(list(support.values()), dim=-1)
def test_gaussian_hmm(num_steps):
    """Smoke-test NUTS on an enumerated Gaussian HMM of length ``num_steps``."""
    num_states = 4

    def model(data):
        initialize = pyro.sample(
            "initialize", dist.Dirichlet(torch.ones(num_states))
        )
        with pyro.plate("states", num_states):
            transition = pyro.sample(
                "transition", dist.Dirichlet(torch.ones(num_states, num_states))
            )
            emission_loc = pyro.sample(
                "emission_loc",
                dist.Normal(torch.zeros(num_states), torch.ones(num_states)),
            )
            emission_scale = pyro.sample(
                "emission_scale",
                dist.LogNormal(torch.zeros(num_states), torch.ones(num_states)),
            )
        x = None
        with ignore_jit_warnings([("Iterating over a tensor", RuntimeWarning)]):
            for t, y in pyro.markov(enumerate(data)):
                # Hidden state is enumerated in parallel rather than sampled.
                x = pyro.sample(
                    "x_{}".format(t),
                    dist.Categorical(initialize if x is None else transition[x]),
                    infer={"enumerate": "parallel"},
                )
                pyro.sample(
                    "y_{}".format(t),
                    dist.Normal(emission_loc[x], emission_scale[x]),
                    obs=y,
                )

    def _get_initial_trace():
        # Warm-start NUTS from an SVI fit over the non-enumerated sites.
        guide = AutoDelta(
            poutine.block(
                model,
                expose_fn=lambda msg: not msg["name"].startswith("x")
                and not msg["name"].startswith("y"),
            )
        )
        elbo = TraceEnum_ELBO(max_plate_nesting=1)
        svi = SVI(model, guide, optim.Adam({"lr": 0.01}), elbo)
        for _ in range(100):
            svi.step(data)
        return poutine.trace(guide).get_trace(data)

    def _generate_data():
        transition_probs = torch.rand(num_states, num_states)
        emissions_loc = torch.arange(num_states, dtype=torch.Tensor().dtype)
        emissions_scale = 1.0
        state = torch.tensor(1)
        obs = [dist.Normal(emissions_loc[state], emissions_scale).sample()]
        for _ in range(num_steps):
            state = dist.Categorical(transition_probs[state]).sample()
            obs.append(dist.Normal(emissions_loc[state], emissions_scale).sample())
        return torch.stack(obs)

    data = _generate_data()
    nuts_kernel = NUTS(
        model, max_plate_nesting=1, jit_compile=True, ignore_jit_warnings=True
    )
    if num_steps == 30:
        nuts_kernel.initial_trace = _get_initial_trace()
    MCMC(nuts_kernel, num_samples=5, warmup_steps=5).run(data)
def numpyro_schools_model(data, draws, chains):
    """Centered eight schools implementation in NumPyro."""
    import jax
    import numpyro
    import numpyro.distributions as dist
    from numpyro.mcmc import MCMC, NUTS

    def model():
        mu = numpyro.sample("mu", dist.Normal(0, 5))
        tau = numpyro.sample("tau", dist.HalfCauchy(5))
        # TODO: use numpyro.plate or `sample_shape` kwargs instead of  # pylint: disable=fixme
        # multiplying with np.ones(J) in future versions of NumPyro
        theta = numpyro.sample("theta", dist.Normal(mu * np.ones(data["J"]), tau))
        numpyro.sample("obs", dist.Normal(theta, data["sigma"]), obs=data["y"])

    sampler = MCMC(
        NUTS(model),
        num_warmup=draws,
        num_samples=draws,
        num_chains=chains,
        chain_method="sequential",
    )
    sampler.run(jax.random.PRNGKey(0), collect_fields=("z", "diverging"))
    return sampler
def _main(self, ratings, sigma, args):
    """Fit the temporal matrix-factorization model with NUTS and report RMSE.

    ``ratings`` is shuffled in place, split 90/10 into train/validation,
    posterior means are extracted from the flattened marginal, and train
    and validation RMSE are printed.

    Fix: the original used Python 2's ``xrange``, which raises ``NameError``
    on Python 3; replaced with ``range``.
    """
    self.n_thread = args.n_threads

    # split data to training & testing (90/10)
    train_pct = 0.9
    rand_state.shuffle(ratings)
    train_size = int(train_pct * ratings.shape[0])
    train = ratings[:train_size]
    validation = ratings[train_size:]
    self.data = train

    nuts_kernel = NUTS(self._conditioned_model, jit_compile=args.jit)
    posterior = MCMC(nuts_kernel,
                     num_samples=args.num_samples,
                     warmup_steps=args.warmup_steps,
                     num_chains=args.num_chains,
                     disable_progbar=False).run(self._model, sigma, train)

    sites = (['mu_item', 'mu_user']
             + ['u_temp_feature' + str(user_id) for user_id in range(self.n_user)]
             + ['i_temp_feature' + str(item_id) for item_id in range(self.n_item)])
    marginal = posterior.marginal(sites=sites)
    marginal = torch.cat(list(marginal.support(flatten=True).values()),
                         dim=-1).cpu().numpy()

    # Posterior means for each block of the flattened sample matrix.
    avg_mu_item = np.average(marginal[:, :self.n_feature], axis=0)
    avg_mu_user = np.average(
        marginal[:, self.n_feature:2 * self.n_feature], axis=0)
    avg_u_temp_feature = np.average(
        marginal[:, 2 * self.n_feature:
                 2 * self.n_feature + self.n_user * self.n_feature]
        .reshape(-1, self.n_user, self.n_feature), axis=0)
    avg_i_temp_feature = np.average(
        marginal[:, 2 * self.n_feature + self.n_user * self.n_feature:]
        .reshape(-1, self.n_item, self.n_feature), axis=0)

    train_preds = self._predict(train[:, :2], True,
                                avg_u_temp_feature, avg_i_temp_feature)
    train_rmse = RMSE(train_preds, train[:, 2])
    val_preds = self._predict(validation[:, :2], True,
                              avg_u_temp_feature, avg_i_temp_feature)
    val_rmse = RMSE(val_preds, validation[:, 2])
    print("After %d iteration, train RMSE: %.6f, validation RMSE: %.6f" %
          (self.iter_, train_rmse, val_rmse))
def test_saasbo_sample(self):
    """Check SAAS / input-warping sample sites for all flag combinations."""
    for use_saas, use_input_warping in product((False, True), repeat=2):
        with torch.random.fork_rng():
            torch.manual_seed(0)
            train_x = torch.randn(3, 2)
            train_y = torch.randn(3, 1)
            train_yvar = torch.randn(3, 1)
            kernel = NUTS(pyro_model, max_tree_depth=1)
            mcmc = MCMC(kernel, warmup_steps=0, num_samples=1)
            mcmc.run(
                train_x,
                train_y,
                train_yvar,
                use_input_warping=use_input_warping,
                use_saas=use_saas,
            )
            samples = mcmc.get_samples()
            # SAAS replaces raw lengthscales with tausq + inverse lengths.
            if use_saas:
                self.assertIn("kernel_tausq", samples)
                self.assertIn("_kernel_inv_length_sq", samples)
                self.assertNotIn("lengthscale", samples)
            else:
                self.assertNotIn("kernel_tausq", samples)
                self.assertNotIn("_kernel_inv_length_sq", samples)
                self.assertIn("lengthscale", samples)
            # Input warping adds the two Kumaraswamy concentration sites.
            if use_input_warping:
                self.assertIn("c0", samples)
                self.assertIn("c1", samples)
            else:
                self.assertNotIn("c0", samples)
                self.assertNotIn("c1", samples)
def test_gamma_poisson(hyperpriors):
    """Collapsed Gamma-Poisson: the posterior rate mean recovers the truth."""

    def model(data):
        with pyro.plate("latent_dim", data.shape[1]):
            if hyperpriors:
                alpha = pyro.sample("alpha", dist.HalfCauchy(1.0))
                beta = pyro.sample("beta", dist.HalfCauchy(1.0))
            else:
                alpha = torch.tensor([1.0, 1.0])
                beta = torch.tensor([1.0, 1.0])
            pair = GammaPoissonPair()
            rate = pyro.sample("rate", pair.latent(alpha, beta))
            with pyro.plate("data", data.shape[0]):
                pyro.sample("obs", pair.conditional(rate), obs=data)

    true_rate = torch.tensor([3.0, 10.0])
    num_samples = 100
    data = dist.Poisson(rate=true_rate).sample(sample_shape=(torch.Size((100,))))
    kernel = NUTS(collapse_conjugate(model), jit_compile=True,
                  ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    posterior = posterior_replay(model, mcmc.get_samples(), data,
                                 num_samples=num_samples)
    assert_equal(posterior["rate"].mean(0), true_rate, prec=0.3)
def compute_posteriors(self, x_obs: torch.Tensor, mcmc_kwargs: dict = None):
    """Sample the latent site "z" of ``self.pyro_mdl`` with NUTS.

    :param x_obs: observed data tensor passed to the model.
    :param mcmc_kwargs: options forwarded to ``MCMC``; defaults to
        ``{"num_samples": 1000, "warmup_steps": 1000, "num_chains": 4}``.
    :return: tuple of (samples of "z", the marginals object).
    """
    if mcmc_kwargs is None:
        mcmc_kwargs = {
            "num_samples": 1000,
            "warmup_steps": 1000,
            "num_chains": 4,
        }
    kernel = NUTS(
        self.pyro_mdl,
        adapt_step_size=True,
        max_plate_nesting=1,
        jit_compile=True,
        target_accept_prob=0.6,
    )
    run = MCMC(kernel, **mcmc_kwargs).run(data=x_obs)
    marginals = run.marginal(sites=["z"])
    z_samples = marginals.support()["z"]
    return z_samples, marginals
def test_nuts_conjugate_gaussian(
    fixture,
    num_samples,
    warmup_steps,
    expected_means,
    expected_precs,
    mean_tol,
    std_tol,
):
    """Compare NUTS posterior moments to analytic values for a Gaussian chain."""
    pyro.get_param_store().clear()
    mcmc = MCMC(NUTS(fixture.model), num_samples, warmup_steps)
    mcmc.run(fixture.data)
    samples = mcmc.get_samples()
    for idx in range(1, fixture.chain_len + 1):
        name = "loc_" + str(idx)
        latent = samples[name]
        actual_mean = latent.mean(0)
        actual_std = latent.std(0)
        expected_mean = torch.ones(fixture.dim) * expected_means[idx - 1]
        expected_std = 1 / torch.sqrt(
            torch.ones(fixture.dim) * expected_precs[idx - 1])

        # Actual vs expected posterior means for the latents
        logger.debug("Posterior mean (actual) - {}".format(name))
        logger.debug(actual_mean)
        logger.debug("Posterior mean (expected) - {}".format(name))
        logger.debug(expected_mean)
        assert_equal(rmse(actual_mean, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.debug("Posterior std (actual) - {}".format(name))
        logger.debug(actual_std)
        logger.debug("Posterior std (expected) - {}".format(name))
        logger.debug(expected_std)
        assert_equal(rmse(actual_std, expected_std).item(), 0.0, prec=std_tol)
def test_beta_binomial(hyperpriors):
    """Collapsed Beta-Binomial: recovered success probabilities match truth."""

    def model(data):
        with pyro.plate("plate_0", data.shape[-1]):
            if hyperpriors:
                alpha = pyro.sample("alpha", dist.HalfCauchy(1.))
                beta = pyro.sample("beta", dist.HalfCauchy(1.))
            else:
                alpha = torch.tensor([1., 1.])
                beta = torch.tensor([1., 1.])
            beta_binom = BetaBinomialPair()
            with pyro.plate("plate_1", data.shape[-2]):
                probs = pyro.sample("probs", beta_binom.latent(alpha, beta))
                with pyro.plate("data", data.shape[0]):
                    # `total_count` is resolved from the enclosing scope at call time.
                    pyro.sample(
                        "binomial",
                        beta_binom.conditional(probs=probs,
                                               total_count=total_count),
                        obs=data,
                    )

    true_probs = torch.tensor([[0.7, 0.4], [0.6, 0.4]])
    total_count = torch.tensor([[1000, 600], [400, 800]])
    num_samples = 80
    data = dist.Binomial(
        total_count=total_count, probs=true_probs
    ).sample(sample_shape=(torch.Size((10,))))
    kernel = NUTS(collapse_conjugate(model), jit_compile=True,
                  ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    posterior = posterior_replay(model, mcmc.get_samples(), data,
                                 num_samples=num_samples)
    assert_equal(posterior["probs"].mean(0), true_probs, prec=0.05)
def test_gaussian_mixture_model(jit):
    """NUTS on a 3-component GMM recovers mixing proportions and means."""
    K, N = 3, 1000

    def gmm(data):
        mix_proportions = pyro.sample("phi", dist.Dirichlet(torch.ones(K)))
        with pyro.plate("num_clusters", K):
            cluster_means = pyro.sample(
                "cluster_means", dist.Normal(torch.arange(float(K)), 1.0)
            )
        with pyro.plate("data", data.shape[0]):
            assignments = pyro.sample(
                "assignments", dist.Categorical(mix_proportions)
            )
            pyro.sample("obs", dist.Normal(cluster_means[assignments], 1.0),
                        obs=data)
        return cluster_means

    true_cluster_means = torch.tensor([1.0, 5.0, 10.0])
    true_mix_proportions = torch.tensor([0.1, 0.3, 0.6])
    sampled_assignments = dist.Categorical(true_mix_proportions).sample(
        torch.Size((N,))
    )
    data = dist.Normal(true_cluster_means[sampled_assignments], 1.0).sample()
    kernel = NUTS(gmm, max_plate_nesting=1, jit_compile=jit,
                  ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=300, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    # Sort to make the comparison invariant to label switching.
    assert_equal(samples["phi"].mean(0).sort()[0], true_mix_proportions,
                 prec=0.05)
    assert_equal(samples["cluster_means"].mean(0).sort()[0],
                 true_cluster_means, prec=0.2)
def sample(draws=500, model=None, warmup_steps=None, num_chains=1, kernel='nuts'):
    """Markov-chain Monte Carlo sampling.

    Sampling should be run within the context of a model or the model should
    be passed as an argument `model` explicitly. Number of samples is given
    by `draws` which defaults to `500`. Warm-up steps default to 30% of the
    sample count. The MCMC kernel is selected by `kernel`; `hmc` and `nuts`
    are available. Returns a trace of samples.

    Fixes vs. original: only the requested kernel is constructed (the
    original eagerly built both NUTS and HMC), and an explicit
    ``warmup_steps=0`` is honoured instead of being replaced by the
    30% default (``is None`` check rather than truthiness).
    """
    # get model from context
    if model is None:
        model = Context.get_context()
    stfn = model.stfn   # stochastic function from model
    data = model.args   # observed data
    # build only the requested kernel
    if kernel == 'nuts':
        mcmc_kernel = NUTS(stfn, adapt_step_size=True)
    elif kernel == 'hmc':
        mcmc_kernel = HMC(stfn)
    else:
        # Preserve the original dict-lookup failure mode for unknown names.
        raise KeyError(kernel)
    # if not num_chains:  # figure out number of chains
    #     num_chains = max(os.cpu_count() - 1, 2)
    if warmup_steps is None:   # figure out warm-up steps
        warmup_steps = int(0.3 * draws)
    # run MCMC
    mcmc = MCMC(mcmc_kernel, num_samples=draws,
                warmup_steps=warmup_steps, num_chains=num_chains)
    mcmc.run(*data)
    # get num samples
    return mcmc.get_samples()
class BinomialQuadraticApproximator():
    """Approximate a Binomial model's posterior via SVI (Laplace) or MCMC."""

    def __init__(self, X, N, n_steps, learning_rate, prior_type, infer_type):
        self.X = X                      # observed successes
        self.N = N                      # total trial count
        self.n_steps = n_steps
        self.prior_type = prior_type    # 'uniform' or 'beta'
        self.infer_type = infer_type    # 'svi' or 'mcmc'
        self.optimiser = pyro.optim.Adam({'lr': learning_rate})
        self.map_guide = AutoLaplaceApproximation(self.model)

    def plot(self):
        """Plot the loss curve and the evolving posterior mean/scale."""
        plt.subplot(3, 1, 1)
        plt.plot(self.losses)
        plt.title('losses')
        plt.subplot(3, 1, 2)
        plt.plot(self._posterior_approximate_mean)
        plt.title('Posterior Mean')
        plt.subplot(3, 1, 3)
        plt.plot(self._posterior_approximate_scale)
        plt.title('Posterior Scale (Variance)')

    def train(self, **kwargs):
        """Dispatch to the trainer selected by ``infer_type``."""
        pyro.clear_param_store()
        if self.infer_type == 'svi':
            self._svi_trainer(**kwargs)
        elif self.infer_type == 'mcmc':
            self._mcmc_trainer(**kwargs)

    def _svi_trainer(self):
        """Fit the Laplace guide with SVI, recording loss and posterior moments."""
        svi = SVI(self.model, self.map_guide, self.optimiser, Trace_ELBO())
        self.losses = []
        self._posterior_approximate_mean = []
        self._posterior_approximate_scale = []
        for step in range(self.n_steps):
            loss = svi.step(self.X)
            self.losses.append(loss)
            approx = self.map_guide.laplace_approximation(self.X).get_posterior()
            self._posterior_approximate_mean.append(approx.loc.item())
            self._posterior_approximate_scale.append(approx.scale_tril.item())
            if step % 50 == 0:
                print('[iter {}] loss: {:.4f}'.format(step, loss))

    def _mcmc_trainer(self, step_size=0.1, num_samples=1000, warmup_steps=100):
        """Sample the posterior with NUTS."""
        nuts = NUTS(self.model, step_size=step_size)
        self.mcmc = MCMC(nuts, num_samples=num_samples,
                         warmup_steps=warmup_steps)
        self.mcmc.run(self.X)

    def model(self, data):
        # NOTE(review): `data` is unused — the observation is always self.X;
        # an unrecognized prior_type leaves `p` unbound (NameError). Both
        # behaviors are preserved from the original.
        if self.prior_type == 'uniform':
            p = pyro.sample('p', dist.Uniform(0, 1))
        elif self.prior_type == 'beta':
            p = pyro.sample('p', dist.Beta(10.0, 10.0))
        return pyro.sample('obs',
                           dist.Binomial(total_count=self.N, probs=p),
                           obs=self.X)
def fit(self):
    """Clear the param store, run NUTS, and build the posterior sample frame."""
    pyro.clear_param_store()
    self.kernel = NUTS(self.model)
    self.mcmc = MCMC(
        self.kernel,
        warmup_steps=self.warmup_steps,
        num_samples=self.num_samples,
    )
    self.mcmc.run()
    self._posterior_sample_df()
def train_nuts(model, data, num_warmup, num_samples, num_chains=1, **kwargs):
    """Run NUTS on ``model`` (called with ``training=True``) and return the engine.

    Extra keyword arguments override the default NUTS options.
    """
    options = dict(adapt_step_size=True, adapt_mass_matrix=True, jit_compile=True)
    options.update(kwargs)
    print(options)
    sampler = MCMC(NUTS(model, **options), num_samples, num_warmup,
                   num_chains=num_chains)
    sampler.run(data, training=True)
    return sampler
def fit_fully_bayesian_model_nuts(
    model: SaasFullyBayesianSingleTaskGP,
    max_tree_depth: int = 6,
    warmup_steps: int = 512,
    num_samples: int = 256,
    thinning: int = 16,
    disable_progbar: bool = False,
) -> None:
    r"""Fit a fully Bayesian model using the No-U-Turn-Sampler (NUTS)

    Args:
        model: SaasFullyBayesianSingleTaskGP to be fitted.
        max_tree_depth: Maximum tree depth for NUTS
        warmup_steps: The number of burn-in steps for NUTS.
        num_samples: The number of MCMC samples. Note that with thinning,
            num_samples / thinning samples are retained.
        thinning: The amount of thinning. Every nth sample is retained.
        disable_progbar: A boolean indicating whether to print the progress
            bar and diagnostics during MCMC.

    Example:
        >>> gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
        >>> fit_fully_bayesian_model_nuts(gp)
    """
    model.train()

    # Do inference with NUTS
    nuts_kernel = NUTS(
        model.pyro_model.sample,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        nuts_kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run()

    # Post-process, then thin: keep every `thinning`-th sample per site.
    raw_samples = model.pyro_model.postprocess_mcmc_samples(
        mcmc_samples=mcmc.get_samples()
    )
    thinned = {name: values[::thinning] for name, values in raw_samples.items()}

    # Load the MCMC samples back into the BoTorch model
    model.load_mcmc_samples(thinned)
    model.eval()
def test_structured_mass():
    """Adapted inverse mass matrix should approximate true block covariances."""

    def model(cov):
        w = pyro.sample("w", dist.Normal(0, 1000).expand([2]).to_event(1))
        x = pyro.sample("x", dist.Normal(0, 1000).expand([1]).to_event(1))
        y = pyro.sample("y", dist.Normal(0, 1000).expand([1]).to_event(1))
        z = pyro.sample("z", dist.Normal(0, 1000).expand([1]).to_event(1))
        wxyz = torch.cat([w, x, y, z])
        pyro.sample("obs", dist.MultivariateNormal(torch.zeros(5), cov),
                    obs=wxyz)

    w_cov = torch.tensor([[1.5, 0.5], [0.5, 1.5]])
    xy_cov = torch.tensor([[2., 1.], [1., 3.]])
    z_var = torch.tensor([2.5])
    cov = torch.zeros(5, 5)
    cov[:2, :2] = w_cov
    cov[2:4, 2:4] = xy_cov
    cov[4, 4] = z_var

    # smoke tests: dense and diagonal mass matrices both run
    for dense_mass in (True, False):
        kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True,
                      full_mass=dense_mass)
        MCMC(kernel, num_samples=1, warmup_steps=1).run(cov)
        assert kernel.inverse_mass_matrix[("w", "x", "y", "z")].dim() \
            == 1 + int(dense_mass)

    kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True,
                  full_mass=[("w",), ("x", "y")])
    MCMC(kernel, num_samples=1, warmup_steps=1000).run(cov)
    assert_close(kernel.inverse_mass_matrix[("w",)], w_cov, atol=0.5, rtol=0.5)
    assert_close(kernel.inverse_mass_matrix[("x", "y")], xy_cov,
                 atol=0.5, rtol=0.5)
    assert_close(kernel.inverse_mass_matrix[("z",)], z_var, atol=0.5, rtol=0.5)
def test_gp_kernels(self):
    """An unsupported ``gp_kernel`` name must raise ``ValueError``."""
    torch.manual_seed(0)
    train_x = torch.randn(3, 2)
    train_y = torch.randn(3, 1)
    train_yvar = torch.randn(3, 1)
    kernel = NUTS(single_task_pyro_model, max_tree_depth=1)
    with self.assertRaises(ValueError):
        mcmc = MCMC(kernel, warmup_steps=0, num_samples=1)
        mcmc.run(
            train_x,
            train_y,
            train_yvar,
            gp_kernel="some_kernel_we_dont_support",
        )
def test_arrowhead_mass():
    """Arrowhead mass-matrix adaptation approximates the true precision blocks."""

    def model(prec):
        w = pyro.sample("w", dist.Normal(0, 1000).expand([2]).to_event(1))
        x = pyro.sample("x", dist.Normal(0, 1000).expand([1]).to_event(1))
        y = pyro.sample("y", dist.Normal(0, 1000).expand([1]).to_event(1))
        z = pyro.sample("z", dist.Normal(0, 1000).expand([2]).to_event(1))
        wyxz = torch.cat([w, y, x, z])
        pyro.sample(
            "obs",
            dist.MultivariateNormal(torch.zeros(6), precision_matrix=prec),
            obs=wyxz,
        )

    A = torch.randn(6, 12)
    prec = A @ A.t() * 0.1

    # smoke tests: dense and diagonal mass matrices both run
    for dense_mass in (True, False):
        kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True,
                      full_mass=dense_mass)
        MCMC(kernel, num_samples=1, warmup_steps=1).run(prec)
        assert kernel.inverse_mass_matrix[("w", "x", "y", "z")].dim() \
            == 1 + int(dense_mass)

    kernel = NUTS(model, jit_compile=True, ignore_jit_warnings=True,
                  full_mass=[("w",), ("y", "x")])
    kernel.mass_matrix_adapter = ArrowheadMassMatrix()
    MCMC(kernel, num_samples=1, warmup_steps=1000).run(prec)
    assert ("w", "y", "x", "z") in kernel.inverse_mass_matrix
    mass_matrix = kernel.mass_matrix_adapter.mass_matrix[("w", "y", "x", "z")]
    assert mass_matrix.top.shape == (4, 6)
    assert mass_matrix.bottom_diag.shape == (2,)
    assert_close(mass_matrix.top, prec[:4], atol=0.2, rtol=0.2)
    assert_close(mass_matrix.bottom_diag, prec.diag()[4:], atol=0.2, rtol=0.2)
def test_pyro_sampling(self):
    """End-to-end NUTS sampling through GPyTorch priors; skipped without pyro."""
    try:
        import pyro  # noqa
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:
        return
    train_x, test_x, train_y, test_y = self._get_data(cuda=False)
    likelihood = GaussianLikelihood(
        noise_constraint=gpytorch.constraints.Positive())
    gp_model = ExactGPModel(train_x, train_y, likelihood)

    # Register normal GPyTorch priors
    gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1),
                                        "constant")
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
    gp_model.covar_module.register_prior("outputscale_prior",
                                         UniformPrior(1, 2), "outputscale")
    likelihood.register_prior("noise_prior", UniformPrior(0.05, 0.3), "noise")

    def pyro_model(x, y):
        with gpytorch.settings.fast_computations(False, False, False):
            sampled_model = gp_model.pyro_sample_from_prior()
            output = sampled_model.likelihood(sampled_model(x))
            pyro.sample("obs", output, obs=y)
        return y

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
    mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20,
                    disable_progbar=True)
    mcmc_run.run(train_x, train_y)
    gp_model.pyro_load_from_samples(mcmc_run.get_samples())

    gp_model.eval()
    expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
    output = gp_model(expanded_test_x)
    self.assertEqual(output.mean.size(0), 3)

    # All 3 samples should do reasonably well on a noiseless dataset.
    for sample_idx in range(3):
        self.assertLess(
            torch.norm(output.mean[sample_idx] - test_y) / test_y.norm(), 0.2)
def nuts_fit(model, data, num_samples=None, warmup_steps=None, var_names=None):
    """
    Use No U-turn Sampler for inferring latent variables
    """
    # Defaults: 80% of the data size as samples, the remainder as warm-up.
    if num_samples is None:
        num_samples = int(0.8 * len(data))
    if warmup_steps is None:
        warmup_steps = len(data) - num_samples
    kernel = NUTS(model, adapt_step_size=True)
    trace = MCMC(kernel, num_samples=num_samples,
                 warmup_steps=warmup_steps).run(data)
    posteriors = None if var_names is None else trace.marginal(var_names).empirical
    return trace, posteriors
def test_gamma_beta(jit):
    """NUTS recovers Beta concentration parameters under Gamma priors."""

    def model(data):
        alpha_prior = pyro.sample('alpha',
                                  dist.Gamma(concentration=1., rate=1.))
        beta_prior = pyro.sample('beta',
                                 dist.Gamma(concentration=1., rate=1.))
        pyro.sample('x',
                    dist.Beta(concentration1=alpha_prior,
                              concentration0=beta_prior),
                    obs=data)

    true_alpha = torch.tensor(5.)
    true_beta = torch.tensor(1.)
    data = dist.Beta(concentration1=true_alpha,
                     concentration0=true_beta).sample(torch.Size((5000,)))
    kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=500, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["alpha"].mean(0), true_alpha, prec=0.08)
    assert_equal(samples["beta"].mean(0), true_beta, prec=0.05)
def test_dirichlet_categorical(jit):
    """NUTS posterior mean for Dirichlet-Categorical matches true probabilities."""

    def model(data):
        concentration = torch.tensor([1.0, 1.0, 1.0])
        p_latent = pyro.sample("p_latent", dist.Dirichlet(concentration))
        pyro.sample("obs", dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(
        sample_shape=(torch.Size((2000,))))
    kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    inferred = mcmc.get_samples()["p_latent"]
    assert_equal(inferred.mean(0), true_probs, prec=0.02)
def test_pyro_sampling(self):
    """End-to-end NUTS sampling through GPyTorch priors; skipped without pyro.

    Fix: the original used a bare ``except:``, which also swallows
    ``KeyboardInterrupt``/``SystemExit``; narrowed to ``ImportError``
    (consistent with the sibling ``test_pyro_sampling`` in this file).
    """
    try:
        import pyro
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:
        return
    train_x, test_x, train_y, test_y = self._get_data(cuda=False)
    likelihood = GaussianLikelihood(
        noise_constraint=gpytorch.constraints.Positive())
    gp_model = ExactGPModel(train_x, train_y, likelihood)

    # Register normal GPyTorch priors
    gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1),
                                        "constant")
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", UniformPrior(0.01, 0.2), "lengthscale")
    gp_model.covar_module.register_prior("outputscale_prior",
                                         UniformPrior(1, 2), "outputscale")
    likelihood.register_prior("noise_prior", LogNormalPrior(-1.5, 0.1),
                              "noise")

    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    def pyro_model(x, y):
        gp_model.pyro_sample_from_prior()
        output = gp_model(x)
        loss = mll.pyro_factor(output, y)
        return y

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
    mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20)
    mcmc_run.run(train_x, train_y)
    gp_model.pyro_load_from_samples(mcmc_run.get_samples())

    gp_model.eval()
    expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
    output = gp_model(expanded_test_x)
    self.assertEqual(output.mean.size(0), 3)

    # All 3 samples should do reasonably well on a noiseless dataset.
    self.assertLess(
        torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
    self.assertLess(
        torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
    self.assertLess(
        torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
def test_beta_bernoulli(step_size, adapt_step_size, adapt_mass_matrix, full_mass):
    """NUTS recovers Bernoulli success probabilities under a Beta prior."""

    def model(data):
        alpha = torch.tensor([1.1, 1.1])
        beta = torch.tensor([1.1, 1.1])
        p_latent = pyro.sample("p_latent", dist.Beta(alpha, beta))
        pyro.sample("obs", dist.Bernoulli(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.9, 0.1])
    data = dist.Bernoulli(true_probs).sample(
        sample_shape=(torch.Size((1000,))))
    kernel = NUTS(model,
                  step_size=step_size,
                  adapt_step_size=adapt_step_size,
                  adapt_mass_matrix=adapt_mass_matrix,
                  full_mass=full_mass)
    mcmc = MCMC(kernel, num_samples=400, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_probs, prec=0.02)
def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps, device):
    """Train with HMC batch-by-batch and build an ensemble of posterior nets."""
    print("\n == HMC training ==")
    pyro.clear_param_store()

    # Spread the requested sample budget across the batches.
    num_batches = int(len(train_loader.dataset) / train_loader.batch_size)
    batch_samples = int(n_samples / num_batches) + 1
    print("\nn_batches=", num_batches, "\tbatch_samples =", batch_samples)

    kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
    mcmc = MCMC(kernel=kernel, num_samples=batch_samples,
                warmup_steps=warmup, num_chains=1)

    start = time.time()
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        labels = y_batch.to(device).argmax(-1)
        mcmc.run(x_batch, labels)
    execution_time(start=start, end=time.time())

    self.posterior_predictive = {}
    posterior_samples = mcmc.get_samples(n_samples)
    state_dict_keys = list(self.basenet.state_dict().keys())
    if DEBUG:
        print("\n", list(posterior_samples.values())[-1])

    # Materialize one network copy per posterior sample.
    for model_idx in range(n_samples):
        net_copy = copy.deepcopy(self.basenet)
        model_dict = OrderedDict({})
        for weight_idx, weights in enumerate(posterior_samples.values()):
            model_dict.update(
                {state_dict_keys[weight_idx]: weights[model_idx]})
        net_copy.load_state_dict(model_dict)
        self.posterior_predictive.update({str(model_idx): net_copy})
        if DEBUG:
            print("\n", weights[model_idx])
    self.save()
def nested():
    """Run a tiny MCMC over a conditioned Binomial model and return the run."""
    true_probs = torch.ones(5) * 0.7
    num_trials = torch.ones(5) * 1000
    num_success = dist.Binomial(num_trials, true_probs).sample()
    conditioned = poutine.condition(model, data={"obs": num_success})
    kernel = NUTS(conditioned, adapt_step_size=True)
    return MCMC(kernel, num_samples=10, warmup_steps=2).run(num_trials)
def test_gamma_normal(jit, use_multinomial_sampling):
    """NUTS recovers the Normal scale under a Gamma prior.

    NOTE(review): the locals are named (rate, concentration) but are passed
    positionally to ``dist.Gamma(concentration, rate)`` — the names look
    swapped; both values are 1.0 here so the prior is unaffected.
    """

    def model(data):
        rate = torch.tensor([1.0, 1.0])
        concentration = torch.tensor([1.0, 1.0])
        p_latent = pyro.sample('p_latent', dist.Gamma(rate, concentration))
        pyro.sample("obs", dist.Normal(3, p_latent), obs=data)
        return p_latent

    true_std = torch.tensor([0.5, 2])
    data = dist.Normal(3, true_std).sample(
        sample_shape=(torch.Size((2000,))))
    kernel = NUTS(model,
                  use_multinomial_sampling=use_multinomial_sampling,
                  jit_compile=jit,
                  ignore_jit_warnings=True)
    mcmc = MCMC(kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_std, prec=0.05)
def run(param):
    """Sample latent ``z`` for one (model, target) job and save it to disk.

    ``param`` unpacks to (nn_model, p_tgt, save_fn, args). Skips the job if
    the output file already exists and overwriting is disabled.

    Fix: the placeholder file is now written via a ``with`` block so the
    handle is closed even if the write raises (the original used
    open/write/close without a context manager).
    """
    nn_model, p_tgt, save_fn, args = param
    if (not args.overwrite) and os.path.isfile(save_fn):
        print(save_fn + ' already exists!')
        return
    if not os.path.isfile(save_fn):
        # write the file first to signal working on it.
        with open(save_fn, 'w') as fo:
            fo.write('\n')
    nuts = NUTS(program_arbitrary)
    mcmc = MCMC(nuts, num_samples=args.num_samples,
                warmup_steps=args.num_warmups, num_chains=args.num_chains)
    mcmc.run(nn_model, p_tgt)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)