def sample(draws=500, model=None, warmup_steps=None, num_chains=1, kernel='nuts'):
    """Markov-chain Monte Carlo sampling.

    Sampling should be run within the context of a model, or the model
    should be passed explicitly via the `model` argument. The number of
    samples is given by `draws`, which defaults to `500`. If not given,
    warm-up steps default to 30% of the sample count. The MCMC kernel is
    selected via `kernel`; `'hmc'` and `'nuts'` are available.
    `pmpyro.inference.sample` returns a trace of samples.
    """
    # get model from context
    if model is None:
        model = Context.get_context()
    stfn = model.stfn  # get stochastic function from model
    data = model.args  # get data
    # make sampling kernels
    kernels = {'nuts': NUTS(stfn, adapt_step_size=True), 'hmc': HMC(stfn)}
    # if not num_chains:  # figure out number of chains
    #     num_chains = max(os.cpu_count() - 1, 2)
    if not warmup_steps:  # figure out warm-up steps
        warmup_steps = int(0.3 * draws)
    # run MCMC
    mcmc = MCMC(kernels[kernel], num_samples=draws,
                warmup_steps=warmup_steps, num_chains=num_chains)
    mcmc.run(*data)
    return mcmc.get_samples()
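# A minimal usage sketch based on the docstring above. `my_model` is a
# hypothetical pmpyro model object; per the docstring, sampling can run
# inside the model context or take the model explicitly.
with my_model:
    trace = sample(draws=1000)  # NUTS by default, warm-up = 300 steps
trace = sample(draws=1000, model=my_model, kernel='hmc')  # or pass the model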
def test_saasbo_sample(self):
    for use_saas, use_input_warping in product((False, True), repeat=2):
        with torch.random.fork_rng():
            torch.manual_seed(0)
            X = torch.randn(3, 2)
            Y = torch.randn(3, 1)
            Yvar = torch.randn(3, 1)
            kernel = NUTS(pyro_model, max_tree_depth=1)
            mcmc = MCMC(kernel, warmup_steps=0, num_samples=1)
            mcmc.run(
                X,
                Y,
                Yvar,
                use_input_warping=use_input_warping,
                use_saas=use_saas,
            )
            samples = mcmc.get_samples()
            if use_saas:
                self.assertTrue("kernel_tausq" in samples)
                self.assertTrue("_kernel_inv_length_sq" in samples)
                self.assertTrue("lengthscale" not in samples)
            else:
                self.assertTrue("kernel_tausq" not in samples)
                self.assertTrue("_kernel_inv_length_sq" not in samples)
                self.assertTrue("lengthscale" in samples)
            if use_input_warping:
                self.assertIn("c0", samples)
                self.assertIn("c1", samples)
            else:
                self.assertNotIn("c0", samples)
                self.assertNotIn("c1", samples)
def test_nuts_conjugate_gaussian(
    fixture,
    num_samples,
    warmup_steps,
    expected_means,
    expected_precs,
    mean_tol,
    std_tol,
):
    pyro.get_param_store().clear()
    nuts_kernel = NUTS(fixture.model)
    mcmc = MCMC(nuts_kernel, num_samples, warmup_steps)
    mcmc.run(fixture.data)
    samples = mcmc.get_samples()
    for i in range(1, fixture.chain_len + 1):
        param_name = "loc_" + str(i)
        latent = samples[param_name]
        latent_loc = latent.mean(0)
        latent_std = latent.std(0)
        expected_mean = torch.ones(fixture.dim) * expected_means[i - 1]
        expected_std = 1 / torch.sqrt(torch.ones(fixture.dim) * expected_precs[i - 1])

        # Actual vs expected posterior means for the latents
        logger.debug("Posterior mean (actual) - {}".format(param_name))
        logger.debug(latent_loc)
        logger.debug("Posterior mean (expected) - {}".format(param_name))
        logger.debug(expected_mean)
        assert_equal(rmse(latent_loc, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.debug("Posterior std (actual) - {}".format(param_name))
        logger.debug(latent_std)
        logger.debug("Posterior std (expected) - {}".format(param_name))
        logger.debug(expected_std)
        assert_equal(rmse(latent_std, expected_std).item(), 0.0, prec=std_tol)
def test_gamma_poisson(hyperpriors):
    def model(data):
        with pyro.plate("latent_dim", data.shape[1]):
            alpha = (
                pyro.sample("alpha", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            beta = (
                pyro.sample("beta", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            gamma_poisson = GammaPoissonPair()
            rate = pyro.sample("rate", gamma_poisson.latent(alpha, beta))
            with pyro.plate("data", data.shape[0]):
                pyro.sample("obs", gamma_poisson.conditional(rate), obs=data)

    true_rate = torch.tensor([3.0, 10.0])
    num_samples = 100
    data = dist.Poisson(rate=true_rate).sample(sample_shape=(torch.Size((100,))))
    hmc_kernel = NUTS(
        collapse_conjugate(model), jit_compile=True, ignore_jit_warnings=True
    )
    mcmc = MCMC(hmc_kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = posterior_replay(model, samples, data, num_samples=num_samples)
    assert_equal(posterior["rate"].mean(0), true_rate, prec=0.3)
def test_gaussian_mixture_model(jit):
    K, N = 3, 1000

    def gmm(data):
        mix_proportions = pyro.sample("phi", dist.Dirichlet(torch.ones(K)))
        with pyro.plate("num_clusters", K):
            cluster_means = pyro.sample(
                "cluster_means", dist.Normal(torch.arange(float(K)), 1.0)
            )
        with pyro.plate("data", data.shape[0]):
            assignments = pyro.sample("assignments", dist.Categorical(mix_proportions))
            pyro.sample("obs", dist.Normal(cluster_means[assignments], 1.0), obs=data)
        return cluster_means

    true_cluster_means = torch.tensor([1.0, 5.0, 10.0])
    true_mix_proportions = torch.tensor([0.1, 0.3, 0.6])
    cluster_assignments = dist.Categorical(true_mix_proportions).sample(
        torch.Size((N,))
    )
    data = dist.Normal(true_cluster_means[cluster_assignments], 1.0).sample()
    nuts_kernel = NUTS(
        gmm, max_plate_nesting=1, jit_compile=jit, ignore_jit_warnings=True
    )
    mcmc = MCMC(nuts_kernel, num_samples=300, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["phi"].mean(0).sort()[0], true_mix_proportions, prec=0.05)
    assert_equal(
        samples["cluster_means"].mean(0).sort()[0], true_cluster_means, prec=0.2
    )
def test_beta_binomial(hyperpriors):
    def model(data):
        with pyro.plate("plate_0", data.shape[-1]):
            alpha = (
                pyro.sample("alpha", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            beta = (
                pyro.sample("beta", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            beta_binom = BetaBinomialPair()
            with pyro.plate("plate_1", data.shape[-2]):
                probs = pyro.sample("probs", beta_binom.latent(alpha, beta))
                with pyro.plate("data", data.shape[0]):
                    pyro.sample(
                        "binomial",
                        beta_binom.conditional(probs=probs, total_count=total_count),
                        obs=data,
                    )

    true_probs = torch.tensor([[0.7, 0.4], [0.6, 0.4]])
    total_count = torch.tensor([[1000, 600], [400, 800]])
    num_samples = 80
    data = dist.Binomial(
        total_count=total_count, probs=true_probs
    ).sample(sample_shape=(torch.Size((10,))))
    hmc_kernel = NUTS(
        collapse_conjugate(model), jit_compile=True, ignore_jit_warnings=True
    )
    mcmc = MCMC(hmc_kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = posterior_replay(model, samples, data, num_samples=num_samples)
    assert_equal(posterior["probs"].mean(0), true_probs, prec=0.05)
def run_inference(
    pyro_model: Callable[[Tensor, Tensor, Tensor, bool, str, float], None],
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    use_input_warping: bool = False,
    max_tree_depth: int = 6,
    use_saas: bool = False,
    disable_progbar: bool = False,
) -> Tensor:
    start = time.time()
    try:
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:  # pragma: no cover
        raise RuntimeError("Cannot call run_inference without pyro installed!")
    kernel = NUTS(
        pyro_model,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run(
        # there is an issue with jit-compilation and cuda;
        # for now, we run MCMC on the CPU.
        X.cpu(),
        Y.cpu(),
        Yvar.cpu(),
        use_input_warping=use_input_warping,
        use_saas=use_saas,
    )
    # this prints the summary
    orig_std_out = sys.stdout.write
    sys.stdout.write = logger.info
    mcmc.summary()
    sys.stdout.write = orig_std_out
    logger.info(f"MCMC elapsed time: {time.time() - start}")
    samples = mcmc.get_samples()
    if use_saas:
        # compute the lengthscale for saas and throw away everything else
        inv_length_sq = (
            samples["kernel_tausq"].unsqueeze(-1) * samples["_kernel_inv_length_sq"]
        )
        samples["lengthscale"] = (1.0 / inv_length_sq).sqrt()  # pyre-ignore [16]
        del samples["kernel_tausq"], samples["_kernel_inv_length_sq"]
    # thin
    for k, v in samples.items():
        # apply thinning and move back to X's device
        samples[k] = v[::thinning].to(device=X.device)
    return samples
def fit_fully_bayesian_model_nuts(
    model: SaasFullyBayesianSingleTaskGP,
    max_tree_depth: int = 6,
    warmup_steps: int = 512,
    num_samples: int = 256,
    thinning: int = 16,
    disable_progbar: bool = False,
) -> None:
    r"""Fit a fully Bayesian model using the No-U-Turn Sampler (NUTS).

    Args:
        model: SaasFullyBayesianSingleTaskGP to be fitted.
        max_tree_depth: Maximum tree depth for NUTS.
        warmup_steps: The number of burn-in steps for NUTS.
        num_samples: The number of MCMC samples. Note that with thinning,
            num_samples / thinning samples are retained.
        thinning: The amount of thinning. Every nth sample is retained.
        disable_progbar: A boolean indicating whether to print the progress bar
            and diagnostics during MCMC.

    Example:
        >>> gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
        >>> fit_fully_bayesian_model_nuts(gp)
    """
    model.train()

    # Do inference with NUTS
    nuts = NUTS(
        model.pyro_model.sample,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        nuts,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run()

    # Get final MCMC samples from the Pyro model
    mcmc_samples = model.pyro_model.postprocess_mcmc_samples(
        mcmc_samples=mcmc.get_samples()
    )
    for k, v in mcmc_samples.items():
        mcmc_samples[k] = v[::thinning]

    # Load the MCMC samples back into the BoTorch model
    model.load_mcmc_samples(mcmc_samples)
    model.eval()
def test_pyro_sampling(self):
    try:
        import pyro  # noqa
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:
        return
    train_x, test_x, train_y, test_y = self._get_data(cuda=False)
    likelihood = GaussianLikelihood(
        noise_constraint=gpytorch.constraints.Positive()
    )
    gp_model = ExactGPModel(train_x, train_y, likelihood)

    # Register normal GPyTorch priors
    gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale"
    )
    gp_model.covar_module.register_prior(
        "outputscale_prior", UniformPrior(1, 2), "outputscale"
    )
    likelihood.register_prior("noise_prior", UniformPrior(0.05, 0.3), "noise")

    def pyro_model(x, y):
        with gpytorch.settings.fast_computations(False, False, False):
            sampled_model = gp_model.pyro_sample_from_prior()
            output = sampled_model.likelihood(sampled_model(x))
            pyro.sample("obs", output, obs=y)
        return y

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
    mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20, disable_progbar=True)
    mcmc_run.run(train_x, train_y)

    gp_model.pyro_load_from_samples(mcmc_run.get_samples())

    gp_model.eval()
    expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
    output = gp_model(expanded_test_x)
    self.assertEqual(output.mean.size(0), 3)

    # All 3 samples should do reasonably well on a noiseless dataset.
    self.assertLess(torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
def test_dirichlet_categorical(jit):
    def model(data):
        concentration = torch.tensor([1.0, 1.0, 1.0])
        p_latent = pyro.sample("p_latent", dist.Dirichlet(concentration))
        pyro.sample("obs", dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(sample_shape=(torch.Size((2000,))))
    nuts_kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = samples["p_latent"]
    assert_equal(posterior.mean(0), true_probs, prec=0.02)
def test_gamma_beta(jit):
    def model(data):
        alpha_prior = pyro.sample('alpha', dist.Gamma(concentration=1., rate=1.))
        beta_prior = pyro.sample('beta', dist.Gamma(concentration=1., rate=1.))
        pyro.sample('x', dist.Beta(concentration1=alpha_prior,
                                   concentration0=beta_prior), obs=data)

    true_alpha = torch.tensor(5.)
    true_beta = torch.tensor(1.)
    data = dist.Beta(concentration1=true_alpha,
                     concentration0=true_beta).sample(torch.Size((5000,)))
    nuts_kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=500, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["alpha"].mean(0), true_alpha, prec=0.08)
    assert_equal(samples["beta"].mean(0), true_beta, prec=0.05)
def test_pyro_sampling(self):
    try:
        import pyro
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:
        return
    train_x, test_x, train_y, test_y = self._get_data(cuda=False)
    likelihood = GaussianLikelihood(
        noise_constraint=gpytorch.constraints.Positive()
    )
    gp_model = ExactGPModel(train_x, train_y, likelihood)

    # Register normal GPyTorch priors
    gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    gp_model.covar_module.base_kernel.register_prior(
        "lengthscale_prior", UniformPrior(0.01, 0.2), "lengthscale"
    )
    gp_model.covar_module.register_prior(
        "outputscale_prior", UniformPrior(1, 2), "outputscale"
    )
    likelihood.register_prior("noise_prior", LogNormalPrior(-1.5, 0.1), "noise")

    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

    def pyro_model(x, y):
        gp_model.pyro_sample_from_prior()
        output = gp_model(x)
        loss = mll.pyro_factor(output, y)
        return y

    nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
    mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20)
    mcmc_run.run(train_x, train_y)

    gp_model.pyro_load_from_samples(mcmc_run.get_samples())

    gp_model.eval()
    expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
    output = gp_model(expanded_test_x)
    self.assertEqual(output.mean.size(0), 3)

    # All 3 samples should do reasonably well on a noiseless dataset.
    self.assertLess(torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
    self.assertLess(torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
def test_beta_bernoulli(step_size, adapt_step_size, adapt_mass_matrix, full_mass):
    def model(data):
        alpha = torch.tensor([1.1, 1.1])
        beta = torch.tensor([1.1, 1.1])
        p_latent = pyro.sample("p_latent", dist.Beta(alpha, beta))
        pyro.sample("obs", dist.Bernoulli(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.9, 0.1])
    data = dist.Bernoulli(true_probs).sample(sample_shape=(torch.Size((1000,))))
    nuts_kernel = NUTS(model,
                       step_size=step_size,
                       adapt_step_size=adapt_step_size,
                       adapt_mass_matrix=adapt_mass_matrix,
                       full_mass=full_mass)
    mcmc = MCMC(nuts_kernel, num_samples=400, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_probs, prec=0.02)
def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps, device):
    print("\n == HMC training ==")
    pyro.clear_param_store()

    num_batches = int(len(train_loader.dataset) / train_loader.batch_size)
    batch_samples = int(n_samples / num_batches) + 1
    print("\nn_batches =", num_batches, "\tbatch_samples =", batch_samples)

    kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
    mcmc = MCMC(kernel=kernel, num_samples=batch_samples,
                warmup_steps=warmup, num_chains=1)

    start = time.time()
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        labels = y_batch.to(device).argmax(-1)
        mcmc.run(x_batch, labels)
    execution_time(start=start, end=time.time())

    self.posterior_predictive = {}
    posterior_samples = mcmc.get_samples(n_samples)
    state_dict_keys = list(self.basenet.state_dict().keys())

    if DEBUG:
        print("\n", list(posterior_samples.values())[-1])

    for model_idx in range(n_samples):
        net_copy = copy.deepcopy(self.basenet)

        model_dict = OrderedDict({})
        for weight_idx, weights in enumerate(posterior_samples.values()):
            model_dict.update({state_dict_keys[weight_idx]: weights[model_idx]})

        net_copy.load_state_dict(model_dict)
        self.posterior_predictive.update({str(model_idx): net_copy})

    if DEBUG:
        print("\n", weights[model_idx])

    self.save()
def test_gamma_normal(jit, use_multinomial_sampling):
    def model(data):
        rate = torch.tensor([1.0, 1.0])
        concentration = torch.tensor([1.0, 1.0])
        # dist.Gamma expects (concentration, rate); both are 1.0 here
        p_latent = pyro.sample('p_latent', dist.Gamma(concentration, rate))
        pyro.sample("obs", dist.Normal(3, p_latent), obs=data)
        return p_latent

    true_std = torch.tensor([0.5, 2])
    data = dist.Normal(3, true_std).sample(sample_shape=(torch.Size((2000,))))
    nuts_kernel = NUTS(model,
                       use_multinomial_sampling=use_multinomial_sampling,
                       jit_compile=jit,
                       ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_std, prec=0.05)
def run(param):
    nn_model, p_tgt, save_fn, args = param
    if (not args.overwrite) and os.path.isfile(save_fn):
        print(save_fn + ' already exists!')
        return
    if not os.path.isfile(save_fn):
        fo = open(save_fn, 'w')  # write the file first to signal working on it.
        fo.write('\n')
        fo.close()

    nuts = NUTS(program_arbitrary)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, p_tgt)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
def test_bernoulli_latent_model(jit):
    @poutine.broadcast
    def model(data):
        y_prob = pyro.sample("y_prob", dist.Beta(1., 1.))
        with pyro.plate("data", data.shape[0]):
            y = pyro.sample("y", dist.Bernoulli(y_prob))
            z = pyro.sample("z", dist.Bernoulli(0.65 * y + 0.1))
            pyro.sample("obs", dist.Normal(2. * z, 1.), obs=data)

    N = 2000
    y_prob = torch.tensor(0.3)
    y = dist.Bernoulli(y_prob).sample(torch.Size((N,)))
    z = dist.Bernoulli(0.65 * y + 0.1).sample()
    data = dist.Normal(2. * z, 1.0).sample()
    nuts_kernel = NUTS(model, max_plate_nesting=1,
                       jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=600, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["y_prob"].mean(0), y_prob, prec=0.05)
def test_logistic_regression(jit, use_multinomial_sampling):
    dim = 3
    data = torch.randn(2000, dim)
    true_coefs = torch.arange(1., dim + 1.)
    labels = dist.Bernoulli(logits=(true_coefs * data).sum(-1)).sample()

    def model(data):
        coefs_mean = torch.zeros(dim)
        coefs = pyro.sample('beta', dist.Normal(coefs_mean, torch.ones(dim)))
        y = pyro.sample('y', dist.Bernoulli(logits=(coefs * data).sum(-1)), obs=labels)
        return y

    nuts_kernel = NUTS(model,
                       use_multinomial_sampling=use_multinomial_sampling,
                       jit_compile=jit,
                       ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=500, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(rmse(true_coefs, samples["beta"].mean(0)).item(), 0.0, prec=0.1)
def monte_carlo(y):
    pyro.clear_param_store()

    # create a simple Hamiltonian Monte Carlo kernel with a step_size of 0.1
    hmc_kernel = HMC(conditioned_model, step_size=.1)
    # create a Markov chain Monte Carlo sampler with:
    # the hmc_kernel, 500 samples, and 100 warmup iterations
    mcmc = MCMC(hmc_kernel, num_samples=500, warmup_steps=100)
    mcmc.run(model, y)

    sample_dict = mcmc.get_samples(num_samples=5000)

    plt.figure(figsize=(8, 6))
    sns.distplot(sample_dict["p"].numpy())
    plt.xlabel("Observed probability value")
    plt.ylabel("Observed frequency")
    plt.show()

    mcmc.summary(prob=0.95)

    return sample_dict
def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps,
               savedir, device):
    print("\n == fullBNN HMC training ==")
    pyro.clear_param_store()

    num_batches = int(len(train_loader.dataset) / train_loader.batch_size)
    batch_samples = int(n_samples / num_batches) + 1
    print("\nn_batches =", num_batches, "\tbatch_samples =", batch_samples)

    # kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
    kernel = NUTS(self.model, adapt_step_size=True)
    mcmc = MCMC(kernel=kernel, num_samples=batch_samples,
                warmup_steps=warmup, num_chains=1)

    self.posterior_samples = []
    state_dict_keys = list(self.basenet.state_dict().keys())

    start = time.time()
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device).argmax(-1)
        mcmc.run(x_batch, y_batch)

        posterior_samples = mcmc.get_samples(batch_samples)
        # print('module$$$model.1.weight:\n',
        #       posterior_samples['module$$$model.1.weight'][:, 0, :5])

        for sample_idx in range(batch_samples):
            net_copy = copy.deepcopy(self.basenet)

            model_dict = OrderedDict({})
            for weight_idx, weights in enumerate(posterior_samples.values()):
                model_dict.update({state_dict_keys[weight_idx]: weights[sample_idx]})

            net_copy.load_state_dict(model_dict)
            self.posterior_samples.append(net_copy)

    execution_time(start=start, end=time.time())
    self.save(savedir)
    nn_model = GANModel(args.dataset, gan_path=path)
    return nn_model, args.excl_label, save_fn


if __name__ == '__main__':
    set_start_method('spawn')

    # fashion_mnist labels
    # [0: 'T-shirt', 1: 'Trouser', 2: 'Pullover', 3: 'Dress', 4: 'Coat',
    #  5: 'Sandal', 6: 'Shirt', 7: 'Sneaker', 8: 'Bag', 9: 'Ankle boot']
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-samples', type=int, default=1000)
    parser.add_argument('--num-warmups', type=int, default=1000)
    parser.add_argument('--num-chains', type=int, default=1)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--nn-model')
    parser.add_argument('--dataset')
    parser.add_argument('--excl-label', type=int)
    args = parser.parse_args()

    nn_model, label, save_fn = setting(args)

    nuts = NUTS(program_all_but_one)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, label)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
class BayesianMCMCLinearModel(BayesianLinearModel):
    def __init__(self, X, y, prior, warmup_steps=100, num_samples=1000):
        super().__init__(X, y, prior)
        self.warmup_steps = warmup_steps
        self.num_samples = num_samples

    def fit(self):
        pyro.clear_param_store()
        self.kernel = NUTS(self.model)
        self.mcmc = MCMC(self.kernel,
                         warmup_steps=self.warmup_steps,
                         num_samples=self.num_samples)
        self.mcmc.run()
        self._posterior_sample_df()

    def generate_posterior_samples(self, n_samples=1000):
        return self.mcmc.get_samples(n_samples)

    def posterior_summary(self, q=[0.05, 0.95], plot=False):
        if self.posterior_df is None:
            self._posterior_sample_df()
        summary = self.posterior_df.describe(percentiles=q).T
        if not plot:
            return summary
        else:
            sns.scatterplot(x=summary['mean'], y=summary.index)
            for i, var in enumerate(summary.index):
                sns.lineplot(x=summary.loc[var, format_percentiles(q)],
                             y=[var, var], color='k')
            plt.xlabel('')
            plt.show()

    def plot_joint_posterior(self):
        g = sns.pairplot(self.posterior_df, diag_kind='kde', corner=True)
        g.map_lower(sns.kdeplot, levels=4, color='.2')
        plt.show()

    def plot_counterfactual(self):
        # counterfactual plots
        pass

    def _plot_prior_posterior(self, prior_sample, posterior_sample, label):
        plot_df = pd.concat([
            pd.DataFrame({'value': prior_sample, 'type': 'prior'}),
            pd.DataFrame({'value': posterior_sample, 'type': 'posterior'}),
        ])
        ax = sns.histplot(data=plot_df, x='value', hue='type', kde=True)
        ax.set(xlabel='', ylabel=label)

    def plot_prior_posterior(self, sample_size=1000):
        # determine the total number of parameters
        param_num = self.posterior_df.shape[1]

        # plot bias
        plt.subplot(param_num, 1, 1)
        self._plot_prior_posterior(
            prior_sample=self.prior['bias'].rsample((sample_size,)),
            posterior_sample=self.posterior_df['bias'],
            label='bias')

        # plot weights
        w = self.prior['weights'].rsample((sample_size,))
        for i in range(w.shape[1]):
            plt.subplot(param_num, 1, i + 2)
            self._plot_prior_posterior(
                prior_sample=w[:, i],
                posterior_sample=self.posterior_df[f'weights_{i + 1}'],
                label=f'weights_{i + 1}')

        # plot sigma
        plt.subplot(param_num, 1, param_num)
        self._plot_prior_posterior(
            self.prior['sigma'].rsample((sample_size,)),
            self.posterior_df['sigma'],
            label='sigma')
        plt.show()

    def plot_predicted(self):
        ps = self.posterior_summary()
        expected_bias = ps['mean']['bias']
        expected_weights = ps['mean'][ps['mean'].index.str.startswith('weights')]
        y_pred = expected_bias + self.X @ expected_weights
        ax = sns.scatterplot(x=self.y, y=y_pred)
        ax.set(xlabel='Observed', ylabel='predicted')
        ax.plot([self.y.min(), self.y.max()],
                [y_pred.min(), y_pred.max()], ls="--", c=".3")
        plt.show()

    def _posterior_sample_df(self):
        posterior_sample = self.generate_posterior_samples(n_samples=1000)
        result = {}
        for k, v in posterior_sample.items():
            if len(v.shape) == 1:
                result[k] = v.numpy()
            else:
                for i in range(v.shape[1]):
                    result[f'{k}_{i + 1}'] = v[:, i].numpy()
        self.posterior_df = pd.DataFrame(result)
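# A hedged usage sketch for the class above. The BayesianLinearModel base
# class (not shown) defines `self.model` and the expected prior format; the
# plotting code suggests plain torch.distributions objects with .rsample(),
# which is assumed here. X, y, and the prior values are illustrative only.
import torch
import torch.distributions as tdist

X = torch.randn(50, 2)
y = X @ torch.tensor([1.5, -0.8]) + 0.3 + 0.1 * torch.randn(50)
prior = {
    'bias': tdist.Normal(0.0, 1.0),
    'weights': tdist.Normal(torch.zeros(2), torch.ones(2)),
    'sigma': tdist.HalfNormal(1.0),
}
blm = BayesianMCMCLinearModel(X, y, prior, warmup_steps=200, num_samples=500)
blm.fit()  # runs NUTS and caches a posterior DataFrame
print(blm.posterior_summary(q=[0.05, 0.95]))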
    b = pyro.sample('b', dist.Normal(0., 5.))
    y = pyro.sample('y', dist.Normal(a * x + b, 1.), obs=y)
    return y


# In[11]:

nuts_kernel = NUTS(model, adapt_step_size=True)
mcmc_run = MCMC(nuts_kernel, num_samples=1000, warmup_steps=1000)
mcmc_run.run(x, y)


# Retrieve the samples obtained by MCMC.

# In[12]:

posterior_a = mcmc_run.get_samples()['a']
posterior_b = mcmc_run.get_samples()['b']


# In[13]:

print(posterior_a[:20])


# In[14]:

print(posterior_b[:20])


# Next, we compute the predictive distribution using these samples.
# This is also easy, since the predictive distribution can be computed
# with a single function.

# In[15]:
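# The cell above is cut off in this export. Below is a minimal sketch of the
# predictive-distribution step it describes, assuming the "single function"
# is Pyro's `Predictive` utility and using a hypothetical grid of new
# inputs `x_new`; `model`, `x`, and `mcmc_run` come from the cells above.

from pyro.infer import Predictive

x_new = torch.linspace(x.min(), x.max(), 100)
predictive = Predictive(model, posterior_samples=mcmc_run.get_samples())
# Passing y=None lets the model sample `y` instead of conditioning on it,
# giving one predictive draw of y per posterior sample of (a, b).
y_pred = predictive(x_new, None)['y']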
    for key, val in model.par_real.items()
    if key not in ['softmax_mult', "h0"]
}
init_par['h0-batch'] = model.par_real['h0']

mcmc = MCMC(hmc_kernel, num_samples=10000, warmup_steps=100,
            initial_params=init_par)

#%%
all_data = dataloaders['train'].dataset.data
all_data['phase_mask'] = all_data['mask_train']

#%% TRAIN and save
TRAIN = False
if TRAIN:
    mcmc.run(all_data)
    par = mcmc.get_samples()
    par = {key: val.detach().cpu() for key, val in par.items()}
    with open("mcmc-cuda2.parameter", "wb") as file:
        pickle.dump(par, file=file)
else:
    with open("mcmc.parameter", "rb") as file:
        par = pickle.load(file=file)

#%% ANALYSIS

# %%
list(par.keys())

# %% Plot gamma over iteration steps:
plt.plot(par['gamma'])

#%%
def run_inference(
    pyro_model: Callable,
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    use_input_warping: bool = False,
    max_tree_depth: int = 6,
    disable_progbar: bool = False,
    gp_kernel: str = "matern",
    verbose: bool = False,
    task_feature: Optional[int] = None,
    rank: Optional[int] = None,
) -> Dict[str, Tensor]:
    start = time.time()
    try:
        from pyro.infer.mcmc import NUTS, MCMC
        from pyro.infer.mcmc.util import print_summary
    except ImportError:  # pragma: no cover
        raise RuntimeError("Cannot call run_inference without pyro installed!")
    kernel = NUTS(
        pyro_model,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run(
        X,
        Y,
        Yvar,
        use_input_warping=use_input_warping,
        gp_kernel=gp_kernel,
        task_feature=task_feature,
        rank=rank,
    )
    # compute the true lengthscales and get rid of the temporary variables
    samples = mcmc.get_samples()
    inv_length_sq = (
        samples["kernel_tausq"].unsqueeze(-1) * samples["_kernel_inv_length_sq"]
    )
    samples["lengthscale"] = (1.0 / inv_length_sq).sqrt()  # pyre-ignore [16]
    del samples["kernel_tausq"], samples["_kernel_inv_length_sq"]
    # this prints the summary
    if verbose:
        orig_std_out = sys.stdout.write
        sys.stdout.write = logger.info
        print_summary(samples, prob=0.9, group_by_chain=False)
        sys.stdout.write = orig_std_out
        logger.info(f"MCMC elapsed time: {time.time() - start}")
    # thin
    for k, v in samples.items():
        samples[k] = v[::thinning]  # apply thinning
    return samples
def pyro_model(x, y):
    model.pyro_sample_from_prior()
    output = model(x)
    loss = marginal_loglikelihood.pyro_factor(output, y)
    return y


nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps)
mcmc_run.run(x_train, y_train)

# We load the samples generated by NUTS into the model. This converts the
# model from a single GP to a batch of num_samples GPs, in this case 100.
model.pyro_load_from_samples(mcmc_run.get_samples())

model.eval()
x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
expanded_test_x = x_test.unsqueeze(0).repeat(num_samples, 1, 1)
output = model(expanded_test_x)

import matplotlib.pyplot as plt

with torch.no_grad():
    # Initialize plot
    f, ax = plt.subplots(1, 1, figsize=(16, 9))

    # Plot training data as black stars
    ax.plot(x_train.numpy(), y_train.numpy(), 'k*', zorder=10)

    for i in range(num_samples):
         ss.transform(x.detach().numpy())[0],
         f.reshape(-1, 1).detach().numpy(),
         kernel)
gp.optimize_restarts(5, verbose=False)

# Use the No-U-Turn Sampler (NUTS) variant of Hamiltonian Monte Carlo to
# sample from the posterior of the original model.

# plain NUTS
num_chains = 1
num_samples = 100
kernel = NUTS(model)
mcmc = MCMC(kernel, num_samples=num_samples, warmup_steps=100,
            num_chains=num_chains)
mcmc.run(f)
mcmc.summary()
mcmc_samples = mcmc.get_samples(group_by_chain=True)
print(mcmc_samples.keys())
chains = mcmc_samples["input"]
print(chains.shape)

# Show the posterior probability distribution of each of the input's
# components (input_dim).
for i in range(5):
    plt.figure(figsize=(6, 4))
    sns.distplot(mcmc_samples['input'][:, :, i])
    plt.title("Full model")
    plt.xlabel("input {}th-component".format(i + 1))
    plt.show()

# Posterior samples of the active variable from the original model
print(ss.transform(chains[0])[0].mean())

plt.figure(figsize=(6, 4))
class BayesianGP(object):
    def __init__(self, x: np.ndarray, y: np.ndarray):
        """
        :param x: [N x D]
        :param y: [N]
        """
        x, y = TensorType(x), TensorType(y)
        assert x.ndimension() == 2
        assert y.ndimension() == 1
        assert x.shape[0] == y.numel()

        self.x = x
        self.y = y
        self.n_samples = 32
        self._xform = ExpTransform()

        # Length scales for the kernel
        self.raw_scales_prior = Normal(zeros(self.dx), ones(self.dx))
        # Kernel variance
        self.raw_variance_prior = Normal(zeros(1), ones(1))
        # Jitter, aka the Gaussian likelihood's variance
        self.raw_jitter_prior = Normal(-3.0 + zeros(1), ones(1))
        # For the constant ("bias") mean function
        self.bias_prior = Normal(zeros(1), ones(1))

        self._mcmc = None

    @property
    def dx(self):
        """
        Input dimension
        """
        return self.x.shape[1]

    @property
    def n(self):
        """
        Number of data
        """
        return self.y.numel()

    def fit(self):
        mcmc_kernel = NUTS(self._prior_model)
        self._mcmc = MCMC(mcmc_kernel, num_samples=self.n_samples, warmup_steps=128)
        self._mcmc.run()

    def predict_f(self, x_test, diag=True):
        return self._predict(x_test, diag, False)

    def predict_y(self, x_test, diag=True):
        return self._predict(x_test, diag, True)

    def append_data(self, x_new, y_new):
        """
        Add new input-output pair(s) to the model

        :param x_new: inputs
        :type x_new: np.ndarray
        :param y_new: outputs
        :type y_new: np.ndarray
        """
        self.x = torch.cat((self.x, TensorType(np.atleast_2d(x_new))))
        self.y = torch.cat((self.y, TensorType(y_new.flatten())))

    def _prior_model(self):
        scales, variance, jitter, bias = self._get_samples()
        if self.n > 0:
            kyy = _rbf(self.x, self.x, scales, variance) + jitter * eye(self.n)
            try:
                ckyy = _jitchol(kyy)
                sample(
                    "output",
                    MultivariateNormal(bias + zeros(self.n), scale_tril=ckyy),
                    obs=self.y,
                )
            except RuntimeError:
                # Cholesky fails? "No chance"
                sample("output", Delta(zeros(1)), obs=ones(1))

    def _posterior_model(self, x_test, diag, with_jitter):
        """
        Return means & (co)variance samples.
        """
        assert self.n > 0, "Need at least one training datum for posterior"
        scales, variance, jitter, bias = self._get_samples()
        kyy = _rbf(self.x, self.x, scales, variance) + jitter * eye(self.n)
        ckyy = _jitchol(kyy)
        kys = _rbf(self.x, x_test, scales, variance)
        alpha = _trtrs(kys, ckyy)
        beta = _trtrs(self.y[:, None] - bias, ckyy)
        mean = (alpha.t() @ beta).flatten() + bias
        if diag:
            kss = _rbf_diag(x_test, variance)
            cov = kss - torch.sum(alpha ** 2, dim=0)
            if with_jitter:
                cov = cov + jitter
            # Guard against numerically-negative variances?
            cov = cov - (torch.clamp(cov, max=0.0)).detach()
        else:
            kss = _rbf(x_test, x_test, scales, variance)
            cov = kss - alpha.t() @ alpha
            if with_jitter:
                cov = cov + jitter * eye(*cov.shape)
            # Numerically-negative variances?...
sample("mean", Delta(mean)) sample("cov", Delta(cov)) def _posterior_model_no_data(self, x_test, diag, with_jitter): """ When the conditioning set is empty """ scales, variance, jitter, bias = self._get_samples() if diag: cov = _rbf_diag(x_test, variance) if with_jitter: cov = cov + jitter else: cov = _rbf(x_test, x_test, scales, variance) if with_jitter: cov = cov + jitter * eye(x_test.shape[0]) mean = torch.zeros(x_test.shape[0]) + bias sample("mean", Delta(mean)) sample("cov", Delta(cov)) def _get_samples(self): scales = self._xform(sample("raw_scales", self.raw_scales_prior)) variance = self._xform(sample("raw_variance", self.raw_variance_prior)) jitter = self._xform(sample("raw_jitter", self.raw_jitter_prior)) bias = sample("bias", self.bias_prior) return scales, variance, jitter, bias @_input_as_tensor def _predict(self, x_test: TensorType, diag, with_jitter): """ Return predictive mean [N* x 1] and either predictive variance [N* x 1] or covariance [N* x N*] :return: (TensorType, TensorType) mean & (co)variance """ model = self._posterior_model if self.n > 0 else self._posterior_model_no_data samples = Predictive(model, self._mcmc.get_samples()).get_samples( x_test, diag, with_jitter) means, covs = samples["mean"], samples["cov"] mean = means.mean(dim=0) # Law of total (co)variance: if diag: cov = means.var(dim=0) + covs.mean(dim=0) else: d_mean = (means - mean)[:, :, None] cov_of_means = (d_mean @ torch.transpose(d_mean, 1, 2)).sum( dim=0) / (means.shape[0] - 1) mean_of_covs = covs.mean(dim=0) cov = cov_of_means + mean_of_covs # Make sure the shapes are right: if len(mean.shape) == 1: mean = mean[:, None] if len(cov.shape) == 1: cov = cov[:, None] return mean, cov
categories, words = torch.stack(categories), torch.stack(words)

# split into supervised data and unsupervised data
supervised_categories = categories[:num_supervised_data]
supervised_words = words[:num_supervised_data]
unsupervised_words = words[num_supervised_data:]


def forward_log_prob(prev_log_prob, curr_word, transition_log_prob,
                     emission_log_prob):
    log_prob = (emission_log_prob[:, curr_word] + transition_log_prob
                + prev_log_prob.unsqueeze(dim=1))
    return log_prob.logsumexp(dim=0)


def unsupervised_hmm(words):
    with pyro.plate("prob_plate", num_categories):
        transition_prob = pyro.sample("transition_prob",
                                      dist.Dirichlet(transition_prior))
        emission_prob = pyro.sample("emission_prob",
                                    dist.Dirichlet(emission_prior))

    transition_log_prob = transition_prob.log()
    emission_log_prob = emission_prob.log()
    log_prob = emission_log_prob[:, words[0]]
    for t in range(1, len(words)):
        log_prob = forward_log_prob(log_prob, words[t],
                                    transition_log_prob, emission_log_prob)
    prob = log_prob.logsumexp(dim=0).exp()
    # a trick to inject an additional log_prob into the model's log_prob
    pyro.sample("forward_prob", dist.Bernoulli(prob), obs=torch.tensor(1.))


nuts_kernel = NUTS(unsupervised_hmm, jit_compile=True, ignore_jit_warnings=True)
mcmc = MCMC(nuts_kernel, num_samples=100)
mcmc.run(unsupervised_words)
trace_transition_prob = mcmc.get_samples()["transition_prob"]
print(trace_transition_prob)
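# Note on the trick above: observing 1 under Bernoulli(prob) contributes
# log(prob) to the model's joint log-density, so the marginal likelihood
# `prob` computed by the forward algorithm enters NUTS's potential function
# even though the latent category sequence is never sampled explicitly.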