Example #1
def sample(draws=500,
           model=None,
           warmup_steps=None,
           num_chains=1,
           kernel='nuts'):
    """Markov-chain Monte Carlo sampling.

  Sampling should be run within the context of a model or the model should be passed as an argument `model` explicitly.
  Number of samples is given by `draws` which defaults to `500`.
  Warm-up steps are assumed to be 30% of sample count.
  MCMC kernel can be selected by setting `kernel`. `hmc` and `nuts` are available.
  `pmpyro.inference.sample` returns a trace of samples

  """
    # get model from context
    if model is None:
        model = Context.get_context()
    stfn = model.stfn  # get stochastic function from model
    data = model.args  # get data
    # make nuts kernel
    kernels = {'nuts': NUTS(stfn, adapt_step_size=True), 'hmc': HMC(stfn)}
    # if not num_chains:    # figure out number of chains
    #   num_chains = max(os.cpu_count() -1, 2)
    if not warmup_steps:  # figure out warm-up steps
        warmup_steps = int(0.3 * draws)
    # run MCMC
    mcmc = MCMC(kernels[kernel],
                num_samples=draws,
                warmup_steps=warmup_steps,
                num_chains=num_chains)
    mcmc.run(*data)
    # get num samples
    num_samples = num_chains * draws
    return mcmc.get_samples()
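
A minimal usage sketch of `sample` above, based only on its docstring; `my_model` is a hypothetical pmpyro model object (not from the original source), passed explicitly instead of relying on the model context:

# illustrative call; the values of draws, kernel and num_chains are arbitrary
trace = sample(draws=1000, model=my_model, kernel='hmc', num_chains=1)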
Example #2
    def test_saasbo_sample(self):
        for use_saas, use_input_warping in product((False, True), repeat=2):
            with torch.random.fork_rng():
                torch.manual_seed(0)
                X = torch.randn(3, 2)
                Y = torch.randn(3, 1)
                Yvar = torch.randn(3, 1)
                kernel = NUTS(pyro_model, max_tree_depth=1)
                mcmc = MCMC(kernel, warmup_steps=0, num_samples=1)
                mcmc.run(
                    X,
                    Y,
                    Yvar,
                    use_input_warping=use_input_warping,
                    use_saas=use_saas,
                )
                samples = mcmc.get_samples()
                if use_saas:
                    self.assertTrue("kernel_tausq" in samples)
                    self.assertTrue("_kernel_inv_length_sq" in samples)
                    self.assertTrue("lengthscale" not in samples)
                else:
                    self.assertTrue("kernel_tausq" not in samples)
                    self.assertTrue("_kernel_inv_length_sq" not in samples)
                    self.assertTrue("lengthscale" in samples)
                if use_input_warping:
                    self.assertIn("c0", samples)
                    self.assertIn("c1", samples)
                else:
                    self.assertNotIn("c0", samples)
                    self.assertNotIn("c1", samples)
Example #3
def test_nuts_conjugate_gaussian(
    fixture,
    num_samples,
    warmup_steps,
    expected_means,
    expected_precs,
    mean_tol,
    std_tol,
):
    pyro.get_param_store().clear()
    nuts_kernel = NUTS(fixture.model)
    mcmc = MCMC(nuts_kernel, num_samples, warmup_steps)
    mcmc.run(fixture.data)
    samples = mcmc.get_samples()
    for i in range(1, fixture.chain_len + 1):
        param_name = "loc_" + str(i)
        latent = samples[param_name]
        latent_loc = latent.mean(0)
        latent_std = latent.std(0)
        expected_mean = torch.ones(fixture.dim) * expected_means[i - 1]
        expected_std = 1 / torch.sqrt(torch.ones(fixture.dim) * expected_precs[i - 1])

        # Actual vs expected posterior means for the latents
        logger.debug("Posterior mean (actual) - {}".format(param_name))
        logger.debug(latent_loc)
        logger.debug("Posterior mean (expected) - {}".format(param_name))
        logger.debug(expected_mean)
        assert_equal(rmse(latent_loc, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.debug("Posterior std (actual) - {}".format(param_name))
        logger.debug(latent_std)
        logger.debug("Posterior std (expected) - {}".format(param_name))
        logger.debug(expected_std)
        assert_equal(rmse(latent_std, expected_std).item(), 0.0, prec=std_tol)
Example #4
def test_gamma_poisson(hyperpriors):
    def model(data):
        with pyro.plate("latent_dim", data.shape[1]):
            alpha = (
                pyro.sample("alpha", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            beta = (
                pyro.sample("beta", dist.HalfCauchy(1.0))
                if hyperpriors
                else torch.tensor([1.0, 1.0])
            )
            gamma_poisson = GammaPoissonPair()
            rate = pyro.sample("rate", gamma_poisson.latent(alpha, beta))
            with pyro.plate("data", data.shape[0]):
                pyro.sample("obs", gamma_poisson.conditional(rate), obs=data)

    true_rate = torch.tensor([3.0, 10.0])
    num_samples = 100
    data = dist.Poisson(rate=true_rate).sample(sample_shape=(torch.Size((100,))))
    hmc_kernel = NUTS(
        collapse_conjugate(model), jit_compile=True, ignore_jit_warnings=True
    )
    mcmc = MCMC(hmc_kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = posterior_replay(model, samples, data, num_samples=num_samples)
    assert_equal(posterior["rate"].mean(0), true_rate, prec=0.3)
Example #5
def test_gaussian_mixture_model(jit):
    K, N = 3, 1000

    def gmm(data):
        mix_proportions = pyro.sample("phi", dist.Dirichlet(torch.ones(K)))
        with pyro.plate("num_clusters", K):
            cluster_means = pyro.sample(
                "cluster_means", dist.Normal(torch.arange(float(K)), 1.0)
            )
        with pyro.plate("data", data.shape[0]):
            assignments = pyro.sample("assignments", dist.Categorical(mix_proportions))
            pyro.sample("obs", dist.Normal(cluster_means[assignments], 1.0), obs=data)
        return cluster_means

    true_cluster_means = torch.tensor([1.0, 5.0, 10.0])
    true_mix_proportions = torch.tensor([0.1, 0.3, 0.6])
    cluster_assignments = dist.Categorical(true_mix_proportions).sample(
        torch.Size((N,))
    )
    data = dist.Normal(true_cluster_means[cluster_assignments], 1.0).sample()
    nuts_kernel = NUTS(
        gmm, max_plate_nesting=1, jit_compile=jit, ignore_jit_warnings=True
    )
    mcmc = MCMC(nuts_kernel, num_samples=300, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["phi"].mean(0).sort()[0], true_mix_proportions, prec=0.05)
    assert_equal(
        samples["cluster_means"].mean(0).sort()[0], true_cluster_means, prec=0.2
    )
Example #6
def test_beta_binomial(hyperpriors):
    def model(data):
        with pyro.plate("plate_0", data.shape[-1]):
            alpha = pyro.sample(
                "alpha", dist.HalfCauchy(1.)) if hyperpriors else torch.tensor(
                    [1., 1.])
            beta = pyro.sample(
                "beta", dist.HalfCauchy(1.)) if hyperpriors else torch.tensor(
                    [1., 1.])
            beta_binom = BetaBinomialPair()
            with pyro.plate("plate_1", data.shape[-2]):
                probs = pyro.sample("probs", beta_binom.latent(alpha, beta))
                with pyro.plate("data", data.shape[0]):
                    pyro.sample("binomial",
                                beta_binom.conditional(
                                    probs=probs, total_count=total_count),
                                obs=data)

    true_probs = torch.tensor([[0.7, 0.4], [0.6, 0.4]])
    total_count = torch.tensor([[1000, 600], [400, 800]])
    num_samples = 80
    data = dist.Binomial(
        total_count=total_count,
        probs=true_probs).sample(sample_shape=(torch.Size((10, ))))
    hmc_kernel = NUTS(collapse_conjugate(model),
                      jit_compile=True,
                      ignore_jit_warnings=True)
    mcmc = MCMC(hmc_kernel, num_samples=num_samples, warmup_steps=50)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = posterior_replay(model, samples, data, num_samples=num_samples)
    assert_equal(posterior["probs"].mean(0), true_probs, prec=0.05)
Example #7
def run_inference(
    pyro_model: Callable[[Tensor, Tensor, Tensor, bool, str, float], None],
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    use_input_warping: bool = False,
    max_tree_depth: int = 6,
    use_saas: bool = False,
    disable_progbar: bool = False,
) -> Tensor:
    start = time.time()
    try:
        from pyro.infer.mcmc import NUTS, MCMC
    except ImportError:  # pragma: no cover
        raise RuntimeError("Cannot call run_inference without pyro installed!")
    kernel = NUTS(
        pyro_model,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run(
        # there is an issue with jit-compilation and cuda
        # for now, we run MCMC on the CPU.
        X.cpu(),
        Y.cpu(),
        Yvar.cpu(),
        use_input_warping=use_input_warping,
        use_saas=use_saas,
    )
    # this prints the summary
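    # (temporarily swap sys.stdout.write for the logger so the output of
    # mcmc.summary() ends up in the log instead of stdout, then restore it)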
    orig_std_out = sys.stdout.write
    sys.stdout.write = logger.info
    mcmc.summary()
    sys.stdout.write = orig_std_out
    logger.info(f"MCMC elapsed time: {time.time() - start}")
    samples = mcmc.get_samples()
    if use_saas:  # compute the lengthscale for saas and throw away everything else
        inv_length_sq = (samples["kernel_tausq"].unsqueeze(-1) *
                         samples["_kernel_inv_length_sq"])
        samples["lengthscale"] = (1.0 /
                                  inv_length_sq).sqrt()  # pyre-ignore [16]
        del samples["kernel_tausq"], samples["_kernel_inv_length_sq"]
    # thin
    for k, v in samples.items():
        # apply thinning and move back to X's device
        samples[k] = v[::thinning].to(device=X.device)
    return samples
Example #8
def fit_fully_bayesian_model_nuts(
    model: SaasFullyBayesianSingleTaskGP,
    max_tree_depth: int = 6,
    warmup_steps: int = 512,
    num_samples: int = 256,
    thinning: int = 16,
    disable_progbar: bool = False,
) -> None:
    r"""Fit a fully Bayesian model using the No-U-Turn-Sampler (NUTS)


    Args:
        model: SaasFullyBayesianSingleTaskGP to be fitted.
        max_tree_depth: Maximum tree depth for NUTS
        warmup_steps: The number of burn-in steps for NUTS.
        num_samples:  The number of MCMC samples. Note that with thinning,
            num_samples / thinning samples are retained.
        thinning: The amount of thinning. Every nth sample is retained.
        disable_progbar: A boolean indicating whether to print the progress
            bar and diagnostics during MCMC.

    Example:
        >>> gp = SaasFullyBayesianSingleTaskGP(train_X, train_Y)
        >>> fit_fully_bayesian_model_nuts(gp)
    """
    model.train()

    # Do inference with NUTS
    nuts = NUTS(
        model.pyro_model.sample,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        nuts,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run()

    # Get final MCMC samples from the Pyro model
    mcmc_samples = model.pyro_model.postprocess_mcmc_samples(
        mcmc_samples=mcmc.get_samples())
    for k, v in mcmc_samples.items():
        mcmc_samples[k] = v[::thinning]

    # Load the MCMC samples back into the BoTorch model
    model.load_mcmc_samples(mcmc_samples)
    model.eval()
Example #9
    def test_pyro_sampling(self):
        try:
            import pyro  # noqa
            from pyro.infer.mcmc import NUTS, MCMC
        except ImportError:
            return
        train_x, test_x, train_y, test_y = self._get_data(cuda=False)
        likelihood = GaussianLikelihood(
            noise_constraint=gpytorch.constraints.Positive())
        gp_model = ExactGPModel(train_x, train_y, likelihood)

        # Register normal GPyTorch priors
        gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1),
                                            "constant")
        gp_model.covar_module.base_kernel.register_prior(
            "lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
        gp_model.covar_module.register_prior("outputscale_prior",
                                             UniformPrior(1, 2), "outputscale")
        likelihood.register_prior("noise_prior", UniformPrior(0.05, 0.3),
                                  "noise")

        def pyro_model(x, y):
            with gpytorch.settings.fast_computations(False, False, False):
                sampled_model = gp_model.pyro_sample_from_prior()
                output = sampled_model.likelihood(sampled_model(x))
                pyro.sample("obs", output, obs=y)
            return y

        nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
        mcmc_run = MCMC(nuts_kernel,
                        num_samples=3,
                        warmup_steps=20,
                        disable_progbar=True)
        mcmc_run.run(train_x, train_y)

        gp_model.pyro_load_from_samples(mcmc_run.get_samples())

        gp_model.eval()
        expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
        output = gp_model(expanded_test_x)

        self.assertEqual(output.mean.size(0), 3)

        # All 3 samples should do reasonably well on a noiseless dataset.
        self.assertLess(
            torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
Example #10
def test_dirichlet_categorical(jit):
    def model(data):
        concentration = torch.tensor([1.0, 1.0, 1.0])
        p_latent = pyro.sample("p_latent", dist.Dirichlet(concentration))
        pyro.sample("obs", dist.Categorical(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.1, 0.6, 0.3])
    data = dist.Categorical(true_probs).sample(sample_shape=(torch.Size((2000,))))
    nuts_kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    posterior = samples["p_latent"]
    assert_equal(posterior.mean(0), true_probs, prec=0.02)
Example #11
def test_gamma_beta(jit):
    def model(data):
        alpha_prior = pyro.sample('alpha', dist.Gamma(concentration=1., rate=1.))
        beta_prior = pyro.sample('beta', dist.Gamma(concentration=1., rate=1.))
        pyro.sample('x', dist.Beta(concentration1=alpha_prior, concentration0=beta_prior), obs=data)

    true_alpha = torch.tensor(5.)
    true_beta = torch.tensor(1.)
    data = dist.Beta(concentration1=true_alpha, concentration0=true_beta).sample(torch.Size((5000,)))
    nuts_kernel = NUTS(model, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=500, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["alpha"].mean(0), true_alpha, prec=0.08)
    assert_equal(samples["beta"].mean(0), true_beta, prec=0.05)
Example #12
    def test_pyro_sampling(self):
        try:
            import pyro
            from pyro.infer.mcmc import NUTS, MCMC
        except ImportError:
            return
        train_x, test_x, train_y, test_y = self._get_data(cuda=False)
        likelihood = GaussianLikelihood(
            noise_constraint=gpytorch.constraints.Positive())
        gp_model = ExactGPModel(train_x, train_y, likelihood)

        # Register normal GPyTorch priors
        gp_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1),
                                            "constant")
        gp_model.covar_module.base_kernel.register_prior(
            "lengthscale_prior", UniformPrior(0.01, 0.2), "lengthscale")
        gp_model.covar_module.register_prior("outputscale_prior",
                                             UniformPrior(1, 2), "outputscale")
        likelihood.register_prior("noise_prior", LogNormalPrior(-1.5, 0.1),
                                  "noise")

        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)

        def pyro_model(x, y):
            gp_model.pyro_sample_from_prior()
            output = gp_model(x)
            loss = mll.pyro_factor(output, y)
            return y

        nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
        mcmc_run = MCMC(nuts_kernel, num_samples=3, warmup_steps=20)
        mcmc_run.run(train_x, train_y)

        gp_model.pyro_load_from_samples(mcmc_run.get_samples())

        gp_model.eval()
        expanded_test_x = test_x.unsqueeze(-1).repeat(3, 1, 1)
        output = gp_model(expanded_test_x)

        self.assertEqual(output.mean.size(0), 3)

        # All 3 samples should do reasonably well on a noiseless dataset.
        self.assertLess(
            torch.norm(output.mean[0] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[1] - test_y) / test_y.norm(), 0.2)
        self.assertLess(
            torch.norm(output.mean[2] - test_y) / test_y.norm(), 0.2)
Example #13
def test_beta_bernoulli(step_size, adapt_step_size, adapt_mass_matrix, full_mass):
    def model(data):
        alpha = torch.tensor([1.1, 1.1])
        beta = torch.tensor([1.1, 1.1])
        p_latent = pyro.sample("p_latent", dist.Beta(alpha, beta))
        pyro.sample("obs", dist.Bernoulli(p_latent), obs=data)
        return p_latent

    true_probs = torch.tensor([0.9, 0.1])
    data = dist.Bernoulli(true_probs).sample(sample_shape=(torch.Size((1000,))))
    nuts_kernel = NUTS(model, step_size=step_size, adapt_step_size=adapt_step_size,
                       adapt_mass_matrix=adapt_mass_matrix, full_mass=full_mass)
    mcmc = MCMC(nuts_kernel, num_samples=400, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_probs, prec=0.02)
Example #14
    def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps,
                   device):

        print("\n == HMC training ==")
        pyro.clear_param_store()

        num_batches = int(len(train_loader.dataset) / train_loader.batch_size)
        batch_samples = int(n_samples / num_batches) + 1
        print("\nn_batches=", num_batches, "\tbatch_samples =", batch_samples)

        kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
        mcmc = MCMC(kernel=kernel,
                    num_samples=batch_samples,
                    warmup_steps=warmup,
                    num_chains=1)

        start = time.time()
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            labels = y_batch.to(device).argmax(-1)
            mcmc.run(x_batch, labels)

        execution_time(start=start, end=time.time())

        self.posterior_predictive = {}
        posterior_samples = mcmc.get_samples(n_samples)
        state_dict_keys = list(self.basenet.state_dict().keys())

        if DEBUG:
            print("\n", list(posterior_samples.values())[-1])

        for model_idx in range(n_samples):
            net_copy = copy.deepcopy(self.basenet)

            model_dict = OrderedDict({})
            for weight_idx, weights in enumerate(posterior_samples.values()):
                model_dict.update(
                    {state_dict_keys[weight_idx]: weights[model_idx]})

            net_copy.load_state_dict(model_dict)
            self.posterior_predictive.update({str(model_idx): net_copy})

        if DEBUG:
            print("\n", weights[model_idx])

        self.save()
Example #15
def test_gamma_normal(jit, use_multinomial_sampling):
    def model(data):
        rate = torch.tensor([1.0, 1.0])
        concentration = torch.tensor([1.0, 1.0])
        p_latent = pyro.sample('p_latent', dist.Gamma(rate, concentration))
        pyro.sample("obs", dist.Normal(3, p_latent), obs=data)
        return p_latent

    true_std = torch.tensor([0.5, 2])
    data = dist.Normal(3, true_std).sample(sample_shape=(torch.Size((2000, ))))
    nuts_kernel = NUTS(model,
                       use_multinomial_sampling=use_multinomial_sampling,
                       jit_compile=jit,
                       ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=200, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["p_latent"].mean(0), true_std, prec=0.05)
Example #16
def run(param):
    nn_model, p_tgt, save_fn, args = param
    if (not args.overwrite) and os.path.isfile(save_fn):
        print(save_fn + ' already exists!')
        return
    if not os.path.isfile(save_fn):
        fo = open(save_fn,
                  'w')  # write the file first to signal working on it.
        fo.write('\n')
        fo.close()
    nuts = NUTS(program_arbitrary)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, p_tgt)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
Example #17
def test_bernoulli_latent_model(jit):
    @poutine.broadcast
    def model(data):
        y_prob = pyro.sample("y_prob", dist.Beta(1., 1.))
        with pyro.plate("data", data.shape[0]):
            y = pyro.sample("y", dist.Bernoulli(y_prob))
            z = pyro.sample("z", dist.Bernoulli(0.65 * y + 0.1))
            pyro.sample("obs", dist.Normal(2. * z, 1.), obs=data)

    N = 2000
    y_prob = torch.tensor(0.3)
    y = dist.Bernoulli(y_prob).sample(torch.Size((N,)))
    z = dist.Bernoulli(0.65 * y + 0.1).sample()
    data = dist.Normal(2. * z, 1.0).sample()
    nuts_kernel = NUTS(model, max_plate_nesting=1, jit_compile=jit, ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=600, warmup_steps=200)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(samples["y_prob"].mean(0), y_prob, prec=0.05)
Example #18
def test_logistic_regression(jit, use_multinomial_sampling):
    dim = 3
    data = torch.randn(2000, dim)
    true_coefs = torch.arange(1., dim + 1.)
    labels = dist.Bernoulli(logits=(true_coefs * data).sum(-1)).sample()

    def model(data):
        coefs_mean = torch.zeros(dim)
        coefs = pyro.sample('beta', dist.Normal(coefs_mean, torch.ones(dim)))
        y = pyro.sample('y', dist.Bernoulli(logits=(coefs * data).sum(-1)), obs=labels)
        return y

    nuts_kernel = NUTS(model,
                       use_multinomial_sampling=use_multinomial_sampling,
                       jit_compile=jit,
                       ignore_jit_warnings=True)
    mcmc = MCMC(nuts_kernel, num_samples=500, warmup_steps=100)
    mcmc.run(data)
    samples = mcmc.get_samples()
    assert_equal(rmse(true_coefs, samples["beta"].mean(0)).item(), 0.0, prec=0.1)
Example #19
def monte_carlo(y):
    pyro.clear_param_store()

    # create a Hamiltonian Monte Carlo kernel with a step size of 0.1
    hmc_kernel = HMC(conditioned_model, step_size=.1)
    # create a Markov chain Monte Carlo sampler with the hmc_kernel,
    # 500 samples, and 100 warm-up iterations
    mcmc = MCMC(hmc_kernel, num_samples=500, warmup_steps=100)
    mcmc.run(model, y)

    sample_dict = mcmc.get_samples(num_samples=5000)
    plt.figure(figsize=(8, 6))
    sns.distplot(sample_dict["p"].numpy())
    plt.xlabel("Observed probability value")
    plt.ylabel("Observed frequency")
    plt.show()
    mcmc.summary(prob=0.95)

    return sample_dict
Example #20
    def _train_hmc(self, train_loader, n_samples, warmup, step_size, num_steps, savedir, device):
        print("\n == fullBNN HMC training ==")
        pyro.clear_param_store()

        num_batches = int(len(train_loader.dataset)/train_loader.batch_size)
        batch_samples = int(n_samples/num_batches)+1
        print("\nn_batches =",num_batches,"\tbatch_samples =", batch_samples)

        # kernel = HMC(self.model, step_size=step_size, num_steps=num_steps)
        kernel = NUTS(self.model, adapt_step_size=True)
        mcmc = MCMC(kernel=kernel, num_samples=batch_samples, warmup_steps=warmup, num_chains=1)

        self.posterior_samples=[]
        state_dict_keys = list(self.basenet.state_dict().keys())
        start = time.time()

        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device).argmax(-1)
            mcmc.run(x_batch, y_batch)

            posterior_samples = mcmc.get_samples(batch_samples)
            # print('module$$$model.1.weight:\n', posterior_samples['module$$$model.1.weight'][:,0,:5])

            for sample_idx in range(batch_samples):
                net_copy = copy.deepcopy(self.basenet)

                model_dict=OrderedDict({})
                for weight_idx, weights in enumerate(posterior_samples.values()):
                    model_dict.update({state_dict_keys[weight_idx]:weights[sample_idx]})
                
                net_copy.load_state_dict(model_dict)
                self.posterior_samples.append(net_copy)

        execution_time(start=start, end=time.time())     
        self.save(savedir)
Example #21
        nn_model = GANModel(args.dataset, gan_path=path)
    return nn_model, args.excl_label, save_fn


if __name__ == '__main__':
    set_start_method('spawn')

    # fashion_mnist labels
    # [0: 'T-shirt', 1: 'Trouser', 2: 'Pullover', 3: 'Dress', 4: 'Coat',
    #  5: 'Sandal', 6: 'Shirt', 7: 'Sneaker', 8: 'Bag', 9: 'Ankle boot']

    parser = argparse.ArgumentParser()
    parser.add_argument('--num-samples', type=int, default=1000)
    parser.add_argument('--num-warmups', type=int, default=1000)
    parser.add_argument('--num-chains', type=int, default=1)
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--nn-model')
    parser.add_argument('--dataset')
    parser.add_argument('--excl-label', type=int)
    args = parser.parse_args()
    nn_model, label, save_fn = setting(args)

    nuts = NUTS(program_all_but_one)
    mcmc = MCMC(nuts,
                num_samples=args.num_samples,
                warmup_steps=args.num_warmups,
                num_chains=args.num_chains)
    mcmc.run(nn_model, label)
    zs = mcmc.get_samples()['z'].detach().cpu().numpy()
    np.savetxt(save_fn, zs)
Example #22
class BayesianMCMCLinearModel(BayesianLinearModel):
    def __init__(self, X, y, prior, warmup_steps=100, num_samples=1000):
        super().__init__(X, y, prior)
        self.warmup_steps = warmup_steps
        self.num_samples = num_samples


    def fit(self):
        pyro.clear_param_store()
        self.kernel = NUTS(self.model)
        self.mcmc = MCMC(self.kernel,
                         warmup_steps=self.warmup_steps,
                         num_samples=self.num_samples)
        self.mcmc.run()
        self._posterior_sample_df()

    def generate_posterior_samples(self,
                                   n_samples=1000):
        return self.mcmc.get_samples(n_samples)

    def posterior_summary(self, q=[0.05, 0.95], plot=False):
        if self.posterior_df is None:
            self._posterior_sample_df()
        summary = self.posterior_df.describe(percentiles=q).T
        if not plot:
            return summary
        else:
            sns.scatterplot(x=summary['mean'], y=summary.index)
            for i, var in enumerate(summary.index):
                sns.lineplot(x=summary.loc[var, format_percentiles(q)],
                             y=[var, var], color='k')
            plt.xlabel('')
            plt.show()

    def plot_joint_posterior(self):
        g = sns.pairplot(self.posterior_df, diag_kind='kde', corner=True)
        g.map_lower(sns.kdeplot, levels=4, color='.2')
        plt.show()

    def plot_counterfactual(self):
        # counter factual plots
        pass

    def _plot_prior_posterior(self, prior_sample, posterior_sample, label):
        plot_df = pd.concat([pd.DataFrame({'value': prior_sample, 'type': 'prior'}),
                             pd.DataFrame({'value': posterior_sample, 'type': 'posterior'})])
        ax = sns.histplot(data=plot_df, x='value', hue='type', kde=True)
        ax.set(xlabel = '', ylabel=label)


    def plot_prior_posterior(self, sample_size=1000):
        # need to determine the total number of param
        param_num = self.posterior_df.shape[1]

        # plot bias
        plt.subplot(param_num, 1, 1)
        self._plot_prior_posterior(prior_sample=self.prior['bias'].rsample((sample_size, )),
                                   posterior_sample=self.posterior_df['bias'],
                                   label='bias')

        # plot weights
        w = self.prior['weights'].rsample((sample_size, ))
        for i in range(w.shape[1]):
            plt.subplot(param_num, 1, i + 2)
            self._plot_prior_posterior(prior_sample=w[:, i],
                                       posterior_sample=self.posterior_df[f'weights_{i + 1}'],
                                       label=f'weights_{i + 1}')

        # plot sigma
        plt.subplot(param_num, 1, param_num)
        self._plot_prior_posterior(self.prior['sigma'].rsample((sample_size, )),
                                   self.posterior_df['sigma'],
                                   label='sigma')
        plt.show()

    def plot_predicted(self):
        ps = self.posterior_summary()
        expected_bias = ps['mean']['bias']
        expected_weights = ps['mean'][ps['mean'].index.str.startswith('weights')]
        y_pred = expected_bias + self.X @ expected_weights
        ax = sns.scatterplot(self.y, y_pred)
        ax.set(xlabel='Observed', ylabel='predicted')
        ax.plot([self.y.min(), self.y.max()], [y_pred.min(), y_pred.max()], ls="--", c=".3")
        plt.show()

    def _posterior_sample_df(self):
        posterior_sample = self.generate_posterior_samples(n_samples=1000)
        result = {}
        for k, v in posterior_sample.items():
            if len(v.shape) == 1:
                result[k] = v.numpy()
            else:
                for i in range(v.shape[1]):
                    result[f'{k}_{i + 1}'] = v[:, i].numpy()
        self.posterior_df = pd.DataFrame(result)
Example #23
    b = pyro.sample('b', dist.Normal(0., 5.))
    y = pyro.sample('y', dist.Normal(a * x + b, 1.), obs=y)
    return y


# In[11]:

nuts_kernel = NUTS(model, adapt_step_size=True)
mcmc_run = MCMC(nuts_kernel, num_samples=1000, warmup_steps=1000)
mcmc_run.run(x, y)

# Retrieve the samples obtained by MCMC

# In[12]:

posterior_a = mcmc_run.get_samples()['a']
posterior_b = mcmc_run.get_samples()['b']

# In[13]:

print(posterior_a[:20])

# In[14]:

print(posterior_b[:20])

# Next, we compute the predictive distribution using these samples.
# This is also easy, since the predictive distribution can be computed with a single function.

# In[15]:
Example #24
    for key, val in model.par_real.items()
    if key not in ['softmax_mult', "h0"]
}
init_par['h0-batch'] = model.par_real['h0']
mcmc = MCMC(hmc_kernel,
            num_samples=10000,
            warmup_steps=100,
            initial_params=init_par)
#%%
all_data = dataloaders['train'].dataset.data
all_data['phase_mask'] = all_data['mask_train']
#%% TRAIN and save
TRAIN = False
if TRAIN:
    mcmc.run(all_data)
    par = mcmc.get_samples()
    par = {key: val.detach().cpu() for key, val in par.items()}

    with open("mcmc-cuda2.parameter", "wb") as file:
        pickle.dump(par, file=file)
else:
    with open("mcmc.parameter", "rb") as file:
        par = pickle.load(file=file)
#%% ANALYSIS

# %%
list(par.keys())

# %% Plot gamma over iteration steps:
plt.plot(par['gamma'])
#%%
Example #25
def run_inference(
    pyro_model: Callable,
    X: Tensor,
    Y: Tensor,
    Yvar: Tensor,
    num_samples: int = 512,
    warmup_steps: int = 1024,
    thinning: int = 16,
    use_input_warping: bool = False,
    max_tree_depth: int = 6,
    disable_progbar: bool = False,
    gp_kernel: str = "matern",
    verbose: bool = False,
    task_feature: Optional[int] = None,
    rank: Optional[int] = None,
) -> Dict[str, Tensor]:
    start = time.time()
    try:
        from pyro.infer.mcmc import NUTS, MCMC
        from pyro.infer.mcmc.util import print_summary
    except ImportError:  # pragma: no cover
        raise RuntimeError("Cannot call run_inference without pyro installed!")
    kernel = NUTS(
        pyro_model,
        jit_compile=True,
        full_mass=True,
        ignore_jit_warnings=True,
        max_tree_depth=max_tree_depth,
    )
    mcmc = MCMC(
        kernel,
        warmup_steps=warmup_steps,
        num_samples=num_samples,
        disable_progbar=disable_progbar,
    )
    mcmc.run(
        X,
        Y,
        Yvar,
        use_input_warping=use_input_warping,
        gp_kernel=gp_kernel,
        task_feature=task_feature,
        rank=rank,
    )

    # compute the true lengthscales and get rid of the temporary variables
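    # (in the SAAS parameterization, the global `kernel_tausq` times the
    # per-dimension `_kernel_inv_length_sq` is the inverse squared lengthscale,
    # hence lengthscale = 1 / sqrt(kernel_tausq * _kernel_inv_length_sq))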
    samples = mcmc.get_samples()
    inv_length_sq = (samples["kernel_tausq"].unsqueeze(-1) *
                     samples["_kernel_inv_length_sq"])
    samples["lengthscale"] = (1.0 / inv_length_sq).sqrt()  # pyre-ignore [16]
    del samples["kernel_tausq"], samples["_kernel_inv_length_sq"]
    # this prints the summary
    if verbose:
        orig_std_out = sys.stdout.write
        sys.stdout.write = logger.info
        print_summary(samples, prob=0.9, group_by_chain=False)
        sys.stdout.write = orig_std_out
        logger.info(f"MCMC elapsed time: {time.time() - start}")
    # thin
    for k, v in samples.items():
        samples[k] = v[::thinning]  # apply thinning
    return samples
Example #26
def pyro_model(x, y):
    model.pyro_sample_from_prior()
    output = model(x)
    loss = marginal_loglikelihood.pyro_factor(output, y)
    return y


nuts_kernel = NUTS(pyro_model, adapt_step_size=True)
mcmc_run = MCMC(nuts_kernel,
                num_samples=num_samples,
                warmup_steps=warmup_steps)
mcmc_run.run(x_train, y_train)

# We load the samples generated by NUTS into the model.
# This converts the model from a single GP to a batch of num_samples GPs, in this case 100.
model.pyro_load_from_samples(mcmc_run.get_samples())
model.eval()
x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
expanded_test_x = x_test.unsqueeze(0).repeat(num_samples, 1, 1)
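# expanded_test_x has shape (num_samples, 200, 1): one copy of the test inputs
# per posterior sample, so the batched GP evaluates all samples in one call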
output = model(expanded_test_x)

import matplotlib.pyplot as plt

with torch.no_grad():
    # Initialize plot
    f, ax = plt.subplots(1, 1, figsize=(16, 9))

    # Plot training data as black stars
    ax.plot(x_train.numpy(), y_train.numpy(), 'k*', zorder=10)

    for i in range(num_samples):
Example #27
    ss.transform(x.detach().numpy())[0],
    f.reshape(-1, 1).detach().numpy(), kernel)
gp.optimize_restarts(5, verbose=False)

# Use No U-Turn Sampler (NUTS) Hamiltonian Monte Carlo to sample from the posterior of the original model.
#plain NUTS
num_chains = 1
num_samples = 100
kernel = NUTS(model)
mcmc = MCMC(kernel,
            num_samples=num_samples,
            warmup_steps=100,
            num_chains=num_chains)
mcmc.run(f)
mcmc.summary()
mcmc_samples = mcmc.get_samples(group_by_chain=True)
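# with group_by_chain=True, each entry carries a leading chain dimension,
# i.e. shape (num_chains, num_samples, ...)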
print(mcmc_samples.keys())
chains = mcmc_samples["input"]
print(chains.shape)

# Show the posterior probability distribution of each input component (input_dim).
for i in range(5):
    plt.figure(figsize=(6, 4))
    sns.distplot(mcmc_samples['input'][:, :, i])
    plt.title("Full model")
    plt.xlabel("input {}th-component".format(i + 1))
    plt.show()

# Posterior samples of the active variable from original model
print(ss.transform(chains[0])[0].mean())
plt.figure(figsize=(6, 4))
Example #28
class BayesianGP(object):
    def __init__(self, x: np.ndarray, y: np.ndarray):
        """
        :param x: [N x D]
        :param y: [N]
        """

        x, y = TensorType(x), TensorType(y)
        assert x.ndimension() == 2
        assert y.ndimension() == 1
        assert x.shape[0] == y.numel()

        self.x = x
        self.y = y
        self.n_samples = 32
        self._xform = ExpTransform()

        # Length scales for the kernel
        self.raw_scales_prior = Normal(zeros(self.dx), ones(self.dx))
        # Kernel variance
        self.raw_variance_prior = Normal(zeros(1), ones(1))
        # Jitter, aka Gaussian likelihood's variance
        self.raw_jitter_prior = Normal(-3.0 + zeros(1), ones(1))
        # For the constant ("bias") mean function
        self.bias_prior = Normal(zeros(1), ones(1))

        self._mcmc = None

    @property
    def dx(self):
        """
        Input dimension
        """
        return self.x.shape[1]

    @property
    def n(self):
        """
        Number of data
        """
        return self.y.numel()

    def fit(self):
        mcmc_kernel = NUTS(self._prior_model)
        self._mcmc = MCMC(mcmc_kernel,
                          num_samples=self.n_samples,
                          warmup_steps=128)
        self._mcmc.run()

    def predict_f(self, x_test, diag=True):
        return self._predict(x_test, diag, False)

    def predict_y(self, x_test, diag=True):
        return self._predict(x_test, diag, True)

    def append_data(self, x_new, y_new):
        """
        Add new input-output pair(s) to the model

        :param x_new: inputs
        :type x_new: np.ndarray
        :param y_new: outputs
        :type y_new: np.ndarray
        """

        self.x = torch.cat((self.x, TensorType(np.atleast_2d(x_new))))
        self.y = torch.cat((self.y, TensorType(y_new.flatten())))

    def _prior_model(self):
        scales, variance, jitter, bias = self._get_samples()
        if self.n > 0:
            kyy = _rbf(self.x, self.x, scales, variance) + jitter * eye(self.n)
            try:
                ckyy = _jitchol(kyy)
                sample(
                    "output",
                    MultivariateNormal(bias + zeros(self.n), scale_tril=ckyy),
                    obs=self.y,
                )
            except RuntimeError:  # Cholesky fails?
                # "No chance"
                sample("output", Delta(zeros(1)), obs=ones(1))

    def _posterior_model(self, x_test, diag, with_jitter):
        """
        Return means & (co)variance samples.
        """

        assert self.n > 0, "Need at least one training datum for posterior"

        scales, variance, jitter, bias = self._get_samples()
        kyy = _rbf(self.x, self.x, scales, variance) + jitter * eye(self.n)
        ckyy = _jitchol(kyy)
        kys = _rbf(self.x, x_test, scales, variance)

        alpha = _trtrs(kys, ckyy)
        beta = _trtrs(self.y[:, None] - bias, ckyy)

        mean = (alpha.t() @ beta).flatten() + bias
        if diag:
            kss = _rbf_diag(x_test, variance)
            cov = kss - torch.sum(alpha**2, dim=0)
            if with_jitter:
                cov = cov + jitter
            # Guard against numerically-negative variances?
            cov = cov - (torch.clamp(cov, max=0.0)).detach()
        else:
            kss = _rbf(x_test, x_test, scales, variance)
            cov = kss - alpha.t() @ alpha
            if with_jitter:
                cov = cov + jitter * eye(*cov.shape)
            # Numerically-negative variances?...

        sample("mean", Delta(mean))
        sample("cov", Delta(cov))

    def _posterior_model_no_data(self, x_test, diag, with_jitter):
        """
        When the conditioning set is empty
        """

        scales, variance, jitter, bias = self._get_samples()
        if diag:
            cov = _rbf_diag(x_test, variance)
            if with_jitter:
                cov = cov + jitter
        else:
            cov = _rbf(x_test, x_test, scales, variance)
            if with_jitter:
                cov = cov + jitter * eye(x_test.shape[0])
        mean = torch.zeros(x_test.shape[0]) + bias

        sample("mean", Delta(mean))
        sample("cov", Delta(cov))

    def _get_samples(self):
        scales = self._xform(sample("raw_scales", self.raw_scales_prior))
        variance = self._xform(sample("raw_variance", self.raw_variance_prior))
        jitter = self._xform(sample("raw_jitter", self.raw_jitter_prior))
        bias = sample("bias", self.bias_prior)

        return scales, variance, jitter, bias

    @_input_as_tensor
    def _predict(self, x_test: TensorType, diag, with_jitter):
        """
        Return predictive mean [N* x 1] and either predictive variance [N* x 1]
        or covariance [N* x N*]

        :return: (TensorType, TensorType) mean & (co)variance
        """

        model = self._posterior_model if self.n > 0 else self._posterior_model_no_data
        samples = Predictive(model, self._mcmc.get_samples()).get_samples(
            x_test, diag, with_jitter)

        means, covs = samples["mean"], samples["cov"]

        mean = means.mean(dim=0)
        # Law of total (co)variance:
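        # Var[f*] = E_theta[ Var(f* | theta) ] + Var_theta[ E(f* | theta) ],
        # estimated by averaging the per-sample (co)variances and adding the
        # sample (co)variance of the per-sample means.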
        if diag:
            cov = means.var(dim=0) + covs.mean(dim=0)
        else:
            d_mean = (means - mean)[:, :, None]
            cov_of_means = (d_mean @ torch.transpose(d_mean, 1, 2)).sum(
                dim=0) / (means.shape[0] - 1)
            mean_of_covs = covs.mean(dim=0)
            cov = cov_of_means + mean_of_covs

        # Make sure the shapes are right:
        if len(mean.shape) == 1:
            mean = mean[:, None]
        if len(cov.shape) == 1:
            cov = cov[:, None]

        return mean, cov
Example #29
categories, words = torch.stack(categories), torch.stack(words)

# split into supervised data and unsupervised data
supervised_categories = categories[:num_supervised_data]
supervised_words = words[:num_supervised_data]
unsupervised_words = words[num_supervised_data:]

def forward_log_prob(prev_log_prob, curr_word, transition_log_prob, emission_log_prob):
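    # One step of the HMM forward algorithm in log space: combine the previous
    # forward log-probabilities with the transition and emission terms, then
    # marginalize (logsumexp) over the previous hidden state.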
    log_prob = emission_log_prob[:, curr_word] + transition_log_prob + prev_log_prob.unsqueeze(dim=1)
    return log_prob.logsumexp(dim=0)

def unsupervised_hmm(words):
    with pyro.plate("prob_plate", num_categories):
        transition_prob = pyro.sample("transition_prob", dist.Dirichlet(transition_prior))
        emission_prob = pyro.sample("emission_prob", dist.Dirichlet(emission_prior))

    transition_log_prob = transition_prob.log()
    emission_log_prob = emission_prob.log()
    log_prob = emission_log_prob[:, words[0]]
    for t in range(1, len(words)):
        log_prob = forward_log_prob(log_prob, words[t], transition_log_prob, emission_log_prob)
    prob = log_prob.logsumexp(dim=0).exp()
    # a trick to inject an additional log_prob into model's log_prob
    pyro.sample("forward_prob", dist.Bernoulli(prob), obs=torch.tensor(1.))
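# Note: the Bernoulli trick above adds log(prob) to the model's joint
# log-density. A sketch of the same effect using pyro.factor (available in
# recent Pyro releases), which would replace the pyro.sample line inside the model:
#     pyro.factor("forward_prob", log_prob.logsumexp(dim=0))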
nuts_kernel = NUTS(unsupervised_hmm, jit_compile=True, ignore_jit_warnings=True)
mcmc = MCMC(nuts_kernel, num_samples=100)
mcmc.run(unsupervised_words)
trace_transition_prob = mcmc.get_samples()["transition_prob"]
print(trace_transition_prob)