Example #1
    def test_linear_gaussian_inference(self):
        # set random seed, so the data is reproducible each time
        np.random.seed(1)

        nobs = 10  # number of observations
        noise_stdev = .1  # standard deviation of noise
        x = np.linspace(0., 9., nobs)
        Amatrix = np.hstack([np.ones((nobs, 1)), x[:, np.newaxis]])

        univariate_variables = [norm(1, 1), norm(0, 4)]
        variables = IndependentMultivariateRandomVariable(univariate_variables)

        mtrue = 0.4  # true gradient
        ctrue = 2.  # true y-intercept
        true_sample = np.array([[ctrue, mtrue]]).T

        model = LinearModel(Amatrix)

        # make data
        data = noise_stdev * np.random.randn(nobs) + model(true_sample)[0, :]
        loglike = GaussianLogLike(model, data, noise_stdev**2)
        loglike = PYMC3LogLikeWrapper(loglike)

        # number of draws from the distribution
        ndraws = 5000
        # number of "burn-in points" (which we'll discard)
        nburn = min(1000, int(ndraws * 0.1))
        # number of parallel chains
        njobs = 4

        # algorithm = 'nuts'
        algorithm = 'metropolis'
        samples, effective_sample_size, map_sample = \
            run_bayesian_inference_gaussian_error_model(
                loglike, variables, ndraws, nburn, njobs,
                algorithm=algorithm, get_map=True, print_summary=False)

        prior_mean = np.asarray(
            [rv.mean() for rv in variables.all_variables()])
        prior_hessian = np.diag(
            [1. / rv.var() for rv in variables.all_variables()])
        noise_covariance_inv = 1. / noise_stdev**2 * np.eye(nobs)

        from pyapprox.bayesian_inference.laplace import \
                laplace_posterior_approximation_for_linear_models
        exact_mean, exact_covariance = \
            laplace_posterior_approximation_for_linear_models(
                Amatrix, prior_mean, prior_hessian,
                noise_covariance_inv, data)

        print('mcmc mean error', samples.mean(axis=1) - exact_mean)
        print('mcmc cov error', np.cov(samples) - exact_covariance)
        print('MAP sample', map_sample)
        print('exact mean', exact_mean.squeeze())
        print('exact cov', exact_covariance)
        assert np.allclose(map_sample, exact_mean)
        assert np.allclose(exact_mean.squeeze(),
                           samples.mean(axis=1),
                           atol=1e-2)
        assert np.allclose(exact_covariance, np.cov(samples), atol=1e-2)
Example #2
    def test_bayesian_importance_sampling_avar(self):
        np.random.seed(1)
        nrandom_vars = 2
        Amat = np.array([[-0.5, 1]])
        noise_std = 0.1
        prior_variable = IndependentMultivariateRandomVariable(
            [stats.norm(0, 1)] * nrandom_vars)
        prior_mean = prior_variable.get_statistics('mean')
        prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
        prior_cov_inv = np.linalg.inv(prior_cov)
        noise_cov_inv = np.eye(Amat.shape[0]) / noise_std**2
        true_sample = np.array([.4] * nrandom_vars)[:, None]
        collected_obs = Amat.dot(true_sample)
        collected_obs += np.random.normal(0, noise_std, (collected_obs.shape))
        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                Amat, prior_mean, prior_cov_inv, noise_cov_inv,
                collected_obs)

        chol_factor = np.linalg.cholesky(exact_post_cov)
        chol_factor_inv = np.linalg.inv(chol_factor)

        def g_model(samples):
            return np.exp(
                np.sum(chol_factor_inv.dot(samples - exact_post_mean),
                       axis=0))[:, None]

        nsamples = int(1e6)
        prior_samples = generate_independent_random_samples(
            prior_variable, nsamples)
        posterior_samples = chol_factor.dot(
            np.random.normal(0, 1, (nrandom_vars, nsamples))) + exact_post_mean

        g_mu, g_sigma = 0, np.sqrt(nrandom_vars)
        f, f_cdf, f_pdf, VaR, CVaR, ssd, ssd_disutil = \
            get_lognormal_example_exact_quantities(g_mu, g_sigma)

        beta = .1
        cvar_exact = CVaR(beta)

        cvar_mc = conditional_value_at_risk(g_model(posterior_samples), beta)

        prior_pdf = prior_variable.pdf
        post_pdf = stats.multivariate_normal(mean=exact_post_mean[:, 0],
                                             cov=exact_post_cov).pdf
        weights = post_pdf(prior_samples.T) / prior_pdf(prior_samples)[:, 0]
        weights /= weights.sum()
        cvar_im = conditional_value_at_risk(g_model(prior_samples), beta,
                                            weights)
        # print(cvar_exact, cvar_mc, cvar_im)
        assert np.allclose(cvar_exact, cvar_mc, rtol=1e-3)
        assert np.allclose(cvar_exact, cvar_im, rtol=2e-3)
Example #3
    def test_gaussian_loglike_fun(self):
        nvars = 1

        def fun(design, samples):
            assert design.ndim == 2
            assert samples.ndim == 2
            Amat = design.T
            return Amat.dot(samples).T

        noise_std = 0.3
        prior_mean = np.zeros((nvars, 1))
        prior_cov = np.eye(nvars)

        design = np.linspace(-1, 1, 4)[None, :]
        true_sample = np.ones((nvars, 1)) * 0.4
        obs = fun(design, true_sample)
        obs += np.random.normal(0, noise_std, obs.shape)

        noise_cov_inv = np.eye(obs.shape[1]) / (noise_std**2)
        obs_matrix = design.T

        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                obs_matrix, prior_mean, np.linalg.inv(prior_cov),
                noise_cov_inv, obs.T)

        lb, ub = stats.norm(0, 1).interval(0.99)
        xx = np.linspace(lb, ub, 101)
        true_pdf_vals = stats.norm(exact_post_mean[0],
                                   np.sqrt(exact_post_cov[0])).pdf(xx)[:, None]

        prior_pdf = stats.norm(prior_mean[0], np.sqrt(prior_cov[0])).pdf
        pred_obs = fun(design, xx[None, :])
        lvals = np.exp(gaussian_loglike_fun(
            obs, pred_obs, noise_std)) * prior_pdf(xx)[:, None]

        xx_gauss, ww_gauss = gauss_hermite_pts_wts_1D(300)
        pred_obs = fun(design, xx_gauss[None, :])
        evidence = np.exp(
            gaussian_loglike_fun(obs, pred_obs, noise_std)[:, 0]).dot(ww_gauss)
        post_pdf_vals = lvals / evidence

        gauss_evidence = laplace_evidence(
            lambda x: np.exp(
                gaussian_loglike_fun(obs, fun(design, x), noise_std)[:, 0]),
            prior_pdf, exact_post_cov, exact_post_mean)
        assert np.allclose(evidence, gauss_evidence)

        # accuracy depends on quadrature rule and size of noise
        # print(post_pdf_vals - true_pdf_vals)
        assert np.allclose(post_pdf_vals, true_pdf_vals)
Example #4
#%%
#Now let the prior on the coefficients of :math:`Y_\alpha` be Gaussian with mean :math:`\mu_\alpha` and covariance :math:`\Sigma_{\alpha\alpha}`, and the covariance between the coefficients of different information sources :math:`Y_\alpha` and :math:`Y_\beta` be :math:`\Sigma_{\alpha\beta}`, such that the joint density of the coefficients of all information sources is Gaussian with mean and covariance given by
#
#.. math::  \mu=\left[\mu_1^T,\ldots,\mu_M^T\right]^T \qquad \Sigma=\begin{bmatrix}\Sigma_{11} &\Sigma_{12} &\ldots &\Sigma_{1M} \\ \Sigma_{21} &\Sigma_{22} &\ldots &\Sigma_{2M}\\\vdots &\vdots & \ddots &\vdots \\ \Sigma_{M1} &\Sigma_{M2} &\ldots &\Sigma_{MM}\end{bmatrix}
#
#In the following we will set the prior mean of all coefficients to zero and first treat all of the coefficients as independent.
prior_mean = np.zeros((nparams.sum(), 1))
prior_cov = np.eye(nparams.sum())
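
#%%
#The block structure in the equation above can also be built explicitly from
#per-source blocks. The snippet below is only a sketch (it assumes ``scipy``
#is available) and simply reproduces the independent prior covariance defined
#above.
from scipy.linalg import block_diag
prior_cov_blocks = [np.eye(nparams[ii]) for ii in range(len(nparams))]
assert np.allclose(block_diag(*prior_cov_blocks), prior_cov)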

#%%
#With these definitions the posterior distribution of the coefficients is (see :ref:`sphx_glr_auto_tutorials_foundations_plot_bayesian_inference.py`)
#
#.. math:: \Sigma^\mathrm{post}=\left(\Sigma^{-1}+\Phi^T\Sigma_\epsilon^{-1}\Phi\right)^{-1}, \qquad  \mu^\mathrm{post}=\Sigma^\mathrm{post}\left(\Phi^T\Sigma_\epsilon^{-1}y+\Sigma^{-1}\mu\right),
#
post_mean, post_cov = laplace_posterior_approximation_for_linear_models(
    basis_mat, prior_mean, np.linalg.inv(prior_cov), np.linalg.inv(noise_cov),
    values)
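
#%%
#As a quick sanity check, the posterior formulas above can be transcribed
#directly into NumPy. This is only a sketch: it assumes ``basis_mat``,
#``noise_cov``, ``values``, ``prior_mean`` and ``prior_cov`` are as defined
#earlier in this tutorial.
prior_cov_inv = np.linalg.inv(prior_cov)
noise_cov_inv = np.linalg.inv(noise_cov)
post_cov_direct = np.linalg.inv(
    prior_cov_inv + basis_mat.T.dot(noise_cov_inv).dot(basis_mat))
post_mean_direct = post_cov_direct.dot(
    basis_mat.T.dot(noise_cov_inv).dot(values) + prior_cov_inv.dot(prior_mean))
assert np.allclose(post_mean_direct, post_mean)
assert np.allclose(post_cov_direct, post_cov)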

#%%
#Now let's plot the resulting approximation of the high-fidelity data.
hf_prior = (prior_mean[nparams[:-1].sum():], prior_cov[nparams[:-1].sum():,
                                                       nparams[:-1].sum():])
hf_posterior = (post_mean[nparams[:-1].sum():], post_cov[nparams[:-1].sum():,
                                                         nparams[:-1].sum():])
xx = np.linspace(0, 1, 101)
fig, axs = plt.subplots(1, 1, figsize=(8, 6))
training_labels = [
    r'$f_1(z_1^{(i)})$', r'$f_2(z_2^{(i)})$', r'$f_3(z_2^{(i)})$'
]
plot_1d_lvn_approx(xx,
                   nmodels,
Example #5
for factor in network.factors:
    factor.condition(evidence_ids, evidence)
factor_post = network.factors[0]
for jj in range(1, len(network.factors)):
    factor_post *= network.factors[jj]
gauss_post = convert_gaussian_from_canonical_form(factor_post.precision_matrix,
                                                  factor_post.shift)

#%%
#We can check this matches the posterior returned by the classical formulas

from pyapprox.bayesian_inference.laplace import \
    laplace_posterior_approximation_for_linear_models
true_post = laplace_posterior_approximation_for_linear_models(
    data_cpd_mats[0], prior_means[ii] * np.ones((nparams[0], 1)),
    np.linalg.inv(prior_covs[ii] * np.eye(nparams[0])),
    np.linalg.inv(noise_covs[0]), values_train[0], data_cpd_vecs[0])

assert np.allclose(gauss_post[1], true_post[1])
assert np.allclose(gauss_post[0], true_post[0].squeeze())

#%%
#Marginalizing Canonical Forms
#-----------------------------
#Gaussian networks are best used when one wants to compute a marginal of the joint density of the parameters, possibly conditioned on data. The following describes the process of marginalization and conditioning, often referred to as the sum-product eliminate variable algorithm.
#
#First let's discuss how to marginalize a canonical form, e.g. compute
#
#.. math:: \int \phi(X,Y,K,h,g)dY
#
#which marginalizes out the variable :math:`Y` from a canonical form also involving the variable :math:`X`. Provided :math:`K_{YY}` in :eq:`eq-canonical-XY` is positive definite, the marginalized canonical form has parameters
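#
#.. math:: K^\prime = K_{XX}-K_{XY}K_{YY}^{-1}K_{YX}, \qquad h^\prime = h_X-K_{XY}K_{YY}^{-1}h_Y,
#
#which is the standard marginalization rule for Gaussian canonical forms (the
#normalization constant :math:`g` is also updated, but it is not needed to
#identify the marginal mean and covariance).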
Example #6
    def test_hierarchical_graph_inference(self):
        nnodes = 3
        graph = nx.DiGraph()
        prior_covs = [1, 2, 3]
        prior_means = [-1, -2, -3]
        cpd_scales = [0.5, 0.4]
        node_labels = [f'Node_{ii}' for ii in range(nnodes)]
        nparams = np.array([2] * 3)
        cpd_mats = [
            None, cpd_scales[0] * np.eye(nparams[1], nparams[0]),
            cpd_scales[1] * np.eye(nparams[2], nparams[1])
        ]

        ii = 0
        graph.add_node(ii,
                       label=node_labels[ii],
                       cpd_cov=prior_covs[ii] * np.eye(nparams[ii]),
                       nparams=nparams[ii],
                       cpd_mat=cpd_mats[ii],
                       cpd_mean=prior_means[ii] * np.ones((nparams[ii], 1)))
        for ii in range(1, nnodes):
            cpd_mean = np.ones(
                (nparams[ii], 1)) * (prior_means[ii] -
                                     cpd_scales[ii - 1] * prior_means[ii - 1])
            cpd_cov = np.eye(nparams[ii]) * max(
                1e-8,
                prior_covs[ii] - cpd_scales[ii - 1]**2 * prior_covs[ii - 1])
            graph.add_node(ii,
                           label=node_labels[ii],
                           cpd_cov=cpd_cov,
                           nparams=nparams[ii],
                           cpd_mat=cpd_mats[ii],
                           cpd_mean=cpd_mean)

        graph.add_edges_from([(ii, ii + 1) for ii in range(nnodes - 1)])

        nsamples = [3] * nnodes
        noise_std = [0.01] * nnodes
        data_cpd_mats = [
            np.random.normal(0, 1, (nsamples[ii], nparams[ii]))
            for ii in range(nnodes)
        ]
        data_cpd_vecs = [np.ones((nsamples[ii], 1)) for ii in range(nnodes)]
        true_coefs = [
            np.random.normal(0, np.sqrt(prior_covs[ii]), (nparams[ii], 1))
            for ii in range(nnodes)
        ]
        noise_covs = [
            np.eye(nsamples[ii]) * noise_std[ii]**2 for ii in range(nnodes)
        ]

        network = GaussianNetwork(graph)
        network.add_data_to_network(data_cpd_mats, data_cpd_vecs, noise_covs)

        noise = [
            noise_std[ii] * np.random.normal(0, noise_std[ii],
                                             (nsamples[ii], 1))
            for ii in range(nnodes)
        ]
        values_train = [
            b.dot(c) + s + n for b, c, s, n in zip(data_cpd_mats, true_coefs,
                                                   data_cpd_vecs, noise)
        ]

        evidence, evidence_ids = network.assemble_evidence(values_train)

        network.convert_to_compact_factors()

        #query_labels = [node_labels[2]]
        query_labels = node_labels[:nnodes]
        factor_post = cond_prob_variable_elimination(network,
                                                     query_labels,
                                                     evidence_ids=evidence_ids,
                                                     evidence=evidence)
        gauss_post = convert_gaussian_from_canonical_form(
            factor_post.precision_matrix, factor_post.shift)

        assert np.all(np.diff(nparams) == 0)
        prior_mean = np.hstack([[prior_means[ii]] * nparams[ii]
                                for ii in range(nnodes)])

        v1, v2, v3 = prior_covs
        A21, A32 = cpd_mats[1:]
        I1, I2, I3 = [np.eye(nparams[0])] * 3
        S11, S22, S33 = v1 * I1, v2 * I2, v3 * I3
        prior_cov = np.vstack([
            np.hstack([S11, S11.dot(A21.T),
                       S11.dot(A21.T.dot(A32.T))]),
            np.hstack([A21.dot(S11), S22, S22.dot(A32.T)]),
            np.hstack([A32.dot(A21).dot(S11),
                       A32.dot(S22), S33])
        ])
        #print('Prior Covariance\n',prior_cov)
        #print('Prior Mean\n',prior_mean)

        # dataless_network = GaussianNetwork(graph)
        # dataless_network.convert_to_compact_factors()
        # labels = [l[1] for l in dataless_network.graph.nodes.data('label')]
        # factor_prior = cond_prob_variable_elimination(
        #     dataless_network, labels, None)
        # prior_mean1,prior_cov1 = convert_gaussian_from_canonical_form(
        #     factor_prior.precision_matrix,factor_prior.shift)
        # print('Prior Covariance\n',prior_cov1)
        # print('Prior Mean\n',prior_mean1)

        basis_mat = scipy.linalg.block_diag(*data_cpd_mats)
        noise_cov_inv = np.linalg.inv(scipy.linalg.block_diag(*noise_covs))
        values = np.vstack(values_train)
        # print('values\n',values)
        bvec = np.vstack(data_cpd_vecs)
        prior_mean = prior_mean[:nparams[:nnodes].sum()]
        prior_cov = prior_cov[:nparams[:nnodes].sum(), :nparams[:nnodes].sum()]
        true_post = laplace_posterior_approximation_for_linear_models(
            basis_mat, prior_mean, np.linalg.inv(prior_cov), noise_cov_inv,
            values, bvec)

        # print(gauss_post[0],'\n',true_post[0].squeeze())
        # print(true_post[1])
        assert np.allclose(gauss_post[1], true_post[1])
        assert np.allclose(gauss_post[0], true_post[0].squeeze())

        # check ability to marginalize prior after data has
        # been added.
        factor_prior = cond_prob_variable_elimination(network, ['Node_2'])
        prior = convert_gaussian_from_canonical_form(
            factor_prior.precision_matrix, factor_prior.shift)
        assert np.allclose(prior[0], prior_mean[nparams[:-1].sum():])
        assert np.allclose(prior[1], prior_cov[nparams[:-1].sum():,
                                               nparams[:-1].sum():])
Example #7
    def test_one_node_inference(self):
        nnodes = 1
        prior_covs = [1]
        prior_means = [-1]
        cpd_scales = []
        node_labels = [f'Node_{ii}' for ii in range(nnodes)]
        nparams = np.array([2] * 3)
        cpd_mats = [None]

        graph = nx.DiGraph()
        ii = 0
        graph.add_node(ii,
                       label=node_labels[ii],
                       cpd_cov=prior_covs[ii] * np.eye(nparams[ii]),
                       nparams=nparams[ii],
                       cpd_mat=cpd_mats[ii],
                       cpd_mean=prior_means[ii] * np.ones((nparams[ii], 1)))

        nsamples = [3]
        noise_std = [0.01] * nnodes
        data_cpd_mats = [
            np.random.normal(0, 1, (nsamples[ii], nparams[ii]))
            for ii in range(nnodes)
        ]
        data_cpd_vecs = [np.ones((nsamples[ii], 1)) for ii in range(nnodes)]
        true_coefs = [
            np.random.normal(0, np.sqrt(prior_covs[ii]), (nparams[ii], 1))
            for ii in range(nnodes)
        ]
        noise_covs = [
            np.eye(nsamples[ii]) * noise_std[ii]**2 for ii in range(nnodes)
        ]

        network = GaussianNetwork(graph)
        network.add_data_to_network(data_cpd_mats, data_cpd_vecs, noise_covs)
        network.convert_to_compact_factors()

        noise = [
            noise_std[ii] * np.random.normal(0, noise_std[ii],
                                             (nsamples[ii], 1))
            for ii in range(nnodes)
        ]
        values_train = [
            b.dot(c) + s + n for b, c, s, n in zip(data_cpd_mats, true_coefs,
                                                   data_cpd_vecs, noise)
        ]

        evidence, evidence_ids = network.assemble_evidence(values_train)
        factor_post = cond_prob_variable_elimination(network,
                                                     node_labels,
                                                     evidence_ids=evidence_ids,
                                                     evidence=evidence)
        gauss_post = convert_gaussian_from_canonical_form(
            factor_post.precision_matrix, factor_post.shift)

        # solve using classical inversion formula
        true_post = laplace_posterior_approximation_for_linear_models(
            data_cpd_mats[0], prior_means[ii] * np.ones((nparams[0], 1)),
            np.linalg.inv(prior_covs[ii] * np.eye(nparams[0])),
            np.linalg.inv(noise_covs[0]), values_train[0], data_cpd_vecs[0])
        #print('True post mean\n',true_post[0])
        #print('Graph post mean\n',gauss_post[0])
        #print('True post covar\n',true_post[1])
        #print('Graph post covar\n',gauss_post[1])

        assert np.allclose(gauss_post[1], true_post[1])
        assert np.allclose(gauss_post[0], true_post[0].squeeze())
Example #8
    def help_compare_prediction_based_oed(self, deviation_fun,
                                          gauss_deviation_fun,
                                          use_gauss_quadrature,
                                          ninner_loop_samples, ndesign_vars,
                                          tol):
        ncandidates_1d = 5
        design_candidates = cartesian_product(
            [np.linspace(-1, 1, ncandidates_1d)] * ndesign_vars)
        ncandidates = design_candidates.shape[1]

        # Define model used to predict likely observable data
        indices = compute_hyperbolic_indices(ndesign_vars, 1)[:, 1:]
        Amat = monomial_basis_matrix(indices, design_candidates)
        obs_fun = partial(linear_obs_fun, Amat)

        # Define model used to predict unobservable QoI
        qoi_fun = exponential_qoi_fun

        # Define the prior PDF of the unknown variables
        nrandom_vars = indices.shape[1]
        prior_variable = IndependentMultivariateRandomVariable(
            [stats.norm(0, 0.5)] * nrandom_vars)

        # Define the independent observational noise
        noise_std = 1

        # Define initial design
        init_design_indices = np.array([ncandidates // 2])

        # Define OED options
        nouter_loop_samples = 100
        if use_gauss_quadrature:
            # 301 needed for cvar deviation
            # only 31 needed for variance deviation
            ninner_loop_samples_1d = ninner_loop_samples
            var_trans = AffineRandomVariableTransformation(prior_variable)
            x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples_1d)
            x_quad = cartesian_product([x_quad] * nrandom_vars)
            w_quad = outer_product([w_quad] * nrandom_vars)
            x_quad = var_trans.map_from_canonical_space(x_quad)
            ninner_loop_samples = x_quad.shape[1]

            def generate_inner_prior_samples(nsamples):
                assert nsamples == x_quad.shape[1], (nsamples, x_quad.shape)
                return x_quad, w_quad
        else:
            # use default Monte Carlo sampling
            generate_inner_prior_samples = None

        # Setup OED problem
        oed = BayesianBatchDeviationOED(design_candidates,
                                        obs_fun,
                                        noise_std,
                                        prior_variable,
                                        qoi_fun,
                                        nouter_loop_samples,
                                        ninner_loop_samples,
                                        generate_inner_prior_samples,
                                        deviation_fun=deviation_fun)
        oed.populate()
        oed.set_collected_design_indices(init_design_indices)

        prior_mean = oed.prior_variable.get_statistics('mean')
        prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
        prior_cov_inv = np.linalg.inv(prior_cov)
        selected_indices = init_design_indices

        # Generate experimental design
        nexperiments = 3
        for step in range(len(init_design_indices), nexperiments):
            # Copy current state of OED before new data is determined
            # This copy will be used to compute Laplace based utility and
            # evidence values for testing
            oed_copy = copy.deepcopy(oed)

            # Update the design
            utility_vals, selected_indices = oed.update_design()

            utility, deviations, evidences, weights = \
                oed_copy.compute_expected_utility(
                    oed_copy.collected_design_indices, selected_indices, True)

            exact_deviations = np.empty(nouter_loop_samples)
            for jj in range(nouter_loop_samples):
                # only test intermediate quantities associated with design
                # chosen by the OED step
                idx = oed.collected_design_indices
                obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]

                noise_cov_inv_jj = np.eye(idx.shape[0]) / noise_std**2
                exact_post_mean_jj, exact_post_cov_jj = \
                    laplace_posterior_approximation_for_linear_models(
                        Amat[idx, :],
                        prior_mean, prior_cov_inv, noise_cov_inv_jj, obs_jj.T)

                exact_deviations[jj] = gauss_deviation_fun(
                    exact_post_mean_jj, exact_post_cov_jj)
            print('d',
                  np.absolute(exact_deviations - deviations[:, 0]).max(), tol)
            # print(exact_deviations, deviations[:, 0])
            assert np.allclose(exact_deviations, deviations[:, 0], atol=tol)
            assert np.allclose(utility_vals[selected_indices],
                               -np.mean(exact_deviations),
                               atol=tol)
Example #9
    def test_sequential_kl_oed(self):
        """
        Observations collected ARE used to inform subsequent designs
        """
        nrandom_vars = 1
        noise_std = 1
        ndesign = 5
        nouter_loop_samples = int(1e4)
        ninner_loop_samples = 31

        ncandidates = 6
        design_candidates = np.linspace(-1, 1, ncandidates)[None, :]

        def obs_fun(samples):
            assert design_candidates.ndim == 2
            assert samples.ndim == 2
            Amat = design_candidates.T
            return Amat.dot(samples).T

        prior_variable = IndependentMultivariateRandomVariable(
            [stats.norm(0, 1)] * nrandom_vars)

        true_sample = np.array([.4] * nrandom_vars)[:, None]

        def obs_process(new_design_indices):
            obs = obs_fun(true_sample)[:, new_design_indices]
            obs += oed.noise_fun(obs)
            return obs

        x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples)

        def generate_inner_prior_samples_gauss(n):
            # use precomputed quadrature samples to avoid regenerating them
            assert n == x_quad.shape[0]
            return x_quad[None, :], w_quad

        generate_inner_prior_samples = generate_inner_prior_samples_gauss

        # Define initial design
        init_design_indices = np.array([ncandidates // 2])
        oed = BayesianSequentialKLOED(design_candidates, obs_fun, noise_std,
                                      prior_variable, obs_process,
                                      nouter_loop_samples, ninner_loop_samples,
                                      generate_inner_prior_samples)
        oed.populate()
        oed.set_collected_design_indices(init_design_indices)

        prior_mean = oed.prior_variable.get_statistics('mean')
        prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
        prior_cov_inv = np.linalg.inv(prior_cov)

        exact_post_mean_prev = prior_mean
        exact_post_cov_prev = prior_cov
        post_var_prev = stats.multivariate_normal(
            mean=exact_post_mean_prev[:, 0], cov=exact_post_cov_prev)
        selected_indices = init_design_indices

        # Because of Monte Carlo error, set the tolerance for each step
        # individually. It is too expensive to increase the number of
        # outer-loop samples enough to reduce the errors further.
        step_tols = [7.3e-3, 6.5e-2, 3.3e-2, 1.6e-1]

        for step in range(len(init_design_indices), ndesign):
            current_design = design_candidates[:, oed.collected_design_indices]
            noise_cov_inv = np.eye(current_design.shape[1]) / noise_std**2

            # Compute posterior moving from previous posterior and using
            # only the most recently collected data
            noise_cov_inv_incr = np.eye(
                selected_indices.shape[0]) / noise_std**2
            exact_post_mean, exact_post_cov = \
                laplace_posterior_approximation_for_linear_models(
                    design_candidates[:, selected_indices].T,
                    exact_post_mean_prev, np.linalg.inv(exact_post_cov_prev),
                    noise_cov_inv_incr, oed.collected_obs[:, -1:].T)

            # Check that using the current posterior as the prior with only
            # the new data (above) produces the same posterior as using the
            # original prior with all collected data (the from_prior
            # approach). The posteriors should be the same but the evidences
            # will be different. This is tested below.
            exact_post_mean_from_prior, exact_post_cov_from_prior = \
                laplace_posterior_approximation_for_linear_models(
                    current_design.T, prior_mean, prior_cov_inv, noise_cov_inv,
                    oed.collected_obs.T)

            assert np.allclose(exact_post_mean, exact_post_mean_from_prior)
            assert np.allclose(exact_post_cov, exact_post_cov_from_prior)

            # Compute PDF of current posterior that uses all collected data
            post_var = stats.multivariate_normal(
                mean=exact_post_mean[:, 0].copy(), cov=exact_post_cov.copy())

            # Compute evidence moving from previous posterior to
            # new posterior (not initial prior to posterior).
            # Values can be computed exactly for Gaussian prior and noise
            gauss_evidence = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        oed.collected_obs[:, -1:],
                        obs_fun(x)[:, oed.collected_design_indices[-1:]],
                        noise_std))[:, 0],
                lambda y: np.atleast_2d(post_var_prev.pdf(y.T)).T,
                exact_post_cov, exact_post_mean)

            # Compute evidence using Gaussian quadrature rule. This
            # is possible for this low-dimensional example.
            quad_loglike_vals = np.exp(
                gaussian_loglike_fun(
                    oed.collected_obs[:, -1:],
                    obs_fun(
                        x_quad[None, :])[:, oed.collected_design_indices[-1:]],
                    noise_std))[:, 0]
            # we must divide the integrand by the initial prior_pdf since it
            # is already implicitly included via the quadrature weights
            integrand_vals = quad_loglike_vals * post_var_prev.pdf(
                x_quad[:, None]) / prior_variable.pdf(x_quad[None, :])[:, 0]
            quad_evidence = integrand_vals.dot(w_quad)
            # print(quad_evidence, gauss_evidence)
            assert np.allclose(gauss_evidence, quad_evidence), step

            # print('G', gauss_evidence, oed.evidence)
            assert np.allclose(gauss_evidence, oed.evidence), step

            # compute the evidence of moving from the initial prior
            # to the current posterior. This will be used for testing later
            gauss_evidence_from_prior = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        oed.collected_obs,
                        obs_fun(x)[:, oed.collected_design_indices], noise_std)
                )[:, 0], prior_variable.pdf, exact_post_cov, exact_post_mean)

            # Copy current state of OED before new data is determined
            # This copy will be used to compute Laplace based utility and
            # evidence values for testing
            oed_copy = copy.deepcopy(oed)

            # Update the design
            utility_vals, selected_indices = oed.update_design()
            new_obs = oed.obs_process(selected_indices)
            oed.update_observations(new_obs)
            utility = utility_vals[selected_indices]

            # Re-compute the evidences that were used to update the design
            # above. This will be used for testing later
            # print('D', oed_copy.evidence)
            evidences = oed_copy.compute_expected_utility(
                oed_copy.collected_design_indices, selected_indices, True)[1]

            # print('Collected plus selected indices',
            #       oed.collected_design_indices,
            #       oed_copy.collected_design_indices, selected_indices)

            # For all outer-loop samples compute the posterior exactly
            # and compute intermediate values for testing. While OED
            # considers all possible candidate design indices,
            # here we just test the one that was chosen last when
            # the design was updated.
            exact_evidences = np.empty(nouter_loop_samples)
            exact_kl_divs = np.empty_like(exact_evidences)
            for jj in range(nouter_loop_samples):
                # Fill obs with those predicted by outer loop sample
                idx = oed.collected_design_indices
                obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]
                # Overwrite the previously simulated obs with collected obs.
                # Do not overwrite the last value, which is the potential
                # data used to compute the expected utility.
                obs_jj[:, :oed_copy.collected_obs.shape[1]] = \
                    oed_copy.collected_obs

                # Compute the posterior obtained by using predicted value
                # of outer loop sample
                noise_cov_inv_jj = np.eye(
                    selected_indices.shape[0]) / noise_std**2
                exact_post_mean_jj, exact_post_cov_jj = \
                    laplace_posterior_approximation_for_linear_models(
                        design_candidates[:, selected_indices].T,
                        exact_post_mean, np.linalg.inv(exact_post_cov),
                        noise_cov_inv_jj, obs_jj[:, -1].T)

                # Use the posterior pdf so we measure the change from the
                # current posterior (acting as the prior) to the new posterior
                gauss_evidence_jj = laplace_evidence(
                    lambda x: np.exp(
                        gaussian_loglike_fun(obs_jj[:, -1:],
                                             obs_fun(x)[:, selected_indices],
                                             noise_std))[:, 0],
                    lambda y: np.atleast_2d(post_var.pdf(y.T)).T,
                    exact_post_cov_jj, exact_post_mean_jj)
                exact_evidences[jj] = gauss_evidence_jj

                # Check quadrature gets the same answer
                quad_loglike_vals = np.exp(
                    gaussian_loglike_fun(
                        obs_jj[:, -1:],
                        obs_fun(x_quad[None, :])[:, selected_indices],
                        noise_std))[:, 0]
                integrand_vals = (
                    quad_loglike_vals * post_var.pdf(x_quad[:, None]) /
                    prior_variable.pdf(x_quad[None, :])[:, 0])
                quad_evidence = integrand_vals.dot(w_quad)
                # print(quad_evidence, gauss_evidence_jj)
                assert np.allclose(gauss_evidence_jj, quad_evidence), step

                # Check that the evidence of moving from the current posterior
                # to the new posterior (with potential data from the
                # outer-loop sample) is equal to the evidence of moving from
                # the initial prior to the new posterior divided by the
                # evidence of moving from the initial prior to the current
                # posterior
                gauss_evidence_jj_from_prior = laplace_evidence(
                    lambda x: np.exp(
                        gaussian_loglike_fun(obs_jj,
                                             obs_fun(x)[:, idx], noise_std)
                    )[:, 0], prior_variable.pdf, exact_post_cov_jj,
                    exact_post_mean_jj)
                # print(gauss_evidence_jj_from_prior/gauss_evidence_from_prior,
                #       gauss_evidence_jj)
                # print('gauss_evidence_from_prior', gauss_evidence_from_prior)
                assert np.allclose(
                    gauss_evidence_jj_from_prior / gauss_evidence_from_prior,
                    gauss_evidence_jj)

                gauss_kl_div = gaussian_kl_divergence(exact_post_mean_jj,
                                                      exact_post_cov_jj,
                                                      exact_post_mean,
                                                      exact_post_cov)
                # gauss_kl_div = gaussian_kl_divergence(
                #     exact_post_mean, exact_post_cov,
                #     exact_post_mean_jj, exact_post_cov_jj)
                exact_kl_divs[jj] = gauss_kl_div

            # print(evidences[:, 0], exact_evidences)
            assert np.allclose(evidences[:, 0], exact_evidences)

            # Outer loop samples are from prior. Use importance reweighting
            # to sample from previous posterior. This step is only relevant
            # for open loop design (used here)
            # where observed data informs current estimate
            # of parameters. Closed loop design (not used here)
            # never collects data and so it always samples from the prior.
            post_weights = post_var.pdf(
                oed.outer_loop_prior_samples.T) / post_var_prev.pdf(
                    oed.outer_loop_prior_samples.T) / oed.nouter_loop_samples
            laplace_utility = np.sum(exact_kl_divs * post_weights)
            # print('u', (utility-laplace_utility)/laplace_utility, step)
            assert np.allclose(utility,
                               laplace_utility,
                               rtol=step_tols[step - 1])

            exact_post_mean_prev = exact_post_mean
            exact_post_cov_prev = exact_post_cov
            post_var_prev = post_var
Example #10
    def test_compute_expected_kl_utility_monte_carlo(self):
        nrandom_vars = 1
        noise_std = .3
        design = np.linspace(-1, 1, 2)[None, :]
        Amat = design.T

        def obs_fun(x):
            return (Amat.dot(x)).T

        def noise_fun(values):
            return np.random.normal(0, noise_std, (values.shape))

        # specify the first design point
        collected_design_indices = np.array([0])

        prior_variable = IndependentMultivariateRandomVariable(
            [stats.norm(0, 1)] * nrandom_vars)

        prior_mean = prior_variable.get_statistics('mean')
        prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
        prior_cov_inv = np.linalg.inv(prior_cov)
        noise_cov_inv = np.eye(Amat.shape[0]) / noise_std**2

        def generate_random_prior_samples(n):
            # return prior samples and equal weights
            return (generate_independent_random_samples(prior_variable, n),
                    np.ones(n) / n)

        def generate_inner_prior_samples_mc(n):
            # generate_random_prior_samples already returns samples and
            # weights so do not append another set of weights here
            return generate_random_prior_samples(n)

        ninner_loop_samples = 300
        x, w = gauss_hermite_pts_wts_1D(ninner_loop_samples)

        def generate_inner_prior_samples_gauss(n):
            # use precomputed quadrature samples to avoid regenerating them
            assert n == x.shape[0]
            return x[None, :], w

        generate_inner_prior_samples = generate_inner_prior_samples_gauss

        nouter_loop_samples = 10000
        outer_loop_obs, outer_loop_pred_obs, inner_loop_pred_obs, \
            inner_loop_weights, __, __ = \
            precompute_compute_expected_kl_utility_data(
                generate_random_prior_samples, nouter_loop_samples, obs_fun,
                noise_fun, ninner_loop_samples,
                generate_inner_prior_samples=generate_inner_prior_samples)

        new_design_indices = np.array([1])

        outer_loop_weights = np.ones(
            (nouter_loop_samples, 1)) / nouter_loop_samples

        def log_likelihood_fun(obs, pred_obs, active_indices=None):
            return gaussian_loglike_fun(obs, pred_obs, noise_std,
                                        active_indices)

        utility = compute_expected_kl_utility_monte_carlo(
            log_likelihood_fun, outer_loop_obs, outer_loop_pred_obs,
            inner_loop_pred_obs, inner_loop_weights, outer_loop_weights,
            collected_design_indices, new_design_indices, False)

        kl_divs = []
        # overwrite a subset of the obs with previously collected data;
        # make a copy so that the outer-loop obs can be used again
        outer_loop_obs_copy = outer_loop_obs.copy()
        for ii in range(nouter_loop_samples):
            idx = np.hstack((collected_design_indices, new_design_indices))
            obs_ii = outer_loop_obs_copy[ii:ii + 1, idx]

            exact_post_mean, exact_post_cov = \
                laplace_posterior_approximation_for_linear_models(
                    Amat[idx, :], prior_mean, prior_cov_inv,
                    noise_cov_inv[np.ix_(idx, idx)], obs_ii.T)

            kl_div = gaussian_kl_divergence(exact_post_mean, exact_post_cov,
                                            prior_mean, prior_cov)
            kl_divs.append(kl_div)

        print(utility - np.mean(kl_divs), utility, np.mean(kl_divs))
        assert np.allclose(utility, np.mean(kl_divs), rtol=2e-2)