def test_linear_gaussian_inference(self):
    # set random seed, so the data is reproducible each time
    np.random.seed(1)

    nobs = 10  # number of observations
    noise_stdev = .1  # standard deviation of noise
    x = np.linspace(0., 9., nobs)
    Amatrix = np.hstack([np.ones((nobs, 1)), x[:, np.newaxis]])

    univariate_variables = [norm(1, 1), norm(0, 4)]
    variables = IndependentMultivariateRandomVariable(univariate_variables)

    mtrue = 0.4  # true gradient
    ctrue = 2.   # true y-intercept
    true_sample = np.array([[ctrue, mtrue]]).T

    model = LinearModel(Amatrix)

    # make data
    data = noise_stdev * np.random.randn(nobs) + model(true_sample)[0, :]
    loglike = GaussianLogLike(model, data, noise_stdev**2)
    loglike = PYMC3LogLikeWrapper(loglike)

    # number of draws from the distribution
    ndraws = 5000
    # number of "burn-in points" (which we'll discard)
    nburn = min(1000, int(ndraws * 0.1))
    # number of parallel chains
    njobs = 4
    # algorithm = 'nuts'
    algorithm = 'metropolis'
    samples, effective_sample_size, map_sample = \
        run_bayesian_inference_gaussian_error_model(
            loglike, variables, ndraws, nburn, njobs,
            algorithm=algorithm, get_map=True, print_summary=False)

    prior_mean = np.asarray(
        [rv.mean() for rv in variables.all_variables()])
    prior_hessian = np.diag(
        [1. / rv.var() for rv in variables.all_variables()])
    noise_covariance_inv = 1. / noise_stdev**2 * np.eye(nobs)
    from pyapprox.bayesian_inference.laplace import \
        laplace_posterior_approximation_for_linear_models
    exact_mean, exact_covariance = \
        laplace_posterior_approximation_for_linear_models(
            Amatrix, prior_mean, prior_hessian, noise_covariance_inv, data)

    print('mcmc mean error', samples.mean(axis=1) - exact_mean)
    print('mcmc cov error', np.cov(samples) - exact_covariance)
    print('MAP sample', map_sample)
    print('exact mean', exact_mean.squeeze())
    print('exact cov', exact_covariance)
    assert np.allclose(map_sample, exact_mean)
    assert np.allclose(
        exact_mean.squeeze(), samples.mean(axis=1), atol=1e-2)
    assert np.allclose(exact_covariance, np.cov(samples), atol=1e-2)
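
# For reference, the exact posterior checked above has a simple closed form
# for a linear model with a Gaussian prior and Gaussian noise. The sketch
# below is illustrative only: the helper name is ours, not the pyapprox API,
# and it assumes numpy is available as np, as elsewhere in this module.
def linear_gaussian_posterior_sketch(Amat, prior_mean, prior_cov_inv,
                                     noise_cov_inv, obs):
    # posterior covariance: (prior precision + A^T Sigma_eps^-1 A)^-1
    post_cov = np.linalg.inv(
        prior_cov_inv + Amat.T.dot(noise_cov_inv).dot(Amat))
    # posterior mean: post_cov (A^T Sigma_eps^-1 y + prior precision mu)
    post_mean = post_cov.dot(
        Amat.T.dot(noise_cov_inv).dot(obs) + prior_cov_inv.dot(prior_mean))
    return post_mean, post_cov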
def test_bayesian_importance_sampling_avar(self):
    np.random.seed(1)
    nrandom_vars = 2
    Amat = np.array([[-0.5, 1]])
    noise_std = 0.1
    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)
    prior_mean = prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)
    noise_cov_inv = np.eye(Amat.shape[0]) / noise_std**2
    true_sample = np.array([.4] * nrandom_vars)[:, None]
    collected_obs = Amat.dot(true_sample)
    collected_obs += np.random.normal(0, noise_std, collected_obs.shape)
    exact_post_mean, exact_post_cov = \
        laplace_posterior_approximation_for_linear_models(
            Amat, prior_mean, prior_cov_inv, noise_cov_inv, collected_obs)

    chol_factor = np.linalg.cholesky(exact_post_cov)
    chol_factor_inv = np.linalg.inv(chol_factor)

    def g_model(samples):
        return np.exp(
            np.sum(chol_factor_inv.dot(samples - exact_post_mean),
                   axis=0))[:, None]

    nsamples = int(1e6)
    prior_samples = generate_independent_random_samples(
        prior_variable, nsamples)
    posterior_samples = chol_factor.dot(
        np.random.normal(0, 1, (nrandom_vars, nsamples))) + exact_post_mean

    g_mu, g_sigma = 0, np.sqrt(nrandom_vars)
    f, f_cdf, f_pdf, VaR, CVaR, ssd, ssd_disutil = \
        get_lognormal_example_exact_quantities(g_mu, g_sigma)

    beta = .1
    cvar_exact = CVaR(beta)

    cvar_mc = conditional_value_at_risk(g_model(posterior_samples), beta)

    prior_pdf = prior_variable.pdf
    post_pdf = stats.multivariate_normal(
        mean=exact_post_mean[:, 0], cov=exact_post_cov).pdf
    weights = post_pdf(prior_samples.T) / prior_pdf(prior_samples)[:, 0]
    weights /= weights.sum()
    cvar_im = conditional_value_at_risk(
        g_model(prior_samples), beta, weights)
    # print(cvar_exact, cvar_mc, cvar_im)
    assert np.allclose(cvar_exact, cvar_mc, rtol=1e-3)
    assert np.allclose(cvar_exact, cvar_im, rtol=2e-3)
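
# The reweighting above is self-normalized importance sampling: samples drawn
# from the prior are weighted by posterior_pdf/prior_pdf so that posterior
# expectations (here CVaR) can be estimated without sampling the posterior
# directly. A minimal sketch for a generic expectation; the function and
# argument names are illustrative and not part of pyapprox.
def self_normalized_is_estimate(fun_vals, target_pdf_vals,
                                sampling_pdf_vals):
    # all arguments are 1D arrays of equal length, one entry per sample
    weights = target_pdf_vals / sampling_pdf_vals
    weights /= weights.sum()
    return np.sum(weights * fun_vals)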
def test_gaussian_loglike_fun(self):
    nvars = 1

    def fun(design, samples):
        assert design.ndim == 2
        assert samples.ndim == 2
        Amat = design.T
        return Amat.dot(samples).T

    noise_std = 0.3
    prior_mean = np.zeros((nvars, 1))
    prior_cov = np.eye(nvars)
    design = np.linspace(-1, 1, 4)[None, :]
    true_sample = np.ones((nvars, 1)) * 0.4
    obs = fun(design, true_sample)
    obs += np.random.normal(0, noise_std, obs.shape)
    noise_cov_inv = np.eye(obs.shape[1]) / (noise_std**2)
    obs_matrix = design.T
    exact_post_mean, exact_post_cov = \
        laplace_posterior_approximation_for_linear_models(
            obs_matrix, prior_mean, np.linalg.inv(prior_cov),
            noise_cov_inv, obs.T)

    lb, ub = stats.norm(0, 1).interval(0.99)
    xx = np.linspace(lb, ub, 101)
    true_pdf_vals = stats.norm(
        exact_post_mean[0], np.sqrt(exact_post_cov[0])).pdf(xx)[:, None]

    prior_pdf = stats.norm(prior_mean[0], np.sqrt(prior_cov[0])).pdf
    pred_obs = fun(design, xx[None, :])
    lvals = np.exp(gaussian_loglike_fun(
        obs, pred_obs, noise_std)) * prior_pdf(xx)[:, None]

    xx_gauss, ww_gauss = gauss_hermite_pts_wts_1D(300)
    pred_obs = fun(design, xx_gauss[None, :])
    evidence = np.exp(
        gaussian_loglike_fun(obs, pred_obs, noise_std)[:, 0]).dot(ww_gauss)
    post_pdf_vals = lvals / evidence

    gauss_evidence = laplace_evidence(
        lambda x: np.exp(
            gaussian_loglike_fun(obs, fun(design, x), noise_std)[:, 0]),
        prior_pdf, exact_post_cov, exact_post_mean)
    assert np.allclose(evidence, gauss_evidence)

    # accuracy depends on quadrature rule and size of noise
    # print(post_pdf_vals - true_pdf_vals)
    assert np.allclose(post_pdf_vals, true_pdf_vals)
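
# For independent Gaussian noise with a single standard deviation, the
# log-likelihood evaluated above has a familiar closed form. The sketch below
# is illustrative (not the pyapprox implementation) and assumes obs has shape
# (1, nobs) and pred_obs has shape (nsamples, nobs), as in the test above.
def iid_gaussian_loglike_sketch(obs, pred_obs, noise_std):
    nobs = obs.shape[1]
    resid = obs - pred_obs
    # log N(obs; pred_obs, noise_std**2 I), one value per prediction sample
    return (-0.5 * nobs * np.log(2 * np.pi * noise_std**2)
            - 0.5 * np.sum(resid**2, axis=1) / noise_std**2)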
#%%
#Now let the prior on the coefficients of :math:`Y_\alpha` be Gaussian with
#mean :math:`\mu_\alpha` and covariance :math:`\Sigma_{\alpha\alpha}`, and the
#covariance between the coefficients of different information sources
#:math:`Y_\alpha` and :math:`Y_\beta` be :math:`\Sigma_{\alpha\beta}`, such
#that the joint density of the coefficients of all information sources is
#Gaussian with mean and covariance given by
#
#.. math:: \mu=\left[\mu_1^T,\ldots,\mu_M^T\right]^T \qquad \Sigma=\begin{bmatrix}\Sigma_{11} &\Sigma_{12} &\ldots &\Sigma_{1M} \\ \Sigma_{21} &\Sigma_{22} &\ldots &\Sigma_{2M}\\ \vdots &\vdots & \ddots &\vdots \\ \Sigma_{M1} &\Sigma_{M2} &\ldots &\Sigma_{MM}\end{bmatrix}
#
#In the following we will set the prior mean to zero for all coefficients and
#first try setting all the coefficients to be independent
prior_mean = np.zeros((nparams.sum(), 1))
prior_cov = np.eye(nparams.sum())

#%%
#With these definitions the posterior distribution of the coefficients is
#(see :ref:`sphx_glr_auto_tutorials_foundations_plot_bayesian_inference.py`)
#
#.. math:: \Sigma^\mathrm{post}=\left(\Sigma^{-1}+\Phi^T\Sigma_\epsilon^{-1}\Phi\right)^{-1}, \qquad \mu^\mathrm{post}=\Sigma^\mathrm{post}\left(\Phi^T\Sigma_\epsilon^{-1}y+\Sigma^{-1}\mu\right),
#
post_mean, post_cov = laplace_posterior_approximation_for_linear_models(
    basis_mat, prior_mean, np.linalg.inv(prior_cov),
    np.linalg.inv(noise_cov), values)

#%%
#Now let's plot the resulting approximation of the high-fidelity data.
hf_prior = (prior_mean[nparams[:-1].sum():],
            prior_cov[nparams[:-1].sum():, nparams[:-1].sum():])
hf_posterior = (post_mean[nparams[:-1].sum():],
                post_cov[nparams[:-1].sum():, nparams[:-1].sum():])
xx = np.linspace(0, 1, 101)
fig, axs = plt.subplots(1, 1, figsize=(8, 6))
training_labels = [
    r'$f_1(z_1^{(i)})$', r'$f_2(z_2^{(i)})$', r'$f_3(z_2^{(i)})$']
plot_1d_lvn_approx(xx, nmodels,
for factor in network.factors:
    factor.condition(evidence_ids, evidence)

factor_post = network.factors[0]
for jj in range(1, len(network.factors)):
    factor_post *= network.factors[jj]
gauss_post = convert_gaussian_from_canonical_form(
    factor_post.precision_matrix, factor_post.shift)

#%%
#We can check this matches the posterior returned by the classical formulas
from pyapprox.bayesian_inference.laplace import \
    laplace_posterior_approximation_for_linear_models
true_post = laplace_posterior_approximation_for_linear_models(
    data_cpd_mats[0], prior_means[ii] * np.ones((nparams[0], 1)),
    np.linalg.inv(prior_covs[ii] * np.eye(nparams[0])),
    np.linalg.inv(noise_covs[0]), values_train[0], data_cpd_vecs[0])
assert np.allclose(gauss_post[1], true_post[1])
assert np.allclose(gauss_post[0], true_post[0].squeeze())

#%%
#Marginalizing Canonical Forms
#-----------------------------
#Gaussian networks are best used when one wants to compute a marginal of the
#joint density of the parameters, possibly conditioned on data. The following
#describes the process of marginalization and conditioning, often referred to
#as the sum-product eliminate variable algorithm.
#
#First let's discuss how to marginalize a canonical form, e.g. compute
#
#.. math:: \int \phi(X,Y,K,h,g)dY
#
#which marginalizes out the variable :math:`Y` from a canonical form also
#involving the variable :math:`X`. Provided :math:`K_{YY}` in
#:eq:`eq-canonical-XY` is positive definite the marginalized canonical form
#has parameters
def test_hierarchical_graph_inference(self):
    nnodes = 3
    graph = nx.DiGraph()
    prior_covs = [1, 2, 3]
    prior_means = [-1, -2, -3]
    cpd_scales = [0.5, 0.4]
    node_labels = [f'Node_{ii}' for ii in range(nnodes)]
    nparams = np.array([2] * 3)

    cpd_mats = [None,
                cpd_scales[0] * np.eye(nparams[1], nparams[0]),
                cpd_scales[1] * np.eye(nparams[2], nparams[1])]

    ii = 0
    graph.add_node(
        ii, label=node_labels[ii],
        cpd_cov=prior_covs[ii] * np.eye(nparams[ii]),
        nparams=nparams[ii], cpd_mat=cpd_mats[ii],
        cpd_mean=prior_means[ii] * np.ones((nparams[ii], 1)))
    for ii in range(1, nnodes):
        cpd_mean = np.ones((nparams[ii], 1)) * (
            prior_means[ii] - cpd_scales[ii - 1] * prior_means[ii - 1])
        cpd_cov = np.eye(nparams[ii]) * max(
            1e-8,
            prior_covs[ii] - cpd_scales[ii - 1]**2 * prior_covs[ii - 1])
        graph.add_node(ii, label=node_labels[ii], cpd_cov=cpd_cov,
                       nparams=nparams[ii], cpd_mat=cpd_mats[ii],
                       cpd_mean=cpd_mean)

    graph.add_edges_from([(ii, ii + 1) for ii in range(nnodes - 1)])

    nsamples = [3] * nnodes
    noise_std = [0.01] * nnodes
    data_cpd_mats = [
        np.random.normal(0, 1, (nsamples[ii], nparams[ii]))
        for ii in range(nnodes)]
    data_cpd_vecs = [np.ones((nsamples[ii], 1)) for ii in range(nnodes)]
    true_coefs = [
        np.random.normal(0, np.sqrt(prior_covs[ii]), (nparams[ii], 1))
        for ii in range(nnodes)]
    noise_covs = [
        np.eye(nsamples[ii]) * noise_std[ii]**2 for ii in range(nnodes)]

    network = GaussianNetwork(graph)
    network.add_data_to_network(data_cpd_mats, data_cpd_vecs, noise_covs)
    noise = [
        noise_std[ii] * np.random.normal(
            0, noise_std[ii], (nsamples[ii], 1)) for ii in range(nnodes)]
    values_train = [
        b.dot(c) + s + n for b, c, s, n in zip(
            data_cpd_mats, true_coefs, data_cpd_vecs, noise)]

    evidence, evidence_ids = network.assemble_evidence(values_train)

    network.convert_to_compact_factors()
    # query_labels = [node_labels[2]]
    query_labels = node_labels[:nnodes]
    factor_post = cond_prob_variable_elimination(
        network, query_labels, evidence_ids=evidence_ids,
        evidence=evidence)
    gauss_post = convert_gaussian_from_canonical_form(
        factor_post.precision_matrix, factor_post.shift)

    assert np.all(np.diff(nparams) == 0)
    prior_mean = np.hstack(
        [[prior_means[ii]] * nparams[ii] for ii in range(nnodes)])
    v1, v2, v3 = prior_covs
    A21, A32 = cpd_mats[1:]
    I1, I2, I3 = [np.eye(nparams[0])] * 3
    S11, S22, S33 = v1 * I1, v2 * I2, v3 * I3
    prior_cov = np.vstack([
        np.hstack([S11, S11.dot(A21.T), S11.dot(A21.T.dot(A32.T))]),
        np.hstack([A21.dot(S11), S22, S22.dot(A32.T)]),
        np.hstack([A32.dot(A21).dot(S11), A32.dot(S22), S33])])
    # print('Prior Covariance\n', prior_cov)
    # print('Prior Mean\n', prior_mean)

    # dataless_network = GaussianNetwork(graph)
    # dataless_network.convert_to_compact_factors()
    # labels = [l[1] for l in dataless_network.graph.nodes.data('label')]
    # factor_prior = cond_prob_variable_elimination(
    #     dataless_network, labels, None)
    # prior_mean1, prior_cov1 = convert_gaussian_from_canonical_form(
    #     factor_prior.precision_matrix, factor_prior.shift)
    # print('Prior Covariance\n', prior_cov1)
    # print('Prior Mean\n', prior_mean1)

    basis_mat = scipy.linalg.block_diag(*data_cpd_mats)
    noise_cov_inv = np.linalg.inv(scipy.linalg.block_diag(*noise_covs))
    values = np.vstack(values_train)
    # print('values\n', values)
    bvec = np.vstack(data_cpd_vecs)
    prior_mean = prior_mean[:nparams[:nnodes].sum()]
    prior_cov = prior_cov[:nparams[:nnodes].sum(), :nparams[:nnodes].sum()]
    true_post = laplace_posterior_approximation_for_linear_models(
        basis_mat, prior_mean, np.linalg.inv(prior_cov), noise_cov_inv,
        values, bvec)
    # print(gauss_post[0], '\n', true_post[0].squeeze())
    # print(true_post[1])
    assert np.allclose(gauss_post[1], true_post[1])
    assert np.allclose(gauss_post[0], true_post[0].squeeze())

    # check ability to marginalize prior after data has been added
    factor_prior = cond_prob_variable_elimination(network, ['Node_2'])
    prior = convert_gaussian_from_canonical_form(
        factor_prior.precision_matrix, factor_prior.shift)
    assert np.allclose(prior[0], prior_mean[nparams[:-1].sum():])
    assert np.allclose(
        prior[1], prior_cov[nparams[:-1].sum():, nparams[:-1].sum():])
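
# The conversion from canonical form used above relies on the standard
# relationship between the canonical (information) parameterization of a
# Gaussian, with precision matrix K and shift h, and its moment form. A
# minimal sketch of that relationship, assuming K is positive definite
# (illustrative; not the pyapprox code itself):
def canonical_to_moment_form_sketch(precision_matrix, shift):
    # phi(x) proportional to exp(-x^T K x / 2 + h^T x)
    # has covariance K^-1 and mean K^-1 h
    covariance = np.linalg.inv(precision_matrix)
    mean = covariance.dot(shift)
    return mean, covariance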
def test_one_node_inference(self):
    nnodes = 1
    prior_covs = [1]
    prior_means = [-1]
    cpd_scales = []
    node_labels = [f'Node_{ii}' for ii in range(nnodes)]
    nparams = np.array([2] * 3)
    cpd_mats = [None]

    graph = nx.DiGraph()
    ii = 0
    graph.add_node(
        ii, label=node_labels[ii],
        cpd_cov=prior_covs[ii] * np.eye(nparams[ii]),
        nparams=nparams[ii], cpd_mat=cpd_mats[ii],
        cpd_mean=prior_means[ii] * np.ones((nparams[ii], 1)))

    nsamples = [3]
    noise_std = [0.01] * nnodes
    data_cpd_mats = [
        np.random.normal(0, 1, (nsamples[ii], nparams[ii]))
        for ii in range(nnodes)]
    data_cpd_vecs = [np.ones((nsamples[ii], 1)) for ii in range(nnodes)]
    true_coefs = [
        np.random.normal(0, np.sqrt(prior_covs[ii]), (nparams[ii], 1))
        for ii in range(nnodes)]
    noise_covs = [
        np.eye(nsamples[ii]) * noise_std[ii]**2 for ii in range(nnodes)]

    network = GaussianNetwork(graph)
    network.add_data_to_network(data_cpd_mats, data_cpd_vecs, noise_covs)
    network.convert_to_compact_factors()
    noise = [
        noise_std[ii] * np.random.normal(
            0, noise_std[ii], (nsamples[ii], 1))]
    values_train = [
        b.dot(c) + s + n for b, c, s, n in zip(
            data_cpd_mats, true_coefs, data_cpd_vecs, noise)]

    evidence, evidence_ids = network.assemble_evidence(values_train)

    factor_post = cond_prob_variable_elimination(
        network, node_labels, evidence_ids=evidence_ids,
        evidence=evidence)
    gauss_post = convert_gaussian_from_canonical_form(
        factor_post.precision_matrix, factor_post.shift)

    # solve using classical inversion formula
    true_post = laplace_posterior_approximation_for_linear_models(
        data_cpd_mats[0], prior_means[ii] * np.ones((nparams[0], 1)),
        np.linalg.inv(prior_covs[ii] * np.eye(nparams[0])),
        np.linalg.inv(noise_covs[0]), values_train[0], data_cpd_vecs[0])

    # print('True post mean\n', true_post[0])
    # print('Graph post mean\n', gauss_post[0])
    # print('True post covar\n', true_post[1])
    # print('Graph post covar\n', gauss_post[1])
    assert np.allclose(gauss_post[1], true_post[1])
    assert np.allclose(gauss_post[0], true_post[0].squeeze())
def help_compare_prediction_based_oed(
        self, deviation_fun, gauss_deviation_fun, use_gauss_quadrature,
        ninner_loop_samples, ndesign_vars, tol):
    ncandidates_1d = 5
    design_candidates = cartesian_product(
        [np.linspace(-1, 1, ncandidates_1d)] * ndesign_vars)
    ncandidates = design_candidates.shape[1]

    # Define model used to predict likely observable data
    indices = compute_hyperbolic_indices(ndesign_vars, 1)[:, 1:]
    Amat = monomial_basis_matrix(indices, design_candidates)
    obs_fun = partial(linear_obs_fun, Amat)

    # Define model used to predict unobservable QoI
    qoi_fun = exponential_qoi_fun

    # Define the prior PDF of the unknown variables
    nrandom_vars = indices.shape[1]
    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 0.5)] * nrandom_vars)

    # Define the independent observational noise
    noise_std = 1

    # Define initial design
    init_design_indices = np.array([ncandidates // 2])

    # Define OED options
    nouter_loop_samples = 100
    if use_gauss_quadrature:
        # 301 needed for cvar deviation
        # only 31 needed for variance deviation
        ninner_loop_samples_1d = ninner_loop_samples
        var_trans = AffineRandomVariableTransformation(prior_variable)
        x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples_1d)
        x_quad = cartesian_product([x_quad] * nrandom_vars)
        w_quad = outer_product([w_quad] * nrandom_vars)
        x_quad = var_trans.map_from_canonical_space(x_quad)
        ninner_loop_samples = x_quad.shape[1]

        def generate_inner_prior_samples(nsamples):
            assert nsamples == x_quad.shape[1], (nsamples, x_quad.shape)
            return x_quad, w_quad
    else:
        # use default Monte Carlo sampling
        generate_inner_prior_samples = None

    # Define initial design
    init_design_indices = np.array([ncandidates // 2])

    # Setup OED problem
    oed = BayesianBatchDeviationOED(
        design_candidates, obs_fun, noise_std, prior_variable, qoi_fun,
        nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples, deviation_fun=deviation_fun)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    prior_mean = oed.prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)
    selected_indices = init_design_indices

    # Generate experimental design
    nexperiments = 3
    for step in range(len(init_design_indices), nexperiments):
        # Copy current state of OED before new data is determined
        # This copy will be used to compute Laplace based utility and
        # evidence values for testing
        oed_copy = copy.deepcopy(oed)

        # Update the design
        utility_vals, selected_indices = oed.update_design()
        utility, deviations, evidences, weights = \
            oed_copy.compute_expected_utility(
                oed_copy.collected_design_indices, selected_indices, True)

        exact_deviations = np.empty(nouter_loop_samples)
        for jj in range(nouter_loop_samples):
            # only test intermediate quantities associated with design
            # chosen by the OED step
            idx = oed.collected_design_indices
            obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]

            noise_cov_inv_jj = np.eye(idx.shape[0]) / noise_std**2
            exact_post_mean_jj, exact_post_cov_jj = \
                laplace_posterior_approximation_for_linear_models(
                    Amat[idx, :], prior_mean, prior_cov_inv,
                    noise_cov_inv_jj, obs_jj.T)
            exact_deviations[jj] = gauss_deviation_fun(
                exact_post_mean_jj, exact_post_cov_jj)
        print('d',
              np.absolute(exact_deviations - deviations[:, 0]).max(), tol)
        # print(exact_deviations, deviations[:, 0])
        assert np.allclose(exact_deviations, deviations[:, 0], atol=tol)
        assert np.allclose(
            utility_vals[selected_indices], -np.mean(exact_deviations),
            atol=tol)
def test_sequential_kl_oed(self):
    """
    Observations collected ARE used to inform subsequent designs
    """
    nrandom_vars = 1
    noise_std = 1
    ndesign = 5
    nouter_loop_samples = int(1e4)
    ninner_loop_samples = 31

    ncandidates = 6
    design_candidates = np.linspace(-1, 1, ncandidates)[None, :]

    def obs_fun(samples):
        assert design_candidates.ndim == 2
        assert samples.ndim == 2
        Amat = design_candidates.T
        return Amat.dot(samples).T

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)

    true_sample = np.array([.4] * nrandom_vars)[:, None]

    def obs_process(new_design_indices):
        obs = obs_fun(true_sample)[:, new_design_indices]
        obs += oed.noise_fun(obs)
        return obs

    x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x_quad.shape[0]
        return x_quad[None, :], w_quad

    generate_inner_prior_samples = generate_inner_prior_samples_gauss

    # Define initial design
    init_design_indices = np.array([ncandidates // 2])
    oed = BayesianSequentialKLOED(
        design_candidates, obs_fun, noise_std, prior_variable, obs_process,
        nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    prior_mean = oed.prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)

    exact_post_mean_prev = prior_mean
    exact_post_cov_prev = prior_cov
    post_var_prev = stats.multivariate_normal(
        mean=exact_post_mean_prev[:, 0], cov=exact_post_cov_prev)

    selected_indices = init_design_indices

    # Because of Monte Carlo error, set step tolerances individually.
    # It is too expensive to increase the number of outer loop samples
    # enough to reduce the errors uniformly.
    step_tols = [7.3e-3, 6.5e-2, 3.3e-2, 1.6e-1]

    for step in range(len(init_design_indices), ndesign):
        current_design = design_candidates[:, oed.collected_design_indices]
        noise_cov_inv = np.eye(current_design.shape[1]) / noise_std**2

        # Compute posterior moving from previous posterior and using
        # only the most recently collected data
        noise_cov_inv_incr = np.eye(
            selected_indices.shape[0]) / noise_std**2
        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                design_candidates[:, selected_indices].T,
                exact_post_mean_prev, np.linalg.inv(exact_post_cov_prev),
                noise_cov_inv_incr, oed.collected_obs[:, -1:].T)

        # Check that using the current posterior as the prior with only the
        # new data (above) produces the same posterior as using the original
        # prior with all collected data (the from_prior approach). The
        # posteriors should be the same but the evidences will be different.
        # This is tested below.
        exact_post_mean_from_prior, exact_post_cov_from_prior = \
            laplace_posterior_approximation_for_linear_models(
                current_design.T, prior_mean, prior_cov_inv, noise_cov_inv,
                oed.collected_obs.T)
        assert np.allclose(exact_post_mean, exact_post_mean_from_prior)
        assert np.allclose(exact_post_cov, exact_post_cov_from_prior)

        # Compute PDF of current posterior that uses all collected data
        post_var = stats.multivariate_normal(
            mean=exact_post_mean[:, 0].copy(), cov=exact_post_cov.copy())

        # Compute evidence moving from previous posterior to
        # new posterior (not initial prior to posterior).
        # Values can be computed exactly for Gaussian prior and noise
        gauss_evidence = laplace_evidence(
            lambda x: np.exp(
                gaussian_loglike_fun(
                    oed.collected_obs[:, -1:],
                    obs_fun(x)[:, oed.collected_design_indices[-1:]],
                    noise_std))[:, 0],
            lambda y: np.atleast_2d(post_var_prev.pdf(y.T)).T,
            exact_post_cov, exact_post_mean)

        # Compute evidence using a Gaussian quadrature rule. This
        # is possible for this low-dimensional example.
        quad_loglike_vals = np.exp(
            gaussian_loglike_fun(
                oed.collected_obs[:, -1:],
                obs_fun(
                    x_quad[None, :])[:, oed.collected_design_indices[-1:]],
                noise_std))[:, 0]
        # we must divide the integrand by the initial prior pdf since it is
        # already implicitly included via the quadrature weights
        integrand_vals = quad_loglike_vals * post_var_prev.pdf(
            x_quad[:, None]) / prior_variable.pdf(x_quad[None, :])[:, 0]
        quad_evidence = integrand_vals.dot(w_quad)
        # print(quad_evidence, gauss_evidence)
        assert np.allclose(gauss_evidence, quad_evidence), step

        # print('G', gauss_evidence, oed.evidence)
        assert np.allclose(gauss_evidence, oed.evidence), step

        # compute the evidence of moving from the initial prior
        # to the current posterior. This will be used for testing later
        gauss_evidence_from_prior = laplace_evidence(
            lambda x: np.exp(
                gaussian_loglike_fun(
                    oed.collected_obs,
                    obs_fun(x)[:, oed.collected_design_indices],
                    noise_std))[:, 0],
            prior_variable.pdf, exact_post_cov, exact_post_mean)

        # Copy current state of OED before new data is determined.
        # This copy will be used to compute Laplace based utility and
        # evidence values for testing
        oed_copy = copy.deepcopy(oed)

        # Update the design
        utility_vals, selected_indices = oed.update_design()
        new_obs = oed.obs_process(selected_indices)
        oed.update_observations(new_obs)
        utility = utility_vals[selected_indices]

        # Re-compute the evidences that were used to update the design
        # above. This will be used for testing later
        # print('D', oed_copy.evidence)
        evidences = oed_copy.compute_expected_utility(
            oed_copy.collected_design_indices, selected_indices, True)[1]

        # print('Collected plus selected indices',
        #       oed.collected_design_indices,
        #       oed_copy.collected_design_indices, selected_indices)

        # For all outer loop samples compute the posterior exactly
        # and compute intermediate values for testing. While OED
        # considers all possible candidate design indices,
        # here we just test the one that was chosen last when the
        # design was updated.
        exact_evidences = np.empty(nouter_loop_samples)
        exact_kl_divs = np.empty_like(exact_evidences)
        for jj in range(nouter_loop_samples):
            # Fill obs with those predicted by the outer loop sample
            idx = oed.collected_design_indices
            obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]

            # Overwrite the previously simulated obs with collected obs.
            # Do not overwrite the last value, which is the potential
            # data used to compute the expected utility
            obs_jj[:, :oed_copy.collected_obs.shape[1]] = \
                oed_copy.collected_obs

            # Compute the posterior obtained by using the predicted value
            # of the outer loop sample
            noise_cov_inv_jj = np.eye(
                selected_indices.shape[0]) / noise_std**2
            exact_post_mean_jj, exact_post_cov_jj = \
                laplace_posterior_approximation_for_linear_models(
                    design_candidates[:, selected_indices].T,
                    exact_post_mean, np.linalg.inv(exact_post_cov),
                    noise_cov_inv_jj, obs_jj[:, -1].T)

            # Use post_pdf to measure the change from the current posterior
            # (acting as the prior) to the new posterior
            gauss_evidence_jj = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        obs_jj[:, -1:], obs_fun(x)[:, selected_indices],
                        noise_std))[:, 0],
                lambda y: np.atleast_2d(post_var.pdf(y.T)).T,
                exact_post_cov_jj, exact_post_mean_jj)
            exact_evidences[jj] = gauss_evidence_jj

            # Check quadrature gets the same answer
            quad_loglike_vals = np.exp(
                gaussian_loglike_fun(
                    obs_jj[:, -1:],
                    obs_fun(x_quad[None, :])[:, selected_indices],
                    noise_std))[:, 0]
            integrand_vals = quad_loglike_vals * post_var.pdf(
                x_quad[:, None]) / prior_variable.pdf(
                    x_quad[None, :])[:, 0]
            quad_evidence = integrand_vals.dot(w_quad)
            # print(quad_evidence, gauss_evidence_jj)
            assert np.allclose(gauss_evidence_jj, quad_evidence), step

            # Check that the evidence of moving from the current posterior
            # to the new posterior (with potential data from the outer-loop
            # sample) is equal to the evidence of moving from the initial
            # prior to the new posterior divided by the evidence of moving
            # from the initial prior to the current posterior
            gauss_evidence_jj_from_prior = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        obs_jj, obs_fun(x)[:, idx], noise_std))[:, 0],
                prior_variable.pdf, exact_post_cov_jj, exact_post_mean_jj)
            # print(gauss_evidence_jj_from_prior/gauss_evidence_from_prior,
            #       gauss_evidence_jj)
            # print('gauss_evidence_from_prior', gauss_evidence_from_prior)
            assert np.allclose(
                gauss_evidence_jj_from_prior / gauss_evidence_from_prior,
                gauss_evidence_jj)

            gauss_kl_div = gaussian_kl_divergence(
                exact_post_mean_jj, exact_post_cov_jj,
                exact_post_mean, exact_post_cov)
            # gauss_kl_div = gaussian_kl_divergence(
            #     exact_post_mean, exact_post_cov,
            #     exact_post_mean_jj, exact_post_cov_jj)
            exact_kl_divs[jj] = gauss_kl_div

        # print(evidences[:, 0], exact_evidences)
        assert np.allclose(evidences[:, 0], exact_evidences)

        # Outer loop samples are from the prior. Use importance reweighting
        # to sample from the previous posterior. This step is only relevant
        # for open loop design (used here), where observed data informs the
        # current estimate of the parameters. Closed loop design (not used
        # here) never collects data and so it always samples from the prior.
        post_weights = post_var.pdf(
            oed.outer_loop_prior_samples.T) / post_var_prev.pdf(
                oed.outer_loop_prior_samples.T) / oed.nouter_loop_samples
        laplace_utility = np.sum(exact_kl_divs * post_weights)
        # print('u', (utility-laplace_utility)/laplace_utility, step)
        assert np.allclose(
            utility, laplace_utility, rtol=step_tols[step - 1])

        exact_post_mean_prev = exact_post_mean
        exact_post_cov_prev = exact_post_cov
        post_var_prev = post_var
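
# The reference KL divergences above come from the closed-form expression for
# the KL divergence between two multivariate Gaussians. A minimal numpy
# sketch of that standard formula, with the argument order KL(N1 || N2)
# (illustrative; not necessarily how pyapprox implements it):
def gaussian_kl_divergence_sketch(mean1, cov1, mean2, cov2):
    nvars = mean1.shape[0]
    cov2_inv = np.linalg.inv(cov2)
    diff = (mean2 - mean1).reshape(-1, 1)
    return 0.5 * (
        np.trace(cov2_inv.dot(cov1))
        + diff.T.dot(cov2_inv).dot(diff)[0, 0]
        - nvars
        + np.log(np.linalg.det(cov2) / np.linalg.det(cov1)))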
def test_compute_expected_kl_utility_monte_carlo(self):
    nrandom_vars = 1
    noise_std = .3
    design = np.linspace(-1, 1, 2)[None, :]
    Amat = design.T

    def obs_fun(x):
        return (Amat.dot(x)).T

    def noise_fun(values):
        return np.random.normal(0, noise_std, values.shape)

    # specify the first design point
    collected_design_indices = np.array([0])

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)
    prior_mean = prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)
    noise_cov_inv = np.eye(Amat.shape[0]) / noise_std**2

    def generate_random_prior_samples(n):
        return (generate_independent_random_samples(prior_variable, n),
                np.ones(n) / n)

    def generate_inner_prior_samples_mc(n):
        return generate_random_prior_samples(n), np.ones(n) / n

    ninner_loop_samples = 300
    x, w = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x.shape[0]
        return x[None, :], w

    generate_inner_prior_samples = generate_inner_prior_samples_gauss

    nouter_loop_samples = 10000
    outer_loop_obs, outer_loop_pred_obs, inner_loop_pred_obs, \
        inner_loop_weights, __, __ = \
        precompute_compute_expected_kl_utility_data(
            generate_random_prior_samples, nouter_loop_samples, obs_fun,
            noise_fun, ninner_loop_samples,
            generate_inner_prior_samples=generate_inner_prior_samples)

    new_design_indices = np.array([1])
    outer_loop_weights = np.ones(
        (nouter_loop_samples, 1)) / nouter_loop_samples

    def log_likelihood_fun(obs, pred_obs, active_indices=None):
        return gaussian_loglike_fun(obs, pred_obs, noise_std,
                                    active_indices)

    utility = compute_expected_kl_utility_monte_carlo(
        log_likelihood_fun, outer_loop_obs, outer_loop_pred_obs,
        inner_loop_pred_obs, inner_loop_weights, outer_loop_weights,
        collected_design_indices, new_design_indices, False)

    kl_divs = []
    # overwrite a subset of obs with previously collected data;
    # make a copy so that the outer loop obs can be used again
    outer_loop_obs_copy = outer_loop_obs.copy()
    for ii in range(nouter_loop_samples):
        idx = np.hstack((collected_design_indices, new_design_indices))
        obs_ii = outer_loop_obs_copy[ii:ii + 1, idx]

        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                Amat[idx, :], prior_mean, prior_cov_inv,
                noise_cov_inv[np.ix_(idx, idx)], obs_ii.T)

        kl_div = gaussian_kl_divergence(
            exact_post_mean, exact_post_cov, prior_mean, prior_cov)
        kl_divs.append(kl_div)

    print(utility - np.mean(kl_divs), utility, np.mean(kl_divs))
    assert np.allclose(utility, np.mean(kl_divs), rtol=2e-2)
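
# The expected KL utility estimated above is a nested Monte Carlo
# approximation of expected information gain: an outer average over simulated
# data of log-likelihood minus log-evidence, where each evidence is an inner
# weighted average of likelihoods over prior samples. The sketch below states
# that estimator under those assumptions; the names and shapes are
# illustrative and do not mirror the pyapprox function signature.
def expected_kl_utility_sketch(outer_loglike_vals, inner_loglike_vals,
                               inner_weights):
    # outer_loglike_vals : (nouter,)         log p(y_i | theta_i)
    # inner_loglike_vals : (nouter, ninner)  log p(y_i | theta_ij)
    # inner_weights      : (nouter, ninner)  quadrature or MC weights
    evidences = np.sum(np.exp(inner_loglike_vals) * inner_weights, axis=1)
    return np.mean(outer_loglike_vals - np.log(evidences))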