def test_batch_kl_oed(self):
    """
    No observations collected to inform subsequent designs
    """
    np.random.seed(1)
    nrandom_vars = 1
    noise_std = 1
    ndesign = 4
    nouter_loop_samples = 10000
    ninner_loop_samples = 31

    ncandidates = 11
    design_candidates = np.linspace(-1, 1, ncandidates)[None, :]

    def obs_fun(samples):
        assert design_candidates.ndim == 2
        assert samples.ndim == 2
        Amat = design_candidates.T
        return Amat.dot(samples).T

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)

    x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x_quad.shape[0]
        return x_quad[None, :], w_quad

    generate_inner_prior_samples = generate_inner_prior_samples_gauss

    # Define the initial design
    init_design_indices = np.array([ncandidates // 2])
    oed = BayesianBatchKLOED(
        design_candidates, obs_fun, noise_std, prior_variable,
        nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    for ii in range(len(init_design_indices), ndesign):
        # The D-optimal utilities must be computed before
        # oed.update_design() is called, because update_design()
        # updates oed.collected_design_indices and thus changes the
        # problem
        d_utility_vals = np.zeros(ncandidates)
        for kk in range(ncandidates):
            if kk not in oed.collected_design_indices:
                new_design = np.hstack(
                    (design_candidates[:, oed.collected_design_indices],
                     design_candidates[:, kk:kk + 1]))
                Amat = new_design.T
                d_utility_vals[kk] = d_optimal_utility(Amat, noise_std)

        utility_vals, selected_indices = oed.update_design()

        # ignore entries of previously collected data
        II = np.where(d_utility_vals > 0)
        print((np.absolute(d_utility_vals[II] - utility_vals[II]) /
               d_utility_vals[II]).max())
        assert np.allclose(d_utility_vals[II], utility_vals[II], rtol=4e-2)
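
# Hedged reference for the quantity compared above: for a linear-Gaussian
# model with prior N(0, I) and independent noise N(0, noise_std**2), the
# expected KL divergence between posterior and prior (the mutual
# information) has the closed form 0.5*logdet(I + A^T A / noise_std**2).
# The sketch below is an assumption about what d_optimal_utility computes,
# shown only to illustrate what the test compares against the Monte Carlo
# estimate (with rtol=4e-2).
import numpy as np


def d_optimal_utility_sketch(Amat, noise_std):
    # Amat: (nobs, nvars) linear observation operator
    nvars = Amat.shape[1]
    M = np.eye(nvars) + Amat.T.dot(Amat) / noise_std**2
    # log-determinant via slogdet for numerical stability
    return 0.5 * np.linalg.slogdet(M)[1]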
def test_tensor_product_lagrange_interpolation(self):
    nvars = 5
    level = 10
    x = gauss_hermite_pts_wts_1D(level + 1)[0]

    # active_vars = np.arange(nvars)
    active_vars = np.hstack([np.arange(2), np.arange(3, nvars)])
    nactive_vars = active_vars.shape[0]
    abscissa_1d = [x] * nactive_vars

    power = x.shape[0] - 2

    def fun(samples):
        return np.sum(samples[active_vars, :]**power, axis=0)[:, None]

    nsamples = 1000
    validation_samples = np.random.normal(0, 1, (nvars, nsamples))
    # insert a single node for the inactive variable (index 2) so that
    # the training grid has nvars rows
    zz = abscissa_1d.copy()
    zz.insert(2, np.zeros(1))
    train_samples = cartesian_product(zz)
    values = fun(train_samples)
    approx_values = tensor_product_lagrange_interpolation(
        validation_samples, abscissa_1d, active_vars, values)

    barycentric_weights_1d = [
        compute_barycentric_weights_1d(x) for x in abscissa_1d]
    poly_vals = multivariate_barycentric_lagrange_interpolation(
        validation_samples, abscissa_1d, barycentric_weights_1d, values,
        active_vars)

    assert np.allclose(approx_values, fun(validation_samples))
    assert np.allclose(poly_vals, fun(validation_samples))
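
# Hedged sketch of the 1D building block that
# multivariate_barycentric_lagrange_interpolation is assumed to apply
# dimension-by-dimension: the second (true) barycentric form
#   p(x) = [sum_j w_j/(x - x_j) f_j] / [sum_j w_j/(x - x_j)].
# This is a simplification for illustration, valid when no evaluation
# point coincides with a node.
import numpy as np


def barycentric_interp_1d_sketch(x, abscissa, bary_weights, fvals):
    # x: (nsamples,) evaluation points not coinciding with the abscissa
    ratios = bary_weights[None, :] / (x[:, None] - abscissa[None, :])
    return ratios.dot(fvals) / ratios.sum(axis=1)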
def get_tensor_product_points(level, var_trans, quad_type):
    abscissa_1d = []
    num_vars = var_trans.num_vars()
    if quad_type == 'CC':
        x, w = clenshaw_curtis_pts_wts_1D(level)
    elif quad_type == 'GH':
        x, w = gauss_hermite_pts_wts_1D(level)
    else:
        raise ValueError(f"quad_type {quad_type} not supported")
    for dd in range(num_vars):
        abscissa_1d.append(x)
    pts = cartesian_product(abscissa_1d, 1)
    pts = var_trans.map_from_canonical_space(pts)
    return pts
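
# Minimal illustration of the tensor grid built above. Assumptions: that
# cartesian_product enumerates the tensor grid of the 1D rules column-wise
# and that a level-1 Clenshaw-Curtis rule has the 1D nodes [-1, 0, 1];
# both are assumptions about the library, sketched here with numpy only.
import numpy as np


def cartesian_product_sketch(abscissa_1d):
    # column-wise enumeration of the tensor grid of the 1D node sets
    grids = np.meshgrid(*abscissa_1d, indexing='ij')
    return np.vstack([g.ravel() for g in grids])


pts = cartesian_product_sketch([np.array([-1., 0., 1.])] * 2)
assert pts.shape == (2, 9)  # 3 nodes per dimension -> 9 grid points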
def test_gaussian_loglike_fun(self):
    nvars = 1

    def fun(design, samples):
        assert design.ndim == 2
        assert samples.ndim == 2
        Amat = design.T
        return Amat.dot(samples).T

    noise_std = 0.3
    prior_mean = np.zeros((nvars, 1))
    prior_cov = np.eye(nvars)
    design = np.linspace(-1, 1, 4)[None, :]
    true_sample = np.ones((nvars, 1)) * 0.4
    obs = fun(design, true_sample)
    obs += np.random.normal(0, noise_std, obs.shape)

    noise_cov_inv = np.eye(obs.shape[1]) / (noise_std**2)
    obs_matrix = design.T
    exact_post_mean, exact_post_cov = \
        laplace_posterior_approximation_for_linear_models(
            obs_matrix, prior_mean, np.linalg.inv(prior_cov),
            noise_cov_inv, obs.T)

    lb, ub = stats.norm(0, 1).interval(0.99)
    xx = np.linspace(lb, ub, 101)
    true_pdf_vals = stats.norm(
        exact_post_mean[0], np.sqrt(exact_post_cov[0])).pdf(xx)[:, None]

    prior_pdf = stats.norm(prior_mean[0], np.sqrt(prior_cov[0])).pdf
    pred_obs = fun(design, xx[None, :])
    lvals = np.exp(gaussian_loglike_fun(
        obs, pred_obs, noise_std)) * prior_pdf(xx)[:, None]

    xx_gauss, ww_gauss = gauss_hermite_pts_wts_1D(300)
    pred_obs = fun(design, xx_gauss[None, :])
    evidence = np.exp(
        gaussian_loglike_fun(obs, pred_obs, noise_std)[:, 0]).dot(ww_gauss)
    post_pdf_vals = lvals / evidence

    gauss_evidence = laplace_evidence(
        lambda x: np.exp(
            gaussian_loglike_fun(obs, fun(design, x), noise_std)[:, 0]),
        prior_pdf, exact_post_cov, exact_post_mean)
    assert np.allclose(evidence, gauss_evidence)

    # accuracy depends on the quadrature rule and the size of the noise
    # print(post_pdf_vals - true_pdf_vals)
    assert np.allclose(post_pdf_vals, true_pdf_vals)
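
# Hedged sketch of the Gaussian log-likelihood being exponentiated above,
# assuming independent homoscedastic noise: for each candidate sample the
# log-likelihood of the nobs observations is
#   -nobs/2*log(2*pi*noise_std**2) - ||obs - pred_obs||^2/(2*noise_std**2).
# gaussian_loglike_fun presumably broadcasts this over the rows of
# pred_obs; the version below is an assumption used for illustration.
import numpy as np


def gaussian_loglike_sketch(obs, pred_obs, noise_std):
    # obs: (1, nobs), pred_obs: (nsamples, nobs); returns (nsamples, 1)
    nobs = obs.shape[1]
    sq_misfit = np.sum((obs - pred_obs)**2, axis=1, keepdims=True)
    return (-nobs / 2 * np.log(2 * np.pi * noise_std**2)
            - sq_misfit / (2 * noise_std**2))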
def test_gauss_hermite_quadrature(self):
    """
    Integrate x^2 1/sqrt(2*pi)*exp(-x**2/2) dx from x=-inf..inf
    """
    true_mean = 0.
    true_variance = 1.
    x, w = gauss_hermite_pts_wts_1D(2)

    def function(x):
        return x**2

    assert np.allclose(
        np.dot(function(x), w) - true_mean**2, true_variance)
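
# A minimal sketch of what gauss_hermite_pts_wts_1D is assumed to return:
# a Gauss-Hermite rule rescaled so its weights integrate against the
# standard normal PDF (weights sum to one). Built here from numpy's
# physicists' Hermite rule via the change of variables x -> sqrt(2)*x.
import numpy as np


def gauss_hermite_pts_wts_sketch(npts):
    x, w = np.polynomial.hermite.hermgauss(npts)
    return np.sqrt(2) * x, w / np.sqrt(np.pi)


# the 2-point rule integrates x**2 against the standard normal exactly
x, w = gauss_hermite_pts_wts_sketch(2)
assert np.allclose(w.sum(), 1) and np.allclose((x**2).dot(w), 1)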
def test_interpolation_gaussian_leja_sequence(self):
    def f(x):
        return np.exp(-np.sum(x**2, axis=0))

    level = 30
    # abscissa_leja, __ = gaussian_leja_quadrature_rule(
    #     level, return_weights_for_all_levels=False)
    # abscissa = abscissa_leja
    abscissa_gauss = gauss_hermite_pts_wts_1D(level + 1)[0]
    abscissa = abscissa_gauss
    # print(abscissa_leja.shape, abscissa_gauss.shape)
    abscissa_1d = [abscissa]
    barycentric_weights_1d = [
        compute_barycentric_weights_1d(abscissa_1d[0])]
    # print(barycentric_weights_1d[0])
    barycentric_weights_1d[0] /= barycentric_weights_1d[0].max()
    # print(barycentric_weights_1d[0])
    fn_vals = f(np.array(abscissa).reshape(
        1, abscissa.shape[0]))[:, np.newaxis]
    # print(fn_vals.shape)
    samples = np.random.normal(0, 1, (1, 1000))
    poly_vals = multivariate_barycentric_lagrange_interpolation(
        samples, abscissa_1d, barycentric_weights_1d, fn_vals,
        np.array([0]))[:, 0]
    l2_error = np.linalg.norm(poly_vals - f(samples)) / \
        np.sqrt(samples.shape[1])
    # print('l2_error', l2_error)

    # The commented code below can be used to visualize the interpolant
    # pts = np.linspace(abscissa.min(), abscissa.max(), 101).reshape(1, 101)
    # poly_vals = multivariate_barycentric_lagrange_interpolation(
    #     pts, abscissa_1d, barycentric_weights_1d, fn_vals, np.array([0]))
    # import matplotlib.pyplot as plt
    # plt.plot(pts[0, :], poly_vals.squeeze())
    # plt.plot(abscissa_1d[0], fn_vals.squeeze(), 'r*')
    # plt.plot(abscissa_leja, abscissa_leja*0, 'ro')
    # plt.plot(abscissa_gauss, abscissa_gauss*0, 'ks', ms=3)
    # plt.ylim(-1, 2)
    # plt.show()

    assert l2_error < 1e-2
def test_gaussian_kl_divergence(self):
    nvars = 1
    mean1, sigma1 = np.zeros((nvars, 1)), np.eye(nvars) * 2
    mean2, sigma2 = np.ones((nvars, 1)), np.eye(nvars) * 3
    kl_div = gaussian_kl_divergence(mean1, sigma1, mean2, sigma2)

    # scipy requires 1D means
    rv1 = stats.multivariate_normal(mean1[:, 0], sigma1)
    rv2 = stats.multivariate_normal(mean2[:, 0], sigma2)
    xx, ww = gauss_hermite_pts_wts_1D(300)
    xx = xx * np.sqrt(sigma1[0, 0]) + mean1[0]
    kl_div_quad = np.log(rv1.pdf(xx) / rv2.pdf(xx)).dot(ww)
    assert np.allclose(kl_div, kl_div_quad)

    xx = np.random.normal(mean1[0], np.sqrt(sigma1[0, 0]), (int(1e6)))
    ww = np.ones(xx.shape[0]) / xx.shape[0]
    kl_div_quad = np.log(rv1.pdf(xx) / rv2.pdf(xx)).dot(ww)
    assert np.allclose(kl_div, kl_div_quad, rtol=1e-2)
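
# Hedged reference for the quantity tested above: the KL divergence
# between multivariate Gaussians N(m1, S1) and N(m2, S2) has the closed
# form implemented below (an assumption about what gaussian_kl_divergence
# computes, shown only for illustration).
import numpy as np


def gaussian_kl_divergence_sketch(m1, S1, m2, S2):
    # m1, m2: (nvars, 1), S1, S2: (nvars, nvars)
    nvars = m1.shape[0]
    S2_inv = np.linalg.inv(S2)
    d = m2 - m1
    return 0.5 * (np.trace(S2_inv.dot(S1))
                  + d.T.dot(S2_inv).dot(d)[0, 0]
                  - nvars
                  + np.log(np.linalg.det(S2) / np.linalg.det(S1)))


# for the values in the test above the closed form reduces to 0.5*log(1.5)
val = gaussian_kl_divergence_sketch(
    np.zeros((1, 1)), 2 * np.eye(1), np.ones((1, 1)), 3 * np.eye(1))
assert np.allclose(val, 0.5 * np.log(1.5))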
def test_predictor_corrector_function_of_independent_variables(self):
    """
    Test 1: Sum of Gaussians is a Gaussian

    Test 2: Product of uniforms on [0, 1]
    """
    nvars, nterms = 2, 5
    nquad_samples_1d = 50
    quad_rules = [gauss_hermite_pts_wts_1D(nquad_samples_1d)] * nvars

    def fun(x):
        return x.sum(axis=0)

    ab = predictor_corrector_function_of_independent_variables(
        nterms, quad_rules, fun)

    rv = stats.norm(0, np.sqrt(nvars))
    lb, ub = rv.interval(1)
    ab_full = predictor_corrector(nterms, rv.pdf, lb, ub)
    assert np.allclose(ab_full, ab)

    nvars = 2

    def measure(x):
        return (-1)**(nvars - 1) * np.log(x)**(nvars - 1) / \
            factorial(nvars - 1)

    def fun(x):
        return x.prod(axis=0)

    quad_opts = {}
    ab_full = predictor_corrector(nterms, measure, 0, 1, quad_opts)
    xx, ww = gauss_jacobi_pts_wts_1D(nquad_samples_1d, 0, 0)
    xx = (xx + 1) / 2
    quad_rules = [(xx, ww)] * nvars
    ab = predictor_corrector_function_of_independent_variables(
        nterms, quad_rules, fun)
    assert np.allclose(ab_full, ab)
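
# Why the measure in Test 2 is (-log x)^(nvars-1)/(nvars-1)!: if
# U1, ..., Un ~ Uniform(0, 1) are independent, the product X = U1*...*Un
# has PDF f(x) = (-log x)^(n-1)/(n-1)! on (0, 1). For n = 2 the CDF is
# P(X <= t) = t - t*log(t), which the Monte Carlo sanity check below
# verifies (numpy only; no library assumptions).
import numpy as np

np.random.seed(0)
x = np.prod(np.random.uniform(0, 1, (2, 100000)), axis=0)
for t in [0.1, 0.5, 0.9]:
    empirical = np.mean(x <= t)
    exact = t - t * np.log(t)
    assert abs(empirical - exact) < 0.01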
def test_barycentric_weights_1d(self):
    eps = 1e-12

    # test barycentric weights for uniform points using direct calculation
    abscissa = np.linspace(-1, 1., 5)
    weights = compute_barycentric_weights_1d(
        abscissa, normalize_weights=False)
    n = abscissa.shape[0] - 1
    h = 2. / n
    true_weights = np.empty((n + 1), np.double)
    for j in range(n + 1):
        true_weights[j] = (-1.)**(n - j) * nchoosek(n, j) / \
            (h**n * factorial(n))
    assert np.allclose(true_weights, weights, eps)

    # test barycentric weights for uniform points using the analytical
    # formula and with scaling on
    weights = compute_barycentric_weights_1d(
        abscissa, interval_length=1, normalize_weights=False)
    weights_analytical = equidistant_barycentric_weights(5)
    ratio = weights / weights_analytical
    # assert the two weights arrays differ by only a constant factor
    assert np.allclose(np.min(ratio), np.max(ratio))

    # test barycentric weights for clenshaw curtis points
    level = 7
    abscissa, tmp = clenshaw_curtis_pts_wts_1D(level)
    n = abscissa.shape[0]
    weights = compute_barycentric_weights_1d(
        abscissa, normalize_weights=False, interval_length=2)
    true_weights = np.empty((n), np.double)
    true_weights[0] = true_weights[n - 1] = 0.5
    true_weights[1:n - 1] = [(-1)**ii for ii in range(1, n - 1)]
    factor = true_weights[1] / weights[1]
    assert np.allclose(true_weights / factor, weights, atol=eps)

    # check barycentric weights are computed correctly regardless of the
    # order of the points. Ordering can eventually affect numerical
    # stability, but not until very high levels
    abscissa, tmp = clenshaw_curtis_in_polynomial_order(level)
    II = np.argsort(abscissa)
    n = abscissa.shape[0]
    weights = compute_barycentric_weights_1d(
        abscissa, normalize_weights=False,
        interval_length=abscissa.max() - abscissa.min())
    true_weights = np.empty((n), np.double)
    true_weights[0] = true_weights[n - 1] = 0.5
    true_weights[1:n - 1] = [(-1)**ii for ii in range(1, n - 1)]
    factor = true_weights[1] / weights[II][1]
    assert np.allclose(true_weights / factor, weights[II], eps)

    num_samples = 65
    abscissa, tmp = gauss_hermite_pts_wts_1D(num_samples)
    weights = compute_barycentric_weights_1d(
        abscissa, normalize_weights=False,
        interval_length=abscissa.max() - abscissa.min())
    print(weights)
    print(np.absolute(weights).max(), np.absolute(weights).min())
    print(np.absolute(weights).max() / np.absolute(weights).min())
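
# Direct O(n^2) reference for the weights tested above: the barycentric
# weights of nodes x_j are w_j = 1 / prod_{k != j} (x_j - x_k), here
# without the interval scaling and normalization options of
# compute_barycentric_weights_1d (a simplification for illustration).
import numpy as np


def barycentric_weights_sketch(abscissa):
    diffs = abscissa[:, None] - abscissa[None, :]
    np.fill_diagonal(diffs, 1.0)  # exclude the k == j factor
    return 1.0 / np.prod(diffs, axis=1)


# e.g. three equidistant nodes give weights proportional to (1, -2, 1)
assert np.allclose(
    barycentric_weights_sketch(np.array([-1., 0., 1.])), [0.5, -1.0, 0.5])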
def help_compare_prediction_based_oed(
        self, deviation_fun, gauss_deviation_fun, use_gauss_quadrature,
        ninner_loop_samples, ndesign_vars, tol):
    ncandidates_1d = 5
    design_candidates = cartesian_product(
        [np.linspace(-1, 1, ncandidates_1d)] * ndesign_vars)
    ncandidates = design_candidates.shape[1]

    # Define the model used to predict the likely observable data
    indices = compute_hyperbolic_indices(ndesign_vars, 1)[:, 1:]
    Amat = monomial_basis_matrix(indices, design_candidates)
    obs_fun = partial(linear_obs_fun, Amat)

    # Define the model used to predict the unobservable QoI
    qoi_fun = exponential_qoi_fun

    # Define the prior PDF of the unknown variables
    nrandom_vars = indices.shape[1]
    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 0.5)] * nrandom_vars)

    # Define the independent observational noise
    noise_std = 1

    # Define the initial design
    init_design_indices = np.array([ncandidates // 2])

    # Define the OED options
    nouter_loop_samples = 100
    if use_gauss_quadrature:
        # 301 samples are needed for the CVaR deviation;
        # only 31 are needed for the variance deviation
        ninner_loop_samples_1d = ninner_loop_samples
        var_trans = AffineRandomVariableTransformation(prior_variable)
        x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples_1d)
        x_quad = cartesian_product([x_quad] * nrandom_vars)
        w_quad = outer_product([w_quad] * nrandom_vars)
        x_quad = var_trans.map_from_canonical_space(x_quad)
        ninner_loop_samples = x_quad.shape[1]

        def generate_inner_prior_samples(nsamples):
            assert nsamples == x_quad.shape[1], (nsamples, x_quad.shape)
            return x_quad, w_quad
    else:
        # use the default Monte Carlo sampling
        generate_inner_prior_samples = None

    # Setup the OED problem
    oed = BayesianBatchDeviationOED(
        design_candidates, obs_fun, noise_std, prior_variable, qoi_fun,
        nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples, deviation_fun=deviation_fun)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    prior_mean = oed.prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)
    selected_indices = init_design_indices

    # Generate the experimental design
    nexperiments = 3
    for step in range(len(init_design_indices), nexperiments):
        # Copy the current state of the OED object before the new data
        # is determined. This copy will be used to compute the
        # Laplace-based utility and evidence values for testing
        oed_copy = copy.deepcopy(oed)

        # Update the design
        utility_vals, selected_indices = oed.update_design()

        utility, deviations, evidences, weights = \
            oed_copy.compute_expected_utility(
                oed_copy.collected_design_indices, selected_indices, True)

        exact_deviations = np.empty(nouter_loop_samples)
        for jj in range(nouter_loop_samples):
            # only test the intermediate quantities associated with the
            # design chosen by the OED step
            idx = oed.collected_design_indices
            obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]

            noise_cov_inv_jj = np.eye(idx.shape[0]) / noise_std**2
            exact_post_mean_jj, exact_post_cov_jj = \
                laplace_posterior_approximation_for_linear_models(
                    Amat[idx, :], prior_mean, prior_cov_inv,
                    noise_cov_inv_jj, obs_jj.T)

            exact_deviations[jj] = gauss_deviation_fun(
                exact_post_mean_jj, exact_post_cov_jj)
        print('d', np.absolute(
            exact_deviations - deviations[:, 0]).max(), tol)
        # print(exact_deviations, deviations[:, 0])
        assert np.allclose(exact_deviations, deviations[:, 0], atol=tol)
        assert np.allclose(
            utility_vals[selected_indices], -np.mean(exact_deviations),
            atol=tol)
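
# Hedged sketch of a deviation_fun compatible with the helper above. The
# signature is an assumption (per-outer-loop-sample QoI values and
# posterior weights); it illustrates the variance deviation mentioned in
# the comments: the weighted variance of the predicted QoI under each
# inner loop posterior approximation.
import numpy as np


def variance_deviation_sketch(qoi_vals, weights):
    # qoi_vals: (nouter, ninner), weights: (nouter, ninner) posterior wts
    means = np.sum(qoi_vals * weights, axis=1, keepdims=True)
    return np.sum((qoi_vals - means)**2 * weights, axis=1, keepdims=True)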
def help_compare_sequential_kl_oed_econ(self, use_gauss_quadrature):
    """
    Use the same inner loop samples for all outer loop samples
    """
    nrandom_vars = 1
    noise_std = 1
    ndesign = 5
    nouter_loop_samples = int(1e1)
    ninner_loop_samples = 31

    ncandidates = 6
    design_candidates = np.linspace(-1, 1, ncandidates)[None, :]

    def obs_fun(samples):
        assert design_candidates.ndim == 2
        assert samples.ndim == 2
        Amat = design_candidates.T
        return Amat.dot(samples).T

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)

    true_sample = np.array([.4] * nrandom_vars)[:, None]

    def obs_process(new_design_indices):
        obs = obs_fun(true_sample)[:, new_design_indices]
        obs += oed.noise_fun(obs)
        return obs

    generate_random_prior_samples = partial(
        generate_independent_random_samples, prior_variable)

    def generate_inner_prior_samples_mc(n):
        # Fix the seed so that, when econ is False, the same samples are
        # still created each time. This is just for testing purposes, to
        # verify that setting econ to True has the same effect
        np.random.seed(1)
        return generate_random_prior_samples(n), np.ones(n) / n

    x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x_quad.shape[0]
        return x_quad[None, :], w_quad

    if use_gauss_quadrature:
        generate_inner_prior_samples = generate_inner_prior_samples_gauss
    else:
        generate_inner_prior_samples = generate_inner_prior_samples_mc

    # Define the initial design
    init_design_indices = np.array([ncandidates // 2])
    np.random.seed(1)
    oed = BayesianSequentialKLOED(
        design_candidates, obs_fun, noise_std, prior_variable,
        obs_process, nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    # Randomness only enters during populate() and when collecting real
    # observations, i.e. when evaluating the obs model, so setting the
    # seed here is sufficient because below we evaluate the obs model
    # once and use the same value for both OED instances
    np.random.seed(1)
    oed_econ = BayesianSequentialKLOED(
        design_candidates, obs_fun, noise_std, prior_variable,
        obs_process, nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples, econ=True)
    oed_econ.populate()
    oed_econ.set_collected_design_indices(init_design_indices)

    for step in range(len(init_design_indices), ndesign):
        utility_vals, selected_indices = oed.update_design()
        new_obs = oed.obs_process(selected_indices)
        oed.update_observations(new_obs)

        econ_utility_vals, econ_selected_indices = \
            oed_econ.update_design()
        assert np.allclose(econ_utility_vals, utility_vals)
        assert np.allclose(econ_selected_indices, selected_indices)
        # use the same data as the non-econ version; do not call the
        # model again as different noise would be added
        oed_econ.update_observations(new_obs)
def test_sequential_kl_oed(self):
    """
    Observations collected ARE used to inform subsequent designs
    """
    nrandom_vars = 1
    noise_std = 1
    ndesign = 5
    nouter_loop_samples = int(1e4)
    ninner_loop_samples = 31

    ncandidates = 6
    design_candidates = np.linspace(-1, 1, ncandidates)[None, :]

    def obs_fun(samples):
        assert design_candidates.ndim == 2
        assert samples.ndim == 2
        Amat = design_candidates.T
        return Amat.dot(samples).T

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)

    true_sample = np.array([.4] * nrandom_vars)[:, None]

    def obs_process(new_design_indices):
        obs = obs_fun(true_sample)[:, new_design_indices]
        obs += oed.noise_fun(obs)
        return obs

    x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x_quad.shape[0]
        return x_quad[None, :], w_quad

    generate_inner_prior_samples = generate_inner_prior_samples_gauss

    # Define the initial design
    init_design_indices = np.array([ncandidates // 2])
    oed = BayesianSequentialKLOED(
        design_candidates, obs_fun, noise_std, prior_variable,
        obs_process, nouter_loop_samples, ninner_loop_samples,
        generate_inner_prior_samples)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    prior_mean = oed.prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)

    exact_post_mean_prev = prior_mean
    exact_post_cov_prev = prior_cov
    post_var_prev = stats.multivariate_normal(
        mean=exact_post_mean_prev[:, 0], cov=exact_post_cov_prev)

    selected_indices = init_design_indices

    # Because of Monte Carlo error, set the step tolerances individually.
    # It is too expensive to increase the number of outer loop samples
    # to reduce the errors
    step_tols = [7.3e-3, 6.5e-2, 3.3e-2, 1.6e-1]

    for step in range(len(init_design_indices), ndesign):
        current_design = design_candidates[:, oed.collected_design_indices]
        noise_cov_inv = np.eye(current_design.shape[1]) / noise_std**2

        # Compute the posterior moving from the previous posterior and
        # using only the most recently collected data
        noise_cov_inv_incr = np.eye(
            selected_indices.shape[0]) / noise_std**2
        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                design_candidates[:, selected_indices].T,
                exact_post_mean_prev,
                np.linalg.inv(exact_post_cov_prev), noise_cov_inv_incr,
                oed.collected_obs[:, -1:].T)

        # Check that using the current posterior as the prior and only
        # using the new data (above) produces the same posterior as
        # using the original prior and all the collected data (the
        # from_prior approach). The posteriors should be the same but
        # the evidences will be different. This is tested below
        exact_post_mean_from_prior, exact_post_cov_from_prior = \
            laplace_posterior_approximation_for_linear_models(
                current_design.T, prior_mean, prior_cov_inv,
                noise_cov_inv, oed.collected_obs.T)
        assert np.allclose(exact_post_mean, exact_post_mean_from_prior)
        assert np.allclose(exact_post_cov, exact_post_cov_from_prior)

        # Compute the PDF of the current posterior, which uses all the
        # collected data
        post_var = stats.multivariate_normal(
            mean=exact_post_mean[:, 0].copy(), cov=exact_post_cov.copy())

        # Compute the evidence moving from the previous posterior to the
        # new posterior (not from the initial prior to the posterior).
        # The values can be computed exactly for a Gaussian prior and
        # Gaussian noise
        gauss_evidence = laplace_evidence(
            lambda x: np.exp(
                gaussian_loglike_fun(
                    oed.collected_obs[:, -1:],
                    obs_fun(x)[:, oed.collected_design_indices[-1:]],
                    noise_std))[:, 0],
            lambda y: np.atleast_2d(post_var_prev.pdf(y.T)).T,
            exact_post_cov, exact_post_mean)

        # Compute the evidence using a Gaussian quadrature rule. This is
        # possible for this low-dimensional example.
        quad_loglike_vals = np.exp(
            gaussian_loglike_fun(
                oed.collected_obs[:, -1:],
                obs_fun(
                    x_quad[None, :])[:, oed.collected_design_indices[-1:]],
                noise_std))[:, 0]
        # We must divide the integrand by the initial prior PDF since it
        # is already implicitly included via the quadrature weights
        integrand_vals = quad_loglike_vals * post_var_prev.pdf(
            x_quad[:, None]) / prior_variable.pdf(x_quad[None, :])[:, 0]
        quad_evidence = integrand_vals.dot(w_quad)
        # print(quad_evidence, gauss_evidence)
        assert np.allclose(gauss_evidence, quad_evidence), step

        # print('G', gauss_evidence, oed.evidence)
        assert np.allclose(gauss_evidence, oed.evidence), step

        # Compute the evidence of moving from the initial prior to the
        # current posterior. This will be used for testing later
        gauss_evidence_from_prior = laplace_evidence(
            lambda x: np.exp(
                gaussian_loglike_fun(
                    oed.collected_obs,
                    obs_fun(x)[:, oed.collected_design_indices],
                    noise_std))[:, 0],
            prior_variable.pdf, exact_post_cov, exact_post_mean)

        # Copy the current state of the OED object before the new data
        # is determined. This copy will be used to compute the
        # Laplace-based utility and evidence values for testing
        oed_copy = copy.deepcopy(oed)

        # Update the design
        utility_vals, selected_indices = oed.update_design()
        new_obs = oed.obs_process(selected_indices)
        oed.update_observations(new_obs)
        utility = utility_vals[selected_indices]

        # Re-compute the evidences that were used to update the design
        # above. This will be used for testing later
        # print('D', oed_copy.evidence)
        evidences = oed_copy.compute_expected_utility(
            oed_copy.collected_design_indices, selected_indices, True)[1]
        # print('Collected plus selected indices',
        #       oed.collected_design_indices,
        #       oed_copy.collected_design_indices, selected_indices)

        # For all outer loop samples compute the posterior exactly and
        # compute intermediate values for testing. While OED considers
        # all possible candidate design indices, here we just test the
        # one that was chosen last when the design was updated
        exact_evidences = np.empty(nouter_loop_samples)
        exact_kl_divs = np.empty_like(exact_evidences)
        for jj in range(nouter_loop_samples):
            # Fill the obs with those predicted by the outer loop sample
            idx = oed.collected_design_indices
            obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]

            # Overwrite the previously simulated obs with the collected
            # obs.
            # Do not overwrite the last value, which is the potential
            # data used to compute the expected utility
            obs_jj[:, :oed_copy.collected_obs.shape[1]] = \
                oed_copy.collected_obs

            # Compute the posterior obtained by using the predicted
            # value of the outer loop sample
            noise_cov_inv_jj = np.eye(
                selected_indices.shape[0]) / noise_std**2
            exact_post_mean_jj, exact_post_cov_jj = \
                laplace_posterior_approximation_for_linear_models(
                    design_candidates[:, selected_indices].T,
                    exact_post_mean, np.linalg.inv(exact_post_cov),
                    noise_cov_inv_jj, obs_jj[:, -1:].T)

            # Use post_pdf to measure the change from the current
            # posterior (prior) to the new posterior
            gauss_evidence_jj = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        obs_jj[:, -1:], obs_fun(x)[:, selected_indices],
                        noise_std))[:, 0],
                lambda y: np.atleast_2d(post_var.pdf(y.T)).T,
                exact_post_cov_jj, exact_post_mean_jj)
            exact_evidences[jj] = gauss_evidence_jj

            # Check that quadrature gets the same answer
            quad_loglike_vals = np.exp(
                gaussian_loglike_fun(
                    obs_jj[:, -1:],
                    obs_fun(x_quad[None, :])[:, selected_indices],
                    noise_std))[:, 0]
            integrand_vals = quad_loglike_vals * post_var.pdf(
                x_quad[:, None]) / prior_variable.pdf(
                    x_quad[None, :])[:, 0]
            quad_evidence = integrand_vals.dot(w_quad)
            # print(quad_evidence, gauss_evidence_jj)
            assert np.allclose(gauss_evidence_jj, quad_evidence), step

            # Check that the evidence of moving from the current
            # posterior to the new posterior (with potential data from
            # the outer-loop sample) is equal to the evidence of moving
            # from the initial prior to the new posterior divided by the
            # evidence of moving from the initial prior to the current
            # posterior
            gauss_evidence_jj_from_prior = laplace_evidence(
                lambda x: np.exp(
                    gaussian_loglike_fun(
                        obs_jj, obs_fun(x)[:, idx], noise_std))[:, 0],
                prior_variable.pdf, exact_post_cov_jj,
                exact_post_mean_jj)
            # print(gauss_evidence_jj_from_prior/gauss_evidence_from_prior,
            #       gauss_evidence_jj)
            # print('gauss_evidence_from_prior', gauss_evidence_from_prior)
            assert np.allclose(
                gauss_evidence_jj_from_prior / gauss_evidence_from_prior,
                gauss_evidence_jj)

            gauss_kl_div = gaussian_kl_divergence(
                exact_post_mean_jj, exact_post_cov_jj,
                exact_post_mean, exact_post_cov)
            # gauss_kl_div = gaussian_kl_divergence(
            #     exact_post_mean, exact_post_cov,
            #     exact_post_mean_jj, exact_post_cov_jj)
            exact_kl_divs[jj] = gauss_kl_div

        # print(evidences[:, 0], exact_evidences)
        assert np.allclose(evidences[:, 0], exact_evidences)

        # The outer loop samples are drawn from the prior. Use importance
        # reweighting to sample from the previous posterior. This step is
        # only relevant for open loop design (used here), where observed
        # data informs the current estimate of the parameters. Closed
        # loop design (not used here) never collects data and so always
        # samples from the prior.
        post_weights = post_var.pdf(
            oed.outer_loop_prior_samples.T) / post_var_prev.pdf(
                oed.outer_loop_prior_samples.T) / oed.nouter_loop_samples
        laplace_utility = np.sum(exact_kl_divs * post_weights)
        # print('u', (utility-laplace_utility)/laplace_utility, step)
        assert np.allclose(
            utility, laplace_utility, rtol=step_tols[step - 1])

        exact_post_mean_prev = exact_post_mean
        exact_post_cov_prev = exact_post_cov
        post_var_prev = post_var
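
# Hedged sketch of the conjugate linear-Gaussian update used repeatedly
# above (an assumption about what
# laplace_posterior_approximation_for_linear_models returns): for
# obs = A*theta + noise with prior N(m0, S0) and noise precision R_inv,
#   S_post = (S0^{-1} + A^T R_inv A)^{-1}
#   m_post = S_post (A^T R_inv y + S0^{-1} m0)
import numpy as np


def linear_gaussian_posterior_sketch(Amat, prior_mean, prior_cov_inv,
                                     noise_cov_inv, obs):
    # Amat: (nobs, nvars), prior_mean: (nvars, 1), obs: (nobs, 1)
    post_cov = np.linalg.inv(
        prior_cov_inv + Amat.T.dot(noise_cov_inv).dot(Amat))
    post_mean = post_cov.dot(
        Amat.T.dot(noise_cov_inv).dot(obs)
        + prior_cov_inv.dot(prior_mean))
    return post_mean, post_cov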
def test_compute_expected_kl_utility_monte_carlo(self):
    nrandom_vars = 1
    noise_std = .3
    design = np.linspace(-1, 1, 2)[None, :]
    Amat = design.T

    def obs_fun(x):
        return (Amat.dot(x)).T

    def noise_fun(values):
        return np.random.normal(0, noise_std, (values.shape))

    # specify the first design point
    collected_design_indices = np.array([0])

    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 1)] * nrandom_vars)

    prior_mean = prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)
    noise_cov_inv = np.eye(Amat.shape[0]) / noise_std**2

    def generate_random_prior_samples(n):
        return (generate_independent_random_samples(prior_variable, n),
                np.ones(n) / n)

    def generate_inner_prior_samples_mc(n):
        return generate_random_prior_samples(n), np.ones(n) / n

    ninner_loop_samples = 300
    x, w = gauss_hermite_pts_wts_1D(ninner_loop_samples)

    def generate_inner_prior_samples_gauss(n):
        # use precomputed samples to avoid the cost of regenerating them
        assert n == x.shape[0]
        return x[None, :], w

    generate_inner_prior_samples = generate_inner_prior_samples_gauss

    nouter_loop_samples = 10000
    outer_loop_obs, outer_loop_pred_obs, inner_loop_pred_obs, \
        inner_loop_weights, __, __ = \
        precompute_compute_expected_kl_utility_data(
            generate_random_prior_samples, nouter_loop_samples, obs_fun,
            noise_fun, ninner_loop_samples,
            generate_inner_prior_samples=generate_inner_prior_samples)

    new_design_indices = np.array([1])
    outer_loop_weights = np.ones(
        (nouter_loop_samples, 1)) / nouter_loop_samples

    def log_likelihood_fun(obs, pred_obs, active_indices=None):
        return gaussian_loglike_fun(
            obs, pred_obs, noise_std, active_indices)

    utility = compute_expected_kl_utility_monte_carlo(
        log_likelihood_fun, outer_loop_obs, outer_loop_pred_obs,
        inner_loop_pred_obs, inner_loop_weights, outer_loop_weights,
        collected_design_indices, new_design_indices, False)

    kl_divs = []
    # overwrite a subset of the obs with the previously collected data.
    # Make a copy so that the outer loop obs can be used again
    outer_loop_obs_copy = outer_loop_obs.copy()
    for ii in range(nouter_loop_samples):
        idx = np.hstack((collected_design_indices, new_design_indices))
        obs_ii = outer_loop_obs_copy[ii:ii + 1, idx]
        exact_post_mean, exact_post_cov = \
            laplace_posterior_approximation_for_linear_models(
                Amat[idx, :], prior_mean, prior_cov_inv,
                noise_cov_inv[np.ix_(idx, idx)], obs_ii.T)
        kl_div = gaussian_kl_divergence(
            exact_post_mean, exact_post_cov, prior_mean, prior_cov)
        kl_divs.append(kl_div)
    print(utility - np.mean(kl_divs), utility, np.mean(kl_divs))
    assert np.allclose(utility, np.mean(kl_divs), rtol=2e-2)
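
# Hedged sketch of the nested Monte Carlo estimator validated above: the
# expected KL utility is U(d) = E_y[log p(y|theta_outer, d) - log p(y|d)],
# where the evidence p(y|d) is estimated with the inner loop samples.
# This is a simplification (an assumption, not the library's exact
# implementation) of compute_expected_kl_utility_monte_carlo for a
# single shared inner loop sample set.
import numpy as np


def expected_kl_utility_sketch(outer_loglike, inner_loglike,
                               inner_weights, outer_weights):
    # outer_loglike: (nouter,) log p(y_i | theta_i)
    # inner_loglike: (nouter, ninner) log p(y_i | theta_j)
    # evidence estimate for each outer loop observation y_i
    evidences = np.exp(inner_loglike).dot(inner_weights)
    return np.sum((outer_loglike - np.log(evidences)) * outer_weights)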