def test_define_mixed_tensor_product_random_variable(self):
    """
    Map boundary samples of a mixed tensor-product random variable to the
    canonical space and back again.

    The one-dimensional marginals are of several types and variables of the
    same type DO NOT share the same distribution parameters.
    """
    marginals = [
        stats.uniform(-1, 2),
        stats.beta(1, 1, -1, 2),
        stats.norm(-1, np.sqrt(4)),
        stats.uniform(),
        stats.uniform(-1, 2),
        stats.beta(2, 1, -2, 3),
    ]
    var_trans = AffineRandomVariableTransformation(marginals)

    # First sample: left boundary of every bounded variable and one
    # standard deviation to the left of the mean of the gaussian variable.
    left_sample = [-1, -1, -3, 0, -1, -2]
    # Second sample: right boundary of every bounded variable and one
    # standard deviation to the right of the mean of the gaussian variable.
    right_sample = [1, 1, 1, 1, 1, 1]
    # After the transpose each column holds one sample.
    true_user_samples = np.asarray([left_sample, right_sample]).T

    canonical_samples = var_trans.map_to_canonical_space(true_user_samples)

    # Expected canonical values: -1 for the first sample, +1 for the second.
    true_canonical_samples = np.ones_like(true_user_samples)
    true_canonical_samples[:, 0] = -1
    assert np.allclose(true_canonical_samples, canonical_samples)

    # The inverse map must recover the user samples.
    recovered_samples = var_trans.map_from_canonical_space(canonical_samples)
    assert np.allclose(recovered_samples, true_user_samples)
def test_identity_map_subset(self):
    """
    Check that variables flagged with ``set_identity_maps`` are left
    untouched by the transformation while the full round trip
    (to canonical space and back) still recovers the samples.
    """

    def check_identity_rows(transform, user_samples, identity_rows):
        # Rows flagged as identity maps must be unchanged in canonical
        # space, and the inverse map must reproduce every row.
        canonical = transform.map_to_canonical_space(user_samples)
        assert np.allclose(
            canonical[identity_rows, :], user_samples[identity_rows, :])
        assert np.allclose(
            transform.map_from_canonical_space(canonical), user_samples)

    # Case 1: i.i.d. uniform variables with a single identity map.
    num_vars = 3
    iid_trans = define_iid_random_variable_transformation(
        stats.uniform(0, 1), num_vars)
    iid_trans.set_identity_maps([1])
    iid_samples = np.random.uniform(0, 1, (num_vars, 4))
    check_identity_rows(iid_trans, iid_samples, 1)

    # Case 2: mixed variables with two identity maps given out of order.
    marginals = [
        stats.uniform(-1, 2),
        stats.beta(1, 1, -1, 2),
        stats.norm(-1, np.sqrt(4)),
        stats.uniform(),
        stats.uniform(-1, 2),
        stats.beta(2, 1, -2, 3),
    ]
    mixed_trans = AffineRandomVariableTransformation(marginals)
    mixed_trans.set_identity_maps([4, 2])

    from pyapprox.probability_measure_sampling import \
        generate_independent_random_samples
    mixed_samples = generate_independent_random_samples(
        mixed_trans.variable, 10)
    check_identity_rows(mixed_trans, mixed_samples, [2, 4])
def test_adaptive_multivariate_sampling_jacobi(self):
    """
    Enrich a set of induced samples level by level for a PCE built on
    beta variables and check the conditioning of the final basis matrix.

    Starting from the samples returned by
    ``generate_induced_samples_migliorati_tolerance`` for the initial index
    set, ``increment_induced_samples_migliorati`` is called once per
    hyperbolic level ``2, ..., degree - 1``. The final samples are mapped
    with ``map_from_canonical_space`` before the condition number of the
    preconditioned basis matrix is compared against ``cond_tol``.
    """
    num_vars = 2
    degree = 6
    alph = 5
    bet = 5.

    var_trans = AffineRandomVariableTransformation(
        IndependentMultivariateRandomVariable(
            [beta(alph, bet, -1, 3)], [np.arange(num_vars)]))
    pce_opts = define_poly_options_from_variable_transformation(var_trans)

    pce = PolynomialChaosExpansion()
    pce.configure(pce_opts)
    indices = compute_hyperbolic_indices(num_vars, 1, 1.0)
    pce.set_indices(indices)

    cond_tol = 1e2
    samples = generate_induced_samples_migliorati_tolerance(pce, cond_tol)

    for dd in range(2, degree):
        # Add the indices of level dd and top up the existing samples.
        new_indices = compute_hyperbolic_level_indices(num_vars, dd, 1.)
        samples = increment_induced_samples_migliorati(
            pce, cond_tol, samples, indices, new_indices)
        indices = np.hstack((indices, new_indices))
        pce.set_indices(indices)

    # pce.basis_matrix is evaluated at user-space samples, so the
    # accumulated samples are mapped out of the canonical space first.
    samples = var_trans.map_from_canonical_space(samples)
    cond = compute_preconditioned_basis_matrix_condition_number(
        pce.basis_matrix, samples)
    assert cond < cond_tol
def test_define_mixed_tensor_product_random_variable_contin_discrete(self):
    """
    Map boundary samples of a tensor product of continuous and discrete
    one-dimensional variables to the canonical space and back again.

    The marginals mix uniform, normal and binomial variables; the expected
    canonical value is -1 for the first sample and +1 for the second sample
    of every variable, including the binomial ones.
    """
    # parameters of binomial distribution
    num_trials = 10
    prob_success = 0.5
    univariate_variables = [
        stats.uniform(),
        stats.norm(-1, np.sqrt(4)),
        stats.norm(-1, np.sqrt(4)),
        stats.binom(num_trials, prob_success),
        stats.norm(-1, np.sqrt(4)),
        stats.uniform(0, 1),
        stats.uniform(0, 1),
        stats.binom(num_trials, prob_success),
    ]
    var_trans = AffineRandomVariableTransformation(univariate_variables)

    # first sample is on left boundary of all bounded variables
    # and one standard deviation to left of mean for gaussian variables
    # second sample is on right boundary of all bounded variables
    # and one standard deviation to right of mean for gaussian variables.
    # The right boundary of both binomial variables is num_trials.
    true_user_samples = np.asarray(
        [[0, -3, -3, 0, -3, 0, 0, 0],
         [1, 1, 1, num_trials, 1, 1, 1, num_trials]]).T
    canonical_samples = var_trans.map_to_canonical_space(true_user_samples)

    # Every row (variable) is expected to be [-1, 1]; this covers the
    # discrete variables in rows 3 and 7 as well.
    true_canonical_samples = np.ones_like(true_user_samples)
    true_canonical_samples[:, 0] = -1
    assert np.allclose(true_canonical_samples, canonical_samples)

    user_samples = var_trans.map_from_canonical_space(canonical_samples)
    assert np.allclose(user_samples, true_user_samples)
def test_map_rv_discrete(self):
    """
    Check the affine map of two identical ``float_rv_discrete`` variables
    with equally weighted masses: the first variable, sampled at all of its
    mass locations, must span [-1, 1] in canonical space and the round trip
    must recover the original samples.
    """
    nvars = 2
    mass_locs = np.arange(5, 501, step=50)
    nmasses = mass_locs.shape[0]
    mass_probs = np.ones(nmasses, dtype=float) / float(nmasses)

    # The same frozen variable object is reused for every dimension.
    discrete_var = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locs, mass_probs))()
    variables = IndependentMultivariateRandomVariable(
        [discrete_var] * nvars)
    var_trans = AffineRandomVariableTransformation(variables)

    # Row 0 visits every mass location; row 1 stays at the first location.
    all_locations = mass_locs[np.newaxis, :]
    first_location_only = mass_locs[0] * np.ones((1, nmasses))
    samples = np.vstack([all_locations, first_location_only])

    canonical_samples = var_trans.map_to_canonical_space(samples)
    first_row = canonical_samples[0]
    assert (first_row.min() == -1)
    assert (first_row.max() == 1)

    round_trip = var_trans.map_from_canonical_space(canonical_samples)
    assert np.allclose(round_trip, samples)
def test_discrete_induced_sampling(self):
    """
    Compare three induced-measure samplers for a PCE on two discrete
    variables.

    Samples are drawn with ``generate_induced_samples``,
    ``discrete_induced_sampling`` and ``random_induced_measure_sampling``.
    The sample mean of each set must match, to a relative tolerance of
    1e-2, the mean of the induced density obtained by summing over the
    tensor product of the probability masses of the two variables.
    """
    # Function-scope imports are bound before the closures that use them.
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.polynomial_sampling import christoffel_function

    degree = 3

    # First variable: equal masses at geometrically spaced locations.
    nmasses1 = 10
    mass_locations1 = np.geomspace(1.0, 512.0, num=nmasses1)
    masses1 = np.ones(nmasses1, dtype=float) / nmasses1
    var1 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations1, masses1))()

    # Second variable: geometrically growing masses at integer locations.
    nmasses2 = 10
    mass_locations2 = np.arange(0, nmasses2)
    # NOTE: if the upper value is increased from 16 the unmodified case
    # becomes ill conditioned
    masses2 = np.geomspace(1.0, 16.0, num=nmasses2)
    masses2 /= masses2.sum()
    var2 = float_rv_discrete(
        name='float_rv_discrete', values=(mass_locations2, masses2))()

    var_trans = AffineRandomVariableTransformation([var1, var2])
    pce_opts = define_poly_options_from_variable_transformation(var_trans)

    pce = PolynomialChaosExpansion()
    pce.configure(pce_opts)
    indices = compute_hyperbolic_indices(pce.num_vars(), degree, 1.0)
    pce.set_indices(indices)

    num_samples = int(1e4)

    # Sampler 1: generic induced sampling in the canonical space.
    np.random.seed(1)
    canonical_samples = generate_induced_samples(pce, num_samples)
    samples = var_trans.map_from_canonical_space(canonical_samples)

    # Sampler 2: induced sampling specialised to discrete masses.
    np.random.seed(1)
    canonical_xk = [
        2 * get_distribution_info(var1)[2]['xk'] - 1,
        2 * get_distribution_info(var2)[2]['xk'] - 1
    ]
    basis_matrix_generator = partial(
        basis_matrix_generator_1d, pce, degree)
    canonical_samples1 = discrete_induced_sampling(
        basis_matrix_generator, pce.indices, canonical_xk,
        [var1.dist.pk, var2.dist.pk], num_samples)
    samples1 = var_trans.map_from_canonical_space(canonical_samples1)

    def density(x):
        # Joint density of the two independent variables.
        return var1.pdf(x[0, :]) * var2.pdf(x[1, :])

    envelope_factor = 30

    def generate_proposal_samples(n):
        # Proposal samples are drawn from the variables themselves.
        return np.vstack([var1.rvs(n), var2.rvs(n)])

    proposal_density = density

    # Sampler 3: rejection-style sampling of the induced measure.
    # unlike fekete and leja sampling can and should use
    # pce.basis_matrix here. If use canonical_basis_matrix then
    # densities must be mapped to this space also which can be difficult
    samples2 = random_induced_measure_sampling(
        num_samples, pce.num_vars(), pce.basis_matrix, density,
        proposal_density, generate_proposal_samples, envelope_factor)

    def induced_density(x):
        # Density weighted by the Christoffel function of the PCE basis.
        return density(x) * christoffel_function(x, pce.basis_matrix, True)

    # Tensor product of the mass locations and probabilities.
    quad_samples = cartesian_product([var1.dist.xk, var2.dist.xk])
    quad_weights = outer_product([var1.dist.pk, var2.dist.pk])

    rtol = 1e-2
    # Sanity checks on the discrete measure and the Christoffel function.
    assert np.allclose(quad_weights, density(quad_samples))
    assert np.allclose(density(quad_samples).sum(), 1)
    assert np.allclose(
        christoffel_function(quad_samples, pce.basis_matrix,
                             True).dot(quad_weights), 1.0)

    true_induced_mean = quad_samples.dot(induced_density(quad_samples))
    assert np.allclose(samples.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples1.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples2.mean(axis=1), true_induced_mean, rtol=rtol)
def help_discrete_induced_sampling(self, var1, var2, envelope_factor):
    """
    Compare three induced-measure samplers for a degree-3 PCE on the two
    given variables.

    Samples are drawn with ``generate_induced_samples``,
    ``discrete_induced_sampling`` and ``random_induced_measure_sampling``.
    The sample mean of each set must match, to a relative tolerance of
    1e-2, the mean of the induced density obtained by summing over the
    tensor product of the probability masses of the two variables.

    Parameters
    ----------
    var1, var2 : frozen random variables
        Each must provide ``rvs`` and either ``pdf`` or ``pmf``, and be
        accepted by ``get_probability_masses``.

    envelope_factor : float
        Forwarded unchanged to ``random_induced_measure_sampling``.
    """
    # Function-scope imports are bound before the closures that use them.
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.polynomial_sampling import christoffel_function

    degree = 3

    var_trans = AffineRandomVariableTransformation([var1, var2])
    pce_opts = define_poly_options_from_variable_transformation(var_trans)

    pce = PolynomialChaosExpansion()
    pce.configure(pce_opts)
    indices = compute_hyperbolic_indices(pce.num_vars(), degree, 1.0)
    pce.set_indices(indices)

    num_samples = int(3e4)

    # Sampler 1: generic induced sampling in the canonical space.
    np.random.seed(1)
    canonical_samples = generate_induced_samples(pce, num_samples)
    samples = var_trans.map_from_canonical_space(canonical_samples)

    # Sampler 2: induced sampling specialised to discrete masses.
    np.random.seed(1)
    # Query the masses of each variable once and split locations from
    # probabilities.
    # NOTE(review): stacking with np.array assumes every variable has the
    # same number of masses - confirm against callers.
    masses = [
        get_probability_masses(var)
        for var in var_trans.variable.all_variables()
    ]
    xk = np.array([mass[0] for mass in masses])
    pk = np.array([mass[1] for mass in masses])
    canonical_xk = var_trans.map_to_canonical_space(xk)
    basis_matrix_generator = partial(
        basis_matrix_generator_1d, pce, degree)
    canonical_samples1 = discrete_induced_sampling(
        basis_matrix_generator, pce.indices, canonical_xk, pk, num_samples)
    samples1 = var_trans.map_from_canonical_space(canonical_samples1)

    def univariate_pdf(var, x):
        # Use pdf when the underlying distribution defines it, otherwise
        # fall back to pmf.
        if hasattr(var.dist, 'pdf'):
            return var.pdf(x)
        return var.pmf(x)

    def density(x):
        # some issue with native scipy.pmf
        # Joint density of the two independent variables.
        return univariate_pdf(var1, x[0, :]) * univariate_pdf(
            var2, x[1, :])

    def generate_proposal_samples(n):
        # Proposal samples are drawn from the variables themselves.
        return np.vstack([var1.rvs(n), var2.rvs(n)])

    proposal_density = density

    # Sampler 3: rejection-style sampling of the induced measure.
    # unlike fekete and leja sampling can and should use
    # pce.basis_matrix here. If use canonical_basis_matrix then
    # densities must be mapped to this space also which can be difficult
    samples2 = random_induced_measure_sampling(
        num_samples, pce.num_vars(), pce.basis_matrix, density,
        proposal_density, generate_proposal_samples, envelope_factor)

    def induced_density(x):
        # Density weighted by the Christoffel function of the PCE basis.
        return density(x) * christoffel_function(x, pce.basis_matrix, True)

    # Tensor product of the mass locations and probabilities.
    quad_samples = cartesian_product([xk[0], xk[1]])
    quad_weights = outer_product([pk[0], pk[1]])

    rtol = 1e-2
    # Sanity checks on the discrete measure and the Christoffel function.
    assert np.allclose(quad_weights, density(quad_samples))
    assert np.allclose(density(quad_samples).sum(), 1)
    assert np.allclose(
        christoffel_function(quad_samples, pce.basis_matrix,
                             True).dot(quad_weights), 1.0)

    true_induced_mean = quad_samples.dot(induced_density(quad_samples))
    assert np.allclose(samples.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples1.mean(axis=1), true_induced_mean, rtol=rtol)
    assert np.allclose(samples2.mean(axis=1), true_induced_mean, rtol=rtol)
def help_compare_prediction_based_oed(self, deviation_fun,
                                      gauss_deviation_fun,
                                      use_gauss_quadrature,
                                      ninner_loop_samples, ndesign_vars,
                                      tol):
    """
    Compare the deviations and utilities computed by
    ``BayesianBatchDeviationOED`` with reference values obtained from
    ``laplace_posterior_approximation_for_linear_models``.

    Parameters
    ----------
    deviation_fun : callable
        Forwarded to ``BayesianBatchDeviationOED`` as ``deviation_fun``.

    gauss_deviation_fun : callable
        Called as ``gauss_deviation_fun(post_mean, post_cov)`` with the
        posterior mean and covariance of the Laplace approximation; its
        return value is the reference deviation of one outer-loop sample.

    use_gauss_quadrature : bool
        If True the inner-loop prior samples are a tensor-product
        Gauss-Hermite rule with ``ninner_loop_samples`` points per random
        variable. Otherwise ``None`` is passed as the inner sample
        generator, i.e. the default Monte Carlo sampling.

    ninner_loop_samples : int
        Number of inner-loop samples (per dimension when
        ``use_gauss_quadrature`` is True).

    ndesign_vars : int
        Number of design variables; five candidates are used per variable.

    tol : float
        Absolute tolerance of the comparisons.
    """
    ncandidates_1d = 5
    design_candidates = cartesian_product(
        [np.linspace(-1, 1, ncandidates_1d)] * ndesign_vars)
    ncandidates = design_candidates.shape[1]

    # Define model used to predict likely observable data
    indices = compute_hyperbolic_indices(ndesign_vars, 1)[:, 1:]
    Amat = monomial_basis_matrix(indices, design_candidates)
    obs_fun = partial(linear_obs_fun, Amat)

    # Define model used to predict unobservable QoI
    qoi_fun = exponential_qoi_fun

    # Define the prior PDF of the unknown variables
    nrandom_vars = indices.shape[1]
    prior_variable = IndependentMultivariateRandomVariable(
        [stats.norm(0, 0.5)] * nrandom_vars)

    # Define the independent observational noise
    noise_std = 1

    # Define initial design
    init_design_indices = np.array([ncandidates // 2])

    # Define OED options
    nouter_loop_samples = 100
    if use_gauss_quadrature:
        # 301 needed for cvar deviation
        # only 31 needed for variance deviation
        ninner_loop_samples_1d = ninner_loop_samples
        var_trans = AffineRandomVariableTransformation(prior_variable)
        x_quad, w_quad = gauss_hermite_pts_wts_1D(ninner_loop_samples_1d)
        x_quad = cartesian_product([x_quad] * nrandom_vars)
        w_quad = outer_product([w_quad] * nrandom_vars)
        x_quad = var_trans.map_from_canonical_space(x_quad)
        # From here on the count refers to the full tensor-product rule.
        ninner_loop_samples = x_quad.shape[1]

        def generate_inner_prior_samples(nsamples):
            # The fixed quadrature rule is returned on every call, so the
            # requested size must match the size of the rule.
            assert nsamples == x_quad.shape[1], (nsamples, x_quad.shape)
            return x_quad, w_quad
    else:
        # use default Monte Carlo sampling
        generate_inner_prior_samples = None

    # Setup OED problem
    oed = BayesianBatchDeviationOED(design_candidates,
                                    obs_fun,
                                    noise_std,
                                    prior_variable,
                                    qoi_fun,
                                    nouter_loop_samples,
                                    ninner_loop_samples,
                                    generate_inner_prior_samples,
                                    deviation_fun=deviation_fun)
    oed.populate()
    oed.set_collected_design_indices(init_design_indices)

    prior_mean = oed.prior_variable.get_statistics('mean')
    prior_cov = np.diag(prior_variable.get_statistics('var')[:, 0])
    prior_cov_inv = np.linalg.inv(prior_cov)

    # Generate experimental design
    nexperiments = 3
    for _ in range(len(init_design_indices), nexperiments):
        # Copy current state of OED before new data is determined
        # This copy will be used to compute Laplace based utility and
        # evidence values for testing
        oed_copy = copy.deepcopy(oed)

        # Update the design
        utility_vals, selected_indices = oed.update_design()

        # Only the deviations of the chosen design are compared below.
        _, deviations, _, _ = oed_copy.compute_expected_utility(
            oed_copy.collected_design_indices, selected_indices, True)

        # only test intermediate quantities associated with design
        # chosen by the OED step; these do not depend on the outer-loop
        # sample so they are computed once per step
        idx = oed.collected_design_indices
        noise_cov_inv = np.eye(idx.shape[0]) / noise_std**2
        Amat_idx = Amat[idx, :]

        exact_deviations = np.empty(nouter_loop_samples)
        for jj in range(nouter_loop_samples):
            obs_jj = oed_copy.outer_loop_obs[jj:jj + 1, idx]
            exact_post_mean_jj, exact_post_cov_jj = \
                laplace_posterior_approximation_for_linear_models(
                    Amat_idx, prior_mean, prior_cov_inv, noise_cov_inv,
                    obs_jj.T)
            exact_deviations[jj] = gauss_deviation_fun(
                exact_post_mean_jj, exact_post_cov_jj)

        # Report the largest discrepancy only when the check fails.
        max_error = np.absolute(exact_deviations - deviations[:, 0]).max()
        assert np.allclose(
            exact_deviations, deviations[:, 0], atol=tol), (max_error, tol)
        assert np.allclose(utility_vals[selected_indices],
                           -np.mean(exact_deviations),
                           atol=tol)