def setup_check_variance_reduction_model_ensemble_short_column(
        nmodels=5, npilot_samples=None):
    """
    Build a short-column model ensemble and its model covariance matrix.

    Parameters
    ----------
    nmodels : integer
        The number of models from ShortColumnModelEnsemble to include.

    npilot_samples : integer, optional
        If provided, the covariance is estimated numerically from this many
        pilot samples; otherwise it is computed via quadrature using a
        log-transformed normal in place of the lognormal variable.

    Returns
    -------
    model_ensemble : pya.ModelEnsemble
        The ensemble of the first ``nmodels`` models.
    cov : np.ndarray (nmodels, nmodels)
        The covariance between the model outputs.
    generate_samples : callable
        Generates random samples from the 5-dimensional input variable.
    """
    example = ShortColumnModelEnsemble()
    model_ensemble = pya.ModelEnsemble(
        [example.models[ii] for ii in range(nmodels)])
    univariate_variables = [
        uniform(5, 10), uniform(15, 10), norm(500, 100), norm(2000, 400),
        lognorm(s=0.5, scale=np.exp(5))]
    variable = pya.IndependentMultivariateRandomVariable(univariate_variables)
    generate_samples = partial(
        pya.generate_independent_random_samples, variable)
    if npilot_samples is not None:
        # The number of pilot samples effects ability of numerical estimate
        # of variance reduction to match theoretical value
        cov, samples, weights = pya.estimate_model_ensemble_covariance(
            npilot_samples, generate_samples, model_ensemble)
    else:
        # it is difficult to create a quadrature rule for the lognormal
        # distribution so instead define the variable as normal and then
        # apply log transform
        univariate_variables = [
            uniform(5, 10), uniform(15, 10), norm(500, 100), norm(2000, 400),
            norm(loc=5, scale=0.5)]
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        example.apply_lognormal = True
        cov = example.get_covariance_matrix(variable)[:nmodels, :nmodels]
        example.apply_lognormal = False
    return model_ensemble, cov, generate_samples
def test_generate_samples_and_values_mfmc(self):
    """Check the nested structure of the MFMC sample sets."""
    ensemble = ShortColumnModelEnsemble()
    model_ensemble = pya.ModelEnsemble(
        [ensemble.m0, ensemble.m1, ensemble.m2])
    variable = pya.IndependentMultivariateRandomVariable([
        uniform(5, 10), uniform(15, 10), norm(500, 100), norm(2000, 400),
        lognorm(s=0.5, scale=np.exp(5))])
    generate_samples = partial(
        pya.generate_independent_random_samples, variable)

    nhf_samples, nsample_ratios = 10, [2, 4]
    samples, values = pya.generate_samples_and_values_mfmc(
        nhf_samples, nsample_ratios, model_ensemble, generate_samples)

    # Each low-fidelity sample set must have the prescribed size and share
    # its leading samples with the previous model's set.
    for jj in range(1, len(samples)):
        assert samples[jj][1].shape[1] == \
            nsample_ratios[jj - 1] * nhf_samples
        idx = 0 if jj == 1 else 1
        assert np.allclose(samples[jj][0], samples[jj - 1][idx])
def test_approximate_gaussian_process(self):
    """Fit a GP to a random function drawn from a Matern kernel and check
    the interpolation error is tiny."""
    from sklearn.gaussian_process.kernels import Matern
    num_vars = 1
    variable = pya.IndependentMultivariateRandomVariable(
        [stats.uniform(-1, 2)] * num_vars)
    num_samples = 100
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)

    # Draw a random function from the kernel
    nu = np.inf  # 2.5
    kernel = Matern(0.5, nu=nu)
    xx = np.linspace(-1, 1, 1000)[np.newaxis, :]
    alpha = np.random.normal(0, 1, xx.shape[1])
    train_vals = kernel(train_samples.T, xx.T).dot(alpha)[:, np.newaxis]

    gp = approximate(
        train_samples, train_vals, "gaussian_process",
        {"nu": nu, "noise_level": 1e-8}).approx

    residual = gp(xx)[:, 0] - kernel(xx.T, xx.T).dot(alpha)
    error = np.linalg.norm(residual) / np.sqrt(xx.shape[1])
    assert error < 1e-5
def setup_oakley_function():
    r"""
    Setup the Oakely function benchmark

    .. math:: f(z) = a_1^Tz + a_2^T\sin(z) + a_3^T\cos(z) + z^TMz

    where :math:`z` consists of 15 I.I.D. standard Normal variables and the
    data :math:`a_1,a_2,a_3` and :math:`M` are defined in the function
    :func:`pyapprox.benchmarks.sensitivity_benchmarks.get_oakley_function_data`.

    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('oakley')
    >>> print(benchmark.keys())
    dict_keys(['fun', 'variable', 'mean', 'variance', 'main_effects'])

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes
    References
    ----------
    .. [OakelyOJRSB2004] `Oakley, J.E. and O'Hagan, A. (2004), Probabilistic sensitivity analysis of complex models: a Bayesian approach. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 66: 751-769. <https://doi.org/10.1111/j.1467-9868.2004.05304.x>`_
    """
    # 15 i.i.d. standard normal inputs
    variable = pya.IndependentMultivariateRandomVariable([stats.norm()] * 15)
    mean, variance, main_effects = oakley_function_statistics()
    attributes = {
        'fun': oakley_function,
        'variable': variable,
        'mean': mean,
        'variance': variance,
        'main_effects': main_effects}
    return Benchmark(attributes)
def __init__(self, theta1, shifts=None):
    """
    Parameters
    ----------
    theta1 : float
        Angle (radians) of the first low-fidelity model. Must satisfy
        theta2 < theta1 < theta0, i.e. pi/6 < theta1 < pi/2, which is
        enforced by an assertion below.

    shifts : iterable of length 2, optional
        Constant shifts associated with the two lower-fidelity models.
        Defaults to [0, 0].

    Notes
    -----
    The choice of A0, A1, A2 here results in unit variance for each model
    """
    self.A0 = np.sqrt(11)
    self.A1 = np.sqrt(7)
    self.A2 = np.sqrt(3)
    self.nmodels = 3
    # theta0 and theta2 are fixed; only theta1 is user controlled
    self.theta0 = np.pi / 2
    self.theta1 = theta1
    self.theta2 = np.pi / 6
    assert self.theta0 > self.theta1 and self.theta1 > self.theta2
    self.shifts = shifts
    if self.shifts is None:
        self.shifts = [0, 0]
    assert len(self.shifts) == 2
    self.models = [self.m0, self.m1, self.m2]
    univariate_variables = [uniform(-1, 2), uniform(-1, 2)]
    self.variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    self.generate_samples = partial(
        pya.generate_independent_random_samples, self.variable)
def test_adaptive_approximate_increment_degree(self):
    """Degree-incrementing PCE approximation must recover a random cubic
    expansion exactly once the basis contains it."""
    nvars = 2
    variable = pya.IndependentMultivariateRandomVariable(
        [stats.uniform(-1, 2)] * nvars)
    var_trans = pya.AffineRandomVariableTransformation(variable)

    # Build the target: a random degree-3 polynomial chaos expansion
    poly = pya.PolynomialChaosExpansion()
    poly.configure(
        pya.define_poly_options_from_variable_transformation(var_trans))
    degree = 3
    poly.set_indices(pya.compute_hyperbolic_indices(nvars, degree))
    poly.set_coefficients(
        np.random.normal(0, 1, (poly.indices.shape[1], 1)))

    fun = poly
    max_degree = degree + 2
    result = adaptive_approximate_polynomial_chaos_increment_degree(
        fun, variable, max_degree, max_nsamples=31, cond_tol=1e4,
        sample_growth_factor=2, verbose=0, oversampling_ratio=None,
        solver_type='lstsq', callback=None)
    print('Ntrain samples', result.train_samples.shape[1])
    # The leading recovered coefficients must equal the target's exactly
    assert np.allclose(
        result.approx.coefficients[:poly.coefficients.shape[0]],
        poly.coefficients)
def setup_rosenbrock_function(nvars):
    r"""
    Setup the Rosenbrock function benchmark

    .. math:: f(z) = \sum_{i=1}^{d/2}\left[100(z_{2i-1}^{2}-z_{2i})^{2}+(z_{2i-1}-1)^{2}\right]

    using

    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('rosenbrock',nvars=2)
    >>> print(benchmark.keys())
    dict_keys(['fun', 'jac', 'hessp', 'variable'])

    Parameters
    ----------
    nvars : integer
        The number of variables of the Rosenbrock function

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes

    References
    ----------
    .. [DixonSzego1990] `Dixon, L. C. W.; Mills, D. J. "Effect of Rounding Errors on the Variable Metric Method". Journal of Optimization Theory and Applications. 80: 175–179. 1994 <https://doi.org/10.1007%2FBF02196600>`_
    """
    # i.i.d. uniform inputs on [-2, 2]
    variable = pya.IndependentMultivariateRandomVariable(
        [stats.uniform(-2, 4)] * nvars)
    attributes = {
        'fun': rosenbrock_function,
        'jac': rosenbrock_function_jacobian,
        'hessp': rosenbrock_function_hessian_prod,
        'variable': variable}
    return Benchmark(attributes)
def test_bootstrap_control_variate_estimator(self):
    """Bootstrapped variance of the MFMC estimator must agree with the
    analytical ACVMF variance to within a few percent."""
    example = TunableModelEnsemble(np.pi / 2 * 0.95)
    model_ensemble = pya.ModelEnsemble(example.models)
    variable = pya.IndependentMultivariateRandomVariable(
        [uniform(-1, 2), uniform(-1, 2)])
    cov_matrix = example.get_covariance_matrix()
    model_costs = [1, 0.5, 0.4]

    est = ACVMF(cov_matrix, model_costs)
    target_cost = 1000
    nhf_samples, nsample_ratios = est.allocate_samples(target_cost)[:2]

    generate_samples = partial(
        pya.generate_independent_random_samples, variable)
    samples, values = est.generate_data(
        nhf_samples, nsample_ratios, generate_samples, model_ensemble)
    mc_cov_matrix = compute_covariance_from_control_variate_samples(values)
    # assert np.allclose(cov_matrix,mc_cov_matrix,atol=1e-2)
    est = ACVMF(mc_cov_matrix, model_costs)

    weights = get_mfmc_control_variate_weights(
        example.get_covariance_matrix())
    bootstrap_mean, bootstrap_variance = pya.bootstrap_mfmc_estimator(
        values, weights, 10000)
    est_mean = est(values)
    est_variance = est.get_variance(nhf_samples, nsample_ratios)
    rel_diff = abs((est_variance - bootstrap_variance) / est_variance)
    print(rel_diff)
    assert rel_diff < 6e-2
def test_adaptive_approximate_gaussian_process_normalize_inputs(self):
    """
    Adaptively fit a GP with input normalization to a random function
    drawn from a Matern kernel, tracking the relative validation error
    after every refinement via a callback.
    """
    from sklearn.gaussian_process.kernels import Matern
    num_vars = 1
    univariate_variables = [stats.beta(5, 10, 0, 2)] * num_vars

    # Generate random function
    nu = np.inf  # 2.5
    kernel = Matern(0.1, nu=nu)
    X = np.linspace(-1, 1, 1000)[np.newaxis, :]
    alpha = np.random.normal(0, 1, X.shape[1])

    def fun(x):
        return kernel(x.T, X.T).dot(alpha)[:, np.newaxis]

    errors = []
    validation_samples = pya.generate_independent_random_samples(
        pya.IndependentMultivariateRandomVariable(univariate_variables),
        100)
    validation_values = fun(validation_samples)

    def callback(gp):
        # record the relative error on the held-out samples after each
        # adaptive refinement step
        gp_vals = gp(validation_samples)
        assert gp_vals.shape == validation_values.shape
        error = np.linalg.norm(gp_vals - validation_values
                               ) / np.linalg.norm(validation_values)
        print(error, gp.y_train_.shape[0])
        errors.append(error)

    # sampling weight: product of the marginal PDFs
    weight_function = partial(
        pya.tensor_product_pdf,
        univariate_pdfs=[v.pdf for v in univariate_variables])

    gp = adaptive_approximate(
        fun, univariate_variables, "gaussian_process", {
            "nu": nu,
            "noise_level": None,
            "normalize_y": True,
            "alpha": 1e-10,
            "normalize_inputs": True,
            "weight_function": weight_function,
            "ncandidate_samples": 1e3,
            "callback": callback
        }).approx
    # the final adaptive iterate should be essentially exact
    print(errors[-1])
    assert errors[-1] < 1e-7
def test_approximate_fixed_pce(self):
    """
    Cross-validation of a fixed-basis PCE must reproduce, per QoI, the
    scores of the reference LinearLeastSquaresCV solver.

    Fix: renamed the ambiguous single-letter index variable ``I`` to
    ``II`` (PEP 8 / E741; also matches the sibling basis-expansion test).
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # decaying coefficients so higher-degree terms contribute less
    degrees = poly.indices.sum(axis=0)
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)

    indices = compute_hyperbolic_indices(num_vars, 1, 1)
    nfolds = 10
    method = 'polynomial_chaos'
    options = {
        'basis_type': 'fixed',
        'variable': variable,
        'options': {
            'linear_solver_options': {},
            'indices': indices,
            'solver_type': 'lstsq'
        }
    }
    approx_list, residues_list, cv_score = cross_validate_approximation(
        train_samples, train_vals, options, nfolds, method,
        random_folds=False)

    # reference: least-squares CV applied directly to the basis matrix
    solver = LinearLeastSquaresCV(cv=nfolds, random_folds=False)
    poly.set_indices(indices)
    basis_matrix = poly.basis_matrix(train_samples)
    solver.fit(basis_matrix, train_vals[:, 0:1])
    assert np.allclose(solver.cv_score_, cv_score[0])
    solver.fit(basis_matrix, train_vals[:, 1:2])
    assert np.allclose(solver.cv_score_, cv_score[1])
def test_pce_sensitivities_of_sobol_g_function(self):
    """Sobol indices computed from a PCE of the Sobol-G function must
    match the analytical statistics to ~1e-2."""
    nsamples = 2000
    nvars, degree = 3, 8
    a = np.array([1, 2, 5])[:nvars]
    variable = pya.IndependentMultivariateRandomVariable(
        [uniform(0, 1)] * nvars)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly.configure(
        pya.define_poly_options_from_variable_transformation(var_trans))
    poly.set_indices(pya.tensor_product_indices([degree] * nvars))

    samples = pya.generate_independent_random_samples(
        var_trans.variable, nsamples)
    # overwrite with arcsine-distributed samples mapped onto [0, 1]
    samples = (np.cos(np.random.uniform(0, np.pi, (nvars, nsamples))) +
               1) / 2
    values = sobol_g_function(a, samples)

    # weighted least-squares fit of the PCE coefficients
    basis_matrix = poly.basis_matrix(samples)
    weights = 1 / np.sum(basis_matrix**2, axis=1)[:, np.newaxis]
    coef = np.linalg.lstsq(
        basis_matrix * weights, values * weights, rcond=None)[0]
    poly.set_coefficients(coef)

    nvalidation_samples = 1000
    validation_samples = pya.generate_independent_random_samples(
        var_trans.variable, nvalidation_samples)
    validation_values = sobol_g_function(a, validation_samples)
    poly_validation_vals = poly(validation_samples)
    rel_error = np.linalg.norm(
        poly_validation_vals - validation_values) / np.linalg.norm(
            validation_values)
    print('Rel. Error', rel_error)

    pce_main_effects, pce_total_effects = \
        pya.get_main_and_total_effect_indices_from_pce(
            poly.get_coefficients(), poly.get_indices())
    interaction_terms, pce_sobol_indices = get_sobol_indices(
        poly.get_coefficients(), poly.get_indices(), max_order=3)
    mean, variance, main_effects, total_effects, sobol_indices = \
        get_sobol_g_function_statistics(a, interaction_terms)

    assert np.allclose(poly.mean(), mean, atol=1e-2)
    assert np.allclose(poly.variance(), variance, atol=1e-2)
    assert np.allclose(pce_main_effects, main_effects, atol=1e-2)
    assert np.allclose(pce_total_effects, total_effects, atol=1e-2)
    assert np.allclose(pce_sobol_indices, sobol_indices, atol=1e-2)
def __init__(self):
    """Ensemble of five single-variable models over a uniform(0, 1) input."""
    self.nmodels = 5
    self.nvars = 1
    self.models = [self.m0, self.m1, self.m2, self.m3, self.m4]
    self.variable = pya.IndependentMultivariateRandomVariable(
        [uniform(0, 1)])
    # convenience sampler bound to this object's variable
    self.generate_samples = partial(
        pya.generate_independent_random_samples, self.variable)
def help_cross_validate_pce_degree(self, solver_type, solver_options):
    """
    Build a random degree-3 PCE, then check that (a) ``approximate`` with
    degree cross-validation recovers it exactly on noise-free data and
    (b) ``cross_validate_pce_degree`` selects the expected per-QoI degrees
    for the given linear solver configuration.
    """
    print(solver_type, solver_options)
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 3
    poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
    # factor of 2 does not pass test but 2.2 does
    num_samples = int(poly.num_terms() * 2.2)
    coef = np.random.normal(0, 1, (poly.indices.shape[1], 2))
    # zero out the degree-3 terms of the first QoI
    coef[pya.nchoosek(num_vars + 2, 2):, 0] = 0
    # for first qoi make degree 2 the best degree
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly

    poly = approximate(
        train_samples, train_vals, "polynomial_chaos", {
            "basis_type": "hyperbolic_cross",
            "variable": variable,
            "options": {
                "verbose": 3,
                "solver_type": solver_type,
                "min_degree": 1,
                "max_degree": degree + 1,
                "linear_solver_options": solver_options
            }
        }).approx

    # the data is noise free so the recovered PCE should be exact
    num_validation_samples = 10
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples))

    poly = copy.deepcopy(true_poly)
    approx_res = cross_validate_pce_degree(
        poly, train_samples, train_vals, 1, degree + 1,
        solver_type=solver_type, linear_solver_options=solver_options)
    # first QoI has no degree-3 terms (see above) so degree 2 should win;
    # the second QoI needs the full degree 3
    assert np.allclose(approx_res.degrees, [2, 3])
def test_pce_basis_expansion(self):
    """
    Recover a sparse two-QoI random PCE with the expanding-basis solver
    and check the fit reproduces the true polynomial.
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # decaying coefficients so higher-degree terms contribute less
    degrees = poly.indices.sum(axis=0)
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly
    poly = approximate(
        train_samples, train_vals, "polynomial_chaos", {
            "basis_type": "expanding_basis",
            "variable": variable,
            "options": {
                "max_num_expansion_steps_iter": 1,
                "verbose": 3,
                "max_num_terms": 1000,
                "max_num_step_increases": 2,
                "max_num_init_terms": 33
            }
        }).approx
    num_validation_samples = 100
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    # NOTE(review): the freshly drawn validation samples are immediately
    # discarded and the comparison is done on the training data —
    # confirm whether independent validation was intended here
    validation_samples = train_samples
    error = np.linalg.norm(
        poly(validation_samples) -
        true_poly(validation_samples)) / np.sqrt(num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples), atol=1e-8), error
def test_approximate_polynomial_chaos_custom_poly_type(self):
    """An explicit poly_types option must override the basis inferred
    from the (deliberately wrong) variable."""
    benchmark = setup_benchmark("ishigami", a=7, b=0.1)
    nvars = benchmark.variable.num_vars()
    # this test purposefully select wrong variable to make sure
    # poly_type overide is activated
    univariate_variables = [stats.beta(5, 5, -np.pi, 2 * np.pi)] * nvars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    # specify correct basis so it is not chosen from var_trans.variable
    poly_opts = {"var_trans": var_trans}
    # but rather from another variable which will invoke Legendre polys
    basis_opts = pya.define_poly_options_from_variable(
        pya.IndependentMultivariateRandomVariable(
            [stats.uniform()] * nvars))
    poly_opts["poly_types"] = basis_opts
    options = {
        "poly_opts": poly_opts,
        "variable": variable,
        "options": {"max_num_step_increases": 1}}

    ntrain_samples = 400
    train_samples = np.random.uniform(
        -np.pi, np.pi, (nvars, ntrain_samples))
    train_vals = benchmark.fun(train_samples)
    approx = approximate(
        train_samples, train_vals, method="polynomial_chaos",
        options=options).approx

    nsamples = 100
    error = compute_l2_error(
        approx, benchmark.fun, approx.var_trans.variable, nsamples,
        rel=True)
    assert error < 1e-4
    assert np.allclose(approx.mean(), benchmark.mean, atol=error)
def __init__(self):
    """Five-model short-column ensemble over a 5-dimensional input."""
    self.nmodels = 5
    self.nvars = 5
    self.models = [self.m0, self.m1, self.m2, self.m3, self.m4]
    # toggled by callers that temporarily treat the fifth variable as a
    # log-transformed normal (e.g. when building a quadrature rule)
    self.apply_lognormal = False
    self.variable = pya.IndependentMultivariateRandomVariable([
        uniform(5, 10), uniform(15, 10), norm(500, 100), norm(2000, 400),
        lognorm(s=0.5, scale=np.exp(5))])
    self.generate_samples = partial(
        pya.generate_independent_random_samples, self.variable)
def test_pce_sensitivities_of_ishigami_function(self):
    """Sensitivity indices from a high-degree PCE of the Ishigami
    function must reproduce the analytical statistics."""
    nsamples = 1500
    nvars, degree = 3, 18
    variable = pya.IndependentMultivariateRandomVariable(
        [uniform(-np.pi, 2 * np.pi)] * nvars)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly.configure(
        pya.define_poly_options_from_variable_transformation(var_trans))
    poly.set_indices(pya.compute_hyperbolic_indices(nvars, degree, 1.0))

    samples = pya.generate_independent_random_samples(
        var_trans.variable, nsamples)
    values = ishigami_function(samples)

    # least-squares fit of the PCE coefficients
    basis_matrix = poly.basis_matrix(samples)
    coef = np.linalg.lstsq(basis_matrix, values, rcond=None)[0]
    poly.set_coefficients(coef)

    nvalidation_samples = 1000
    validation_samples = pya.generate_independent_random_samples(
        var_trans.variable, nvalidation_samples)
    validation_values = ishigami_function(validation_samples)
    poly_validation_vals = poly(validation_samples)
    abs_error = np.linalg.norm(
        poly_validation_vals - validation_values) / np.sqrt(
            nvalidation_samples)

    pce_main_effects, pce_total_effects = \
        pya.get_main_and_total_effect_indices_from_pce(
            poly.get_coefficients(), poly.get_indices())
    mean, variance, main_effects, total_effects, sobol_indices, \
        sobol_interaction_indices = get_ishigami_funciton_statistics()
    assert np.allclose(poly.mean(), mean)
    assert np.allclose(poly.variance(), variance)
    assert np.allclose(pce_main_effects, main_effects)
    assert np.allclose(pce_total_effects, total_effects)

    interaction_terms, pce_sobol_indices = get_sobol_indices(
        poly.get_coefficients(), poly.get_indices(), max_order=3)
    assert np.allclose(pce_sobol_indices, sobol_indices)
def setup_ishigami_function(a, b):
    r"""
    Setup the Ishigami function benchmark

    .. math:: f(z) = \sin(z_1)+a\sin^2(z_2) + bz_3^4\sin(z_1)

    using

    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('ishigami',a=7,b=0.1)
    >>> print(benchmark.keys())
    dict_keys(['fun', 'jac', 'hess', 'variable', 'mean', 'variance', 'main_effects', 'total_effects', 'sobol_indices'])

    Parameters
    ----------
    a : float
        The hyper-parameter a

    b : float
        The hyper-parameter b

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes

    References
    ----------
    .. [Ishigami1990] `T. Ishigami and T. Homma, "An importance quantification technique in uncertainty analysis for computer models," [1990] Proceedings. First International Symposium on Uncertainty Modeling and Analysis, College Park, MD, USA, 1990, pp. 398-403 <https://doi.org/10.1109/ISUMA.1990.151285>`_
    """
    # three i.i.d. uniform variables on [-pi, pi]
    univariate_variables = [stats.uniform(-np.pi, 2 * np.pi)] * 3
    variable = pya.IndependentMultivariateRandomVariable(univariate_variables)
    mean, variance, main_effects, total_effects, sobol_indices, \
        sobol_interaction_indices = get_ishigami_funciton_statistics()
    return Benchmark({
        'fun': partial(ishigami_function, a=a, b=b),
        'jac': partial(ishigami_function_jacobian, a=a, b=b),
        'hess': partial(ishigami_function_hessian, a=a, b=b),
        'variable': variable,
        'mean': mean,
        'variance': variance,
        'main_effects': main_effects,
        'total_effects': total_effects,
        'sobol_indices': sobol_indices,
        'sobol_interaction_indices': sobol_interaction_indices
    })
def variables_prep(filename, product_uniform=False, dummy=False):
    """
    Helper preparing the random variables used to fit a PCE.

    Parameters
    ----------
    filename : str
        File containing the parameter specifications. For the uniform
        cases this is read with ``np.loadtxt`` (bounds in columns 2-3);
        otherwise it is read as a CSV with ``pandas``.
    product_uniform : False or str
        False : do not collapse products into one variable
        'uniform' : uniform distributions are used for the product
        'beta' : beta distributions are used for variables which are
            adapted considering the correlations
        'exact' : the true PDF of the product is used
    dummy : bool
        If True, append an extra uniform(0, 1) dummy variable.

    Returns
    -------
    variable : pya.IndependentMultivariateRandomVariable
        The independent joint variable built from the file.
    """
    if (product_uniform is False) or (product_uniform == 'uniform'):
        # columns 2 and 3 hold the lower and upper bound of each parameter
        ranges = np.loadtxt(
            filename, delimiter=",", usecols=[2, 3], skiprows=1).flatten()
        univariate_variables = [
            uniform(ranges[2 * ii], ranges[2 * ii + 1] - ranges[2 * ii])
            for ii in range(ranges.shape[0] // 2)]
    else:
        param_adjust = pd.read_csv(filename)
        beta_index = param_adjust[
            param_adjust['distribution'] == 'beta'].index.to_list()
        ranges = np.array(param_adjust.loc[:, ['min', 'max']])
        # convert [min, max] to scipy's (loc, scale) convention
        ranges[:, 1] = ranges[:, 1] - ranges[:, 0]
        univariate_variables = []
        for ii in range(param_adjust.shape[0]):
            if ii in beta_index:
                shape_ab = param_adjust.loc[
                    ii, ['a', 'b']].values.astype('float')
                univariate_variables.append(
                    beta(shape_ab[0], shape_ab[1],
                         loc=ranges[ii][0], scale=ranges[ii][1]))
            else:
                univariate_variables.append(
                    uniform(ranges[ii][0], ranges[ii][1]))
    if dummy:
        # presumably an inert variable used for sanity checks downstream —
        # TODO confirm purpose with callers
        univariate_variables.append(uniform(0, 1))
    variable = pya.IndependentMultivariateRandomVariable(univariate_variables)
    return variable
def test_rsquared_mfmc(self):
    """The analytical MFMC variance-reduction formula must agree with a
    term-by-term computation of the estimator variance."""
    ensemble = ShortColumnModelEnsemble()
    model_ensemble = pya.ModelEnsemble(
        [ensemble.m0, ensemble.m3, ensemble.m4])
    variable = pya.IndependentMultivariateRandomVariable([
        uniform(5, 10), uniform(15, 10), norm(500, 100), norm(2000, 400),
        lognorm(s=0.5, scale=np.exp(5))])
    generate_samples = partial(
        pya.generate_independent_random_samples, variable)

    # estimate the model covariance with a large pilot sample
    npilot_samples = int(1e4)
    pilot_samples = generate_samples(npilot_samples)
    config_vars = np.arange(model_ensemble.nmodels)[np.newaxis, :]
    pilot_samples = pya.get_all_sample_combinations(
        pilot_samples, config_vars)
    pilot_values = model_ensemble(pilot_samples)
    pilot_values = np.reshape(
        pilot_values, (npilot_samples, model_ensemble.nmodels))
    cov = np.cov(pilot_values, rowvar=False)

    nhf_samples = 10
    nsample_ratios = np.asarray([2, 4])
    nsamples_per_model = np.concatenate(
        [[nhf_samples], nsample_ratios * nhf_samples])
    eta = pya.get_mfmc_control_variate_weights(cov)
    cor = pya.get_correlation_from_covariance(cov)

    # accumulate the estimator variance model by model
    var_mfmc = cov[0, 0] / nsamples_per_model[0]
    for k in range(1, model_ensemble.nmodels):
        var_mfmc += (1 / nsamples_per_model[k - 1] -
                     1 / nsamples_per_model[k]) * (
                         eta[k - 1]**2 * cov[k, k] + 2 * eta[k - 1] *
                         cor[0, k] * np.sqrt(cov[0, 0] * cov[k, k]))

    assert np.allclose(var_mfmc / cov[0, 0] * nhf_samples,
                       1 - pya.get_rsquared_mfmc(cov, nsample_ratios))
def setup_sobol_g_function(nvars):
    r"""
    Setup the Sobol-G function benchmark

    .. math:: f(z) = \prod_{i=1}^d\frac{\lvert 4z_i-2\rvert+a_i}{1+a_i}, \quad a_i=\frac{i-2}{2}

    using

    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('sobol_g',nvars=2)
    >>> print(benchmark.keys())
    dict_keys(['fun', 'mean', 'variance', 'main_effects', 'total_effects', 'variable'])

    Parameters
    ----------
    nvars : integer
        The number of variables of the Sobol-G function

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes

    References
    ----------
    .. [Saltelli1995] `Saltelli, A., & Sobol, I. M. About the use of rank transformation in sensitivity analysis of model output. Reliability Engineering & System Safety, 50(3), 225-239, 1995. <https://doi.org/10.1016/0951-8320(95)00099-2>`_
    """
    # i.i.d. uniform inputs on [0, 1]
    variable = pya.IndependentMultivariateRandomVariable(
        [stats.uniform(0, 1)] * nvars)
    # coefficients a_i = (i-2)/2 as in the docstring
    a = (np.arange(1, nvars + 1) - 2) / 2
    mean, variance, main_effects, total_effects = \
        get_sobol_g_function_statistics(a)
    attributes = {
        'fun': partial(sobol_g_function, a),
        'mean': mean,
        'variance': variance,
        'main_effects': main_effects,
        'total_effects': total_effects,
        'variable': variable}
    return Benchmark(attributes)
def test_pce_basis_expansion(self):
    """
    Single-QoI variant: recover a random PCE with decaying coefficients
    using the expanding-basis solver with default options and compare
    against the true polynomial on the training data.
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # decaying coefficients so higher-degree terms contribute less
    degrees = poly.indices.sum(axis=0)
    poly.set_coefficients((np.random.normal(0, 1, poly.indices.shape[1]) /
                           (degrees + 1)**2)[:, np.newaxis])
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly
    # NOTE(review): the result of ``approximate`` is used directly here,
    # without the ``.approx`` attribute accessed by other tests in this
    # file — confirm which return convention this version of
    # ``approximate`` uses
    poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
        'basis_type': 'expanding_basis',
        'variable': variable
    })
    num_validation_samples = 100
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    # NOTE(review): validation samples are immediately replaced by the
    # training samples — confirm whether independent validation was
    # intended
    validation_samples = train_samples
    error = np.linalg.norm(
        poly(validation_samples) -
        true_poly(validation_samples)) / np.sqrt(num_validation_samples)
    assert np.allclose(
        poly(validation_samples), true_poly(validation_samples),
        atol=1e-8), error
def test_cross_validate_pce_degree(self):
    """
    ``approximate`` with hyperbolic-cross degree selection must recover a
    noise-free degree-3 PCE exactly, and ``cross_validate_pce_degree``
    must select that degree.
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 3
    poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
    num_samples = poly.num_terms() * 2
    poly.set_coefficients(
        np.random.normal(0, 1, (poly.indices.shape[1], 1)))
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly
    # NOTE(review): ``approximate``'s result is used directly (no
    # ``.approx``) and ``cross_validate_pce_degree`` is unpacked as a
    # tuple — confirm these match the API version used elsewhere in this
    # file
    poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
        'basis_type': 'hyperbolic_cross',
        'variable': variable
    })
    # the data is noise free so the recovered PCE should be exact
    num_validation_samples = 10
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples))
    poly = copy.deepcopy(true_poly)
    poly, best_degree = cross_validate_pce_degree(poly, train_samples,
                                                  train_vals, 1, degree + 2)
    assert best_degree == degree
def test_marginalize_polynomial_chaos_expansions(self):
    """
    Marginalizing a PCE over a subset of its variables must agree with a
    PCE built directly on the remaining variables (after centering by the
    mean), and the resulting variance ratios must reproduce the PCE main
    effects.
    """
    univariate_variables = [uniform(-1, 2), norm(0, 1), uniform(-1, 2)]
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    num_vars = len(univariate_variables)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 2
    indices = pya.compute_hyperbolic_indices(num_vars, degree, 1)
    poly.set_indices(indices)
    # unit coefficients make the marginalized expansions easy to predict
    poly.set_coefficients(np.ones((indices.shape[1], 1)))
    pce_main_effects, pce_total_effects = \
        pya.get_main_and_total_effect_indices_from_pce(
            poly.get_coefficients(), poly.get_indices())
    print(poly.num_terms())
    for ii in range(num_vars):
        # Marginalize out 2 variables
        xx = np.linspace(-1, 1, 101)
        inactive_idx = np.hstack(
            (np.arange(ii), np.arange(ii + 1, num_vars)))
        marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
            poly, inactive_idx, center=True)
        mvals = marginalized_pce(xx[None, :])
        # reference: univariate PCE on the single remaining variable
        variable_ii = variable.all_variables()[ii:ii + 1]
        var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
        poly_ii = pya.PolynomialChaosExpansion()
        poly_opts_ii = \
            pya.define_poly_options_from_variable_transformation(
                var_trans_ii)
        poly_ii.configure(poly_opts_ii)
        indices_ii = compute_hyperbolic_indices(1, degree, 1.)
        poly_ii.set_indices(indices_ii)
        poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
        pvals = poly_ii(xx[None, :])
        # the marginalized PCE is centered, so subtract the full mean
        assert np.allclose(mvals, pvals - poly.mean())
        assert np.allclose(poly_ii.variance() / poly.variance(),
                           pce_main_effects[ii])
        # normalizing the coefficients should leave the main effect
        poly_ii.coefficients /= np.sqrt(poly.variance())
        assert np.allclose(poly_ii.variance(), pce_main_effects[ii])

        # Marginalize out 1 variable
        xx = pya.cartesian_product([xx] * 2)
        inactive_idx = np.array([ii])
        marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
            poly, inactive_idx, center=True)
        mvals = marginalized_pce(xx)
        # reference: bivariate PCE on the two remaining variables
        variable_ii = variable.all_variables()[:ii] + \
            variable.all_variables()[ii+1:]
        var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
        poly_ii = pya.PolynomialChaosExpansion()
        poly_opts_ii = \
            pya.define_poly_options_from_variable_transformation(
                var_trans_ii)
        poly_ii.configure(poly_opts_ii)
        indices_ii = pya.compute_hyperbolic_indices(2, degree, 1.)
        poly_ii.set_indices(indices_ii)
        poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
        pvals = poly_ii(xx)
        assert np.allclose(mvals, pvals - poly.mean())
def setup_rosenbrock_function(nvars):
    r"""
    Setup the Rosenbrock function benchmark

    .. math:: f(z) = \sum_{i=1}^{d/2}\left[100(z_{2i-1}^{2}-z_{2i})^{2}+(z_{2i-1}-1)^{2}\right]

    This benchmark can also be used to test Bayesian inference methods.
    Specifically this benchmarks returns the log likelihood

    .. math:: l(z) = -f(z)

    which can be used to compute the posterior distribution

    .. math:: \pi_{\text{post}}(\rv)=\frac{\pi(\V{y}|\rv)\pi(\rv)}{\int_{\rvdom} \pi(\V{y}|\rv)\pi(\rv)d\rv}

    where the prior is the tensor product of :math:`d` independent and
    identically distributed uniform variables on :math:`[-2,2]`, i.e.
    :math:`\pi(\rv)=\frac{1}{4^d}`, and the likelihood is given by

    .. math:: \pi(\V{y}|\rv)=\exp\left(l(\rv)\right)

    Parameters
    ----------
    nvars : integer
        The number of variables of the Rosenbrock function

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes documented below

    fun : callable
        The rosenbrock with signature

        ``fun(z) -> np.ndarray``

        where ``z`` is a 2D np.ndarray with shape (nvars,nsamples) and the
        output is a 2D np.ndarray with shape (nsamples,1)

    jac : callable
        The jacobian of ``fun`` with signature

        ``jac(z) -> np.ndarray``

        where ``z`` is a 2D np.ndarray with shape (nvars,nsamples) and the
        output is a 2D np.ndarray with shape (nvars,1)

    hessp : callable
        Hessian of ``fun`` times an arbitrary vector p with signature

        ``hessp(z, p) -> ndarray shape (nvars,1)``

        where ``z`` is a 2D np.ndarray with shape (nvars,nsamples) and p is
        an arbitrary vector with shape (nvars,1)

    variable : pya.IndependentMultivariateRandomVariable
        Object containing information of the joint density of the inputs z
        which is the tensor product of independent and identically
        distributed uniform variables on :math:`[-2,2]`.

    mean : float
        The mean of the rosenbrock function with respect to the pdf of
        variable.

    loglike : callable
        The log likelihood of the Bayesian inference problem for inferring
        z given the uniform prior specified by variable and the negative
        log likelihood given by the Rosenbrock function. loglike has the
        signature

        ``loglike(z) -> np.ndarray``

        where ``z`` is a 2D np.ndarray with shape (nvars,nsamples) and the
        output is a 2D np.ndarray with shape (nsamples,1)

    loglike_grad : callable
        The gradient of the ``loglike`` with the signature

        ``loglike_grad(z) -> np.ndarray``

        where ``z`` is a 2D np.ndarray with shape (nvars,nsamples) and the
        output is a 2D np.ndarray with shape (nsamples,1)

    References
    ----------
    .. [DixonSzego1990] `Dixon, L. C. W.; Mills, D. J. "Effect of Rounding Errors on the Variable Metric Method". Journal of Optimization Theory and Applications. 80: 175-179. 1994 <https://doi.org/10.1007%2FBF02196600>`_

    Examples
    --------
    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('rosenbrock',nvars=2)
    >>> print(benchmark.keys())
    dict_keys(['fun', 'jac', 'hessp', 'variable', 'mean', 'loglike', 'loglike_grad'])
    """
    # Prior: tensor product of nvars i.i.d. U[-2, 2] variables
    # (scipy convention: uniform(loc, scale) is U[loc, loc+scale])
    univariate_variables = [stats.uniform(-2, 4)]*nvars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    benchmark = Benchmark(
        {'fun': rosenbrock_function, 'jac': rosenbrock_function_jacobian,
         'hessp': rosenbrock_function_hessian_prod, 'variable': variable,
         'mean': rosenbrock_function_mean(nvars)})
    # The lambdas close over ``benchmark`` itself, so they delegate to the
    # 'fun'/'jac' entries set above (negated to form a log likelihood)
    benchmark.update(
        {'loglike': lambda x: -benchmark['fun'](x),
         'loglike_grad': lambda x: -benchmark['jac'](x)})
    return benchmark
where :math:`\rv_1,\rv_2\sim\mathcal{U}(-1,1)` and all :math:`A` and
:math:`\theta` coefficients are real. We choose to set :math:`A=\sqrt{11}`,
:math:`A_1=\sqrt{7}` and :math:`A_2=\sqrt{3}` to obtain unitary variance for
each model. The parameters :math:`s_1,s_2` control the bias between the
models. Here we set :math:`s_1=1/10,s_2=1/5`. Similarly we can change the
correlation between the models in a systematic way (by varying
:math:`\theta_1`). We will leverage this later in the tutorial.
"""
#%%
# Let us set up the problem
import pyapprox as pya
import numpy as np
import matplotlib.pyplot as plt
from pyapprox.tests.test_control_variate_monte_carlo import \
    TunableModelEnsemble
from scipy.stats import uniform

# Fix the seed so the tutorial output is reproducible
np.random.seed(1)
univariate_variables = [uniform(-1, 2), uniform(-1, 2)]
variable = pya.IndependentMultivariateRandomVariable(univariate_variables)
print(variable)
shifts = [.1, .2]
# theta1 = pi/2*0.95 controls the correlation between the models
model = TunableModelEnsemble(np.pi/2*.95, shifts=shifts)

#%%
# Now let us compute the mean of :math:`f_1` using Monte Carlo
nsamples = int(1e3)
samples = pya.generate_independent_random_samples(variable, nsamples)
values = model.m1(samples)
pya.print_statistics(samples, values)

#%%
# We can compute the exact mean using sympy and compute the MC MSE
import sympy as sp
z1, z2 = sp.Symbol('z1'), sp.Symbol('z2')
def setup_genz_function(nvars, test_name, coefficients=None):
    r"""
    Setup the Genz Benchmarks.

    For example, the two-dimensional oscillatory Genz problem can be
    defined using

    >>> from pyapprox.benchmarks.benchmarks import setup_benchmark
    >>> benchmark=setup_benchmark('genz',nvars=2,test_name='oscillatory')
    >>> print(benchmark.keys())
    dict_keys(['fun', 'mean', 'variable'])

    Parameters
    ----------
    nvars : integer
        The number of variables of the Genz function

    test_name : string
        The test_name of the specific Genz function. See notes
        for options the string needed is given in brackets
        e.g. ('oscillatory')

    coefficients : tuple (ndarray (nvars), ndarray (nvars))
        The coefficients :math:`c_i` and :math:`w_i`
        If None (default) then
        :math:`c_j = \hat{c}_j\left(\sum_{i=1}^d \hat{c}_i\right)^{-1}`
        where :math:`\hat{c}_i=(10^{-15\left(\frac{i}{d}\right)^2)})`

    Returns
    -------
    benchmark : pya.Benchmark
       Object containing the benchmark attributes

    References
    ----------
    .. [Genz1984] `Genz, A. Testing multidimensional integration routines. In Proc. of international conference on Tools, methods and languages for scientific and engineering computation (pp. 81-94), 1984 <https://dl.acm.org/doi/10.5555/2837.2842>`_

    Notes
    -----

    Corner Peak ('corner-peak')

    .. math:: f(z)=\left( 1+\sum_{i=1}^d c_iz_i\right)^{-(d+1)}

    Oscillatory ('oscillatory')

    .. math:: f(z) = \cos\left(2\pi w_1 + \sum_{i=1}^d c_iz_i\right)

    Gaussian Peak ('gaussian-peak')

    .. math:: f(z) = \exp\left( -\sum_{i=1}^d c_i^2(z_i-w_i)^2\right)

    Continuous ('continuous')

    .. math:: f(z) = \exp\left( -\sum_{i=1}^d c_i\lvert z_i-w_i\rvert\right)

    Product Peak ('product-peak')

    .. math:: f(z) = \prod_{i=1}^d \left(c_i^{-2}+(z_i-w_i)^2\right)^{-1}

    Discontinuous ('discontinuous')

    .. math:: f(z) = \begin{cases}0 & x_1>u_1 \;\mathrm{or}\; x_2>u_2\\\exp\left(\sum_{i=1}^d c_iz_i\right) & \mathrm{otherwise}\end{cases}
    """
    genz = GenzFunction(test_name, nvars)
    # Inputs are i.i.d. uniform on [0, 1]
    univariate_variables = [stats.uniform(0, 1)]*nvars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    if coefficients is None:
        genz.set_coefficients(1, 'squared-exponential-decay', 0)
    else:
        genz.c, genz.w = coefficients
    attributes = {'fun': genz, 'mean': genz.integrate(),
                  'variable': variable}
    if test_name == 'corner-peak':
        # only the corner-peak variance has a closed form implemented
        attributes['variance'] = genz.variance()
    # NOTE: removed an unused function-local
    # ``from scipy.optimize import OptimizeResult`` import
    return Benchmark(attributes)
def test_cross_validate_approximation_after_regularization_selection(self):
    """
    This test is useful as it shows how to use cross_validate_approximation
    to produce a list of approximations on each cross validation fold
    once regularization parameters have been chosen.
    These can be used to show variance in predictions of values,
    sensitivity indices, etc.

    Ideally this could be avoided if sklearn stored the coefficients
    and alphas for each fold and then we can just find the coefficients
    that correspond to the first time the path drops below the best_alpha
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)]*num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)

    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms()*2
    degrees = poly.indices.sum(axis=0)
    # random coefficients decaying with total degree so the target is
    # approximately sparse
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2))/(degrees[:, np.newaxis]+1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    # NOTE: the two permutation calls consume global RNG state in order,
    # so statement order here must be preserved for reproducibility
    II = np.random.permutation(coef.shape[0])[:coef.shape[0]//2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0]//2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    # training data is generated from the exact polynomial, so a correct
    # solver should recover it to near machine precision
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    # true_poly = poly

    result = approximate(
        train_samples, train_vals, "polynomial_chaos",
        {"basis_type": "expanding_basis", "variable": variable})

    # Even with the same folds, iterative methods such as Lars, LarsLasso
    # and OMP will not have cv_score from approximate and cross validate
    # approximation exactly the same because iterative methods interpolate
    # residuals to compute cross validation scores
    nfolds = 10
    # reuse the per-QoI regularization parameters selected above
    linear_solver_options = [
        {"alpha": result.reg_params[0]}, {"alpha": result.reg_params[1]}]
    # restrict each QoI to the basis terms with nonzero coefficients
    indices = [result.approx.indices[:, np.where(np.absolute(c) > 0)[0]]
               for c in result.approx.coefficients.T]
    options = {"basis_type": "fixed", "variable": variable,
               "options": {"linear_solver_options": linear_solver_options,
                           "indices": indices}}
    approx_list, residues_list, cv_score = \
        cross_validate_approximation(
            train_samples, train_vals, options, nfolds,
            "polynomial_chaos", random_folds="sklearn")

    # both the refit folds and the original fit should be essentially exact
    assert (np.all(cv_score < 6e-14) and np.all(result.scores < 4e-13))
Let us first consider a simple 1D example. The following sets up the problem
"""
import pyapprox as pya
from scipy.stats import uniform
from pyapprox.examples.multi_index_advection_diffusion import *
from pyapprox.models.wrappers import MultiLevelWrapper

nmodels = 3
num_vars = 1
max_eval_concurrency = 1
base_model = setup_model(num_vars, max_eval_concurrency)
# Wrap the model so the discretization level is selected through a single
# configuration variable
multilevel_model = MultiLevelWrapper(
    base_model, base_model.base_model.num_config_vars,
    base_model.cost_function)
variable = pya.IndependentMultivariateRandomVariable(
    [uniform(-np.sqrt(3), 2*np.sqrt(3))], [np.arange(num_vars)])

#%%
# Now let us plot each model as a function of the random variable
lb, ub = variable.get_statistics('interval', alpha=1)[0]
nsamples = 10
random_samples = np.linspace(lb, ub, nsamples)[np.newaxis, :]
config_vars = np.arange(nmodels)[np.newaxis, :]
# evaluate every model at every random sample
samples = pya.get_all_sample_combinations(random_samples, config_vars)
values = multilevel_model(samples)
values = np.reshape(values, (nsamples, nmodels))

import dolfin as dl
plt.figure(figsize=(nmodels*8, 2*6))
config_samples = multilevel_model.map_to_multidimensional_index(config_vars)
for ii in range(nmodels):
    nx, ny, dt = base_model.base_model.get_degrees_of_freedom_and_timestep(