def transform_initial_samples(variable, initial_points):
    loc, scale = transform_scale_parameters(variable)
    if is_bounded_continuous_variable(variable):
        bounds = [-1, 1]
        if initial_points is None:
            initial_points = np.asarray([[variable.ppf(0.5)]]).T
            initial_points = (initial_points-loc)/scale
        # initial samples must be in canonical space
        assert np.all((initial_points >= bounds[0]) &
                      (initial_points <= bounds[1]))
        return initial_points, bounds

    bounds = list(variable.interval(1))
    if variable.dist.name == 'continuous_rv_sample':
        bounds = [-np.inf, np.inf]
    if initial_points is None:
        # creating a leja sequence with initial points == 0,
        # e.g. norm(0, 1).ppf(0.5), will cause the leja sequence to
        # try to add a point at infinity, so use a different initial point
        initial_points = np.asarray([[variable.ppf(0.75)]]).T
        initial_points = (initial_points-loc)/scale
    if initial_points.shape[1] == 1:
        assert initial_points[0, 0] != 0
    return initial_points, bounds
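
# Usage sketch (illustrative only, not part of the library). Assumes
# numpy (as np) and scipy.stats (as stats) are imported and the helpers
# above are in scope. For a bounded variable the default initial point
# is the median mapped to the canonical interval [-1, 1]; for an
# unbounded variable ppf(0.75) is used so the Leja sequence does not
# start at zero.
def _example_transform_initial_samples():
    # uniform on [-2, 2]: median 0 maps to canonical 0, bounds [-1, 1]
    points, bounds = transform_initial_samples(stats.uniform(-2, 4), None)
    assert bounds == [-1, 1] and np.allclose(points, [[0.]])
    # standard normal: ppf(0.75) ~= 0.6745, bounds (-inf, inf)
    points, bounds = transform_initial_samples(stats.norm(0, 1), None)
    assert bounds == [-np.inf, np.inf] and points[0, 0] > 0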
def __init__(self, variable, enforce_bounds=False):
    """
    Variable uniqueness depends both on the type of random variable,
    e.g. beta, gaussian, etc., and on the parameters of that
    distribution, e.g. the loc and scale parameters, as well as any
    additional shape parameters.
    """
    if (type(variable) != IndependentMultivariateRandomVariable):
        variable = IndependentMultivariateRandomVariable(variable)
    self.variable = variable
    self.enforce_bounds = enforce_bounds
    self.identity_map_indices = None

    self.scale_parameters = np.empty((self.variable.nunique_vars, 2))
    for ii in range(self.variable.nunique_vars):
        var = self.variable.unique_variables[ii]
        # transform_scale_parameters extracts the loc/scale of var and,
        # for bounded continuous and float_rv_discrete variables (other
        # than discrete_chebyshev), rescales them so the variable maps
        # to the canonical interval [-1, 1]
        self.scale_parameters[ii, :] = transform_scale_parameters(var)
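
# Usage sketch (illustrative only). The class name below is an
# assumption: this __init__ matches pyapprox's affine variable
# transformation, so a plausible name is used; substitute the actual
# class this method belongs to.
def _example_affine_transformation():
    variables = [stats.uniform(0, 1), stats.norm(0, 1)]
    trans = AffineRandomVariableTransformation(variables)  # assumed name
    # one (loc, scale) pair is cached per unique variable
    assert trans.scale_parameters.shape == (2, 2)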
def test_get_univariate_leja_rule_bounded_discrete(self):
    growth_rule = partial(constant_increment_growth_rule, 2)
    level = 3

    nmasses = 20
    xk = np.array(range(0, nmasses), dtype='float')
    pk = np.ones(nmasses)/nmasses
    var_cheb = float_rv_discrete(
        name='discrete_chebyshev', values=(xk, pk))()

    for variable in [var_cheb, stats.binom(17, 0.5),
                     stats.hypergeom(10+10, 10, 9)]:
        quad_rule = get_univariate_leja_quadrature_rule(
            variable, growth_rule)
        x, w = quad_rule(level)
        loc, scale = transform_scale_parameters(variable)
        x = x*scale+loc

        xk, pk = get_probability_masses(variable)
        print(x, xk, loc, scale)

        degree = (x.shape[0]-1)
        true_moment = (xk**degree).dot(pk)
        moment = (x**degree).dot(w[-1])
        print(moment, true_moment, variable.dist.name)
        assert np.allclose(moment, true_moment)
def test_get_univariate_leja_rule_float_rv_discrete(self):
    nmasses = 20
    xk = np.array(range(1, nmasses+1), dtype='float')
    pk = np.ones(nmasses)/nmasses
    variable = float_rv_discrete(
        name='float_rv_discrete', values=(xk, pk))()
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_univariate_leja_quadrature_rule(
        variable, growth_rule, orthonormality_tol=1e-10,
        return_weights_for_all_levels=False)
    level = 3
    x, w = quad_rule(level)
    loc, scale = transform_scale_parameters(variable)
    x = x*scale+loc

    degree = x.shape[0]-1
    true_moment = (xk**degree).dot(pk)
    moment = (x**degree).dot(w)
    # print(moment, true_moment)
    assert np.allclose(moment, true_moment)
def ortho_polynomial_grammian_bounded_continuous_variable(
        var, ab, degree, tol, integrate_fun=None):
    """
    Compute the inner products of all pairs of polynomials up to and
    including degree. Useful for testing that the polynomials are
    orthonormal: the Gram matrix should always be the identity (modulo
    errors due to quadrature).
    """
    if ab.shape[0] < degree+1:
        raise ValueError("Not enough recursion coefficients")

    loc, scale = transform_scale_parameters(var)
    if is_bounded_continuous_variable(var):
        can_lb, can_ub = -1, 1
    else:
        lb, ub = var.interval(1)
        can_lb = (lb-loc)/scale
        can_ub = (ub-loc)/scale

    def default_integrate(integrand):
        result = scipy.integrate.quad(
            integrand, can_lb, can_ub, epsabs=tol, epsrel=tol)
        return result[0]

    if integrate_fun is None:
        integrate = default_integrate
    else:
        integrate = partial(integrate_fun, can_lb, can_ub)

    def fun(order1, order2):
        order = max(order1, order2)

        def integrand(x):
            x = np.atleast_1d(x)
            basis_mat = evaluate_orthonormal_polynomial_1d(x, order, ab)
            return var.pdf(x*scale+loc)*scale*(
                basis_mat[:, order1]*basis_mat[:, order2])

        return integrate(integrand)

    vec_fun = np.vectorize(fun)
    indices = cartesian_product(
        (np.arange(degree+1), np.arange(degree+1)))
    gram_mat = vec_fun(indices[0, :], indices[1, :])
    return gram_mat.reshape((degree+1, degree+1))
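
# Sanity-check sketch (illustrative only): for a uniform variable on
# [-1, 1], the orthonormal Legendre recursion coefficients, obtained
# from jacobi_recurrence with alpha = beta = 0 as in the tests below,
# should yield an identity Gram matrix up to quadrature error.
def _example_grammian_is_identity():
    degree = 5
    rv = stats.uniform(-1, 2)  # support [-1, 1]
    ab = jacobi_recurrence(degree+1, 0, 0)
    gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
        rv, ab, degree, tol=1e-12)
    assert np.absolute(gram_mat-np.eye(degree+1)).max() < 1e-8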
def get_discrete_univariate_leja_quadrature_rule(
        variable, growth_rule, initial_points=None,
        orthonormality_tol=1e-12, return_weights_for_all_levels=True,
        recursion_opts=None):
    from pyapprox.variables import get_probability_masses, \
        is_bounded_discrete_variable
    var_name = get_distribution_info(variable)[0]
    if is_bounded_discrete_variable(variable):
        xk, pk = get_probability_masses(variable)
        loc, scale = transform_scale_parameters(variable)
        xk = (xk-loc)/scale

        if initial_points is None:
            initial_points = (np.atleast_2d(
                [variable.ppf(0.5)])-loc)/scale
        # initial samples must be in canonical space
        assert np.all((initial_points >= -1) & (initial_points <= 1))
        assert np.all((xk >= -1) & (xk <= 1))

        def generate_candidate_samples(num_samples):
            return xk[None, :]

        if recursion_opts is None:
            recursion_opts = {"orthonormality_tol": orthonormality_tol}
        ab = get_recursion_coefficients_from_variable(
            variable, xk.shape[0], recursion_opts)

        quad_rule = partial(
            candidate_based_christoffel_leja_rule_1d, ab,
            generate_candidate_samples, xk.shape[0],
            growth_rule=growth_rule, initial_points=initial_points,
            return_weights_for_all_levels=return_weights_for_all_levels)
        return quad_rule

    raise ValueError('var_name %s not implemented' % var_name)
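
# Usage sketch (illustrative only), mirroring the bounded-discrete test
# above: build a Leja rule for a binomial variable and map the canonical
# points back to the variable's domain.
def _example_discrete_leja_rule():
    variable = stats.binom(17, 0.5)
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_discrete_univariate_leja_quadrature_rule(
        variable, growth_rule)
    x, w = quad_rule(3)  # w holds one weight vector per level by default
    loc, scale = transform_scale_parameters(variable)
    x = x*scale+loc  # points in the binomial variable's domain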
def test_predictor_corrector_known_pdf(self):
    nterms = 12
    tol = 1e-12
    quad_options = {'epsrel': tol, 'epsabs': tol, "limlst": 10,
                    "limit": 1000}

    rv = stats.beta(1, 1, -1, 2)
    ab = predictor_corrector_known_pdf(
        nterms, -1, 1, rv.pdf, quad_options)
    true_ab = jacobi_recurrence(nterms, 0, 0)
    assert np.allclose(ab, true_ab)

    rv = stats.beta(3, 3, -1, 2)
    ab = predictor_corrector_known_pdf(
        nterms, -1, 1, rv.pdf, quad_options)
    true_ab = jacobi_recurrence(nterms, 2, 2)
    assert np.allclose(ab, true_ab)

    rv = stats.norm(0, 2)
    loc, scale = transform_scale_parameters(rv)
    ab = predictor_corrector_known_pdf(
        nterms, -np.inf, np.inf,
        lambda x: rv.pdf(x*scale+loc)*scale, quad_options)
    true_ab = hermite_recurrence(nterms)
    assert np.allclose(ab, true_ab)

    # The lognormal is a very hard test
    # rv = stats.lognorm(1)
    # custom_integrate_fun = native_recursion_integrate_fun
    # interval_size = abs(np.diff(rv.interval(0.99)))
    # integrate_fun = partial(custom_integrate_fun, interval_size)
    # quad_opts = {"integrate_fun": integrate_fun}
    # # quad_opts = {}
    # opts = {"numeric": True, "quad_options": quad_opts}
    # loc, scale = transform_scale_parameters(rv)
    # ab = predictor_corrector_known_pdf(
    #     nterms, 0, np.inf, lambda x: rv.pdf(x*scale+loc)*scale, opts)
    # for ii in range(1, nterms):
    #     assert np.all(gauss_quadrature(ab, ii)[0] > 0)
    # gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
    #     rv, ab, nterms-1, tol=tol, integrate_fun=integrate_fun)
    # # print(gram_mat-np.eye(gram_mat.shape[0]))
    # # print(np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max())
    # assert np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max() < 5e-10

    nterms = 2
    mean, std = 1e4, 7.5e3
    beta = std*np.sqrt(6)/np.pi
    mu = mean-beta*np.euler_gamma
    # mu, beta = 1, 1
    rv = stats.gumbel_r(loc=mu, scale=beta)
    custom_integrate_fun = native_recursion_integrate_fun
    from numpy.polynomial.legendre import leggauss
    tabulated_quad_rules = {}
    for nquad_samples in [100, 200, 400]:
        tabulated_quad_rules[nquad_samples] = leggauss(nquad_samples)
    # interval_size must be in the canonical domain
    interval_size = abs(np.diff(rv.interval(0.99)))/beta
    integrate_fun = partial(
        custom_integrate_fun, interval_size,
        tabulated_quad_rules=tabulated_quad_rules, verbose=3)
    quad_opts = {"integrate_fun": integrate_fun}
    # quad_opts = {}
    opts = {"numeric": True, "quad_options": quad_opts}
    loc, scale = transform_scale_parameters(rv)
    ab = predictor_corrector_known_pdf(
        nterms, -np.inf, np.inf,
        lambda x: rv.pdf(x*scale+loc)*scale, opts)
    gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
        rv, ab, nterms-1, tol=tol, integrate_fun=integrate_fun)
    # print(gram_mat-np.eye(gram_mat.shape[0]))
    print(np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max())
    assert np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max() < 5e-10
def test_get_recursion_coefficients_from_variable_discrete(self):
    degree = 4
    N = 10
    scipy_discrete_var_names = [
        n for n in stats._discrete_distns._distn_names]
    discrete_var_names = [
        "binom", "bernoulli", "nbinom", "geom", "hypergeom", "logser",
        "poisson", "planck", "boltzmann", "randint", "zipf",
        "dlaplace", "skellam", "yulesimon"]
    # valid shape parameters for each distribution in names;
    # there is a one-to-one correspondence between entries
    discrete_var_shapes = [
        {"n": 10, "p": 0.5}, {"p": 0.5}, {"n": 10, "p": 0.5},
        {"p": 0.5}, {"M": 20, "n": 7, "N": 12}, {"p": 0.5},
        {"mu": 1}, {"lambda_": 1}, {"lambda_": 2, "N": 10},
        {"low": 0, "high": 10}, {"a": 2}, {"a": 1},
        {"mu1": 1, "mu2": 3}, {"alpha": 1}]

    for name in scipy_discrete_var_names:
        assert name in discrete_var_names

    # do not support:
    # yulesimon, as there is a bug when interval is called from a
    #     frozen variable
    # bernoulli, which only has two masses
    # zipf, an unusual distribution for which it is difficult to
    #     compute a basis
    # crystalball is discontinuous and requires a special integrator;
    #     this can be developed if needed
    unsupported_discrete_var_names = ["bernoulli", "yulesimon", "zipf"]
    for name in unsupported_discrete_var_names:
        ii = discrete_var_names.index(name)
        del discrete_var_names[ii]
        del discrete_var_shapes[ii]

    for name, shapes in zip(discrete_var_names, discrete_var_shapes):
        # print(name)
        var = getattr(stats, name)(**shapes)
        xk, pk = get_probability_masses(var, 1e-15)
        loc, scale = transform_scale_parameters(var)
        xk = (xk-loc)/scale
        ab = get_recursion_coefficients_from_variable(
            var, degree+1, {"orthonormality_tol": 3e-14,
                            "truncated_probability_tol": 1e-15,
                            "numeric": False})
        basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
        gram_mat = (basis_mat*pk[:, None]).T.dot(basis_mat)
        assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]),
                           atol=2e-8)

    # custom discrete variables
    xk1, pk1 = np.arange(N), np.ones(N)/N
    xk2, pk2 = np.arange(N)**2, np.ones(N)/N
    custom_vars = [
        float_rv_discrete(name="discrete_chebyshev",
                          values=(xk1, pk1))(),
        float_rv_discrete(name="float_rv_discrete",
                          values=(xk2, pk2))()]
    for var in custom_vars:
        xk, pk = get_probability_masses(var, 1e-15)
        loc, scale = transform_scale_parameters(var)
        xk = (xk-loc)/scale
        ab = get_recursion_coefficients_from_variable(
            var, degree+1, {"orthonormality_tol": 1e-14,
                            "truncated_probability_tol": 1e-15})
        basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
        gram_mat = (basis_mat*pk[:, None]).T.dot(basis_mat)
        assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]),
                           atol=2e-8)
def get_recursion_coefficients_from_variable(var, num_coefs, opts):
    """
    Generate polynomial recursion coefficients by inspecting a random
    variable.
    """
    var_name, _, shapes = get_distribution_info(var)
    if var_name == "continuous_monomial":
        return None

    loc, scale = transform_scale_parameters(var)

    if var_name == "rv_function_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_function_independent_vars_recursion_coefficients(
            shapes, num_coefs)
    if var_name == "rv_product_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_product_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if (var_name in askey_variable_names and
            opts.get("numeric", False) is False):
        return get_askey_recursion_coefficients_from_variable(
            var, num_coefs)

    orthonormality_tol = opts.get("orthonormality_tol", 1e-8)
    truncated_probability_tol = opts.get("truncated_probability_tol", 0)
    if (not is_continuous_variable(var)):
        # shapes is a dict, so test membership with "in" rather than
        # hasattr, which would always return False here
        if "xk" in shapes:
            xk, pk = shapes["xk"], shapes["pk"]
        else:
            xk, pk = get_probability_masses(
                var, truncated_probability_tol)
        xk = (xk-loc)/scale
        return get_numerically_generated_recursion_coefficients_from_samples(
            xk, pk, num_coefs, orthonormality_tol,
            truncated_probability_tol)

    # integration is performed in the canonical domain, so the pdf must
    # be evaluated after mapping back to the domain of the variable
    lb, ub = var.interval(1)

    # get a version of var.pdf without error checking, which runs much
    # faster
    pdf = get_pdf(var)

    def canonical_pdf(x):
        return pdf(x*scale+loc)*scale

    if (is_bounded_continuous_variable(var) or
            is_bounded_discrete_variable(var)):
        can_lb, can_ub = -1, 1
    elif is_continuous_variable(var):
        can_lb = (lb-loc)/scale
        can_ub = (ub-loc)/scale
    return predictor_corrector_known_pdf(
        num_coefs, can_lb, can_ub, canonical_pdf, opts)
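
# Usage sketch (illustrative only): for an Askey variable the analytic
# coefficients are returned unless opts requests the numeric
# predictor-corrector path; the two should agree up to quadrature error.
# The empty quad_options dict assumes the default quadrature settings
# suffice for this smooth bounded density.
def _example_recursion_coefficients():
    rv = stats.beta(3, 3, -1, 2)  # Jacobi weight with alpha = beta = 2
    ab_askey = get_recursion_coefficients_from_variable(rv, 10, {})
    ab_numeric = get_recursion_coefficients_from_variable(
        rv, 10, {"numeric": True, "quad_options": {}})
    assert np.allclose(ab_askey, ab_numeric, atol=1e-8)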