Example #1
def transform_initial_samples(variable, initial_points):
    loc, scale = transform_scale_parameters(variable)
    if is_bounded_continuous_variable(variable):
        bounds = [-1, 1]
        if initial_points is None:
            initial_points = np.asarray([[variable.ppf(0.5)]]).T
            initial_points = (initial_points - loc) / scale
        # initial samples must be in canonical space
        assert np.all((initial_points >= bounds[0])
                      & (initial_points <= bounds[1]))
        return initial_points, bounds

    bounds = list(variable.interval(1))
    if variable.dist.name == 'continuous_rv_sample':
        bounds = [-np.inf, np.inf]
    if initial_points is None:
        # Creating a Leja sequence with an initial point of 0
        # (e.g. norm(0, 1).ppf(0.5)) will cause the sequence to try to
        # add a point at infinity, so use a different initial point.
        initial_points = np.asarray([[variable.ppf(0.75)]]).T
        initial_points = (initial_points - loc) / scale
    if initial_points.shape[1] == 1:
        assert initial_points[0, 0] != 0

    return initial_points, bounds
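
# Usage sketch (added for illustration; assumes the scipy/pyapprox imports
# used by the snippet above are in scope). For a bounded variable the
# returned points lie in the canonical interval [-1, 1]; for an unbounded
# one the 0.75 quantile is used so the Leja sequence does not start at 0.
def _demo_transform_initial_samples():
    points, bounds = transform_initial_samples(stats.uniform(0, 2), None)
    assert bounds == [-1, 1] and np.all(np.abs(points) <= 1)
    points, bounds = transform_initial_samples(stats.norm(0, 1), None)
    assert bounds == [-np.inf, np.inf] and points[0, 0] != 0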
Example #2
    def __init__(self, variable, enforce_bounds=False):
        """
        Variable uniquness dependes on both the type of random variable
        e.g. beta, gaussian, etc. and the parameters of that distribution
        e.g. loc and scale parameters as well as any additional parameters
        """
        if (type(variable) != IndependentMultivariateRandomVariable):
            variable = IndependentMultivariateRandomVariable(variable)
        self.variable = variable
        self.enforce_bounds = enforce_bounds
        self.identity_map_indices = None

        self.scale_parameters = np.empty((self.variable.nunique_vars, 2))
        for ii in range(self.variable.nunique_vars):
            var = self.variable.unique_variables[ii]
            # name, scale_dict, __ = get_distribution_info(var)
            # copy is essential here because code below modifies scale
            # loc, scale = scale_dict['loc'].copy(), scale_dict['scale'].copy()
            # if (is_bounded_continuous_variable(var) or
            #     (type(var.dist) == float_rv_discrete and
            #      var.dist.name != 'discrete_chebyshev')):
            #     lb, ub = -1, 1
            #     scale /= (ub-lb)
            #     loc = loc-scale*lb
            self.scale_parameters[ii, :] = transform_scale_parameters(var)
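
# Illustration (added; assumes pyapprox's transform_scale_parameters is in
# scope). Per the commented-out logic above, a bounded variable on [lb, ub]
# is mapped to the canonical interval [-1, 1]; e.g. uniform(0, 2) gives
# loc = 1, scale = 1 so that x_canonical = (x - loc) / scale.
def _demo_transform_scale_parameters():
    loc, scale = transform_scale_parameters(stats.uniform(0, 2))
    # the endpoints 0 and 2 map to -1 and 1 in canonical space
    assert np.allclose((np.array([0., 2.]) - loc) / scale, [-1, 1])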
Example #3
    def test_get_univariate_leja_rule_bounded_discrete(self):
        growth_rule = partial(constant_increment_growth_rule, 2)
        level = 3

        nmasses = 20
        xk = np.array(range(0, nmasses), dtype='float')
        pk = np.ones(nmasses) / nmasses
        var_cheb = float_rv_discrete(name='discrete_chebyshev',
                                     values=(xk, pk))()

        for variable in [
                var_cheb,
                stats.binom(17, 0.5),
                stats.hypergeom(10 + 10, 10, 9)
        ]:
            quad_rule = get_univariate_leja_quadrature_rule(
                variable, growth_rule)

            x, w = quad_rule(level)
            loc, scale = transform_scale_parameters(variable)
            x = x * scale + loc

            xk, pk = get_probability_masses(variable)
            print(x, xk, loc, scale)

            degree = (x.shape[0] - 1)
            true_moment = (xk**degree).dot(pk)
            moment = (x**degree).dot(w[-1])

            print(moment, true_moment, variable.dist.name)
            assert np.allclose(moment, true_moment)
Example #4
    def test_get_univariate_leja_rule_float_rv_discrete(self):
        nmasses = 20
        xk = np.array(range(1, nmasses + 1), dtype='float')
        pk = np.ones(nmasses) / nmasses
        variable = float_rv_discrete(name='float_rv_discrete',
                                     values=(xk, pk))()

        growth_rule = partial(constant_increment_growth_rule, 2)
        quad_rule = get_univariate_leja_quadrature_rule(
            variable,
            growth_rule,
            orthonormality_tol=1e-10,
            return_weights_for_all_levels=False)
        level = 3

        x, w = quad_rule(level)
        loc, scale = transform_scale_parameters(variable)
        x = x * scale + loc

        degree = x.shape[0] - 1
        true_moment = (xk**degree).dot(pk)
        moment = (x**degree).dot(w)

        # print(moment, true_moment)
        assert np.allclose(moment, true_moment)
Example #5
def transform_initial_samples(variable, initial_points):
    loc, scale = transform_scale_parameters(variable)
    if is_bounded_continuous_variable(variable):
        bounds = [-1, 1]
        if initial_points is None:
            initial_points = np.asarray([[variable.ppf(0.5)]]).T
            initial_points = (initial_points - loc) / scale
        # # initial samples must be in canonical space
        # eps = 10*np.finfo(float).eps
        # if np.any((initial_points < bounds[0]-eps) |
        #           (initial_points > bounds[1]+eps)):
        #     print(initial_points - bounds[0], bounds[1]-initial_points)
        #     raise RuntimeError("initial points out of bounds")
        return initial_points, bounds

    bounds = list(variable.interval(1))
    if variable.dist.name == 'continuous_rv_sample':
        bounds = [-np.inf, np.inf]
    if initial_points is None:
        # Creating a Leja sequence with an initial point of 0
        # (e.g. norm(0, 1).ppf(0.5)) will cause the sequence to try to
        # add a point at infinity, so use a different initial point.
        initial_points = np.asarray([[variable.ppf(0.75)]]).T
        initial_points = (initial_points - loc) / scale
    if initial_points.shape[1] == 1:
        assert initial_points[0, 0] != 0

    return initial_points, bounds
Example #6
def ortho_polynomial_grammian_bounded_continuous_variable(
        var, ab, degree, tol, integrate_fun=None):
    """
    Compute the inner product of all polynomials up to and including
    degree. Useful for testing that the polynomials are orthonormal.
    The grammian should always be the identity (modulo errors due to
    quadrature)
    """
    if ab.shape[0] < degree + 1:
        raise ValueError("Not enough recursion coefficients")

    loc, scale = transform_scale_parameters(var)
    if is_bounded_continuous_variable(var):
        can_lb, can_ub = -1, 1
    else:
        lb, ub = var.interval(1)
        can_lb = (lb - loc) / scale
        can_ub = (ub - loc) / scale

    def default_integrate(integrand):
        result = scipy.integrate.quad(integrand,
                                      can_lb,
                                      can_ub,
                                      epsabs=tol,
                                      epsrel=tol)
        return result[0]

    if integrate_fun is None:
        integrate = default_integrate
    else:
        integrate = partial(integrate_fun, can_lb, can_ub)

    def fun(order1, order2):
        order = max(order1, order2)

        def integrand(x):
            x = np.atleast_1d(x)
            basis_mat = evaluate_orthonormal_polynomial_1d(x, order, ab)
            return var.pdf(x * scale + loc) * scale * (basis_mat[:, order1] *
                                                       basis_mat[:, order2])

        return integrate(integrand)

    vec_fun = np.vectorize(fun)
    indices = cartesian_product((np.arange(degree + 1), np.arange(degree + 1)))
    gram_mat = vec_fun(indices[0, :], indices[1, :])
    return gram_mat.reshape((degree + 1, degree + 1))
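
# Usage sketch (added; assumes pyapprox's jacobi_recurrence is in scope, as
# in the tests below). For a uniform variable on [-1, 1] the Legendre
# recursion coefficients should yield an identity Gram matrix up to
# quadrature error.
def _demo_grammian():
    nterms = 5
    ab = jacobi_recurrence(nterms, 0, 0)  # Legendre: alpha = beta = 0
    gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
        stats.uniform(-1, 2), ab, nterms - 1, tol=1e-12)
    assert np.allclose(gram_mat, np.eye(nterms), atol=1e-8)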
Example #7
def get_discrete_univariate_leja_quadrature_rule(
        variable,
        growth_rule,
        initial_points=None,
        orthonormality_tol=1e-12,
        return_weights_for_all_levels=True,
        recursion_opts=None):
    from pyapprox.variables import get_probability_masses, \
        is_bounded_discrete_variable
    var_name = get_distribution_info(variable)[0]
    if is_bounded_discrete_variable(variable):
        xk, pk = get_probability_masses(variable)
        loc, scale = transform_scale_parameters(variable)
        xk = (xk - loc) / scale

        if initial_points is None:
            initial_points = (np.atleast_2d([variable.ppf(0.5)]) - loc) / scale
        # initial samples must be in canonical space
        assert np.all((initial_points >= -1) & (initial_points <= 1))
        assert np.all((xk >= -1) & (xk <= 1))

        def generate_candidate_samples(num_samples):
            return xk[None, :]

        if recursion_opts is None:
            recursion_opts = {"orthonormality_tol": orthonormality_tol}
        ab = get_recursion_coefficients_from_variable(variable, xk.shape[0],
                                                      recursion_opts)

        quad_rule = partial(
            candidate_based_christoffel_leja_rule_1d,
            ab,
            generate_candidate_samples,
            xk.shape[0],
            growth_rule=growth_rule,
            initial_points=initial_points,
            return_weights_for_all_levels=return_weights_for_all_levels)
        return quad_rule

    raise ValueError('var_name %s not implemented' % var_name)
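
# Usage sketch (added; mirrors the tests in the earlier examples and assumes
# the same imports). Build a Leja rule for a bounded discrete variable; by
# default the returned weights hold one array per level, and a quadrature
# rule for a probability measure should have weights summing to one.
def _demo_discrete_leja():
    growth_rule = partial(constant_increment_growth_rule, 2)
    quad_rule = get_discrete_univariate_leja_quadrature_rule(
        stats.binom(17, 0.5), growth_rule)
    x, w = quad_rule(2)
    assert np.allclose(w[-1].sum(), 1)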
Example #8
    def test_predictor_corrector_known_pdf(self):
        nterms = 12
        tol = 1e-12
        quad_options = {
            'epsrel': tol,
            'epsabs': tol,
            "limlst": 10,
            "limit": 1000
        }

        rv = stats.beta(1, 1, -1, 2)
        ab = predictor_corrector_known_pdf(nterms, -1, 1, rv.pdf, quad_options)
        true_ab = jacobi_recurrence(nterms, 0, 0)
        assert np.allclose(ab, true_ab)

        rv = stats.beta(3, 3, -1, 2)
        ab = predictor_corrector_known_pdf(nterms, -1, 1, rv.pdf, quad_options)
        true_ab = jacobi_recurrence(nterms, 2, 2)
        assert np.allclose(ab, true_ab)

        rv = stats.norm(0, 2)
        loc, scale = transform_scale_parameters(rv)
        ab = predictor_corrector_known_pdf(
            nterms, -np.inf, np.inf, lambda x: rv.pdf(x * scale + loc) * scale,
            quad_options)
        true_ab = hermite_recurrence(nterms)
        assert np.allclose(ab, true_ab)

        # lognormal is a very hard test
        # rv = stats.lognorm(1)
        # custom_integrate_fun = native_recursion_integrate_fun
        # interval_size = abs(np.diff(rv.interval(0.99)))
        # integrate_fun = partial(custom_integrate_fun, interval_size)
        # quad_opts = {"integrate_fun": integrate_fun}
        # # quad_opts = {}
        # opts = {"numeric": True, "quad_options": quad_opts}

        # loc, scale = transform_scale_parameters(rv)
        # ab = predictor_corrector_known_pdf(
        #     nterms, 0, np.inf, lambda x: rv.pdf(x*scale+loc)*scale, opts)
        # for ii in range(1, nterms):
        #     assert np.all(gauss_quadrature(ab, ii)[0] > 0)
        # gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
        #     rv, ab, nterms-1, tol=tol, integrate_fun=integrate_fun)
        # # print(gram_mat-np.eye(gram_mat.shape[0]))
        # # print(np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max())
        # assert np.absolute(gram_mat-np.eye(gram_mat.shape[0])).max() < 5e-10

        nterms = 2
        mean, std = 1e4, 7.5e3
        beta = std * np.sqrt(6) / np.pi
        mu = mean - beta * np.euler_gamma
        # mu, beta = 1, 1
        rv = stats.gumbel_r(loc=mu, scale=beta)
        custom_integrate_fun = native_recursion_integrate_fun
        tabulated_quad_rules = {}
        from numpy.polynomial.legendre import leggauss
        for nquad_samples in [100, 200, 400]:
            tabulated_quad_rules[nquad_samples] = leggauss(nquad_samples)
        # interval_size must be in canonical domain
        interval_size = abs(np.diff(rv.interval(0.99))) / beta
        integrate_fun = partial(custom_integrate_fun,
                                interval_size,
                                tabulated_quad_rules=tabulated_quad_rules,
                                verbose=3)
        quad_opts = {"integrate_fun": integrate_fun}
        # quad_opts = {}
        opts = {"numeric": True, "quad_options": quad_opts}

        loc, scale = transform_scale_parameters(rv)
        ab = predictor_corrector_known_pdf(
            nterms, -np.inf, np.inf, lambda x: rv.pdf(x * scale + loc) * scale,
            opts)
        gram_mat = ortho_polynomial_grammian_bounded_continuous_variable(
            rv, ab, nterms - 1, tol=tol, integrate_fun=integrate_fun)
        # print(gram_mat-np.eye(gram_mat.shape[0]))
        print(np.absolute(gram_mat - np.eye(gram_mat.shape[0])).max())
        assert np.absolute(gram_mat - np.eye(gram_mat.shape[0])).max() < 5e-10
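        # Added sanity check of the Gumbel parameterization used above:
        # gumbel_r(loc=mu, scale=beta) has mean = mu + beta*euler_gamma and
        # std = beta*pi/sqrt(6), which is how mu and beta were derived.
        assert np.allclose([rv.mean(), rv.std()], [mean, std])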
Example #9
    def test_get_recursion_coefficients_from_variable_discrete(self):
        degree = 4
        N = 10
        scipy_discrete_var_names = [
            n for n in stats._discrete_distns._distn_names
        ]
        discrete_var_names = [
            "binom", "bernoulli", "nbinom", "geom", "hypergeom", "logser",
            "poisson", "planck", "boltzmann", "randint", "zipf", "dlaplace",
            "skellam", "yulesimon"
        ]
        # valid shape parameters for each distribution in discrete_var_names;
        # the entries correspond one-to-one with that list
        discrete_var_shapes = [
            {"n": 10, "p": 0.5}, {"p": 0.5}, {"n": 10, "p": 0.5}, {"p": 0.5},
            {"M": 20, "n": 7, "N": 12}, {"p": 0.5}, {"mu": 1},
            {"lambda_": 1}, {"lambda_": 2, "N": 10}, {"low": 0, "high": 10},
            {"a": 2}, {"a": 1}, {"mu1": 1, "mu2": 3}, {"alpha": 1}
        ]

        for name in scipy_discrete_var_names:
            assert name in discrete_var_names

        # The following are not supported:
        #    yulesimon: there is a bug when interval is called
        #        on a frozen variable
        #    bernoulli: it only has two masses
        #    zipf: an unusual distribution whose basis is difficult to compute
        #    crystalball: discontinuous and requires a special integrator;
        #        this can be developed if needed
        unsupported_discrete_var_names = ["bernoulli", "yulesimon", "zipf"]
        for name in unsupported_discrete_var_names:
            ii = discrete_var_names.index(name)
            del discrete_var_names[ii]
            del discrete_var_shapes[ii]

        for name, shapes in zip(discrete_var_names, discrete_var_shapes):
            # print(name)
            var = getattr(stats, name)(**shapes)
            xk, pk = get_probability_masses(var, 1e-15)
            loc, scale = transform_scale_parameters(var)
            xk = (xk - loc) / scale
            ab = get_recursion_coefficients_from_variable(
                var, degree + 1, {
                    "orthonormality_tol": 3e-14,
                    "truncated_probability_tol": 1e-15,
                    "numeric": False
                })
            basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
            gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
            assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)

        # custom discrete variables
        xk1, pk1 = np.arange(N), np.ones(N) / N
        xk2, pk2 = np.arange(N)**2, np.ones(N) / N
        custom_vars = [
            float_rv_discrete(name="discrete_chebyshev", values=(xk1, pk1))(),
            float_rv_discrete(name="float_rv_discrete", values=(xk2, pk2))()
        ]
        for var in custom_vars:
            xk, pk = get_probability_masses(var, 1e-15)
            loc, scale = transform_scale_parameters(var)
            xk = (xk - loc) / scale
            ab = get_recursion_coefficients_from_variable(
                var, degree + 1, {
                    "orthonormality_tol": 1e-14,
                    "truncated_probability_tol": 1e-15
                })
            basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
            gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
            assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)
Example #10
def get_recursion_coefficients_from_variable(var, num_coefs, opts):
    """
    Generate polynomial recursion coefficients by inspecting a random variable.
    """
    var_name, _, shapes = get_distribution_info(var)
    if var_name == "continuous_monomial":
        return None

    loc, scale = transform_scale_parameters(var)

    if var_name == "rv_function_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_function_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if var_name == "rv_product_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_product_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if (var_name in askey_variable_names
            and opts.get("numeric", False) is False):
        return get_askey_recursion_coefficients_from_variable(var, num_coefs)

    orthonormality_tol = opts.get("orthonormality_tol", 1e-8)
    truncated_probability_tol = opts.get("truncated_probability_tol", 0)
    if (not is_continuous_variable(var)):
        # shapes is a dict, so test for the key rather than an attribute
        if "xk" in shapes:
            xk, pk = shapes["xk"], shapes["pk"]
        else:
            xk, pk = get_probability_masses(var, truncated_probability_tol)
        xk = (xk - loc) / scale

        return get_numerically_generated_recursion_coefficients_from_samples(
            xk, pk, num_coefs, orthonormality_tol, truncated_probability_tol)

    # integration is performed in the canonical domain, so we need to map
    # back to the domain of the pdf
    lb, ub = var.interval(1)

    # Get a version of var.pdf without error checking, which runs much faster
    pdf = get_pdf(var)

    def canonical_pdf(x):
        # print(x, lb, ub, x*scale+loc)
        # print(var.pdf(x*scale+loc)*scale)
        # assert np.all(x*scale+loc >= lb) and np.all(x*scale+loc <= ub)
        return pdf(x * scale + loc) * scale
        # return var.pdf(x*scale+loc)*scale

    if (is_bounded_continuous_variable(var)
            or is_bounded_discrete_variable(var)):
        can_lb, can_ub = -1, 1
    elif is_continuous_variable(var):
        can_lb = (lb - loc) / scale
        can_ub = (ub - loc) / scale

    return predictor_corrector_known_pdf(num_coefs, can_lb, can_ub,
                                         canonical_pdf, opts)
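
# Usage sketch (added; assumes the pyapprox helpers above are in scope). A
# uniform variable takes the Askey branch and returns Legendre coefficients;
# passing {"numeric": True} would force the predictor-corrector path instead.
def _demo_recursion_coefficients():
    ab = get_recursion_coefficients_from_variable(
        stats.uniform(-1, 2), 5, {"numeric": False})
    assert ab.shape[0] >= 5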