def plot_discrete_distribution_surface_2d(rv1, rv2, ax=None):
    """
    Only works if rv1 and rv2 are defined on consecutive integers
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from pyapprox.utilities import cartesian_product, outer_product
    from pyapprox.variables import get_probability_masses

    if ax is None:
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
    x_1d = [get_probability_masses(rv)[0] for rv in [rv1, rv2]]
    w_1d = [get_probability_masses(rv)[1] for rv in [rv1, rv2]]
    samples = cartesian_product(x_1d)
    weights = outer_product(w_1d)

    dz = weights
    cmap = cm.get_cmap('jet')  # desired colormap; change as needed
    # scale each bar height to [0, 1] and map it to an RGBA color;
    # guard against a zero range when all weights are equal
    min_height = np.min(dz)
    height_range = max(np.max(dz) - min_height, np.finfo(float).eps)
    rgba = [cmap((k - min_height) / height_range) for k in dz]
    # Only works if rv1 and rv2 are defined on consecutive integers
    dx, dy = 1, 1
    ax.bar3d(samples[0, :], samples[1, :], 0, dx, dy, dz, color=rgba,
             zsort='average')

    angle = 45
    ax.view_init(10, angle)
    ax.set_axis_off()
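
A minimal usage sketch (an illustration, not part of the library's examples); both variables must be defined on consecutive integers, as the docstring notes:

import matplotlib.pyplot as plt
from scipy import stats

# two independent binomial variables, with masses on 0, 1, ..., 10
rv1 = stats.binom(10, 0.5)
rv2 = stats.binom(10, 0.3)
plot_discrete_distribution_surface_2d(rv1, rv2)
plt.show()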
Example #2
    def test_get_univariate_leja_rule_bounded_discrete(self):
        from scipy import stats
        growth_rule = partial(constant_increment_growth_rule, 2)
        level = 3

        nmasses = 20
        xk = np.array(range(0, nmasses), dtype='float')
        pk = np.ones(nmasses) / nmasses
        var_cheb = float_rv_discrete(name='discrete_chebyshev',
                                     values=(xk, pk))()

        for variable in [
                var_cheb,
                stats.binom(20, 0.5),
                stats.hypergeom(10 + 10, 10, 9)
        ]:
            quad_rule = get_univariate_leja_quadrature_rule(
                variable, growth_rule)

            # the orthogonal polynomials of the binom and hypergeom
            # variables have no canonical domain [-1, 1]
            x, w = quad_rule(level)

            from pyapprox.variables import get_probability_masses
            xk, pk = get_probability_masses(variable)
            true_moment = (xk**(x.shape[0] - 1)).dot(pk)
            moment = (x**(x.shape[0] - 1)).dot(w[-1])

            assert np.allclose(moment, true_moment)
Example #3
    def test_get_univariate_leja_rule_bounded_discrete(self):
        growth_rule = partial(constant_increment_growth_rule, 2)
        level = 3

        nmasses = 20
        xk = np.array(range(0, nmasses), dtype='float')
        pk = np.ones(nmasses) / nmasses
        var_cheb = float_rv_discrete(name='discrete_chebyshev',
                                     values=(xk, pk))()

        for variable in [
                var_cheb,
                stats.binom(17, 0.5),
                stats.hypergeom(10 + 10, 10, 9)
        ]:
            quad_rule = get_univariate_leja_quadrature_rule(
                variable, growth_rule)

            x, w = quad_rule(level)
            loc, scale = transform_scale_parameters(variable)
            x = x * scale + loc

            xk, pk = get_probability_masses(variable)
            print(x, xk, loc, scale)

            degree = (x.shape[0] - 1)
            true_moment = (xk**degree).dot(pk)
            moment = (x**degree).dot(w[-1])

            print(moment, true_moment, variable.dist.name)
            assert np.allclose(moment, true_moment)
Example #4
def plot_discrete_distribution_heatmap_2d(rv1, rv2, ax=None, zero_tol=1e-4):
    """
    Only works if rv1 and rv2 are defined on consecutive integers
    """
    import copy
    import numpy as np
    import matplotlib.pyplot as plt
    from pyapprox.utilities import outer_product
    from pyapprox.variables import get_probability_masses

    if ax is None:
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111)
    x_1d = [get_probability_masses(rv)[0] for rv in [rv1, rv2]]
    w_1d = [get_probability_masses(rv)[1] for rv in [rv1, rv2]]
    weights = outer_product(w_1d)

    Z = np.reshape(weights, (len(x_1d[0]), len(x_1d[1])), order='F')
    Z[Z < zero_tol] = np.inf
    cmap = copy.copy(plt.cm.viridis)
    cmap.set_bad('gray', 1)
    xx = np.hstack((x_1d[0], x_1d[0].max()+1))-0.5
    yy = np.hstack((x_1d[1], x_1d[1].max()+1))-0.5
    p = ax.pcolormesh(xx, yy, Z.T, cmap=cmap)
    plt.colorbar(p, ax=ax)
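
A matching usage sketch for the heatmap variant, under the same consecutive-integer assumption:

import matplotlib.pyplot as plt
from scipy import stats

rv1 = stats.binom(10, 0.5)
rv2 = stats.randint(0, 11)  # uniform masses on 0, 1, ..., 10
plot_discrete_distribution_heatmap_2d(rv1, rv2)
plt.show()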
Example #5
def univariate_pdf(var, x):
    if hasattr(var.dist, 'pdf'):
        return var.pdf(x)
    if hasattr(var.dist, 'pmf'):
        return var.pmf(x)
    # fall back to looking up the probability masses directly
    xk, pk = get_probability_masses(var)
    x = np.atleast_1d(x)
    vals = np.zeros(x.shape[0])
    for jj in range(x.shape[0]):
        for ii in range(xk.shape[0]):
            if xk[ii] == x[jj]:
                vals[jj] = pk[ii]
                break
    return vals
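
A hedged usage sketch: scipy's frozen continuous variables expose pdf and its discrete ones expose pmf, so each call below exercises one branch.

from scipy import stats

print(univariate_pdf(stats.norm(0, 1), 0.0))    # pdf branch
print(univariate_pdf(stats.binom(10, 0.5), 5))  # pmf branch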
Example #6
def inverse_transform_sampling_1d(var, ab, ii, u_samples):
    name = var.dist.name
    if is_bounded_discrete_variable(var):
        xk, pk = get_probability_masses(var)
        if type(var.dist) == float_rv_discrete and name != 'discrete_chebyshev':
            lb, ub = xk.min(), xk.max()
            xk = (xk-lb)/(ub-lb)*2-1
        return float_rv_discrete_inverse_transform_sampling_1d(
            xk, pk, ab, ii, u_samples)
    elif name in _continuous_distns._distn_names:
        return continuous_induced_measure_ppf(var, ab, ii, u_samples)
    else:
        msg = 'induced sampling not yet implemented for var type %s' % name
        raise Exception(msg)
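
The bounded-discrete branch above delegates to an inverse-transform sampler over the (induced) probability masses. A standalone numpy sketch of plain discrete inverse-transform sampling, ignoring the induced-measure machinery, is:

import numpy as np

def discrete_inverse_transform_sketch(xk, pk, u_samples):
    # invert the discrete CDF: pick the smallest xk whose cumulative
    # probability reaches each uniform sample
    cdf = np.cumsum(pk)
    indices = np.searchsorted(cdf, u_samples, side='left')
    return xk[np.minimum(indices, xk.shape[0] - 1)]

samples = discrete_inverse_transform_sketch(
    np.arange(5.0), np.full(5, 0.2), np.random.uniform(0, 1, 10))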
Example #7
def get_discrete_univariate_leja_quadrature_rule(
        variable,
        growth_rule,
        initial_points=None,
        orthonormality_tol=1e-12,
        return_weights_for_all_levels=True,
        recursion_opts=None):
    from pyapprox.variables import get_probability_masses, \
        is_bounded_discrete_variable
    var_name = get_distribution_info(variable)[0]
    if is_bounded_discrete_variable(variable):
        xk, pk = get_probability_masses(variable)
        loc, scale = transform_scale_parameters(variable)
        xk = (xk - loc) / scale

        if initial_points is None:
            initial_points = (np.atleast_2d([variable.ppf(0.5)]) - loc) / scale
        # initial samples must be in canonical space
        assert np.all((initial_points >= -1) & (initial_points <= 1))
        assert np.all((xk >= -1) & (xk <= 1))

        def generate_candidate_samples(num_samples):
            # every probability mass serves as a candidate, regardless of
            # num_samples
            return xk[None, :]

        if recursion_opts is None:
            recursion_opts = {"orthonormality_tol": orthonormality_tol}
        ab = get_recursion_coefficients_from_variable(variable, xk.shape[0],
                                                      recursion_opts)

        quad_rule = partial(
            candidate_based_christoffel_leja_rule_1d,
            ab,
            generate_candidate_samples,
            xk.shape[0],
            growth_rule=growth_rule,
            initial_points=initial_points,
            return_weights_for_all_levels=return_weights_for_all_levels)
        return quad_rule

    raise ValueError('var_name %s not implemented' % var_name)
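
A hedged usage sketch mirroring Examples #2 and #3 (constant_increment_growth_rule is assumed importable from pyapprox, as in those tests); the returned points are in the canonical domain and may need mapping back via transform_scale_parameters:

from functools import partial
from scipy import stats

growth_rule = partial(constant_increment_growth_rule, 2)
quad_rule = get_discrete_univariate_leja_quadrature_rule(
    stats.binom(20, 0.5), growth_rule)
x, w = quad_rule(3)  # canonical points and per-level weights at level 3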
Example #8
def get_discrete_univariate_leja_quadrature_rule(
        variable, growth_rule, initial_points=None,
        numerically_generated_poly_accuracy_tolerance=1e-12):
    from pyapprox.variables import get_probability_masses, \
        is_bounded_discrete_variable
    var_name, scales, shapes = get_distribution_info(variable)
    if is_bounded_discrete_variable(variable):
        if initial_points is None:
            initial_points = np.atleast_2d([variable.ppf(0.5)])

        xk, pk = get_probability_masses(variable)

        def generate_candidate_samples(num_samples):
            # every probability mass serves as a candidate
            return xk[None, :]
        opts = {'rv_type': var_name, 'shapes': shapes}
        recursion_coeffs = get_recursion_coefficients(
            opts, xk.shape[0],
            numerically_generated_poly_accuracy_tolerance=numerically_generated_poly_accuracy_tolerance)
        quad_rule = partial(
            candidate_based_christoffel_leja_rule_1d, recursion_coeffs,
            generate_candidate_samples, xk.shape[0], growth_rule=growth_rule,
            initial_points=initial_points)
    else:
        raise Exception('var_name %s not implemented' % var_name)
    return quad_rule
Example #9
    def test_get_recursion_coefficients_from_variable_discrete(self):
        degree = 4
        N = 10
        scipy_discrete_var_names = list(stats._discrete_distns._distn_names)
        discrete_var_names = [
            "binom", "bernoulli", "nbinom", "geom", "hypergeom", "logser",
            "poisson", "planck", "boltzmann", "randint", "zipf", "dlaplace",
            "skellam", "yulesimon"
        ]
        # valid shape parameters for each distribution in names;
        # entries correspond one-to-one with discrete_var_names
        discrete_var_shapes = [
            {"n": 10, "p": 0.5}, {"p": 0.5}, {"n": 10, "p": 0.5},
            {"p": 0.5}, {"M": 20, "n": 7, "N": 12}, {"p": 0.5},
            {"mu": 1}, {"lambda_": 1}, {"lambda_": 2, "N": 10},
            {"low": 0, "high": 10}, {"a": 2}, {"a": 1},
            {"mu1": 1, "mu2": 3}, {"alpha": 1}
        ]

        # guard: the hard-coded list must track every discrete
        # distribution scipy provides
        for name in scipy_discrete_var_names:
            assert name in discrete_var_names

        # variables not supported:
        #    yulesimon: there is a bug when interval is called on a
        #        frozen variable
        #    bernoulli: only has two masses
        #    zipf: unusual distribution whose basis is difficult to compute
        #    crystalball: discontinuous and requires a special integrator;
        #        this can be developed if needed
        unsupported_discrete_var_names = ["bernoulli", "yulesimon", "zipf"]
        for name in unsupported_discrete_var_names:
            ii = discrete_var_names.index(name)
            del discrete_var_names[ii]
            del discrete_var_shapes[ii]

        for name, shapes in zip(discrete_var_names, discrete_var_shapes):
            # print(name)
            var = getattr(stats, name)(**shapes)
            xk, pk = get_probability_masses(var, 1e-15)
            loc, scale = transform_scale_parameters(var)
            xk = (xk - loc) / scale
            ab = get_recursion_coefficients_from_variable(
                var, degree + 1, {
                    "orthonormality_tol": 3e-14,
                    "truncated_probability_tol": 1e-15,
                    "numeric": False
                })
            basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
            gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
            assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)

        # custom discrete variables
        xk1, pk1 = np.arange(N), np.ones(N) / N
        xk2, pk2 = np.arange(N)**2, np.ones(N) / N
        custom_vars = [
            float_rv_discrete(name="discrete_chebyshev", values=(xk1, pk1))(),
            float_rv_discrete(name="float_rv_discrete", values=(xk2, pk2))()
        ]
        for var in custom_vars:
            xk, pk = get_probability_masses(var, 1e-15)
            loc, scale = transform_scale_parameters(var)
            xk = (xk - loc) / scale
            ab = get_recursion_coefficients_from_variable(
                var, degree + 1, {
                    "orthonormality_tol": 1e-14,
                    "truncated_probability_tol": 1e-15
                })
            basis_mat = evaluate_orthonormal_polynomial_1d(xk, degree, ab)
            gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
            assert np.allclose(gram_mat, np.eye(basis_mat.shape[1]), atol=2e-8)
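
The Gram-matrix assertion in both loops checks discrete orthonormality: sum_k pk[k]*phi_i(xk[k])*phi_j(xk[k]) should equal the identity. A self-contained numpy sketch of the same check, using hand-built degree-0 and degree-1 orthonormal polynomials for uniform masses:

import numpy as np

N = 10
xk = np.arange(N, dtype=float)
pk = np.ones(N) / N

# phi_0 = 1 and phi_1 = (x - mean)/std are orthonormal by construction
mean = xk.dot(pk)
std = np.sqrt(((xk - mean)**2).dot(pk))
basis_mat = np.column_stack([np.ones(N), (xk - mean) / std])

gram_mat = (basis_mat * pk[:, None]).T.dot(basis_mat)
assert np.allclose(gram_mat, np.eye(2))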
Example #10
def get_recursion_coefficients_from_variable(var, num_coefs, opts):
    """
    Generate polynomial recursion coefficients by inspecting a random variable.
    """
    var_name, _, shapes = get_distribution_info(var)
    if var_name == "continuous_monomial":
        return None

    loc, scale = transform_scale_parameters(var)

    if var_name == "rv_function_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_function_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if var_name == "rv_product_indpndt_vars":
        shapes["loc"] = loc
        shapes["scale"] = scale
        return get_product_independent_vars_recursion_coefficients(
            shapes, num_coefs)

    if (var_name in askey_variable_names
            and opts.get("numeric", False) is False):
        return get_askey_recursion_coefficients_from_variable(var, num_coefs)

    orthonormality_tol = opts.get("orthonormality_tol", 1e-8)
    truncated_probability_tol = opts.get("truncated_probability_tol", 0)
    if not is_continuous_variable(var):
        if "xk" in shapes:
            xk, pk = shapes["xk"], shapes["pk"]
        else:
            xk, pk = get_probability_masses(var, truncated_probability_tol)
        xk = (xk - loc) / scale

        return get_numerically_generated_recursion_coefficients_from_samples(
            xk, pk, num_coefs, orthonormality_tol, truncated_probability_tol)

    # integration is performed in the canonical domain, so samples must be
    # mapped back to the domain of the pdf
    lb, ub = var.interval(1)

    # get a version of var.pdf without error checking, which runs much faster
    pdf = get_pdf(var)

    def canonical_pdf(x):
        return pdf(x * scale + loc) * scale

    if (is_bounded_continuous_variable(var)
            or is_bounded_discrete_variable(var)):
        can_lb, can_ub = -1, 1
    elif is_continuous_variable(var):
        can_lb = (lb - loc) / scale
        can_ub = (ub - loc) / scale

    return predictor_corrector_known_pdf(num_coefs, can_lb, can_ub,
                                         canonical_pdf, opts)
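
A hedged usage sketch mirroring the calls in Example #9 (same signature, same opts keys):

from scipy import stats

# a bounded discrete variable; ab holds the three-term recurrence
# coefficients of the associated orthonormal polynomials
var = stats.binom(10, 0.5)
ab = get_recursion_coefficients_from_variable(
    var, 5, {"orthonormality_tol": 3e-14,
             "truncated_probability_tol": 1e-15, "numeric": False})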
Example #11
    def help_discrete_induced_sampling(self, var1, var2, envelope_factor):
        degree = 3

        var_trans = AffineRandomVariableTransformation([var1, var2])
        pce_opts = define_poly_options_from_variable_transformation(var_trans)

        pce = PolynomialChaosExpansion()
        pce.configure(pce_opts)
        indices = compute_hyperbolic_indices(pce.num_vars(), degree, 1.0)
        pce.set_indices(indices)

        num_samples = int(3e4)
        np.random.seed(1)
        canonical_samples = generate_induced_samples(pce, num_samples)
        samples = var_trans.map_from_canonical_space(canonical_samples)

        np.random.seed(1)
        # canonical_xk = [2*get_distribution_info(var1)[2]['xk']-1,
        #                2*get_distribution_info(var2)[2]['xk']-1]
        xk = np.array(
            [get_probability_masses(var)[0]
             for var in var_trans.variable.all_variables()])
        pk = np.array(
            [get_probability_masses(var)[1]
             for var in var_trans.variable.all_variables()])
        canonical_xk = var_trans.map_to_canonical_space(xk)
        basis_matrix_generator = partial(
            basis_matrix_generator_1d, pce, degree)
        canonical_samples1 = discrete_induced_sampling(
            basis_matrix_generator, pce.indices, canonical_xk,
            pk, num_samples)
        samples1 = var_trans.map_from_canonical_space(canonical_samples1)

        def univariate_pdf(var, x):
            if hasattr(var.dist, 'pdf'):
                return var.pdf(x)
            if hasattr(var.dist, 'pmf'):
                return var.pmf(x)
            # fall back to looking up the probability masses directly
            xk, pk = get_probability_masses(var)
            x = np.atleast_1d(x)
            vals = np.zeros(x.shape[0])
            for jj in range(x.shape[0]):
                for ii in range(xk.shape[0]):
                    if xk[ii] == x[jj]:
                        vals[jj] = pk[ii]
                        break
            return vals

        def density(x):
            # some issue with native scipy.pmf
            # assert np.allclose(var1.pdf(x[0, :]),var1.pmf(x[0, :]))
            return univariate_pdf(var1, x[0, :])*univariate_pdf(var2, x[1, :])

        def generate_proposal_samples(n):
            samples = np.vstack([var1.rvs(n), var2.rvs(n)])
            return samples
        proposal_density = density

        # unlike Fekete and Leja sampling, we can and should use
        # pce.basis_matrix here; if canonical_basis_matrix were used, the
        # densities would also have to be mapped to that space, which can
        # be difficult
        samples2 = random_induced_measure_sampling(
            num_samples, pce.num_vars(), pce.basis_matrix, density,
            proposal_density, generate_proposal_samples, envelope_factor)

        def induced_density(x):
            vals = density(x)*christoffel_function(
                x, pce.basis_matrix, True)
            return vals

        from pyapprox.utilities import cartesian_product, outer_product
        from pyapprox.polynomial_sampling import christoffel_function
        quad_samples = cartesian_product([xk[0], xk[1]])
        quad_weights = outer_product([pk[0], pk[1]])

        # print(canonical_samples.min(axis=1),canonical_samples.max(axis=1))
        # print(samples.min(axis=1),samples.max(axis=1))
        # print(canonical_samples1.min(axis=1),canonical_samples1.max(axis=1))
        # print(samples1.min(axis=1),samples1.max(axis=1))
        # import matplotlib.pyplot as plt
        # plt.plot(quad_samples[0,:],quad_samples[1,:],'s')
        # plt.plot(samples[0,:],samples[1,:],'o')
        # plt.plot(samples1[0,:],samples1[1,:],'*')
        # plt.show()

        rtol = 1e-2
        assert np.allclose(quad_weights, density(quad_samples))
        assert np.allclose(density(quad_samples).sum(), 1)
        assert np.allclose(
            christoffel_function(quad_samples, pce.basis_matrix, True).dot(
                quad_weights), 1.0)
        true_induced_mean = quad_samples.dot(induced_density(quad_samples))
        # print(true_induced_mean)
        # print(samples.mean(axis=1))
        # print(samples1.mean(axis=1))
        # print(samples2.mean(axis=1))
        # print(samples1.mean(axis=1)-true_induced_mean,
        #       true_induced_mean*rtol)
        # print(samples2.mean(axis=1))
        assert np.allclose(samples.mean(axis=1), true_induced_mean, rtol=rtol)
        assert np.allclose(samples1.mean(axis=1), true_induced_mean, rtol=rtol)
        assert np.allclose(samples2.mean(axis=1), true_induced_mean, rtol=rtol)
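
A note on the induced-density checks above: if christoffel_function(x, basis_matrix, True) returns the normalized sum (1/N) * sum_n phi_n(x)**2 (the True flag appears to request normalization), then by orthonormality its integral against the probability masses is one, which is exactly what the christoffel_function(...).dot(quad_weights) assertion verifies; consequently induced_density = density * christoffel also integrates to one and is a valid sampling density.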