Example #1
def _cross_validate_pce_degree(pce,
                               train_samples,
                               train_vals,
                               min_degree=1,
                               max_degree=3,
                               hcross_strength=1,
                               cv=10,
                               solver_type='lasso_lars',
                               verbosity=0):
    assert train_vals.shape[1] == 1
    num_samples = train_samples.shape[1]
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = -np.finfo(np.double).max
    best_degree = min_degree
    prev_num_terms = 0
    if verbosity > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree',
            'num_terms',
            'cv score',
        ))
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(pce.num_vars(), degree,
                                             hcross_strength)
        pce.set_indices(indices)
        # stop if this degree overshoots 100000 terms by more than the
        # previous degree undershot it
        if ((pce.num_terms() > 100000)
                and (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break

        basis_matrix = pce.basis_matrix(train_samples)
        coef, cv_score = fit_linear_model(basis_matrix,
                                          train_vals,
                                          solver_type,
                                          cv=cv)
        pce.set_coefficients(coef)

        if verbosity > 0:
            print("{:<8} {:<10} {:<18} ".format(degree, pce.num_terms(),
                                                cv_score))
        if (cv_score > best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
        # stop if the cross validation score has not improved for more
        # than one degree
        if degree - best_degree > 1:
            break
        prev_num_terms = pce.num_terms()

    pce.set_indices(
        compute_hyperbolic_indices(pce.num_vars(), best_degree,
                                   hcross_strength))
    pce.set_coefficients(best_coef)
    if verbosity > 0:
        print('best degree:', best_degree)
    return pce, best_cv_score, best_degree
Example #2
    def test_approximate_fixed_pce(self):
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree, hcross_strength = 7, 0.4
        poly.set_indices(
            pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
        num_samples = poly.num_terms() * 2
        degrees = poly.indices.sum(axis=0)
        coef = np.random.normal(
            0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
        # set some coefficients to zero to make sure that different qoi
        # are treated correctly.
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 0] = 0
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 1] = 0
        poly.set_coefficients(coef)
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)

        indices = pya.compute_hyperbolic_indices(num_vars, 1, 1)
        nfolds = 10
        method = "polynomial_chaos"
        options = {
            "basis_type": "fixed",
            "variable": variable,
            "options": {
                "linear_solver_options": {},
                "indices": indices,
                "solver_type": "lstsq"
            }
        }
        approx_list, residues_list, cv_score = \
            cross_validate_approximation(
                train_samples, train_vals, options, nfolds, method,
                random_folds=False)

        solver = LinearLeastSquaresCV(cv=nfolds, random_folds=False)
        poly.set_indices(indices)
        basis_matrix = poly.basis_matrix(train_samples)
        solver.fit(basis_matrix, train_vals[:, 0:1])
        assert np.allclose(solver.cv_score_, cv_score[0])

        solver.fit(basis_matrix, train_vals[:, 1:2])
        assert np.allclose(solver.cv_score_, cv_score[1])
Example #3
    def test_adaptive_approximate_increment_degree(self):
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree = 3
        poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree))
        poly.set_coefficients(
            np.random.normal(0, 1, (poly.indices.shape[1], 1)))
        fun = poly

        max_degree = degree + 2
        result = adaptive_approximate_polynomial_chaos_increment_degree(
            fun,
            variable,
            max_degree,
            max_nsamples=31,
            cond_tol=1e4,
            sample_growth_factor=2,
            verbose=0,
            oversampling_ratio=None,
            solver_type='lstsq',
            callback=None)
        print('Ntrain samples', result.train_samples.shape[1])
        assert np.allclose(
            result.approx.coefficients[:poly.coefficients.shape[0]],
            poly.coefficients)
Example #4
    def help_cross_validate_pce_degree(self, solver_type, solver_options):
        print(solver_type, solver_options)
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree = 3
        poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
        # factor of 2 does not pass test but 2.2 does
        num_samples = int(poly.num_terms() * 2.2)
        coef = np.random.normal(0, 1, (poly.indices.shape[1], 2))
        coef[pya.nchoosek(num_vars + 2, 2):, 0] = 0
        # for first qoi make degree 2 the best degree
        poly.set_coefficients(coef)

        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)
        true_poly = poly

        poly = approximate(
            train_samples, train_vals, "polynomial_chaos", {
                "basis_type": "hyperbolic_cross",
                "variable": variable,
                "options": {
                    "verbose": 3,
                    "solver_type": solver_type,
                    "min_degree": 1,
                    "max_degree": degree + 1,
                    "linear_solver_options": solver_options
                }
            }).approx

        num_validation_samples = 10
        validation_samples = pya.generate_independent_random_samples(
            variable, num_validation_samples)
        assert np.allclose(poly(validation_samples),
                           true_poly(validation_samples))

        poly = copy.deepcopy(true_poly)
        approx_res = cross_validate_pce_degree(
            poly,
            train_samples,
            train_vals,
            1,
            degree + 1,
            solver_type=solver_type,
            linear_solver_options=solver_options)
        assert np.allclose(approx_res.degrees, [2, 3])
Example #5
    def test_pce_basis_expansion(self):
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree, hcross_strength = 7, 0.4
        poly.set_indices(
            pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
        num_samples = poly.num_terms() * 2
        degrees = poly.indices.sum(axis=0)
        coef = np.random.normal(
            0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
        # set some coefficients to zero to make sure that different qoi
        # are treated correctly.
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 0] = 0
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 1] = 0
        poly.set_coefficients(coef)
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)
        true_poly = poly

        poly = approximate(
            train_samples, train_vals, "polynomial_chaos", {
                "basis_type": "expanding_basis",
                "variable": variable,
                "options": {
                    "max_num_expansion_steps_iter": 1,
                    "verbose": 3,
                    "max_num_terms": 1000,
                    "max_num_step_increases": 2,
                    "max_num_init_terms": 33
                }
            }).approx

        num_validation_samples = 100
        validation_samples = pya.generate_independent_random_samples(
            variable, num_validation_samples)
        # note: the freshly generated validation samples are overridden;
        # the recovery check below uses the training samples
        validation_samples = train_samples
        error = np.linalg.norm(
            poly(validation_samples) -
            true_poly(validation_samples)) / np.sqrt(num_validation_samples)
        assert np.allclose(poly(validation_samples),
                           true_poly(validation_samples),
                           atol=1e-8), error
Example #6
    def __init__(self, mesh_dof=100, num_terms=35):
        self.mesh = np.linspace(-1., 1., mesh_dof)
        self.num_terms = num_terms

        variable = [uniform(-1, 2)]
        var_trans = pya.AffineRandomVariableTransformation(variable)
        self.poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        self.poly.configure(poly_opts)
        self.poly.set_indices(
            pya.compute_hyperbolic_indices(1, self.num_terms - 1))
Example #7
    def test_pce_sensitivities_of_ishigami_function(self):
        nsamples = 1500
        nvars, degree = 3, 18
        univariate_variables = [uniform(-np.pi, 2 * np.pi)] * nvars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)

        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)
        indices = pya.compute_hyperbolic_indices(nvars, degree, 1.0)
        poly.set_indices(indices)
        #print('No. PCE Terms',indices.shape[1])

        samples = pya.generate_independent_random_samples(
            var_trans.variable, nsamples)
        values = ishigami_function(samples)

        basis_matrix = poly.basis_matrix(samples)
        coef = np.linalg.lstsq(basis_matrix, values, rcond=None)[0]
        poly.set_coefficients(coef)

        nvalidation_samples = 1000
        validation_samples = pya.generate_independent_random_samples(
            var_trans.variable, nvalidation_samples)
        validation_values = ishigami_function(validation_samples)
        poly_validation_vals = poly(validation_samples)
        abs_error = np.linalg.norm(poly_validation_vals - validation_values
                                   ) / np.sqrt(nvalidation_samples)
        #print('Abs. Error',abs_error)

        pce_main_effects, pce_total_effects =\
            pya.get_main_and_total_effect_indices_from_pce(
                poly.get_coefficients(), poly.get_indices())

        mean, variance, main_effects, total_effects, sobol_indices, \
            sobol_interaction_indices = get_ishigami_funciton_statistics()
        assert np.allclose(poly.mean(), mean)
        assert np.allclose(poly.variance(), variance)
        assert np.allclose(pce_main_effects, main_effects)
        assert np.allclose(pce_total_effects, total_effects)

        interaction_terms, pce_sobol_indices = get_sobol_indices(
            poly.get_coefficients(), poly.get_indices(), max_order=3)
        assert np.allclose(pce_sobol_indices, sobol_indices)
Example #8
    def test_pce_basis_expansion(self):
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree, hcross_strength = 7, 0.4
        poly.set_indices(
            pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
        num_samples = poly.num_terms() * 2
        degrees = poly.indices.sum(axis=0)
        poly.set_coefficients((np.random.normal(0, 1, poly.indices.shape[1]) /
                               (degrees + 1)**2)[:, np.newaxis])
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)
        true_poly = poly

        poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
            'basis_type': 'expanding_basis',
            'variable': variable
        })

        num_validation_samples = 100
        validation_samples = pya.generate_independent_random_samples(
            variable, num_validation_samples)
        # note: the freshly generated validation samples are overridden;
        # the recovery check below uses the training samples
        validation_samples = train_samples
        error = np.linalg.norm(
            poly(validation_samples) -
            true_poly(validation_samples)) / np.sqrt(num_validation_samples)
        assert np.allclose(poly(validation_samples),
                           true_poly(validation_samples),
                           atol=1e-8), error
Example #9
    def test_cross_validate_pce_degree(self):
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree = 3
        poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
        num_samples = poly.num_terms() * 2
        poly.set_coefficients(
            np.random.normal(0, 1, (poly.indices.shape[1], 1)))
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)
        true_poly = poly

        poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
            'basis_type': 'hyperbolic_cross',
            'variable': variable
        })

        num_validation_samples = 10
        validation_samples = pya.generate_independent_random_samples(
            variable, num_validation_samples)
        assert np.allclose(poly(validation_samples),
                           true_poly(validation_samples))

        poly = copy.deepcopy(true_poly)
        poly, best_degree = cross_validate_pce_degree(poly, train_samples,
                                                      train_vals, 1,
                                                      degree + 2)
        assert best_degree == degree
Example #10
    def test_cross_validate_approximation_after_regularization_selection(self):
        """
        This test is useful as it shows how to use cross_validate_approximation
        to produce a list of approximations on each cross validation fold
        once regularization parameters have been chosen.
        These can be used to show variance in predictions of values,
        sensitivity indices, etc.

        Ideally this could be avoided if sklearn stored the coefficients
        and alphas for each fold and then we can just find the coefficients
        that correspond to the first time the path drops below the best_alpha
        """
        num_vars = 2
        univariate_variables = [stats.uniform(-1, 2)] * num_vars
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree, hcross_strength = 7, 0.4
        poly.set_indices(
            pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
        num_samples = poly.num_terms() * 2
        degrees = poly.indices.sum(axis=0)
        coef = np.random.normal(
            0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
        # set some coefficients to zero to make sure that different qoi
        # are treated correctly.
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 0] = 0
        II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
        coef[II, 1] = 0
        poly.set_coefficients(coef)
        train_samples = pya.generate_independent_random_samples(
            variable, num_samples)
        train_vals = poly(train_samples)
        # true_poly = poly

        result = approximate(train_samples, train_vals, "polynomial_chaos", {
            "basis_type": "expanding_basis",
            "variable": variable
        })

        # Even with the same folds, iterative methods such as Lars,
        # LassoLars and OMP will not produce exactly the same cv_score from
        # approximate and cross_validate_approximation, because iterative
        # methods interpolate residuals to compute cross validation scores
        nfolds = 10
        linear_solver_options = [{
            "alpha": result.reg_params[0]
        }, {
            "alpha": result.reg_params[1]
        }]
        indices = [
            result.approx.indices[:, np.where(np.absolute(c) > 0)[0]]
            for c in result.approx.coefficients.T
        ]
        options = {
            "basis_type": "fixed",
            "variable": variable,
            "options": {
                "linear_solver_options": linear_solver_options,
                "indices": indices
            }
        }
        approx_list, residues_list, cv_score = \
            cross_validate_approximation(
                train_samples, train_vals, options, nfolds, "polynomial_chaos",
                random_folds="sklearn")

        assert (np.all(cv_score < 6e-14) and np.all(result.scores < 4e-13))
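
The per-fold approximations returned above can be used as the docstring
suggests: evaluate each fold's approximation at new samples to see the
spread of predictions. A minimal sketch reusing the names from this test
(new_samples and the fold loop are illustrative, not part of the original):

        new_samples = pya.generate_independent_random_samples(variable, 10)
        # one prediction per cross validation fold; the spread across folds
        # indicates the variability of the approximation
        fold_predictions = np.array(
            [approx(new_samples) for approx in approx_list])
        prediction_std = fold_predictions.std(axis=0)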
Example #11
def expanding_basis_omp_pce(pce,
                            train_samples,
                            train_vals,
                            hcross_strength=1,
                            verbosity=1,
                            max_num_terms=None,
                            solver_type='lasso_lars',
                            cv=10,
                            restriction_tol=np.finfo(float).eps * 2):
    r"""
    Iteratively expand and restrict the polynomial basis and use
    cross validation to find the best basis [JESJCP2015]_.

    Parameters
    ----------
    train_samples : np.ndarray (nvars,nsamples)
        The inputs of the function used to train the approximation

    train_vals : np.ndarray (nsamples,1)
        The values of the function at ``train_samples``

    hcross_strength : float
        The strength of the hyperbolic cross index set. hcross_strength must
        be in (0,1]. A value of 1 produces total degree polynomials

    max_num_terms : integer
        The maximum number of terms allowed in the polynomial basis

    cv : integer
        The number of cross validation folds used to compute the cross 
        validation error

    solver_type : string
        The type of regression used to train the polynomial

        - 'lasso_lars'
        - 'lars'
        - 'lasso'
        - 'omp'

    verbosity : integer
        Controls the amount of information printed to screen

    restriction_tol : float
        The tolerance used to prune inactive indices

    Returns
    -------
    pce : :class:`pyapprox.multivariate_polynomials.PolynomialChaosExpansion`
        The PCE approximation

    best_cv_score : float
        The cross validation score of the returned PCE

    References
    ----------
    .. [JESJCP2015] `J.D. Jakeman, M.S. Eldred, and K. Sargsyan. Enhancing l1-minimization estimates of polynomial chaos expansions using basis selection. Journal of Computational Physics, 289(0):18 – 34, 2015 <https://doi.org/10.1016/j.jcp.2015.02.025>`_
    """

    assert train_vals.shape[1] == 1
    num_vars = pce.num_vars()
    if max_num_terms is None:
        max_num_terms = 10 * train_vals.shape[1]
    degree = 2
    prev_num_terms = 0
    while True:
        indices = compute_hyperbolic_indices(num_vars, degree, hcross_strength)
        num_terms = indices.shape[1]
        if num_terms > max_num_terms:
            break
        degree += 1
        prev_num_terms = num_terms

    if (abs(num_terms - max_num_terms) > abs(prev_num_terms - max_num_terms)):
        degree -= 1
    pce.set_indices(
        compute_hyperbolic_indices(num_vars, degree, hcross_strength))

    if verbosity > 0:
        msg = f'Initializing basis with hyperbolic cross of degree {degree}'
        msg += f' and strength {hcross_strength} with {pce.num_terms()} terms'
        print(msg)

    basis_matrix = pce.basis_matrix(train_samples)
    best_coef, best_cv_score = fit_linear_model(basis_matrix,
                                                train_vals,
                                                solver_type,
                                                cv=cv)
    pce.set_coefficients(best_coef)
    best_indices = pce.get_indices()
    if verbosity > 0:
        print("{:<10} {:<10} {:<18}".format('nterms', 'nnz terms', 'cv score'))
        print("{:<10} {:<10} {:<18}".format(pce.num_terms(),
                                            np.count_nonzero(pce.coefficients),
                                            best_cv_score))

    best_cv_score_iter = best_cv_score
    best_num_expansion_steps = 3
    it = 0
    best_it = 0
    while True:
        max_num_expansion_steps = 1
        best_num_expansion_steps_iter = best_num_expansion_steps
        while True:
            # -------------- #
            #  Expand basis  #
            # -------------- #
            num_expansion_steps = 0
            indices = restrict_basis(pce.indices, pce.coefficients,
                                     restriction_tol)
            while ((num_expansion_steps < max_num_expansion_steps)
                   and (num_expansion_steps < best_num_expansion_steps)):
                new_indices = expand_basis(pce.indices)
                pce.set_indices(np.hstack([pce.indices, new_indices]))
                num_terms = pce.num_terms()
                num_expansion_steps += 1

            # -----------------#
            # Compute solution #
            # -----------------#
            basis_matrix = pce.basis_matrix(train_samples)
            coef, cv_score = fit_linear_model(basis_matrix,
                                              train_vals,
                                              solver_type,
                                              cv=cv)
            pce.set_coefficients(coef)

            if verbosity > 0:
                print("{:<10} {:<10} {:<18}".format(
                    pce.num_terms(), np.count_nonzero(pce.coefficients),
                    cv_score))

            if (cv_score > best_cv_score_iter):
                best_cv_score_iter = cv_score
                best_indices_iter = pce.indices.copy()
                best_coef_iter = pce.coefficients.copy()
                best_num_expansion_steps_iter = num_expansion_steps

            if num_terms >= max_num_terms:
                break
            if max_num_expansion_steps >= 3:
                break

            max_num_expansion_steps += 1

        if (best_cv_score_iter > best_cv_score):
            best_cv_score = best_cv_score_iter
            best_coef = best_coef_iter.copy()
            best_indices = best_indices_iter.copy()
            best_num_expansion_steps = best_num_expansion_steps_iter
            best_it = it
        elif (it - best_it >= 2):
            break

        it += 1

    nindices = best_indices.shape[1]
    I = np.nonzero(best_coef[:, 0])[0]
    pce.set_indices(best_indices[:, I])
    pce.set_coefficients(best_coef[I])
    if verbosity > 0:
        msg = f'Final basis has {pce.num_terms()} terms selected from {nindices}'
        msg += f' using {train_samples.shape[1]} samples'
        print(msg)
    return pce, best_cv_score
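
A minimal usage sketch for the function above, assuming the pyapprox setup
used throughout these examples; the target values here are an arbitrary
smooth stand-in:

import numpy as np
from scipy import stats
import pyapprox as pya

univariate_variables = [stats.uniform(-1, 2)] * 2
variable = pya.IndependentMultivariateRandomVariable(univariate_variables)
var_trans = pya.AffineRandomVariableTransformation(variable)
pce = pya.PolynomialChaosExpansion()
pce.configure(pya.define_poly_options_from_variable_transformation(var_trans))

train_samples = pya.generate_independent_random_samples(variable, 100)
# any smooth scalar-valued target; train_vals must have shape (nsamples, 1)
train_vals = np.cos(train_samples.sum(axis=0))[:, None]

pce, best_cv_score = expanding_basis_omp_pce(
    pce, train_samples, train_vals, hcross_strength=0.5, max_num_terms=50)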
Example #12
def cross_validate_pce_degree(pce,
                              train_samples,
                              train_vals,
                              min_degree=1,
                              max_degree=3,
                              hcross_strength=1,
                              cv=10,
                              solver_type='lasso_lars',
                              verbosity=0):
    r"""
    Use cross validation to find the polynomial degree which best fits the data.
    A polynomial is constructed for each degree and the degree with the highest
    cross validation score is returned.
    
    Parameters
    ----------
    train_samples : np.ndarray (nvars,nsamples)
        The inputs of the function used to train the approximation

    train_vals : np.ndarray (nsamples,1)
        The values of the function at ``train_samples``

    min_degree : integer
        The minimum degree to consider

    max_degree : integer
        The maximum degree to consider.
        All degrees in ``range(min_degree, max_degree+1)`` are considered

    hcross_strength : float
       The strength of the hyperbolic cross index set. hcross_strength must be 
       in (0,1]. A value of 1 produces total degree polynomials

    cv : integer
        The number of cross validation folds used to compute the cross 
        validation error

    solver_type : string
        The type of regression used to train the polynomial

        - 'lasso_lars'
        - 'lars'
        - 'lasso'
        - 'omp'

    verbosity : integer
        Controls the amount of information printed to screen

    Returns
    -------
    pce : :class:`pyapprox.multivariate_polynomials.PolynomialChaosExpansion`
        The PCE approximation

    best_degree : integer
        The degree with the highest cross validation score
    """

    num_samples = train_samples.shape[1]
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = -np.finfo(np.double).max
    best_degree = min_degree
    prev_num_terms = 0
    if verbosity > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree',
            'num_terms',
            'cv score',
        ))
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(pce.num_vars(), degree,
                                             hcross_strength)
        pce.set_indices(indices)
        # stop if this degree overshoots 100000 terms by more than the
        # previous degree undershot it
        if ((pce.num_terms() > 100000)
                and (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break

        basis_matrix = pce.basis_matrix(train_samples)
        coef, cv_score = fit_linear_model(basis_matrix,
                                          train_vals,
                                          solver_type,
                                          cv=cv)
        pce.set_coefficients(coef)

        if verbosity > 0:
            print("{:<8} {:<10} {:<18} ".format(degree, pce.num_terms(),
                                                cv_score))
        if (cv_score > best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
        # stop if the cross validation score has not improved for more
        # than one degree
        if degree - best_degree > 1:
            break
        prev_num_terms = pce.num_terms()

    pce.set_indices(
        compute_hyperbolic_indices(pce.num_vars(), best_degree,
                                   hcross_strength))
    pce.set_coefficients(best_coef)
    if verbosity > 0:
        print('best degree:', best_degree)
    return pce, best_degree
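
A minimal usage sketch, mirroring the call in Example #9 (train_samples,
train_vals and var_trans are assumed to be built as in the tests above):

poly = pya.PolynomialChaosExpansion()
poly.configure(
    pya.define_poly_options_from_variable_transformation(var_trans))
# search degrees 1 through 5 and keep the one with the best cv score
poly, best_degree = cross_validate_pce_degree(
    poly, train_samples, train_vals, min_degree=1, max_degree=5)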
Example #13
def _expanding_basis_omp_pce(pce,
                             train_samples,
                             train_vals,
                             hcross_strength=1,
                             verbosity=1,
                             max_num_terms=None,
                             solver_type='lasso_lars',
                             cv=10,
                             restriction_tol=np.finfo(float).eps * 2):
    assert train_vals.shape[1] == 1
    num_vars = pce.num_vars()
    if max_num_terms is None:
        max_num_terms = 10 * train_vals.shape[1]
    degree = 2
    prev_num_terms = 0
    while True:
        indices = compute_hyperbolic_indices(num_vars, degree, hcross_strength)
        num_terms = indices.shape[1]
        if num_terms > max_num_terms:
            break
        degree += 1
        prev_num_terms = num_terms

    if (abs(num_terms - max_num_terms) > abs(prev_num_terms - max_num_terms)):
        degree -= 1
    pce.set_indices(
        compute_hyperbolic_indices(num_vars, degree, hcross_strength))

    if verbosity > 0:
        msg = f'Initializing basis with hyperbolic cross of degree {degree}'
        msg += f' and strength {hcross_strength} with {pce.num_terms()} terms'
        print(msg)

    basis_matrix = pce.basis_matrix(train_samples)
    best_coef, best_cv_score = fit_linear_model(basis_matrix,
                                                train_vals,
                                                solver_type,
                                                cv=cv)
    pce.set_coefficients(best_coef)
    best_indices = pce.get_indices()
    if verbosity > 0:
        print("{:<10} {:<10} {:<18}".format('nterms', 'nnz terms', 'cv score'))
        print("{:<10} {:<10} {:<18}".format(pce.num_terms(),
                                            np.count_nonzero(pce.coefficients),
                                            best_cv_score))

    best_cv_score_iter = best_cv_score
    best_num_expansion_steps = 3
    it = 0
    best_it = 0
    while True:
        max_num_expansion_steps = 1
        best_num_expansion_steps_iter = best_num_expansion_steps
        while True:
            # -------------- #
            #  Expand basis  #
            # -------------- #
            num_expansion_steps = 0
            indices = restrict_basis(pce.indices, pce.coefficients,
                                     restriction_tol)
            while ((num_expansion_steps < max_num_expansion_steps)
                   and (num_expansion_steps < best_num_expansion_steps)):
                new_indices = expand_basis(pce.indices)
                pce.set_indices(np.hstack([pce.indices, new_indices]))
                num_terms = pce.num_terms()
                num_expansion_steps += 1

            # -----------------#
            # Compute solution #
            # -----------------#
            basis_matrix = pce.basis_matrix(train_samples)
            coef, cv_score = fit_linear_model(basis_matrix,
                                              train_vals,
                                              solver_type,
                                              cv=cv)
            pce.set_coefficients(coef)

            if verbosity > 0:
                print("{:<10} {:<10} {:<18}".format(
                    pce.num_terms(), np.count_nonzero(pce.coefficients),
                    cv_score))

            if (cv_score > best_cv_score_iter):
                best_cv_score_iter = cv_score
                best_indices_iter = pce.indices.copy()
                best_coef_iter = pce.coefficients.copy()
                best_num_expansion_steps_iter = num_expansion_steps

            if num_terms >= max_num_terms:
                break
            if max_num_expansion_steps >= 3:
                break

            max_num_expansion_steps += 1

        if (best_cv_score_iter > best_cv_score):
            best_cv_score = best_cv_score_iter
            best_coef = best_coef_iter.copy()
            best_indices = best_indices_iter.copy()
            best_num_expansion_steps = best_num_expansion_steps_iter
            best_it = it
        elif (it - best_it >= 2):
            break

        it += 1

    nindices = best_indices.shape[1]
    I = np.nonzero(best_coef[:, 0])[0]
    pce.set_indices(best_indices[:, I])
    pce.set_coefficients(best_coef[I])
    if verbosity > 0:
        msg = f'Final basis has {pce.num_terms()} terms selected from {nindices}'
        msg += f' using {train_samples.shape[1]} samples'
        print(msg)
    return pce, best_cv_score
Example #14
    def test_marginalize_polynomial_chaos_expansions(self):
        univariate_variables = [uniform(-1, 2), norm(0, 1), uniform(-1, 2)]
        variable = pya.IndependentMultivariateRandomVariable(
            univariate_variables)
        var_trans = pya.AffineRandomVariableTransformation(variable)
        num_vars = len(univariate_variables)

        poly = pya.PolynomialChaosExpansion()
        poly_opts = pya.define_poly_options_from_variable_transformation(
            var_trans)
        poly.configure(poly_opts)

        degree = 2
        indices = pya.compute_hyperbolic_indices(num_vars, degree, 1)
        poly.set_indices(indices)
        poly.set_coefficients(np.ones((indices.shape[1], 1)))

        pce_main_effects, pce_total_effects =\
            pya.get_main_and_total_effect_indices_from_pce(
                poly.get_coefficients(), poly.get_indices())
        print(poly.num_terms())

        for ii in range(num_vars):
            # Marginalize out 2 variables
            xx = np.linspace(-1, 1, 101)
            inactive_idx = np.hstack(
                (np.arange(ii), np.arange(ii + 1, num_vars)))
            marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
                poly, inactive_idx, center=True)
            mvals = marginalized_pce(xx[None, :])
            variable_ii = variable.all_variables()[ii:ii + 1]
            var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
            poly_ii = pya.PolynomialChaosExpansion()
            poly_opts_ii = \
                pya.define_poly_options_from_variable_transformation(
                    var_trans_ii)
            poly_ii.configure(poly_opts_ii)
            indices_ii = compute_hyperbolic_indices(1, degree, 1.)
            poly_ii.set_indices(indices_ii)
            poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
            pvals = poly_ii(xx[None, :])
            # import matplotlib.pyplot as plt
            # plt.plot(xx, pvals)
            # plt.plot(xx, mvals, '--')
            # plt.show()
            assert np.allclose(mvals, pvals - poly.mean())
            assert np.allclose(poly_ii.variance() / poly.variance(),
                               pce_main_effects[ii])
            poly_ii.coefficients /= np.sqrt(poly.variance())
            assert np.allclose(poly_ii.variance(), pce_main_effects[ii])

            # Marginalize out 1 variable
            xx = pya.cartesian_product([xx] * 2)
            inactive_idx = np.array([ii])
            marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
                poly, inactive_idx, center=True)
            mvals = marginalized_pce(xx)
            variable_ii = variable.all_variables()[:ii] +\
                variable.all_variables()[ii+1:]
            var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
            poly_ii = pya.PolynomialChaosExpansion()
            poly_opts_ii = \
                pya.define_poly_options_from_variable_transformation(
                    var_trans_ii)
            poly_ii.configure(poly_opts_ii)
            indices_ii = pya.compute_hyperbolic_indices(2, degree, 1.)
            poly_ii.set_indices(indices_ii)
            poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
            pvals = poly_ii(xx)
            assert np.allclose(mvals, pvals - poly.mean())
Example #15
def _cross_validate_pce_degree(pce,
                               train_samples,
                               train_vals,
                               min_degree=1,
                               max_degree=3,
                               hcross_strength=1,
                               linear_solver_options={'cv': 10},
                               solver_type='lasso',
                               verbose=0):
    assert train_vals.shape[1] == 1
    num_samples = train_samples.shape[1]
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = np.finfo(np.double).max
    best_degree = min_degree
    prev_num_terms = 0
    if verbose > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree',
            'num_terms',
            'cv score',
        ))

    rng_state = np.random.get_state()
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(pce.num_vars(), degree,
                                             hcross_strength)
        pce.set_indices(indices)
        # stop if this degree overshoots 100000 terms by more than the
        # previous degree undershot it
        if ((pce.num_terms() > 100000)
                and (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break

        basis_matrix = pce.basis_matrix(train_samples)

        # use the same state (thus cross validation folds) for each degree
        np.random.set_state(rng_state)
        coef, cv_score, reg_param = fit_linear_model(basis_matrix, train_vals,
                                                     solver_type,
                                                     **linear_solver_options)
        np.random.set_state(rng_state)
        pce.set_coefficients(coef)
        if verbose > 0:
            print("{:<8} {:<10} {:<18} ".format(degree, pce.num_terms(),
                                                cv_score))
        # stop if the cross validation score has not improved for more
        # than one degree (here the score is minimized, so smaller is better)
        if ((cv_score >= best_cv_score) and (degree - best_degree > 1)):
            break
        if (cv_score < best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
            best_reg_param = reg_param
        prev_num_terms = pce.num_terms()

    pce.set_indices(
        compute_hyperbolic_indices(pce.num_vars(), best_degree,
                                   hcross_strength))
    pce.set_coefficients(best_coef)
    if verbose > 0:
        print('best degree:', best_degree)
    return pce, best_cv_score, best_degree, best_reg_param
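
The rng_state bookkeeping in the last example keeps the cross validation
folds identical for every degree. A minimal self-contained demonstration of
that pattern (not pyapprox-specific):

import numpy as np

rng_state = np.random.get_state()
folds = []
for degree in range(1, 4):
    # resetting the state makes every iteration draw the same permutation,
    # so each degree is scored on identical cross validation folds
    np.random.set_state(rng_state)
    folds.append(np.random.permutation(20))
assert all(np.array_equal(folds[0], f) for f in folds)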