    def xtest_equality_constrained_least_squares_cross_validation(self):
        """
        Test cross validation applied to equality constrained regression
        """
        num_pts = 11
        num_eq_per_pt = 1
        num_nonzeros = 3
        num_cols = 10
        num_rhs = 1
        noise_std = 0.1
        num_folds = num_pts
        num_primary_eq = 9
        regression_type = EQ_CONS_LEAST_SQ_REGRESSION
        opts_dict = {
            'regression_type': regression_type,
            'num-primary-equations': num_primary_eq
        }
        regression_opts = OptionsList(opts_dict)
        solver = regression_solver_factory(regression_opts)

        A, rhs, x_truth = get_linear_system(num_pts, num_eq_per_pt, num_cols,
                                            num_rhs, num_nonzeros, noise_std)
        solver.solve(A, rhs, regression_opts)
        x = solver.get_solutions_for_all_regularization_params(0)
        # TODO: add an allclose helper that calls squeeze before comparison,
        # as done in the commented assertion below
        #assert numpy.allclose(x.squeeze(), x_truth)
        opts = OptionsList()
        self.linear_system_cross_validation_test(A, rhs, num_folds,
                                                 num_eq_per_pt, solver,
                                                 regression_opts)

    def test_cross_validated_solver_wrappers_of_lscv_iterator(self):
        """
        Make sure the CrossValidatedSolver returns the solution associated
        with the best cross validation error found using linear system
        solvers that invoke LinearSystemCrossValidationIterator
        """
        num_pts = 20
        num_eq_per_pt = 1
        num_nonzeros = 3
        num_cols = 10
        num_rhs = 2
        noise_std = 0.1
        num_folds = 4

        A, rhs, x_truth = get_linear_system(num_pts, num_eq_per_pt, num_cols,
                                            num_rhs, num_nonzeros, noise_std)

        cv_opts = {'num-points': num_pts, 'num-folds': num_folds}
        cv_opts = OptionsList(cv_opts)
        regression_opts = {'verbosity': 0}
        regression_opts = OptionsList(regression_opts)
        regression_opts.set("cv-opts", cv_opts)
        regression_types = [
            ORTHOG_MATCH_PURSUIT, LEAST_ANGLE_REGRESSION, LASSO_REGRESSION
        ]
        for regression_type in regression_types:
            self.cross_validated_solver_wrappers_of_lscv_iterator_helper(
                regression_type, regression_opts, A, rhs)
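
The machinery exercised above can be summarized with a small self-contained sketch of brute-force k-fold cross validation for a linear system. It uses plain NumPy least squares in place of the wrapped solvers; kfold_cv_score is a hypothetical helper written for this note, not part of the tested API:

import numpy

def kfold_cv_score(A, rhs, num_folds, seed=0):
    # Hypothetical sketch: partition the rows into num_folds validation
    # sets, fit on the remaining rows with least squares, and accumulate
    # the squared validation residuals into a single score.
    num_pts = A.shape[0]
    perm = numpy.random.RandomState(seed).permutation(num_pts)
    score = 0.0
    for valid_ind in numpy.array_split(perm, num_folds):
        train_ind = numpy.setdiff1d(perm, valid_ind)
        x = numpy.linalg.lstsq(A[train_ind], rhs[train_ind])[0]
        diff = rhs[valid_ind] - numpy.dot(A[valid_ind], x)
        score += numpy.sum(diff ** 2)
    return score / num_pts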

    def test_lasso_path(self):
        """
        Test that the maximum covariance is tied across the active set and
        decreasing along the lasso path
        """

        solver = LARSolver()
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        regression_opts = {'regression_type': LASSO_REGRESSION}
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_lasso = solver.get_solutions_for_all_regularization_params(0)
        max_covariance_prev = numpy.finfo(float).max
        for i in xrange(coef_lasso.shape[1]):
            coef = coef_lasso[:, i]
            residual = rhs - numpy.dot(self.diabetes_matrix, coef)
            covariance = numpy.dot(self.diabetes_matrix.T, residual)

            max_covariance = numpy.max(numpy.absolute(covariance))
            assert max_covariance < max_covariance_prev
            max_covariance_prev = max_covariance
            eps = 1e-3
            num_non_zeros = len(
                covariance[max_covariance - eps < numpy.absolute(covariance)])
            if i < self.diabetes_matrix.shape[1] - 1:
                assert num_non_zeros == i + 2
            else:
                # no more than max_pred variables can go into the active set
                assert num_non_zeros == self.diabetes_matrix.shape[1]

    def test_omp_memory_management(self):
        """
        OMP internally allocates memory for solutions and the QR
        factorization in blocks, so that if the residual tolerance is met
        before the maximum number of solutions, min(num_rows, num_cols), is
        reached, memory has not been wasted. Memory is allocated in chunks
        of size 100. Test that the algorithm works when more than one chunk
        is needed.
        """
        num_rows = 200
        num_cols = 200
        sparsity = 100
        memory_chunk_size = 100
        solver = OMPSolver()
        matrix = numpy.random.normal(0., 1., (num_rows, num_cols))
        x = numpy.zeros((num_cols), float)
        I = numpy.random.permutation(num_cols)[:sparsity]
        x[I] = numpy.random.normal(0., 1., (sparsity))
        rhs = numpy.dot(matrix, x)
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0,
            'memory-chunk-size': memory_chunk_size
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_final_solutions()
        assert numpy.allclose(coef_omp[:, 0].squeeze(), x.squeeze())
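
The chunked allocation strategy described in the docstring above can be illustrated with a short NumPy sketch; grow_columns is a hypothetical helper written for this note, not the solver's actual internals:

import numpy

def grow_columns(block, num_used, memory_chunk_size=100):
    # Hypothetical sketch: append another chunk of columns only when the
    # used columns fill the current allocation, so early termination
    # (residual tolerance met) wastes at most one chunk of memory.
    if num_used == block.shape[1]:
        extra = numpy.zeros((block.shape[0], memory_chunk_size))
        block = numpy.hstack((block, extra))
    return block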

    def test_equality_constrained_least_squares_cross_validation_error_catching(
            self):
        """
        Test that an error is thrown when cross validation applied to
        equality constrained regression induces an underdetermined system
        """
        num_pts = 10
        num_eq_per_pt = 1
        num_nonzeros = 3
        num_cols = 10
        num_rhs = 1
        noise_std = 0.001
        num_folds = num_pts
        num_primary_eq = 10
        regression_type = EQ_CONS_LEAST_SQ_REGRESSION
        opts_dict = {
            'regression_type': regression_type,
            'num-primary-equations': num_primary_eq
        }
        regression_opts = OptionsList(opts_dict)
        solver = regression_solver_factory(regression_opts)

        A, rhs, x_truth = get_linear_system(num_pts, num_eq_per_pt, num_cols,
                                            num_rhs, num_nonzeros, noise_std)
        solver.solve(A, rhs, regression_opts)
        x = solver.get_solutions_for_all_regularization_params(0)
        # TODO: add an allclose helper that calls squeeze before comparison
        assert numpy.allclose(x, x_truth, atol=noise_std * 10)
        self.assertRaises(RuntimeError,
                          self.linear_system_cross_validation_test, A, rhs,
                          num_folds, num_eq_per_pt, solver, regression_opts)

    def test_lasso_pce_exact_recovery(self):
        base_dir = os.path.join(os.path.dirname(__file__), 'data')
        matrix = numpy.loadtxt(os.path.join(base_dir, 'pce_data.csv.gz'))
        rhs = numpy.loadtxt(os.path.join(base_dir, 'pce_target.csv.gz'))
        exact_coef = numpy.loadtxt(os.path.join(base_dir, 'pce_coef.csv.gz'))
        solver = LARSolver()
        regression_opts = {'regression_type': LASSO_REGRESSION, 'verbosity': 0}
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_lasso = solver.get_solutions_for_all_regularization_params(0)
        assert numpy.allclose(coef_lasso[:, -1], exact_coef)

    def test_linear_system_cross_validation(self):
        """
        Test cross validation applied to least squares regression using
        the various supported methods: QR, LU and SVD regression.

        This test does not use the fast LSQCrossValidationIterator,
        which solves the least squares problem Ax=b via
        x = inv(A'A)*A'*b, computing inv(A'A) with a Cholesky factorization.
        """
        num_pts = 11
        num_eq_per_pt = 1
        num_nonzeros = 3
        num_cols = 10
        num_rhs = 2
        noise_std = 0.1
        num_folds = num_pts
        regression_types = [
            SVD_LEAST_SQ_REGRESSION, QR_LEAST_SQ_REGRESSION,
            ORTHOG_MATCH_PURSUIT, LEAST_ANGLE_REGRESSION, LASSO_REGRESSION
        ]
        # LU_LEAST_SQ_REGRESSION is excluded because it is not working
        for regression_type in regression_types:
            opts_dict = {'regression_type': regression_type, 'verbosity': 0}
            regression_opts = OptionsList(opts_dict)
            solver = regression_solver_factory(regression_opts)

            A, rhs, x_truth = get_linear_system(num_pts, num_eq_per_pt,
                                                num_cols, num_rhs,
                                                num_nonzeros, noise_std)
            solver.solve(A, rhs, regression_opts)
            x = solver.get_solutions_for_all_regularization_params(0)
            # TODO: add an allclose helper that calls squeeze before
            # comparison, as done in the commented assertion below
            #assert numpy.allclose(x.squeeze(), x_truth)
            opts = OptionsList()
            self.linear_system_cross_validation_test(A, rhs, num_folds,
                                                     num_eq_per_pt, solver,
                                                     regression_opts)
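
For reference, the analytic leave-one-out identity that the fast LSQCrossValidationIterator is described as exploiting can be written directly in NumPy. This is a sketch of the standard hat-matrix formula, not the iterator's implementation, and it assumes rhs is a 2D array:

import numpy

def leave_one_out_residuals(A, rhs):
    # x = inv(A'A)*A'*rhs and H = A*inv(A'A)*A'; the leave-one-out
    # validation residual at point i is r_i / (1 - H_ii), so every fold
    # is obtained from a single factorization instead of num_pts solves.
    AtA_inv = numpy.linalg.inv(numpy.dot(A.T, A))
    x = numpy.dot(AtA_inv, numpy.dot(A.T, rhs))
    residuals = rhs - numpy.dot(A, x)
    H_diag = numpy.sum(A * numpy.dot(A, AtA_inv), axis=1)
    return residuals / (1.0 - H_diag)[:, numpy.newaxis]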

    def test_lar_factory(self):
        """
        Test that the regression factory returns a LAR solver and that the
        solver works correctly, i.e. the last step of least angle
        regression returns the least squares solution
        """
        factory_opts = OptionsList({'regression_type': LEAST_ANGLE_REGRESSION})
        print type(factory_opts)
        solver = regression_solver_factory(factory_opts)
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        for store_history in [False, True]:
            regression_opts = {
                'regression_type': LEAST_ANGLE_REGRESSION,
                'verbosity': 0,
                'store-history': store_history
            }
            regression_opts = OptionsList(regression_opts)
            solver.solve(matrix, rhs, regression_opts)
            coef_lasso = solver.get_solutions_for_all_regularization_params(0)
            coef_lstsq = numpy.linalg.lstsq(matrix, rhs)[0]
            assert numpy.allclose(coef_lasso[:, -1].squeeze(),
                                  coef_lstsq.squeeze())

    def test_omp_pce_exact_recovery(self):
        base_dir = os.path.join(os.path.dirname(__file__), 'data')
        matrix = numpy.loadtxt(os.path.join(base_dir, 'pce_data.csv.gz'))
        rhs = numpy.loadtxt(os.path.join(base_dir, 'pce_target.csv.gz'))
        exact_coef = numpy.loadtxt(os.path.join(base_dir, 'pce_coef.csv.gz'))
        solver = OMPSolver()
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_solutions_for_all_regularization_params(0)
        assert numpy.allclose(coef_omp[:, -1], exact_coef)

    def test_lasso_early_exit_tol(self):
        """
        Test that the algorithm terminates correctly when a residual
        tolerance is set
        """
        tol = 3.40e3
        solver = LARSolver()
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        regression_opts = {
            'regression_type': LASSO_REGRESSION,
            'residual-tolerance': tol
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_lasso = solver.get_solutions_for_all_regularization_params(0)
        coef = coef_lasso[:, -1]
        residual = rhs - numpy.dot(self.diabetes_matrix, coef)
        assert numpy.linalg.norm(residual) < tol

    def test_lasso_early_exit_num_non_zeros(self):
        """
        Test that the algorithm terminates correctly when the maximum
        number of nonzeros is set
        """
        solver = LARSolver()
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        max_nnz = 9
        regression_opts = {
            'regression_type': LASSO_REGRESSION,
            'verbosity': 0,
            'max-num-non-zeros': max_nnz
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_lasso = solver.get_solutions_for_all_regularization_params(0)
        assert numpy.count_nonzero(coef_lasso[:, -1]) == max_nnz

    def test_omp_early_exit_tol(self):
        """
        Test that the algorithm terminates correctly when a residual
        tolerance is set
        """
        tol = 3.40e3
        solver = OMPSolver()
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0,
            'residual-tolerance': tol
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_solutions_for_all_regularization_params(0)
        coef = coef_omp[:, -1]
        residual = rhs - numpy.dot(self.diabetes_matrix, coef)
        assert numpy.linalg.norm(residual) < tol

    def test_omp_last_step(self):
        """
        Test that the last step of orthogonal matching pursuit returns the
        least squares solution
        """
        solver = OMPSolver()
        #solver.set_verbosity( 3 )
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_solutions_for_all_regularization_params(0)
        coef_lstsq = numpy.linalg.lstsq(matrix, rhs)[0]
        #print coef_omp[:,-1].squeeze(), coef_lstsq.squeeze()
        assert numpy.allclose(coef_omp[:, -1].squeeze(), coef_lstsq.squeeze())

    def test_omp_early_exit_num_non_zeros(self):
        """
        Test that the algorithm terminates correctly when the maximum
        number of nonzeros is set
        """
        solver = OMPSolver()
        matrix = self.diabetes_matrix
        rhs = self.diabetes_rhs
        max_nnz = 9
        # setting max-iters enforces max_nnz because no column of A can be
        # removed once it has been added to the active set
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0,
            'max-iters': max_nnz
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_solutions_for_all_regularization_params(0)
        assert numpy.count_nonzero(coef_omp[:, -1]) == max_nnz

    def test_lasso_last_step(self):
        """
        Test that the last step of the lasso variant of
        least angle regression returns the least squares solution
        """
        solver = LARSolver()
        matrix = self.diabetes_matrix  # use unnormalized data
        rhs = self.diabetes_rhs
        regression_opts = {
            'regression_type': LASSO_REGRESSION,
            'verbosity': 0,
            'normalize-inputs': False
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_lasso = solver.get_solutions_for_all_regularization_params(0)
        coef_lstsq = numpy.linalg.lstsq(matrix, rhs)[0]
        #print coef_lasso[:,-1].squeeze(), coef_lstsq.squeeze()
        assert numpy.allclose(coef_lasso[:, -1].squeeze(),
                              coef_lstsq.squeeze())

    def test_omp_path(self):
        """
        Test that the residual norm is always decreasing along the OMP path
        """
        solver = OMPSolver()
        #solver.set_verbosity( 3 )
        matrix = self.diabetes_matrix.copy()
        rhs = self.diabetes_rhs
        regression_opts = {
            'regression_type': ORTHOG_MATCH_PURSUIT,
            'verbosity': 0
        }
        regression_opts = OptionsList(regression_opts)
        solver.solve(matrix, rhs, regression_opts)
        coef_omp = solver.get_solutions_for_all_regularization_params(0)
        coef_lstsq = numpy.linalg.lstsq(matrix, rhs)[0]
        residual_norm_prev = numpy.finfo(float).max
        for i in xrange(coef_omp.shape[1]):
            coef = coef_omp[:, i]
            residual = rhs - numpy.dot(self.diabetes_matrix, coef)
            residual_norm = numpy.linalg.norm(residual)
            assert residual_norm < residual_norm_prev
            residual_norm_prev = residual_norm
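
A bare-bones version of the algorithm (a hypothetical sketch, not OMPSolver itself) shows why the residual norm must decrease: each step adds the column most correlated with the current residual and re-solves least squares on the enlarged active set:

import numpy

def omp_sketch(A, b, num_steps):
    # Hypothetical sketch of greedy orthogonal matching pursuit. The
    # least squares re-solve on the growing active set guarantees the
    # residual norm is monotonically non-increasing.
    active, residual = [], b.copy()
    for _ in range(num_steps):
        correlations = numpy.dot(A.T, residual)
        # columns already in the active set have near-zero correlation
        active.append(numpy.argmax(numpy.absolute(correlations)))
        coef = numpy.linalg.lstsq(A[:, active], b)[0]
        residual = b - numpy.dot(A[:, active], coef)
    return active, residual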

    def lsq_cross_validation_test(self, A, rhs, num_pts, num_folds, seed=0):
        """
        Helper function for testing LSQCrossValidationIterator.

        Tests:
         * Test validation residuals on each fold are correct

         * Test validation residuals on each fold are consistent with
           those produced by LinearSystemCrossValidationIterator using
           SVD regression

         * Test cross validation scores are consistent with
           those produced by LinearSystemCrossValidationIterator using
           SVD regression
        """
        lsq_cv_iterator = LSQCrossValidationIterator()
        cv_opts = {'num-points': num_pts, 'num-folds': num_folds, 'seed': seed}
        cv_opts = OptionsList(cv_opts)
        regression_opts = {'regression_type': SVD_LEAST_SQ_REGRESSION}
        regression_opts = OptionsList(regression_opts)
        cv_opts.set("regression-opts", regression_opts)
        lsq_cv_iterator.run(A, rhs, cv_opts)
        fold_diffs = lsq_cv_iterator.get_fold_validation_residuals()
        x = lsq_cv_iterator.get_coefficients()

        solver = regression_solver_factory(regression_opts)
        cv_iterator = LinearSystemCrossValidationIterator()
        cv_iterator.set_linear_system_solver(solver)
        cv_iterator.run(A, rhs, cv_opts)
        brute_force_fold_diffs = cv_iterator.get_fold_validation_residuals()
        assert len(fold_diffs) == num_folds
        for i in xrange(len(fold_diffs)):
            # Get fold validation residuals using the analytical formula
            # implemented by lsq_cv_iterator, i.e.
            AtA_inv = numpy.linalg.inv(numpy.dot(A.T, A))
            residuals = rhs - numpy.dot(A, x)
            if num_pts == num_folds:
                H = numpy.dot(A, numpy.dot(AtA_inv, A.T))
                cv_diffs = \
                  residuals[i:i+1]/(1.-numpy.diag(H)[i:i+1,numpy.newaxis])
            else:
                validation_indices = cv_iterator.get_fold_validation_indices(i)
                A_valid = cv_iterator.extract_matrix(A, validation_indices)
                residuals_valid = cv_iterator.extract_values(
                    residuals, validation_indices)
                I = numpy.eye(validation_indices.shape[0])
                H_valid = I - numpy.dot(A_valid, numpy.dot(AtA_inv, A_valid.T))
                H_valid_inv = numpy.linalg.inv(H_valid)
                cv_diffs = numpy.dot(H_valid_inv, residuals_valid)

            # Test the validation residuals on this fold are correct
            assert numpy.allclose(cv_diffs, fold_diffs[i])

            # Test validation residuals on this fold are consistent with
            # those produced by LinearSystemCrossValidationIterator.
            # cv_iterator.get_fold_validation_residuals() only contains diffs
            # from the last rhs column considered
            assert numpy.allclose(fold_diffs[i][0, -1],
                                  brute_force_fold_diffs[i][0])
        # Test cross validation scores are consistent with
        # those produced by LinearSystemCrossValidationIterator
        assert numpy.allclose(cv_iterator.get_scores(),
                              lsq_cv_iterator.get_scores())

    def cross_validated_solver_wrappers_of_lscv_iterator_helper(
            self, regression_type, regression_opts, A, rhs):

        # Get solutions using cross validated solver
        cv_solver = CrossValidatedSolver()
        cv_solver.set_linear_system_solver(regression_type)
        cv_solver.solve(A, rhs, regression_opts)
        solutions = cv_solver.get_final_solutions()
        scores = cv_solver.get_best_scores()

        # make sure the best scores returned by cross validated solver
        # are the same as the best scores computed by its internal
        # cross validation iterator
        cv_iterator = cv_solver.get_cross_validation_iterator()
        cv_scores = cv_iterator.get_scores()
        best_score_indices = cv_iterator.get_best_score_indices()
        num_rhs = rhs.shape[1]
        for i in xrange(num_rhs):
            assert numpy.allclose(cv_scores[i].min(), scores[i])
            assert numpy.argmin(cv_scores[i]) == best_score_indices[i]

        # For each rhs extract residual tolerances of
        # best cross validated solution and run a new solver instance
        # (of the same type wrapped by the cross validated solver)
        # and check it produces the same solution when run
        # on the entire data set
        for i in xrange(num_rhs):

            # run a new instance of the correct linear system solver
            local_regression_opts = {
                'regression_type': regression_type,
                'verbosity': 0
            }
            local_regression_opts = OptionsList(local_regression_opts)
            solver = regression_solver_factory(local_regression_opts)
            solver.solve(A, rhs, local_regression_opts)

            # check residuals of entire data set solutions are the same
            cv_solutions = cv_solver.get_final_solutions()
            cv_residual_norms = cv_solver.get_final_residuals()
            residual_norms = numpy.linalg.norm(rhs - numpy.dot(A, solutions),
                                               axis=0)
            assert numpy.allclose(residual_norms, cv_residual_norms)

            # Check that the residuals used to compute the final solution
            # (adjusted residuals) account for the difference in size between
            # the training sets used in cross validation and the entire
            # data set
            cv_iterator = cast_linear_cv_iterator(cv_iterator, regression_type)
            cv_adjusted_residuals = \
              cv_iterator.get_adjusted_best_residual_tolerances()
            cv_best_residual_tols = cv_iterator.get_best_residual_tolerances()
            cv_opts = regression_opts.get("cv-opts")
            num_folds = cv_opts.get("num-folds")
            assert numpy.allclose(
                cv_adjusted_residuals,
                cv_best_residual_tols * num_folds / (num_folds - 1.0))

            # check that the residual tolerances of the best cv solution
            # are set correctly in the linear system solver
            # contained in the cross validated solver instance
            derived_class = cast_linear_system_solver(
                cv_iterator.get_linear_system_solver(), regression_type)
            assert numpy.allclose(derived_class.get_residual_tolerances(),
                                  cv_adjusted_residuals)

            # Check that the l2 norm of the residual of the last solution
            # computed using the entire data set is the largest
            # residual norm that is smaller than the adjusted residuals
            residuals = \
              solver.get_residuals_for_all_regularization_params(i)
            I = numpy.where(residuals >= cv_adjusted_residuals[i])[0]
            I = I[-1] + 1
            assert cv_residual_norms[i] >= residuals[I]
            print cv_residual_norms[i], residuals[I], residuals[I - 1], I
            solver_solutions = \
              solver.get_solutions_for_all_regularization_params(i)[:,I]

            # check that the final solutions obtained by the cross
            # validated solver and the new solver instance are the same
            assert numpy.allclose(solver_solutions,
                                  cv_solver.get_final_solutions()[:, i])
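
The num_folds / (num_folds - 1.0) factor asserted above reflects that each cross validation training set contains only (num_folds - 1) / num_folds of the build points, so a residual tolerance selected on a fold must be scaled up before it is applied to the full data set. A worked example with the fold count used in these tests:

# With num_folds = 4, a best cross validation residual tolerance of 0.9
# is adjusted to 0.9 * 4 / 3 = 1.2 for the final solve on all the data.
num_folds = 4
cv_best_residual_tol = 0.9
adjusted = cv_best_residual_tol * num_folds / (num_folds - 1.0)
assert abs(adjusted - 1.2) < 1e-12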

def test_options_list_typemap_in():
    """
    Test that a wrapped function that takes an OptionsList as input
    also accepts a PythonDictionary
    """
    opts = OptionsList()
    str_types = ['int', 'double', 'string', 'optionslist']
    items = [1, 2., 'a', OptionsList()]
    names = ['key%s' % (i + 1) for i in range(len(items))]
    for i, type_str in enumerate(str_types):
        item = items[i]
        name = names[i]
        set_entry = PyDakota.swig_examples.options_list_interface.__dict__[
            type_str + "set_entry"]
        tmp_opts = set_entry({}, name, item)
        opts = set_entry(opts, name, item)
        assert tmp_opts == {names[i]: items[i]}

    pydict = dict((names[i], items[i]) for i in range(len(items)))
    assert opts == pydict

    opts = OptionsList()
    list_of_opts = []
    opts1 = OptionsList()
    opts1.set(names[0], items[0])
    list_of_opts.append(opts1)
    opts2 = OptionsList()
    opts2.set(names[1], items[1])
    list_of_opts.append(opts2)
    opts.set(names[3], list_of_opts)
    #tmp_opts = intset_entry(opts, names[2], 1)
    #assert tmp_opts == {names[2]: 1,
    #                    names[3]: [{names[0]: items[0]}, {names[1]: items[1]}]}
    print opts, {names[3]: [{names[0]: items[0]}, {names[1]: items[1]}]}
    assert opts == {names[3]: [{names[0]: items[0]}, {names[1]: items[1]}]}
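
The same typemap is what lets the regression options throughout these tests be written as plain dictionaries. A minimal hedged round trip, using only the OptionsList operations exercised above (dict constructor, set, and dict equality):

opts = OptionsList({'verbosity': 0})
opts.set('num-folds', 4)
assert opts == {'verbosity': 0, 'num-folds': 4}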

    def linear_system_cross_validation_test(self,
                                            vand,
                                            rhs,
                                            num_folds,
                                            num_eq_per_pt,
                                            solver,
                                            regression_opts,
                                            residual_tols=None,
                                            fault_data=None):
        """
        Helper function for testing K fold cross validation for linear
        systems.

        Tests:
         * The validation residuals on each fold are correct

         * The cross validation scores are correct. This implicitly tests
           the interpolation of cross validation scores on each fold from
           an arbitrary set of residual tolerances onto a common (unique)
           set of tolerances.
        """
        #rhs = rhs.squeeze()
        #assert rhs.ndim==1
        assert vand.shape[0] % num_eq_per_pt == 0
        num_build_pts = vand.shape[0] // num_eq_per_pt
        #################################
        # Use built-in cross validation #
        #################################
        cv_iterator = LinearSystemCrossValidationIterator()
        cv_iterator.set_linear_system_solver(solver)
        cv_opts = {'num-points': num_build_pts, 'num-folds': num_folds}
        cv_opts = OptionsList(cv_opts)
        cv_opts.set("regression-opts", regression_opts)
        cv_iterator.run(vand, rhs.squeeze(), cv_opts)
        cv_scores = cv_iterator.get_scores()
        cv_fold_diffs = cv_iterator.get_fold_validation_residuals()
        assert len(cv_fold_diffs) == num_folds
        cv_fold_tols = cv_iterator.get_fold_tolerances()
        cv_unique_tols = cv_iterator.get_unique_tolerances()
        cv_fold_scores = cv_iterator.get_fold_scores()

        #################################
        # Use brute force cross validation as the reference `truth'
        #################################
        # cv_iterator randomizes points in case there is a pattern, e.g.
        # the points come from a tensor grid; to run this test we must know
        # which random permutation of the points was used

        for rhs_num in range(rhs.shape[1]):
            fold_tols = []
            fold_diffs = []
            for i in xrange(num_folds):
                # partition the data into a training and validation set
                A_train, b_train, valid_ind = partition_data(
                    vand, rhs[:, rhs_num], cv_iterator, i, num_build_pts,
                    num_eq_per_pt, fault_data)
                # Compute the solution on the training data
                solver.solve(A_train, b_train, regression_opts)
                x = solver.get_solutions_for_all_regularization_params(0)
                # Test that the validation residual is stored correctly for
                # each linear model.
                fold_diffs.append(
                    (numpy.tile(rhs[:, rhs_num].reshape(rhs.shape[0], 1),
                                (1, x.shape[1])) -
                     numpy.dot(vand, x))[valid_ind])
                if rhs_num == rhs.shape[1] - 1:
                    # cv_fold_diffs is only stored for the last RHS
                    assert numpy.allclose(fold_diffs[i], cv_fold_diffs[i])
                fold_tols.append(
                    solver.get_residuals_for_all_regularization_params(0))

            # Determine the unique tolerances at which to compute cross
            # validation errors
            max_num_path_steps = 0
            unique_tols = numpy.empty((0), numpy.double)
            for i in xrange(num_folds):
                fold_tols[i] = fold_tols[i][::-1]
                if rhs_num == rhs.shape[1] - 1:
                    # cv_fold_tols is only stored for last RHS
                    assert numpy.allclose(fold_tols[i], cv_fold_tols[i])

                unique_tols = numpy.concatenate((unique_tols, fold_tols[i]))
                num_path_steps = fold_diffs[i].shape[1]
                max_num_path_steps = max(max_num_path_steps, num_path_steps)
            unique_tols = numpy.unique(unique_tols)

            # There are often thousands of unique parameter values so take
            # a thinned subset of these. The size of the subset is controlled by
            # max_num_path_steps
            num_unique_res = unique_tols.shape[0]
            stride = num_unique_res // max_num_path_steps
            unique_tols = unique_tols[::stride]
            assert numpy.allclose(cv_unique_tols[rhs_num], unique_tols)

            unique_errors = numpy.empty((unique_tols.shape[0], num_folds),
                                        numpy.double)
            for i in xrange(num_folds):
                # for the current fold compute the cross validation error
                fold_tols_i = fold_tols[i]
                errors_i = numpy.sum(fold_diffs[i]**2, axis=0)

                if unique_tols.shape[0] > 1:
                    # fold_tols is reversed internally by the C++ code,
                    # so we must reverse errors_i to be consistent
                    errors_i = errors_i[::-1]
                    if rhs_num == rhs.shape[1] - 1:
                        # cv_errors only stored for last RHS
                        assert numpy.allclose(errors_i, cv_fold_scores[i])

                    # Enforce constant interpolation when interpolation is
                    # outside the range of fold_tols_i
                    if (fold_tols_i[0] > unique_tols[0]):
                        fold_tols_i = numpy.r_[unique_tols[0], fold_tols_i]
                        errors_i = numpy.r_[errors_i[0], errors_i]
                    if (fold_tols_i[-1] < unique_tols[-1]):
                        fold_tols_i = numpy.r_[fold_tols_i, unique_tols[-1]]
                        errors_i = numpy.r_[errors_i, errors_i[-1]]

                    # Interpolate the cross validation errors onto a unique
                    # set of tolerances
                    poly = interpolate.interp1d(fold_tols_i, errors_i)
                    unique_errors[:, i] = poly(unique_tols)
                else:
                    unique_errors[:, i] = errors_i

            # Test that the cross validation scores computed by the `truth'
            # and by the internal linear solver cross validation module agree
            scores = numpy.sum(unique_errors, axis=1) / num_build_pts
            assert numpy.allclose(scores, cv_scores[rhs_num])
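
The padding logic above, which enforces constant extrapolation outside the range of each fold's tolerances before interpolating onto the unique tolerances, can be isolated into a small helper. This is a sketch under the same assumptions as the test (fold_tols increasing, SciPy available), not library code:

import numpy
from scipy import interpolate

def interp_fold_scores(fold_tols, fold_errors, unique_tols):
    # Pad both ends so interp1d returns the boundary error values
    # (constant extrapolation) outside [fold_tols[0], fold_tols[-1]].
    if fold_tols[0] > unique_tols[0]:
        fold_tols = numpy.r_[unique_tols[0], fold_tols]
        fold_errors = numpy.r_[fold_errors[0], fold_errors]
    if fold_tols[-1] < unique_tols[-1]:
        fold_tols = numpy.r_[fold_tols, unique_tols[-1]]
        fold_errors = numpy.r_[fold_errors, fold_errors[-1]]
    return interpolate.interp1d(fold_tols, fold_errors)(unique_tols)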