예제 #1
0
    def test_polynomial_chaos_expansion( self ):
        """
        Check PCE evaluation, mean and variance against closed-form values.

        Three order-2 expansions with every coefficient set to one are
        exercised:

        * a 1D expansion with a Jacobi(0,0) basis,
        * a 2D expansion with a Jacobi(0,0) basis,
        * a 2D expansion on [0,1] x (-inf,inf) with a Jacobi(0,0) basis in
          the first dimension and a Hermite basis in the second.

        In each case the reference values are written out by hand as a sum
        of the individual basis terms evaluated at the test points.
        """

        # test 1D bounded domain
        num_dims = 1
        func_domain = TensorProductDomain( num_dims )
        poly_1d = [ JacobiPolynomial1D( 0., 0. ) ]
        basis = TensorProductBasis( num_dims, poly_1d )
        predictor = PCE( num_dims, basis, order = 2  )
        predictor.set_coefficients( numpy.ones( predictor.coeff.shape ) )
        predictor.function_domain( func_domain )
        num_test_points = 20
        test_points = \
            numpy.linspace( 0., 1., num_test_points ).reshape(1,num_test_points)
        pred_vals = predictor.evaluate_set( test_points )
        # The reference polynomials below are written in terms of a variable
        # on [-1,1], so map the [0,1] test points onto that interval.
        x = 2. * test_points - 1.
        test_vals = numpy.ones( num_test_points ) + x[0,:] + \
            0.5 * ( 3.*x[0,:]**2 - 1. )
        assert numpy.allclose( test_vals, pred_vals )
        test_mean = 1.
        test_variance = 1./3. + 1./5.
        assert numpy.allclose( predictor.mean(), test_mean )
        assert numpy.allclose( predictor.variance(), test_variance )

        # test 2D bounded domain
        num_dims = 2
        func_domain = TensorProductDomain( num_dims )
        poly_1d = [ JacobiPolynomial1D( 0., 0. ) ]
        basis = TensorProductBasis( num_dims, poly_1d )
        predictor = PCE( num_dims, basis, order = 2  )
        predictor.set_coefficients( numpy.ones( predictor.coeff.shape ) )
        predictor.function_domain( func_domain )
        num_test_points = 20
        test_points = numpy.random.uniform( 0., 1., ( num_dims, num_test_points))
        pred_vals = predictor.evaluate_set( test_points )
        x = 2. * test_points - 1.
        test_vals = numpy.ones( num_test_points ) + x[0,:] + x[1,:] + \
            0.5 * ( 3.*x[0,:]**2 - 1. ) + 0.5 * ( 3.*x[1,:]**2 - 1. ) + \
            x[0,:] * x[1,:]
        assert numpy.allclose( test_vals, pred_vals )
        test_mean = 1.
        test_variance = 2. * 1./3. + 1./9. + 2. * 1./5.
        assert numpy.allclose( predictor.mean(), test_mean )
        assert numpy.allclose( predictor.variance(), test_variance )

        # test when domain is unbounded in one dimension
        num_dims = 2
        func_domain = TensorProductDomain( num_dims, 
                                           ranges = [[0.,1.],
                                                     [-numpy.inf, numpy.inf]] )
        poly_1d = [ JacobiPolynomial1D( 0., 0. ), HermitePolynomial1D() ]
        basis = TensorProductBasis( num_dims, poly_1d )
        predictor = PCE( num_dims, basis, order = 2  )
        predictor.set_coefficients( numpy.ones( predictor.coeff.shape ) )
        predictor.function_domain( func_domain )
        num_test_points = 20
        # Uniform samples for the bounded dimension, standard normal samples
        # for the unbounded one; both sized from num_test_points so the
        # sample count is defined in exactly one place.
        x_1 = numpy.random.uniform( 0., 1., ( 1, num_test_points ) )
        x_2 = numpy.random.normal( 0., 1., ( 1, num_test_points ) )
        test_points = numpy.vstack( ( x_1, x_2 ) )
        pred_vals = predictor.evaluate_set( test_points )
        # Work on a copy so the points handed to the predictor are not
        # modified; only the bounded dimension is rescaled to [-1,1].
        x = test_points.copy()
        x[0,:] = 2. * x[0,:] - 1.
        test_vals = numpy.ones( num_test_points ) + x[0,:] + x[1,:] + \
            0.5 * ( 3.*x[0,:]**2 - 1. ) + ( x[1,:]**2 - 1. ) + \
            x[0,:] * x[1,:]
        assert numpy.allclose( test_vals, pred_vals )
        test_mean = 1.
        test_variance = 2. * 1./3. + 1./5. + 2. + 1.
        assert numpy.allclose( predictor.mean(), test_mean )
        assert numpy.allclose( predictor.variance(), test_variance )
예제 #2
0
    def cv_vs_error_study( build_pts, build_vals, domain, 
               test_pts, test_vals, 
               results_file = None,
               cv_file = None, solver_type = 2 ):
        num_dims = build_pts.shape[0]
        if ( num_dims == 10 ):
            max_order = 5
        elif ( num_dims == 15 ):
            max_order = 4
        else: 
            max_order = 3

        poly_1d = [ LegendrePolynomial1D() ]
        basis = TensorProductBasis( num_dims, poly_1d )
        pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain )

        orders = numpy.arange( 1, max_order + 1 )
        solvers = numpy.array( [solver_type], numpy.int32 )
        cv_params_grid_array = cartesian_product( [solvers,orders] )
        cv_params_grid = []
        for i in xrange( cv_params_grid_array.shape[0] ):
            cv_params = {}
            cv_params['solver'] = numpy.int32( cv_params_grid_array[i,0] )
            cv_params['order'] = numpy.int32( cv_params_grid_array[i,1] )
            num_pce_terms = polynomial_space_dimension( num_dims,
                                                        cv_params['order'] )
            if ( cv_params['solver'] <= 1 and 
                 num_pce_terms >= build_pts.shape[1] ):
                cv_params['lambda'] = 1.e-12
            cv_params_grid.append( cv_params )

        # print cv_params_grid

        # cv_iterator = LeaveOneOutCrossValidationIterator()    
        cv_iterator = KFoldCrossValidationIterator( num_folds = 20 )
        CV = GridSearchCrossValidation( cv_iterator, pce,
                                        use_predictor_cross_validation = True,
                                        use_fast_predictor_cross_validation = True )
        t0 = time.time()
        CV.run( build_pts, build_vals, cv_params_grid )
        time_taken = time.time() - t0
        print 'cross validation took ', time_taken, ' seconds'

        print "################"
        print "Best cv params: ", CV.best_cv_params
        print "Best cv score: ", CV.best_score
        print "################"

        for order in orders:
            residual_norms = numpy.empty( len( CV.cv_params_set ), numpy.double )
            scores = numpy.empty( len( CV.cv_params_set ), numpy.double )
            k = 0
            for i in xrange( len( CV.cv_params_set ) ):
                if ( CV.cv_params_set[i]['order'] == order ):
                    residual_norms[k] = CV.cv_params_set[i]['norm_residual']
                    scores[k] = CV.scores[i]
                    k += 1

            residual_norms.resize( k )
            scores.resize( k )

            pce = PCE( num_dims, 
                       order = order, 
                       basis = basis, 
                       func_domain = domain )
            V = pce.vandermonde( build_pts ).T
            pce.set_solver( CV.best_cv_params['solver'] )
            # pce.linear_solver.max_iterations = 3
            sols, sol_metrics = pce.linear_solver.solve( V, build_vals )
            from sklearn.linear_model import orthogonal_mp
            l2_error = numpy.empty( ( sols.shape[1] ), numpy.double )
            residuals = numpy.empty( ( sols.shape[1] ), numpy.double )
            test_pts = numpy.random.uniform( 0., 1., ( num_dims, 1000 ) )
            f = GenzModel( domain, 'oscillatory' )
            # f.set_coefficients( 4.5, 'no-decay' )
            f.set_coefficients( 4.5, 'quadratic-decay' )
            test_vals = f( test_pts ).reshape( ( test_pts.shape[1], 1 ) )
            for i in xrange( sols.shape[1] ):
                coeff = sols[:,i]
                pce.set_coefficients( coeff )
                residuals[i] = numpy.linalg.norm( build_vals - 
                                                  pce.evaluate_set( build_pts ) )
                num_test_pts = test_pts.shape[1]
                pce_vals_pred = pce.evaluate_set( test_pts ).T
                error = test_vals.squeeze() - pce_vals_pred
                l2_error[i] = numpy.linalg.norm( error ) / numpy.sqrt( num_test_pts )

            import pylab
            print residuals, l2_error
            print residual_norms, scores
            pylab.loglog( residuals, l2_error, label  = str( order ) + 'true' )
            pylab.loglog( residual_norms, scores, label = str( order )+'-cv' )
        pylab.xlim([1e-3,10])
        pylab.legend()
        pylab.show()
예제 #3
0
def pce_study( build_pts, build_vals, domain, 
               test_pts, test_vals, 
               results_file = None,
               cv_file = None, solver_type = 2 ):

    num_dims = build_pts.shape[0]

    index_generator = IndexGenerator()

    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain )

    if ( solver_type == 1 ):
        num_folds = build_pts.shape[1]
    else:
        num_folds = 20

    index_norm_orders = numpy.linspace( 0.4, 1.0, 4 )    
    #if (solver_tupe == 1):
    #    index_norm_orders = [.4,.5,.6,.7,.8,.9,1.]

    #solvers = numpy.array( [solver_type], numpy.int32 )
    #cv_params_grid_array = cartesian_product( [solvers,orders] )
    cv_params_grid = []
    for index_norm_order in index_norm_orders:
        level = 2
        # determine what range of orders to consider. 
        # spefically consider any order that results in a pce with terms <= 3003
        while ( True ):
            #index_generator.set_parameters( num_dims, level, 
            #                                index_norm_order = index_norm_order)
            indices = index_generator.get_isotropic_indices( num_dims, level, 
                                                             index_norm_order )
            num_indices = len( indices )
            print level, index_norm_order, len ( indices )
            if ( num_indices > 3003 ):
                break
            
            cv_params = {}
            cv_params['solver'] = solver_type
            cv_params['order'] = level
            cv_params['index_norm_order'] = index_norm_order

            if ( cv_params['solver'] > 1 or 
                 num_indices <= build_pts.shape[1] ):
                # only do least squares on over-determined systems
                cv_params_grid.append( cv_params  )

            level += 1

    print cv_params_grid

    # cv_iterator = LeaveOneOutCrossValidationIterator()    
    cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds )
    CV = GridSearchCrossValidation( cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = True )
    t0 = time.time()
    CV.run( build_pts, build_vals, cv_params_grid )
    time_taken = time.time() - t0
    print 'cross validation took ', time_taken, ' seconds'
    
    print "################"
    print "Best cv params: ", CV.best_cv_params
    print "Best cv score: ", CV.best_score
    print "################"

    #for i in xrange( len( CV.cv_params_set ) ):
    #    print CV.cv_params_set[i], CV.scores[i]

    best_order = CV.best_cv_params['order']
    best_index_norm_order = CV.best_cv_params['index_norm_order']

    best_pce = PCE( num_dims, 
                    order = best_order, 
                    basis = basis, 
                    func_domain = domain,
                    index_norm_order = best_index_norm_order)
    V = best_pce.vandermonde( build_pts ).T
    best_pce.set_solver( CV.best_cv_params['solver'] )
    if  cv_params['solver'] != 1 and cv_params['solver'] != 5:
        best_res_tol = CV.best_cv_params['norm_residual']
        best_pce.linear_solver.residual_tolerance = best_res_tol
        
    sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals )
    coeff = sols[:,-1]

    best_pce.set_coefficients( coeff )
    error = abs( build_vals - best_pce.evaluate_set( build_pts ) )
    print max( error )

    print 'Evaluating best pce at test points'
    num_test_pts = test_pts.shape[1]
    pce_vals_pred = best_pce.evaluate_set( test_pts ).T
    print test_vals.shape, pce_vals_pred.shape
    error = test_vals.squeeze() - pce_vals_pred
    linf_error = numpy.max( numpy.absolute( error ) )
    l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts )
    mean = numpy.mean( pce_vals_pred )
    var =  numpy.var( pce_vals_pred )
    pce_mean = best_pce.mean()
    pce_var = best_pce.variance()

    if results_file is not None:
        results_file.write( '%1.15e' %linf_error + ',' +  '%1.15e' %l2_error + 
                            ',' +  '%1.15e' %mean + ',' +  '%1.15e' %var + 
                            ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n')

    print "linf error: ", linf_error
    print "l2 error: ", l2_error
    print "mean: ", mean 
    print "var: ", var
    print "pce mean: ", pce_mean 
    print "pce var: ", pce_var
예제 #4
0
def pce_study( build_pts, build_vals, domain, 
               test_pts, test_vals, 
               results_file = None,
               cv_file = None, solver_type = 2 ):

    num_dims = build_pts.shape[0]

    index_generator = IndexGenerator()

    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain )

    if ( solver_type == 1 ):
        num_folds = build_pts.shape[1]
    else:
        num_folds = 20

    index_norm_orders = numpy.linspace( 0.4, 1.0, 4 )
    #solvers = numpy.array( [solver_type], numpy.int32 )
    #cv_params_grid_array = cartesian_product( [solvers,orders] )
    cv_params_grid = []
    for index_norm_order in index_norm_orders:
        level = 2
        # determine what range of orders to consider. 
        # spefically consider any order that results in a pce with terms <= 3003
        while ( True ):
            index_generator.set_parameters( num_dims, level, 
                                            index_norm_order = index_norm_order )
            index_generator.build_isotropic_index_set()
            print level, index_norm_order, index_generator.num_indices
            if ( index_generator.num_indices > 3003 ):
                break
            
            cv_params = {}
            cv_params['solver'] = solver_type
            cv_params['order'] = level
            cv_params['index_norm_order'] = index_norm_order

            if ( cv_params['solver'] > 1 or 
                 index_generator.num_indices <= build_pts.shape[1] ):
                # only do least squares on over-determined systems
                cv_params_grid.append( cv_params  )
            else:
                break

            level += 1

    print cv_params_grid

    # cv_iterator = LeaveOneOutCrossValidationIterator()    
    cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds )
    CV = GridSearchCrossValidation( cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = True )
    t0 = time.time()
    CV.run( build_pts, build_vals, cv_params_grid )
    time_taken = time.time() - t0
    print 'cross validation took ', time_taken, ' seconds'
    
    print "################"
    print "Best cv params: ", CV.best_cv_params
    print "Best cv score: ", CV.best_score
    print "################"

    #for i in xrange( len( CV.cv_params_set ) ):
    #    print CV.cv_params_set[i], CV.scores[i]

    best_order = CV.best_cv_params['order']
    best_index_norm_order = CV.best_cv_params['index_norm_order']

    best_pce = PCE( num_dims, 
                    order = best_order, 
                    basis = basis, 
                    func_domain = domain,
                    index_norm_order = best_index_norm_order)
    V = best_pce.vandermonde( build_pts ).T
    best_pce.set_solver( CV.best_cv_params['solver'] )
    if  cv_params['solver'] > 1 :
        best_res_tol = CV.best_cv_params['norm_residual']
        best_pce.linear_solver.residual_tolerance = best_res_tol
        
    sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals )
    coeff = sols[:,-1]

    best_pce.set_coefficients( coeff )
    error = abs( build_vals - best_pce.evaluate_set( build_pts ) )
    print max( error )

    print 'Evaluating best pce at test points'
    num_test_pts = test_pts.shape[1]
    pce_vals_pred = best_pce.evaluate_set( test_pts ).T
    print test_vals.shape, pce_vals_pred.shape
    error = test_vals.squeeze() - pce_vals_pred
    linf_error = numpy.max( numpy.absolute( error ) )
    l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts )
    mean = numpy.mean( pce_vals_pred )
    var =  numpy.var( pce_vals_pred )
    pce_mean = best_pce.mean()
    pce_var = best_pce.variance()

    if results_file is not None:
        results_file.write( '%1.15e' %linf_error + ',' +  '%1.15e' %l2_error + 
                            ',' +  '%1.15e' %mean + ',' +  '%1.15e' %var + 
                            ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n')

    print "linf error: ", linf_error
    print "l2 error: ", l2_error
    print "mean: ", mean 
    print "var: ", var
    print "pce mean: ", pce_mean 
    print "pce var: ", pce_var

    me, te, ie = best_pce.get_sensitivities()
    interaction_values, interaction_terms = best_pce.get_interactions()

    show = False
    fignum = 1
    filename = 'oscillator-individual-interactions.png'
    plot_interaction_values( interaction_values, interaction_terms, title = 'Sobol indices', truncation_pct = 0.95, filename = filename, show = show,
                             fignum = fignum )
    fignum += 1
    filename = 'oscillator-dimension-interactions.png'
    plot_interaction_effects( ie, title = 'Dimension-wise joint effects', truncation_pct = 0.95, filename = filename, show = show,fignum = fignum   )
    fignum += 1
    filename = 'oscillator-main-effects.png'
    plot_main_effects( me, truncation_pct = 0.95, title = 'Main effect sensitivity indices', filename = filename, show = show, fignum = fignum  )
    fignum += 1
    filename = 'oscillator-total-effects.png'
    plot_total_effects( te, truncation_pct = 0.95, title = 'Total effect sensitivity indices', filename = filename, show = show, fignum = fignum  )
    fignum += 1

    from scipy.stats.kde import gaussian_kde
    pylab.figure( fignum  )
    pce_kde = gaussian_kde( pce_vals_pred )
    pce_kde_x = numpy.linspace( pce_vals_pred.min(), pce_vals_pred.max(), 100 )
    pce_kde_y = pce_kde( pce_kde_x )
    pylab.plot( pce_kde_x, pce_kde_y,label = 'pdf of surrogate' )
    true_kde = gaussian_kde( test_vals )
    true_kde_x = numpy.linspace( test_vals.min(), test_vals.max(), 100 )
    true_kde_y = true_kde( true_kde_x )
    pylab.plot( true_kde_x, true_kde_y, label = 'true pdf' )
    pylab.legend(loc=2)
    pylab.show()