def set_order( self, order, index_norm_order = 1.0 ):
    """
    Store the requested order and rebuild the basis index set to match.

    A fresh IndexGenerator is asked for the isotropic indices in
    self.num_dims dimensions; the term count and the coefficient vector
    are then reset so they agree with the new index set.

    Parameters
    ----------
    order :
        Stored on self.order and forwarded to
        IndexGenerator.get_isotropic_indices.
    index_norm_order : float, optional
        Forwarded unchanged to get_isotropic_indices (default 1.0).
    """
    self.order = order

    self.basis_indices = IndexGenerator().get_isotropic_indices(
        self.num_dims, order, index_norm_order )
    self.num_terms = len( self.basis_indices )

    # Coefficients from a previous order no longer line up with the new
    # index set, so start again from zeros (one entry per basis term).
    self.coeff = numpy.zeros( self.num_terms, dtype = numpy.double )
from utilities.indexing import PolynomialIndex, IndexGenerator # num_dims = 2, max_level = 6 index_norm_order = 0.5 num_dims = 10 max_level = 30 index_generator = IndexGenerator() index_generator.set_parameters( num_dims, max_level, index_norm_order = 0.5, priority_weight = 1. ) index_generator.build_isotropic_index_set() indices = index_generator.get_all_indices() print index_generator.num_indices
def test_index_generator( self ):
    """
    Compare the isotropic index sets built by IndexGenerator with
    hand-enumerated reference sets.

    Each case configures a generator, builds the isotropic index set,
    expands every index with uncompressed_data( num_dims ) and checks the
    rows (after unique_matrix_rows on both sides) against the reference.
    """
    def check_index_set( generator, num_dims, max_level, index_norm_order,
                         expected ):
        # Configure, build and compare one index set against `expected`
        generator.set_parameters( num_dims, max_level,
                                  index_norm_order = index_norm_order,
                                  priority_weight = 1. )
        generator.build_isotropic_index_set()
        computed = [ index.uncompressed_data( num_dims )
                     for index in generator.get_all_indices() ]
        computed = unique_matrix_rows( numpy.array( computed ) )
        reference = unique_matrix_rows( numpy.array( expected ) )
        assert numpy.allclose( reference, computed )

    # The first two cases deliberately share one generator, so the second
    # also exercises re-parameterising an already used instance.
    shared_generator = IndexGenerator()

    # num_dims = 2, max_level = 3, index_norm_order = 1
    check_index_set( shared_generator, 2, 3, 1,
                     [[0, 0], [1, 0], [2, 0], [0, 1], [1, 1],
                      [0, 2], [1, 2], [2, 1], [3, 0], [0, 3]] )

    # num_dims = 3, max_level = 2, index_norm_order = 1
    check_index_set( shared_generator, 3, 2, 1,
                     [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
                      [2, 0, 0], [1, 1, 0], [0, 2, 0], [1, 0, 1],
                      [0, 1, 1], [0, 0, 2]] )

    # num_dims = 2, max_level = 3, index_norm_order = 0.5 (new generator)
    check_index_set( IndexGenerator(), 2, 3, 0.5,
                     [[0, 0], [1, 0], [2, 0], [3, 0],
                      [0, 1], [0, 2], [0, 3]] )

    # num_dims = 2, max_level = 6, index_norm_order = 0.5 (new generator)
    check_index_set( IndexGenerator(), 2, 6, 0.5,
                     [[0, 0], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0],
                      [6, 0], [1, 1], [2, 1], [1, 2], [0, 1], [0, 2],
                      [0, 3], [0, 4], [0, 5], [0, 6]] )
def pce_study( build_pts, build_vals, domain, test_pts, test_vals, results_file = None, cv_file = None, solver_type = 2 ): num_dims = build_pts.shape[0] index_generator = IndexGenerator() poly_1d = [ LegendrePolynomial1D() ] basis = TensorProductBasis( num_dims, poly_1d ) pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain ) if ( solver_type == 1 ): num_folds = build_pts.shape[1] else: num_folds = 20 index_norm_orders = numpy.linspace( 0.4, 1.0, 4 ) #if (solver_tupe == 1): # index_norm_orders = [.4,.5,.6,.7,.8,.9,1.] #solvers = numpy.array( [solver_type], numpy.int32 ) #cv_params_grid_array = cartesian_product( [solvers,orders] ) cv_params_grid = [] for index_norm_order in index_norm_orders: level = 2 # determine what range of orders to consider. # spefically consider any order that results in a pce with terms <= 3003 while ( True ): #index_generator.set_parameters( num_dims, level, # index_norm_order = index_norm_order) indices = index_generator.get_isotropic_indices( num_dims, level, index_norm_order ) num_indices = len( indices ) print level, index_norm_order, len ( indices ) if ( num_indices > 3003 ): break cv_params = {} cv_params['solver'] = solver_type cv_params['order'] = level cv_params['index_norm_order'] = index_norm_order if ( cv_params['solver'] > 1 or num_indices <= build_pts.shape[1] ): # only do least squares on over-determined systems cv_params_grid.append( cv_params ) level += 1 print cv_params_grid # cv_iterator = LeaveOneOutCrossValidationIterator() cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds ) CV = GridSearchCrossValidation( cv_iterator, pce, use_predictor_cross_validation = True, use_fast_predictor_cross_validation = True ) t0 = time.time() CV.run( build_pts, build_vals, cv_params_grid ) time_taken = time.time() - t0 print 'cross validation took ', time_taken, ' seconds' print "################" print "Best cv params: ", CV.best_cv_params print "Best cv score: ", CV.best_score print "################" #for 
i in xrange( len( CV.cv_params_set ) ): # print CV.cv_params_set[i], CV.scores[i] best_order = CV.best_cv_params['order'] best_index_norm_order = CV.best_cv_params['index_norm_order'] best_pce = PCE( num_dims, order = best_order, basis = basis, func_domain = domain, index_norm_order = best_index_norm_order) V = best_pce.vandermonde( build_pts ).T best_pce.set_solver( CV.best_cv_params['solver'] ) if cv_params['solver'] != 1 and cv_params['solver'] != 5: best_res_tol = CV.best_cv_params['norm_residual'] best_pce.linear_solver.residual_tolerance = best_res_tol sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals ) coeff = sols[:,-1] best_pce.set_coefficients( coeff ) error = abs( build_vals - best_pce.evaluate_set( build_pts ) ) print max( error ) print 'Evaluating best pce at test points' num_test_pts = test_pts.shape[1] pce_vals_pred = best_pce.evaluate_set( test_pts ).T print test_vals.shape, pce_vals_pred.shape error = test_vals.squeeze() - pce_vals_pred linf_error = numpy.max( numpy.absolute( error ) ) l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts ) mean = numpy.mean( pce_vals_pred ) var = numpy.var( pce_vals_pred ) pce_mean = best_pce.mean() pce_var = best_pce.variance() if results_file is not None: results_file.write( '%1.15e' %linf_error + ',' + '%1.15e' %l2_error + ',' + '%1.15e' %mean + ',' + '%1.15e' %var + ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n') print "linf error: ", linf_error print "l2 error: ", l2_error print "mean: ", mean print "var: ", var print "pce mean: ", pce_mean print "pce var: ", pce_var
def pce_study( build_pts, build_vals, domain, test_pts, test_vals, results_file = None, cv_file = None, solver_type = 2 ): num_dims = build_pts.shape[0] index_generator = IndexGenerator() poly_1d = [ LegendrePolynomial1D() ] basis = TensorProductBasis( num_dims, poly_1d ) pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain ) if ( solver_type == 1 ): num_folds = build_pts.shape[1] else: num_folds = 20 index_norm_orders = numpy.linspace( 0.4, 1.0, 4 ) #solvers = numpy.array( [solver_type], numpy.int32 ) #cv_params_grid_array = cartesian_product( [solvers,orders] ) cv_params_grid = [] for index_norm_order in index_norm_orders: level = 2 # determine what range of orders to consider. # spefically consider any order that results in a pce with terms <= 3003 while ( True ): index_generator.set_parameters( num_dims, level, index_norm_order = index_norm_order ) index_generator.build_isotropic_index_set() print level, index_norm_order, index_generator.num_indices if ( index_generator.num_indices > 3003 ): break cv_params = {} cv_params['solver'] = solver_type cv_params['order'] = level cv_params['index_norm_order'] = index_norm_order if ( cv_params['solver'] > 1 or index_generator.num_indices <= build_pts.shape[1] ): # only do least squares on over-determined systems cv_params_grid.append( cv_params ) else: break level += 1 print cv_params_grid # cv_iterator = LeaveOneOutCrossValidationIterator() cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds ) CV = GridSearchCrossValidation( cv_iterator, pce, use_predictor_cross_validation = True, use_fast_predictor_cross_validation = True ) t0 = time.time() CV.run( build_pts, build_vals, cv_params_grid ) time_taken = time.time() - t0 print 'cross validation took ', time_taken, ' seconds' print "################" print "Best cv params: ", CV.best_cv_params print "Best cv score: ", CV.best_score print "################" #for i in xrange( len( CV.cv_params_set ) ): # print CV.cv_params_set[i], CV.scores[i] 
best_order = CV.best_cv_params['order'] best_index_norm_order = CV.best_cv_params['index_norm_order'] best_pce = PCE( num_dims, order = best_order, basis = basis, func_domain = domain, index_norm_order = best_index_norm_order) V = best_pce.vandermonde( build_pts ).T best_pce.set_solver( CV.best_cv_params['solver'] ) if cv_params['solver'] > 1 : best_res_tol = CV.best_cv_params['norm_residual'] best_pce.linear_solver.residual_tolerance = best_res_tol sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals ) coeff = sols[:,-1] best_pce.set_coefficients( coeff ) error = abs( build_vals - best_pce.evaluate_set( build_pts ) ) print max( error ) print 'Evaluating best pce at test points' num_test_pts = test_pts.shape[1] pce_vals_pred = best_pce.evaluate_set( test_pts ).T print test_vals.shape, pce_vals_pred.shape error = test_vals.squeeze() - pce_vals_pred linf_error = numpy.max( numpy.absolute( error ) ) l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts ) mean = numpy.mean( pce_vals_pred ) var = numpy.var( pce_vals_pred ) pce_mean = best_pce.mean() pce_var = best_pce.variance() if results_file is not None: results_file.write( '%1.15e' %linf_error + ',' + '%1.15e' %l2_error + ',' + '%1.15e' %mean + ',' + '%1.15e' %var + ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n') print "linf error: ", linf_error print "l2 error: ", l2_error print "mean: ", mean print "var: ", var print "pce mean: ", pce_mean print "pce var: ", pce_var me, te, ie = best_pce.get_sensitivities() interaction_values, interaction_terms = best_pce.get_interactions() show = False fignum = 1 filename = 'oscillator-individual-interactions.png' plot_interaction_values( interaction_values, interaction_terms, title = 'Sobol indices', truncation_pct = 0.95, filename = filename, show = show, fignum = fignum ) fignum += 1 filename = 'oscillator-dimension-interactions.png' plot_interaction_effects( ie, title = 'Dimension-wise joint effects', truncation_pct = 0.95, filename = filename, 
show = show,fignum = fignum ) fignum += 1 filename = 'oscillator-main-effects.png' plot_main_effects( me, truncation_pct = 0.95, title = 'Main effect sensitivity indices', filename = filename, show = show, fignum = fignum ) fignum += 1 filename = 'oscillator-total-effects.png' plot_total_effects( te, truncation_pct = 0.95, title = 'Total effect sensitivity indices', filename = filename, show = show, fignum = fignum ) fignum += 1 from scipy.stats.kde import gaussian_kde pylab.figure( fignum ) pce_kde = gaussian_kde( pce_vals_pred ) pce_kde_x = numpy.linspace( pce_vals_pred.min(), pce_vals_pred.max(), 100 ) pce_kde_y = pce_kde( pce_kde_x ) pylab.plot( pce_kde_x, pce_kde_y,label = 'pdf of surrogate' ) true_kde = gaussian_kde( test_vals ) true_kde_x = numpy.linspace( test_vals.min(), test_vals.max(), 100 ) true_kde_y = true_kde( true_kde_x ) pylab.plot( true_kde_x, true_kde_y, label = 'true pdf' ) pylab.legend(loc=2) pylab.show()
def least_factorization( x, basis, domain, basis_indices = None ):
    """
    Factor the basis matrix evaluated at the points x degree by degree,
    building LU-type factors with a pivoted QR at every degree.

    Starting at degree 0, the basis functions of the current degree are
    evaluated at x, reduced by the elimination steps already stored in l,
    and factored with a column-pivoted QR. The numerical rank found at a
    degree is the number of rows eliminated at that degree. The loop ends
    once num_pts rows have been eliminated.

    Parameters
    ----------
    x : array
        2d array (num_dims x num_pts), as enforced by the assertion.
    basis :
        Object providing value( x, index, domain ).
    domain :
        Passed through to basis.value.
    basis_indices : sequence, optional
        If None, the indices of each degree come from
        IndexGenerator.get_isotropic_level_indices( num_dims, degree, 1. ).
        Otherwise the entries whose level_sum() equals the current degree
        are used. In both cases set_array_index is called on the index
        objects, so caller-supplied indices are modified.

    Returns
    -------
    l, u, p :
        num_pts x num_pts arrays, initialised to the identity and updated
        in place during the factorization.
    H :
        Result of get_least_polynomial_coefficients.
    v :
        1d array holding the retained columns of every Q, each block
        flattened in column-major order.
    k :
        (num_pts x 1) array; k[q] is the degree used to eliminate row q.
    basis_indices_list :
        All indices used, in the order they were added.

    Notes
    -----
    Calls numpy.set_printoptions( precision=17 ), which changes numpy's
    print settings for the whole process.
    """
    assert x.ndim == 2, 'x must be a 2d array (num_dims x num_pts)'
    num_dims, num_pts = x.shape
    numpy.set_printoptions(precision=17)

    basis_indices_list = []

    l = numpy.eye( num_pts )
    u = numpy.eye( num_pts )
    p = numpy.eye( num_pts )

    # This is just a guess: this vector could be much larger, or much smaller
    v = numpy.zeros( ( 1000, 1 ) )
    v_index = 0;

    # Current polynomial degree
    k_counter = 0
    # k(q) gives the degree used to eliminate the q'th point
    k = numpy.zeros( ( num_pts , 1 ) )

    # The current LU row to factor out:
    lu_row = 0

    index_generator = IndexGenerator()

    # Current degree is k_counter, and we iterate on this
    while ( lu_row < num_pts ):

        # We are going to generate the appropriate columns of W --
        # these are polynomial indices for degree k of the basis.

        # Get the current size of k-vectors
        if basis_indices is None:
            poly_indices = index_generator.get_isotropic_level_indices(num_dims, k_counter, 1. )
            # Number the new indices after the ones collected so far
            n = len( basis_indices_list )
            for i in xrange( len ( poly_indices ) ):
                poly_indices[i].set_array_index( n + i )
        else:
            # Pick the caller's indices of the current degree and number
            # them after the ones collected so far
            n = len( basis_indices_list )
            poly_indices = []
            for index in basis_indices:
                if index.level_sum() == k_counter:
                    poly_indices.append( index )
                    index.set_array_index( n )
                    n += 1

        basis_indices_list += poly_indices

        # One row of W per basis function of this degree, one column per point
        current_dim = len( poly_indices )
        W = numpy.empty( ( current_dim, num_pts ), numpy.double )
        for i, index in enumerate( poly_indices ):
            W[i,:] = basis.value( x, index, domain )
            #tmp to match akils polynomials
            #W[i,:] /= numpy.sqrt(basis.l2_norm(index))*2
        # Transpose to (points x basis functions) and apply the row
        # permutation accumulated so far
        W = dot( p, W.T )

        # Row-reduce W according to previous elimination steps
        end = W.shape[0]
        for q in range( lu_row ):
            W[q,:] = W[q,:] / l[q,q];
            W[q+1:end,:] -= dot( l[q+1:end,q].reshape( ( end-q-1, 1) ) ,
                                 W[q,:].reshape( (1, W.shape[1] ) ) );

        # The mass matrix defining the inner product for this degree.
        # The identity is used here; the disabled alternative below would
        # put basis.l2_norm( index ) on the diagonal instead.
        M = numpy.eye( current_dim )
        #M = numpy.zeros( ( poly_indices.shape[0], poly_indices.shape[0] ),
        #                 numpy.double );
        #for i, index in enumerate( poly_indices ):
        #    M[i,i] = basis.l2_norm( index )
        # Get upper triangular factorization of mass matrix
        # M = numpy.linalg.cholesky( M )

        # Column-pivoted QR of the not yet eliminated rows
        # (lapack function for qr DGEQP3)
        wm = dot(W[lu_row:num_pts,:] , M ).T
        Q, R, evec = qr( wm, pivoting = True, mode = 'economic' )

        # Numerical rank: count diagonal entries of R until one drops below
        # 0.001 times the magnitude of the first
        rnk = 0
        for i in xrange( R.shape[0] ):
            # If RHS is too large then if basis is fixed on entry
            # then an error may be thrown by Python
            if abs( R[i,i] ) < 0.001 * abs( R[0,0] ): break
            rnk += 1

        # Turn the pivot vector returned by qr into a permutation matrix
        NN = num_pts - lu_row
        e = numpy.zeros( ( NN, NN ), numpy.double )
        for qq in xrange( NN ):
            e[evec[qq],qq] = 1.;

        # Now first we must permute the rows by e
        p[lu_row:num_pts,:] = dot( e.T, p[lu_row:num_pts,:] );

        # And correct by permuting l as well:
        l[lu_row:num_pts,:lu_row] = dot( e.T, l[lu_row:num_pts,:lu_row] );

        # The matrix r gives us inner product information for all rows below
        # these in W
        l[lu_row:num_pts,lu_row:lu_row+rnk] = R[:rnk,:].T;

        # Now we must find inner products of all the other rows above these in W
        u[:lu_row,lu_row:lu_row+rnk] = dot( dot( W[:lu_row,:], M ), Q[:,:rnk] );

        # Grow v when the next block of current_dim*rnk entries will not fit
        if ( v_index+(current_dim*rnk) > v.shape[0] ):
            v.resize( (v.shape[0] + max(1000,current_dim*rnk), 1 ) )
        # The matrix q must be saved in order to characterize basis
        # order = 'F' is used to reshape using column major
        # order (used in fortran/matlab) this makes code consistent with matlab
        v[v_index:v_index+(current_dim*rnk)] = numpy.reshape( Q[:,:rnk], (rnk*current_dim,1),order='F').copy();
        v_index = v_index+(current_dim*rnk);

        # Update degree markers, and node and degree count
        k[lu_row:(lu_row+rnk)] = k_counter;
        lu_row = lu_row + rnk;
        k_counter = k_counter + 1;

    # Chop off parts of unnecessarily allocated vector v
    v = numpy.resize( v, ( v_index ) );

    # Make matrix H from the indices actually used during the factorization
    H = get_least_polynomial_coefficients( v, num_dims, num_pts, k,
                                           basis_indices_list )

    return l,u,p,H,v,k,basis_indices_list
def sequential_least_factorization( x, basis, domain, N = None, basis_indices = None ): assert x.ndim == 2, 'x must be a 2d array (num_dims x num_pts)' num_dims, num_pts = x.shape if N is None: N = num_pts else: assert N <= num_pts basis_indices_list = [] l = numpy.eye( N ) u = numpy.eye( N ) p = numpy.eye( N ) # This is just a guess: this vector could be much larger, or much smaller v = numpy.zeros( ( 1000, 1 ) ) v_index = 0; # Current polynomial degree k_counter = 0 # k(q) gives the degree used to eliminate the q'th point k = numpy.zeros( ( N , 1 ) ) # The current LU row to factor out: lu_row = 0 # Every time the basis degree is increased compute the rank # of the matrix formed using the new basis terms find_rank = True index_generator = IndexGenerator() while ( lu_row < N ): # Get the current size of k-vectors if find_rank: if basis_indices is None: poly_indices = \ index_generator.get_isotropic_level_indices(num_dims, k_counter, 1. ) n = len( basis_indices_list ) for i in xrange( len ( poly_indices ) ): poly_indices[i].set_array_index( n + i ) else: print '###############', k_counter poly_indices = [] for index in basis_indices: if index.level_sum() == k_counter: poly_indices.append( index ) basis_indices_list += poly_indices current_dim = len( poly_indices ) W = numpy.empty( ( current_dim, num_pts ), numpy.double ) for i, index in enumerate( poly_indices ): W[i,:] = basis.value( x, index, domain ) #tmp to match akils polynomials #W[i,:] /= numpy.sqrt(basis.l2_norm(index))*2 W = dot( p, W.T ) # Row-reduce W according to previous elimination steps end = W.shape[0] for q in range( lu_row ): W[q,:] = W[q,:] / l[q,q]; W[q+1:end,:] -= dot( l[q+1:end,q].reshape( ( end-q-1, 1) ) , W[q,:].reshape( (1, W.shape[1] ) ) ); #wm = dot(W[lu_row:N,:] , M ).T wm = W[lu_row:N,:].T #print 'W', numpy.sqrt( numpy.sum( wm**2, axis = 0 ) ) if find_rank: #rnk = numpy.linalg.matrix_rank( wm ) Q, R, evec = qr( wm, pivoting = True, mode = 'economic' ) rnk = 0 for i in range(R.shape[0] ): 
# If RHS is too large then if basis is fixed on entry # then an error may be thrown by Python if abs( R[i,i] ) < 0.001 * abs( R[0,0] ): break rnk += 1 find_rank = False # Find the column with the largest inner norm and compute inner products column_norms = numpy.sqrt( numpy.sum( wm**2, axis = 0 ) ) next_index = column_norms.argmax() row = wm[:,next_index] / column_norms[next_index] tmp = numpy.dot( row, wm ) inner_products = numpy.empty( ( tmp.shape[0] ), numpy.double ) inner_products[0] = column_norms[next_index] if ( next_index > 0 ): inner_products[1:next_index+1] = tmp[:next_index] if ( next_index < tmp.shape[0]-1 ): inner_products[next_index+1:] = tmp[next_index+1:] # Determine LU permutations. # Generate permutation indices I I = numpy.empty( ( inner_products.shape[0] ), numpy.int32 ) j = 0; I[j] = next_index; j += 1; for i in xrange( I.shape[0] ): if ( i != next_index ): I[j] = i j += 1 p[lu_row:N,:] = permute_matrix_rows( p[lu_row:N,:], I ) # Permute rows of l l[lu_row:N,:lu_row] = \ permute_matrix_rows( l[lu_row:N,:lu_row], I ) # Update l with inner product information l[lu_row:N,lu_row] = inner_products; # Compute inner products with rows above u[:lu_row,lu_row] = dot( W[:lu_row,:], row ); # allocate enough memory to store new information if v.shape[0] < v_index+current_dim: v.resize( v.shape[0]+1000, 1 ) # Save current information v[v_index:v_index+current_dim,0] = row # Update counters v_index = v_index + current_dim; k[lu_row] = k_counter lu_row += 1 if rnk < 2: k_counter += 1 find_rank = True else: rnk -= 1 # Chop off parts of unnecessarily allocated vector v v = numpy.resize( v, ( v_index ) ); # Make matrix H: H = get_least_polynomial_coefficients( v, num_dims, num_pts, k, basis_indices ) return l,u,p,H,v,k,basis_indices_list