def least_squares_leave_one_out_error( A, b ):
    M, N = A.shape
    assert b.ndim == 1
    assert M == b.shape[0]
    x = ridge_regression( A, b )
    # Compute the residual
    r = b - numpy.dot( A, x )
    # Compute the leave one out cross validation error using the
    # closed-form identity e[i] = r[i] / ( 1 - H[i,i] ), where
    # H = A ( A'A )^{-1} A' is the hat matrix
    e = numpy.empty( ( M ), numpy.double )
    AtA_inv = numpy.linalg.inv( numpy.dot( A.T, A ) )
    H = numpy.dot( A, numpy.dot( AtA_inv, A.T ) )
    for i in xrange( M ):
        e[i] = r[i] / ( 1. - H[i,i] )
    return x, r, e
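# A minimal sanity check of the closed-form identity above (illustrative
# sketch only, assuming ridge_regression with its default penalty reduces
# to ordinary least squares): refit with each point held out and compare
# the held-out residual against e[i] = r[i] / ( 1 - H[i,i] ).
# brute_force_leave_one_out_error is a hypothetical helper, not part of
# the library.
def brute_force_leave_one_out_error( A, b ):
    M = A.shape[0]
    e = numpy.empty( ( M ), numpy.double )
    for i in xrange( M ):
        # train on all points except point i
        J = numpy.hstack( ( numpy.arange( i ), numpy.arange( i+1, M ) ) )
        x = ridge_regression( A[J,:], b[J] )
        # residual at the held-out point
        e[i] = b[i] - numpy.dot( A[i,:], x )
    return e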
def xtest_grid_search_cross_validation( self ):
    f_1d = lambda x: x**10
    build_pts = numpy.linspace( -.85, .9, 14 )
    build_pts = numpy.atleast_2d( build_pts )
    build_vals = f_1d( build_pts ).T

    # Test grid search cross validation when applied to a Gaussian process
    num_dims = 1
    func_domain = TensorProductDomain( num_dims, [[-1,1]] )
    GP = GaussianProcess()
    GP.set_verbosity( 0 )
    GP.function_domain( func_domain )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    CV = GridSearchCrossValidation( loo_cv_iterator, GP )
    CV.run( build_pts, build_vals )
    I = numpy.arange( build_pts.shape[1] )
    for i in xrange( build_pts.shape[1] ):
        # indices of the training points with point i left out
        if i == 0:
            J = I[1:]
        elif i == build_pts.shape[1]-1:
            J = I[:-1]
        else:
            J = numpy.hstack( ( I[:i], I[i+1:] ) )
        train_pts = build_pts[:,J]
        train_vals = build_vals[J,:]
        GP.build( train_pts, train_vals )
        pred_vals = GP.evaluate_set( build_pts )
        assert numpy.allclose( build_vals[i,0]-pred_vals[i],
                               CV.residuals[0][i] )

    # Test grid search cross validation when applied to polynomial chaos
    # expansions that are built using ridge regression. The Vandermonde
    # matrix is rebuilt from scratch every time by the pce
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    CV = GridSearchCrossValidation( loo_cv_iterator, pce )
    CV.run( build_pts, build_vals )
    I = numpy.arange( build_pts.shape[1] )
    V = pce.vandermonde( build_pts ).T
    for i in xrange( V.shape[0] ):
        if i == 0:
            J = I[1:]
        elif i == build_pts.shape[1]-1:
            J = I[:-1]
        else:
            J = numpy.hstack( ( I[:i], I[i+1:] ) )
        A = V[J,:]
        b = build_vals[J,:]
        x = ridge_regression( A, b )
        assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                               CV.residuals[0][i] )

    # Test grid search cross validation when applied to polynomial chaos
    # expansions that are built using ridge regression. Specifying
    # use_predictor_cross_validation = True will ensure that the
    # Vandermonde matrix is not rebuilt from scratch every time by the pce
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    CV = GridSearchCrossValidation( loo_cv_iterator, pce,
                                    use_predictor_cross_validation = True )
    CV.run( build_pts, build_vals )
    I = numpy.arange( build_pts.shape[1] )
    V = pce.vandermonde( build_pts ).T
    for i in xrange( V.shape[0] ):
        if i == 0:
            J = I[1:]
        elif i == build_pts.shape[1]-1:
            J = I[:-1]
        else:
            J = numpy.hstack( ( I[:i], I[i+1:] ) )
        A = V[J,:]
        b = build_vals[J,:]
        x = ridge_regression( A, b )
        assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                               CV.residuals[0][i] )

    # Test grid search cross validation when applied to polynomial chaos
    # expansions that are built using ridge regression. A closed form for
    # the cross validation residual is used
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    CV = GridSearchCrossValidation( loo_cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = True )
    CV.run( build_pts, build_vals )
    I = numpy.arange( build_pts.shape[1] )
    V = pce.vandermonde( build_pts ).T
    for i in xrange( V.shape[0] ):
        if i == 0:
            J = I[1:]
        elif i == build_pts.shape[1]-1:
            J = I[:-1]
        else:
            J = numpy.hstack( ( I[:i], I[i+1:] ) )
        A = V[J,:]
        b = build_vals[J,:]
        x = ridge_regression( A, b )
        assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                               CV.residuals[0][i] )

    # Test grid search cross validation over a grid of candidate
    # ( lambda, order ) values for polynomial chaos expansions that are
    # built using ridge regression
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    max_order = build_pts.shape[1]
    orders = numpy.arange( 1, max_order )
    lamda = numpy.array( [0.,1e-3,1e-2,1e-1] )
    # note cartesian_product takes its type from the first array in the
    # 1d sets, so if orders is listed first lamda will be rounded to 0
    cv_params_grid_array = cartesian_product( [lamda,orders] )
    cv_params_grid = []
    for i in xrange( cv_params_grid_array.shape[0] ):
        cv_params = {}
        cv_params['lambda'] = cv_params_grid_array[i,0]
        cv_params['order'] = numpy.int32( cv_params_grid_array[i,1] )
        cv_params_grid.append( cv_params )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    CV = GridSearchCrossValidation( loo_cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = False )
    CV.run( build_pts, build_vals, cv_params_grid )
    k = 0
    I = numpy.arange( build_pts.shape[1] )
    for cv_params in cv_params_grid:
        order = cv_params['order']
        lamda = cv_params['lambda']
        pce.set_order( order )
        V = pce.vandermonde( build_pts ).T
        for i in xrange( V.shape[0] ):
            if i == 0:
                J = I[1:]
            elif i == build_pts.shape[1]-1:
                J = I[:-1]
            else:
                J = numpy.hstack( ( I[:i], I[i+1:] ) )
            A = V[J,:]
            b = build_vals[J,:]
            x = ridge_regression( A, b, lamda = lamda )
            assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                                   CV.residuals[k][i] )
        k += 1
    print 'best', CV.best_cv_params

    # Test grid search cross validation when applied to expansions that
    # are built using a step based method ( LARS )
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    max_order = build_pts.shape[1]
    orders = numpy.arange( 1, max_order )
    lamda = numpy.array( [0.,1e-3,1e-2,1e-1] )
    # note cartesian_product takes its type from the first array in the
    # 1d sets, so if orders is listed first lamda will be rounded to 0
    cv_params_grid_array = cartesian_product( [lamda,orders] )
    cv_params_grid = []
    for i in xrange( cv_params_grid_array.shape[0] ):
        cv_params = {}
        cv_params['solver'] = 4 # LARS
        cv_params['order'] = numpy.int32( cv_params_grid_array[i,1] )
        cv_params_grid.append( cv_params )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    #loo_cv_iterator = KFoldCrossValidationIterator( 3 )
    CV = GridSearchCrossValidation( loo_cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = False )
    CV.run( build_pts, build_vals, cv_params_grid )
    k = 0
    I = numpy.arange( build_pts.shape[1] )
    for cv_params in cv_params_grid:
        order = cv_params['order']
        pce.set_order( order )
        V = pce.vandermonde( build_pts ).T
        for i in xrange( V.shape[0] ):
            if i == 0:
                J = I[1:]
            elif i == build_pts.shape[1]-1:
                J = I[:-1]
            else:
                J = numpy.hstack( ( I[:i], I[i+1:] ) )
            A = V[J,:]
            b = build_vals[J,:]
            b = b.reshape( b.shape[0] )
            x, metrics = least_angle_regression( A, b, 0., 4, 0., 1000, 0 )
            assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                                   CV.residuals[k][i] )
        k += 1
    #for i in xrange( len( CV.cv_params_set ) ):
    #    print CV.cv_params_set[i], CV.scores[i]
    print 'best param', CV.best_cv_params
    print 'best score', CV.best_score

    # Test grid search cross validation when applied to expansions that
    # are built using a step based method ( OMP )
    num_dims = 1
    order = 3
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )
    max_order = build_pts.shape[1]
    orders = numpy.arange( 1, max_order )
    lamda = numpy.array( [0.,1e-3,1e-2,1e-1] )
    # note cartesian_product takes its type from the first array in the
    # 1d sets, so if orders is listed first lamda will be rounded to 0
    cv_params_grid_array = cartesian_product( [lamda,orders] )
    cv_params_grid = []
    for i in xrange( cv_params_grid_array.shape[0] ):
        cv_params = {}
        cv_params['solver'] = 2 # OMP
        cv_params['order'] = numpy.int32( cv_params_grid_array[i,1] )
        cv_params_grid.append( cv_params )
    loo_cv_iterator = LeaveOneOutCrossValidationIterator()
    #loo_cv_iterator = KFoldCrossValidationIterator( 3 )
    CV = GridSearchCrossValidation( loo_cv_iterator, pce,
                                    use_predictor_cross_validation = True,
                                    use_fast_predictor_cross_validation = False )
    CV.run( build_pts, build_vals, cv_params_grid )
    k = 0
    I = numpy.arange( build_pts.shape[1] )
    for cv_params in cv_params_grid:
        order = cv_params['order']
        pce.set_order( order )
        V = pce.vandermonde( build_pts ).T
        for i in xrange( V.shape[0] ):
            if i == 0:
                J = I[1:]
            elif i == build_pts.shape[1]-1:
                J = I[:-1]
            else:
                J = numpy.hstack( ( I[:i], I[i+1:] ) )
            A = V[J,:]
            b = build_vals[J,:]
            b = b.reshape( b.shape[0] )
            x, metrics = orthogonal_matching_pursuit( A, b, 0., 1000, 0 )
            assert numpy.allclose( ( build_vals[i,0]-numpy.dot( V, x ) )[i],
                                   CV.residuals[k][i] )
        k += 1
    #for i in xrange( len( CV.cv_params_set ) ):
    #    print CV.cv_params_set[i], CV.scores[i]
    print 'best param', CV.best_cv_params
    print 'best score', CV.best_score
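# The cartesian_product dtype note in the test above reflects a generic
# numpy pitfall: assigning floats into an integer-typed array silently
# truncates them. The sketch below demonstrates that truncation with
# plain numpy only; it is an illustrative addition (hence disabled with
# the xtest_ prefix) and says nothing about the actual internals of
# cartesian_product.
def xtest_cartesian_product_dtype_note( self ):
    orders = numpy.arange( 1, 4 )               # integer dtype
    lamda = numpy.array( [ 0., 1e-3, 1e-2 ] )   # float dtype
    # an output array typed like orders truncates the penalties to 0
    grid = numpy.empty( ( lamda.shape[0], 2 ), orders.dtype )
    grid[:,0] = lamda
    assert numpy.all( grid[:,0] == 0 )
    # an output array typed like lamda preserves both: small integers
    # survive the int -> float -> int round trip exactly, which is why
    # the tests cast the order column back with numpy.int32
    grid = numpy.empty( ( orders.shape[0], 2 ), lamda.dtype )
    grid[:,1] = orders
    assert numpy.all( numpy.int32( grid[:,1] ) == orders )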
def test_omp_cholesky( self ):
    f_1d = lambda x: x**10
    num_dims = 1
    order = 20
    func_domain = TensorProductDomain( num_dims, [[-1,1]] )
    build_pts = numpy.linspace( -.85, .9, 14 )
    build_pts = numpy.atleast_2d( build_pts )
    build_vals = f_1d( build_pts ).T
    poly_1d = [ LegendrePolynomial1D() ]
    basis = TensorProductBasis( num_dims, poly_1d )
    pce = PCE( num_dims, basis, order, func_domain )

    all_train_indices = []
    all_validation_indices = []
    cv_iterator = LeaveOneOutCrossValidationIterator( build_pts.shape[1] )
    for train_indices, validation_indices in cv_iterator:
        all_train_indices.append( train_indices )
        all_validation_indices.append( validation_indices )
    vandermonde = pce.vandermonde( build_pts ).T
    out = orthogonal_matching_pursuit_cholesky( vandermonde,
                                                build_vals.squeeze(),
                                                all_train_indices,
                                                all_validation_indices,
                                                0.0, 1000, 0 )
    num_steps = out[1].shape[1]
    # use num_steps-1 because leave one out cross validation is invalid
    # when V is underdetermined, which happens when i = num_steps
    for i in xrange( num_steps-1 ):
        I = numpy.asarray( out[1][1,:i+1], dtype = numpy.int32 )
        V = vandermonde[:,I]
        for j in xrange( len( all_validation_indices ) ):
            J = all_train_indices[j]
            K = all_validation_indices[j]
            A = V[J,:]
            b = build_vals[J,:]
            x = ridge_regression( A, b )
            assert numpy.allclose( ( build_vals[K,0] -
                                     numpy.dot( V, x )[K,0] ),
                                   out[2][i][j] )

    all_train_indices = []
    all_validation_indices = []
    num_folds = 5
    cv_iterator = KFoldCrossValidationIterator( num_folds,
                                                build_pts.shape[1] )
    for train_indices, validation_indices in cv_iterator:
        all_train_indices.append( train_indices )
        all_validation_indices.append( validation_indices )
    vandermonde = pce.vandermonde( build_pts ).T
    out = orthogonal_matching_pursuit_cholesky( vandermonde,
                                                build_vals.squeeze(),
                                                all_train_indices,
                                                all_validation_indices,
                                                0.0, 1000, 0 )
    num_steps = out[1].shape[1]
    for i in xrange( num_steps-1 ):
        I = numpy.asarray( out[1][1,:i+1], dtype = numpy.int32 )
        V = vandermonde[:,I]
        for j in xrange( len( all_validation_indices ) ):
            J = all_train_indices[j]
            K = all_validation_indices[j]
            A = V[J,:]
            b = build_vals[J,:]
            x = ridge_regression( A, b )
            # fold residuals are only comparable when the least squares
            # problem on the training fold is overdetermined
            if ( len( I ) <= len( J ) ):
                assert numpy.allclose( ( build_vals[K,0] -
                                         numpy.dot( V, x )[K,0] ),
                                       out[2][i][j] )
def solve( self, A, b ):
    # ridge regression solver conforming to the ( coefficients, metrics )
    # return convention of the step based solvers; metrics is None here
    x = ridge_regression( A, b, self.lamda )
    return x.reshape( x.shape[0], 1 ), None