Esempio n. 1
0
     def test_linear_algebra( self ):
          """
          Compare the project's dense solvers with numpy's reference routines.

          Two random Gaussian systems are drawn:

          * a 3 x 20 system, on which the first entry returned by svd_solve
            must agree with the solution returned by numpy.linalg.lstsq;
          * a 10 x 5 system, on which cholesky must reproduce
            numpy.linalg.cholesky for A^T A, and cholesky_solve,
            solve_using_cholesky_factor and qr_solve, applied to the normal
            equations A^T A x = A^T b, must agree with numpy.linalg.lstsq.

          The random draws are not seeded, so the data differ between runs.
          """
          # --- wide system: 3 equations, 20 unknowns ---
          num_rows, num_cols = 3, 20
          matrix = numpy.random.normal( 0., 1., ( num_rows, num_cols ) )
          rhs = numpy.random.normal( 0., 1., ( num_rows ) )

          reference = numpy.linalg.lstsq( matrix, rhs )[0]
          # svd_solve is handed the right-hand side as a column
          svd_solution = svd_solve( matrix, rhs.reshape( -1, 1 ) )[0]
          assert numpy.allclose( svd_solution.squeeze(), reference )

          # --- tall system: 10 equations, 5 unknowns ---
          num_rows, num_cols = 10, 5
          matrix = numpy.random.normal( 0., 1., ( num_rows, num_cols ) )
          rhs = numpy.random.normal( 0., 1., ( num_rows ) )

          # the factorisation must match numpy's on the Gram matrix A^T A
          gram = numpy.dot( matrix.T, matrix )
          numpy_factor = numpy.linalg.cholesky( gram )
          assert numpy.allclose( numpy_factor, cholesky( gram ) )

          reference = numpy.linalg.lstsq( matrix, rhs )[0]

          # normal equations solved from the Gram matrix directly
          chol_solution = cholesky_solve( gram, numpy.dot( matrix.T, rhs ) )
          assert numpy.allclose( chol_solution.squeeze(), reference )

          # normal equations solved from a precomputed factor; the solution
          # is the second entry of the returned sequence
          factor_result = solve_using_cholesky_factor(
               numpy.linalg.cholesky( gram ),
               numpy.dot( matrix.T, rhs ).reshape( num_cols, 1 ),
               1 )
          assert numpy.allclose( factor_result[1].squeeze(), reference )

          # normal equations solved by QR; the Gram matrix and right-hand
          # side are rebuilt here, as in every solver call above
          qr_solution = qr_solve( numpy.dot( matrix.T, matrix ),
                                  numpy.dot( matrix.T, rhs ).reshape( num_cols, 1 ),
                                  1 )
          assert numpy.allclose( qr_solution.squeeze(), reference )
Esempio n. 2
0
def _mean_cv_rmse( cv_residuals ):
    """
    Average, over the folds, the root-mean-square validation residual of
    every step.

    cv_residuals is indexed as cv_residuals[step][fold].  Returns a 1D
    double array with one score per step.
    """
    num_steps = len( cv_residuals )
    num_folds = len( cv_residuals[0] )

    scores = numpy.zeros( ( num_steps ), numpy.double )
    for j in range( num_steps ):
        for i in range( num_folds ):
            scores[j] += numpy.sqrt( numpy.mean( cv_residuals[j][i]**2,
                                                 axis = 0 ) )
    return scores / num_folds


def _plot_omp_cv_diagnostics( pts, vals, pce, test_pts, test_vals,
                              solutions, scores, argmin ):
    """
    Plot the cross validation score and the test-set error of every step
    against the l1 norm of that step's solution, together with the curve
    returned by OMP_brute_cv, and print a few diagnostic values.

    Side effect: the coefficients of pce are set to solutions.
    """
    # l1 norm of every column of the solution matrix (one column per step)
    tau = numpy.sum( numpy.absolute( solutions ), axis = 0 )

    # cross validation scores, with the selected step highlighted
    pylab.semilogy( tau, scores, 'bh-' )
    pylab.semilogy( [tau[argmin]], [scores[argmin]], 'gd' )

    # test-set error of every step
    pce.set_coefficients( solutions )
    pred_vals = pce.evaluate_set( test_pts )
    # the column of test values broadcasts against every step's predictions
    errors = pred_vals - test_vals.reshape( test_vals.shape[0], 1 )
    # Report the error of the selected step.  A fixed column index would
    # raise IndexError whenever fewer steps than that index were recorded.
    print( numpy.linalg.norm( errors[:,argmin] ) )
    error_norms = numpy.sqrt( numpy.sum( errors**2, axis = 0 ) /
                              test_vals.shape[0] )
    error_argmin = numpy.argmin( error_norms )

    # reference curve from the brute force cross validation
    # NOTE(review): the meaning of the trailing True is not visible from
    # this function -- confirm against OMP_brute_cv
    omp_tau, cv_error = OMP_brute_cv( pts, vals, pce, True )
    omp_argmin = numpy.argmin( cv_error )
    pylab.semilogy( omp_tau[::-1], cv_error[::-1], 'o-' )
    pylab.semilogy( [omp_tau[omp_argmin]], [cv_error[omp_argmin]], 'go' )
    print( '%s %s' % ( [omp_tau[omp_argmin]], [cv_error[omp_argmin]] ) )
    print( '%s %s' % ( cv_error, omp_argmin ) )

    pylab.semilogy( tau, error_norms, 'ks-' )
    pylab.semilogy( [tau[error_argmin]], [error_norms[error_argmin]], 'rd' )
    pylab.semilogy( [tau[argmin]], [error_norms[argmin]], 'gd' )
    print( error_norms[error_argmin] )
    pylab.show()


def OMP_fast_cv( pts, vals, pce, test_pts = None, test_vals = None ):
    """
    Choose the number of orthogonal matching pursuit (OMP) terms of pce by
    10-fold cross validation and refit pce on the selected terms.

    A single call to orthogonal_matching_pursuit_cholesky is given the
    training/validation index sets of every fold and returns the solution,
    the metrics and the validation residuals of every step.  The step with
    the smallest fold-averaged RMS validation residual is selected.

    Parameters
    ----------
    pts : array
        Training points; pts.shape[1] is used as the number of points
        (presumably one point per column -- confirm with the caller).
    vals : array
        Training values; squeezed before being passed to OMP.
    pce : object
        Expansion providing get_basis_indices, set_basis_indices,
        build_vandermonde, set_coefficients and evaluate_set.
        Modified in place.
    test_pts, test_vals : array, optional
        When test_pts is given, diagnostic plots comparing the cross
        validation score with the error on this test set are shown.
        test_vals is then required.

    Returns
    -------
    None.  On exit pce holds the selected basis indices and the
    coefficients obtained by svd_solve on those terms.

    Raises
    ------
    ValueError
        If test_pts is given without test_vals.
    """
    if test_pts is not None and test_vals is None:
        # fail before the (potentially expensive) OMP run and plotting
        raise ValueError( 'test_vals must be given together with test_pts' )

    # number the basis terms in their current order
    indices = PolyIndexVector()
    pce.get_basis_indices( indices )
    for i, index in enumerate( indices ):
        index.set_array_index( i )

    # collect the index sets of every fold
    all_train_indices = []
    all_validation_indices = []
    cv_iterator = KFoldCrossValidationIterator( 10, pts.shape[1] )
    for train_indices, validation_indices in cv_iterator:
        all_train_indices.append( train_indices )
        all_validation_indices.append( validation_indices )

    A = pce.build_vandermonde( pts )
    # NOTE(review): the three trailing arguments look like a tolerance, an
    # iteration cap and a verbosity flag -- confirm against the signature
    # of orthogonal_matching_pursuit_cholesky
    out = orthogonal_matching_pursuit_cholesky( A, vals.squeeze(),
                                                all_train_indices,
                                                all_validation_indices,
                                                0.,
                                                numpy.iinfo(numpy.int32).max,
                                                0 )

    solutions = out[0]     # one column per step
    metrics = out[1]       # row 1 holds the basis index selected at each step
    cv_residuals = out[2]  # indexed as [step][fold]

    scores = _mean_cv_rmse( cv_residuals )
    argmin = int( numpy.argmin( scores ) )

    if test_pts is not None:
        _plot_omp_cv_diagnostics( pts, vals, pce, test_pts, test_vals,
                                  solutions, scores, argmin )

    # The model of step k contains the k+1 terms selected in steps 0..k.
    # A check formerly kept in this function paired a 20-term basis with
    # the residuals of step 19.  Using only argmin terms would drop the
    # last selected term and leave the basis empty when argmin == 0.
    num_terms = argmin + 1
    best_indices = PolyIndexVector()
    best_indices.resize( num_terms )
    for i in range( num_terms ):
        best_indices[i] = indices[int(metrics[1,i])]
        best_indices[i].set_array_index( i )

    pce.set_basis_indices( best_indices )

    # refit on all the training data using only the selected terms
    A = pce.build_vandermonde( pts )
    coeff = svd_solve( A, vals )
    pce.set_coefficients( coeff[0] )