def run_krypy_arnoldi(A, x0, M, tol, maxiter=None):
    N = len(x0)
    if maxiter is None:
        nmax = N
    else:
        nmax = maxiter
    x0 = x0.reshape((N, 1))
    Aop = spla.aslinearoperator(A)
    prec = spla.aslinearoperator(M)
    arnoldi = kp.utils.Arnoldi(Aop, x0, M=prec, maxiter=nmax, ortho='mgs')
    for m in range(nmax):
        arnoldi.advance()
        v, h, p = arnoldi.get()
        resid = abs(h[m + 1, m] * v[m, m + 1])
        if resid <= tol:
            break
    if m + 1 == nmax:
        print("Convergence not achieved within the Arnoldi algorithm "
              "after %d iterations, r^(k) = %g" % (m, resid))
    else:
        print("--" * 30)
        print("%g accuracy reached with %d Arnoldi iterations" % (tol, m))
        print("--" * 30)
    # orthonormalize the Arnoldi basis
    # Q, R = kp.utils.qr(p)
    return v, h, m
def f(w, X_, Y_, Z_, size_u, alpha, u0):
    n_task = Y_.shape[1]
    size_v = X_.shape[1] // size_u
    X_ = splinalg.aslinearoperator(X_)
    Z_ = splinalg.aslinearoperator(Z_)
    W = w.reshape((-1, n_task), order='F')
    u, v, c = W[:size_u], W[size_u:size_u + size_v], W[size_u + size_v:]
    return obj(X_, Y_, Z_, u, v, c, alpha, u0)
def fprime(w, X_, Y_, Z_, size_u, alpha, u0):
    X_ = splinalg.aslinearoperator(X_)
    Z_ = splinalg.aslinearoperator(Z_)
    n_task = Y_.shape[1]
    size_v = X_.shape[1] // size_u
    W = w.reshape((-1, n_task), order='F')
    u, v, c = W[:size_u], W[size_u:size_u + size_v], W[size_u + size_v:]
    tmp = Y_ - matmat2(X_, u, v, n_task) - Z_.matmat(c)
    grad = np.empty((size_u + size_v + Z_.shape[1], n_task))  # TODO: do outside
    grad[:size_u] = rmatmat1(X_, v, tmp, n_task) - alpha * (u - u0[:, None])
    grad[size_u:size_u + size_v] = rmatmat2(X_, u, tmp, n_task)
    grad[size_u + size_v:] = Z_.rmatvec(tmp)
    return -grad.reshape((-1,), order='F')
def basis_generation_with_eigenvalue_shifting_and_scaling_block_vecs(
        self, mat, blockvec, step_val, max_eigenval, min_eigenval):
    """step_val >= 1"""
    assert step_val >= 1, "Need a larger step_val"
    block_size = blockvec.shape[1]
    #print("block_size ", block_size)
    chebyshev_basis = np.zeros((mat.shape[0], block_size * step_val))
    #print("basis ", chebyshev_basis.shape[0], " , ", chebyshev_basis.shape[1])
    op_linalg_A = linalg.aslinearoperator(mat)
    # Map the spectrum [min_eigenval, max_eigenval] onto [-1, 1].
    s_alpha = 2.0 / (max_eigenval - min_eigenval)
    s_beta = -(max_eigenval + min_eigenval) / (max_eigenval - min_eigenval)
    for sIdx in range(1, step_val + 1):
        degree = sIdx - 1
        if degree == 0:
            chebyshev_basis[:, 0:block_size] = blockvec[:, 0:block_size]
        elif degree == 1:
            chebyshev_basis[:, block_size:2 * block_size] = \
                (s_alpha * op_linalg_A.matmat(blockvec)
                 + s_beta * blockvec)[:, 0:block_size]
        else:
            # Three-term Chebyshev recurrence on the shifted/scaled operator.
            chebyshev_basis[:, degree * block_size:(degree + 1) * block_size] = \
                2 * s_alpha * op_linalg_A.matmat(
                    chebyshev_basis[:, (degree - 1) * block_size:degree * block_size]) \
                + 2 * s_beta * chebyshev_basis[:, (degree - 1) * block_size:degree * block_size] \
                - chebyshev_basis[:, (degree - 2) * block_size:(degree - 1) * block_size]
    return chebyshev_basis
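# A minimal standalone sketch (assumptions: NumPy/SciPy available, a toy SPD
# matrix) of the shift-and-scale used above: with s_alpha = 2/(lmax - lmin) and
# s_beta = -(lmax + lmin)/(lmax - lmin), the operator s_alpha*A + s_beta*I has
# spectrum in [-1, 1], so the three-term Chebyshev recurrence stays bounded.
import numpy as np
from scipy.sparse import linalg

A_demo = np.diag(np.linspace(1.0, 10.0, 50))  # lmin = 1, lmax = 10
op_demo = linalg.aslinearoperator(A_demo)
s_alpha, s_beta = 2.0 / (10.0 - 1.0), -(10.0 + 1.0) / (10.0 - 1.0)
t0 = np.random.rand(50)
t1 = s_alpha * op_demo.matvec(t0) + s_beta * t0
t2 = 2 * s_alpha * op_demo.matvec(t1) + 2 * s_beta * t1 - t0  # T_2 term
print(np.abs(np.linalg.eigvalsh(s_alpha * A_demo + s_beta * np.eye(50))).max())  # <= 1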
def makeOperator(operatorInput, expectedShape):
    """Internal. Takes a dense numpy array or a sparse matrix or
    a function and makes an operator performing matrix * blockvector
    products.

    Example
    -------
    >>> A = makeOperator( arrayA, (n, n) )
    >>> vectorB = A( vectorX )
    """
    if operatorInput is None:
        def ident(x):
            return x
        operator = LinearOperator(expectedShape, ident, matmat=ident)
    else:
        operator = aslinearoperator(operatorInput)

    if operator.shape != expectedShape:
        raise ValueError('operator has invalid shape')

    operator.__call__ = operator.matmat
    return operator
def get_linearoperator(shape, A):
    """Enhances aslinearoperator if A is None."""
    if A is None:
        identity = lambda x: x
        return LinearOperator(shape, identity, identity, identity, numpy.double)
    else:
        return aslinearoperator(A)
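# Hedged usage sketch for get_linearoperator above: passing A=None yields an
# identity operator of the requested shape (assumes the numpy/scipy imports
# the snippet itself relies on are in scope).
import numpy
v_demo = numpy.arange(5.0)
I5 = get_linearoperator((5, 5), None)
assert (I5.matvec(v_demo) == v_demo).all()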
def __init__(self, A):
    A = aslinearoperator(A)

    def f(x, out=None, inwork=None, outwork=None, comm=None):
        if out is None:
            out = np.empty(())
        if comm is None:
            comm = MPI.COMM_WORLD
        if outwork is not None:
            if len(outwork) == 0:
                outwork.append(A.matvec(x.ravel()))
            else:
                outwork[0][:] = A.matvec(x.ravel())
            Ax = outwork[0]
        else:
            Ax = A.matvec(x.ravel())
        out.flat = dot(x, Ax, comm=comm)
        return float(out)

    def df(x, out=None, inwork=None, outwork=None, comm=None):
        if comm is None:
            comm = MPI.COMM_WORLD
        if inwork is not None:
            Ax = inwork[0]
        else:
            Ax = A.matvec(x.ravel())
        if out is not None:
            out[:] = 2 * Ax
            return out
        return 2 * Ax

    self.__call__ = f
    self.D = Function(df)
def check_precond_dummy(solver, case):
    tol = 1e-8

    def identity(b, which=None):
        """trivial preconditioner"""
        return b

    A = case.A
    M, N = A.shape
    D = spdiags([1.0 / A.diagonal()], [0], M, N)
    b = arange(A.shape[0], dtype=float)
    x0 = 0 * b

    precond = LinearOperator(A.shape, identity, rmatvec=identity)

    if solver is qmr:
        x, info = solver(A, b, M1=precond, M2=precond, x0=x0, tol=tol)
    else:
        x, info = solver(A, b, M=precond, x0=x0, tol=tol)
    assert_equal(info, 0)
    assert_normclose(A.dot(x), b, tol)

    A = aslinearoperator(A)
    A.psolve = identity
    A.rpsolve = identity

    x, info = solver(A, b, x0=x0, tol=tol)
    assert_equal(info, 0)
    assert_normclose(A * x, b, tol=tol)
def estimate_spectral_norm(A, its=20):
    """
    Estimate spectral norm of a matrix by the randomized power method.

    ..  This function automatically detects the matrix data type and calls the
        appropriate backend. For details, see :func:`backend.idd_snorm` and
        :func:`backend.idz_snorm`.

    Parameters
    ----------
    A : :class:`scipy.sparse.linalg.LinearOperator`
        Matrix given as a :class:`scipy.sparse.linalg.LinearOperator` with the
        `matvec` and `rmatvec` methods (to apply the matrix and its adjoint).
    its : int
        Number of power method iterations.

    Returns
    -------
    float
        Spectral norm estimate.
    """
    from scipy.sparse.linalg import aslinearoperator
    A = aslinearoperator(A)
    m, n = A.shape
    matvec = lambda x: A.matvec(x)
    matveca = lambda x: A.rmatvec(x)
    if _is_real(A):
        return backend.idd_snorm(m, n, matveca, matvec, its=its)
    else:
        return backend.idz_snorm(m, n, matveca, matvec, its=its)
def estimate_spectral_norm(A, its=20):
    """
    Estimate spectral norm of a matrix by the randomized power method.

    This function automatically detects the matrix data type and calls the
    appropriate backend. For details, see :func:`backend.idd_snorm` and
    :func:`backend.idz_snorm`.

    :param A:
        Matrix given as a :class:`scipy.sparse.linalg.LinearOperator` with the
        `matvec` and `rmatvec` methods (to apply the matrix and its adjoint).
    :type A: :class:`scipy.sparse.linalg.LinearOperator`
    :keyword its:
        Number of power method iterations.
    :type its: int

    :return:
        Spectral norm estimate.
    :rtype: float
    """
    A = aslinearoperator(A)
    m, n = A.shape
    matvec = lambda x: A.matvec(x)
    matveca = lambda x: A.rmatvec(x)
    if A.dtype == 'float64':
        return backend.idd_snorm(m, n, matveca, matvec, its=its)
    elif A.dtype == 'complex128':
        return backend.idz_snorm(m, n, matveca, matvec, its=its)
    else:
        raise _DTYPE_ERROR
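# A small sanity-check sketch for the two estimate_spectral_norm variants above
# (assumes SciPy's interpolative-decomposition module, which ships this routine
# as scipy.linalg.interpolative.estimate_spectral_norm): the randomized power
# method should approach the exact 2-norm.
import numpy as np
import scipy.linalg.interpolative as sli
from scipy.sparse.linalg import aslinearoperator

rng = np.random.default_rng(0)
A_test = rng.standard_normal((80, 60))
est = sli.estimate_spectral_norm(aslinearoperator(A_test), its=40)
print(est, np.linalg.norm(A_test, 2))  # the two values should agree to a few digits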
def __init__(self, n=100, mode='sparse'):
    np.random.seed(0)

    self.n = n

    self.x0 = -np.ones(n)
    self.lb = np.linspace(-2, -1.5, n)
    self.ub = np.linspace(-0.8, 0.0, n)

    self.lb += 0.1 * np.random.randn(n)
    self.ub += 0.1 * np.random.randn(n)

    self.x0 += 0.1 * np.random.randn(n)
    self.x0 = make_strictly_feasible(self.x0, self.lb, self.ub)

    if mode == 'sparse':
        self.sparsity = lil_matrix((n, n), dtype=int)
        i = np.arange(n)
        self.sparsity[i, i] = 1
        i = np.arange(1, n)
        self.sparsity[i, i - 1] = 1
        i = np.arange(n - 1)
        self.sparsity[i, i + 1] = 1
        self.jac = self._jac
    elif mode == 'operator':
        self.jac = lambda x: aslinearoperator(self._jac(x))
    elif mode == 'dense':
        self.sparsity = None
        self.jac = lambda x: self._jac(x).toarray()
    else:
        assert_(False)
def makeOperator(operatorInput, expectedShape):
    """Internal. Takes a dense numpy array or a sparse matrix or
    a function and makes an operator performing matrix * blockvector
    products.

    Examples
    --------
    >>> A = makeOperator( arrayA, (n, n) )
    >>> vectorB = A( vectorX )
    """
    if operatorInput is None:
        def ident(x):
            return x
        operator = LinearOperator(expectedShape, ident, matmat=ident)
    else:
        operator = aslinearoperator(operatorInput)

    if operator.shape != expectedShape:
        raise ValueError('operator has invalid shape')

    if sys.version_info[0] >= 3:
        # special methods are looked up on the class -- so make a new one
        operator.__class__ = CallableLinearOperator
    else:
        operator.__call__ = operator.matmat

    return operator
def _arpack(self, k):
    Qt = _MQM(self.Q, self.M)
    Mt = _Mass(self.M)
    Minv = _MassInv(self.M)
    verbose = self.verbose

    l, v = eigsh(aslinearoperator(Qt), M=aslinearoperator(Mt),
                 k=k, which='LM', Minv=aslinearoperator(Minv))
    l = l[::-1]
    v = v[:, ::-1]

    if verbose:
        print("Ax %d, Bx %d, B^{-1}x %d" %
              (Qt.mvcount, Mt.mvcount, Minv.mvcount))
    return l, v
def hess(w, s, X_, Y_, Z_, size_u, u0):
    # TODO: regularization
    s = s.reshape((-1, 1))
    X_ = splinalg.aslinearoperator(X_)
    Z_ = splinalg.aslinearoperator(Z_)
    size_v = X_.shape[1] // size_u
    W = w.reshape((-1, 1), order='F')
    XY = X_.rmatvec(Y_)  # TODO: move out
    u, v, c = W[:size_u], W[size_u:size_u + size_v], W[size_u + size_v:]
    s1, s2, s3 = s[:size_u], s[size_u:size_u + size_v], s[size_u + size_v:]
    W2 = X_.rmatvec(matmat2(X_, u, v, 1))
    W2 = W2.reshape((-1, s2.shape[0]), order='F')
    XY = XY.reshape((-1, s2.shape[0]), order='F')
    n_task = 1

    A_tmp = matmat2(X_, s1, v, n_task)
    As1 = rmatmat1(X_, v, A_tmp, n_task)

    tmp = matmat2(X_, u, s2, n_task)
    Ds2 = rmatmat2(X_, u, tmp, n_task)

    tmp = Z_.matvec(s3)
    Cs3 = rmatmat1(X_, v, tmp, n_task)

    tmp = matmat2(X_, s1, v, n_task).T
    Cts1 = Z_.rmatvec(tmp.T)

    tmp = matmat2(X_, u, s2, n_task)
    Bs2 = rmatmat1(X_, v, tmp, n_task) + W2.dot(s2) - XY.dot(s2)

    tmp = matmat2(X_, s1, v, n_task)
    Bts1 = rmatmat2(X_, u, tmp, n_task) + W2.T.dot(s1) - XY.T.dot(s1)

    tmp = Z_.matvec(s3)
    Es3 = rmatmat2(X_, u, tmp, n_task)

    tmp = matmat2(X_, u, s2, n_task)
    Ets2 = Z_.rmatvec(tmp)

    Fs3 = -Z_.rmatvec(Z_.matvec(s3))

    line0 = As1 + Bs2 + Cs3
    line1 = Bts1 + Ds2 + Es3
    line2 = Cts1 + Ets2 + Fs3

    return np.concatenate((line0, line1, line2)).ravel()
def solveH(W):
    h = np.empty((nl, nk))
    A = sp_linalg.aslinearoperator(Xlo.matvec(W))

    def mvH(s):
        return A.rmatvec(A.matvec(s)) + alpha * s

    for i in range(Y.shape[1]):
        Ch = sp_linalg.LinearOperator((nk, nk), matvec=mvH, dtype=Xlo.dtype)
        bh = A.rmatvec(Y[:, i])
        h[i], info = sp_linalg.cg(Ch, bh, maxiter=max_iter, tol=tol)
    return h
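# Standalone sketch of the pattern in solveH: ridge normal equations
# (A^T A + alpha I) h = A^T y solved matrix-free with CG (toy sizes assumed).
import numpy as np
from scipy.sparse import linalg as sp_linalg

rng = np.random.default_rng(1)
A_op = sp_linalg.aslinearoperator(rng.standard_normal((60, 20)))
y_demo = rng.standard_normal(60)
alpha_reg = 0.1
C_op = sp_linalg.LinearOperator(
    (20, 20), matvec=lambda s: A_op.rmatvec(A_op.matvec(s)) + alpha_reg * s)
h_demo, info = sp_linalg.cg(C_op, A_op.rmatvec(y_demo))
print(info)  # 0 on convergence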
def test_sparse_and_LinearOperator(self):
    m = 5000
    n = 1000
    A = rand(m, n, random_state=0)
    b = self.rnd.randn(m)
    res = lsq_linear(A, b)
    assert_allclose(res.optimality, 0, atol=1e-6)

    A = aslinearoperator(A)
    res = lsq_linear(A, b)
    assert_allclose(res.optimality, 0, atol=1e-6)
def get_inv_diag_plus_low_rank_cov_op(X, rank=2):
    fa = FactorAnalysis(n_components=rank)
    fa.fit(X)
    components = fa.components_
    noise_vars = fa.noise_variance_
    activations = fa.transform(X)

    return _woodbury_inverse(
        _diagonal_operator(1. / noise_vars),
        aslinearoperator(np.linalg.inv(
            1. / len(activations) * activations.T.dot(activations))),
        components.T, components)
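# For reference, the Woodbury identity the helper above relies on (here D is the
# diagonal noise term and W = components; _woodbury_inverse and
# _diagonal_operator are project-local helpers not shown in this snippet):
#   (D + W^T C W)^{-1} = D^{-1} - D^{-1} W^T (C^{-1} + W D^{-1} W^T)^{-1} W D^{-1}
# so applying the inverse covariance costs only diagonal scalings plus a
# rank-sized dense solve.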
def gmres(self, mat, rhs, lu):
    if 1:
        size = len(rhs)
        A = aslinearoperator(mat)
        M = LinearOperator((size, size), dtype=float, matvec=lu.solve)
        self.counter = 0
        sol, info = gmres(A, rhs, M=M, maxiter=10,
                          #callback=self.callback,
                          tol=1e-12)
        return sol
    else:
        return lu.solve(rhs)
def __init__(self, inner_product="l2", map_operator=None, inverse="default"):
    if inner_product == "l2":
        self.riesz_map = 1
        self.riesz_inv = 1
    elif inner_product == "custom":
        from numpy import ndarray, equal
        from scipy.sparse.linalg import LinearOperator, aslinearoperator
        if not isinstance(map_operator, (ndarray, LinearOperator)) \
                or not equal(*map_operator.shape):
            raise TypeError("only square numpy arrays are currently supported")
        self.riesz_map = aslinearoperator(map_operator)
        if inverse in ("default", "lu") and isinstance(map_operator, ndarray):
            from numpy.linalg import inv
            self.riesz_inv = aslinearoperator(inv(map_operator))
        else:
            self.riesz_inv = inverse
    self.inner_product = inner_product
def bcbcg_solver_least_square_eigen_param(self, mat, RHS, init_X, step_val,
                                          tol, maxiter, whichcol,
                                          max_eigenvalue, min_eigenvalue):
    #gerschgorin_estimator = GerschgorinCircleTheoremEigenvalueEstimator()
    #max_eigenvalue, min_eigenvalue = gerschgorin_estimator.csr_mat_extreme_eigenvalue_estimation(mat)
    chebyshev_basis_generator = ChebyshevPolynomial()
    op_A = linalg.aslinearoperator(mat)
    m_R = RHS - op_A(init_X)
    m_X = init_X.copy()
    R_to_RHS_norm_ratio = lambda x: np.linalg.norm(m_R[:, x]) / np.linalg.norm(RHS[:, x])
    residual_ratio_hist = [R_to_RHS_norm_ratio(whichcol)]

    print("max and min eigen which are going to be used ... ",
          max_eigenvalue, " , ", min_eigenvalue)

    for itercounter in range(1, maxiter + 1):
        m_chebyshev_basis = \
            chebyshev_basis_generator.basis_generation_with_eigenvalue_shifting_and_scaling_block_vecs(
                mat, m_R, step_val, max_eigenvalue, min_eigenvalue)
        #print("basis rank", np.linalg.matrix_rank(m_chebyshev_basis))
        #return

        if itercounter == 1:
            m_Q = m_chebyshev_basis
        else:
            m_AQ_trans_mul_chebyshev_basis = np.matmul(m_AQ.T, m_chebyshev_basis)
            #m_B = np.matmul(m_Q_trans_AQ_inverse, m_AQ_trans_mul_chebyshev_basis)
            m_B = np.linalg.lstsq(m_Q_trans_AQ, m_AQ_trans_mul_chebyshev_basis)[0]
            m_Q = m_chebyshev_basis - np.matmul(m_Q, m_B)

        m_AQ = op_A.matmat(m_Q)
        m_Q_trans_AQ = np.matmul(m_Q.T, m_AQ)
        #m_Q_trans_AQ_inverse = np.linalg.inv(m_Q_trans_AQ)
        #m_alpha = np.matmul(m_Q_trans_AQ_inverse, np.matmul(m_Q.T, m_R))
        m_alpha = np.linalg.lstsq(m_Q_trans_AQ, np.matmul(m_Q.T, m_R))[0]
        m_X += np.matmul(m_Q, m_alpha)
        m_R -= np.matmul(m_AQ, m_alpha)

        residual_ratio_hist.append(R_to_RHS_norm_ratio(whichcol))
        print(itercounter, ": ", R_to_RHS_norm_ratio(whichcol))
        if residual_ratio_hist[-1] <= tol:
            return m_X, m_R, residual_ratio_hist

    return m_X, m_R, residual_ratio_hist
def solve_sudoku(clues, maxiter=1000, epsilon=0.5, threshold=1e-6, l1weight=1e-2):
    clues = np.asarray(clues, dtype=int)
    n = len(clues)
    assert clues.shape == (n, n), "sudoku must be square"
    assert int(np.sqrt(n)) ** 2 == n, "size of sudoku must be a square number"
    assert 0 < epsilon < 1, "epsilon must be between 0 and 1"

    idx = LinearIndexer((9, 9, 9))
    cs = []  # constraints: each entry is a list of components whose sum should be one
    for j in range(9):                           # in each row, ...
        for i in range(9):                       # ...each number...
            cs.append(idx[j, :, i])              # ...must occur exactly once
    for j in range(9):                           # in each column, ...
        for i in range(9):                       # ...each number...
            cs.append(idx[:, j, i])              # ...must occur exactly once
    for x in range(0, 9, 3):                     # in each box along x...
        for y in range(0, 9, 3):                 # ...and y, ...
            for i in range(9):                   # ...each number...
                cs.append(idx[x:x+3, y:y+3, i])  # ...must occur exactly once
    for x in range(9):                           # in each cell along x...
        for y in range(9):                       # ...and y, ...
            cs.append(idx[x, y, :])              # ...there must be exactly one number
    for i, row in enumerate(clues):              # for each cell along x...
        for j, col in enumerate(row):            # ...and y...
            if col:                              # ...for which a nonzero clue is given, ...
                cs.append(idx[i, j, col - 1])    # ...this number must occur in this cell

    ms = [sp.coo_matrix((np.ones(len(k)), (np.zeros(len(k)), k)),
                        shape=(1, 9 * 9 * 9)) for k in cs]
    A = sla.aslinearoperator(sp.vstack(ms))
    b = np.ones(len(cs))

    l1 = l1weight * NonnegativeL1Norm()
    l2 = DataTerm(A, b)

    # iterative reweighted l1-norm minimization
    x = fista(l2, l1, maxiter=maxiter)
    while True:
        tau = 1.0 / (x + epsilon)
        old_x = x
        x = fista(l2, l1 * tau, maxiter=maxiter)
        d = np.square(x - old_x).sum()
        print(d)
        if d < threshold:
            break

    x = x.reshape(9, 9, 9)  # row, column, number -> probability
    return np.argmax(x, axis=2) + 1
def _makeOperator(operatorInput, expectedShape):
    """Takes a dense numpy array or a sparse matrix or
    a function and makes an operator performing matrix * blockvector
    products."""
    if operatorInput is None:
        return None
    else:
        operator = aslinearoperator(operatorInput)

    if operator.shape != expectedShape:
        raise ValueError('operator has invalid shape')

    return operator
def estimate_spectral_norm_diff(A, B, its=20):
    """
    Estimate spectral norm of the difference of two matrices by the randomized
    power method.

    This function automatically detects the matrix data type and calls the
    appropriate backend. For details, see :func:`backend.idd_diffsnorm` and
    :func:`backend.idz_diffsnorm`.

    :param A:
        First matrix given as a :class:`scipy.sparse.linalg.LinearOperator`
        with the `matvec` and `rmatvec` methods (to apply the matrix and its
        adjoint).
    :type A: :class:`scipy.sparse.linalg.LinearOperator`
    :param B:
        Second matrix given as a :class:`scipy.sparse.linalg.LinearOperator`
        with the `matvec` and `rmatvec` methods (to apply the matrix and its
        adjoint).
    :type B: :class:`scipy.sparse.linalg.LinearOperator`
    :keyword its:
        Number of power method iterations.
    :type its: int

    :return:
        Spectral norm estimate of matrix difference.
    :rtype: float
    """
    A = aslinearoperator(A)
    B = aslinearoperator(B)
    m, n = A.shape
    matvec1 = lambda x: A.matvec(x)
    matveca1 = lambda x: A.rmatvec(x)
    matvec2 = lambda x: B.matvec(x)
    matveca2 = lambda x: B.rmatvec(x)
    if A.dtype == 'float64':
        return backend.idd_diffsnorm(m, n, matveca1, matveca2,
                                     matvec1, matvec2, its=its)
    elif A.dtype == 'complex128':
        return backend.idz_diffsnorm(m, n, matveca1, matveca2,
                                     matvec1, matvec2, its=its)
    else:
        raise _DTYPE_ERROR
def estimate_spectral_norm_diff(A, B, its=20):
    """
    Estimate spectral norm of the difference of two matrices by the randomized
    power method.

    ..  This function automatically detects the matrix data type and calls the
        appropriate backend. For details, see :func:`backend.idd_diffsnorm`
        and :func:`backend.idz_diffsnorm`.

    Parameters
    ----------
    A : :class:`scipy.sparse.linalg.LinearOperator`
        First matrix given as a :class:`scipy.sparse.linalg.LinearOperator`
        with the `matvec` and `rmatvec` methods (to apply the matrix and its
        adjoint).
    B : :class:`scipy.sparse.linalg.LinearOperator`
        Second matrix given as a :class:`scipy.sparse.linalg.LinearOperator`
        with the `matvec` and `rmatvec` methods (to apply the matrix and its
        adjoint).
    its : int
        Number of power method iterations.

    Returns
    -------
    float
        Spectral norm estimate of matrix difference.
    """
    from scipy.sparse.linalg import aslinearoperator
    A = aslinearoperator(A)
    B = aslinearoperator(B)
    m, n = A.shape
    matvec1 = lambda x: A.matvec(x)
    matveca1 = lambda x: A.rmatvec(x)
    matvec2 = lambda x: B.matvec(x)
    matveca2 = lambda x: B.rmatvec(x)
    if _is_real(A):
        return backend.idd_diffsnorm(
            m, n, matveca1, matveca2, matvec1, matvec2, its=its)
    else:
        return backend.idz_diffsnorm(
            m, n, matveca1, matveca2, matvec1, matvec2, its=its)
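# Analogous sanity-check sketch for the diff variants above (assumes
# scipy.linalg.interpolative.estimate_spectral_norm_diff): ||A - B||_2 is
# estimated without ever forming A - B.
import numpy as np
import scipy.linalg.interpolative as sli
from scipy.sparse.linalg import aslinearoperator

rng = np.random.default_rng(2)
A_test = rng.standard_normal((50, 40))
B_test = A_test + 1e-3 * rng.standard_normal((50, 40))
est = sli.estimate_spectral_norm_diff(aslinearoperator(A_test),
                                      aslinearoperator(B_test))
print(est, np.linalg.norm(A_test - B_test, 2))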
def test_arnoldi_algorithm():
    """ test the scipy routine of ARPACK to get the
    smallest eigenvectors of a matrix A """
    size = 500
    diag = 1. * np.arange(1, size + 1)  # np.logspace(-1,2,num=size)
    D = np.diag(diag)
    prec = np.diag(1. / diag)
    A = D
    val = [min(diag), max(diag), diag[size // 3], diag[-size // 3], diag[size // 2]]
    for i in val:
        vTv = vecT_vec(size)
        A += float(i) * vTv
    B = spla.aslinearoperator(dgemm(prec, A))
    b = prec.dot(np.random.random(size))
    Alo = spla.aslinearoperator(A)
    mbd = spla.aslinearoperator(prec)
    x0 = np.ones(size)
    import time
    eigs, eigv = spla.eigsh(mbd * Alo, k=len(val), which='SM', ncv=24, tol=1.e-3)
    #eigs,eigv=spla.eigsh(A,M=D,Minv=prec,v0=x0,k=len(val),which='SM',ncv=24,tol=1.e-3)
    #eigs,eigv=spla.eigsh(A,M=D,Minv=mbd,v0=x0,k=len(val),which='SM',ncv=48,tol=1.e-3)
    e = time.perf_counter()
    #print("sm", e - s)
    #print(eigs, np.allclose(eigs, eigs1))
    r = len(eigs)
    print(eigs)
    for i in range(r):
        print(np.allclose(norm2(B * eigv[:, i]) / norm2(eigv[:, i]), eigs[i]))
        #assert np.allclose(norm2(B*eigv[:,i])/norm2(eigv[:,i]),eigs[i])
        x, info = spla.cg(Alo, eigv[:, i], M=mbd, maxiter=2, tol=1.e-10)
        print(info)
def test_eigsh_for_k_greater():
    # Test eigsh() for k beyond limits.
    A_sparse = diags([1, -2, 1], [-1, 0, 1], shape=(4, 4))  # sparse
    A = generate_matrix(4, sparse=False)
    M_dense = generate_matrix_symmetric(4, pos_definite=True)
    M_sparse = generate_matrix_symmetric(4, pos_definite=True, sparse=True)
    M_linop = aslinearoperator(M_dense)
    eig_tuple1 = eigh(A, b=M_dense)
    eig_tuple2 = eigh(A, b=M_sparse)

    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning)

        assert_equal(eigsh(A, M=M_dense, k=4), eig_tuple1)
        assert_equal(eigsh(A, M=M_dense, k=5), eig_tuple1)
        assert_equal(eigsh(A, M=M_sparse, k=5), eig_tuple2)

        # M as LinearOperator
        assert_raises(TypeError, eigsh, A, M=M_linop, k=4)

        # Test 'A' for different types
        assert_raises(TypeError, eigsh, aslinearoperator(A), k=4)
        assert_raises(TypeError, eigsh, A_sparse, M=M_dense, k=4)
def cbcg_solver_least_square_eigen_param(self, mat, rhs, init_x, step_val,
                                         tol, maxiter, max_eigenvalue,
                                         min_eigenvalue):
    """get the inverse matrix by least square method"""
    #gerschgorin_estimator = GerschgorinCircleTheoremEigenvalueEstimator()
    #max_eigenvalue, min_eigenvalue = gerschgorin_estimator.csr_mat_extreme_eigenvalue_estimation(mat)
    chebyshev_basis_generator = ChebyshevPolynomial()
    op_A = linalg.aslinearoperator(mat)
    v_r = rhs - op_A(init_x)
    v_x = init_x.copy()
    s_normb = np.linalg.norm(rhs)
    residual_ratio_hist = [np.linalg.norm(v_r) / s_normb]

    print("max and min eigen which are going to be used ... ",
          max_eigenvalue, " , ", min_eigenvalue)

    for itercounter in range(1, maxiter + 1):
        m_chebyshev_basis = \
            chebyshev_basis_generator.basis_generation_with_eigenvalue_shifting_and_scaling_single_vec(
                mat, v_r, step_val, max_eigenvalue, min_eigenvalue)

        if itercounter == 1:
            m_Q = m_chebyshev_basis
        else:
            #op_AQ_trans = linalg.aslinearoperator(m_AQ.transpose())
            #AQ_trans_mul_chebyshev_basis = op_AQ_trans.matmat(m_chebyshev_basis)
            #m_B = linalg.aslinearoperator(Q_trans_AQ_inverse).matmat(AQ_trans_mul_chebyshev_basis)
            m_AQ_trans_mul_chebyshev_basis = np.matmul(m_AQ.T, m_chebyshev_basis)
            #m_B = np.matmul(m_Q_trans_AQ_inverse, m_AQ_trans_mul_chebyshev_basis)
            m_B = np.linalg.lstsq(m_Q_trans_AQ, m_AQ_trans_mul_chebyshev_basis)[0]
            m_Q = m_chebyshev_basis - np.matmul(m_Q, m_B)

        m_AQ = op_A.matmat(m_Q)
        #m_Q_trans_AQ = linalg.aslinearoperator(m_Q.transpose())(m_AQ)
        m_Q_trans_AQ = np.matmul(m_Q.T, m_AQ)
        #m_Q_trans_AQ_inverse = np.linalg.inv(m_Q_trans_AQ)
        #v_alpha = np.matmul(m_Q_trans_AQ_inverse, np.matmul(m_Q.T, v_r))
        v_alpha = np.linalg.lstsq(m_Q_trans_AQ, np.matmul(m_Q.T, v_r))[0]
        v_x += np.matmul(m_Q, v_alpha)
        v_r -= np.matmul(m_AQ, v_alpha)

        residual_ratio_hist.append(np.linalg.norm(v_r) / s_normb)
        print(itercounter, ": ", np.linalg.norm(v_r) / s_normb)
        if residual_ratio_hist[-1] <= tol:
            return v_x, v_r, residual_ratio_hist

    return v_x, v_r, residual_ratio_hist
def right_multiplied_operator(J, d):
    """Return J diag(d) as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return J.matvec(np.ravel(x) * d)

    def matmat(X):
        return J.matmat(X * d[:, np.newaxis])

    def rmatvec(x):
        return d * J.rmatvec(x)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)
def left_multiplied_operator(J, d):
    """Return diag(d) J as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return d * J.matvec(x)

    def matmat(X):
        return d * J.matmat(X)

    def rmatvec(x):
        return J.rmatvec(x.ravel() * d)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)
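# Quick dense check (a sketch assuming both helpers above are in scope): the
# wrapped operators should match the explicit products J diag(d) and diag(d) J.
import numpy as np

rng = np.random.default_rng(3)
J_test = rng.standard_normal((4, 3))
d3, d4 = rng.standard_normal(3), rng.standard_normal(4)
x3 = rng.standard_normal(3)
assert np.allclose(right_multiplied_operator(J_test, d3).matvec(x3),
                   J_test.dot(d3 * x3))
assert np.allclose(left_multiplied_operator(J_test, d4).matvec(x3),
                   d4 * J_test.dot(x3))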
def test_atol(solver):
    # TODO: minres. It didn't historically use absolute tolerances, so
    # fixing it is less urgent.
    np.random.seed(1234)
    A = np.random.rand(10, 10)
    A = A.dot(A.T) + 10 * np.eye(10)
    b = 1e3 * np.random.rand(10)
    b_norm = np.linalg.norm(b)

    tols = np.r_[0, np.logspace(np.log10(1e-10), np.log10(1e2), 7), np.inf]

    # Check effect of badly scaled preconditioners
    M0 = np.random.randn(10, 10)
    M0 = M0.dot(M0.T)
    Ms = [None, 1e-6 * M0, 1e6 * M0]

    for M, tol, atol in itertools.product(Ms, tols, tols):
        if tol == 0 and atol == 0:
            continue

        if solver is qmr:
            if M is not None:
                M = aslinearoperator(M)
                M2 = aslinearoperator(np.eye(10))
            else:
                M2 = None
            x, info = solver(A, b, M1=M, M2=M2, tol=tol, atol=atol)
        else:
            x, info = solver(A, b, M=M, tol=tol, atol=atol)

        assert_equal(info, 0)

        residual = A.dot(x) - b
        err = np.linalg.norm(residual)
        atol2 = tol * b_norm
        assert_(err <= max(atol, atol2))
for i in range(0, len(sparse_list)):
    componentMatrix = sparse_list[i]
    csrForm = componentMatrix.tocsr()
    normMatrix = onenormest(csrForm, t=3, itmax=5,
                            compute_v=False, compute_w=False)
    #diffFlag = numpy.allclose(csrForm.data, csrForm.transpose().data)
    #symmFlag = 'symmetric' if diffFlag else 'not symmetric'
    eps = pow(10, -9)
    rank = estimate_rank(aslinearoperator(componentMatrix), eps)
    #print('Approximate rank with relative error of(', eps, ') for numerical rank definition = ', rank)
    print(obj.data_file_list[i], obj.data_file_name[i], componentMatrix.shape,
          ' NonZeros = ', componentMatrix.nnz, ' 1-Norm = ', normMatrix,
          ' rank ', rank)
    obj.logger_i.info(obj.data_file_list[i] + ' ' + obj.data_file_name[i]
                      + str(componentMatrix.shape)
                      + ' NonZeros = ' + str(componentMatrix.nnz)
                      + ' 1-Norm = ' + str(normMatrix)
                      + ' rank ' + str(rank))

    # saving the sparsity pattern
    fig = plt.figure(figsize=(24.0, 15.0))
    fig.clf()
    fig.gca().add_artist(plt.spy(componentMatrix))
    brake.save(obj.output_path + 'dataAnalysis/' + obj.data_file_name[i], ext="png",
def mgs(A, Z, verbose=False):
    """
    Returns QR decomposition of Z. Q and R satisfy the following relations
    in exact arithmetic

    1. QR = Z
    2. Q^*AQ = I
    3. Q^*AZ = R
    4. ZR^{-1} = Q

    Uses Modified Gram-Schmidt for computing the A-orthogonal QR
    factorization

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        An array, sparse matrix, or LinearOperator representing
        the operation ``A * x``, where A is a real or complex square matrix.
    Z : ndarray
    verbose : bool, optional
        Displays information about the accuracy of the resulting QR

    Returns
    -------
    q : ndarray
        The A-orthogonal vectors
    Aq : ndarray
        The A^{-1}-orthogonal vectors
    r : ndarray
        The r of the QR decomposition

    See Also
    --------
    mgs_stable : Modified Gram-Schmidt with re-orthogonalization
    precholqr : Based on CholQR

    References
    ----------
    .. [1] B. Lowery and J. Langou, Stability Analysis of QR factorization
       in an Oblique Inner Product, http://arxiv.org/abs/1401.5171

    Examples
    --------
    >>> import numpy as np
    >>> A = np.diag(np.arange(1,101))
    >>> Z = np.random.randn(100,10)
    >>> q, Aq, r = mgs(A, Z, verbose = True)
    """
    # Get sizes
    n = np.size(Z, 0)
    k = np.size(Z, 1)

    # Convert into linear operator
    Aop = aslinearoperator(A)

    # Initialize
    Aq = np.zeros_like(Z, dtype='d')
    q = np.zeros_like(Z, dtype='d')
    r = np.zeros((k, k), dtype='d')

    z = Z[:, 0]
    Aq[:, 0] = Aop.matvec(z)
    r[0, 0] = np.sqrt(np.dot(z.T, Aq[:, 0]))
    q[:, 0] = Z[:, 0] / r[0, 0]
    Aq[:, 0] /= r[0, 0]

    for j in np.arange(1, k):
        q[:, j] = Z[:, j]
        for i in np.arange(j):
            r[i, j] = np.dot(q[:, j].T, Aq[:, i])
            q[:, j] -= r[i, j] * q[:, i]

        Aq[:, j] = Aop.matvec(q[:, j])
        r[j, j] = np.sqrt(np.dot(q[:, j].T, Aq[:, j]))

        # If element becomes too small, terminate
        if np.abs(r[j, j]) < 1.e-14:
            k = j
            q = q[:, :k]
            Aq = Aq[:, :k]
            r = r[:k, :k]
            print("A-orthonormalization broke down")
            break

        q[:, j] /= r[j, j]
        Aq[:, j] /= r[j, j]

    q = q[:, :k]
    Aq = Aq[:, :k]
    r = r[:k, :k]

    if verbose:
        # Verify Q*R = Y
        print("||QR-Y|| is ", np.linalg.norm(np.dot(q, r) - Z[:, :k], 2))

        # Verify Q'*A*Q = I
        T = np.dot(q.T, Aq)
        print("||Q^TAQ-I|| is ",
              np.linalg.norm(T - np.eye(k, dtype='d'), ord=2))

        # Verify Q'AY = R
        print("||Q^TAY-R|| is ",
              np.linalg.norm(np.dot(Aq.T, Z[:, :k]) - r, 2))

        # Verify YR^{-1} = Q
        print("||YR^{-1}-Q|| is ",
              np.linalg.norm(np.linalg.solve(r.T, Z[:, :k].T).T - q, 2))

    return q, Aq, r
def _aslinearoperator_with_dtype(m):
    m = aslinearoperator(m)
    if not hasattr(m, 'dtype'):
        x = np.zeros(m.shape[1])
        m.dtype = (m * x).dtype
    return m
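# Sketch: older SciPy LinearOperator wrappers could come back without a dtype;
# the helper above probes one matvec on a zero vector to infer it. With a
# modern SciPy the attribute is already set and this is a no-op.
import numpy as np
op = _aslinearoperator_with_dtype(np.eye(3, dtype=np.float32))
print(op.dtype)  # float32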
def build_pc_amg(A: spmatrix, **kwargs) -> LinearOperator:
    """AMG preconditioner"""
    return aslinearoperator(amgcl(A, **kwargs))
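# Hedged usage sketch: `amgcl` is whatever AMG binding this project wraps (not
# a SciPy name), so treat this as pseudocode for plugging the result into a
# Krylov solver as a preconditioner.
# from scipy.sparse.linalg import cg
# M = build_pc_amg(A)                  # A: scipy.sparse matrix
# x, info = cg(A, b, M=M)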
def __init__(self, conf, grisms, sources, extconf, mskconf, grismFF):
    # dimensionalities
    self.nimg = len(grisms)
    self.nsrc = len(sources)

    # print a message
    msg = "[info]Building the matrix: {} images, {} sources"
    print(msg.format(self.nimg, self.nsrc))

    # stuff for LSQR
    lsqrconf = conf['lsqr']
    self.atol = float(lsqrconf['atol'])
    self.btol = float(lsqrconf['btol'])
    self.conlim = float(lsqrconf['conlim'])
    self.maxiter = lsqrconf['maxiter']
    self.show = lsqrconf['show']

    # double check the type of maxiter
    if self.maxiter is not None:
        self.maxiter = int(self.maxiter)

    ## get extraction properties for the sources
    #nwav=[]
    #for segid,src in sources:
    #    for key in ['lamb0','lamb1','dlamb']:
    #        self.epar2(src,conf,extconf,key)
    #    #if src.lamb0 is None: src.lamb0=self.epar(conf,extconf,'lamb0')
    #    #if src.lamb1 is None: src.lamb1=self.epar(conf,extconf,'lamb1')
    #    #if src.dlamb is None: src.dlamb=self.epar(conf,extconf,'dlamb')
    #    nwav.append(src.nwav)

    # get number of wavelengths to use
    nwav = [src.nwav for segid, src in sources]
    cwav = np.cumsum(nwav)

    # get cumulative indices
    self.npar = cwav[-1]
    self.cwav = np.array([0, *cwav], dtype=cwav.dtype)

    # data to hold matrix/vector stuff
    i, j, aij = [], [], []
    self.bi = np.array([], float)
    self.downtype = False  # attempt to save space

    # this was like 'i' before. but now we need to increment for
    # each FLT and detector
    self.imgindex = 0

    # just a short hand
    path = conf['tables']['path']

    # loop over images
    pb = progressbar.ProgressBar(self.nimg, prefix='Loading ODTs')

    # output values
    #if __RAM__:
    #    import os,psutil
    #    pid = os.getpid()
    #    py = psutil.Process(pid)

    for fltindex, (fltfile, flt) in enumerate(grisms):
        # update the progressbar
        #if __RAM__:
        #    print("top:",py.memory_info()[0]/1024/1024/1024)
        pb.increment()

        data = self.loadFLT(flt, sources, extconf, mskconf, grismFF, pb, path)
        #if __RAM__:
        #    print("read loadFLT:",py.memory_info()[0]/1024/1024/1024)

        i.append(data[0])
        j.append(data[1])
        aij.append(data[2])
        #if __RAM__:
        #    print("stacked:",py.memory_info()[0]/1024/1024/1024)

    # stacking all the data into a 1D numpy array
    i = np.hstack(i)
    j = np.hstack(j)
    aij = np.hstack(aij)
    #if __RAM__:
    #    print("finished:",py.memory_info()[0]/1024/1024/1024)

    if len(i) == 0:
        print('[alarm]Matrix has no elements.')
        #raise RuntimeError("matrix had no elements")
        return

    # loaded everything
    print("[info]Compressing the indices")
    ic, iu = indices.compress(i)
    jc, ju = indices.compress(j)
    dim = np.array([len(iu), len(ju)])
    self.npar = dim[1]
    #self.npar=np.amax(ju)+1    # IDL has +1 here (this was an error?)
    self.npar = self.npar.astype(ju.dtype)
    del i, j

    # compute some things for ragged arrays
    if len(sources) == 1:
        #srcind=np.zeros(self.npar+1,dtype=int)
        srcind = np.zeros(self.npar, dtype=ju.dtype)
    else:
        srcind = np.digitize(ju, self.cwav) - 1

    # get the wavelength indices
    try:
        self.lam = ju - self.cwav[srcind]
    except:
        print(len(ju), len(srcind), len(sources))
        print('[debug]something wrong in matrix.py')
        import pdb
        pdb.set_trace()
    #self.lam=lam.astype(int)

    # get the reverse indices
    segids = np.array(list(sources.keys()))
    self.ri = indices.reverse(segids[srcind])
    self.hsrc = np.bincount(srcind.astype(int))

    # recast somethings
    aij = np.array(aij)

    # compute the frobenius norm
    self.frob = np.sqrt(np.sum(aij * aij))

    # sparse matrix is constructed as (ic,jc,np.array(mat['aij']),dim)
    self.A = ssl.aslinearoperator(coo_matrix((aij, (ic, jc)), shape=dim))
    del aij

    # record stuff
    self.bi = np.array(self.bi)
    self.icomp = ic
    self.iuniq = iu
    self.jcomp = jc
    self.juniq = ju

    # for making a plot
    self.lcurve = lcurve.LCurve(self.frob)
def test_linear_op_data_term(norm, inter, addL1, add2L1, processor, testNumber):
    m = 40
    d = 10
    p = 15
    d2 = 10
    if getNewOptVals and (testNumber == 0):
        A, y = getLSdata(m, d)
        H = np.random.normal(0, 1, [d2, p])
        cache['AdataTerm'] = A
        cache['ydataTerm'] = y
        cache['HdataTerm'] = H
    else:
        A = cache['AdataTerm']
        y = cache['ydataTerm']
        H = cache['HdataTerm']

    projSplit = ps.ProjSplitFit()
    processor.setStep(5e-1)
    gamma = 1e0
    projSplit.setDualScaling(gamma)
    projSplit.addData(A, y, 2, processor, normalize=norm, intercept=inter,
                      linearOp=aslinearoperator(H))
    lam = 0.01
    step = 1.0
    if addL1:
        regObj = L1(lam, step)
        projSplit.addRegularizer(regObj)
    if add2L1:
        regObj2 = L1(lam, step)
        projSplit.addRegularizer(regObj2)

    projSplit.run(maxIterations=10000, keepHistory=True, nblocks=3,
                  primalTol=1e-3, dualTol=1e-3)
    ps_val = projSplit.getObjective()

    primViol = projSplit.getPrimalViolation()
    dualViol = projSplit.getDualViolation()
    print("primal violation = {}".format(primViol))
    print("dual violation = {}".format(dualViol))

    if getNewOptVals:
        opt = cache.get((addL1, add2L1, inter, norm, 'optdata'))
        if opt is None:
            if norm:
                scaling = np.linalg.norm(A, axis=0)
                scaling += 1.0 * (scaling < 1e-10)
                A = np.sqrt(A.shape[0]) * A / scaling
            if inter:
                AwithIntercept = np.zeros((m, d + 1))
                AwithIntercept[:, 0] = np.ones(m)
                AwithIntercept[:, 1:(d + 1)] = A
                A = AwithIntercept

                HwithIntercept = np.zeros((d2 + 1, p + 1))
                HwithIntercept[:, 0] = np.zeros(d2 + 1)
                HwithIntercept[0] = np.ones(p + 1)
                HwithIntercept[0, 0] = 1.0
                HwithIntercept[1:(d2 + 1), 1:(p + 1)] = H
                H = HwithIntercept

            (m, _) = A.shape
            if inter:
                x_cvx = cvx.Variable(p + 1)
            else:
                x_cvx = cvx.Variable(p)

            f = (1 / (2 * m)) * cvx.sum_squares(A @ H @ x_cvx - y)
            if addL1:
                f += lam * cvx.norm(x_cvx, 1)
            if add2L1:
                f += lam * cvx.norm(x_cvx, 1)

            prob = cvx.Problem(cvx.Minimize(f))
            prob.solve(verbose=True)
            opt = prob.value
            cache[(addL1, add2L1, inter, norm, 'optdata')] = opt
    else:
        opt = cache[(addL1, add2L1, inter, norm, 'optdata')]

    print("ps opt = {}".format(ps_val))
    print("cvx opt = {}".format(opt))
    assert ps_val - opt < 1e-2
def spgl1(A, b, tau=0, sigma=0, x0=None, fid=None, verbosity=0, iter_lim=None,
          n_prev_vals=3, bp_tol=1e-6, ls_tol=1e-6, opt_tol=1e-4, dec_tol=1e-4,
          step_min=1e-16, step_max=1e5, active_set_niters=np.inf,
          subspace_min=False, iscomplex=False, max_matvec=np.inf,
          weights=None, project=_norm_l1_project, primal_norm=_norm_l1_primal,
          dual_norm=_norm_l1_dual):
    r"""SPGL1 solver.

    Solve basis pursuit (BP), basis pursuit denoise (BPDN), or LASSO problems
    [1]_ [2]_ depending on the choice of ``tau`` and ``sigma``::

        (BP)     minimize  ||x||_1   subj. to  Ax = b

        (BPDN)   minimize  ||x||_1   subj. to  ||Ax-b||_2 <= sigma

        (LASSO)  minimize  ||Ax-b||_2  subj. to  ||x||_1 <= tau

    The matrix ``A`` may be square or rectangular (over-determined or
    under-determined), and may have any rank.

    Parameters
    ----------
    A : {sparse matrix, ndarray, LinearOperator}
        Representation of an m-by-n matrix. It is required that
        the linear operator can produce ``Ax`` and ``A^T x``.
    b : array_like, shape (m,)
        Right-hand side vector ``b``.
    tau : float, optional
        LASSO threshold. If different from ``None``, spgl1 solves LASSO
        problem
    sigma : float, optional
        BPDN threshold. If different from ``None``, spgl1 solves BPDN problem
    x0 : array_like, shape (n,), optional
        Initial guess of x, if None zeros are used.
    fid : file, optional
        File ID to direct log output, if None print on screen.
    verbosity : int, optional
        0=quiet, 1=some output, 2=more output.
    iter_lim : int, optional
        Max. number of iterations (default if ``10*m``).
    n_prev_vals : int, optional
        Line-search history length.
    bp_tol : float, optional
        Tolerance for identifying a basis pursuit solution.
    ls_tol : float, optional
        Tolerance for least-squares solution. Iterations are stopped when the
        ratio between the dual norm of the gradient and the L2 norm of the
        residual becomes smaller or equal to ``ls_tol``.
    opt_tol : float, optional
        Optimality tolerance. More specifically, when using basis pursuit
        denoise, the optimality condition is met when the absolute difference
        between the L2 norm of the residual and the ``sigma`` is smaller than
        ``opt_tol``.
    dec_tol : float, optional
        Required relative change in primal objective for Newton. Larger
        ``dec_tol`` means more frequent Newton updates.
    step_min : float, optional
        Minimum spectral step.
    step_max : float, optional
        Maximum spectral step.
    active_set_niters : float, optional
        Maximum number of iterations where no change in support is tolerated.
        Exit with EXIT_ACTIVE_SET if no change is observed for
        ``active_set_niters`` iterations
    subspace_min : bool, optional
        Subspace minimization (``True``) or not (``False``)
    iscomplex : bool, optional
        Problem with complex variables (``True``) or not (``False``)
    max_matvec : int, optional
        Maximum matrix-vector multiplies allowed
    weights : {float, ndarray}, optional
        Weights ``W`` in ``||Wx||_1``
    project : func, optional
        Projection function
    primal_norm : func, optional
        Primal norm evaluation function
    dual_norm : func, optional
        Dual norm evaluation function

    Returns
    -------
    x : array_like, shape (n,)
        Inverted model
    r : array_like, shape (m,)
        Final residual
    g : array_like, shape (h,)
        Final gradient
    info : dict
        Dictionary with the following information:

        ``tau``, final value of tau (see sigma above)

        ``rnorm``, two-norm of the optimal residual

        ``rgap``, relative duality gap (an optimality measure)

        ``gnorm``, Lagrange multiplier of (LASSO)

        ``stat``, ``1``: found a BPDN solution, ``2``: found a BP solution;
        exit based on small gradient, ``3``: found a BP solution; exit based
        on small residual, ``4``: found a LASSO solution, ``5``: error: too
        many iterations, ``6``: error: linesearch failed, ``7``: error: found
        suboptimal BP solution, ``8``: error: too many matrix-vector products

        ``niters``, number of iterations

        ``nProdA``, number of multiplications with A

        ``nProdAt``, number of multiplications with A'

        ``n_newton``, number of Newton steps

        ``time_project``, projection time (seconds)

        ``time_matprod``, matrix-vector multiplications time (seconds)

        ``time_total``, total solution time (seconds)

        ``niters_lsqr``, number of lsqr iterations (if ``subspace_min=True``)

        ``xnorm1``, L1-norm model solution history through iterations

        ``rnorm2``, L2-norm residual history through iterations

        ``lambdaa``, Lagrange multiplier history through iterations

    References
    ----------
    .. [1] E. van den Berg and M. P. Friedlander, "Probing the Pareto frontier
       for basis pursuit solutions", SIAM J. on Scientific Computing,
       31(2):890-912. (2008).
    .. [2] E. van den Berg and M. P. Friedlander, "Sparse optimization with
       least-squares constraints", Tech. Rep. TR-2010-02, Dept of Computer
       Science, Univ of British Columbia (2010).

    """
    start_time = time.time()

    A = aslinearoperator(A)
    m, n = A.shape

    if tau == 0:
        single_tau = False
    else:
        single_tau = True

    if iter_lim is None:
        iter_lim = 10 * m

    max_line_errors = 10  # Maximum number of line-search failures.
    piv_tol = 1e-12  # Threshold for significant Newton step.
    max_matvec = max(3, max_matvec)  # Max number of allowed matvec/rmatvec.

    # Initialize local variables.
    niters = 0  # Total SPGL1 iterations.
    niters_lsqr = 0  # Total LSQR iterations.
    nprodA = 0  # Number of matvec operations
    nprodAt = 0  # Number of rmatvec operations
    last_fv = np.full(10, -np.inf)  # Last m function values.
    nline_tot = 0  # Total number of linesearch steps.
    print_tau = False
    n_newton = 0  # Number of Newton iterations
    bnorm = np.linalg.norm(b)
    stat = False
    time_project = 0  # Time spent in projections
    time_matprod = 0  # Time spent in matvec computations
    nnz_niters = 0  # No. of iterations with fixed pattern.
    nnz_idx = None  # Active-set indicator.
    subspace = False  # Flag if did subspace min in current itn.
    stepg = 1  # Step length for projected gradient.
    test_updatetau = False  # Previous step did not update tau

    # Determine initial x and see if problem is complex
    realx = np.lib.isreal(A).all() and np.lib.isreal(b).all()
    if x0 is None:
        x = np.zeros(n)
    else:
        x = np.asarray(x0)

    # Override realx when iscomplex flag is set
    if iscomplex:
        realx = False

    # Check if all weights (if any) are strictly positive. In previous
    # versions we also checked if the number of weights was equal to
    # n. In the case of multiple measurement vectors, this no longer
    # needs to apply, so the check was removed.
    if weights is not None:
        if not np.isfinite(weights).all():
            raise ValueError('Entries in weights must be finite')
        if np.any(weights <= 0):
            raise ValueError('Entries in weights must be strictly positive')
    else:
        weights = 1

    # Quick exit if sigma >= ||b||. Set tau = 0 to short-circuit the loop.
    if bnorm <= sigma:
        logger.warning('W: sigma >= ||b||. Exact solution is x = 0.')
        tau = 0
        single_tau = True

    # Do not do subspace minimization if x is complex.
    if not realx and subspace_min:
        logger.warning(
            'W: Subspace minimization disabled when variables are complex.')
        subspace_min = False

    # Pre-allocate iteration info vectors.
    xnorm1 = np.zeros(min(iter_lim + 1, _allocSize))
    rnorm2 = np.zeros(min(iter_lim + 1, _allocSize))
    lambdaa = np.zeros(min(iter_lim + 1, _allocSize))

    # Log header.
    if verbosity >= 1:
        _printf(fid, '')
        _printf(fid, '=' * 80 + '')
        _printf(fid, 'SPGL1')
        _printf(fid, '=' * 80 + '')
        _printf(fid, '%-22s: %8i %4s' % ('No. rows', m, ''))
        _printf(fid, '%-22s: %8i\n' % ('No. columns', n))
        _printf(fid, '%-22s: %8.2e %4s' % ('Initial tau', tau, ''))
        _printf(fid, '%-22s: %8.2e\n' % ('Two-norm of b', bnorm))
        _printf(fid, '%-22s: %8.2e %4s' % ('Optimality tol', opt_tol, ''))
        if single_tau:
            _printf(fid, '%-22s: %8.2e\n' % ('Target one-norm of x', tau))
        else:
            _printf(fid, '%-22s: %8.2e\n' % ('Target objective', sigma))
        _printf(fid, '%-22s: %8.2e %4s' % ('Basis pursuit tol', bp_tol, ''))
        _printf(fid, '%-22s: %8i\n' % ('Maximum iterations', iter_lim))
    if verbosity >= 2:
        if single_tau:
            logb = '%5i  %13.7e  %13.7e  %9.2e  %6.1f  %6i  %6i %6s'
            logh = '%5s  %13s  %13s  %9s  %6s  %6s  %6s\n'
            _printf(fid, logh % ('iterr', 'Objective', 'Relative Gap',
                                 'gnorm', 'stepg', 'nnz_x', 'nnz_g'))
        else:
            logb = '%5i  %13.7e  %13.7e  %9.2e  %9.3e  %6.1f  %6i  %6i %6s'
            logh = '%5s  %13s  %13s  %9s  %9s  %6s  %6s  %6s  %6s\n'
            _printf(fid, logh % ('iterr', 'Objective', 'Relative Gap',
                                 'Rel Error', 'gnorm', 'stepg', 'nnz_x',
                                 'nnz_g', 'tau'))

    # Project the starting point and evaluate function and gradient.
    start_time_project = time.time()
    x = project(x, weights, tau)
    time_project += time.time() - start_time_project
    start_time_matvec = time.time()
    r = b - A.matvec(x)  # r = b - Ax
    g = -A.rmatvec(r)  # g = -A'r
    time_matprod += time.time() - start_time_matvec
    f = np.linalg.norm(r) ** 2 / 2.
    nprodA += 1
    nprodAt += 1

    # Required for nonmonotone strategy.
    last_fv[0] = f
    fbest = f
    xbest = x.copy()
    fold = f

    # Compute projected gradient direction and initial step length.
    start_time_project = time.time()
    dx = project(x - g, weights, tau) - x
    time_project += time.time() - start_time_project
    dxnorm = np.linalg.norm(dx, np.inf)
    if dxnorm < (1. / step_max):
        gstep = step_max
    else:
        gstep = min(step_max, max(step_min, 1. / dxnorm))

    # Main iteration loop.
    while 1:
        # Test exit conditions.

        # Compute quantities needed for log and exit conditions.
        gnorm = dual_norm(-g, weights)
        rnorm = np.linalg.norm(r)
        gap = np.dot(np.conj(r), r - b) + tau * gnorm
        rgap = abs(gap) / max(1., f)
        aerror1 = rnorm - sigma
        aerror2 = f - sigma ** 2. / 2.
        rerror1 = abs(aerror1) / max(1., rnorm)
        rerror2 = abs(aerror2) / max(1., f)

        # Count number of consecutive iterations with identical support.
        nnz_old = nnz_idx
        nnz_x, nnz_g, nnz_idx, nnz_diff = _active_vars(x, g, nnz_idx, opt_tol,
                                                       weights, dual_norm)
        if nnz_diff:
            nnz_niters = 0
        else:
            nnz_niters += 1
            if nnz_niters + 1 >= active_set_niters:
                stat = EXIT_ACTIVE_SET

        # Single tau: Check if we're optimal.
        # The 2nd condition is there to guard against large tau.
        if single_tau:
            if rgap <= opt_tol or rnorm < opt_tol * bnorm:
                stat = EXIT_OPTIMAL
        else:
            # Multiple tau: Check if found root and/or if tau needs updating.
            # Test if a least-squares solution has been found
            if gnorm <= ls_tol * rnorm:
                stat = EXIT_LEAST_SQUARES
            if rgap <= max(opt_tol, rerror2) or rerror1 <= opt_tol:
                # The problem is nearly optimal for the current tau.
                # Check optimality of the current root.
                if rnorm <= sigma:
                    stat = EXIT_SUBOPTIMAL_BP  # Found suboptimal BP sol.
                if rerror1 <= opt_tol:
                    stat = EXIT_ROOT_FOUND  # Found approx root.
                if rnorm <= bp_tol * bnorm:
                    stat = EXIT_BPSOL_FOUND  # Resid minimized -> BP sol.
            fchange = np.abs(f - fold)
            test_relchange1 = fchange <= dec_tol * f
            test_relchange2 = fchange <= 1e-1 * f * (np.abs(rnorm - sigma))
            test_updatetau = ((test_relchange1 and rnorm > 2 * sigma) or
                              (test_relchange2 and rnorm <= 2 * sigma)) and \
                             not stat and not test_updatetau

            if test_updatetau:
                # Update tau.
                tau_old = tau
                tau = max(0, tau + (rnorm * aerror1) / gnorm)
                n_newton += 1
                print_tau = np.abs(tau_old - tau) >= 1e-6 * tau  # For log only.
                if tau < tau_old:
                    # The one-norm ball has decreased. Need to make sure that
                    # the next iterate is feasible, which we do by projecting it.
                    start_time_project = time.time()
                    x = project(x, weights, tau)
                    time_project += time.time() - start_time_project

                    # Update the residual, gradient, and function value.
                    start_time_matvec = time.time()
                    r = b - A.matvec(x)
                    g = -A.rmatvec(r)
                    time_matprod += time.time() - start_time_matvec
                    f = np.linalg.norm(r) ** 2 / 2.
                    nprodA += 1
                    nprodAt += 1

                    # Reset the function value history.
                    last_fv = np.full(10, -np.inf)
                    last_fv[1] = f

        # Too many iterations and not converged.
        if not stat and niters >= iter_lim:
            stat = EXIT_ITERATIONS

        # Print log, update history and act on exit conditions.
        if verbosity >= 2 and \
                (((niters < 10) or (iter_lim - niters < 10) or
                  (niters % 10 == 0)) or single_tau or print_tau or stat):
            tauflag = '              '
            subflag = ''
            if print_tau:
                tauflag = ' %13.7e' % tau
            if subspace:
                subflag = ' S %2i' % niters_lsqr
            if single_tau:
                _printf(fid, logb % (niters, rnorm, rgap, gnorm,
                                     np.log10(stepg), nnz_x, nnz_g, subflag))
                if subspace:
                    _printf(fid, '  %s' % subflag)
            else:
                _printf(fid, logb % (niters, rnorm, rgap, rerror1, gnorm,
                                     np.log10(stepg), nnz_x, nnz_g,
                                     tauflag + subflag))
        print_tau = False
        subspace = False

        # Update history info
        if niters > 0 and niters % _allocSize == 0:  # enlarge allocation
            allocincrement = min(_allocSize, iter_lim - xnorm1.shape[0])
            xnorm1 = np.hstack((xnorm1, np.zeros(allocincrement)))
            rnorm2 = np.hstack((rnorm2, np.zeros(allocincrement)))
            lambdaa = np.hstack((lambdaa, np.zeros(allocincrement)))
        xnorm1[niters] = primal_norm(x, weights)
        rnorm2[niters] = rnorm
        lambdaa[niters] = gnorm

        if stat:
            break

        # Iterations begin here.
        niters += 1
        xold = x.copy()
        fold = f.copy()
        gold = g.copy()
        rold = r.copy()

        while 1:
            # Projected gradient step and linesearch.
            f, x, r, niter_line, stepg, lnerr, \
                time_project_curvy, time_matprod_curvy = \
                _spg_line_curvy(x, gstep * g, max(last_fv), A, b, project,
                                weights, tau)
            time_project += time_project_curvy
            time_matprod += time_matprod_curvy
            nprodA += niter_line + 1
            nline_tot += niter_line
            if nprodA + nprodAt > max_matvec:
                stat = EXIT_MATVEC_LIMIT
                break

            if lnerr:
                # Projected backtrack failed.
                # Retry with feasible dirn linesearch.
                x = xold.copy()
                f = fold

                start_time_project = time.time()
                dx = project(x - gstep * g, weights, tau) - x
                time_project += time.time() - start_time_project
                gtd = np.dot(np.conj(g), dx)
                f, x, r, niter_line, lnerr, time_matprod = \
                    _spg_line(f, x, dx, gtd, max(last_fv), A, b)
                time_matprod += time_matprod
                nprodA += niter_line + 1
                nline_tot += niter_line
                if nprodA + nprodAt > max_matvec:
                    stat = EXIT_MATVEC_LIMIT
                    break

                if lnerr:
                    # Failed again.
                    # Revert to previous iterates and damp max BB step.
                    x = xold
                    f = fold
                    if max_line_errors <= 0:
                        stat = EXIT_LINE_ERROR
                    else:
                        step_max = step_max / 10.
                        logger.warning(
                            'Linesearch failed with error %s. '
                            'Damping max BB scaling to %s', lnerr, step_max)
                        max_line_errors -= 1

            # Subspace minimization (only if active-set change is small).
            if subspace_min:
                start_time_matvec = time.time()
                g = -A.rmatvec(r)
                time_matprod += time.time() - start_time_matvec
                nprodAt += 1
                nnz_x, nnz_g, nnz_idx, nnz_diff = \
                    _active_vars(x, g, nnz_old, opt_tol, weights, dual_norm)
                if not nnz_diff:
                    if nnz_x == nnz_g:
                        iter_lim_lsqr = 20
                    else:
                        iter_lim_lsqr = 5
                    nnz_idx = np.abs(x) >= opt_tol

                    ebar = np.sign(x[nnz_idx])
                    nebar = np.size(ebar)
                    Sprod = _LSQRprod(A, nnz_idx, ebar, n)

                    dxbar, istop, itn_lsqr = \
                        lsqr(Sprod, r, 1e-5, 1e-1, 1e-1, 1e12,
                             iter_lim=iter_lim_lsqr, show=0)[0:3]
                    nprodA += itn_lsqr
                    nprodAt += itn_lsqr + 1
                    niters_lsqr = niters_lsqr + itn_lsqr

                    # LSQR iterations successful. Take the subspace step.
                    if istop != 4:
                        # Push dx back into full space: dx = Z dx.
                        dx = np.zeros(n)
                        dx[nnz_idx] = \
                            dxbar - (1 / nebar) * \
                            np.dot(np.dot(np.conj(ebar.T), dxbar), ebar)

                        # Find largest step to a change in sign.
                        block1 = nnz_idx & (x < 0) & (dx > +piv_tol)
                        block2 = nnz_idx & (x > 0) & (dx < -piv_tol)
                        alpha1 = np.inf
                        alpha2 = np.inf
                        if np.any(block1):
                            alpha1 = min(-x[block1] / dx[block1])
                        if np.any(block2):
                            alpha2 = min(-x[block2] / dx[block2])
                        alpha = min([1, alpha1, alpha2])
                        if alpha < 0:
                            raise ValueError('Alpha smaller than zero')
                        if np.dot(np.conj(ebar.T), dx[nnz_idx]) > opt_tol:
                            raise ValueError('Subspace update signed sum '
                                             'bigger than tolerance')

                        # Update variables.
                        x = x + alpha * dx
                        start_time_matvec = time.time()
                        r = b - A.matvec(x)
                        time_matprod += time.time() - start_time_matvec
                        f = abs(np.dot(np.conj(r), r)) / 2.
                        subspace = True
                        nprodA += 1

                if primal_norm(x, weights) > tau + opt_tol:
                    raise ValueError('Primal norm out of bound')

            # Update gradient and compute new Barzilai-Borwein scaling.
            if not lnerr:
                start_time_matvec = time.time()
                g = -A.rmatvec(r)
                time_matprod += time.time() - start_time_matvec
                nprodAt += 1
                s = x - xold
                y = g - gold
                sts = np.dot(np.conj(s), s)
                sty = np.dot(np.conj(s), y)
                if sty <= 0:
                    gstep = step_max
                else:
                    gstep = min(step_max, max(step_min, sts / sty))
            else:
                gstep = min(step_max, gstep)

            # Leave while loop. This is done to allow stopping the
            # computations at any time within the loop if max_matvec is
            # reached. If this is not the case, the loop is stopped here.
            break

        if stat == EXIT_MATVEC_LIMIT:
            niters -= 1
            x = xold.copy()
            f = fold
            g = gold.copy()
            r = rold.copy()
            break

        # Update function history.
        if single_tau or f > sigma ** 2 / 2.:  # Don't update if superoptimal.
            last_fv[np.mod(niters, n_prev_vals)] = f.copy()
            if fbest > f:
                fbest = f.copy()
                xbest = x.copy()

    # Restore best solution (only if solving single problem).
    if single_tau and f > fbest:
        rnorm = np.sqrt(2. * fbest)
        print('Restoring best iterate to objective ' + str(rnorm))
        x = xbest.copy()
        start_time_matvec = time.time()
        r = b - A.matvec(x)
        g = -A.rmatvec(r)
        time_matprod += time.time() - start_time_matvec
        gnorm = dual_norm(g, weights)
        rnorm = np.linalg.norm(r)
        nprodA += 1
        nprodAt += 1

    # Final cleanup before exit.
    info = {}
    info['tau'] = tau
    info['rnorm'] = rnorm
    info['rgap'] = rgap
    info['gnorm'] = gnorm
    info['stat'] = stat
    info['niters'] = niters
    info['nprodA'] = nprodA
    info['nprodAt'] = nprodAt
    info['n_newton'] = n_newton
    info['time_project'] = time_project
    info['time_matprod'] = time_matprod
    info['niters_lsqr'] = niters_lsqr
    info['time_total'] = time.time() - start_time
    info['xnorm1'] = xnorm1[0:niters]
    info['rnorm2'] = rnorm2[0:niters]
    info['lambdaa'] = lambdaa[0:niters]

    # Print final output.
    if verbosity >= 1:
        _printf(fid, '')
        if stat == EXIT_OPTIMAL:
            _printf(fid, 'EXIT -- Optimal solution found')
        elif stat == EXIT_ITERATIONS:
            _printf(fid, 'ERROR EXIT -- Too many iterations')
        elif stat == EXIT_ROOT_FOUND:
            _printf(fid, 'EXIT -- Found a root')
        elif stat == EXIT_BPSOL_FOUND:
            _printf(fid, 'EXIT -- Found a BP solution')
        elif stat == EXIT_LEAST_SQUARES:
            _printf(fid, 'EXIT -- Found a least-squares solution')
        elif stat == EXIT_LINE_ERROR:
            _printf(fid, 'ERROR EXIT -- Linesearch error (%i)' % lnerr)
        elif stat == EXIT_SUBOPTIMAL_BP:
            _printf(fid, 'EXIT -- Found a suboptimal BP solution')
        elif stat == EXIT_MATVEC_LIMIT:
            _printf(fid, 'EXIT -- Maximum matrix-vector operations reached')
        elif stat == EXIT_ACTIVE_SET:
            _printf(fid, 'EXIT -- Found a possible active set')
        else:
            _printf(fid, 'SPGL1 ERROR: Unknown termination condition')
        _printf(fid, '')

        _printf(fid, '%-20s: %6i %6s %-20s: %6.1f' %
                ('Products with A', nprodA, '',
                 'Total time (secs)', info['time_total']))
        _printf(fid, '%-20s: %6i %6s %-20s: %6.1f' %
                ('Products with A^H', nprodAt, '',
                 'Project time (secs)', info['time_project']))
        _printf(fid, '%-20s: %6i %6s %-20s: %6.1f' %
                ('Newton iterations', n_newton, '',
                 'Mat-vec time (secs)', info['time_matprod']))
        _printf(fid, '%-20s: %6i %6s %-20s: %6i' %
                ('Line search its', nline_tot, '',
                 'Subspace iterations', niters_lsqr))

    return x, r, g, info
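# Minimal BPDN usage sketch for the solver above, mirroring the spgl1 package
# API (left as comments because it depends on the module-level helpers here):
# import numpy as np
# rng = np.random.default_rng(4)
# A = rng.standard_normal((50, 128))
# x_true = np.zeros(128); x_true[[3, 40, 77]] = [1.0, -2.0, 0.5]
# b = A.dot(x_true)
# x, r, g, info = spgl1(A, b, sigma=1e-6, verbosity=0)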
def spg_mmv(A, B, sigma=0, **kwargs):
    """MMV problem.

    ``spg_mmv`` is designed to solve the multi-measurement vector basis
    pursuit denoise::

        (MMV)  minimize  ||X||_1,2  subject to  ||A X - B||_2,2 <= sigma

    where ``A`` is an M-by-N matrix, ``B`` is an M-by-G matrix, and ``sigma``
    is a nonnegative scalar. ``A`` can be an explicit M-by-N matrix or a
    :class:`scipy.sparse.linalg.LinearOperator`.

    Parameters
    ----------
    A : {sparse matrix, ndarray, LinearOperator}
        Representation of an M-by-N matrix. It is required that
        the linear operator can produce ``Ax`` and ``A^T x``.
    B : array_like, shape (m, g)
        Right-hand side matrix ``B`` of size M-by-G.
    sigma : float, optional
        BPDN threshold. If different from ``None``, spgl1 solves BPDN problem
    kwargs : dict, optional
        Additional input parameters (refer to :func:`spgl1.spgl1` for a list
        of possible parameters)

    Returns
    -------
    x : array_like, shape (n,)
        Inverted model
    r : array_like, shape (m,)
        Final residual
    g : array_like, shape (h,)
        Final gradient
    info : dict
        See spgl1.

    """
    A = aslinearoperator(A)
    m, n = A.shape
    groups = B.shape[1]
    A = _blockdiag(A, m, n, groups)

    # Set projection specific functions
    _primal_norm = _norm_l12_primal if 'primal_norm' not in kwargs.keys() \
        else kwargs['primal_norm']
    _dual_norm = _norm_l12_dual if 'dual_norm' not in kwargs.keys() \
        else kwargs['dual_norm']
    _project = _norm_l12_project if 'project' not in kwargs.keys() \
        else kwargs['project']
    kwargs.pop('primal_norm', None)
    kwargs.pop('dual_norm', None)
    kwargs.pop('project', None)

    project = lambda x, weight, tau: _project(groups, x, weight, tau)
    primal_norm = lambda x, weight: _primal_norm(groups, x, weight)
    dual_norm = lambda x, weight: _dual_norm(groups, x, weight)

    tau = 0
    x0 = None
    x, r, g, info = spgl1(A, B.ravel(), tau, sigma, x0, project=project,
                          primal_norm=primal_norm, dual_norm=dual_norm,
                          **kwargs)
    x = x.reshape(n, groups)
    g = g.reshape(n, groups)
    return x, r, g, info
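# Usage sketch for spg_mmv (same caveat as above: relies on this module's
# helpers): recover a jointly sparse X from several measurement vectors.
# import numpy as np
# rng = np.random.default_rng(5)
# A = rng.standard_normal((40, 100))
# X_true = np.zeros((100, 3)); X_true[:5] = rng.standard_normal((5, 3))
# B = A.dot(X_true)
# X, r, g, info = spg_mmv(A, B, sigma=1e-6)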
def convert(self, x):
    if isinstance(x, (np.ndarray, sp.spmatrix)):
        return sla.aslinearoperator(x)
    else:
        assert False
def lsqr_single_rhs(A, b, tol=1e-14, iter_lim=None):
    """
    A simple version of LSQR
    """
    A = aslinearoperator(A)
    m, n = A.shape

    eps = 32 * np.finfo(float).eps  # slightly larger than eps
    if tol < eps:
        tol = eps
    elif tol >= 1:
        tol = 1 - eps

    u = b.squeeze().copy()
    beta = norm(u)
    if beta != 0:
        u /= beta

    v = A.rmatvec(u)
    alpha = norm(v)
    if alpha != 0:
        v /= alpha

    w = v.copy()
    x = np.zeros(n)

    phibar = beta
    rhobar = alpha

    nrm_a = 0.0
    cnd_a = 0.0
    sq_d = 0.0
    nrm_r = beta
    nrm_ar_0 = alpha * beta

    if nrm_ar_0 == 0:  # alpha == 0 || beta == 0
        return x, 0, 0

    nrm_x = 0
    sq_x = 0
    z = 0
    cs2 = -1
    sn2 = 0

    max_n_stag = 3
    stag = 0

    flag = -1
    if iter_lim is None:
        iter_lim = np.max([20, 2 * np.min([m, n])])

    for itn in range(int(iter_lim)):
        u = A.matvec(v) - alpha * u
        beta = norm(u)
        u /= beta

        # estimate of norm(A)
        nrm_a = sqrt(nrm_a**2 + alpha**2 + beta**2)

        v = A.rmatvec(u) - beta * v
        alpha = norm(v)
        v /= alpha

        rho = sqrt(rhobar**2 + beta**2)
        cs = rhobar / rho
        sn = beta / rho
        theta = sn * alpha
        rhobar = -cs * alpha
        phi = cs * phibar
        phibar = sn * phibar

        x += (phi / rho) * w
        w = v - (theta / rho) * w

        # estimate of norm(r)
        nrm_r = phibar

        # estimate of norm(A'*r)
        nrm_ar = phibar * alpha * np.abs(cs)

        # check convergence
        if nrm_ar < tol * nrm_ar_0:
            flag = 0
            break

        if nrm_ar < eps * nrm_a * nrm_r:
            flag = 0
            break

        # estimate of cond(A)
        sq_w = np.dot(w, w)
        nrm_w = sqrt(sq_w)
        sq_d += sq_w / (rho**2)
        cnd_a = nrm_a * sqrt(sq_d)

        # check condition number
        if cnd_a > 1 / eps:
            flag = 1
            break

        # check stagnation
        if abs(phi / rho) * nrm_w < eps * nrm_x:
            stag += 1
        else:
            stag = 0
        if stag >= max_n_stag:
            flag = 1
            break

        # estimate of norm(x)
        delta = sn2 * rho
        gambar = -cs2 * rho
        rhs = phi - delta * z
        zbar = rhs / gambar
        nrm_x = sqrt(sq_x + zbar**2)
        gamma = sqrt(gambar**2 + theta**2)
        cs2 = gambar / gamma
        sn2 = theta / gamma
        z = rhs / gamma
        sq_x += z**2

    return x, flag, itn
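# Sketch exercising the simple LSQR above on a small overdetermined problem
# (assumes numpy as np and norm/sqrt bound as in the snippet itself).
import numpy as np

rng = np.random.default_rng(6)
A_ls = rng.standard_normal((30, 10))
b_ls = rng.standard_normal(30)
x_ls, flag, itn = lsqr_single_rhs(A_ls, b_ls, tol=1e-12)
print(flag, np.linalg.norm(A_ls.T.dot(A_ls.dot(x_ls) - b_ls)))  # ~ 0 at the LS solution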
def lsqr(A, B, X=None, tol=1e-14, iter_lim=1000):
    """
    A simple version of LSQR for solving || AX - B ||^2_fro
    """
    start_time = time.time()

    A = aslinearoperator(A)
    m, n = A.shape
    m, k = B.shape

    eps = 32 * np.finfo(float).eps  # slightly larger than eps
    if tol < eps:
        tol = eps
    elif tol >= 1:
        tol = 1 - eps

    U = np.copy(B)
    beta = colnorm(U)
    ibeta = np.copy(beta)
    ibeta[np.nonzero(beta)] = 1. / beta[np.nonzero(beta)]
    U = np.dot(U, np.diag(ibeta))
    #if beta != 0:
    #    u /= beta

    V = A.rmatvec(U)
    #V = np.dot(A.T, U)
    alpha = colnorm(V)
    ialpha = np.copy(alpha)
    ialpha[np.nonzero(alpha)] = 1. / alpha[np.nonzero(alpha)]
    V = np.dot(V, np.diag(ialpha))
    #if alpha != 0:
    #    v /= alpha

    W = V.copy()
    if X is None:
        X = np.zeros((n, k))

    phibar = beta
    rhobar = alpha

    nrm_a = np.zeros(k)
    cnd_a = np.zeros(k)
    sq_d = np.zeros(k)
    nrm_r = beta
    nrm_ar_0 = alpha * beta

    if all(nrm_ar_0 == 0):  # alpha == 0 || beta == 0
        return X, 0, 0

    nrm_x = np.zeros(k)
    sq_x = np.zeros(k)
    z = np.zeros(k)
    cs2 = -1 * np.ones(k)
    sn2 = np.zeros(k)

    max_n_stag = 200
    stag = 0

    flag = -1
    if iter_lim is None:
        iter_lim = np.max([20, 2 * np.min([m, n])])

    for itn in range(int(iter_lim)):
        AV = A.matmat(V)
        U = AV - np.dot(U, np.diag(alpha))
        beta = colnorm(U)
        ibeta = np.copy(beta)
        ibeta[np.nonzero(beta)] = 1. / beta[np.nonzero(beta)]
        U = np.dot(U, np.diag(ibeta))

        # estimate of norm(A)
        nrm_a = np.sqrt(nrm_a**2 + alpha**2 + beta**2)

        V = A.rmatvec(U) - np.dot(V, np.diag(beta))
        alpha = colnorm(V)
        ialpha = np.copy(alpha)
        ialpha[np.nonzero(alpha)] = 1. / alpha[np.nonzero(alpha)]
        V = np.dot(V, np.diag(ialpha))

        rho = np.sqrt(rhobar**2 + beta**2)
        cs = rhobar / rho
        sn = beta / rho
        theta = sn * alpha
        rhobar = -cs * alpha
        phi = cs * phibar
        phibar = sn * phibar

        X += np.dot(W, np.diag(phi / rho))
        W = V - np.dot(W, np.diag(theta / rho))

        # estimate of norm(r)
        nrm_r = phibar

        # estimate of norm(A'*r)
        nrm_ar = phibar * alpha * np.abs(cs)

        elapsed_time = time.time() - start_time
        print("Iteration = %d, Residual Norm = %f, Time Elapsed = %f" %
              (itn, np.sum(nrm_ar), elapsed_time), file=sys.stdout)
        sys.stdout.flush()

        # check convergence
        if np.sum(nrm_ar) < tol * np.sum(nrm_ar_0):
            flag = 0
            print("Converged (1)")
            break

        if np.sum(nrm_ar) < eps * np.sum(nrm_a * nrm_r):
            flag = 0
            print("Converged (2)")
            break

        # estimate of cond(A)
        #sq_w = np.dot(W,W)
        nrm_w = colnorm(W)
        sq_w = nrm_w * nrm_w
        sq_d += sq_w / (rho**2)
        cnd_a = nrm_a * np.sqrt(sq_d)

        # check condition number
        if any(cnd_a > 1 / eps):
            flag = 1
            break

        # check stagnation
        if any(abs(phi / rho) * nrm_w < eps * nrm_x):
            stag += 1
        else:
            stag = 0
        if stag >= max_n_stag:
            flag = 1
            break

        # estimate of norm(x)
        delta = sn2 * rho
        gambar = -cs2 * rho
        rhs = phi - delta * z
        zbar = rhs / gambar
        nrm_x = np.sqrt(sq_x + zbar**2)
        gamma = np.sqrt(gambar**2 + theta**2)
        cs2 = gambar / gamma
        sn2 = theta / gamma
        z = rhs / gamma
        sq_x += z**2

    return X, flag, itn
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute lsmr step in scaled variables and then
            # transform back to normal variables, if lsmr would give exact lsq
            # solution, this would be equivalent to not doing any
            # transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.
            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit)

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
n = 100
x = np.linspace(0, 1, n)
X, Y = np.meshgrid(x, x)
Q = np.exp(-np.abs(X - Y))


class LowRank:
    def __init__(self, Q, n):
        self.Q = Q
        self.shape = (n, n)

    def matvec(self, x):
        return np.dot(self.Q, x)


mat = LowRank(Q, n)
matop = aslinearoperator(mat)
l, v = RandomizedHEP(matop, k=10, twopass=False)

le, ve = eig(Q)
le = np.real(le)

# Test A-orthonormalize
z = np.random.randn(n, 10)
q, _, r = Aorthonormalize(matop, z, verbose=False)


class Identity:
    def __init__(self, n):
        self.shape = (n, n)

    def matvec(self, x):
        return x
def onenormest(A, t=2, itmax=5, compute_v=False, compute_w=False):
    """
    Compute a lower bound of the 1-norm of a sparse matrix.

    .. versionadded:: 0.13.0

    Parameters
    ----------
    A : ndarray or other linear operator
        A linear operator that can be transposed and that can
        produce matrix products.
    t : int, optional
        A positive parameter controlling the tradeoff between
        accuracy versus time and memory usage.
        Larger values take longer and use more memory
        but give more accurate output.
    itmax : int, optional
        Use at most this many iterations.
    compute_v : bool, optional
        Request a norm-maximizing linear operator input vector if True.
    compute_w : bool, optional
        Request a norm-maximizing linear operator output vector if True.

    Returns
    -------
    est : float
        An underestimate of the 1-norm of the sparse matrix.
    v : ndarray, optional
        The vector such that ||Av||_1 == est*||v||_1.
        It can be thought of as an input to the linear operator
        that gives an output with particularly large norm.
    w : ndarray, optional
        The vector Av which has relatively large 1-norm.
        It can be thought of as an output of the linear operator
        that is relatively large in norm compared to the input.

    Notes
    -----
    This is algorithm 2.4 of [1].

    In [2] it is described as follows.
    "This algorithm typically requires the evaluation of
    about 4t matrix-vector products and almost invariably
    produces a norm estimate (which is, in fact, a lower bound
    on the norm) correct to within a factor 3."

    References
    ----------
    .. [1] Nicholas J. Higham and Francoise Tisseur (2000),
           "A Block Algorithm for Matrix 1-Norm Estimation,
           with an Application to 1-Norm Pseudospectra."
           SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201.

    .. [2] Awad H. Al-Mohy and Nicholas J. Higham (2009),
           "A new scaling and squaring algorithm for the matrix exponential."
           SIAM J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989.

    """
    # Check the input.
    if len(A.shape) != 2 or A.shape[0] != A.shape[1]:
        raise ValueError('expected the operator to act like a square matrix')

    # If the operator size is small compared to t,
    # then it is easier to compute the exact norm.
    # Otherwise estimate the norm.
    n = A.shape[1]
    if t >= n:
        A_explicit = np.asarray(aslinearoperator(A).matmat(np.identity(n)))
        if A_explicit.shape != (n, n):
            raise Exception('internal error: ',
                            'unexpected shape ' + str(A_explicit.shape))
        col_abs_sums = abs(A_explicit).sum(axis=0)
        if col_abs_sums.shape != (n, ):
            raise Exception('internal error: ',
                            'unexpected shape ' + str(col_abs_sums.shape))
        argmax_j = np.argmax(col_abs_sums)
        v = elementary_vector(n, argmax_j)
        w = A_explicit[:, argmax_j]
        est = col_abs_sums[argmax_j]
    else:
        est, v, w, nmults, nresamples = _onenormest_core(A, A.T, t, itmax)

    # Report the norm estimate along with some certificates of the estimate.
    if compute_v or compute_w:
        result = (est,)
        if compute_v:
            result += (v,)
        if compute_w:
            result += (w,)
        return result
    else:
        return est
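# Quick self-check sketch (an addition, not from the original source): the
# returned estimate is a lower bound on the exact 1-norm (the maximum absolute
# column sum) and, per the notes above, is typically within a factor of 3.
# Assumes the helper routines used by _onenormest_core (elementary_vector,
# resample_column, norm_1d_1, ...) are defined in this module.
import numpy as np
import scipy.sparse as sparse_demo
M_demo = sparse_demo.rand(200, 200, density=0.05, random_state=2, format='csr')
est_demo = onenormest(M_demo, t=2)
exact_1norm = np.max(np.asarray(abs(M_demo).sum(axis=0)))
assert est_demo <= exact_1norm * (1 + 1e-12)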
def test_multi_linear_op_l1(norm, inter, testNumber, numblocks):
    m = 40
    d = 10
    numregs = 5

    if getNewOptVals and (testNumber == 0):
        A, y = getLSdata(m, d)
        cache['AmutliLinL1'] = A
        cache['ymutliLinL1'] = y
        H = []
        for i in range(numregs):
            p = np.random.randint(1, 100)
            H.append(np.random.normal(0, 1, [p, d]))
        cache['HmultiLinL1'] = H
    else:
        H = cache['HmultiLinL1']
        A = cache['AmutliLinL1']
        y = cache['ymutliLinL1']

    projSplit = ps.ProjSplitFit()
    stepsize = 1e-1
    processor = lp.Forward2Fixed(stepsize)
    gamma = 1e0
    if norm and inter:
        gamma = 1e2
    projSplit.setDualScaling(gamma)
    projSplit.addData(A, y, 2, processor, normalize=norm, intercept=inter)

    lam = []
    for i in range(numregs):
        lam.append(0.001 * (i + 1))
        step = 1.0
        regObj = L1(lam[-1], step)
        projSplit.addRegularizer(regObj, linearOp=aslinearoperator(H[i]))

    projSplit.run(maxIterations=5000, keepHistory=True, nblocks=numblocks,
                  primalTol=1e-6, dualTol=1e-6)
    ps_val = projSplit.getObjective()

    if getNewOptVals:
        if norm:
            Anorm = A
            m = Anorm.shape[0]
            scaling = np.linalg.norm(Anorm, axis=0)
            scaling += 1.0 * (scaling < 1e-10)
            Anorm = np.sqrt(m) * Anorm / scaling
            A = Anorm
        if inter:
            AwithIntercept = np.zeros((m, d + 1))
            AwithIntercept[:, 0] = np.ones(m)
            AwithIntercept[:, 1:(d + 1)] = A
            A = AwithIntercept

        (m, d) = A.shape
        x_cvx = cvx.Variable(d)
        f = (1 / (2 * m)) * cvx.sum_squares(A @ x_cvx - y)
        for i in range(numregs):
            if inter:
                f += lam[i] * cvx.norm(H[i] @ x_cvx[1:d], 1)
            else:
                f += lam[i] * cvx.norm(H[i] @ x_cvx, 1)
        prob = cvx.Problem(cvx.Minimize(f))
        prob.solve(verbose=True)
        opt = prob.value
        cache[(norm, inter, 'opt')] = opt
    else:
        opt = cache[(norm, inter, 'opt')]

    print("ps val = {}".format(ps_val))
    print("cvx val = {}".format(opt))
    assert ps_val - opt < 1e-2
def _algorithm_2_2(A, AT, t):
    """
    This is Algorithm 2.2.

    Parameters
    ----------
    A : ndarray or other linear operator
        A linear operator that can produce matrix products.
    AT : ndarray or other linear operator
        The transpose of A.
    t : int, optional
        A positive parameter controlling the tradeoff between
        accuracy versus time and memory usage.

    Returns
    -------
    g : sequence
        A non-negative decreasing vector
        such that g[j] is a lower bound for the 1-norm
        of the column of A of jth largest 1-norm.
        The first entry of this vector is therefore a lower bound
        on the 1-norm of the linear operator A.
        This sequence has length t.
    ind : sequence
        The ith entry of ind is the index of the column A whose 1-norm
        is given by g[i].
        This sequence of indices has length t, and its entries are
        chosen from range(n), possibly with repetition,
        where n is the order of the operator A.

    Notes
    -----
    This algorithm is mainly for testing.
    It uses the 'ind' array in a way that is similar to
    its usage in algorithm 2.4. This algorithm 2.2 may be easier to test,
    so it gives a chance of uncovering bugs related to indexing
    which could have propagated less noticeably to algorithm 2.4.

    """
    A_linear_operator = aslinearoperator(A)
    AT_linear_operator = aslinearoperator(AT)
    n = A_linear_operator.shape[0]

    # Initialize the X block with columns of unit 1-norm.
    X = np.ones((n, t))
    if t > 1:
        X[:, 1:] = np.random.randint(0, 2, size=(n, t-1))*2 - 1
    X /= float(n)

    # Iteratively improve the lower bounds.
    # Track extra things, to assert invariants for debugging.
    g_prev = None
    h_prev = None
    k = 1
    ind = range(t)
    while True:
        Y = np.asarray(A_linear_operator.matmat(X))
        g = [norm_1d_1(Y[:, j]) for j in range(t)]
        best_j = np.argmax(g)
        g = sorted(g, reverse=True)
        S = sign_round_up(Y)
        Z = np.asarray(AT_linear_operator.matmat(S))
        h = [norm_1d_inf(row) for row in Z]

        # If this algorithm runs for fewer than two iterations,
        # then its return values do not have the properties indicated
        # in the description of the algorithm.
        # In particular, the entries of g are not 1-norms of any
        # column of A until the second iteration.
        # Therefore we will require the algorithm to run for at least
        # two iterations, even though this requirement is not stated
        # in the description of the algorithm.
        if k >= 2:
            if less_than_or_close(max(h), np.dot(Z[:, best_j], X[:, best_j])):
                break

        h_i_pairs = zip(h, range(n))
        h, ind = zip(*sorted(h_i_pairs, reverse=True)[:t])
        for j in range(t):
            X[:, j] = elementary_vector(n, ind[j])

        # Check invariant (2.2).
        if k >= 2:
            if not less_than_or_close(g_prev[0], h_prev[0]):
                raise Exception('invariant (2.2) is violated')
            if not less_than_or_close(h_prev[0], g[0]):
                raise Exception('invariant (2.2) is violated')

        # Check invariant (2.3).
        if k >= 3:
            for j in range(t):
                if not less_than_or_close(g[j], g_prev[j]):
                    raise Exception('invariant (2.3) is violated')

        # Update for the next iteration.
        g_prev = g
        h_prev = h
        k += 1

    # Return the lower bounds and the corresponding column indices.
    return g, ind
def test_linear_op_l1(norm, inter):
    m = 40
    d = 10
    p = 15

    if getNewOptVals:
        A = cache.get('AlinL1')
        y = cache.get('ylinL1')
        H = cache.get('HlinL1')
        if A is None:
            A, y = getLSdata(m, d)
            H = np.random.normal(0, 1, [p, d])
            cache['AlinL1'] = A
            cache['ylinL1'] = y
            cache['HlinL1'] = H
    else:
        A = cache['AlinL1']
        y = cache['ylinL1']
        H = cache['HlinL1']

    projSplit = ps.ProjSplitFit()
    stepsize = 1e-1
    processor = lp.Forward2Fixed(stepsize)
    gamma = 1e0
    projSplit.setDualScaling(gamma)
    projSplit.addData(A, y, 2, processor, normalize=norm, intercept=inter)
    lam = 0.01
    step = 1.0
    regObj = L1(lam, step)
    projSplit.addRegularizer(regObj, linearOp=aslinearoperator(H))
    projSplit.run(maxIterations=5000, keepHistory=True, nblocks=1,
                  primalTol=1e-3, dualTol=1e-3)
    ps_val = projSplit.getObjective()

    if getNewOptVals:
        opt = cache.get((norm, inter, 'optlinL1'))
        if opt is None:
            (m, d) = A.shape
            if norm:
                Anorm = A
                scaling = np.linalg.norm(Anorm, axis=0)
                scaling += 1.0 * (scaling < 1e-10)
                Anorm = np.sqrt(m) * Anorm / scaling
                A = Anorm
            if inter:
                AwithIntercept = np.zeros((m, d + 1))
                AwithIntercept[:, 0] = np.ones(m)
                AwithIntercept[:, 1:(d + 1)] = A
                A = AwithIntercept

                HwithIntercept = np.zeros((p, d + 1))
                HwithIntercept[:, 0] = np.zeros(p)
                HwithIntercept[:, 1:(d + 1)] = H
                H = HwithIntercept
                x_cvx = cvx.Variable(d + 1)
            else:
                x_cvx = cvx.Variable(d)

            f = (1 / (2 * m)) * cvx.sum_squares(A @ x_cvx - y)
            f += lam * cvx.norm(H @ x_cvx, 1)
            prob = cvx.Problem(cvx.Minimize(f))
            prob.solve(verbose=True)
            opt = prob.value
            cache[(norm, inter, 'optlinL1')] = opt
    else:
        opt = cache[(norm, inter, 'optlinL1')]

    primViol = projSplit.getPrimalViolation()
    dualViol = projSplit.getDualViolation()
    print("primal violation = {}".format(primViol))
    print("dual violation = {}".format(dualViol))
    print("ps val = {}".format(ps_val))
    print("cvx val = {}".format(opt))
    assert ps_val - opt < 1e-2
def _onenormest_core(A, AT, t, itmax):
    """
    Compute a lower bound of the 1-norm of a sparse matrix.

    Parameters
    ----------
    A : ndarray or other linear operator
        A linear operator that can produce matrix products.
    AT : ndarray or other linear operator
        The transpose of A.
    t : int, optional
        A positive parameter controlling the tradeoff between
        accuracy versus time and memory usage.
    itmax : int, optional
        Use at most this many iterations.

    Returns
    -------
    est : float
        An underestimate of the 1-norm of the sparse matrix.
    v : ndarray, optional
        The vector such that ||Av||_1 == est*||v||_1.
        It can be thought of as an input to the linear operator
        that gives an output with particularly large norm.
    w : ndarray, optional
        The vector Av which has relatively large 1-norm.
        It can be thought of as an output of the linear operator
        that is relatively large in norm compared to the input.
    nmults : int, optional
        The number of matrix products that were computed.
    nresamples : int, optional
        The number of times a parallel column was observed,
        necessitating a re-randomization of the column.

    Notes
    -----
    This is algorithm 2.4.

    """
    # This function is a more or less direct translation
    # of Algorithm 2.4 from the Higham and Tisseur (2000) paper.
    A_linear_operator = aslinearoperator(A)
    AT_linear_operator = aslinearoperator(AT)
    if itmax < 2:
        raise ValueError('at least two iterations are required')
    if t < 1:
        raise ValueError('at least one column is required')
    n = A.shape[0]
    if t >= n:
        raise ValueError('t should be smaller than the order of A')
    # Track the number of big*small matrix multiplications
    # and the number of resamplings.
    nmults = 0
    nresamples = 0
    # "We now explain our choice of starting matrix.  We take the first
    # column of X to be the vector of 1s [...] This has the advantage that
    # for a matrix with nonnegative elements the algorithm converges
    # with an exact estimate on the second iteration, and such matrices
    # arise in applications [...]"
    X = np.ones((n, t), dtype=float)
    # "The remaining columns are chosen as rand{-1,1},
    # with a check for and correction of parallel columns,
    # exactly as for S in the body of the algorithm."
    if t > 1:
        for i in range(1, t):
            # These are technically initial samples, not resamples,
            # so the resampling count is not incremented.
            resample_column(i, X)
        for i in range(t):
            while column_needs_resampling(i, X):
                resample_column(i, X)
                nresamples += 1
    # "Choose starting matrix X with columns of unit 1-norm."
    X /= float(n)
    # "indices of used unit vectors e_j"
    ind_hist = set()
    est_old = 0
    S = np.zeros((n, t), dtype=float)
    k = 1
    ind = None
    while True:
        Y = np.asarray(A_linear_operator.matmat(X))
        nmults += 1
        mags = [norm_1d_1(Y[:, j]) for j in range(t)]
        est = np.max(mags)
        best_j = np.argmax(mags)
        if est > est_old or k == 2:
            if k >= 2:
                ind_best = ind[best_j]
            w = Y[:, best_j]
        # (1)
        if k >= 2 and est <= est_old:
            est = est_old
            break
        est_old = est
        S_old = S
        if k > itmax:
            break
        S = sign_round_up(Y)
        # (2)
        if every_col_of_X_is_parallel_to_a_col_of_Y(S, S_old):
            break
        if t > 1:
            # "Ensure that no column of S is parallel to another column of S
            # or to a column of S_old by replacing columns of S by rand{-1,1}."
            for i in range(t):
                while column_needs_resampling(i, S, S_old):
                    resample_column(i, S)
                    nresamples += 1
        # (3)
        Z = np.asarray(AT_linear_operator.matmat(S))
        nmults += 1
        h = [norm_1d_inf(row) for row in Z]
        # (4)
        if k >= 2 and max(h) == h[ind_best]:
            break
        # "Sort h so that h_first >= ... >= h_last
        # and re-order ind correspondingly."
        h_i_pairs = zip(h, range(n))
        h, ind = zip(*sorted(h_i_pairs, reverse=True))
        if t > 1:
            # (5)
            # Break if the most promising t vectors have been visited already.
            if set(ind[:t]) <= ind_hist:
                break
            # Put the most promising unvisited vectors at the front of the list
            # and put the visited vectors at the end of the list.
            # Preserve the order of the indices induced by the ordering of h.
            unused_entries = [i for i in ind if i not in ind_hist]
            used_entries = [i for i in ind if i in ind_hist]
            ind = unused_entries + used_entries
        for j in range(t):
            X[:, j] = elementary_vector(n, ind[j])
        ind_hist.update(ind[:t])
        k += 1
    v = elementary_vector(n, ind_best)
    return est, v, w, nmults, nresamples
def _approximate_eigenvalues(A, tol, maxiter, symmetric=None,
                             initial_guess=None):
    """Approximate eigenvalues.

    Used by approximate_spectral_radius and condest.

    Returns [W, E, H, V, breakdown_flag], where W and E are the eigenvectors
    and eigenvalues of the Hessenberg matrix H, respectively, and V is the
    Krylov space.  breakdown_flag denotes whether Lanczos/Arnoldi suffered
    breakdown.  E is therefore the approximate eigenvalues of A.

    To obtain approximate eigenvectors of A, compute V*W.
    """
    from scipy.sparse.linalg import aslinearoperator

    A = aslinearoperator(A)  # A could be dense or sparse, or something weird

    # Choose tolerance for deciding if break-down has occurred
    t = A.dtype.char
    eps = np.finfo(np.float).eps
    feps = np.finfo(np.single).eps
    geps = np.finfo(np.longfloat).eps
    _array_precision = {'f': 0, 'd': 1, 'g': 2, 'F': 0, 'D': 1, 'G': 2}
    breakdown = {0: feps * 1e3, 1: eps * 1e6,
                 2: geps * 1e6}[_array_precision[t]]
    breakdown_flag = False

    if A.shape[0] != A.shape[1]:
        raise ValueError('expected square matrix')

    maxiter = min(A.shape[0], maxiter)

    if initial_guess is None:
        v0 = sp.rand(A.shape[1], 1)
        if A.dtype == complex:
            v0 = v0 + 1.0j * sp.rand(A.shape[1], 1)
    else:
        v0 = initial_guess

    v0 /= norm(v0)

    # Important to type H based on v0, so that a real nonsymmetric matrix, can
    # have an imaginary initial guess for its Arnoldi Krylov space
    H = np.zeros((maxiter + 1, maxiter),
                 dtype=np.find_common_type([v0.dtype, A.dtype], []))

    V = [v0]

    beta = 0.0
    for j in range(maxiter):
        w = A * V[-1]

        if symmetric:
            if j >= 1:
                H[j - 1, j] = beta
                w -= beta * V[-2]

            alpha = np.dot(np.conjugate(w.ravel()), V[-1].ravel())
            H[j, j] = alpha
            w -= alpha * V[-1]  # axpy(V[-1],w,-alpha)

            beta = norm(w)
            H[j + 1, j] = beta

            if (H[j + 1, j] < breakdown):
                breakdown_flag = True
                break

            w /= beta

            V.append(w)
            V = V[-2:]  # retain only last two vectors
        else:
            # orthogonalize against Vs
            for i, v in enumerate(V):
                H[i, j] = np.dot(np.conjugate(v.ravel()), w.ravel())
                w = w - H[i, j] * v

            H[j + 1, j] = norm(w)

            if (H[j + 1, j] < breakdown):
                breakdown_flag = True
                if H[j + 1, j] != 0.0:
                    w = w / H[j + 1, j]
                V.append(w)
                break

            w = w / H[j + 1, j]
            V.append(w)

            # if upper 2x2 block of Hessenberg matrix H is almost symmetric,
            # and the user has not explicitly specified symmetric=False,
            # then switch to symmetric Lanczos algorithm
            # if symmetric is not False and j == 1:
            #     if abs(H[1,0] - H[0,1]) < 1e-12:
            #         # print "using symmetric mode"
            #         symmetric = True
            #         V = V[1:]
            #         H[1,0] = H[0,1]
            #         beta = H[2,1]

    # print "Approximated spectral radius in %d iterations" % (j + 1)

    from scipy.linalg import eig

    Eigs, Vects = eig(H[:j + 1, :j + 1], left=False, right=True)

    return (Vects, Eigs, H, V, breakdown_flag)
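# Usage sketch (an addition, inferred from the docstring above): estimate the
# spectral radius of a random matrix from the returned Ritz values. Assumes
# `np`, `sp` (scipy) and `norm` are in scope, as the function body requires.
A_arn = np.random.rand(50, 50)
W_arn, E_arn, H_arn, V_arn, broke = _approximate_eigenvalues(
    A_arn, tol=1e-8, maxiter=20, symmetric=False)
rho_est = np.max(np.abs(E_arn))  # approximate spectral radius of A_arn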
def solve_sudoku(clues, maxiter=1000, epsilon=0.5, threshold=1e-6,
                 l1weight=1e-2):
    clues = np.asarray(clues, dtype=int)
    n = len(clues)
    assert clues.shape == (n, n), "sudoku must be square"
    assert int(np.sqrt(n))**2 == n, "size of sudoku must be a square number"
    assert 0 < epsilon < 1, "epsilon must be between 0 and 1"

    idx = LinearIndexer((9, 9, 9))

    # constraints: each entry is a list of components whose sum should be one
    cs = []
    for j in range(9):                      # in each row, ...
        for i in range(9):                  # ...each number...
            cs.append(idx[j, :, i])         # ...must occur exactly once
    for j in range(9):                      # in each column, ...
        for i in range(9):                  # ...each number...
            cs.append(idx[:, j, i])         # ...must occur exactly once
    for x in range(0, 9, 3):                # in each box along x...
        for y in range(0, 9, 3):            # ...and y, ...
            for i in range(9):              # ...each number...
                cs.append(idx[x:x + 3, y:y + 3, i])  # ...must occur exactly once
    for x in range(9):                      # in each cell along x...
        for y in range(9):                  # ...and y, ...
            cs.append(idx[x, y, :])         # ...there must be exactly one number
    for i, row in enumerate(clues):         # for each cell along x...
        for j, col in enumerate(row):       # ...and y...
            if col:                         # ...for which a nonzero clue is given, ...
                cs.append(idx[i, j, col - 1])  # ...this number must occur in this cell

    ms = [sp.coo_matrix((np.ones(len(k)), (np.zeros(len(k)), k)),
                        shape=(1, 9 * 9 * 9)) for k in cs]
    A = sla.aslinearoperator(sp.vstack(ms))
    b = np.ones(len(cs))

    l1 = l1weight * NonnegativeL1Norm()
    l2 = DataTerm(A, b)

    # iterative reweighted l1-norm minimization
    x = fista(l2, l1, maxiter=maxiter)
    while True:
        tau = 1.0 / (x + epsilon)
        old_x = x
        x = fista(l2, l1 * tau, maxiter=maxiter)
        d = np.square(x - old_x).sum()
        print d
        if d < threshold:
            break

    x = x.reshape(9, 9, 9)  # row, column, number -> probability
    return np.argmax(x, axis=2) + 1
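# Hypothetical driver (an addition): `clues` is a 9x9 integer grid with 0 for
# empty cells. LinearIndexer, fista, DataTerm and NonnegativeL1Norm are
# assumed to come from the surrounding project; this only illustrates the
# calling convention, not a puzzle with a unique solution.
clues_demo = np.zeros((9, 9), dtype=int)
clues_demo[0, 0] = 5
clues_demo[4, 4] = 7
board = solve_sudoku(clues_demo, maxiter=200)
print(board)  # a 9x9 array of digits 1..9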
k = m // 8
stdx = 1.
snr = 20.  # try 10., 5. and observe the bias

# use a random matrix as a basis (design matrix)
A = rng.randn(m, n).astype(dtype) / sqrt(m)  # random design

x_true = np.zeros(n, dtype=dtype)
T = rng.choice(n, k, replace=False)
x_true[T] = rng.randn(k).astype(dtype) * stdx
#x_true[T] = rng.rand(k) * stdx
#x_true = rng.randn(n)
#x_true[abs(x_true) < 2.5] = 0  # sparsify

# make the query vector
b_true = splinalg.aslinearoperator(A).matvec(x_true)

# add noise
normb = linalg.norm(b_true)
noise = rng.randn(m).astype(dtype)
noise = noise / linalg.norm(noise) * normb / snr
tol = linalg.norm(noise)
b = b_true + noise

plt.close('all')

import numpy as np
from scipy import linalg

# BP delta
print("Running BP_delta")
def test_as_linearoperator(A):
    L = spla.aslinearoperator(A)
    npt.assert_allclose(L * [1, 2, 3, 4], A @ [1, 2, 3, 4])
def __init__(self, grisms, sources, extbeams, inverter='lsqr',
             mskbeams=None, path='tables', usehdf5=False,
             hdf5file='matrix.h5'):
    # set some defaults
    self.path = os.path.join(path)
    self.target = None

    # get some dimensions
    self.nimg = len(grisms)
    self.nsrc = len(sources)

    # size of a grism image
    self.imgdim = None  # (nx,ny)

    # set the inversion scheme
    self.inverter = inverter

    # compute the number of wavelengths to extract
    nwav = [src.nwavelength() for src in sources]
    cwav = np.cumsum(nwav)  # get the cumulative indices

    # number of parameters
    self.npar = cwav[-1]  # max number of wavelengths to determine
    self.cwav = np.insert(cwav, 0, 0)

    # index for the image in question
    self.images = []
    #self.images.append((grism.dataset,device.name))

    # load the matrix data
    if usehdf5:
        i, j, aij, self.bi = self.load_from_hdf5(grisms, sources, extbeams,
                                                 mskbeams, hdf5file)
    else:
        i, j, aij, self.bi = self.load_from_images(grisms, sources, extbeams,
                                                   mskbeams, hdf5file)

    # just do a quick check
    if len(i) == 0:
        print('[alarm]Matrix has no elements.')
        return

    # everything is loaded, so let's juggle the indices
    print('[info]Compressing the indices')
    self.icomp, self.iuniq = indices.compress(i)
    self.jcomp, self.juniq = indices.compress(j)

    # get some dimensionalities
    ni = len(self.iuniq)
    nj = len(self.juniq)
    dim = np.array([ni, nj], dtype=self.INT)

    # do a quick check
    if np.amax(self.icomp) != len(self.bi) - 1:
        print('[warn]Invalid dimensionality in matrix')

    # the datatype of the j (used a lot)
    jtype = self.juniq.dtype

    # total number of wavelengths *actually present* in the matrix
    self.npix = dim[0]
    self.npar = dim[1]
    if self.npar >= self.npix:
        print()
        print('[alarm]Underdetermined matrix!!!')
        print('[alarm]There are {} measurements and {} unknowns.'.format(*dim))
        print('[alarm]pyLINEAR can invert, but results will be suspect.')
        print()

    # compute some things for the ragged arrays
    if self.nsrc == 1:
        srcind = np.zeros(self.npar, dtype=jtype)
    else:
        srcind = np.digitize(self.juniq, self.cwav) - 1

    # get the wavelength indices for each of the sources
    try:
        self.lam = self.juniq - self.cwav[srcind]
    except:
        print(ni, nj, self.nsrc)
        print('[debug]Something is fishy in matrix.py')
        import pdb
        pdb.set_trace()

    # get the reverse indices
    self.segids = list(sources.keys())
    self.ri = indices.reverse(np.array(self.segids, dtype=int)[srcind])
    self.hsrc = np.bincount(srcind.astype(np.int)).astype(self.INT)

    # compute the frobenius norm (used when calling LSQR)
    # this could be avoided if we collect sum(aij^2) along the way
    self.frob = np.sqrt(sum(a * a for a in aij))

    # ok... finally... package up the matrix as a coo_matrix
    self.A = ssl.aslinearoperator(
        coo_matrix((aij, (self.icomp, self.jcomp)), shape=dim))

    # compute the density of this matrix
    self.density = float(self.A.A.nnz) / (float(dim[0]) * float(dim[1]))

    # now we want to create an Lcurve for this matrix
    self.lcurve = LCurve(norm=self.frob)
def solve_and_derivative(A, b, c, cone_dict, warm_start=None, **kwargs):
    """Solves a cone program, returns its derivative as an abstract linear map.

    This function solves a convex cone program, with primal-dual problems

        min.        c^T x                min.        b^T y
        subject to  Ax + s = b           subject to  A^T y + c = 0
                    s \in K                          y \in K^*

    The problem data A, b, and c correspond to the arguments `A`, `b`, and
    `c`, and the convex cone `K` corresponds to `cone_dict`; x and s are the
    primal variables, and y is the dual variable.

    This function returns a solution (x, y, s) to the program. It also returns
    two functions that respectively represent application of the derivative
    (at A, b, and c) and its adjoint.

    The problem data must be formatted according to the SCS convention, see
    https://github.com/cvxgrp/scs.

    For background on derivatives of cone programs, see
    http://web.stanford.edu/~boyd/papers/diff_cone_prog.html.

    Args:
      A: A sparse SciPy matrix in CSC format; the first block of rows must
        correspond to the zero cone, the next block to the positive orthant,
        then the second-order cone, the PSD cone, the exponential cone, and
        finally the exponential dual cone. PSD matrix variables must be
        vectorized by scaling the off-diagonal entries by sqrt(2) and stacking
        the lower triangular part in column-major order.
      b: A NumPy array representing the offset.
      c: A NumPy array representing the objective function.
      cone_dict: A dictionary with keys corresponding to cones, values
        corresponding to their dimensions. The keys must be a subset of
        diffcp.ZERO, diffcp.POS, diffcp.SOC, diffcp.PSD, diffcp.EXP; the
        values of diffcp.SOC, diffcp.PSD, and diffcp.EXP should be lists. A
        k-dimensional PSD cone corresponds to a k-by-k matrix variable; a
        value of k for diffcp.EXP corresponds to k / 3 exponential cones.
        See SCS documentation for more details.
      warm_start: (optional) A tuple (x, y, s) at which to warm-start SCS.
      kwargs: (optional) Keyword arguments to forward to SCS.

    Returns:
      x: Optimal value of the primal variable x.
      y: Optimal value of the dual variable y.
      s: Optimal value of the slack variable s.
      derivative: A callable with signature
          derivative(dA, db, dc) -> dx, dy, ds
        that applies the derivative of the cone program at (A, b, and c) to
        the perturbations `dA`, `db`, `dc`. `dA` must be a SciPy sparse matrix
        in CSC format with the same sparsity pattern as `A`; `db` and `dc`
        are NumPy arrays.
      adjoint_derivative: A callable with signature
          adjoint_derivative(dx, dy, ds) -> dA, db, dc
        that applies the adjoint of the derivative of the cone program at
        (A, b, and c) to the perturbations `dx`, `dy`, `ds`, which must be
        NumPy arrays. The output `dA` matches the sparsity pattern of `A`.

    Raises:
        SolverError: if the cone program is infeasible or unbounded.
    """
    data = {"A": A, "b": b, "c": c}
    if warm_start is not None:
        data["x"] = warm_start[0]
        data["y"] = warm_start[1]
        data["s"] = warm_start[2]

    kwargs.setdefault("verbose", False)
    result = scs.solve(data, cone_dict, **kwargs)

    # check status
    status = result["info"]["status"]
    if status == "Solved/Inaccurate":
        warnings.warn("Solved/Inaccurate.")
    elif status != "Solved":
        raise SolverError("Solver scs returned status %s" % status)

    x = result["x"]
    y = result["y"]
    s = result["s"]

    # pre-compute quantities for the derivative
    m, n = A.shape
    N = m + n + 1
    cones = cone_lib.parse_cone_dict(cone_dict)
    z = (x, y - s, np.array([1]))
    u, v, w = z
    D_proj_dual_cone = cone_lib.dpi(v, cones, dual=True)
    Q = sparse.bmat([
        [None, A.T, np.expand_dims(c, -1)],
        [-A, None, np.expand_dims(b, -1)],
        [-np.expand_dims(c, -1).T, -np.expand_dims(b, -1).T, None]
    ])
    M = splinalg.aslinearoperator(Q - sparse.eye(N)) @ dpi(z, cones) + \
        splinalg.aslinearoperator(sparse.eye(N))
    pi_z = pi(z, cones)
    rows, cols = A.nonzero()

    def derivative(dA, db, dc, **kwargs):
        """Applies derivative at (A, b, c) to perturbations dA, db, dc

        Args:
            dA: SciPy sparse matrix in CSC format; must have same sparsity
                pattern as the matrix `A` from the cone program
            db: NumPy array representing perturbation in `b`
            dc: NumPy array representing perturbation in `c`

        Returns:
            NumPy arrays dx, dy, ds, the result of applying the derivative
            to the perturbations.
        """
        dQ = sparse.bmat([
            [None, dA.T, np.expand_dims(dc, -1)],
            [-dA, None, np.expand_dims(db, -1)],
            [-np.expand_dims(dc, -1).T, -np.expand_dims(db, -1).T, None]
        ])
        # can ignore w since w = 1
        rhs = dQ @ pi_z
        if np.allclose(rhs, 0):
            dz = np.zeros(rhs.size)
        else:
            dz = splinalg.lsqr(M, rhs, **kwargs)[0]
        du, dv, dw = np.split(dz, [n, n + m])
        dx = du - x * dw
        dy = D_proj_dual_cone @ dv - y * dw
        ds = D_proj_dual_cone @ dv - dv - s * dw
        return -dx, -dy, -ds

    def adjoint_derivative(dx, dy, ds, **kwargs):
        """Applies adjoint of derivative at (A, b, c) to dx, dy, ds

        Args:
            dx: NumPy array representing perturbation in `x`
            dy: NumPy array representing perturbation in `y`
            ds: NumPy array representing perturbation in `s`

        Returns:
            (`dA`, `db`, `dc`), the result of applying the adjoint to the
            perturbations; the sparsity pattern of `dA` matches that of `A`.
        """
        dw = -(x @ dx + y @ dy + s @ ds)
        dz = np.concatenate(
            [dx, D_proj_dual_cone.rmatvec(dy + ds) - ds, np.array([dw])])
        if np.allclose(dz, 0):
            r = np.zeros(dz.shape)
        else:
            r = splinalg.lsqr(cone_lib.transpose_linear_operator(M), dz,
                              **kwargs)[0]

        # dQ is the outer product of pi_z and r. Instead of materializing
        # this, the code below only computes the entries needed to compute
        # dA, db, dc
        values = pi_z[cols] * r[rows + n] - pi_z[n + rows] * r[cols]
        dA = sparse.csc_matrix((values, (rows, cols)), shape=A.shape)
        db = pi_z[n:n + m] * r[-1] - pi_z[-1] * r[n:n + m]
        dc = pi_z[:n] * r[-1] - pi_z[-1] * r[:n]
        return dA, db, dc

    return x, y, s, derivative, adjoint_derivative
def cholqr(A, Z, verbose=False):
    """
    Returns QR decomposition of Z. Q and R satisfy the following relations
    in exact arithmetic

    1. QR = Z
    2. Q^*AQ = I
    3. Q^*AZ = R
    4. ZR^{-1} = Q

    Uses Chol QR algorithm proposed in [1] for computing the A-orthogonal
    QR factorization. 'precholqr' function has better orthogonality
    properties

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        An array, sparse matrix, or LinearOperator representing
        the operation ``A * x``, where A is a real or complex square matrix.

    Z : ndarray

    verbose : bool, optional
        Displays information about the accuracy of the resulting QR
        Default: False

    Returns
    -------
    q : ndarray
        The A-orthogonal vectors

    Aq : ndarray
        The A^{-1}-orthogonal vectors

    r : ndarray
        The r of the QR decomposition

    See Also
    --------
    mgs : Modified Gram-Schmidt without re-orthogonalization
    mgs_stable : Modified Gram-Schmidt with re-orthogonalization

    References
    ----------
    .. [1] B. Lowery and J. Langou,
       Stability Analysis of QR factorization in an Oblique Inner Product
       http://arxiv.org/abs/1401.5171

    .. [2] A.K. Saibaba, J. Lee and P.K. Kitanidis,
       Randomized algorithms for Generalized Hermitian Eigenvalue Problems
       with application to computing Karhunen-Loe've expansion
       http://arxiv.org/abs/1307.6885

    Examples
    --------
    >>> import numpy as np
    >>> A = np.diag(np.arange(1,101))
    >>> Z = np.random.randn(100,10)
    >>> q, Aq, r = cholqr(A, Z, verbose = True)

    """
    #Convert into linear operator
    Aop = aslinearoperator(A)

    B = np.apply_along_axis(lambda x: Aop.matvec(x), 0, Z)
    C = np.dot(Z.T, B)
    r = np.linalg.cholesky(C).T

    q = np.linalg.solve(r.T, Z.T).T
    Aq = np.linalg.solve(r.T, B.T).T

    if verbose:
        #Verify Q*R = Y
        print "||QR-Y|| is ", np.linalg.norm(np.dot(q, r) - Z, 2)

        #Verify Q'*A*Q = I
        T = np.dot(q.T, Aq)
        n = T.shape[1]
        print "||Q^TAQ-I|| is ", \
            np.linalg.norm(T - np.eye(n, dtype='d'), ord=2)

        #Verify Q'AY = R
        print "||Q^TAY-R|| is ", np.linalg.norm(np.dot(Aq.T, Z) - r, 2)

        #Verify YR^{-1} = Q
        val = np.inf
        try:
            val = np.linalg.norm(np.linalg.solve(r.T, Z.T).T - q, 2)
        except LinAlgError:
            print "||YR^{-1}-Q|| is ", "Singular"
        print "||YR^{-1}-Q|| is ", val

    return q, Aq, r
def check_id(self, dtype):
    # Test ID routines on a Hilbert matrix.

    # set parameters
    n = 300
    eps = 1e-12

    # construct Hilbert matrix
    A = hilbert(n).astype(dtype)
    if np.issubdtype(dtype, np.complexfloating):
        A = A * (1 + 1j)
    L = aslinearoperator(A)

    # find rank
    S = np.linalg.svd(A, compute_uv=False)
    try:
        rank = np.nonzero(S < eps)[0][0]
    except IndexError:
        rank = n

    # print input summary
    _debug_print("Hilbert matrix dimension:        %8i" % n)
    _debug_print("Working precision:               %8.2e" % eps)
    _debug_print("Rank to working precision:       %8i" % rank)

    # set print format
    fmt = "%8.2e (s) / %5s"

    # test real ID routines
    _debug_print("-----------------------------------------")
    _debug_print("Real ID routines")
    _debug_print("-----------------------------------------")

    # fixed precision
    _debug_print("Calling iddp_id / idzp_id  ...",)
    t0 = time.time()
    k, idx, proj = pymatrixid.interp_decomp(A, eps, rand=False)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddp_aid / idzp_aid ...",)
    t0 = time.time()
    k, idx, proj = pymatrixid.interp_decomp(A, eps)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddp_rid / idzp_rid ...",)
    t0 = time.time()
    k, idx, proj = pymatrixid.interp_decomp(L, eps)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    # fixed rank
    k = rank

    _debug_print("Calling iddr_id / idzr_id  ...",)
    t0 = time.time()
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddr_aid / idzr_aid ...",)
    t0 = time.time()
    idx, proj = pymatrixid.interp_decomp(A, k)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddr_rid / idzr_rid ...",)
    t0 = time.time()
    idx, proj = pymatrixid.interp_decomp(L, k)
    t = time.time() - t0
    B = pymatrixid.reconstruct_matrix_from_id(A[:, idx[:k]], idx, proj)
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    # check skeleton and interpolation matrices
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    P = pymatrixid.reconstruct_interp_matrix(idx, proj)
    B = pymatrixid.reconstruct_skel_matrix(A, k, idx)
    assert_allclose(B, A[:, idx[:k]], rtol=eps, atol=1e-08)
    assert_allclose(B @ P, A, rtol=eps, atol=1e-08)

    # test SVD routines
    _debug_print("-----------------------------------------")
    _debug_print("SVD routines")
    _debug_print("-----------------------------------------")

    # fixed precision
    _debug_print("Calling iddp_svd / idzp_svd ...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(A, eps, rand=False)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddp_asvd / idzp_asvd...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(A, eps)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddp_rsvd / idzp_rsvd...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(L, eps)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    # fixed rank
    k = rank

    _debug_print("Calling iddr_svd / idzr_svd ...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(A, k, rand=False)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddr_asvd / idzr_asvd ...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(A, k)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    _debug_print("Calling iddr_rsvd / idzr_rsvd ...",)
    t0 = time.time()
    U, S, V = pymatrixid.svd(L, k)
    t = time.time() - t0
    B = U * S @ V.T.conj()
    _debug_print(fmt % (t, np.allclose(A, B, eps)))
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    # ID to SVD
    idx, proj = pymatrixid.interp_decomp(A, k, rand=False)
    Up, Sp, Vp = pymatrixid.id_to_svd(A[:, idx[:k]], idx, proj)
    B = Up * Sp @ Vp.T.conj()
    assert_allclose(A, B, rtol=eps, atol=1e-08)

    # Norm estimates
    s = svdvals(A)
    norm_2_est = pymatrixid.estimate_spectral_norm(A)
    assert_allclose(norm_2_est, s[0], rtol=1e-6, atol=1e-8)

    B = A.copy()
    B[:, 0] *= 1.2
    s = svdvals(A - B)
    norm_2_est = pymatrixid.estimate_spectral_norm_diff(A, B)
    assert_allclose(norm_2_est, s[0], rtol=1e-6, atol=1e-8)

    # Rank estimates
    B = np.array([[1, 1, 0], [0, 0, 1], [0, 0, 1]], dtype=dtype)
    for M in [A, B]:
        ML = aslinearoperator(M)

        rank_tol = 1e-9
        rank_np = np.linalg.matrix_rank(M, norm(M, 2) * rank_tol)
        rank_est = pymatrixid.estimate_rank(M, rank_tol)
        rank_est_2 = pymatrixid.estimate_rank(ML, rank_tol)

        assert_(rank_est >= rank_np)
        assert_(rank_est <= rank_np + 10)

        assert_(rank_est_2 >= rank_np - 4)
        assert_(rank_est_2 <= rank_np + 4)
def mgs_stable(A, Z, verbose=False):
    """
    Returns QR decomposition of Z. Q and R satisfy the following relations
    in exact arithmetic

    1. QR = Z
    2. Q^*AQ = I
    3. Q^*AZ = R
    4. ZR^{-1} = Q

    Uses Modified Gram-Schmidt with re-orthogonalization (Rutishauser variant)
    for computing the A-orthogonal QR factorization

    Parameters
    ----------
    A : {sparse matrix, dense matrix, LinearOperator}
        An array, sparse matrix, or LinearOperator representing
        the operation ``A * x``, where A is a real or complex square matrix.

    Z : ndarray

    verbose : bool, optional
        Displays information about the accuracy of the resulting QR
        Default: False

    Returns
    -------
    q : ndarray
        The A-orthogonal vectors

    Aq : ndarray
        The A^{-1}-orthogonal vectors

    r : ndarray
        The r of the QR decomposition

    See Also
    --------
    mgs : Modified Gram-Schmidt without re-orthogonalization
    precholqr : Based on CholQR

    References
    ----------
    .. [1] A.K. Saibaba, J. Lee and P.K. Kitanidis,
       Randomized algorithms for Generalized Hermitian Eigenvalue Problems
       with application to computing Karhunen-Loe've expansion
       http://arxiv.org/abs/1307.6885

    .. [2] W. Gander, Algorithms for the QR decomposition.
       Res. Rep, 80(02), 1980

    Examples
    --------
    >>> import numpy as np
    >>> A = np.diag(np.arange(1,101))
    >>> Z = np.random.randn(100,10)
    >>> q, Aq, r = mgs_stable(A, Z, verbose = True)

    """
    #Get sizes
    m = np.size(Z, 0)
    n = np.size(Z, 1)

    #Convert into linear operator
    Aop = aslinearoperator(A)

    #Initialize
    Aq = np.zeros_like(Z, dtype='d')
    q = np.zeros_like(Z, dtype='d')
    r = np.zeros((n, n), dtype='d')
    reorth = np.zeros((n, ), dtype='d')
    eps = np.finfo(np.float64).eps

    q = np.copy(Z)

    for k in np.arange(n):
        Aq[:, k] = Aop.matvec(q[:, k])
        t = np.sqrt(np.dot(q[:, k].T, Aq[:, k]))

        nach = 1
        u = 0
        while nach:
            u += 1
            for i in np.arange(k):
                s = np.dot(Aq[:, i].T, q[:, k])
                r[i, k] += s
                q[:, k] -= s * q[:, i]

            Aq[:, k] = Aop.matvec(q[:, k])
            tt = np.sqrt(np.dot(q[:, k].T, Aq[:, k]))
            if tt > t * 10. * eps and tt < t / 10.:
                nach = 1
                t = tt
            else:
                nach = 0
                if tt < 10. * eps * t:
                    tt = 0.

        reorth[k] = u
        r[k, k] = tt
        tt = 1. / tt if np.abs(tt * eps) > 0. else 0.
        q[:, k] *= tt
        Aq[:, k] *= tt

    if verbose:
        #Verify Q*R = Y
        print "||QR-Y|| is ", np.linalg.norm(np.dot(q, r) - Z, 2)

        #Verify Q'*A*Q = I
        T = np.dot(q.T, Aq)
        print "||Q^TAQ-I|| is ", \
            np.linalg.norm(T - np.eye(n, dtype='d'), ord=2)

        #Verify Q'AY = R
        print "||Q^TAY-R|| is ", np.linalg.norm(np.dot(Aq.T, Z) - r, 2)

        #Verify YR^{-1} = Q
        val = np.inf
        try:
            val = np.linalg.norm(np.linalg.solve(r.T, Z.T).T - q, 2)
        except LinAlgError:
            print "Singular"
        print "||YR^{-1}-Q|| is ", val

    return q, Aq, r
def _dproj(x, cone, dual=False):
    """Returns the derivative of projecting onto a cone (or its dual cone) at x.

    The derivative is represented as either a sparse matrix or a linear
    operator.
    """
    shape = (x.size, x.size)
    if cone == ZERO:
        return sparse.eye(*shape) if dual else sparse.csc_matrix(shape)
    elif cone == POS:
        return sparse.diags(.5 * (np.sign(x) + 1), format="csc")
    elif cone == SOC:
        t = x[0]
        z = x[1:]
        norm_z = np.linalg.norm(z, 2)
        if norm_z <= t:
            return sparse.eye(*shape)
        elif norm_z <= -t:
            return sparse.csc_matrix(shape)
        else:
            z = z.reshape(z.size)
            unit_z = z / norm_z
            scale_factor = 1.0 / (2 * norm_z)
            t_plus_norm_z = t + norm_z

            def matvec(y):
                t_in = y[0]
                z_in = y[1:]
                first = norm_z * t_in + np.dot(z, z_in)
                rest = z * t_in + t_plus_norm_z * z_in - \
                    t * unit_z * np.dot(unit_z, z_in)
                return scale_factor * np.append(first, rest)

            # derivative is symmetric
            return splinalg.LinearOperator(shape, matvec=matvec,
                                           rmatvec=matvec)
    elif cone == PSD:
        dim = psd_dim(x)
        X = unvec_symm(x, dim)
        lambd, Q = np.linalg.eig(X)
        if np.all(lambd >= 0):
            matvec = lambda y: y
            return splinalg.LinearOperator(shape, matvec=matvec,
                                           rmatvec=matvec)

        # Sort eigenvalues, eigenvectors in ascending order, so that
        # we can obtain the index k such that lambd[k-1] < 0 < lambd[k]
        idx = lambd.argsort()
        lambd = lambd[idx]
        Q = Q[:, idx]
        k = np.searchsorted(lambd, 0)

        B = np.zeros((dim, dim))
        pos_gt_k = np.outer(np.maximum(lambd, 0)[k:], np.ones(k))
        neg_lt_k = np.outer(np.ones(dim - k), np.minimum(lambd, 0)[:k])
        B[k:, :k] = pos_gt_k / (neg_lt_k + pos_gt_k)
        B[:k, k:] = B[k:, :k].T
        B[k:, k:] = 1
        matvec = lambda y: vec_symm(
            Q @ (B * (Q.T @ unvec_symm(y, dim) @ Q)) @ Q.T)
        return splinalg.LinearOperator(shape, matvec=matvec, rmatvec=matvec)
    elif cone == EXP:
        raise NotImplementedError(
            "EXP cone is not implemented here yet {}".format(EXP))
        # NB: the code below is unreachable while the raise above is in place.
        num_cones = int(x.size / 3)
        ops = []
        offset = 0
        for _ in range(num_cones):
            x_i = x[offset:offset + 3]
            offset += 3
            if in_exp(x_i):
                ops.append(splinalg.aslinearoperator(sparse.eye(3)))
            elif in_exp_dual(-x_i):
                ops.append(splinalg.aslinearoperator(
                    sparse.csc_matrix((3, 3))))
            elif x_i[0] < 0 and x_i[1] and not np.isclose(x_i[2], 0):
                matvec = lambda y: np.array([
                    y[0], 0, y[2] * 0.5 * (1 + np.sign(x_i[2]))])
                ops.append(splinalg.LinearOperator((3, 3), matvec=matvec,
                                                   rmatvec=matvec))
            else:
                # TODO(akshayka): Cache projection if this is a bottleneck
                # TODO(akshayka): y_st is sometimes zero ...
                x_st, y_st, _, mu = proj_lib.proj_exp_cone(
                    x_i[0], x_i[1], x_i[2])
                if np.equal(y_st, 0):
                    y_st = np.abs(x_st)
                exp_x_y = np.exp(x_st / y_st)
                mu_exp_x_y = mu * exp_x_y
                x_mu_exp_x_y = x_st * mu_exp_x_y
                M = np.zeros((4, 4))
                M[:, 0] = np.array([
                    1 + mu_exp_x_y / y_st,
                    -x_mu_exp_x_y / (y_st ** 2),
                    0, exp_x_y])
                M[:, 1] = np.array([
                    -x_mu_exp_x_y / (y_st ** 2),
                    1 + x_st * x_mu_exp_x_y / (y_st ** 3),
                    0, exp_x_y - x_st * exp_x_y / y_st])
                M[:, 2] = np.array([0, 0, 1, -1])
                M[:, 3] = np.array([
                    exp_x_y, exp_x_y - x_st * exp_x_y / y_st, -1, 0])
                ops.append(splinalg.aslinearoperator(np.linalg.inv(M)[:3, :3]))
        D = as_block_diag_linear_operator(ops)
        if dual:
            return splinalg.LinearOperator(
                (x.size, x.size),
                matvec=lambda v: v - D.matvec(v),
                rmatvec=lambda v: v - D.rmatvec(v))
        else:
            return D
    else:
        raise NotImplementedError(f"{cone} not implemented")
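# Sanity sketch (an addition): for the positive orthant the derivative of the
# projection is a diagonal 0/1 mask, which a forward difference confirms.
# Assumes the POS constant used by _dproj above is in scope.
import numpy as np
x_chk = np.array([1.5, -2.0, 0.3])
D_chk = _dproj(x_chk, POS)               # sparse diagonal, entries in {0, 1}
proj_pos = lambda v: np.maximum(v, 0)    # projection onto the positive orthant
h_fd = 1e-7
dv = np.ones(3)
fd = (proj_pos(x_chk + h_fd * dv) - proj_pos(x_chk)) / h_fd
assert np.allclose(D_chk @ dv, fd)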
K_xx = (-((x_i - x_j) ** 2).sum(2) / 2).exp()  # Symbolic (N,N) Gaussian kernel matrix
print(K_xx)

########################################################################
# Linear operators
# ~~~~~~~~~~~~~~~~~
#
# As far as **scipy** is concerned, a KeOps :class:`pykeops.torch.LazyTensor` such
# as **K_xx** can be directly understood as a
# `LinearOperator <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.LinearOperator.html>`_:

from scipy.sparse.linalg import aslinearoperator

K = aslinearoperator(K_xx)

#########################################################
# Just like regular numpy :mod:`arrays` or KeOps :class:`pykeops.torch.LazyTensor`,
# :mod:`LinearOperators` fully support the "matrix" product operator ``@``.
# For instance, to compute the mass coefficients
#
# .. math::
#   D_i = \sum_{j=1}^N K_{i,j},
#
# we can simply write:

D = K @ np.ones(N, dtype=dtype)  # Sum along the lines of the adjacency matrix

#######################################################################
# Going further, robust and efficient routines such as
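#######################################################################
# (Illustrative continuation, an addition to this excerpt.) Since ``K`` is a
# genuine LinearOperator, scipy's iterative eigensolvers can consume it
# directly; for the symmetric Gaussian kernel, ``eigsh`` is a natural choice:

from scipy.sparse.linalg import eigsh

eigenvalues, eigenvectors = eigsh(K, k=5)  # 5 largest eigenvalues of K_xx
print(eigenvalues)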
def randomhep(A, k, p=20, twopass=False):
    """
    Randomized algorithm for Hermitian eigenvalue problems.
    Returns the k largest eigenvalues computed using the randomized algorithm.

    Parameters:
    -----------
    A : {SparseMatrix,DenseMatrix,LinearOperator}
        n x n Hermitian matrix operator whose eigenvalues need to be estimated
    k : int
        number of eigenvalues/vectors to be estimated
    p : int, optional
        oversampling parameter which can improve accuracy of resulting
        solution
        Default: 20
    twopass : bool, optional
        determines if matrix-vector product is to be performed twice
        Default: False

    Returns:
    --------
    w : ndarray, (k,)
        eigenvalues arranged in descending order
    u : ndarray, (n,k)
        eigenvectors arranged according to eigenvalues

    References:
    -----------
    .. [1] Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.
       "Finding structure with randomness: Probabilistic algorithms for
       constructing approximate matrix decompositions."
       SIAM review 53.2 (2011): 217-288.

    Examples:
    ---------
    >>> import numpy as np
    >>> A = np.diag(0.95**np.arange(100))
    >>> w, u = randomhep(A, 10, twopass=True)

    """
    #Get matrix sizes
    m, n = A.shape
    Aop = aslinearoperator(A)

    #For square matrices only
    assert m == n

    #Oversample
    k = k + p

    #Generate gaussian random matrix
    Omega = np.random.randn(n, k)

    Y = np.zeros((m, k), dtype='d')
    for i in np.arange(k):
        Y[:, i] = Aop.matvec(Omega[:, i])

    q, _ = qr(Y, mode='economic')

    if twopass == True:
        B = np.zeros((k, k), dtype='d')
        for i in np.arange(k):
            Aq = Aop.matvec(q[:, i])
            for j in np.arange(k):
                B[i, j] = np.dot(q[:, j].T, Aq)
    else:
        from scipy.linalg import inv, pinv, svd, pinv2
        temp = np.dot(Omega.T, Y)
        temp2 = np.dot(q.T, Omega)
        temp3 = np.dot(q.T, Y)
        B = np.dot(pinv2(temp2.T), np.dot(temp, pinv2(temp2)))
        Binv = np.dot(pinv(temp3.T), np.dot(temp, pinv2(temp3)))
        B = inv(Binv)

    #Eigen subproblem
    w, v = eigh(B)

    #Reverse eigenvalues in descending order
    w = w[::-1]

    #Compute eigenvectors
    u = np.dot(q, v[:, ::-1])

    k -= p
    return w[:k], u[:, :k]