def fista(A, b, x=None, tol=1e-5, maxiter=1000, tolx=1e-12, l=1., L=None, eta=2., nesterovs_momentum=True, restart_every=np.nan, prox=lambda z, l: prox.l1(z, l), debias=False): # FISTA to find support x_result, r_result, count = iterative_soft_thresholding( A, b, x=x, tol=tol, maxiter=maxiter, tolx=tolx, l=l, L=L, eta=eta, nesterovs_momentum=nesterovs_momentum, restart_every=restart_every, prox=prox) # debias by least squares (equivalent to a single step of OMP) if debias: x_result, r_result = omp(A, b, maxnnz=np.count_nonzero(x_result), s0=np.nonzero(x_result)[0])[0:2] return x_result, r_result, count
def BPdelta_scad(A, b, x=None, delta=None, maxiter=1000, rtol=1e-4, alpha=None, rho=1., eta=2., nesterovs_momentum=True, restart_every=np.nan, a=3.7, switch_to_scad_after=0, prox=lambda z, l: prox.l1(z, l), prox_loss=lambda z, delta: prox.ind_l2ball(z, delta)): # BPdelta up to switch_to_scad_after times to find good initial guess with bias if switch_to_scad_after > 0: x = basis_pursuit_delta(A, b, x=x, delta=delta, maxiter=switch_to_scad_after, rtol=rtol, alpha=alpha, rho=rho, eta=eta, nesterovs_momentum=nesterovs_momentum, restart_every=restart_every, prox_loss=prox_loss)[0] # BPdelta with SCAD thresholding to debias return basis_pursuit_delta( A, b, x=x, delta=delta, maxiter=maxiter, rtol=rtol, alpha=alpha, rho=rho, eta=eta, nesterovs_momentum=nesterovs_momentum, #restart_every=restart_every, prox=lambda z, thresh: th.smoothly_clipped_absolute_deviation( z, thresh, a=a))
def _stable_principal_component_pursuit( D, tol=None, ls=None, rtol=1e-12, rho=1., maxiter=300, verbose=False, nesterovs_momentum=False, restart_every=np.nan, prox_LS=lambda q, r, c: prox.ind_l2ball(q, r, c), prox_L=lambda Q, l: prox.nuclear(Q, l), prox_S=lambda q, l: prox.l1(q, l)): """ Low-rank and sparse matrix approximation (a.k.a. Stable principal component pursuit; SPCP, three-term decomposition; TTD) solves the following minimization problem by ADMM to find low-rank and sparse matrices, L and S, that approximate D as L + S. ([vecL; vecS], [z_LS; z_L; z_S]) = arg min_(x,z) g_LS(z_LS) + g_L(z_L) + g_S(z_S) s.t. [z_LS; z_L; z_S] = [I I; I O; O I] * [vecL; vecS]. Here, by default, g_LS(z_LS) = indicator function, i.e., zero if ||D - mat(z_LS)||_F <= tol, infinity otherwise, g_L(z_L) = ||mat(z_L)||_*, i.e., nuclear norm of mat(z_L), g_S(z_S) = ||ls.*z_S||_1, i.e., l1 norm of mat(z_S) with the weight ls. Parameters ---------- D : array_like, shape (`m`, `n`) `m` x `n` matrix to be separated into L and S. tol : scalar, optional, default None Tolerance for residual. If None, tol = rtol * ||D||_F ls : scalar, optional, default None Weight of sparse regularizer. `ls` can be a matrix of weights for each entries of `z_S.reshape(m,n)`. If None, ls = 1./sqrt(max(D.shape)). rtol : scalar, optional, default 1e-12 Relative convergence tolerance of `u` and `z` in ADMM, i.e., the primal and dual residuals. rho : scalar, optional, default 1. Augmented Lagrangian parameter. maxiter : int, optional, default 300 Maximum iterations. verbose: int or bool, optional, default False Print the costs every this number. nesterovs_momentum : bool, optional, default False Nesterov acceleration. restart_every : int, optional, default `np.nan` Restart the Nesterov acceleration every `restart_every` iterations. If `np.nan`, this is disabled. prox_LS : function, optional, default `spmlib.proxop.ind_l2ball` Proximity operator as a Python function for the regularizer g_LS of `z_LS` = `vecL`+`vecS`. By default, `prox_LS` is `lambda q,r,c:spmlib.proxop.ind_l2ball(q,r,c)`, i.e., the prox. of the indicator function of l2-ball with radius 'r' and center 'c'. prox_L : function, optional, default `spmlib.proxop.squ_l2_from_subspace` Proximity operator as a Python function for the regularizer g_L of `z_L` = `vecL`. By default, `prox_L` is `lambda Q,l:spmlib.proxop.nuclear(Q,1)`, i.e., the prox. of the nuclear norm * l*||mat z_L||_*. prox_S : function, optional, default `spmlib.proxop.l1` Proximity operator as a Python function for the regularizer g_S of `z_S` = `vecS`. By default, `prox_S` is `lambda q,l:spmlib.proxop.l1(q,l)`, i.e., the soft thresholding operator as the prox. of l1 norm ||l.*z_S||_1. Returns ------- L : (`m`, `n`) ndarray, x[:m].reshape(`m`,`n`) = np.dot(U, np.dot(sv, Vh)) Low-rank matrix estimate. S : (`m`, `n`) ndarray, x[m:].reshape(`m`,`n`) Sparse matrix estimate. U : ndarray, shape (`M`, `r`) with ``r = min(m, n)`` Unitary matrix having left singular vectors as columns. sv : ndarray, shape (`r`,) Singular values, sorted in non-increasing order. Vh : ndarray, shape (`r`, `n`) Unitary matrix having right singular vectors as rows. count : int Loop count at termination. Example ------- >>> stable_principal_component_pursuit(D, ls=1., tol=1e-4*linalg.norm(D), rtol=1e-4, maxiter=300, verbose=10)[0] See demo_spmlib_solvers_matrix.py """ m, n = D.shape mn = m * n if ls is None: ls = 1. / sqrt(max(D.shape)) ls = np.array(ls).ravel() if tol is None: tol = rtol * linalg.norm(D) # initialize x = np.zeros(2 * mn, dtype=D.dtype) z = np.zeros(3 * mn, dtype=D.dtype) u = np.zeros_like(z) count = 0 t = 1. # while count < maxiter: count += 1 if np.fmod(count, restart_every) == 0: # t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) # update x #dx = x.copy() q = z - u x[:mn] = (1. / 3.) * (q[:mn] + 2. * q[mn:2 * mn] - q[2 * mn:]) x[mn:] = (1. / 3.) * (q[:mn] - q[mn:2 * mn] + 2. * q[2 * mn:]) #dx = x - dx # q = G(x) + u q[:mn] = x[:mn] + x[mn:] + u[:mn] q[mn:2 * mn] = x[:mn] + u[mn:2 * mn] q[2 * mn:] = x[mn:] + u[2 * mn:] # update z dz = z.copy() # z[:mn] = prox_LS(q[:mn], tol, D.ravel()) L, U, sv, Vh = prox_L(q[mn:2 * mn].reshape(m, n), 1. / rho) z[mn:2 * mn] = L.ravel() z[2 * mn:] = prox_S(q[2 * mn:], ls / rho) dz = z - dz # if nesterovs_momentum: # update x again (heuristic) q = z - u x[:mn] = (1. / 3.) * (q[:mn] + 2. * q[mn:2 * mn] - q[2 * mn:]) x[mn:] = (1. / 3.) * (q[:mn] - q[mn:2 * mn] + 2. * q[2 * mn:]) # update u # u = u + G(x) - z du = u.copy() u[:mn] += x[:mn] + x[mn:] - z[:mn] u[mn:2 * mn] += x[:mn] - z[mn:2 * mn] u[2 * mn:] += x[mn:] - z[2 * mn:] du = u - du # Nesterov acceleration if nesterovs_momentum: # z = z + ((told - 1.) / t) * dz u = u + ((told - 1.) / t) * du # update u only (heuristic) if verbose: if np.fmod(count, verbose) == 0: print( '%2d: ||D-(L+S)||_F = %.2e, ||L||_* = %.2e (rnk=%d), ||S||_1 = %.2e (nnz=%2.1f%%)' % (count, linalg.norm(x[:mn] + x[mn:] - D.ravel()), np.sum(sv), np.count_nonzero(sv), np.sum(np.abs( x[mn:])), 100. * np.count_nonzero(z[2 * mn:]) / mn)) # check convergence of primal and dual residuals if linalg.norm(du) < rtol * linalg.norm(u) and linalg.norm( dz) < rtol * linalg.norm(z): break return x[:mn].reshape(m, n), x[mn:].reshape(m, n), U, sv, Vh, count
def stable_principal_component_pursuit( C, R=None, tol=None, ls=None, rtol=1e-12, rho=1., maxiter=300, verbose=False, nesterovs_momentum=False, restart_every=np.nan, prox_LS=lambda q, r, c: prox.ind_l2ball(q, r, c), prox_L=lambda Q, l: prox.nuclear(Q, l), prox_S=lambda q, l: prox.l1(q, l)): """ Low-rank and sparse matrix approximation (a.k.a. Stable principal component pursuit; SPCP, three-term decomposition; TTD) solves the following minimization problem by ADMM to find low-rank and sparse matrices, L and S, that approximate C as L + S. ([vecL; vecS], [z_LS; z_L; z_S]) = arg min_(x,z) g_LS(z_LS) + g_L(z_L) + g_S(z_S) s.t. [z_LS; z_L; z_S] = [M M; I O; O I] * [vecL; vecS]. Here, by default, g_LS(z_LS) = indicator function, i.e., zero if ||C - mat(z_LS)||_F <= tol, infinity otherwise, g_L(z_L) = ||mat(z_L)||_*, i.e., nuclear norm of mat(z_L), g_S(z_S) = ||ls.*z_S||_1, i.e., l1 norm of mat(z_S) with the weight ls, M : linear operator that extracts valid entries from vecC=C.ravel(), i.e., M(v)=v[R.ravel()]. Parameters ---------- C : array_like, shape (`m`, `n`) `m` x `n` matrix to be completed and separated into L and S, some of whose entries can be np.nan, meaning "masked (not ovserved)". R : array_like, optional, default None `m` x `nc` bool matrix of a mask of C. False indicates "masked". tol : scalar, optional, default None Tolerance for residual. If None, tol = rtol * ||C||_F ls : scalar, optional, default None Weight of sparse regularizer. `ls` can be a matrix of weights for each entries of `Z_s.reshape(m,n)`. If None, ls = 1./sqrt(max(C.shape)). rtol : scalar, optional, default 1e-12 Relative convergence tolerance of `u` and `z` in ADMM, i.e., the primal and dual residuals. rho : scalar, optional, default 1. Augmented Lagrangian parameter. maxiter : int, optional, default 300 Maximum iterations. verbose: int or bool, optional, default False Print the costs every this number. nesterovs_momentum : bool, optional, default False Nesterov acceleration. restart_every : int, optional, default `np.nan` Restart the Nesterov acceleration every `restart_every` iterations. If `np.nan`, this is disabled. prox_LS : function, optional, default `spmlib.proxop.ind_l2ball` Proximity operator as a Python function for the regularizer g_LS of `z_LS` = `vecL`+`vecS`. By default, `prox_LS` is `lambda q,r,c:spmlib.proxop.ind_l2ball(q,r,c)`, i.e., the prox. of the indicator function of l2-ball with radius 'r' and center 'c'. prox_L : function, optional, default `spmlib.proxop.squ_l2_from_subspace` Proximity operator as a Python function for the regularizer g_L of `z_L` = `vecL`. By default, `prox_L` is `lambda Q,l:spmlib.proxop.nuclear(Q,1)`, i.e., the prox. of the nuclear norm * l*||mat z_L||_*. prox_S : function, optional, default `spmlib.proxop.l1` Proximity operator as a Python function for the regularizer g_S of `z_S` = `vecS`. By default, `prox_S` is `lambda q,l:spmlib.proxop.l1(q,l)`, i.e., the soft thresholding operator as the prox. of l1 norm ||l.*z_S||_1. Returns ------- L : (`m`, `n`) ndarray, x[:m].reshape(`m`,`n`) = np.dot(U, np.dot(sv, Vh)) Low-rank matrix estimate. S : (`m`, `n`) ndarray, x[m:].reshape(`m`,`n`) Sparse matrix estimate. U : ndarray, shape (`M`, `r`) with ``r = min(m, n)`` Unitary matrix having left singular vectors as columns. sv : ndarray, shape (`r`,) Singular values, sorted in non-increasing order. Vh : ndarray, shape (`r`, `n`) Unitary matrix having right singular vectors as rows. count : int Loop count at termination. Example ------- >>> stable_principal_component_pursuit(C, ls=1., tol=1e-4*linalg.norm(C), rtol=1e-4, maxiter=300, verbose=10)[0] See demo_spmlib_solvers_matrix.py """ C = C.copy() # if a bool matrix R (observation) is given, mask C with False of R if R is not None: C[~R] = np.nan # NaN in C is masked C = np.ma.masked_invalid(C) numObsC = C.count() # C and R are modified to data and mask arrays, respectively. C, R = C.data, ~C.mask m, n = C.shape mn = m * n if ls is None: ls = 1. / sqrt(max(C.shape)) #ls = np.array(ls).ravel() if tol is None: tol = rtol * linalg.norm(C) def G(x, q): q[:numObsC] = (x[:mn] + x[mn:])[R.ravel()] q[numObsC:-mn] = x[:mn] q[-mn:] = x[mn:] def GT(q, p): p[:mn] = 0. p[:mn][R.ravel()] = q[:numObsC] p[mn:] = p[:mn] + q[-mn:] p[:mn] += q[numObsC:-mn] def p2x(p, x): rp = np.zeros(mn, dtype=C.dtype) rp[R.ravel()] = (1. / 3.) * (p[:mn] + p[mn:])[R.ravel()] x[:mn] = p[:mn] - rp x[mn:] = p[mn:] - rp # initialize x = np.zeros(2 * mn, dtype=C.dtype) z = np.zeros(numObsC + 2 * mn, dtype=C.dtype) u = np.zeros_like(z) count = 0 p = np.zeros_like(x) t = 1. # while count < maxiter: count += 1 if np.fmod(count, restart_every) == 0: # t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) # update x #dx = x.copy() #p = G.T.dot(z - u) q = z - u GT(q, p) p2x(p, x) #dx = x - dx G(x, q) q += u # update z dz = z.copy() # z[:numObsC] = prox_LS(q[:numObsC], tol, C[R].ravel()) L, U, sv, Vh = prox_L(q[numObsC:-mn].reshape(m, n), 1. / rho) z[numObsC:-mn] = L.ravel() z[-mn:] = prox_S(q[-mn:], ls / rho) dz = z - dz # if nesterovs_momentum: # update x again (heuristic) q = z - u GT(q, p) p2x(p, x) # update u # u = u + G(x) - z du = u.copy() G(x, q) u = u + q - z du = u - du # Nesterov acceleration if nesterovs_momentum: # z = z + ((told - 1.) / t) * dz u = u + ((told - 1.) / t) * du # update u only (heuristic) if verbose: if np.fmod(count, verbose) == 0: print( '%2d: ||R*(C-(L+S))||_F=%.2e, ||L||_*=%.2e (rnk=%d), ||S||_1=%.2e (nnz=%2.1f%%)' % (count, linalg.norm((x[:mn] + x[mn:])[R.ravel()] - C[R].ravel()), np.sum(sv), np.count_nonzero(sv), np.sum(np.abs( x[mn:])), 100. * np.count_nonzero(z[-mn:]) / mn)) # check convergence of primal and dual residuals if linalg.norm(du) < rtol * linalg.norm(u) and linalg.norm( dz) < rtol * linalg.norm(z): break return x[:mn].reshape(m, n), x[mn:].reshape(m, n), U, sv, Vh, count
def column_incremental_stable_principal_component_pursuit( c, U, sv, l=None, s=None, rtol=1e-12, maxiter=1000, delta=1e-12, ls=1., rho=1., update_basis=False, adjust_basis_every=np.nan, forget=1., max_rank=np.inf, min_sv=0., orth_eps=1e-12, orthogonalize_basis=False, nesterovs_momentum=False, restart_every=np.nan, prox_ls=lambda q, r, c: prox.ind_l2ball(q, r, c), prox_l=lambda q, th, U: prox.squ_l2_from_subspace(q, U, th), prox_s=lambda q, ls: prox.l1(q, ls)): """ Column incremental online stable principal component pursuit (OnlSPCP) performs the low-rank and sparse matrix approximation by solving ([l;s], [zls; zl; zs]) = arg min_(x,z) g_ls(z_ls) + g_l(z_l) + g_s(z_s) s.t. [zls; zl; zs] = [I I; I O; O I] * [l; s]. Here, by default, g_ls(z_ls) = indicator function, i.e., zero if ||c - z_ls||_2 <= delta, infinity otherwise, g_l(z_l) = 0.5 * ||(I-U*U')*z_l||_2^2, g_s(z_s) = ||ls.*z_s||_1 Parameters ---------- c : ndarray, shape (`m`,) `m`-dimensional vector to be decomposed into `l` and `s` such that ||d-(l+s)||_2<=delta. U : ndarray, shape (`m`,`r`) `m` x `r` matrix of left singular vectors approximately spanning the subspace of low-rank components (overwritten with the update if update_basis is True). sv : array_like, shape ('r',) `r`-dimensional vector of singular values (overwritten with the update if update_basis is True). l : array_like, shape (`m`,), optional, default None Initial guess of `l`. If None, `c`-`s` is used. s : array_like, shape (`m`,), optional, default None Initial guess of `s`. If None, `s` is numpy.zeros_like(c) is used. rtol : scalar, optional, default 1e-12 Relative convergence tolerance of `y` and `z` in ADMM, i.e., the primal and dual residuals. maxiter : int, optional, default 1000 Maximum iterations. delta : scalar, optional, default 1e-12 l2-ball radius used in the indicator function for the approximation error. ls : scalar or 1d array, optional, default 1. Weight of sparse regularizer. `ls` can be a 1d array of weights for the entries of `s`. rho : scalar, optional, default 1. Augmented Lagrangian parameter. update_basis : bool, optional, default False Update `U` and `sv` with `l` after convergence. adjust_basis_every : int, optional, default `np.nan` Temporalily update `U` with `l` every `adjust_basis_every` iterations in the ADMM loop. If `np.nan`, this is disabled. forget : scalar, optional, default 1. Forgetting parameter in updating `U`. max_rank : int, optional, default np.inf Maximum rank. `U.shape[1]` and `sv.shape[0]` won't be greater than `max_rank`. min_sv : scalar, optional, default 0. Singular values smaller than `min_sv` is neglected. `sv` >= max(`sv`)*abs(`min_sv`) if `min_sv` is negative. orth_eps : scalar, optional, default 1e-12 Rank increases if the magnitude of `c` in the orthogonal subspace is larger than `orth_eps`. orthogonalize_basis : bool, optional, default False If True, perform QR decomposition to orthogonalize `U`. nesterovs_momentum : bool, optional, default False Nesterov acceleration. restart_every : int, optional, default `np.nan` Restart the Nesterov acceleration every `restart_every` iterations. If `np.nan`, this is disabled. prox_ls : function, optional, default `spmlib.proxop.ind_l2ball` Proximity operator as a Python function for the regularizer g_ls of `z_ls` = `l`+`s`. By default, `prox_ls` is `lambda q,r,c:spmlib.proxop.ind_l2ball(q,r,c)`, i.e., the prox. of the indicator function of l2-ball with radius 'r' and center 'c'. prox_l : function, optional, default `spmlib.proxop.squ_l2_from_subspace` Proximity operator as a Python function for the regularizer g_l of `z_l` = `l`. By default, `prox_l` is `lambda q,U:spmlib.proxop.squ_l2_from_subspace(q,U,th)`, i.e., the prox. of the distance function defined as 0.5*(squared l2 distance between `l` and span`U`). prox_s : function, optional, default `spmlib.proxop.l1` Proximity operator as a Python function for the regularizer g_s of `z_s` = `s`. By default, `prox_s` is `lambda q,ls:spmlib.proxop.l1(q,ls)`, i.e., the soft thresholding operator as the prox. of l1 norm ||ls.*z_s||_1. Returns ------- l : ndarray, shape (`m`,) Low-rank component. s : ndarray, shape (`m`,) Sparse component U : ndarray Matrix of left singular vectors. sv : ndarray Vector of singular values. count : int Iteration count. References ---------- Tomoya Sakai, Shun Ogawa, and Hiroki Kuhara "Sequential decomposition of 3D apparent motion fields basedon low-rank and sparse approximation" APSIPA2017 (to appear). Example ------- >>> from spmlib import solver as sps >>> U, sv = np.empty([0,0]), np.empty(0) # initialize >>> L, S = np.zeros(C.shape), np.zeros(C.shape) >>> for j in range(n): >>> L[:,j], S[:,j], U, sv = sps.column_incremental_stable_principal_component_pursuit(C[:,j], U, sv, ls=0.5, update_basis=True, max_rank=50, orth_eps=linalg.norm(Y[:,j])*1e-12)[:4] """ m = c.shape[0] # initialize l and s if s is None: s = np.zeros_like(c) # np.zeros(m, dtype=c.dtype) if l is None: l = c.ravel() - s if sv.size == 0: U, sv, V = linalg.svd(np.atleast_2d(c.T).T, full_matrices=False) return l, s, U, sv, 0 # G = lambda x: np.concatenate((x[:m]+x[m:], x[:m], x[m:])) # x = np.concatenate((l,s)) x = np.zeros(2 * m, dtype=c.dtype) x[:m] = l x[m:] = s # z = G(x) z = np.zeros(3 * m, dtype=c.dtype) z[:m] = x[:m] + x[m:] z[m:2 * m] = x[:m] z[2 * m:] = x[m:] y = np.zeros_like(z) # np.zeros(3*m, dtype=c.dtype) t = 1. count = 0 Ut = U while count < maxiter: count += 1 if np.fmod(count, restart_every) == 0: t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) # update x #dx = x.copy() q = z - y x[:m] = (1. / 3.) * (q[:m] + 2. * q[m:2 * m] - q[2 * m:]) x[m:] = (1. / 3.) * (q[:m] - q[m:2 * m] + 2. * q[2 * m:]) #dx = x - dx # q = G(x) + y q[:m] = x[:m] + x[m:] + y[:m] q[m:2 * m] = x[:m] + y[m:2 * m] q[2 * m:] = x[m:] + y[2 * m:] # update z if np.fmod(count, adjust_basis_every) == 0: Ut = column_incremental_SVD(x[:m], U, sv, forget=forget, max_rank=max_rank, min_sv=min_sv, orth_eps=orth_eps, orthogonalize_basis=False)[0] dz = z.copy() z[:m] = prox_ls(q[:m], delta, c.ravel()) z[m:2 * m] = prox_l(q[m:2 * m], 1. / rho, Ut) z[2 * m:] = prox_s(q[2 * m:], ls / rho) dz = z - dz # update y #y = y + G(x) - z dy = y.copy() y[:m] += x[:m] + x[m:] - z[:m] y[m:2 * m] += x[:m] - z[m:2 * m] y[2 * m:] += x[m:] - z[2 * m:] dy = y - dy # Nesterov acceleration if nesterovs_momentum: z = z + ((told - 1.) / t) * dz y = y + ((told - 1.) / t) * dy # check convergence of primal and dual residuals if linalg.norm(dy) < rtol * linalg.norm(y) and linalg.norm( dz) < rtol * linalg.norm(z): break l = x[:m] s = x[m:] if update_basis: U, sv = column_incremental_SVD(l, U, sv, forget=forget, max_rank=max_rank, min_sv=min_sv, orth_eps=orth_eps, orthogonalize_basis=orthogonalize_basis) return l, s, U, sv, count
def basis_pursuit_delta(A, b, x=None, delta=None, maxiter=1000, rtol=1e-4, alpha=None, rho=1., eta=2., nesterovs_momentum=False, restart_every=np.nan, prox=lambda q, l: prox.l1(q, l), prox_loss=lambda q, delta: prox.ind_l2ball(q, delta)): """ ADMM algorithm with subproblem approximation for constrained basis pursuit denoising (a.k.a. BP delta) in Eq. (2.16) [Yang&Zhang11] solves x = arg min_x g(x) s.t. f(b-Ax)=||b-Ax||_2 <= delta where g(x)=||x||_1 (by default) Parameters ---------- A : m x n matrix, LinearOperator, or tuple (fA, fAT) of functions fA(z)=A.dot(z) and fAT(r)=A.conj().T.dot(r). b : m-dimensional vector. x : initial guess, (A.conj().T.dot(b) by default), will be mdified in this function. delta : tolerance for residual (1e-3*||b||_2 by default). maxiter: max. iterations (1000 by default) as a stopping criterion. rtol : scalar, optional, default 1e-4 Relative convergence tolerance of `y` and `z` in ADMM, i.e., the primal and dual residuals as a stopping criterion. alpha : scaling factor of the gradient of the quadratic term in the ADMM subproblem for g(x) to approximate the proximity operator (automatically computed by default). rho : scalar, optional, default 1. Augmented Lagrangian parameter. nesterovs_momentum : Nesterov acceleration (False by default). restart_every: restart the Nesterov acceleration every this number of iterations (disabled by default). prox : proximity operator of sparsity inducing function g(x), the soft thresholding soft_thresh(q,l) (=prox.l1(q,l)) by default. prox_loss : proximity operator of loss function f(b-Ax), the orthogonal projection onto l2 ball with radius delta (=prox.ind_l2ball(q,delta)) by default. Returns ------- x : sparse solution. r : residual (b - Ax). count : loop count at termination. References ---------- J. Yang and Y. Zhang "Alternating Direction Algorithms for $\ell_1$-Problems in Compressive Sensing" SIAM J. Sci. Comput., 33(1), pp. 250-278, 2011. https://epubs.siam.org/doi/10.1137/090777761 Example ------- >>> x = basis_pursuit_delta(A, b, delta=0.05*linalg.norm(b), maxiter=1000, nesterovs_momentum=True)[0] """ # define the functions that compute projections by A and its adjoint if type(A) is tuple: fA = A[0] fAT = A[1] else: linopA = splinalg.aslinearoperator(A) fA = linopA.matvec fAT = linopA.rmatvec # initialize x if x is None: x = fAT(b) # initialize delta if delta is None: delta = linalg.norm(b) * 1e-3 # initialize alpha using rough estimate of the Lipschitz constant # alpha L + 1/rho < 2 if alpha is None: L = 2 * linalg.norm(fA(fAT(b))) / linalg.norm(b) #L = linalg.norm(A) ** 2 # Lipschitz constant alpha = (2. - 1. / rho) / L #print('basis_pursuit_delta(): alpha = %.2e' % (alpha)) y = np.zeros_like(b) # np.zeros(3*m, dtype=c.dtype) # initialize variables t = 1. count = 0 while count < maxiter: count += 1 if np.fmod(count, restart_every) == 0: t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) # update r #dr = r.copy() # old r r = prox_loss(b - fA(x) - y, delta) #dr = r - dr # update x g = fAT(fA(x) + r - b + y) dx = x.copy() # old x x = prox(x - alpha * g, alpha / rho) dx = x - dx # update y dy = y.copy() y = y + r + fA(x) - b dy = y - dy # Nesterov acceleration if nesterovs_momentum: #r = r + ((told - 1.) / t) * dr #x = x + ((told - 1.) / t) * dx y = y + ((told - 1.) / t) * dy # check convergence of primal and dual residuals if linalg.norm(dx) < rtol * linalg.norm(x) and linalg.norm( dy) < rtol * linalg.norm(y): break if np.max(np.abs(x)) > 1e+20: # check overflow x = fAT(b) y = np.zeros_like(b) r = b.copy() alpha /= eta count = 0 print( 'basis_pursuit_delta(): Overflow. Restarted with a smaller constant alpha = %.2e' % (alpha)) r = b - fA(x) # residual return x, r, count
def greedy_coordinate_descent(A, b, x=None, tol=1e-5, maxiter=1000, tolx=1e-12, l=1., nesterovs_momentum=False, restart_every=np.nan, prox=lambda z, l: prox.l1(z, l), N=1): """ Coordinate descent algorithm solves x = arg min_x f(x) + g(x) where f(x)=0.5||b-Ax||^2, g(x)=l*||x||_1 = arg min_x 0.5*|| b - A x ||_2^2 + l' * abs(x) Parameters ---------- A : m x n matrix, LinearOperator, or tuple (fA, fAT) of functions fA(z)=A.dot(z) and fAT(r)=A.conj().T.dot(r). b : m-dimensional vector. x : initial guess, (A.conj().T.dot(b) by default), will be mdified in this function. tol : tolerance for residual (1e-5 by default) as a stopping criterion. maxiter: max. iterations (1000 by default) as a stopping criterion. tolx : tolerance for x displacement (1e-12 by default) as a stopping criterion. l : barancing parameter lambda (1. by default). nesterovs_momentum : Nesterov acceleration (False by default). restart_every: restart the Nesterov acceleration every this number of iterations (disabled by default). prox : proximity operator of g(x), the soft thresholding soft_thresh(z,l) (=prox.l1(z,l)) by default for g(x)=l*||x||_1. N : number of coordinates to pick at each step. Returns ------- x : sparse solution. r : residual (b - Ax). count : loop count at termination. Example ------- >>> x = greedy_coordinate_descent(A, b, l=1.5, maxiter=1000, tol=linalg.norm(b)*1e-12, N=3)[0] """ # define the functions that compute projections by A and its adjoint if type(A) is tuple: fA = A[0] fAT = A[1] else: linopA = splinalg.aslinearoperator(A) fA = linopA.matvec fAT = linopA.rmatvec # initialize x if x is None: x = np.zeros_like(fAT(b)) #x = np.zeros(A.shape[1], dtype=b.dtype) # A = splinalg.LinearOperator((b.shape[0],x.shape[0]), matvec=fA, rmatvec=fAT) # initialize variables t = 1 count = 0 r = b - fA(x) # residual c = fAT(b) while count < maxiter and linalg.norm(r) > tol: count += 1 z = prox(c, l) dx = z - x s = np.argsort( -np.abs(dx))[:N] # multiple coordinate choice (tsakai heuristic) #s = np.argmax(np.abs(dx)) dxs = np.zeros_like(dx) dxs[s] = dx[s] #cs = c[s] c = c + dxs - fAT(fA(dxs)) # Gregor&LeCun version #c[s] = cs # Li&Osher original version dx = x.copy() x[s] = z[s] dx = x - dx if np.fmod(count, restart_every) == 0: t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) w = x + ((told - 1.) / t) * dx else: w = x r = b - fA(w) if linalg.norm(dx) < tolx: break if np.max(np.abs(x)) > 1e+20: # check overflow x = fAT(b) c = fAT(b) count = 0 print('CoD(): Overflow.') return x, r, count
def iterative_soft_thresholding(A, b, x=None, tol=1e-5, maxiter=1000, tolx=1e-12, l=1., L=None, eta=2., nesterovs_momentum=False, restart_every=np.nan, prox=lambda z, l: prox.l1(z, l)): """ Iterative soft thresholding algorithm solves x = arg min_x f(x) + g(x) where f(x)=0.5||b-Ax||^2, g(x)=l*||x||_1 = arg min_x 0.5*|| b - A x ||_2^2 + l' * abs(x) Parameters ---------- A : m x n matrix, LinearOperator, or tuple (fA, fAT) of functions fA(z)=A.dot(z) and fAT(r)=A.conj().T.dot(r). b : m-dimensional vector. x : initial guess, (A.conj().T.dot(b) by default), will be mdified in this function. tol : tolerance for residual (1e-5 by default) as a stopping criterion. maxiter: max. iterations (1000 by default) as a stopping criterion. tolx : tolerance for x displacement (1e-12 by default) as a stopping criterion. l : barancing parameter lambda (1. by default). L : Lipschitz constant (automatically computed by default). eta : magnification L*=eta in the linear search of L. nesterovs_momentum : Nesterov acceleration (False by default). restart_every: restart the Nesterov acceleration every this number of iterations (disabled by default). prox : proximity operator of g(x), the soft thresholding soft_thresh(z,l) (=prox.l1(z,l)) by default for g(x)=l*||x||_1. Returns ------- x : sparse solution. r : residual (b - Ax). count : loop count at termination. Example ------- >>> x = iterative_soft_thresholding(A, b, l=1.5, maxiter=1000, tol=linalg.norm(b)*1e-12, nesterovs_momentum=True)[0] """ # define the functions that compute projections by A and its adjoint if type(A) is tuple: fA = A[0] fAT = A[1] else: A = splinalg.aslinearoperator(A) fA = A.matvec fAT = A.rmatvec # initialize x if x is None: x = fAT(b) # A = splinalg.LinearOperator((b.shape[0],x.shape[0]), matvec=fA, rmatvec=fAT) # initialize variables t = 1 w = x.copy() # roughly estimate the Lipschitz constant if L is None: L = 2 * linalg.norm(fA(fAT(b))) / linalg.norm(b) #L = linalg.norm(A) ** 2 # Lipschitz constant count = 0 r = b - fA(w) # residual while count < maxiter and linalg.norm(r) > tol: count += 1 dx = x.copy() # old x # x = prox(w + A.conj().T.dot(r) / L, l / L) x = prox(w + fAT(r) / L, l / L) dx = x - dx if np.fmod(count, restart_every) == 0: t = 1. if nesterovs_momentum: told = t t = 0.5 * (1. + sqrt(1. + 4. * t * t)) w = x + ((told - 1.) / t) * dx else: w = x r = b - fA(w) if linalg.norm(dx) < tolx: break if np.max(np.abs(x)) > 1e+20: # check overflow x = fAT(b) w = x.copy() r = b.copy() L *= eta count = 0 print( 'FISTA(): Overflow. Restarted with a larger Lipschitz constant L = %.2e' % (L)) return x, r, count