def qr_destroy(la):
    """
    Return QR decomposition of `la[0]`. Content of `la` gets destroyed in the process.

    The matrix is removed from `la` so that this function ends up holding the
    only reference to it. Using this function should be less memory intense
    than calling `scipy.linalg.qr(la[0])`, because the memory used in `la[0]`
    is reclaimed earlier.

    Returns the pair `(q, r)`; `q` is asserted to be Fortran-contiguous.
    """
    mat = numpy.asfortranarray(la[0])
    del la[0], la  # from here on `mat` is the only reference to the input matrix
    n_rows, n_cols = mat.shape
    # perform q, r = QR(mat); call sequence taken from scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(mat.shape))

    (geqrf,) = get_lapack_funcs(('geqrf',), (mat,))
    # first call (lwork=-1) is only used for the workspace size it reports
    _, _, workspace, _ = geqrf(mat, lwork=-1, overwrite_a=True)
    factored, tau, workspace, status = geqrf(mat, lwork=workspace[0], overwrite_a=True)
    del mat  # free up mem
    assert status >= 0

    r = triu(factored[:n_cols, :n_cols])
    if n_rows < n_cols:  # rare case, #features < #topics
        factored = factored[:, :n_rows]  # retains fortran order

    (orgqr,) = get_lapack_funcs(('orgqr',), (factored,))
    _, workspace, _ = orgqr(factored, tau, lwork=-1, overwrite_a=True)
    q, workspace, status = orgqr(factored, tau, lwork=workspace[0], overwrite_a=True)
    assert status >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
def test_sycon_hecon():
    """Compare the ?sycon/?hecon estimate with ``np.linalg.cond``.

    For each dtype a symmetric/Hermitian matrix with a shifted diagonal is
    factorized with ?sytrf/?hetrf, and the reciprocal condition number
    returned by ?sycon/?hecon is checked against numpy's 1-norm condition
    number.
    """
    seed(1234)
    size = 10
    for pos, dtype in enumerate(DTYPES+COMPLEX_DTYPES):
        # DTYPES + COMPLEX DTYPES = <s,d,c,z> sycon + <c,z>hecon
        if pos < 4:
            # <s,d,c,z>sycon
            lwork_query = get_lapack_funcs('sytrf_lwork', dtype=dtype)
            estimate, factorize = get_lapack_funcs(('sycon', 'sytrf'),
                                                   dtype=dtype)
            mat = (rand(size, size)).astype(dtype)
        else:
            # <c,z>hecon
            lwork_query = get_lapack_funcs('hetrf_lwork', dtype=dtype)
            estimate, factorize = get_lapack_funcs(('hecon', 'hetrf'),
                                                   dtype=dtype)
            mat = (rand(size, size) + rand(size, size)*1j).astype(dtype)

        # Since sycon only refers to upper/lower part, conj() is safe here.
        mat = (mat + mat.conj().T)/2 + 2*np.eye(size, dtype=dtype)

        one_norm = np.linalg.norm(mat, 1)
        lwork = _compute_lwork(lwork_query, size)
        ldu, ipiv, _ = factorize(mat, lwork=lwork, lower=1)
        rcond, _ = estimate(a=ldu, ipiv=ipiv, anorm=one_norm, lower=1)
        # The error is at most 1-fold
        relative_error = abs(1/rcond - np.linalg.cond(mat, p=1))*rcond
        assert_(relative_error < 1)
def test_gelsd(self):
    """Solve a 3x2 least-squares problem with ?gelsd for real and complex dtypes.

    The workspace sizes are obtained from ?gelsd_lwork; the solution and the
    singular values are compared with hard-coded reference values.
    """
    for dtype in REAL_DTYPES:
        lhs = np.array([[1.0,2.0],
                        [4.0,5.0],
                        [7.0,8.0]], dtype=dtype)
        rhs = np.array([16.0, 17.0, 20.0], dtype=dtype)
        solver, size_query = get_lapack_funcs(('gelsd','gelsd_lwork'),
                                              (lhs, rhs))

        n_rows, n_cols = lhs.shape
        n_rhs = rhs.shape[1] if len(rhs.shape) == 2 else 1

        # Request of sizes
        work, iwork, info = size_query(n_rows,n_cols,n_rhs,-1)
        lwork = int(np.real(work))
        iwork_size = iwork

        x, s, rank, info = solver(lhs, rhs, lwork, iwork_size,
                                  -1, False, False)
        tolerance = 25*np.finfo(dtype).eps
        assert_allclose(x[:-1],
                        np.array([-14.333333333333323,
                                  14.999999999999991], dtype=dtype),
                        rtol=tolerance)
        assert_allclose(s,
                        np.array([12.596017180511966,
                                  0.583396253199685], dtype=dtype),
                        rtol=tolerance)

    for dtype in COMPLEX_DTYPES:
        lhs = np.array([[1.0+4.0j,2.0],
                        [4.0+0.5j,5.0-3.0j],
                        [7.0-2.0j,8.0+0.7j]], dtype=dtype)
        rhs = np.array([16.0, 17.0+2.0j, 20.0-4.0j], dtype=dtype)
        solver, size_query = get_lapack_funcs(('gelsd','gelsd_lwork'),
                                              (lhs, rhs))

        n_rows, n_cols = lhs.shape
        n_rhs = rhs.shape[1] if len(rhs.shape) == 2 else 1

        # Request of sizes (the complex query also reports a real workspace)
        work, rwork, iwork, info = size_query(n_rows,n_cols,n_rhs,-1)
        lwork = int(np.real(work))
        rwork_size = int(rwork)
        iwork_size = iwork

        x, s, rank, info = solver(lhs, rhs, lwork, rwork_size, iwork_size,
                                  -1, False, False)
        tolerance = 25*np.finfo(dtype).eps
        assert_allclose(x[:-1],
                        np.array([1.161753632288328-1.901075709391912j,
                                  1.735882340522193+1.521240901196909j],
                                 dtype=dtype),
                        rtol=tolerance)
        assert_allclose(s,
                        np.array([13.035514762572043, 4.337666985231382],
                                 dtype=dtype),
                        rtol=tolerance)
def multiple_fast_inverse(a):
    """Compute the inverse of a set of arrays.

    Parameters
    ----------
    a: array_like of shape (n_samples, n_dim, n_dim)
        Set of square matrices to be inverted. A is changed in place.

    Returns
    -------
    a: ndarray
       yielding the inverse of the inputs

    Raises
    ------
    ValueError :
        If the last two dimensions of `a` are not square, or if the LU
        decomposition of one of the matrices reports a non-zero ``info``
        (for example because the matrix is singular).

    Notes
    -----
    This function is borrowed from scipy.linalg.inv, but with some
    customizations for speed-up. An empty batch (``n_samples == 0``) is
    returned unchanged.
    """
    if a.shape[1] != a.shape[2]:
        raise ValueError('a must have shape (n_samples, n_dim, n_dim)')
    n_samples = a.shape[0]
    if n_samples == 0:
        # nothing to invert, and a[0] below would fail
        return a

    from scipy.linalg.lapack import get_lapack_funcs
    a1 = a[0]
    getrf, getri, getri_lwork = get_lapack_funcs(
        ('getrf', 'getri', 'getri_lwork'), (a1,))

    # ATLAS 3.2.1 has getrf but not getri.
    transposed_getrf = (getrf.module_name[:7] == 'clapack'
                        and getri.module_name[:7] != 'clapack')
    flapack_getri = getri.module_name[:7] == 'flapack'

    if flapack_getri:
        # The workspace size only depends on n_dim, so query it once.
        lwork, info_ = getri_lwork(a1.shape[0])
        # XXX: the following line fixes curious SEGFAULT when
        # benchmarking 500x500 matrix inverse. This seems to
        # be a bug in LAPACK ?getri routine because if lwork is
        # minimal (when using lwork[0] instead of lwork[1]) then
        # all tests pass. Further investigation is required if
        # more such SEGFAULTs occur.
        lwork = int(1.01 * lwork.real)

    for i in range(n_samples):
        if transposed_getrf:
            lu, piv, info = getrf(np.transpose(a[i]), rowmajor=0,
                                  overwrite_a=True)
            a[i] = np.transpose(lu)
        else:
            a[i], piv, info = getrf(a[i], overwrite_a=True)
        if info != 0:
            raise ValueError('Matrix LU decomposition failed')
        if flapack_getri:
            a[i], _ = getri(a[i], piv, lwork=lwork, overwrite_lu=1)
        else:  # clapack
            a[i], _ = getri(a[i], piv, overwrite_lu=1)
    return a
def test_ormrz_unmrz():
    """
    This test performs a matrix multiplication with an arbitrary m x n matrix C
    and a unitary matrix Q without explicitly forming the array. The array data
    is encoded in the rectangular part of A which is obtained from ?TZRZF. Q
    size is inferred by m, n, side keywords.
    """
    seed(1234)
    qm, qn, cn = 10, 15, 15
    for ind, dtype in enumerate(DTYPES):
        is_real = ind < 2
        tzrzf, tzrzf_lw = get_lapack_funcs(('tzrzf', 'tzrzf_lwork'),
                                           dtype=dtype)
        lwork_rz = _compute_lwork(tzrzf_lw, qm, qn)

        if is_real:
            A = triu(rand(qm, qn).astype(dtype))
            C = rand(cn, cn).astype(dtype)
            routine_names = ('ormrz', 'ormrz_lwork')
        else:
            A = triu((rand(qm, qn) + rand(qm, qn)*1j).astype(dtype))
            C = (rand(cn, cn) + rand(cn, cn)*1j).astype(dtype)
            routine_names = ('unmrz', 'unmrz_lwork')
        orun_mrz, orun_mrz_lw = get_lapack_funcs(routine_names, dtype=dtype)

        lwork_mrz = _compute_lwork(orun_mrz_lw, cn, cn)
        rz, tau, info = tzrzf(A, lwork=lwork_rz)

        # Get Q manually for comparison
        V = np.hstack((np.eye(qm, dtype=dtype), rz[:, qm:]))
        Id = np.eye(qn, dtype=dtype)
        reflectors = []
        for k in range(qm):
            row = V[[k], :]
            reflectors.append(Id-tau[k]*row.T.dot(row.conj()))
        Q = reduce(np.dot, reflectors)

        # Now that we have Q, we can test whether lapack results agree with
        # each case of CQ, CQ^H, QC, and QC^H
        trans = 'T' if is_real else 'C'
        tol = 10*np.spacing(dtype(1.0).real)

        # (extra keyword arguments, reference product evaluated after the call)
        cases = (
            ({}, lambda: Q.dot(C)),
            ({'trans': trans}, lambda: Q.conj().T.dot(C)),
            ({'side': 'R'}, lambda: C.dot(Q)),
            ({'side': 'R', 'trans': trans}, lambda: C.dot(Q.conj().T)),
        )
        for extra, reference in cases:
            cq, info = orun_mrz(rz, tau, C, lwork=lwork_mrz, **extra)
            assert_(info == 0)
            assert_allclose(cq - reference(), zeros_like(C), atol=tol,
                            rtol=0.)
def test_gelsy(self):
    """Solve a 3x2 least-squares problem with ?gelsy for real and complex dtypes.

    The workspace size is taken from the matching ?gelsy_lwork query (the
    query routine belonging to the driver under test) and the computed
    solution is compared with hard-coded reference values.
    """
    for dtype in REAL_DTYPES:
        a1 = np.array([[1.0, 2.0],
                       [4.0, 5.0],
                       [7.0, 8.0]], dtype=dtype)
        b1 = np.array([16.0, 17.0, 20.0], dtype=dtype)
        gelsy, gelsy_lwork = get_lapack_funcs(('gelsy', 'gelsy_lwork'),
                                              (a1, b1))

        m, n = a1.shape
        if len(b1.shape) == 2:
            nrhs = b1.shape[1]
        else:
            nrhs = 1

        # Request of sizes
        work, info = gelsy_lwork(m, n, nrhs, 10*np.finfo(dtype).eps)
        lwork = int(np.real(work))

        # all-zero pivot array passed as the jptv argument
        jptv = np.zeros((a1.shape[1], 1), dtype=np.int32)
        v, x, j, rank, info = gelsy(a1, b1, jptv, np.finfo(dtype).eps,
                                    lwork, False, False)
        assert_allclose(x[:-1],
                        np.array([-14.333333333333323,
                                  14.999999999999991], dtype=dtype),
                        rtol=25*np.finfo(dtype).eps)

    for dtype in COMPLEX_DTYPES:
        a1 = np.array([[1.0+4.0j, 2.0],
                       [4.0+0.5j, 5.0-3.0j],
                       [7.0-2.0j, 8.0+0.7j]], dtype=dtype)
        b1 = np.array([16.0, 17.0+2.0j, 20.0-4.0j], dtype=dtype)
        gelsy, gelsy_lwork = get_lapack_funcs(('gelsy', 'gelsy_lwork'),
                                              (a1, b1))

        m, n = a1.shape
        if len(b1.shape) == 2:
            nrhs = b1.shape[1]
        else:
            nrhs = 1

        # Request of sizes
        work, info = gelsy_lwork(m, n, nrhs, 10*np.finfo(dtype).eps)
        lwork = int(np.real(work))

        jptv = np.zeros((a1.shape[1], 1), dtype=np.int32)
        v, x, j, rank, info = gelsy(a1, b1, jptv, np.finfo(dtype).eps,
                                    lwork, False, False)
        assert_allclose(x[:-1],
                        np.array([1.161753632288328-1.901075709391912j,
                                  1.735882340522193+1.521240901196909j],
                                 dtype=dtype),
                        rtol=25*np.finfo(dtype).eps)
def test_gels(self):
    """Solve a 3x2 least-squares problem with ?gels for real and complex dtypes.

    Besides the solution, the factor returned by ?gels is required to be
    exactly equal to the first output of ?geqrf on the same matrix.
    """
    for dtype in REAL_DTYPES:
        lhs = np.array([[1.0, 2.0],
                        [4.0, 5.0],
                        [7.0, 8.0]], dtype=dtype)
        rhs = np.array([16.0, 17.0, 20.0], dtype=dtype)
        gels, gels_lwork, geqrf = get_lapack_funcs(
                ('gels', 'gels_lwork', 'geqrf'), (lhs, rhs))

        n_rows, n_cols = lhs.shape
        n_rhs = rhs.shape[1] if len(rhs.shape) == 2 else 1

        # Request of sizes
        lwork = _compute_lwork(gels_lwork, n_rows, n_cols, n_rhs)

        lqr, x, info = gels(lhs, rhs, lwork=lwork)
        reference = np.array([-14.333333333333323,
                              14.999999999999991], dtype=dtype)
        assert_allclose(x[:-1], reference, rtol=25*np.finfo(dtype).eps)
        lqr_truth, _, _, _ = geqrf(lhs)
        assert_array_equal(lqr, lqr_truth)

    for dtype in COMPLEX_DTYPES:
        lhs = np.array([[1.0+4.0j, 2.0],
                        [4.0+0.5j, 5.0-3.0j],
                        [7.0-2.0j, 8.0+0.7j]], dtype=dtype)
        rhs = np.array([16.0, 17.0+2.0j, 20.0-4.0j], dtype=dtype)
        gels, gels_lwork, geqrf = get_lapack_funcs(
                ('gels', 'gels_lwork', 'geqrf'), (lhs, rhs))

        n_rows, n_cols = lhs.shape
        n_rhs = rhs.shape[1] if len(rhs.shape) == 2 else 1

        # Request of sizes
        lwork = _compute_lwork(gels_lwork, n_rows, n_cols, n_rhs)

        lqr, x, info = gels(lhs, rhs, lwork=lwork)
        reference = np.array([1.161753632288328-1.901075709391912j,
                              1.735882340522193+1.521240901196909j],
                             dtype=dtype)
        assert_allclose(x[:-1], reference, rtol=25*np.finfo(dtype).eps)
        lqr_truth, _, _, _ = geqrf(lhs)
        assert_array_equal(lqr, lqr_truth)
def _geneig(a1, b1, left=False, right=True, overwrite_a=False, overwrite_b=False, return_ab=True): ggev, = get_lapack_funcs(("ggev",), (a1, b1)) cvl, cvr = left, right res = ggev(a1, b1, lwork=-1) lwork = res[-2][0].real.astype(numpy.int) if ggev.typecode in "cz": alpha, beta, vl, vr, work, info = ggev(a1, b1, cvl, cvr, lwork, overwrite_a, overwrite_b) w = alpha / beta else: alphar, alphai, beta, vl, vr, work, info = ggev(a1, b1, cvl, cvr, lwork, overwrite_a, overwrite_b) w = (alphar + _I * alphai) / beta alpha = alphar + _I * alphai if info < 0: raise ValueError("illegal value in %d-th argument of internal ggev" % -info) if info > 0: raise LinAlgError("generalized eig algorithm did not converge (info=%d)" % info) only_real = numpy.logical_and.reduce(numpy.equal(w.imag, 0.0)) if not (ggev.typecode in "cz" or only_real): t = w.dtype.char if left: vl = _make_complex_eigvecs(w, vl, t) if right: vr = _make_complex_eigvecs(w, vr, t) if not (left or right): return w if left: if right: return w, vl, vr return w, vl if return_ab: return alpha, beta, w, vr return w, vr
def test_sygst():
    """Check ?sygst by comparing two routes to the generalized eigenvalues.

    The values obtained directly from ?sygvd are compared with the ones
    obtained by factorizing B with ?potrf, reducing the problem with ?sygst
    and calling ?syevd on the result.
    """
    seed(1234)
    size = 10
    for ind, dtype in enumerate(REAL_DTYPES):
        # DTYPES = <s,d> sygst
        potrf, sygst, syevd, sygvd = get_lapack_funcs(
            ('potrf', 'sygst', 'syevd', 'sygvd'), dtype=dtype)

        sym = rand(size, size).astype(dtype)
        sym = (sym + sym.T)/2
        # B must be positive definite
        posdef = rand(size, size).astype(dtype)
        posdef = (posdef + posdef.T)/2 + 2 * np.eye(size, dtype=dtype)

        # Perform eig (sygvd)
        _, eig_gvd, info = sygvd(sym, posdef)
        assert_(info == 0)

        # Convert to std problem potrf
        chol, info = potrf(posdef)
        assert_(info == 0)
        reduced, info = sygst(sym, chol)
        assert_(info == 0)

        eig, _, info = syevd(reduced)
        assert_(info == 0)
        assert_allclose(eig, eig_gvd, rtol=1e-4)
def test_sfrk_hfrk():
    """
    Test for performing a symmetric rank-k operation for matrix in RFP format.
    """
    seed(1234)
    size = 20
    for ind, dtype in enumerate(DTYPES):
        is_complex = ind > 1
        if is_complex:
            A = (rand(size, size) + rand(size, size)*1j).astype(dtype)
            A = A + A.conj().T + size*eye(size)
        else:
            A = (rand(size, size)).astype(dtype)
            A = A + A.T + size*eye(size)

        # ?sfrk for the first two dtypes, ?hfrk for the remaining ones
        frk_name = ('h' if is_complex else 's') + 'frk'
        trttf, tfttr, shfrk = get_lapack_funcs(('trttf', 'tfttr', frk_name),
                                               dtype=dtype)

        Afp, _ = trttf(A)
        C = np.random.rand(size, 2).astype(dtype)
        Afp_out = shfrk(size, 2, -1, C, 2, Afp)
        A_out, _ = tfttr(size, Afp_out)

        # the unpacked result is compared with triu(-C C^H + 2 A)
        expected = triu(-C.dot(C.conj().T) + 2*A)
        precision = 4 if ind % 2 == 0 else 6
        assert_array_almost_equal(A_out, expected, decimal=precision)
def test_sing_val_update(self):
    """Check the roots found by ?lasd4 against the singular values from `svd`.

    A matrix M is assembled from a diagonal block (`sigmas`) and an extra
    column (`m_vec`); its singular values are computed with `svd` and
    compared with the roots that lasd4 returns one index at a time.
    """
    sigmas = np.array([4., 3., 2., 0])
    m_vec = np.array([3.12, 5.7, -4.8, -2.2])

    M = np.hstack((np.vstack((np.diag(sigmas[0:-1]),
                              np.zeros((1, len(m_vec) - 1)))),
                   m_vec[:, np.newaxis]))
    SM = svd(M, full_matrices=False, compute_uv=False, overwrite_a=False,
             check_finite=False)

    it_len = len(sigmas)
    # reversed inputs with one extra trailing entry each
    sgm = np.concatenate((sigmas[::-1],
                          (sigmas[0] +
                           it_len*np.sqrt(np.sum(np.power(m_vec, 2))),)))
    mvc = np.concatenate((m_vec[::-1], (0,)))

    lasd4 = get_lapack_funcs('lasd4', (sigmas,))

    roots = []
    for i in range(0, it_len):
        res = lasd4(i, sgm, mvc)
        roots.append(res[1])

        assert_((res[3] <= 0),
                "LAPACK root finding dlasd4 failed to find "
                "the singular value %i" % i)
    roots = np.array(roots)[::-1]

    # Condition and message are separate arguments; wrapping them in one
    # tuple would make the assertion always pass.
    assert_(not np.any(np.isnan(roots)), "There are NaN roots")
    assert_allclose(SM, roots, atol=100*np.finfo(np.float64).eps,
                    rtol=100*np.finfo(np.float64).eps)
def test_rot():
    """Exercise the ``rot`` wrappers with various n/offset/increment options.

    srot, drot come from blas and crot and zrot from lapack. Each call is
    compared with a hard-coded expected pair of vectors; the last call also
    checks that the overwrite flags return the input arrays themselves.
    """
    for dtype in 'fdFD':
        c = 0.6
        s = 0.8

        u = np.ones(4, dtype) * 3
        v = np.ones(4, dtype) * 4
        atol = 10**-(np.finfo(dtype).precision-1)

        if dtype in 'fd':
            rot = get_blas_funcs('rot', dtype=dtype)
            f = 4
        else:
            rot = get_lapack_funcs('rot', dtype=dtype)
            s *= -1j
            v *= 1j
            f = 4j

        # (keyword arguments, expected [x, y] result), checked in this order
        cases = [
            ({}, [[5,5,5,5],[0,0,0,0]]),
            ({'n': 2}, [[5,5,3,3],[0,0,f,f]]),
            ({'offx': 2, 'offy': 2}, [[3,3,5,5],[f,f,0,0]]),
            ({'incx': 2, 'offy': 2, 'n': 2}, [[5,3,5,3],[f,f,0,0]]),
            ({'offx': 2, 'incy': 2, 'n': 2}, [[3,3,5,5],[0,f,0,f]]),
            ({'offx': 2, 'incx': 2, 'offy': 2, 'incy': 2, 'n': 1},
             [[3,3,5,3],[f,f,0,f]]),
            ({'incx': -2, 'incy': -2, 'n': 2}, [[5,3,5,3],[0,f,0,f]]),
        ]
        for options, expected in cases:
            assert_allclose(rot(u, v, c, s, **options), expected, atol=atol)

        a, b = rot(u, v, c, s, overwrite_x=1, overwrite_y=1)
        assert_(a is u)
        assert_(b is v)
        assert_allclose(a, [5,5,5,5], atol=atol)
        assert_allclose(b, [0,0,0,0], atol=atol)
def test_lange(self):
    """Compare ?lange with numpy-computed norms for every norm selector.

    Frobenius-type selectors are compared to a dtype-dependent number of
    decimals; the max, one and infinity norms must match exactly.
    """
    base = np.array([
        [-149, -50,-154],
        [537, 180, 546],
        [-27, -9, -25]])

    for dtype in 'fdFD':
        for norm in 'Mm1OoIiFfEe':
            mat = base.astype(dtype)
            if dtype.isupper():
                # is complex dtype
                mat[0,0] += 1j

            (lange,) = get_lapack_funcs(('lange',), (mat,))
            value = lange(norm, mat)
            magnitudes = np.abs(mat)

            if norm in 'FfEe':
                decimal = 3 if dtype in 'Ff' else 7
                ref = np.sqrt(np.sum(np.square(magnitudes)))
                assert_almost_equal(value, ref, decimal)
                continue

            if norm in 'Mm':
                ref = np.max(magnitudes)
            elif norm in '1Oo':
                ref = np.max(np.sum(magnitudes, axis=0))
            elif norm in 'Ii':
                ref = np.max(np.sum(magnitudes, axis=1))
            assert_equal(value, ref)
def test_hegst():
    """Check ?hegst by comparing two routes to the generalized eigenvalues.

    The values obtained directly from ?hegvd are compared with the ones
    obtained by factorizing B with ?potrf, reducing the problem with ?hegst
    and calling ?heevd on the result.
    """
    seed(1234)
    size = 10
    for ind, dtype in enumerate(COMPLEX_DTYPES):
        # DTYPES = <c,z> hegst
        potrf, hegst, heevd, hegvd = get_lapack_funcs(
            ('potrf', 'hegst', 'heevd', 'hegvd'), dtype=dtype)

        herm = rand(size, size).astype(dtype) + 1j * rand(size, size).astype(dtype)
        herm = (herm + herm.conj().T)/2
        # B must be positive definite
        posdef = rand(size, size).astype(dtype) + 1j * rand(size, size).astype(dtype)
        posdef = (posdef + posdef.conj().T)/2 + 2 * np.eye(size, dtype=dtype)

        # Perform eig (hegvd)
        _, eig_gvd, info = hegvd(herm, posdef)
        assert_(info == 0)

        # Convert to std problem potrf
        chol, info = potrf(posdef)
        assert_(info == 0)
        reduced, info = hegst(herm, chol)
        assert_(info == 0)

        eig, _, info = heevd(reduced)
        assert_(info == 0)
        assert_allclose(eig, eig_gvd, rtol=1e-4)
def test_pftrs():
    """
    Test Cholesky factorization of a positive definite Rectangular Full
    Packed (RFP) format array and solve a linear system
    """
    seed(1234)
    size = 20
    for ind, dtype in enumerate(DTYPES):
        if ind > 1:
            A = (rand(size, size) + rand(size, size)*1j).astype(dtype)
            A = A + A.conj().T + size*eye(size)
        else:
            A = (rand(size, size)).astype(dtype)
            A = A + A.T + size*eye(size)

        rhs = ones((size, 3), dtype=dtype)
        rhs_too_tall = ones((size+2, 3), dtype=dtype)
        rhs_too_short = ones((size-2, 3), dtype=dtype)
        pftrs, pftrf, trttf, tfttr = get_lapack_funcs(
            ('pftrs', 'pftrf', 'trttf', 'tfttr'), dtype=dtype)

        # Pack A into RFP storage, then factorize the packed array
        Afp, info = trttf(A)
        A_chol_rfp, info = pftrf(size, Afp)

        # larger B arrays shouldn't segfault
        soln, info = pftrs(size, A_chol_rfp, rhs_too_tall)
        assert_(info == 0)
        # a right-hand side with too few rows must be rejected
        assert_raises(Exception, pftrs, size, A_chol_rfp, rhs_too_short)

        soln, info = pftrs(size, A_chol_rfp, rhs)
        assert_(info == 0)
        precision = 4 if ind % 2 == 0 else 6
        assert_array_almost_equal(solve(A, rhs), soln, decimal=precision)
def test_tzrzf():
    """
    This test performs an RZ decomposition in which an m x n upper trapezoidal
    array M (m <= n) is factorized as M = [R 0] * Z where R is upper triangular
    and Z is unitary.
    """
    seed(1234)
    m, n = 10, 15
    for ind, dtype in enumerate(DTYPES):
        tzrzf, tzrzf_lw = get_lapack_funcs(('tzrzf', 'tzrzf_lwork'),
                                           dtype=dtype)
        lwork = _compute_lwork(tzrzf_lw, m, n)

        if ind < 2:
            trapezoid = triu(rand(m, n).astype(dtype))
        else:
            trapezoid = triu((rand(m, n) + rand(m, n)*1j).astype(dtype))

        # assert wrong shape arg, f2py returns generic error
        assert_raises(Exception, tzrzf, trapezoid.T)
        rz, tau, info = tzrzf(trapezoid, lwork=lwork)
        # Check success
        assert_(info == 0)

        # Get Z manually for comparison
        R = np.hstack((rz[:, :m], np.zeros((m, n-m), dtype=dtype)))
        V = np.hstack((np.eye(m, dtype=dtype), rz[:, m:]))
        Id = np.eye(n, dtype=dtype)
        reflectors = []
        for k in range(m):
            row = V[[k], :]
            reflectors.append(Id-tau[k]*row.T.dot(row.conj()))
        Z = reduce(np.dot, reflectors)

        tol = 10*np.spacing(dtype(1.0).real)
        assert_allclose(R.dot(Z) - trapezoid,
                        zeros_like(trapezoid, dtype=dtype),
                        atol=tol, rtol=0.)
def _expm_multiply_simple_core(A, B, t, mu, m_star, s, tol=None, balance=False): """ A helper function. This is similar to algorithm 3.2 except with some values having been pre-calculated, including mu, m_star, and s. """ if balance: raise NotImplementedError if tol is None: u_d = 2 ** -53 tol = u_d # Get the lapack function for computing matrix norms. lange, = get_lapack_funcs(('lange',), (B,)) F = B eta = np.exp(t*mu / float(s)) for i in range(s): #c1 = exact_inf_norm(B) c1 = lange('i', B) for j in range(m_star): coeff = t / float(s*(j+1)) B = coeff * A.dot(B) #c2 = exact_inf_norm(B) c2 = lange('i', B) F = F + B #if c1 + c2 <= tol * exact_inf_norm(F): if c1 + c2 <= tol * lange('i', F): break c1 = c2 F = eta * F B = F return F
def test_pftri():
    """
    Test Cholesky factorization of a positive definite Rectangular Full
    Packed (RFP) format array to find its inverse
    """
    seed(1234)
    size = 20
    for ind, dtype in enumerate(DTYPES):
        if ind > 1:
            A = (rand(size, size) + rand(size, size)*1j).astype(dtype)
            A = A + A.conj().T + size*eye(size)
        else:
            A = (rand(size, size)).astype(dtype)
            A = A + A.T + size*eye(size)

        pftri, pftrf, trttf, tfttr = get_lapack_funcs(
            ('pftri', 'pftrf', 'trttf', 'tfttr'), dtype=dtype)

        # Pack A into RFP storage, factorize and invert in packed form
        Afp, info = trttf(A)
        A_chol_rfp, info = pftrf(size, Afp)
        A_inv_rfp, info = pftri(size, A_chol_rfp)
        assert_(info == 0)

        # Unpack and compare with the upper triangle of the dense inverse
        A_inv_r, _ = tfttr(size, A_inv_rfp)
        Ainv = inv(A)
        precision = 4 if ind % 2 == 0 else 6
        assert_array_almost_equal(A_inv_r, triu(Ainv), decimal=precision)
def test_trsyl(self):
    """Check ?trsyl solutions of a x + x b = scale * c and two variants.

    Runs for the 'f', 'd', 'F' and 'D' type codes, covering the default
    call, the transposed/conjugate-transposed call and ``isgn=-1``.
    """
    a = np.array([[1, 2], [0, 4]])
    b = np.array([[5, 6], [0, 8]])
    c = np.array([[9, 10], [11, 12]])
    trans = 'T'

    # Test single and double implementations, including most
    # of the options
    for dtype in 'fdFD':
        a1, b1, c1 = [mat.astype(dtype) for mat in (a, b, c)]
        (trsyl,) = get_lapack_funcs(('trsyl',), (a1,))
        if dtype.isupper():  # is complex dtype
            a1[0] += 1j
            trans = 'C'

        x, scale, info = trsyl(a1, b1, c1)
        residual = np.dot(a1, x) + np.dot(x, b1)
        assert_array_almost_equal(residual, scale * c1)

        x, scale, info = trsyl(a1, b1, c1, trana=trans, tranb=trans)
        residual = np.dot(a1.conjugate().T, x) + np.dot(x, b1.conjugate().T)
        assert_array_almost_equal(residual, scale * c1, decimal=4)

        x, scale, info = trsyl(a1, b1, c1, isgn=-1)
        residual = np.dot(a1, x) - np.dot(x, b1)
        assert_array_almost_equal(residual, scale * c1, decimal=4)
def _cho_inv_batch(a, check_finite=True): """ Invert the matrices a_i, using a Cholesky factorization of A, where a_i resides in the last two dimensions of a and the other indices describe the index i. Overwrites the data in a. Parameters ---------- a : array Array of matrices to invert, where the matrices themselves are stored in the last two dimensions. check_finite : boolean, optional Whether to check that the input matrices contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. Returns ------- x : array Array of inverses of the matrices a_i See also -------- scipy.linalg.cholesky : Cholesky factorization of a matrix """ if check_finite: a1 = asarray_chkfinite(a) else: a1 = asarray(a) if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]: raise ValueError('expected square matrix in last two dimensions') potrf, potri = get_lapack_funcs(('potrf','potri'), (a1,)) tril_idx = np.tril_indices(a.shape[-2], k=-1) triu_idx = np.triu_indices(a.shape[-2], k=1) for index in np.ndindex(a1.shape[:-2]): # Cholesky decomposition a1[index], info = potrf(a1[index], lower=True, overwrite_a=False, clean=False) if info > 0: raise LinAlgError("%d-th leading minor not positive definite" % info) if info < 0: raise ValueError('illegal value in %d-th argument of internal' ' potrf' % -info) # Inversion a1[index], info = potri(a1[index], lower=True, overwrite_c=False) if info > 0: raise LinAlgError("the inverse could not be computed") if info < 0: raise ValueError('illegal value in %d-th argument of internal' ' potrf' % -info) # Make symmetric (dpotri only fills in the lower triangle) a1[index][triu_idx] = a1[index][tril_idx] return a1
def test_ticket_1645(self):
    """Check that RQ routines have correct lwork.

    For a 300x2 zero matrix, ?gerqf must reject ``lwork=2``, and
    ?orgrq/?ungrq must reject ``lwork=1`` but accept ``lwork=2`` for the
    last two rows of the factor.
    """
    for dtype in DTYPES:
        a = np.zeros((300, 2), dtype=dtype)

        (gerqf,) = get_lapack_funcs(['gerqf'], [a])
        assert_raises(Exception, gerqf, a, lwork=2)
        rq, tau, work, info = gerqf(a)

        # real dtypes use orgrq, complex dtypes use ungrq
        if dtype in REAL_DTYPES:
            routine = 'orgrq'
        elif dtype in COMPLEX_DTYPES:
            routine = 'ungrq'
        else:
            continue
        (build_q,) = get_lapack_funcs([routine], [a])
        assert_raises(Exception, build_q, rq[-2:], tau, lwork=1)
        build_q(rq[-2:], tau, lwork=2)
def test_tfttr_trttf():
    """
    Test conversion routines between the Rectangular Full Packed (RFP) format
    and Standard Triangular Array (TR)
    """
    seed(1234)
    size = 20
    half = size//2
    for ind, dtype in enumerate(DTYPES):
        if ind > 1:
            full = (rand(size, size) + rand(size, size)*1j).astype(dtype)
            transr = 'C'
        else:
            full = (rand(size, size)).astype(dtype)
            transr = 'T'

        trttf, tfttr = get_lapack_funcs(('trttf', 'tfttr'), dtype=dtype)

        # TR -> RFP for each uplo / transr combination
        packed_U, info = trttf(full)
        assert_(info == 0)
        packed_L, info = trttf(full, uplo='L')
        assert_(info == 0)
        packed_U_T, info = trttf(full, transr=transr, uplo='U')
        assert_(info == 0)
        packed_L_T, info = trttf(full, transr=transr, uplo='L')
        assert_(info == 0)

        # Create the RFP array manually (n is even!)
        upper = triu(full)
        manual_U = zeros((size+1, half), dtype=dtype)
        manual_U[:-1, :] = upper[:, half:]
        manual_U[half+1:, :] += upper[:half, :half].conj().T

        lower = tril(full)
        manual_L = zeros((size+1, half), dtype=dtype)
        manual_L[1:, :] = lower[:, :half]
        manual_L[:half, :] += lower[half:, half:].conj().T

        assert_array_almost_equal(packed_U, manual_U.reshape(-1, order='F'))
        assert_array_almost_equal(packed_U_T,
                                  manual_U.conj().T.reshape(-1, order='F'))

        assert_array_almost_equal(packed_L, manual_L.reshape(-1, order='F'))
        assert_array_almost_equal(packed_L_T,
                                  manual_L.conj().T.reshape(-1, order='F'))

        # Get the original array from RFP
        unpacked_U, info = tfttr(size, packed_U)
        assert_(info == 0)
        unpacked_L, info = tfttr(size, packed_L, uplo='L')
        assert_(info == 0)
        unpacked_U_T, info = tfttr(size, packed_U_T, transr=transr, uplo='U')
        assert_(info == 0)
        unpacked_L_T, info = tfttr(size, packed_L_T, transr=transr, uplo='L')
        assert_(info == 0)

        assert_array_almost_equal(unpacked_U, triu(full))
        assert_array_almost_equal(unpacked_U_T, triu(full))
        assert_array_almost_equal(unpacked_L, tril(full))
        assert_array_almost_equal(unpacked_L_T, tril(full))
def qr_decomposition(la):
    """Reduced QR decomposition.

    The matrix is taken from ``la[0]`` and removed from `la`, so that this
    function holds the only reference to it while LAPACK ?geqrf / ?orgqr
    work on it.

    Returns the pair ``(q, r)``; `q` is asserted to be Fortran-contiguous.
    """
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function.
    print("%s %s" % ("+" * 100, "Performing reduced QR decomposition ..."))
    a = numpy.asfortranarray(la[0])
    del la[0], la  # `a` is now the only reference to the input matrix
    m, n = a.shape
    geqrf, = get_lapack_funcs(('geqrf',), (a,))
    # the lwork=-1 call is only used for the workspace size it reports
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n:
        # fewer rows than columns: keep only the leading m columns
        qr = qr[:, :m]
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
def qr_destroy(la):
    """
    Return QR decomposition of `la[0]`. Content of `la` gets destroyed in the process.

    The matrix is removed from `la` so that this function holds the only
    reference to it, which lets its memory be reclaimed as early as possible.

    Returns the pair `(q, r)`; `q` is asserted to be Fortran-contiguous.
    """
    a = numpy.asfortranarray(la[0])
    del la[0], la  # now `a` is the only reference to the input matrix
    m, n = a.shape
    # perform q, r = QR(a); code hacked out of scipy.linalg.qr
    # (arguments are passed to the logger so formatting is deferred)
    logger.debug("computing QR of %s dense matrix", str(a.shape))
    geqrf, = get_lapack_funcs(('geqrf',), (a,))
    # the lwork=-1 call is only used for the workspace size it reports
    qr, tau, work, info = geqrf(a, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(a, lwork=work[0], overwrite_a=True)
    del a  # free up mem
    assert info >= 0
    r = triu(qr[:n, :n])
    if m < n:  # rare case, #features < #topics
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous
    return q, r
def __init__(self, df, scale, size=1, preload=1, *args, **kwargs):
    """Set up the sampler by initializing the Wishart base, then swapping in
    the inverse-Wishart draw function.

    All arguments are forwarded unchanged to the parent initializer.
    """
    # Initialize the Wishart
    super(InverseWishart, self).__init__(df, scale, size, preload,
                                         *args, **kwargs)
    # Replace the wishart _rvs with an invwishart
    # (df, scale are the same; read back from the instance after base init)
    self._frozen = invwishart(self.df, self.scale)
    self._rvs = self._frozen._invwishart
    # Helpers for the triangular matrix inversion
    self._trtri = lapack.get_lapack_funcs('trtri', (self.scale,))
def test_gglse():
    """Solve an equality-constrained least-squares problem with ?gglse.

    Example data taken from NAG manual. All inputs are created with the
    dtype under test so that each routine is called with arrays of its own
    precision; the fourth output of the routine is compared with reference
    values.
    """
    for ind, dtype in enumerate(DTYPES):
        # DTYPES = <s,d,c,z> gglse
        func, func_lwork = get_lapack_funcs(('gglse', 'gglse_lwork'),
                                            dtype=dtype)
        lwork = _compute_lwork(func_lwork, m=6, n=4, p=2)
        # For <s,d>gglse
        if ind < 2:
            a = np.array([[-0.57, -1.28, -0.39, 0.25],
                          [-1.93, 1.08, -0.31, -2.14],
                          [2.30, 0.24, 0.40, -0.35],
                          [-1.93, 0.64, -0.66, 0.08],
                          [0.15, 0.30, 0.15, -2.13],
                          [-0.02, 1.03, -1.43, 0.50]], dtype=dtype)
            c = np.array([-1.50, -2.14, 1.23, -0.54, -1.68, 0.82], dtype=dtype)
            d = np.array([0., 0.], dtype=dtype)
        # For <c,z>gglse
        else:
            a = np.array([[0.96-0.81j, -0.03+0.96j, -0.91+2.06j, -0.05+0.41j],
                          [-0.98+1.98j, -1.20+0.19j, -0.66+0.42j, -0.81+0.56j],
                          [0.62-0.46j, 1.01+0.02j, 0.63-0.17j, -1.11+0.60j],
                          [0.37+0.38j, 0.19-0.54j, -0.98-0.36j, 0.22-0.20j],
                          [0.83+0.51j, 0.20+0.01j, -0.17-0.46j, 1.47+1.59j],
                          [1.08-0.28j, 0.20-0.12j, -0.07+1.23j, 0.26+0.26j]],
                         dtype=dtype)
            c = np.array([[-2.54+0.09j],
                          [1.65-2.26j],
                          [-2.11-3.96j],
                          [1.82+3.30j],
                          [-6.41+3.77j],
                          [2.07+0.66j]], dtype=dtype)
            d = np.zeros(2, dtype=dtype)

        # constraint matrix shared by the real and complex cases
        b = np.array([[1., 0., -1., 0.], [0., 1., 0., -1.]], dtype=dtype)

        _, _, _, result, _ = func(a, b, c, d, lwork=lwork)
        if ind < 2:
            expected = np.array([0.48904455,
                                 0.99754786,
                                 0.48904455,
                                 0.99754786])
        else:
            expected = np.array([1.08742917-1.96205783j,
                                 -0.74093902+3.72973919j,
                                 1.08742917-1.96205759j,
                                 -0.74093896+3.72973895j])
        assert_array_almost_equal(result, expected, decimal=4)
def test_gh_2691(self):
    """Check the 'lower' argument of dpotrf/dpotri.

    For each combination of `lower` and `clean`, the triangle selected by
    `lower` of the potri result must match the same triangle of ``inv(a)``.
    """
    for lower in [True, False]:
        # triangle that is compared for this setting of `lower`
        triangle = np.tril if lower else np.triu
        for clean in [True, False]:
            np.random.seed(42)
            x = np.random.normal(size=(3, 3))
            a = x.dot(x.T)

            dpotrf, dpotri = get_lapack_funcs(("potrf", "potri"), (a, ))

            c, info = dpotrf(a, lower, clean=clean)
            dpt = dpotri(c, lower)[0]

            assert_allclose(triangle(dpt), triangle(inv(a)))
def _rvs(self, n, shape, dim, df, C):
    """
    Parameters
    ----------
    n : integer
        Number of variates to generate
    shape : iterable
        Shape of the variates to generate
    dim : int
        Dimension of the scale matrix
    df : int
        Degrees of freedom
    C : ndarray
        Cholesky factorization of the scale matrix, lower triangular.

    Notes
    -----
    As this function does no argument checking, it should not be
    called directly; use 'rvs' instead.

    """
    # Get random draws A such that A ~ W(df, I)
    draws = super(invwishart_gen, self)._standard_rvs(n, shape, dim, df)

    # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
    identity = np.eye(dim)
    solve_triangular = get_lapack_funcs('trtrs', (draws,))

    for index in np.ndindex(draws.shape[:-2]):
        # Calculate CA
        factor = np.dot(C, draws[index])
        # Get (C A)^{-1} via triangular solver
        if dim <= 1:
            factor = 1. / factor
        else:
            factor, info = solve_triangular(factor, identity, lower=True)
            if info > 0:
                raise LinAlgError("Singular matrix.")
            if info < 0:
                raise ValueError('Illegal value in %d-th argument of'
                                 ' internal trtrs' % -info)
        # Get SA
        draws[index] = np.dot(factor.T, factor)

    return draws
def _lstsq(X, y, indices, fit_intercept): """Least Squares Estimator for TheilSenRegressor class. This function calculates the least squares method on a subset of rows of X and y defined by the indices array. Optionally, an intercept column is added if intercept is set to true. Parameters ---------- X : array, shape = [n_samples, n_features] Design matrix, where n_samples is the number of samples and n_features is the number of features. y : array, shape = [n_samples] Target vector, where n_samples is the number of samples. indices : array, shape = [n_subpopulation, n_subsamples] Indices of all subsamples with respect to the chosen subpopulation. fit_intercept : bool Fit intercept or not. Returns ------- weights : array, shape = [n_subpopulation, n_features + intercept] Solution matrix of n_subpopulation solved least square problems. """ fit_intercept = int(fit_intercept) n_features = X.shape[1] + fit_intercept n_subsamples = indices.shape[1] weights = np.empty((indices.shape[0], n_features)) X_subpopulation = np.ones((n_subsamples, n_features)) # gelss need to pad y_subpopulation to be of the max dim of X_subpopulation y_subpopulation = np.zeros((max(n_subsamples, n_features))) lstsq, = get_lapack_funcs(('gelss',), (X_subpopulation, y_subpopulation)) for index, subset in enumerate(indices): X_subpopulation[:, fit_intercept:] = X[subset, :] y_subpopulation[:n_subsamples] = y[subset] weights[index] = lstsq(X_subpopulation, y_subpopulation)[1][:n_features] return weights
def test_lartg():
    """Check the plane rotation generated by ?lartg for f = 3, g = 4 (or 4j).

    The cosine, sine and r outputs are compared with the expected values;
    for complex inputs the Python types of `r` and `cs` are checked with
    ``isinstance`` rather than an exact ``type(...) ==`` comparison.
    """
    for dtype in 'fdFD':
        lartg = get_lapack_funcs('lartg', dtype=dtype)

        f = np.array(3, dtype)
        g = np.array(4, dtype)

        if np.iscomplexobj(g):
            g *= 1j

        cs, sn, r = lartg(f, g)

        assert_allclose(cs, 3.0/5.0)
        assert_allclose(r, 5.0)

        if np.iscomplexobj(g):
            assert_allclose(sn, -4.0j/5.0)
            assert_(isinstance(r, complex))
            assert_(isinstance(cs, float))
        else:
            assert_allclose(sn, 4.0/5.0)
def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
              alpha_min=0, method='lar', copy_X=True,
              eps=np.finfo(float).eps,
              copy_Gram=True, verbose=False, return_path=True):
    """Compute Least Angle Regression and Lasso path

    The optimization objective for Lasso is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    Xy: array, optional
        Precomputed ``np.dot(X.T, y)``. It is copied before use; when
        None it is computed from ``X`` and ``y``.

    max_iter: integer, optional
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: {'lar', 'lasso'}
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    eps: float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X: bool
        If False, X is overwritten.

    copy_Gram: bool
        If False, Gram is overwritten.

    verbose: bool or int
        Any true value writes a progress dot to stdout; values greater
        than 1 print one table row per added or dropped variable.

    return_path: bool
        If True the whole path is returned, otherwise only its last point.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each iteration.
        Only the last value (1-element array) if ``return_path`` is False.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features + 1)
        Coefficients along the path. Only the last coefficient vector,
        shape (n_features,), if ``return_path`` is False.

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, ))
    solve_cholesky, = get_lapack_funcs(('potrs', ), (X, ))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif isinstance(Gram, str) and Gram == 'auto':
        # The isinstance guard keeps an ndarray Gram from being compared
        # element-wise against the string.
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(float).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        if return_path:
            # 1-element views into the path arrays so that writes through
            # alpha[0] / coef[:] land in alphas / coefs
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] <= alpha_min:  # early stopping
            if not alpha[0] == alpha_min:
                # interpolation factor 0 <= ss < 1
                if n_iter > 0:
                    # In the first iteration, all alphas are zero, the formula
                    # below would make ss a NaN
                    ss = ((prev_alpha[0] - alpha_min) /
                          (prev_alpha[0] - alpha[0]))
                    coef[:] = prev_coef + ss * (coef - prev_coef)
                alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov_not_shortened = Cov
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            if diag < 1e-7:
                # The system is becoming too ill-conditioned.
                # We have degenerate vectors in our active set.
                # We'll 'drop for good' the last regressor added
                # alpha[0] is used (not alphas[n_iter]) so the message also
                # works when return_path is False.
                warnings.warn(
                    ('Regressors in active set degenerate. '
                     'Dropping a regressor, after %i iterations, '
                     'i.e. alpha=%.3e, '
                     'with an active set of %i regressors, and '
                     'the smallest cholesky pivot element being %.3e')
                    % (n_iter, alpha[0], n_active, diag))
                # XXX: need to figure a 'drop for good' way
                Cov = Cov_not_shortened
                Cov[0] = 0
                Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
                continue

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C))

        if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]:
            # alpha is increasing. This is because the updates of Cov are
            # bringing in too much numerical error that is greater than
            # than the remaining correlation with the
            # regressors. Time to bail out
            warnings.warn(
                'Early stopping the lars path, as the residues '
                'are small and the current value of alpha is no longer '
                'well controled. %i iterations, alpha=%.3e, previous '
                'alpha=%.3e, with an active set of %i regressors'
                % (n_iter, alpha[0], prev_alpha[0], n_active))
            break

        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        if least_squares.size == 1 and least_squares == 0:
            # This happens because sign_active[:n_active] = 0
            least_squares[...] = 1
            AA = 1.
        else:
            # is this really needed ?
            AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

            if not np.isfinite(AA):
                # L is too ill-conditioned
                i = 0
                L_ = L[:n_active, :n_active].copy()
                while not np.isfinite(AA):
                    L_.flat[::n_active + 1] += (2**i) * eps
                    least_squares, info = solve_cholesky(
                        L_, sign_active[:n_active], lower=True)
                    tmp = max(np.sum(least_squares * sign_active[:n_active]),
                              eps)
                    AA = 1. / np.sqrt(tmp)
                    i += 1
            least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each unactive variables and
            # eqiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            # NOTE(review): idx is an index array; the drop handling further
            # down passes it to list.pop/range and so relies on it holding
            # exactly one element -- confirm against the NumPy in use.
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                # in-place ndarray.resize requires that no other reference
                # to the buffers is alive
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                coefs.resize((n_iter + add_features, n_features))
                alphas.resize(n_iter + add_features)
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = \
                        indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp)))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        return alphas, active, coefs.T
    else:
        return alpha, active, coef
# License: BSD Style. # avoid division truncation from __future__ import division import warnings import numpy as np from scipy.linalg.lapack import get_lapack_funcs from scipy import linalg from ..base import BaseEstimator from ..utils import array2d from ..utils.extmath import fast_logdet # import useful Lapack function to speedup matrices inversions getri, getrf = get_lapack_funcs(('getri', 'getrf'), (np.empty((), dtype=np.float64), np.empty((), dtype=np.float64))) def log_likelihood(emp_cov, precision): """Computes the log_likelihood of the data Params ------ emp_cov: 2D ndarray (n_features, n_features) Maximum Likelihood Estimator of covariance precision: 2D ndarray (n_features, n_features) The precision matrix of the covariance model to be tested """ return -np.sum(emp_cov * precision) + fast_logdet(precision)
def gram_omp(D_all, the_y, n_nonzero_coefs, tol_0=None, tol=None,
             copy_Gram=True, copy_Xy=True, return_path=False,
             start_change=None):
    """Orthogonal Matching Pursuit with a reciprocal-residual warm-up phase.

    The Gram matrix ``D_all.T @ D_all`` and the covariance vector
    ``D_all.T @ the_y`` are built internally and the active set is grown
    with the Cholesky decomposition method. While fewer than
    ``start_change`` atoms are active, the next atom is the not yet selected
    position whose ``|D_all.T @ (1 / residual)|`` is closest to
    ``len(the_y)``; afterwards it is the position with the largest
    ``|Xy - Gram[:, :n_active] @ gamma|``.

    Parameters
    ----------
    D_all : ndarray of shape (n_samples, n_atoms)
        Dictionary whose columns are the candidate atoms.

    the_y : ndarray of shape (n_samples,)
        Input target.

    n_nonzero_coefs : int
        Targeted number of non-zero elements.

    tol_0 : float, default=None
        Unused; kept so existing callers keep working.

    tol : float, default=None
        If not None the iteration limit becomes the number of atoms instead
        of ``n_nonzero_coefs``. No tolerance-based stopping is performed.

    copy_Gram : bool, default=True
        Whether the internally computed Gram matrix is copied into Fortran
        order or merely converted with ``np.asfortranarray``.

    copy_Xy : bool, default=True
        Whether the internally computed covariance vector is copied before
        it is permuted.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path.

    start_change : int, default=None
        Number of active atoms below which the reciprocal-residual selection
        rule is used. None selects the built-in default of 10.

    Returns
    -------
    gamma : ndarray of shape (n_active,)
        Non-zero elements of the solution.

    idx : ndarray of shape (n_active,)
        Indices of the positions of the elements in gamma within the
        solution vector.

    coefs : ndarray of shape (max_features, n_active)
        The first k values of column k - 1 are the coefficients of the
        active atoms after step k. The rest of each column is
        uninitialised. Only returned if ``return_path=True``.

    n_active : int
        Number of active features when the loop stopped.
    """
    Gram = np.dot(D_all.T, D_all)
    Xy = np.dot(D_all.T, the_y)

    # Reciprocal of the residual, with the division-by-zero results
    # (+inf) replaced by zero.
    residual = np.copy(the_y)
    with np.errstate(divide='ignore'):
        resi_reci = 1. / residual
    resi_reci[resi_reci == np.inf] = 0.0
    D_resi = np.dot(D_all.T, resi_reci)

    Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
    # Row-per-atom copy that is permuted alongside Gram during warm-up
    D_all_T = np.copy(D_all.T)

    if copy_Xy or not Xy.flags.writeable:
        Xy = Xy.copy()

    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, ))
    potrs, = get_lapack_funcs(('potrs', ), (Gram, ))

    indices = np.arange(len(Gram))  # keeping track of swapping
    alpha = Xy
    gamma = np.empty(0)
    n_active = 0

    max_features = len(Gram) if tol is not None else n_nonzero_coefs

    # Cholesky factor of the active Gram sub-matrix; only the lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=Gram.dtype)
    L[0, 0] = 1.

    if return_path:
        coefs = np.empty_like(L)

    if start_change is None:
        # NOTE(review): the original source line also carried the values
        # "start_change=17" (annotated "for ethnic") and a commented-out
        # "start_change=10 #for clothes origin:62.87% angle:65.21%"; 10 is
        # assumed to be the live default -- confirm with the author.
        start_change = 10

    while True:
        if n_active < start_change:
            # warm-up rule, restricted to the not yet selected positions
            lam = np.argmin(
                np.abs(abs(D_resi[n_active:]) - the_y.shape[0])) + n_active
        else:
            lam = np.argmax(np.abs(alpha))

        if alpha[lam]**2 < min_float:
            # selected same atom twice, or inner product too small
            print("1 found problem")
            sys.stdout.flush()
            break

        if lam < n_active:
            # selected same atom twice, or inner product too small
            print("2 found problem")
            sys.stdout.flush()
            break

        if n_active > 0:
            # Extend the Cholesky factor by one row for the new atom
            L[n_active, :n_active] = Gram[lam, :n_active]
            linalg.solve_triangular(L[:n_active, :n_active],
                                    L[n_active, :n_active],
                                    trans=0, lower=1,
                                    overwrite_b=True,
                                    check_finite=False)
            v = nrm2(L[n_active, :n_active])**2
            Lkk = Gram[lam, lam] - v
            if Lkk <= min_float:
                # selected atoms are dependent
                print("3 found problem")
                sys.stdout.flush()
                break
            L[n_active, n_active] = sqrt(Lkk)
        else:
            L[0, 0] = sqrt(Gram[lam, lam])

        # Move the selected atom to position n_active everywhere
        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        if n_active < start_change:
            temp = np.copy(D_all_T[lam])
            D_all_T[lam] = D_all_T[n_active]
            D_all_T[n_active] = temp
        n_active += 1

        # solves LL'x = X'y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,
                         overwrite_b=False)
        if return_path:
            coefs[:n_active, n_active - 1] = gamma

        if n_active < start_change:
            Y_pre = np.dot(D_all_T[:n_active].T, gamma)
            residual = the_y - Y_pre
            resi_temp = np.copy(residual)
            # Shift factor; the original source annotated 15 as the value
            # "for ethnic" and 2 as the value "for clothes".
            lambda_of_norm_resi = 2
            min_temp = abs(resi_temp.min()) * lambda_of_norm_resi
            resi_temp += min_temp
            resi_temp = preprocessing.normalize(resi_temp.reshape(1, -1),
                                                norm='l2')[0]
            with np.errstate(divide='ignore'):
                resi_reci = 1. / resi_temp
            resi_reci[resi_reci == np.inf] = 0.0
            D_resi = np.dot(D_all.T, resi_reci)
        else:
            beta = np.dot(Gram[:, :n_active], gamma)
            alpha = Xy - beta

        if n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    else:
        return gamma, indices[:n_active], n_active
def gram_omp_ano(D_all, the_y, n_nonzero_coefs, tol_0=None, tol=None,
                 copy_Gram=True, copy_Xy=True, return_path=False):
    """Greedy atom selection driven by the reciprocal of the residual.

    At every step the atom whose ``D_all.T @ (1 / residual)`` entry is
    closest to ``len(the_y)`` is added to the selection (already selected
    atoms are excluded after the first step). The coefficients of the
    selected atoms are then obtained from the normal equations against the
    current residual, and the residual is reduced by the fitted part.

    Parameters
    ----------
    D_all : ndarray of shape (n_samples, n_atoms)
        Dictionary whose columns are the candidate atoms.

    the_y : ndarray of shape (n_samples,)
        Input target. It is copied, not modified.

    n_nonzero_coefs : int
        Number of atoms to select before stopping.

    tol_0, tol, copy_Gram, copy_Xy
        Unused; kept so existing callers keep working.

    return_path : bool, default=False
        Whether to also return the coefficients found at every step.

    Returns
    -------
    gamma : ndarray
        Coefficients of the selected atoms, ordered by atom index.

    selected : ndarray of bool, shape (n_atoms,)
        Mask of the selected atoms.

    coefs : ndarray of shape (n_active, n_active)
        Column k holds, in its leading entries, the coefficients computed
        at step k + 1; the remaining entries are zero. Only returned if
        ``return_path=True``.

    n_active : int
        Number of completed selection steps.
    """
    residual = np.copy(the_y)
    with np.errstate(divide='ignore'):
        resi_reci = 1. / residual
    # division-by-zero results (+inf) are treated as zero
    resi_reci[resi_reci == np.inf] = 0.0
    D_resi = np.dot(D_all.T, resi_reci)

    min_float = np.finfo(D_all.dtype).eps

    gamma = np.empty(0)
    n_active = 0
    idx_used = np.zeros(D_all.shape[1], dtype=int)
    path = []

    while True:
        # Stop before selecting anything new once the residual has vanished,
        # so that the returned mask always matches gamma.
        if abs(residual).sum()**2 < min_float:
            print("1 found problem")
            sys.stdout.flush()
            break

        lam = np.argmin(np.abs(D_resi - the_y.shape[0]))
        idx_used[lam] = 1
        n_active += 1

        # Normal equations on the selected atoms against the residual
        D_part = D_all[:, idx_used == 1]
        Dy = np.dot(D_part.T, residual)
        gamma = np.dot(np.linalg.inv(np.dot(D_part.T, D_part)), Dy)
        if return_path:
            path.append(gamma)

        if n_nonzero_coefs == n_active:
            break

        Y_pre = np.dot(D_part, gamma)
        residual -= Y_pre
        print(abs(residual).sum())
        with np.errstate(divide='ignore'):
            resi_reci = 1. / residual
        resi_reci[resi_reci == np.inf] = 0.0
        D_resi = np.dot(D_all.T, resi_reci)
        # make sure an already selected atom is not the closest one again
        D_resi[idx_used == 1] = np.inf

    if return_path:
        coefs = np.zeros((n_active, n_active))
        for step, step_gamma in enumerate(path):
            coefs[:len(step_gamma), step] = step_gamma
        return gamma, idx_used == 1, coefs, n_active
    else:
        return gamma, idx_used == 1, n_active
def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
              alpha_min=0, method='lar', copy_X=True,
              eps=np.finfo(float).eps,
              copy_Gram=True, verbose=0, return_path=True,
              return_n_iter=False):
    """Compute Least Angle Regression or Lasso path using LARS algorithm [1]

    The optimization objective for the case method='lasso' is::

    (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1

    in the case of method='lars', the objective function is only known in
    the form of an implicit equation (see discussion in [1])

    Parameters
    -----------
    X : array, shape: (n_samples, n_features)
        Input data.

    y : array, shape: (n_samples)
        Input targets.

    Xy : array, optional (default=None)
        Precomputed ``np.dot(X.T, y)``. It is copied before use; when
        None it is computed from ``X`` and ``y``.

    max_iter : integer, optional (default=500)
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram : None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if ``'auto'``, the Gram
        matrix is precomputed from the given X, if there are more samples
        than features.

    alpha_min : float, optional (default=0)
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method : {'lar', 'lasso'}, optional (default='lar')
        Specifies the returned model. Select ``'lar'`` for Least Angle
        Regression, ``'lasso'`` for the Lasso.

    eps : float, optional (default=``np.finfo(float).eps``)
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_X : bool, optional (default=True)
        If ``False``, ``X`` is overwritten.

    copy_Gram : bool, optional (default=True)
        If ``False``, ``Gram`` is overwritten.

    verbose : int (default=0)
        Controls output verbosity.

    return_path : bool, optional (default=True)
        If ``return_path==True`` returns the entire path, else returns only the
        last point of the path.

    return_n_iter : bool, optional (default=False)
        Whether to return the number of iterations.

    Returns
    --------
    alphas : array, shape: [n_alphas + 1]
        Maximum of covariances (in absolute value) at each iteration.
        ``n_alphas`` is either ``max_iter``, ``n_features`` or the
        number of nodes in the path with ``alpha >= alpha_min``, whichever
        is smaller.

    active : array, shape [n_alphas]
        Indices of active variables at the end of the path.

    coefs : array, shape (n_features, n_alphas + 1)
        Coefficients along the path

    n_iter : int
        Number of iterations run. Returned only if return_n_iter is set
        to True.

    See also
    --------
    lasso_path
    LassoLars
    Lars
    LassoLarsCV
    LarsCV
    sklearn.decomposition.sparse_encode

    References
    ----------
    .. [1] "Least Angle Regression", Efron et al.
           http://www-stat.stanford.edu/~tibs/ftp/lars.pdf

    .. [2] `Wikipedia entry on the Least-angle regression
           <http://en.wikipedia.org/wiki/Least-angle_regression>`_

    .. [3] `Wikipedia entry on the Lasso
           <http://en.wikipedia.org/wiki/Lasso_(statistics)#Lasso_method>`_

    """

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    if return_path:
        coefs = np.zeros((max_features + 1, n_features))
        alphas = np.zeros(max_features + 1)
    else:
        coef, prev_coef = np.zeros(n_features), np.zeros(n_features)
        alpha, prev_alpha = np.array([0.]), np.array([0.])  # better ideas?

    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    # We are initializing this to "zeros" and not empty, because
    # it is passed to scipy linalg functions and thus if it has NaNs,
    # even if they are in the upper part that it not used, we
    # get errors raised.
    # Once we support only scipy > 0.12 we can use check_finite=False and
    # go back to "empty"
    L = np.zeros((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, ))
    solve_cholesky, = get_lapack_funcs(('potrs', ), (X, ))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif isinstance(Gram, str) and Gram == 'auto':
        # The isinstance guard keeps an ndarray Gram from being compared
        # element-wise against the string.
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        if verbose > 1:
            print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")
        else:
            sys.stdout.write('.')
            sys.stdout.flush()

    tiny = np.finfo(float).tiny  # to avoid division by 0 warning
    tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning
    equality_tolerance = np.finfo(np.float32).eps

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        if return_path:
            # 1-element views into the path arrays so that writes through
            # alpha[0] / coef[:] land in alphas / coefs
            alpha = alphas[n_iter, np.newaxis]
            coef = coefs[n_iter]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
            prev_coef = coefs[n_iter - 1]

        alpha[0] = C / n_samples
        if alpha[0] <= alpha_min + equality_tolerance:  # early stopping
            if abs(alpha[0] - alpha_min) > equality_tolerance:
                # interpolation factor 0 <= ss < 1
                if n_iter > 0:
                    # In the first iteration, all alphas are zero, the formula
                    # below would make ss a NaN
                    ss = ((prev_alpha[0] - alpha_min) /
                          (prev_alpha[0] - alpha[0]))
                    coef[:] = prev_coef + ss * (coef - prev_coef)
                alpha[0] = alpha_min
            if return_path:
                coefs[n_iter] = coef
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov_not_shortened = Cov
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            if n_active:
                linalg.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active],
                                        trans=0, lower=1,
                                        overwrite_b=True,
                                        **solve_triangular_args)

            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            if diag < 1e-7:
                # The system is becoming too ill-conditioned.
                # We have degenerate vectors in our active set.
                # We'll 'drop for good' the last regressor added.

                # Note: this case is very rare. It is no longer triggered by the
                # test suite. The `equality_tolerance` margin added in 0.16.0 to
                # get early stopping to work consistently on all versions of
                # Python including 32 bit Python under Windows seems to make it
                # very difficult to trigger the 'drop for good' strategy.
                warnings.warn('Regressors in active set degenerate. '
                              'Dropping a regressor, after %i iterations, '
                              'i.e. alpha=%.3e, '
                              'with an active set of %i regressors, and '
                              'the smallest cholesky pivot element being %.3e'
                              % (n_iter, alpha[0], n_active, diag),
                              ConvergenceWarning)
                # XXX: need to figure a 'drop for good' way
                Cov = Cov_not_shortened
                Cov[0] = 0
                Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
                continue

            active.append(indices[n_active])
            n_active += 1

            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C))

        if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]:
            # alpha is increasing. This is because the updates of Cov are
            # bringing in too much numerical error that is greater than
            # than the remaining correlation with the
            # regressors. Time to bail out
            warnings.warn('Early stopping the lars path, as the residues '
                          'are small and the current value of alpha is no '
                          'longer well controlled. %i iterations, alpha=%.3e, '
                          'previous alpha=%.3e, with an active set of %i '
                          'regressors.'
                          % (n_iter, alpha[0], prev_alpha[0], n_active),
                          ConvergenceWarning)
            break

        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        if least_squares.size == 1 and least_squares == 0:
            # This happens because sign_active[:n_active] = 0
            least_squares[...] = 1
            AA = 1.
        else:
            # is this really needed ?
            AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))

            if not np.isfinite(AA):
                # L is too ill-conditioned
                i = 0
                L_ = L[:n_active, :n_active].copy()
                while not np.isfinite(AA):
                    L_.flat[::n_active + 1] += (2**i) * eps
                    least_squares, info = solve_cholesky(
                        L_, sign_active[:n_active], lower=True)
                    tmp = max(np.sum(least_squares * sign_active[:n_active]),
                              eps)
                    AA = 1. / np.sqrt(tmp)
                    i += 1
            least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each unactive variables and
            # eqiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coef[active] / (least_squares + tiny32)
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            # (reversed so that pops/deletes go from the highest index down)
            idx = np.where(z == z_pos)[0][::-1]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if return_path:
            if n_iter >= coefs.shape[0]:
                del coef, alpha, prev_alpha, prev_coef
                # resize the coefs and alphas array
                add_features = 2 * max(1, (max_features - n_active))
                # np.resize fills the added space with repeated copies of
                # the old data, so the new rows are zeroed explicitly.
                coefs = np.resize(coefs, (n_iter + add_features, n_features))
                coefs[-add_features:] = 0
                alphas = np.resize(alphas, n_iter + add_features)
                alphas[-add_features:] = 0
            coef = coefs[n_iter]
            prev_coef = coefs[n_iter - 1]
            alpha = alphas[n_iter, np.newaxis]
            prev_alpha = alphas[n_iter - 1, np.newaxis]
        else:
            # mimic the effect of incrementing n_iter on the array references
            prev_coef = coef
            prev_alpha[0] = alpha[0]
            coef = np.zeros_like(coef)

        coef[active] = prev_coef[active] + gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            # handle the case when idx is not length of 1
            for ii in idx:
                arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii)

            n_active -= 1
            m, n = idx, n_active
            # handle the case when idx is not length of 1
            drop_idx = [active.pop(ii) for ii in idx]

            if Gram is None:
                # propagate dropped variable
                for ii in idx:
                    for i in range(ii, n_active):
                        X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                        # yeah this is stupid
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active], coef[active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for ii in idx:
                    for i in range(ii, n_active):
                        indices[i], indices[i + 1] = indices[i + 1], indices[i]
                        Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                        Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                          Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coef)
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose > 1:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp)))

    if return_path:
        # resize coefs in case of early stop
        alphas = alphas[:n_iter + 1]
        coefs = coefs[:n_iter + 1]

        if return_n_iter:
            return alphas, active, coefs.T, n_iter
        else:
            return alphas, active, coefs.T
    else:
        if return_n_iter:
            return alpha, active, coef, n_iter
        else:
            return alpha, active, coef
def test_gels(self):
    """Exercise the ``?gels`` LAPACK wrappers.

    First, a 10 x 20 random system is solved for every dtype in ``DTYPES``,
    once with default arguments and once with ``trans`` set to the letter of
    ``'TTCC'`` at the dtype's position; only ``info >= 0`` is required
    (gh-issue #8329). Then a fixed 3 x 2 real and a fixed 3 x 2 complex
    system are solved and both the solution and the returned factorization
    (against ``?geqrf``) are checked.
    """
    seed(1234)
    # Test fat/tall matrix argument handling - gh-issue #8329
    for position, dtype in enumerate(DTYPES):
        n_rows, n_cols, n_rhs = 10, 20, 1
        fat = rand(n_rows, n_cols).astype(dtype)
        rhs = rand(n_cols).astype(dtype)
        gls, glslw = get_lapack_funcs(('gels', 'gels_lwork'), dtype=dtype)

        # Request of sizes
        lwork = _compute_lwork(glslw, n_rows, n_cols, n_rhs)
        _, _, info = gls(fat, rhs, lwork=lwork)
        assert_(info >= 0)
        _, _, info = gls(fat, rhs, trans='TTCC'[position], lwork=lwork)
        assert_(info >= 0)

    # (dtypes, matrix rows, right-hand side, expected leading solution)
    real_case = (
        REAL_DTYPES,
        [[1.0, 2.0],
         [4.0, 5.0],
         [7.0, 8.0]],
        [16.0, 17.0, 20.0],
        [-14.333333333333323, 14.999999999999991],
    )
    complex_case = (
        COMPLEX_DTYPES,
        [[1.0+4.0j, 2.0],
         [4.0+0.5j, 5.0-3.0j],
         [7.0-2.0j, 8.0+0.7j]],
        [16.0, 17.0+2.0j, 20.0-4.0j],
        [1.161753632288328-1.901075709391912j,
         1.735882340522193+1.521240901196909j],
    )

    for dtypes, a_rows, b_values, x_expected in (real_case, complex_case):
        for dtype in dtypes:
            a1 = np.array(a_rows, dtype=dtype)
            b1 = np.array(b_values, dtype=dtype)
            gels, gels_lwork, geqrf = get_lapack_funcs(
                ('gels', 'gels_lwork', 'geqrf'), (a1, b1))

            m, n = a1.shape
            nrhs = b1.shape[1] if len(b1.shape) == 2 else 1

            # Request of sizes
            lwork = _compute_lwork(gels_lwork, m, n, nrhs)

            lqr, x, info = gels(a1, b1, lwork=lwork)
            assert_allclose(x[:-1],
                            np.array(x_expected, dtype=dtype),
                            rtol=25*np.finfo(dtype).eps)
            # The factorization returned by gels must equal geqrf's
            lqr_truth, _, _, _ = geqrf(a1)
            assert_array_equal(lqr, lqr_truth)
def multiple_fast_inv(a):
    """
    Compute the inverse of a set of arrays in-place

    Parameters
    ----------
    a: array_like of shape (n_samples, M, M)
        Set of square matrices to be inverted. When `a` is already an
        ndarray it is changed in place. Any other array_like is first
        converted with ``np.asarray_chkfinite`` and the converted array is
        the one that is overwritten and returned.

    Returns
    -------
    a: ndarray shape (n_samples, M, M)
        The input array `a`, overwritten with the inverses of the original 2D
        arrays in ``a[0], a[1], ...``.  Thus ``a[0]`` replaced with
        ``inv(a[0])`` etc. An empty batch (``n_samples == 0``) is returned
        unchanged.

    Raises
    ------
    LinAlgError :
        If any matrix in `a` is singular.
    ValueError :
        If `a` does not have shape (n_samples, M, M), if it contains NaN or
        infinity, or if LAPACK reports an illegal argument.

    Notes
    -----
    This function is copied from scipy.linalg.inv, but with some
    customizations for speed-up from operating on multiple arrays.  It also has
    some conditionals to work with different scipy versions.
    """
    # Consider errors for sparse, masked, object arrays, as for
    # _asarray_validated?
    from scipy.linalg.lapack import get_lapack_funcs

    # Convert before touching ``.shape`` so that plain sequences are accepted.
    a = np.asarray_chkfinite(a)
    if a.ndim != 3 or a.shape[1] != a.shape[2]:
        raise ValueError('a must have shape(n_samples, M, M)')
    M = a.shape[1]
    if a.shape[0] == 0:
        # Nothing to invert; ``a[0]`` below would raise IndexError.
        return a

    getrf, getri = get_lapack_funcs(('getrf', 'getri'), (a[0],))
    # Calculate lwork on different scipy versions
    try:
        getri_lwork, = get_lapack_funcs(('getri_lwork',), (a[0],))
    except (ValueError, AttributeError):  # scipy < 0.15
        # scipy 0.10, 0.11 -> AttributeError
        # scipy 0.12, 0.13, 0.14 -> ValueError
        from scipy.linalg import calc_lwork
        lwork = calc_lwork.getri(getri.prefix, M)[1]
    else:  # scipies >= 0.15 have getri_lwork function
        lwork, info = getri_lwork(M)
        if info != 0:
            raise ValueError('internal getri work space query failed: %d' %
                             (info,))
        lwork = int(lwork.real)
    # XXX: the following line fixes curious SEGFAULT when
    # benchmarking 500x500 matrix inverse. This seems to
    # be a bug in LAPACK ?getri routine because if lwork is
    # minimal (when using lwork[0] instead of lwork[1]) then
    # all tests pass. Further investigation is required if
    # more such SEGFAULTs occur.
    lwork = int(1.01 * lwork)

    for i, ai in enumerate(a):
        lu, piv, info = getrf(ai, overwrite_a=True)
        if info == 0:
            # Only attempt the inversion when the LU factorization succeeded.
            a[i], info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
        if info > 0:
            raise np.linalg.LinAlgError("singular matrix")
        if info < 0:
            raise ValueError('illegal value in %d-th argument of internal '
                             'getrf|getri' % -info)
    return a
""" try: info = checkPD.potrf(A, lower = False, overwrite_a = False, clean = False)[1] except: info = 1 if info: if exception: raise ValueError("The matrix is not positive definite") else: return False return True checkPD.potrf, = get_lapack_funcs(('potrf',)) ### S(\alpha) ### def Sdef(M0, M1, alpha): """ :math:`S(\\alpha)`, as defined in the paper. Parameters ---------- M0 : ndarray or matrix A symmetric indefinite matrix to be shrunk. M1 : ndarray or matrix A positive definite target matrix.
def symeig_semidefinite_ldl(
        A, B = None, eigenvectors=True, turbo="on", rng=None,
        type=1, overwrite=False, rank_threshold=1e-12, dfc_out=None):
    """
    LDL-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.
    This can be used in case the normal symeig() call in _stop_training()
    throws SymeigException ('Covariance matrices may be singular').

    This solver uses SciPy's raw LAPACK interface to access LDL decomposition.
    www.netlib.org/lapack/lug/node54.html describes how to solve a
    generalized eigenvalue problem with positive definite B using Cholesky/LL
    decomposition. We extend this method to solve for positive semidefinite B
    using LDL decomposition, which is a variant of Cholesky/LL decomposition
    for indefinite Matrices.
    Accessing raw LAPACK's LDL decomposition (sytrf) is challenging. This code
    is partly based on code for SciPy 1.1:
    github.com/scipy/scipy/pull/7941/files#diff-9bf9b4b2f0f40415bc0e72143584c889
    We optimized and shortened that code for the real-valued positive
    semidefinite case.

    This procedure is almost as efficient as the ordinary eigh implementation.
    This is because implementations for symmetric generalized eigenvalue
    problems usually perform the Cholesky approach mentioned above. The more
    general LDL decomposition is only slightly more expensive than Cholesky,
    due to pivotization.

    The signature of this function equals that of mdp.utils.symeig, but
    has two additional parameters:

    rank_threshold: A threshold to determine if an eigenvalue counts as zero.

    dfc_out: If dfc_out is not None dfc_out.rank_deficit will be set to an
             integer indicating how many zero-eigenvalues were detected.

    Of the shared parameters, only ``turbo`` and ``rng`` are forwarded to the
    inner mdp.utils.symeig call; ``eigenvectors`` is not consulted by this
    routine (the inner call always requests eigenvectors) and ``type`` must
    be 1. ``overwrite`` allows the LAPACK factorization to overwrite B and
    lets A be permuted in place instead of being copied.

    Returns the pair ``(eg, ev)`` produced by the inner symeig call, with
    ``ev`` transformed back to the original (un-permuted, full-size) basis.

    Raises ValueError if ``type != 1`` and SymeigException if the required
    LAPACK/BLAS routines cannot be obtained.

    Note:
    This method requires SciPy >= 1.0.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')

    # LDL-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if not nonzero_idx is None:
        # remember the full size so that ev can be padded back at the end
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    # This method has special requirements, which is why we import here
    # rather than module wide.
    from scipy.linalg.lapack import get_lapack_funcs, _compute_lwork
    from scipy.linalg.blas import get_blas_funcs
    try:
        inv_tri, solver, solver_lwork = get_lapack_funcs(
            ('trtri', 'sytrf', 'sytrf_lwork'), (B,))
        mult_tri, = get_blas_funcs(('trmm',), (B,))
    except ValueError:
        err_msg = ("ldl method for solving symeig with rank deficit B "
                   "requires at least SciPy 1.0.")
        raise SymeigException(err_msg)

    n = B.shape[0]
    arng = numx.arange(n)
    lwork = _compute_lwork(solver_lwork, n, lower=1)
    lu, piv, _ = solver(B, lwork=lwork, lower=1, overwrite_a=overwrite)

    # using piv properly requires some postprocessing:
    swap_ = numx.arange(n)
    # pivs[ind] records the block size (1 or 2) starting at row ind; it stays
    # 0 for the second row of a 2x2 block.
    pivs = numx.zeros(swap_.shape, dtype=int)
    skip_2x2 = False
    for ind in range(n):
        # If previous spin belonged already to a 2x2 block
        if skip_2x2:
            skip_2x2 = False
            continue

        cur_val = piv[ind]
        # do we have a 1x1 block or not?
        if cur_val > 0:
            if cur_val != ind+1:
                # Index value != array value --> permutation required
                swap_[ind] = swap_[cur_val-1]
            pivs[ind] = 1
        # Not.
        elif cur_val < 0 and cur_val == piv[ind+1]:
            # first neg entry of 2x2 block identifier
            if -cur_val != ind+2:
                # Index value != array value --> permutation required
                swap_[ind+1] = swap_[-cur_val-1]
            pivs[ind] = 2
            skip_2x2 = True

    # Replay the recorded swaps from the last row upwards, applying them to
    # lu and accumulating the overall permutation in full_perm.
    full_perm = numx.arange(n)
    for ind in range(n-1, -1, -1):
        s_ind = swap_[ind]
        if s_ind != ind:
            col_s = ind if pivs[ind] else ind-1  # 2x2 block
            lu[[s_ind, ind], col_s:] = lu[[ind, s_ind], col_s:]
            full_perm[[s_ind, ind]] = full_perm[[ind, s_ind]]
    # usually only a few indices actually permute, so we reduce perm:
    perm = (full_perm-arng).nonzero()[0]
    perm_idx = full_perm[perm]
    # end of ldl postprocessing
    # perm_idx and perm now describe a permutation as dest and source indexes

    lu[perm_idx, :] = lu[perm, :]

    # Diagonal entries at or below rank_threshold are treated as zero.
    dgd = abs(numx.diag(lu))
    dnz = (dgd > rank_threshold).nonzero()[0]
    dgd_sqrt_I = numx.sqrt(1.0/dgd[dnz])
    rank_deficit = len(dgd) - len(dnz)  # later used

    # c, lower, unitdiag, overwrite_c
    LI, _ = inv_tri(lu, 1, 1, 1)  # invert triangular
    # we mainly apply tril here, because we need to make a
    # copy of LI anyway, because original result from
    # dtrtri seems to be read-only regarding some operations
    LI = numx.tril(LI, -1)
    LI[arng, arng] = 1
    # scale the rows belonging to non-zero diagonal entries by 1/sqrt(d)
    LI[dnz, :] *= dgd_sqrt_I.reshape((dgd_sqrt_I.shape[0], 1))

    A2 = A if overwrite else A.copy()
    A2[perm_idx, :] = A2[perm, :]
    A2[:, perm_idx] = A2[:, perm]
    # alpha, a, b, side 0=left 1=right, lower, trans_a, diag 1=unitdiag,
    # overwrite_b
    A2 = mult_tri(1.0, LI, A2, 1, 1, 1, 0, 1)  # A2 = mult(A2, LI.T)
    A2 = mult_tri(1.0, LI, A2, 0, 1, 0, 0, 1)  # A2 = mult(LI, A2)
    # keep only the rows/columns that belong to non-zero diagonal entries
    A2 = A2[dnz, :]
    A2 = A2[:, dnz]

    # overwrite=True is okay here, because at this point A2 is a copy anyway
    eg, ev = mdp.utils.symeig(A2, None, True, turbo, rng, overwrite=True)
    ev = mdp.utils.mult(LI[dnz].T, ev) if rank_deficit \
        else mult_tri(1.0, LI, ev, 0, 1, 1, 0, 1)
    # undo the pivot permutation on the eigenvector rows
    ev[perm] = ev[perm_idx]

    if not nonzero_idx is None:
        # restore ev to original size
        rank_deficit += orig_shape[0]-len(nonzero_idx)
        ev_tmp = ev
        ev = numx.zeros((orig_shape[0], ev.shape[1]))
        ev[nonzero_idx, :] = ev_tmp

    if not dfc_out is None:
        dfc_out.rank_deficit = rank_deficit
    return eg, ev
def merge(self, other, decay=1.0):
    """
    Merge this Projection with another.

    The content of `other` is destroyed in the process, so pass this function a
    copy of `other` if you need it further.

    `decay` scales this projection's singular values (``decay * self.s``)
    before they are combined with those of `other`.

    Nothing is returned; `self.u` and `self.s` are updated in place. Raises
    ValueError if both projections are non-empty and their `m` attributes
    differ, and ImportError if a sparse SVD is needed but `sparsesvd` is
    not installed.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        if other.s is None:
            # other.u contains a direct document chunk, not svd => perform svd
            docs = other.u
            assert scipy.sparse.issparse(docs)
            if self.m * self.k < 10000:
                # SVDLIBC gives spurious results for small matrices.. run full
                # LAPACK on them instead
                logger.info("computing dense SVD of %s matrix" % str(docs.shape))
                u, s, vt = numpy.linalg.svd(docs.todense(), full_matrices=False)
            else:
                try:
                    import sparsesvd
                except ImportError:
                    raise ImportError(
                        "for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`"
                    )
                logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
                # ask for a few extra factors, because for some reason
                # SVDLIBC sometimes returns fewer factors than requested
                ut, s, vt = sparsesvd.sparsesvd(docs, self.k + 30)
                u = ut.T
                del ut
            del vt
            k = clipSpectrum(s**2, self.k)
            self.u = u[:, :k].copy('F')
            self.s = s[:k]
        else:
            self.u = other.u.copy('F')
            self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError(
            "vector space mismatch: update has %s features, expected %s" %
            (other.m, self.m))
    logger.info("merging projections: %s + %s" %
                (str(self.u.shape), str(other.u.shape)))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    if other.s is None:
        other.u = other.u.todense()
        other.s = 1.0  # broadcasting will promote this to eye(n2) where needed
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with ORGQR.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in scipy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in memory suitable order for matrix products;
    # failing to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(self.u)  # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm = matutils.blas('gemm', self.u)
    logger.debug("constructing orthogonal component")
    c = gemm(1.0, self.u, other.u, trans_a=True)
    # other.u <- other.u - self.u * c, computed in place
    gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
    # first call (lwork=-1) only queries the workspace size
    qr, tau, work, info = geqrf(other.u, lwork=-1, overwrite_a=True)
    qr, tau, work, info = geqrf(other.u, lwork=work[0], overwrite_a=True)
    del other.u
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2:  # rare case, #features < #topics
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find the rotation that diagonalizes r
    k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s],
                    [matutils.pad(numpy.matrix([]).reshape(0, 0), min(m, n2), n1),
                     r * other.s]])
    logger.debug("computing SVD of %s dense matrix" % str(k.shape))
    try:
        # in numpy < 1.1.0, running SVD sometimes results in "LinAlgError: SVD did not converge'.
        # for these early versions of numpy, catch the error and try to compute
        # SVD again, but over k*k^T.
        # see http://www.mail-archive.com/[email protected]/msg07224.html and
        # bug ticket http://projects.scipy.org/numpy/ticket/706
        # TODO *ugly overkill*!! only need first self.k SVD factors... but
        # there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(
        u_k, s_k, _ = numpy.linalg.svd(k, full_matrices=False)
    except numpy.linalg.LinAlgError:
        # report through the module logger, like every other message here
        logger.error("SVD(A) failed; trying SVD(A * A^T)")
        # if this fails too, give up
        u_k, s_k, _ = numpy.linalg.svd(numpy.dot(k, k.T), full_matrices=False)
        s_k = numpy.sqrt(s_k)

    k = clipSpectrum(s_k**2, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    # TODO temporarily creates an extra (m,k) dense array in memory. find a
    # way to avoid this!
    self.u = gemm(1.0, self.u, u_k[:n1])
    gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u, overwrite_c=True)  # u = [u,u']*u_k
    self.s = s_k
def lstsq(a, b, cond=None, overwrite_a=0, overwrite_b=0):
    """Compute least-squares solution to equation :m:`a x = b`

    Compute a vector x such that the 2-norm :m:`|b - a x|` is minimised.

    Parameters
    ----------
    a : array, shape (M, N)
    b : array, shape (M,) or (M, K)
    cond : float
        Cutoff for 'small' singular values; used to determine effective
        rank of a. Singular values smaller than
        cond*largest_singular_value are considered zero.
    overwrite_a : boolean
        Discard data in a (may enhance performance)
    overwrite_b : boolean
        Discard data in b (may enhance performance)

    Returns
    -------
    x : array, shape (N,) or (N, K) depending on shape of b
        Least-squares solution
    residues : array, shape () or (1,) or (K,)
        Sums of residues, squared 2-norm for each column in :m:`b - a x`
        If rank of matrix a is < N or > M this is an empty array.
        If b was 1-d, this is an (1,) shape array, otherwise the shape is (K,)
    rank : integer
        Effective rank of matrix a
    s : array, shape (min(M,N),)
        Singular values of a. The condition number of a is abs(s[0]/s[-1]).

    Raises
    ------
    LinAlgError
        If computation does not converge.
    ValueError
        If `a` is not 2-d, the leading dimensions of `a` and `b` differ,
        the inputs contain NaN or infinity, or LAPACK reports an illegal
        argument.
    NotImplementedError
        If the selected gelss routine does not come from flapack.

    """
    a1 = asarray_chkfinite(a)
    b1 = asarray_chkfinite(b)
    if a1.ndim != 2:
        raise ValueError('expected matrix')
    m, n = a1.shape
    nrhs = b1.shape[1] if b1.ndim == 2 else 1
    if m != b1.shape[0]:
        raise ValueError('incompatible dimensions')
    gelss, = get_lapack_funcs(('gelss', ), (a1, b1))
    if n > m:
        # need to extend b matrix as it will be filled with
        # a larger solution matrix
        b2 = zeros((n, nrhs), dtype=gelss.dtype)
        if b1.ndim == 2:
            b2[:m, :] = b1
        else:
            b2[:m, 0] = b1
        b1 = b2
    # A private copy made during conversion may always be overwritten.
    overwrite_a = overwrite_a or (a1 is not a and not hasattr(a, '__array__'))
    overwrite_b = overwrite_b or (b1 is not b and not hasattr(b, '__array__'))

    if gelss.module_name[:7] != 'flapack':
        raise NotImplementedError('calling gelss from %s' % gelss.module_name)

    # get optimal work array
    work = gelss(a1, b1, lwork=-1)[4]
    # plain int(); the ``np.int`` alias no longer exists in current NumPy
    lwork = int(work[0].real)
    v, x, s, rank, work, info = gelss(a1, b1, cond=cond, lwork=lwork,
                                      overwrite_a=overwrite_a,
                                      overwrite_b=overwrite_b)

    if info > 0:
        raise LinAlgError("SVD did not converge in Linear Least Squares")
    if info < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal gelss' % -info)
    resids = asarray([], dtype=x.dtype)
    if n < m:
        # rows n.. of the gelss output hold the residual components
        x1 = x[:n]
        if rank == n:
            resids = sum(x[n:]**2, axis=0)
        x = x1
    return x, resids, rank, s
""" Operations on the manifold of SPD matrices and mapping to a flat space. """ import numpy as np from scipy import linalg from scipy.linalg.lapack import get_lapack_funcs def my_stack(arrays): return np.concatenate([a[np.newaxis] for a in arrays]) # Bypass scipy for faster eigh (and dangerous: Nan will kill it) my_eigh, = get_lapack_funcs(('syevr', ), np.zeros(1)) def frobenius(mat): """ Return the Frobenius norm """ return np.sqrt((mat**2).sum()) / mat.size def sqrtm(mat): """ Matrix square-root, for symetric positive definite matrices. """ vals, vecs, success_flag = my_eigh(mat) return np.dot(vecs * np.sqrt(vals), vecs.T) def inv_sqrtm(mat):
def _gram_omp(Gram, Xy, n_nonzero_coefs, eps_0=None, eps=None, overwrite_gram=False, overwrite_xy=False): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the the Cholesky decomposition method. Parameters: ----------- Gram: array, shape = (n_features, n_features) Gram matrix of the input data matrix Xy: array, shape = (n_features,) Input targets n_nonzero_coefs: int Targeted number of non-zero elements eps_0: float Squared norm of y, required if eps is not None. eps: float Targeted squared error, if not None overrides n_nonzero_coefs. overwrite_gram: bool, Whether the gram matrix can be overwritten by the algorithm. This is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. overwrite_xy: bool, Whether the covariance vector Xy can be overwritten by the algorithm. Returns: -------- gamma: array, shape = (n_nonzero_coefs,) Non-zero elements of the solution idx: array, shape = (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector """ if not overwrite_gram: Gram = Gram.copy('F') else: Gram = np.asfortranarray(Gram) if not overwrite_xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,)) potrs, = get_lapack_funcs(('potrs',), (Gram,)) idx = [] alpha = Xy eps_curr = eps_0 delta = 0 n_active = 0 max_features = len(Gram) if eps is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. 
while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam] ** 2 < min_float: # selected same atom twice, or inner product too small warn(premature) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) v = nrm2(L[n_active, :n_active]) ** 2 if 1 - v <= min_float: # selected atoms are dependent warn(premature) break L[n_active, n_active] = np.sqrt(1 - v) idx.append(lam) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False) beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if eps is not None: eps_curr += delta delta = np.inner(gamma, beta[:n_active]) eps_curr -= delta if eps_curr <= eps: break elif n_active == max_features: break return gamma, idx
np.sqrt(ratio / (4 * np.pi ** 3)) / np.power((1 - (1 - ratio) * z ** 2), 1.5), -1., 1.) else: sharp = quad(lambda z: lpn(l, z)[0][-1] * np.sqrt(1 / (1 - (1 - ratio) * z * z)), -1., 1.) sdt[l // 2] = sharp[0] frt[l // 2] = 2 * np.pi * lpn(l, 0)[0][-1] idx = n // 2 b = sdt[idx] bb = frt[idx] return np.diag(b), np.diag(bb) potrf, potrs = ll.get_lapack_funcs(('potrf', 'potrs')) def _solve_cholesky(Q, z): L, info = potrf(Q, lower=False, overwrite_a=False, clean=False) if info > 0: msg = "%d-th leading minor not positive definite" % info raise la.LinAlgError(msg) if info < 0: msg = 'illegal value in %d-th argument of internal potrf' % -info raise ValueError(msg) f, info = potrs(L, z, lower=False, overwrite_b=False) if info != 0: msg = 'illegal value in %d-th argument of internal potrs' % -info raise ValueError(msg) return f
def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
              alpha_min=0, method='lar', copy_X=True,
              eps=np.finfo(float).eps,
              copy_Gram=True, verbose=False):
    """Compute Least Angle Regression and LASSO path

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    Xy: array, optional
        Initial covariance vector; when None it is computed as
        ``np.dot(X.T, y)``. A copy is taken, the argument is not modified.

    max_iter: integer, optional
        Maximum number of iterations to perform, set to infinity for no limit.

    Gram: None, 'auto', array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X), if 'auto', the Gram
        matrix is precomputed from the given X, if there are more samples
        than features

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: {'lar', 'lasso'}
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    copy_X: bool, optional
        When no Gram matrix is given, work on a Fortran-ordered copy of X
        instead of swapping the columns of X itself.

    eps: float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    copy_Gram: bool, optional
        When a Gram matrix is given, work on a copy of it.

    verbose: bool, optional
        Print one progress line per added or dropped variable.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each
        iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features + 1)
        Coefficients along the path

    See also
    --------
    :ref:`LassoLars`
    :ref:`Lars`
    decomposition.sparse_encode
    decomposition.sparse_encode_parallel

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """

    n_features = X.shape[1]
    n_samples = y.size
    max_features = min(max_iter, n_features)

    coefs = np.zeros((max_features + 1, n_features))
    alphas = np.zeros(max_features + 1)
    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X, ))
    solve_cholesky, = get_lapack_funcs(('potrs', ), (X, ))

    if Gram is None:
        if copy_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    elif isinstance(Gram, str) and Gram == 'auto':
        # the isinstance guard keeps an ndarray Gram from being compared
        # element-wise against the string
        Gram = None
        if X.shape[0] > X.shape[1]:
            Gram = np.dot(X.T, X)
    elif copy_Gram:
        Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC")

    while True:
        if Cov.size:
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
        else:
            C = 0.

        alphas[n_iter] = C / n_samples
        if alphas[n_iter] < alpha_min:  # early stopping
            # interpolation factor 0 <= ss < 1
            if n_iter > 0:
                # In the first iteration, all alphas are zero, the formula
                # below would make ss a NaN
                ss = ((alphas[n_iter - 1] - alpha_min) /
                      (alphas[n_iter - 1] - alphas[n_iter]))
                coefs[n_iter] = coefs[n_iter - 1] + \
                    ss * (coefs[n_iter] - coefs[n_iter - 1])
            alphas[n_iter] = alpha_min
            break

        if n_iter >= max_iter or n_active >= n_features:
            break

        if not drop:

            ##########################################################
            # Append x_j to the Cholesky factorization of (Xa * Xa') #
            #                                                        #
            #            ( L   0 )                                   #
            #     L  ->  (       )  , where L * w = Xa' x_j          #
            #            ( w   z )    and z = ||x_j||                #
            #                                                        #
            ##########################################################

            sign_active[n_active] = np.sign(C_)
            m, n = n_active, C_idx + n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C))

        # least squares solution
        least_squares, info = solve_cholesky(L[:n_active, :n_active],
                                             sign_active[:n_active],
                                             lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each unactive variables and
            # eqiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir))
        gamma_ = min(g1, g2, C / AA)

        # TODO: better names for these variables: z
        drop = False
        z = -coefs[n_iter, active] / least_squares
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:
            # some coefficients have changed sign
            # NOTE(review): idx is an index *array*; the drop branch below
            # uses it as a single position, which presumably only works
            # when exactly one coefficient crosses zero - confirm.
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if n_iter >= coefs.shape[0]:
            # resize the coefs and alphas array
            add_features = 2 * max(1, (max_features - n_active))
            coefs.resize((n_iter + add_features, n_features))
            alphas.resize(n_iter + add_features)

        coefs[n_iter, active] = coefs[n_iter - 1, active] + \
            gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i + 1] = swap(X.T[i], X.T[i + 1])
                    # yeah this is stupid
                    indices[i], indices[i + 1] = indices[i + 1], indices[i]

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active],
                                      coefs[n_iter, active])
                temp = np.dot(X.T[n_active], residual)

                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i + 1] = indices[i + 1], indices[i]
                    Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
                    Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
                                                      Gram[:, i + 1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coefs[n_iter])
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose:
                print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp)))

    # resize coefs in case of early stop
    alphas = alphas[:n_iter + 1]
    coefs = coefs[:n_iter + 1]

    return alphas, active, coefs.T
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True,
                  return_path=False):
    """Run Orthogonal Matching Pursuit on (X, y) via a Cholesky update.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input dictionary. Columns are assumed to have unit norm.

    y : array, shape (n_samples,)
        Input targets

    n_nonzero_coefs : int
        Targeted number of non-zero elements

    tol : float
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_X : bool, optional
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    return_path : bool, optional. Default: False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    Returns
    -------
    gamma : array, shape (n_nonzero_coefs,)
        Non-zero elements of the solution

    idx : array, shape (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector

    coef : array, shape (n_features, n_nonzero_coefs)
        The first k values of column k correspond to the coefficient value
        for the active features at that step. The lower left triangle contains
        garbage. Only returned if ``return_path=True``.

    n_active : int
        Number of active features at convergence.
    """
    # The column swaps below need Fortran order; a non-conforming X is
    # copied even when overwriting would have been allowed.
    X = X.copy('F') if copy_X else np.asfortranarray(X)

    min_float = np.finfo(X.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X, ))
    potrs, = get_lapack_funcs(('potrs', ), (X, ))

    alpha = np.dot(X.T, y)
    residual = y
    gamma = np.empty(0)
    n_active = 0
    # indices[i] is the original column number now sitting at position i
    indices = np.arange(X.shape[1])

    max_features = n_nonzero_coefs if tol is None else X.shape[1]
    # new scipy (check_finite=False available): uninitialised storage is
    # fine; old scipy: the garbage upper triangle has to be non-Inf
    allocate = np.empty if solve_triangular_args else np.zeros
    L = allocate((max_features, max_features), dtype=X.dtype)
    L[0, 0] = 1.

    if return_path:
        coefs = np.empty_like(L)

    while True:
        lam = np.argmax(np.abs(np.dot(X.T, residual)))
        if lam < n_active or alpha[lam]**2 < min_float:
            # atom already selected or inner product too small
            warnings.warn(premature, RuntimeWarning, stacklevel=2)
            break

        if n_active > 0:
            # Extend the Cholesky factor of X' X by one row
            new_row = L[n_active, :n_active]
            new_row[:] = np.dot(X[:, :n_active].T, X[:, lam])
            linalg.solve_triangular(L[:n_active, :n_active],
                                    new_row,
                                    trans=0, lower=1,
                                    overwrite_b=True,
                                    **solve_triangular_args)
            v = nrm2(new_row)**2
            if 1 - v <= min_float:
                # selected atoms are dependent
                warnings.warn(premature, RuntimeWarning, stacklevel=2)
                break
            L[n_active, n_active] = np.sqrt(1 - v)

        # Bring the chosen atom to the front block of active columns
        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        n_active += 1

        # solves LL'x = y as a composition of two triangular systems
        gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active],
                         lower=True, overwrite_b=False)

        if return_path:
            coefs[:n_active, n_active - 1] = gamma
        residual = y - np.dot(X[:, :n_active], gamma)

        reached_tol = tol is not None and nrm2(residual)**2 <= tol
        if reached_tol or n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    return gamma, indices[:n_active], n_active
def test_hetrd(self):
    """Check ``?hetrd`` against an explicit Q^H A Q = T reconstruction.

    For each complex precision an empty matrix must be rejected; a 3 x 3
    upper-triangular input is then reduced with ``lower=1`` (nothing to
    do) and with the default ``lower=0``, where the reflectors returned in
    `data`/`tau` are multiplied out and compared with tridiag(e, d, e).
    """
    for real_dtype, complex_dtype in zip(REAL_DTYPES, COMPLEX_DTYPES):
        # A 0x0 matrix has to raise an error
        empty = np.zeros((0, 0), dtype=complex_dtype)
        hetrd, hetrd_lwork = get_lapack_funcs(('hetrd', 'hetrd_lwork'),
                                              (empty,))
        assert_raises(ValueError, hetrd, empty)

        # Tests for n = 1 currently fail with
        # ```
        # ValueError: failed to create intent(cache|hide)|optional array--
        # must have defined dimensions but got (0,)
        # ```
        # This is a NumPy issue
        # <https://github.com/numpy/numpy/issues/9617>.
        # TODO once the issue has been resolved, test for n=1

        # some upper triangular array
        n = 3
        ramp = np.arange(1, n*(n+1)//2+1, dtype=real_dtype)
        A = np.zeros((n, n), dtype=complex_dtype)
        A[np.triu_indices_from(A)] = ramp + 1j * ramp
        np.fill_diagonal(A, np.real(np.diag(A)))

        # query lwork
        lwork, info = hetrd_lwork(n)
        assert_equal(info, 0)

        # check lower=1 behavior (shouldn't do much since the matrix is
        # upper triangular)
        data, d, e, tau, info = hetrd(A, lower=1, lwork=lwork)
        assert_equal(info, 0)

        assert_allclose(data, A, atol=5*np.finfo(real_dtype).eps, rtol=1.0)
        assert_allclose(d, np.real(np.diag(A)))
        assert_allclose(e, 0.0)
        assert_allclose(tau, 0.0)

        # and now for the proper test (lower=0 is the default)
        data, d, e, tau, info = hetrd(A, lwork=lwork)
        assert_equal(info, 0)

        # assert Q^T*A*Q = tridiag(e, d, e)

        # build tridiagonal matrix
        T = (np.diag(d) + np.diag(e, 1) + np.diag(e, -1)).astype(real_dtype)

        # build Q as the product of the elementary reflectors
        identity = np.eye(n, n, dtype=complex_dtype)
        Q = identity.copy()
        for i in range(n-1):
            v = np.zeros(n, dtype=complex_dtype)
            v[:i] = data[:i, i+1]
            v[i] = 1.0
            H = identity - tau[i] * np.outer(v, np.conj(v))
            Q = np.dot(H, Q)

        # Make matrix fully Hermitian
        i_lower = np.tril_indices(n, -1)
        A[i_lower] = np.conj(A.T[i_lower])

        QHAQ = np.dot(np.conj(Q.T), np.dot(A, Q))

        # disable rtol here since some values in QTAQ and T are very close
        # to 0.
        assert_allclose(QHAQ, T, atol=10*np.finfo(real_dtype).eps, rtol=1.0)
def pinv_array(a, cond=None):
    """Calculate the Moore-Penrose pseudo inverse of each block of
        the three dimensional array a.

    Parameters
    ----------
    a   : {dense array}
        Is of size (n, m, m)
    cond : {float}
        Used by gelss to filter numerically zeros singular values.
        If None, a suitable value is chosen for you.

    Returns
    -------
    Nothing, a is modified in place so that a[k] holds the pseudoinverse
    of that block.

    Notes
    -----
    By using lapack wrappers, this can be much faster for large n, than
    directly calling pinv2

    Examples
    --------
    >>> import numpy as np
    >>> from pyamg.util.linalg import pinv_array
    >>> a = np.array([[[1.,2.],[1.,1.]], [[1.,1.],[3.,3.]]])
    >>> ac = a.copy()
    >>> # each block of a is inverted in-place
    >>> pinv_array(a)

    """

    n = a.shape[0]
    m = a.shape[1]

    if m == 1:
        # Pseudo-inverse of 1 x 1 matrices is trivial
        zero_entries = (a == 0.0).nonzero()[0]
        # temporarily replace zeros so the reciprocal below is well defined
        a[zero_entries] = 1.0
        a[:] = 1.0 / a
        a[zero_entries] = 0.0
        del zero_entries

    else:
        # The block size is greater than 1

        # Create necessary arrays and function pointers for calculating pinv
        # (``arrays`` must be a tuple; the dummy only carries a's dtype)
        gelss, gelss_lwork = get_lapack_funcs(
            ('gelss', 'gelss_lwork'), (np.ones((1, ), dtype=a.dtype), ))
        RHS = np.eye(m, dtype=a.dtype)
        lwork = _compute_lwork(gelss_lwork, m, m, m)

        # Choose tolerance for which singular values are zero in *gelss below
        if cond is None:
            t = a.dtype.char
            # explicit scalar types: the ``np.float`` and ``np.longfloat``
            # aliases no longer exist in current NumPy
            eps = np.finfo(np.float64).eps
            feps = np.finfo(np.single).eps
            geps = np.finfo(np.longdouble).eps
            _array_precision = {'f': 0, 'd': 1, 'g': 2,
                                'F': 0, 'D': 1, 'G': 2}
            cond = {
                0: feps * 1e3,
                1: eps * 1e6,
                2: geps * 1e6
            }[_array_precision[t]]

        # Invert each block of a
        for kk in range(n):
            gelssoutput = gelss(a[kk], RHS, cond=cond, lwork=lwork,
                                overwrite_a=True, overwrite_b=False)
            # second gelss output is the solution of a[kk] X = I
            a[kk] = gelssoutput[1]
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None, copy_Gram=True, copy_Xy=True, return_path=False): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the Cholesky decomposition method. Parameters ---------- Gram : array, shape (n_features, n_features) Gram matrix of the input data matrix Xy : array, shape (n_features,) Input targets n_nonzero_coefs : int Targeted number of non-zero elements tol_0 : float Squared norm of y, required if tol is not None. tol : float Targeted squared error, if not None overrides n_nonzero_coefs. copy_Gram : bool, optional Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. copy_Xy : bool, optional Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten. return_path : bool, optional. Default: False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. Returns ------- gamma : array, shape (n_nonzero_coefs,) Non-zero elements of the solution idx : array, shape (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector coefs : array, shape (n_features, n_nonzero_coefs) The first k values of column k correspond to the coefficient value for the active features at that step. The lower left triangle contains garbage. Only returned if ``return_path=True``. n_active : int Number of active features at convergence. 
""" Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram) if copy_Xy or not Xy.flags.writeable: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, )) potrs, = get_lapack_funcs(('potrs', ), (Gram, )) indices = np.arange(len(Gram)) # keeping track of swapping alpha = Xy tol_curr = tol_0 delta = 0 gamma = np.empty(0) n_active = 0 max_features = len(Gram) if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. if return_path: coefs = np.empty_like(L) while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam]**2 < min_float: # selected same atom twice, or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=3) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] linalg.solve_triangular(L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, check_finite=False) v = nrm2(L[n_active, :n_active])**2 Lkk = Gram[lam, lam] - v if Lkk <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=3) break L[n_active, n_active] = sqrt(Lkk) else: L[0, 0] = sqrt(Gram[lam, lam]) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) indices[n_active], indices[lam] = indices[lam], indices[n_active] Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = X'y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False) if return_path: coefs[:n_active, n_active - 1] = gamma beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if tol is not None: tol_curr += delta delta = np.inner(gamma, beta[:n_active]) tol_curr -= delta if abs(tol_curr) <= tol: break elif n_active == max_features: break if return_path: return gamma, indices[:n_active], coefs[:, :n_active], n_active else: 
return gamma, indices[:n_active], n_active
def merge(self, other, decay=1.0):
    """
    Merge this Projection with another.

    Content of `other` is destroyed in the process, so pass this function a
    copy if you need it further: `other.u` is overwritten in place by the
    BLAS/LAPACK calls below and then deleted from `other`.

    This is the optimized merge described in algorithm 5.

    Parameters
    ----------
    other : object with `u`, `s` and `m` attributes
        The projection to merge in. If `other.u` is None nothing happens.
    decay : float, optional
        Factor applied to `self.s` when the matrix for the small SVD is
        assembled.

    Raises
    ------
    ValueError
        If `self.m != other.m` (only checked when both `u` are set).

    Returns
    -------
    None; `self.u` and `self.s` are updated.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        self.u = other.u.copy()
        self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError(
            "vector space mismatch: update has %s features, expected %s" %
            (other.m, self.m))
    logger.info("merging projections: %s + %s" %
                (str(self.u.shape), str(other.u.shape)))
    # diff = numpy.dot(self.u.T, self.u) - numpy.eye(self.u.shape[1])
    # logger.info('orth error after=%f' % numpy.sum(diff * diff))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with gorgqr.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in numpy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(
        self.u)  # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm, = get_blas_funcs(('gemm', ), (self.u, ))
    logger.debug("constructing orthogonal component")
    # c = self.u^T * other.u
    c = gemm(1.0, self.u, other.u, trans_a=True)
    # other.u <- other.u - self.u * c, written into other.u (overwrite_c)
    gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
    # first call (lwork=-1) is a workspace query; its work[0] is then passed
    # as lwork to the real factorization
    qr, tau, work, info = geqrf(
        other.u, lwork=-1,
        overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
    qr, tau, work, info = geqrf(
        other.u, lwork=work[0],
        overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
    del other.u  # drop the attribute; `qr` now carries the factorization
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2:  # rare case...
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    # same two-step pattern: workspace query, then form q explicitly
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find rotation that diagonalizes r
    # k is the block matrix [[diag(decay * s1), c * s2], [0, r * s2]]
    k = numpy.bmat([[
        numpy.diag(decay * self.s), c * other.s
    ], [matutils.pad(numpy.matrix([]).reshape(0, 0), n2, n1), r * other.s]])
    logger.debug("computing SVD of %s dense matrix" % str(k.shape))
    u_k, s_k, _ = numpy.linalg.svd(
        k, full_matrices=False
    )  # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(

    # NOTE: `k` is rebound here from the block matrix to the value returned
    # by clipSpectrum, which is used as the number of factors to keep
    k = clipSpectrum(s_k, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    self.u = gemm(
        1.0, self.u, u_k[:n1]
    )  # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
    gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u,
         overwrite_c=True)  # u = [u,u']*u_k
    self.s = s_k
idx: array, shape = (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector """ if not overwrite_gram: Gram = Gram.copy('F') else: Gram = np.asfortranarray(Gram) if not overwrite_Xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,)) potrs, = get_lapack_funcs(('potrs',), (Gram,)) idx = [] alpha = Xy eps_curr = eps_0 delta = 0 n_active = 0 max_features = len(Gram) if eps is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam] ** 2 < min_float: # selected same atom twice, or inner product too small
def test_gelsd(self):
    """Check the LAPACK ``gelsd`` wrapper on a fixed 3x2 least-squares problem.

    For every real and every complex dtype the workspace sizes are obtained
    from ``gelsd_lwork`` and then the solution and the singular values
    returned by ``gelsd`` are compared with reference values.
    """
    # --- real dtypes: gelsd_lwork reports (work, iwork, info) ---
    for dtype in REAL_DTYPES:
        rtol = 25 * np.finfo(dtype).eps
        a1 = np.array([[1.0, 2.0],
                       [4.0, 5.0],
                       [7.0, 8.0]], dtype=dtype)
        b1 = np.array([16.0, 17.0, 20.0], dtype=dtype)
        gelsd, gelsd_lwork = get_lapack_funcs(('gelsd', 'gelsd_lwork'),
                                              (a1, b1))

        m, n = a1.shape
        nrhs = b1.shape[1] if len(b1.shape) == 2 else 1

        # Request of sizes
        work, iwork, info = gelsd_lwork(m, n, nrhs, -1)
        lwork = int(np.real(work))
        iwork_size = iwork

        x, s, rank, info = gelsd(a1, b1, lwork, iwork_size,
                                 -1, False, False)

        expected_x = np.array([-14.333333333333323,
                               14.999999999999991], dtype=dtype)
        expected_s = np.array([12.596017180511966,
                               0.583396253199685], dtype=dtype)
        assert_allclose(x[:-1], expected_x, rtol=rtol)
        assert_allclose(s, expected_s, rtol=rtol)

    # --- complex dtypes: gelsd_lwork additionally reports rwork ---
    for dtype in COMPLEX_DTYPES:
        rtol = 25 * np.finfo(dtype).eps
        a1 = np.array([[1.0 + 4.0j, 2.0],
                       [4.0 + 0.5j, 5.0 - 3.0j],
                       [7.0 - 2.0j, 8.0 + 0.7j]], dtype=dtype)
        b1 = np.array([16.0, 17.0 + 2.0j, 20.0 - 4.0j], dtype=dtype)
        gelsd, gelsd_lwork = get_lapack_funcs(('gelsd', 'gelsd_lwork'),
                                              (a1, b1))

        m, n = a1.shape
        nrhs = b1.shape[1] if len(b1.shape) == 2 else 1

        # Request of sizes
        work, rwork, iwork, info = gelsd_lwork(m, n, nrhs, -1)
        lwork = int(np.real(work))
        rwork_size = int(rwork)
        iwork_size = iwork

        x, s, rank, info = gelsd(a1, b1, lwork, rwork_size, iwork_size,
                                 -1, False, False)

        expected_x = np.array([1.161753632288328 - 1.901075709391912j,
                               1.735882340522193 + 1.521240901196909j],
                              dtype=dtype)
        expected_s = np.array([13.035514762572043,
                               4.337666985231382], dtype=dtype)
        assert_allclose(x[:-1], expected_x, rtol=rtol)
        assert_allclose(s, expected_s, rtol=rtol)
def lars_path_copy(X, y, Xy=None, Gram=None, max_features=None,
                   alpha_min=0, method='lar', overwrite_X=False,
                   overwrite_Gram=False, verbose=False):
    """ Copy made to avoid bugs in the scikits.learn toolbox

    Compute Least Angle Regression and LASSO path

    NOTE: this function uses Python 2 ``print`` statements (when `verbose`)
    and imports its dependencies, including ``scikits.learn``, locally.

    Parameters
    -----------
    X: array, shape: (n_samples, n_features)
        Input data

    y: array, shape: (n_samples)
        Input targets

    Xy: array, optional
        If given, a copy of it is used as the initial covariance vector
        instead of computing ``np.dot(X.T, y)``.

    max_features: integer, optional
        Maximum number of selected features. Defaults to
        ``min(n_samples, n_features)``.

    Gram: array, shape: (n_features, n_features), optional
        Precomputed Gram matrix (X' * X)

    alpha_min: float, optional
        Minimum correlation along the path. It corresponds to the
        regularization parameter alpha parameter in the Lasso.

    method: 'lar' | 'lasso'
        Specifies the returned model. Select 'lar' for Least Angle
        Regression, 'lasso' for the Lasso.

    overwrite_X: bool, optional
        Only consulted when `Gram` is None: if False, X is replaced by a
        Fortran-ordered copy before its columns are swapped in place.

    overwrite_Gram: bool, optional
        Only consulted when `Gram` is given: if False, Gram is copied
        before its rows/columns are swapped in place.

    verbose: bool, optional
        If True, print one line per added or dropped variable.

    Returns
    --------
    alphas: array, shape: (max_features + 1,)
        Maximum of covariances (in absolute value) at each
        iteration.

    active: array, shape (max_features,)
        Indices of active variables at the end of the path.

    coefs: array, shape (n_features, max_features+1)
        Coefficients along the path

    See also
    --------
    :ref:`LassoLARS`, :ref:`LARS`

    Notes
    ------
    * http://en.wikipedia.org/wiki/Least-angle_regression

    * http://en.wikipedia.org/wiki/Lasso_(statistics)#LASSO_method
    """
    import numpy as np
    from scipy import linalg
    from scipy.linalg.lapack import get_lapack_funcs

    from scikits.learn.linear_model.base import LinearModel
    from scikits.learn.utils import arrayfuncs

    n_features = X.shape[1]
    n_samples = len(y)

    if max_features is None:
        max_features = min(n_samples, n_features)

    # one row per step of the path; both arrays are grown on demand below
    coefs = np.zeros((max_features + 1, n_features))
    alphas = np.zeros(max_features + 1)
    n_iter, n_active = 0, 0
    active, indices = list(), np.arange(n_features)
    # holds the sign of covariance
    sign_active = np.empty(max_features, dtype=np.int8)
    drop = False
    eps = np.finfo(X.dtype).eps

    # will hold the cholesky factorization. Only lower part is
    # referenced.
    L = np.empty((max_features, max_features), dtype=X.dtype)
    swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (X,))
    potrs, = get_lapack_funcs(('potrs',), (X,))

    if Gram is None:
        if not overwrite_X:
            # force copy. setting the array to be fortran-ordered
            # speeds up the calculation of the (partial) Gram matrix
            # and allows to easily swap columns
            X = X.copy('F')
    else:
        if not overwrite_Gram:
            Gram = Gram.copy()

    if Xy is None:
        Cov = np.dot(X.T, y)
    else:
        Cov = Xy.copy()

    if verbose:
        print "Step\t\tAdded\t\tDropped\t\tActive set size\t\tC"

    while 1:

        if Cov.size:
            # Cov only holds the entries of the variables not yet active
            C_idx = np.argmax(np.abs(Cov))
            C_ = Cov[C_idx]
            C = np.fabs(C_)
            # to match a for computing gamma_
        else:
            # no inactive variable left: update C from the previous step
            if Gram is None:
                C -= gamma_ * np.abs(np.dot(X.T[0], eq_dir))
            else:
                C -= gamma_ * np.abs(np.dot(Gram[0], least_squares))

        alphas[n_iter] = C / n_samples

        # Check for early stopping
        if alphas[n_iter] < alpha_min:  # interpolate
            # interpolation factor 0 <= ss < 1
            ss = (alphas[n_iter-1] - alpha_min) / (alphas[n_iter-1] -
                                                   alphas[n_iter])
            coefs[n_iter] = coefs[n_iter-1] + ss*(coefs[n_iter] -
                                                  coefs[n_iter-1])
            alphas[n_iter] = alpha_min
            break

        if n_active == max_features:
            break

        if not drop:

            # Update the Cholesky factorization of (Xa * Xa') #
            #                                                 #
            #          ( L   0 )                              #
            #   L  ->  (       )  , where L * w = b           #
            #          ( w   z )    z = 1 - ||w||             #
            #                                                 #
            #   where u is the last added to the active set   #

            sign_active[n_active] = np.sign(C_)
            # m: first inactive slot; n: position of the selected variable
            # in the full (active + inactive) ordering
            m, n = n_active, C_idx+n_active

            Cov[C_idx], Cov[0] = swap(Cov[C_idx], Cov[0])
            indices[n], indices[m] = indices[m], indices[n]
            Cov = Cov[1:]  # remove Cov[0]

            if Gram is None:
                X.T[n], X.T[m] = swap(X.T[n], X.T[m])
                c = nrm2(X.T[n_active])**2
                L[n_active, :n_active] = \
                    np.dot(X.T[n_active], X.T[:n_active].T)
            else:
                # swap does only work inplace if matrix is fortran
                # contiguous ...
                Gram[m], Gram[n] = swap(Gram[m], Gram[n])
                Gram[:, m], Gram[:, n] = swap(Gram[:, m], Gram[:, n])
                c = Gram[n_active, n_active]
                L[n_active, :n_active] = Gram[n_active, :n_active]

            # Update the cholesky decomposition for the Gram matrix
            arrayfuncs.solve_triangular(L[:n_active, :n_active],
                                        L[n_active, :n_active])
            v = np.dot(L[n_active, :n_active], L[n_active, :n_active])
            # the new diagonal entry is floored at eps
            diag = max(np.sqrt(np.abs(c - v)), eps)
            L[n_active, n_active] = diag

            active.append(indices[n_active])
            n_active += 1

            if verbose:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '',
                                                      n_active, C)

        # least squares solution
        least_squares, info = potrs(L[:n_active, :n_active],
                                    sign_active[:n_active], lower=True)

        # is this really needed ?
        AA = 1. / np.sqrt(np.sum(least_squares * sign_active[:n_active]))
        least_squares *= AA

        if Gram is None:
            # equiangular direction of variables in the active set
            eq_dir = np.dot(X.T[:n_active].T, least_squares)
            # correlation between each unactive variables and
            # eqiangular vector
            corr_eq_dir = np.dot(X.T[n_active:], eq_dir)
        else:
            # if huge number of features, this takes 50% of time, I
            # think could be avoided if we just update it using an
            # orthogonal (QR) decomposition of X
            corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                 least_squares)

        # step length: smallest positive candidate, capped by C/AA
        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir))
        g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir))
        gamma_ = min(g1, g2, C/AA)

        # TODO: better names for these variables: z
        drop = False
        z = - coefs[n_iter, active] / least_squares
        z_pos = arrayfuncs.min_pos(z)
        if z_pos < gamma_:

            # some coefficients have changed sign
            idx = np.where(z == z_pos)[0]

            # update the sign, important for LAR
            sign_active[idx] = -sign_active[idx]

            if method == 'lasso':
                gamma_ = z_pos
            drop = True

        n_iter += 1

        if n_iter >= coefs.shape[0]:
            # resize the coefs and alphas array
            add_features = 2 * (max_features - n_active)
            coefs.resize((n_iter + add_features, n_features))
            alphas.resize(n_iter + add_features)
            # NOTE(review): the nesting of this break was reconstructed;
            # here it guards the add_features == 0 case, where row n_iter
            # would not exist after the resize -- confirm.
            if n_active == max_features:
                break

        coefs[n_iter, active] = coefs[n_iter-1, active] + \
            gamma_ * least_squares

        # update correlations
        Cov -= gamma_ * corr_eq_dir

        if n_active > n_features:
            break

        # See if any coefficient has changed sign
        if drop and method == 'lasso':

            arrayfuncs.cholesky_delete(L[:n_active, :n_active], idx)

            n_active -= 1
            m, n = idx, n_active
            drop_idx = active.pop(idx)

            if Gram is None:
                # propagate dropped variable
                for i in range(idx, n_active):
                    X.T[i], X.T[i+1] = swap(X.T[i], X.T[i+1])
                    indices[i], indices[i+1] = \
                        indices[i+1], indices[i]  # yeah this is stupid

                # TODO: this could be updated
                residual = y - np.dot(X[:, :n_active],
                                      coefs[n_iter, active])
                temp = np.dot(X.T[n_active], residual)

                # the dropped variable goes back to the front of Cov
                Cov = np.r_[temp, Cov]
            else:
                for i in range(idx, n_active):
                    indices[i], indices[i+1] = \
                        indices[i+1], indices[i]
                    Gram[i], Gram[i+1] = swap(Gram[i], Gram[i+1])
                    Gram[:, i], Gram[:, i+1] = swap(Gram[:, i], Gram[:, i+1])

                # Cov_n = Cov_j + x_j * X + increment(betas) TODO:
                # will this still work with multiple drops ?

                # recompute covariance. Probably could be done better
                # wrong as Xy is not swapped with the rest of variables

                # TODO: this could be updated
                residual = y - np.dot(X, coefs[n_iter])
                temp = np.dot(X.T[drop_idx], residual)
                Cov = np.r_[temp, Cov]

            sign_active = np.delete(sign_active, idx)
            sign_active = np.append(sign_active, 0.)  # just to maintain size
            if verbose:
                print "%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx,
                                                      n_active, abs(temp))

    # resize coefs in case of early stop
    alphas = alphas[:n_iter+1]
    coefs = coefs[:n_iter+1]

    return alphas, active, coefs.T
"""Module that redifine leastsq from scipy.optimize to remove some slow verifications. """ import scipy.optimize as op import numpy as np from scipy.linalg.lapack import get_lapack_funcs from scipy.linalg import calc_lwork from numpy.linalg import LinAlgError getrf, getri = get_lapack_funcs(('getrf', 'getri'), (np.eye(3), )) def inv(a): global getrf, getri lu, piv, info = getrf(a, overwrite_a=True) if info == 0: lwork = calc_lwork.getri(getri.typecode, a.shape[0]) lwork = lwork[1] lwork = int(1.01 * lwork) inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1) if info > 0: raise LinAlgError("singular matrix") if info < 0: raise ValueError('illegal value in %d-th argument of internal ' 'getrf|getri' % -info) return inv_a def leastsq(func, p0, args, full_output): n = len(p0) retval = op._minpack._lmdif(func, p0, args, 1, 1.49012e-8, 1.49012e-8, 0.0,
#定义各种变量和集合 X = x Gram = np.dot(X.T, X) Cov = np.dot(X.T, y) n_features = X.shape[1] n_samples = X.shape[0] #在return_path为True的条件下 coefs = np.zeros((n_features + 1, n_features)) alphas = np.zeros(n_features + 1) n_iter, n_active = 0, 0 active, indices = list(), np.arange(n_features) sign_active = np.empty(n_features, dtype=np.int8) L = np.empty((n_features, n_features), dtype=Gram.dtype) swap, nrm2 = linalg.get_blas_funcs(('swap', 'nrm2'), (Cov, )) solve_cholesky, = get_lapack_funcs(('potrs', ), (L, )) tiny32 = np.finfo(np.float32).tiny # to avoid division by 0 warning equality_tolerance = np.finfo(np.float32).eps Gram_copy = Gram.copy() Cov_copy = Cov.copy() drop = False SOLVE_TRIANGULAR_ARGS = {'check_finite': False} # In[362]: cplist = [] while True: #退出循环条件 if n_active >= n_features: break #每一轮更新要进去的变量的序号
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True): """Orthogonal Matching Pursuit step using the Cholesky decomposition. Parameters: ----------- X: array, shape = (n_samples, n_features) Input dictionary. Columns are assumed to have unit norm. y: array, shape = (n_samples,) Input targets n_nonzero_coefs: int Targeted number of non-zero elements tol: float Targeted squared error, if not None overrides n_nonzero_coefs. copy_X: bool, optional Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. Returns: -------- gamma: array, shape = (n_nonzero_coefs,) Non-zero elements of the solution idx: array, shape = (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector """ if copy_X: X = X.copy('F') else: # even if we are allowed to overwrite, still copy it if bad order X = np.asfortranarray(X) min_float = np.finfo(X.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X, )) potrs, = get_lapack_funcs(('potrs', ), (X, )) alpha = np.dot(X.T, y) residual = y gamma = np.empty(0) n_active = 0 indices = np.arange(X.shape[1]) # keeping track of swapping max_features = X.shape[1] if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=X.dtype) L[0, 0] = 1. 
while True: lam = np.argmax(np.abs(np.dot(X.T, residual))) if lam < n_active or alpha[lam]**2 < min_float: # atom already selected or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=2) break if n_active > 0: # Updates the Cholesky decomposition of X' X L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam]) solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) v = nrm2(L[n_active, :n_active])**2 if 1 - v <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=2) break L[n_active, n_active] = np.sqrt(1 - v) X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam]) alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active] indices[n_active], indices[lam] = indices[lam], indices[n_active] n_active += 1 # solves LL'x = y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False) residual = y - np.dot(X[:, :n_active], gamma) if tol is not None and nrm2(residual)**2 <= tol: break elif n_active == max_features: break return gamma, indices[:n_active]
def schwarz_parameters(A, subdomain=None, subdomain_ptr=None,
                       inv_subblock=None, inv_subblock_ptr=None):
    '''
    Helper function for setting up Schwarz relaxation.  This function avoids
    recomputing the subdomains and block inverses many times, e.g., it avoids
    a costly double computation when setting up pre and post smoothing with
    Schwarz.

    Parameters
    ----------
    A {csr_matrix}
        Matrix whose ``indptr``/``indices``/``data`` are read; the result is
        cached on it as ``A.schwarz_parameters``.
    subdomain, subdomain_ptr : {array}, optional
        Subdomain indices and pointer array.  If either is None, copies of
        ``A.indices`` and ``A.indptr`` are used.
    inv_subblock, inv_subblock_ptr : {array}, optional
        Block inverses and their pointer array.  If either is None they are
        computed here; if both are given they are stored as passed.

    Returns
    -------
    A.schwarz_parameters[0] is subdomain
    A.schwarz_parameters[1] is subdomain_ptr
    A.schwarz_parameters[2] is inv_subblock
    A.schwarz_parameters[3] is inv_subblock_ptr
    '''

    # Check if A has a pre-existing set of Schwarz parameters
    if hasattr(A, 'schwarz_parameters'):
        if subdomain is not None and subdomain_ptr is not None:
            # check that the existing parameters correspond to the same
            # subdomains; otherwise fall through and recompute
            if np.array(A.schwarz_parameters[0] == subdomain).all() and \
               np.array(A.schwarz_parameters[1] == subdomain_ptr).all():
                return A.schwarz_parameters
        else:
            return A.schwarz_parameters

    # Default is to use the overlapping regions defined by A's sparsity pattern
    if subdomain is None or subdomain_ptr is None:
        subdomain_ptr = A.indptr.copy()
        subdomain = A.indices.copy()

    # Extract each subdomain's block from the matrix
    if inv_subblock is None or inv_subblock_ptr is None:
        inv_subblock_ptr = np.zeros(subdomain_ptr.shape,
                                    dtype=A.indices.dtype)
        blocksize = (subdomain_ptr[1:] - subdomain_ptr[:-1])
        # block i is stored flattened, so it occupies blocksize[i]**2 entries
        inv_subblock_ptr[1:] = np.cumsum(blocksize * blocksize)

        # Extract each block column from A
        inv_subblock = np.zeros((inv_subblock_ptr[-1], ), dtype=A.dtype)
        amg_core.extract_subblocks(A.indptr, A.indices, A.data, inv_subblock,
                                   inv_subblock_ptr, subdomain, subdomain_ptr,
                                   int(subdomain_ptr.shape[0] - 1), A.shape[0])

        # Choose tolerance for which singular values are zero in *gelss below
        # (np.float / np.longfloat were removed from NumPy; the explicit
        # names below refer to the same types)
        t = A.dtype.char
        eps = np.finfo(np.float64).eps
        feps = np.finfo(np.single).eps
        geps = np.finfo(np.longdouble).eps
        _array_precision = {'f': 0, 'd': 1, 'g': 2, 'F': 0, 'D': 1, 'G': 2}
        cond = {
            0: feps * 1e3,
            1: eps * 1e6,
            2: geps * 1e6
        }[_array_precision[t]]

        # Invert each block column
        my_pinv, = la.get_lapack_funcs(['gelss'],
                                       (np.ones((1, ), dtype=A.dtype)))
        for i in range(subdomain_ptr.shape[0] - 1):
            m = blocksize[i]
            # dense identity right-hand side: the gelss solution is then the
            # pseudoinverse of the block
            rhs = np.eye(m, m, dtype=A.dtype)
            j0 = inv_subblock_ptr[i]
            j1 = inv_subblock_ptr[i + 1]
            gelssoutput = my_pinv(inv_subblock[j0:j1].reshape(m, m),
                                  rhs, cond=cond, overwrite_a=True,
                                  overwrite_b=True)
            inv_subblock[j0:j1] = np.ravel(gelssoutput[1])

    A.schwarz_parameters = (subdomain, subdomain_ptr, inv_subblock,
                            inv_subblock_ptr)
    return A.schwarz_parameters
def pinv_array(a, tol=None):
    """Replace every block of the 3D array a by its Moore-Penrose
    pseudo inverse.

    Parameters
    ----------
    a : {dense array}
        Is of size (n, m, m)
    tol : {float}
        Used by gelss to filter numerically zeros singular values.
        If None, a suitable value is chosen for you.

    Returns
    -------
    Nothing, a is modified in place so that a[k] holds the pseudoinverse
    of that block.

    Notes
    -----
    By using lapack wrappers, this can be much faster for large n, than
    directly calling a pseudoinverse (SVD)

    Examples
    --------
    >>> import numpy as np
    >>> from pyamg.util.linalg import pinv_array
    >>> a = np.array([[[1.,2.],[1.,1.]], [[1.,1.],[3.,3.]]])
    >>> ac = a.copy()
    >>> # each block of a is inverted in-place
    >>> pinv_array(a)

    """
    num_blocks, block_dim = a.shape[0], a.shape[1]

    if block_dim == 1:
        # 1 x 1 blocks: invert the nonzero entries and keep the zeros.
        # Zeros are set to one first so the division never sees 1/0.
        zeros = (a == 0.0).nonzero()[0]
        a[zeros] = 1.0
        a[:] = 1.0 / a
        a[zeros] = 0.0
        return

    # Larger blocks: look up the LAPACK least-squares driver for a's dtype
    # and size its workspace once for all blocks.
    gelss, gelss_lwork = lapack.get_lapack_funcs(
        ('gelss', 'gelss_lwork'), (np.ones((1, ), dtype=a.dtype)))
    identity = np.eye(block_dim, dtype=a.dtype)
    # pylint: disable=protected-access
    lwork = lapack._compute_lwork(gelss_lwork, block_dim, block_dim,
                                  block_dim)
    # pylint: enable=protected-access

    # Cut-off below which gelss treats singular values as zero
    if tol is None:
        tol = set_tol(a.dtype)

    # Solving block * X = I in the least-squares sense gives X = pinv(block);
    # the solution is the second entry of the gelss output.
    for kk in range(num_blocks):
        solution = gelss(a[kk], identity, cond=tol, lwork=lwork,
                         overwrite_a=True, overwrite_b=False)[1]
        a[kk] = solution
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None, copy_Gram=True, copy_Xy=True): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the the Cholesky decomposition method. Parameters: ----------- Gram: array, shape = (n_features, n_features) Gram matrix of the input data matrix Xy: array, shape = (n_features,) Input targets n_nonzero_coefs: int Targeted number of non-zero elements tol_0: float Squared norm of y, required if tol is not None. tol: float Targeted squared error, if not None overrides n_nonzero_coefs. copy_Gram: bool, optional Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. copy_Xy: bool, optional Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten. Returns: -------- gamma: array, shape = (n_nonzero_coefs,) Non-zero elements of the solution idx: array, shape = (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector """ Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram) if copy_Xy: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram, )) potrs, = get_lapack_funcs(('potrs', ), (Gram, )) indices = np.arange(len(Gram)) # keeping track of swapping alpha = Xy tol_curr = tol_0 delta = 0 gamma = np.empty(0) n_active = 0 max_features = len(Gram) if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1. 
while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam]**2 < min_float: # selected same atom twice, or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=2) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] solve_triangular(L[:n_active, :n_active], L[n_active, :n_active]) v = nrm2(L[n_active, :n_active])**2 if 1 - v <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=2) break L[n_active, n_active] = np.sqrt(1 - v) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) indices[n_active], indices[lam] = indices[lam], indices[n_active] Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = y as a composition of two triangular systems gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False) beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if tol is not None: tol_curr += delta delta = np.inner(gamma, beta[:n_active]) tol_curr -= delta if tol_curr <= tol: break elif n_active == max_features: break return gamma, indices[:n_active]
def lstsq(a, b, cond=None, overwrite_a=False, overwrite_b=False,
          check_finite=True, lapack_driver=None):
    """
    Compute least-squares solution to equation Ax = b.

    Compute a vector x such that the 2-norm ``|b - A x|`` is minimized.

    This code was adapted from the Scipy distribution:
    https://github.com/scipy/scipy/blob/v1.2.1/scipy/linalg/basic.py#L1047-L1264

    Parameters
    ----------
    a : (M, N) array_like
        Left hand side matrix (2-D array).
    b : (M,) or (M, K) array_like
        Right hand side matrix or vector (1-D or 2-D array).
    cond : float, optional
        Cutoff for 'small' singular values; used to determine effective
        rank of a. Singular values smaller than
        ``cond * largest_singular_value`` are considered zero. Defaults to
        the machine epsilon of the LAPACK routine's dtype. It is handed to
        the LAPACK calls only; the Magma branch does not read it.
    overwrite_a : bool, optional
        Discard data in `a` (may enhance performance). Default is False.
        In this adaptation the flag is recomputed but every solver call
        passes ``False`` explicitly.
    overwrite_b : bool, optional
        Discard data in `b` (may enhance performance). Default is False.
        In this adaptation the flag is recomputed but every solver call
        passes ``False`` explicitly.
    check_finite : bool, optional
        Whether to check that the input matrices contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
    lapack_driver : str, optional
        Which LAPACK driver is looked up with ``get_lapack_funcs``.
        Options are ``'gelsd'``, ``'gelsy'``, ``'gelss'``; when None the
        module-level ``default_lapack_driver`` is used.

        .. versionadded:: 0.17.0

    Returns
    -------
    x : (N,) or (N, K) ndarray
        Least-squares solution. Return shape matches shape of `b`.
    residues : (0,) or () or (K,) ndarray
        Sums of residues, squared 2-norm for each column in ``b - a x``.
        A length zero array unless ``M > N`` and the computed rank equals
        ``N``.
        If b was 1-D, this is a () shape array (numpy scalar), otherwise the
        shape is (K,).
    rank : int
        Effective rank of matrix `a`.
    s : (min(M,N),) ndarray
        Singular values of `a`. The condition number of a is
        ``abs(s[0] / s[-1])``.

    Raises
    ------
    LinAlgError
        If computation does not converge (positive ``info``).

    ValueError
        When parameters are wrong (non 2-D `a`, mismatched first dimension,
        unknown driver name, or negative ``info``).

    See Also
    --------
    optimize.nnls : linear least squares with non-negativity constraint

    Notes
    -----
    Differences from the SciPy original that are visible in this function:

    * A LAPACK solve is always run first on copies of the inputs; its
      outputs (``x_check``, ``s_check``, ``rank_check``) are not used
      afterwards.
    * The local ``driver`` is then set to ``'gelss'`` unconditionally, so
      only the ``'gelss'`` branch is executed, while the LAPACK routine
      that branch calls is still the one looked up for the requested
      driver.
    * When the module-level ``context`` is falsy, the LAPACK routine is run
      again on fresh copies; otherwise the problem is solved through the
      ``magma`` bindings, with the rank derived from the singular values
      returned by ``magma_sgesvd``.

    Examples
    --------
    >>> from scipy.linalg import lstsq
    >>> import matplotlib.pyplot as plt

    Suppose we have the following data:

    >>> x = np.array([1, 2.5, 3.5, 4, 5, 7, 8.5])
    >>> y = np.array([0.3, 1.1, 1.5, 2.0, 3.2, 6.6, 8.6])

    We want to fit a quadratic polynomial of the form ``y = a + b*x**2``
    to this data.  We first form the "design matrix" M, with a constant
    column of 1s and a column containing ``x**2``:

    >>> M = x[:, np.newaxis]**[0, 2]
    >>> M
    array([[  1.  ,   1.  ],
           [  1.  ,   6.25],
           [  1.  ,  12.25],
           [  1.  ,  16.  ],
           [  1.  ,  25.  ],
           [  1.  ,  49.  ],
           [  1.  ,  72.25]])

    We want to find the least-squares solution to ``M.dot(p) = y``,
    where ``p`` is a vector with length 2 that holds the parameters
    ``a`` and ``b``.

    >>> p, res, rnk, s = lstsq(M, y)
    >>> p
    array([ 0.20925829,  0.12013861])

    Plot the data and the fitted curve.

    >>> plt.plot(x, y, 'o', label='data')
    >>> xx = np.linspace(0, 9, 101)
    >>> yy = p[0] + p[1]*xx**2
    >>> plt.plot(xx, yy, label='least squares fit, $y = a + bx^2$')
    >>> plt.xlabel('x')
    >>> plt.ylabel('y')
    >>> plt.legend(framealpha=1, shadow=True)
    >>> plt.grid(alpha=0.25)
    >>> plt.show()

    """
    # Validate and normalise the inputs.
    a1 = _asarray_validated(a, check_finite=check_finite)
    b1 = _asarray_validated(b, check_finite=check_finite)
    if len(a1.shape) != 2:
        raise ValueError('expected matrix')
    m, n = a1.shape
    if len(b1.shape) == 2:
        nrhs = b1.shape[1]
    else:
        nrhs = 1
    if m != b1.shape[0]:
        raise ValueError('incompatible dimensions')
    if m == 0 or n == 0:  # Zero-sized problem, confuses LAPACK
        x = np.zeros((n, ) + b1.shape[1:], dtype=np.common_type(a1, b1))
        if n == 0:
            residues = np.linalg.norm(b1, axis=0)**2
        else:
            residues = np.empty((0, ))
        return x, residues, 0, np.empty((0, ))

    # Resolve the driver name, falling back to the module-level default.
    driver = lapack_driver
    if driver is None:
        global default_lapack_driver
        driver = default_lapack_driver
    if driver not in ('gelsd', 'gelsy', 'gelss'):
        raise ValueError('LAPACK driver "%s" is not found' % driver)

    lapack_func, lapack_lwork = get_lapack_funcs(
        (driver, '%s_lwork' % driver), (a1, b1))
    # Only read in the 'gelsd' branch further down.
    real_data = True if (lapack_func.dtype.kind == 'f') else False

    if m < n:
        # need to extend b matrix as it will be filled with
        # a larger solution matrix
        if len(b1.shape) == 2:
            b2 = np.zeros((n, nrhs), dtype=lapack_func.dtype)
            b2[:m, :] = b1
        else:
            b2 = np.zeros(n, dtype=lapack_func.dtype)
            b2[:m] = b1
        b1 = b2

    # NOTE(review): these two flags are updated but never passed to a solver
    # below (the calls use literal False) -- confirm this is intended.
    overwrite_a = overwrite_a or _datacopied(a1, a)
    overwrite_b = overwrite_b or _datacopied(b1, b)

    if cond is None:
        cond = np.finfo(lapack_func.dtype).eps

    # Reference solve on copies of the inputs.
    # NOTE(review): x_check, s_check and rank_check are not read anywhere
    # else in this function, and info is reassigned in the branch below.
    # NOTE(review): unpacking (lwork, iwork) and the 7-argument call
    # presumably match only the real-valued gelsd wrapper -- confirm the
    # behaviour for the other drivers / complex data.
    a1_wrk = np.copy(a1)
    b1_wrk = np.copy(b1)
    lwork, iwork = _compute_lwork(lapack_lwork, m, n, nrhs, cond)
    x_check, s_check, rank_check, info = lapack_func(
        a1_wrk, b1_wrk, lwork, iwork, cond, False, False)

    # The driver label is forced here, so the 'gelsd' and 'gelsy' branches
    # below cannot be reached; lapack_func still refers to the routine
    # looked up for the originally requested driver.
    driver = 'gelss'
    if driver in ('gelss', 'gelsd'):
        if driver == 'gelss':
            if not context:
                # CPU path: repeat the LAPACK solve on fresh copies.
                a1_wrk = np.copy(a1)
                b1_wrk = np.copy(b1)
                lwork, iwork = _compute_lwork(lapack_lwork, m, n, nrhs, cond)
                x, s, rank, info = lapack_func(a1_wrk, b1_wrk, lwork, iwork,
                                               cond, False, False)
            else:
                # GPU path through the magma bindings.
                try:
                    # Check that we aren't dealing with an underconstrained problem ...
                    # NOTE(review): whether pkg.log.error raises is not
                    # visible here; if it only logs, execution continues
                    # into the Magma calls below.
                    if m < n:
                        pkg.log.error(
                            Exception(
                                "Underconstrained problems not yet supported by Magma."
                            ))
                    # Initialize
                    # NOTE(review): the s* routine names and float32 buffers
                    # suggest single precision; a1/b1 keep their own dtype
                    # here -- confirm callers pass float32 data.
                    a1_trans = np.copy(a1, order='F')
                    a1_gpu = gpuarray.to_gpu(a1_trans)
                    # Note that the result for 'x' gets written to the vector inputted for b
                    x_trans = np.copy(b1, order='F')
                    x_gpu = gpuarray.to_gpu(x_trans)
                    # Init singular-value decomposition (SVD) output & buffer arrays
                    s = np.zeros(min(m, n), np.float32)
                    u = np.zeros((m, m), np.float32)
                    vh = np.zeros((n, n), np.float32)
                    # Query and allocate optimal workspace
                    # n.b.: - the result for 'x' gets written to the input vector for b, so we just label b->x
                    #       - assume magma variables lda=ldb=m throughout here
                    lwork_SVD = magma.magma_sgesvd_buffersize(
                        'A', 'A', m, n, a1_trans.ctypes.data, m,
                        s.ctypes.data, u.ctypes.data, m, vh.ctypes.data, n)
                    # For some reason, magma_sgels_buffersize() does not return the right value for large problems, so
                    # we compute the values used for the validation check (see Magma SGELS documentation) directly and use that
                    #lwork_LS = magma.magma_sgels_buffersize('n', m, n, nrhs, a1_trans.ctypes.data, m, x_trans.ctypes.data, m)
                    nb = magma.magma_get_sgeqrf_nb(m, n)
                    check = (m - n + nb) * (nrhs + nb) + nrhs * nb
                    lwork_LS = check
                    # Allocate workspaces
                    hwork_SVD = np.zeros(lwork_SVD, np.float32, order='F')
                    hwork_LS = np.zeros(lwork_LS, np.float32)
                    # Compute SVD
                    timer.start("SVD")
                    magma.magma_sgesvd('A', 'A', m, n, a1_trans.ctypes.data, m,
                                       s.ctypes.data, u.ctypes.data, m,
                                       vh.ctypes.data, n,
                                       hwork_SVD.ctypes.data, lwork_SVD)
                    timer.stop("SVD")
                    # Note, the use of s_i>rcond here; this is meant to select
                    # values that are effectively non-zero.  Results will depend
                    # somewhat on the choice for this value.  This criterion was
                    # adopted from that utilized by scipy.linalg.basic.lstsq()
                    # (the threshold is eps * s[0]; the cond argument is not
                    # used in this branch)
                    rcond = np.finfo(lapack_func.dtype).eps * s[0]
                    rank = sum(1 for s_i in s if s_i > rcond)
                    # Run LS solver
                    timer.start("LS")
                    magma.magma_sgels_gpu('n', m, n, nrhs, a1_gpu.gpudata, m,
                                          x_gpu.gpudata, m,
                                          hwork_LS.ctypes.data, lwork_LS)
                    timer.stop("LS")
                    # Unload result from GPU
                    x = x_gpu.get()
                except magma.MagmaError as e:
                    # Keep Magma's status so the info checks below can raise.
                    # NOTE(review): if the status were 0, x (and possibly
                    # s / rank) would be unbound when used below -- confirm
                    # that a MagmaError always carries a non-zero status.
                    info = e._status
                else:
                    info = 0
        elif driver == 'gelsd':
            # Unreachable while driver is forced to 'gelss' above.
            if real_data:
                if not context:
                    raise Exception(
                        "For some reason, the CUDA implementation of fit() is being called when context is False."
                    )
                else:
                    raise Exception(
                        "gelsd not supported using Cuda yet")
            else:  # complex data
                raise LinAlgError(
                    "driver=%s not yet supported for complex data" % (driver))
        if info > 0:
            raise LinAlgError(
                "SVD did not converge in Linear Least Squares")
        if info < 0:
            # The message reports the lapack_driver argument as passed in,
            # which is None when the default driver was used.
            raise ValueError(
                'illegal value in %d-th argument of internal %s' %
                (-info, lapack_driver))
        resids = np.asarray([], dtype=x.dtype)
        if m > n:
            # The first n rows hold the solution; for a full-rank problem
            # the remaining rows give the residual sums of squares.
            x1 = x[:n]
            if rank == n:
                resids = np.sum(np.abs(x[n:])**2, axis=0)
            x = x1

    elif driver == 'gelsy':
        # Unreachable while driver is forced to 'gelss' above.
        raise LinAlgError("driver=%s not yet supported" % (driver))

    #pkg.log.close("Done", time_elapsed=True)
    return x, resids, rank, s