def cholupdate_blas(L, x, upper=False, overwrite_x=False, out=None):
    """Rank-one update of a triangular factor via BLAS Givens rotations.

    For every index ``k`` a rotation is generated with ``rotg`` from the
    diagonal entry ``[k, k]`` of the working array and ``x[k]``; ``rot`` then
    applies it to the ``n - k`` entries starting at flat offset ``k * n + k``
    of the working array and to ``x[k:]``.

    Parameters
    ----------
    L : ndarray
        Triangular factor. It is copied into the result buffer unless
        ``out is L``.
    x : ndarray
        Update vector; ``len(x)`` determines the number of rotations.
    upper : bool, optional
        When True the rotations are applied to the transposed view of the
        result buffer, and the buffer is transposed back before returning.
    overwrite_x : bool, optional
        When False (default) the rotations act on a copy of ``x``.
    out : ndarray, optional
        Result buffer. When omitted, a copy of ``L`` is allocated in
        C order (``upper=True``) or Fortran order (``upper=False``).

    Returns
    -------
    ndarray
        The updated factor.

    Notes
    -----
    NOTE(review): the flat offset ``k * n + k`` addresses element ``[k, k]``
    only when the working array is Fortran-contiguous; a caller-supplied
    ``out`` is not checked for that layout -- confirm against callers.
    """
    if out is None:
        out = np.copy(L, order='C' if upper else 'F')
    elif out is not L:
        np.copyto(out, L)

    vec = x if overwrite_x else np.copy(x)
    # For an upper factor operate on its transpose view.
    work = out.T if upper else out

    make_rotation = blas.get_blas_funcs('rotg', (L, ))
    apply_rotation = blas.get_blas_funcs('rot', (L, vec))

    size = len(vec)
    for col in range(size):
        cos, sin = make_rotation(work[col, col], vec[col])
        apply_rotation(work, vec, cos, sin,
                       n=size - col,
                       offx=col * size + col,
                       offy=col,
                       overwrite_x=True,
                       overwrite_y=True)

    return work.T if upper else out
def estim_kinship(self, std=True, chunk_size=2048):
    """
    Estimate kinship.

    The marker matrix ``self.d_snps`` is processed in column chunks. Within a
    chunk, missing values (NaN) are replaced by the column mean, columns are
    centred, optionally scaled to unit standard deviation, divided by
    ``sqrt(p)`` (``p`` = total number of markers) and accumulated as
    ``K += G @ G.T`` through BLAS ``gemm``.

    Columns with zero standard deviation are already all-zero after centring;
    their scale is treated as 1 so that they contribute nothing instead of
    turning the whole matrix into NaN.

    :param std: If True, then standardized K is estimated.
        The default is True.
    :param chunk_size: Size of chunk used to compute K.
        The default is 2048.
    :return: Kinship matrix. A ``pandas.DataFrame`` labelled with
        ``self.d_snps.index`` when that attribute exists, otherwise the raw
        ``numpy`` array.
    :raises ValueError: If fewer than two markers are available or
        ``chunk_size`` is not positive.
    """
    if len(self.d_snps.columns) < 2:
        raise ValueError('Kinship matrix cannot be calculated')
    if chunk_size < 1:
        # A non-positive step would never advance through the markers.
        raise ValueError('chunk_size must be a positive integer')

    markers = np.array(self.d_snps)
    n, p = markers.shape
    out = np.zeros((n, n), order="F")
    gemm = get_blas_funcs("gemm", [out])
    scale = np.sqrt(p)

    for start in range(0, p, chunk_size):
        g = markers[:, start:start + chunk_size]
        m = np.nanmean(g, 0)
        # Mean-impute missing genotypes, then centre each column.
        g = np.where(np.isnan(g), m, g) - m
        if std:
            sd = np.std(g, 0)
            # Constant columns: avoid 0/0 -> NaN.
            sd[sd == 0] = 1.0
            g /= sd
        g /= scale
        # out <- 1.0 * g @ g.T + 1.0 * out, written in place (overwrite_c=1).
        gemm(1.0, g, g, 1.0, out, 0, 1, 1)

    try:
        c = self.d_snps.index
    except AttributeError:
        return out
    return pd.DataFrame(out, columns=c, index=c)
def test_rot():
    """Check plane rotations for all four dtypes.

    ``srot``/``drot`` come from BLAS while ``crot``/``zrot`` come from LAPACK.
    The same set of offset/stride/length combinations is exercised for each
    dtype, followed by an in-place (``overwrite_x``/``overwrite_y``) call.
    """
    for dtype in 'fdFD':
        c, s = 0.6, 0.8
        u = np.ones(4, dtype) * 3
        v = np.ones(4, dtype) * 4
        atol = 10**-(np.finfo(dtype).precision-1)

        if dtype in 'fd':
            rot = get_blas_funcs('rot', dtype=dtype)
            f = 4
        else:
            rot = get_lapack_funcs('rot', dtype=dtype)
            s *= -1j
            v *= 1j
            f = 4j

        # (keyword arguments, expected [x, y] after rotation)
        cases = [
            ({}, [[5, 5, 5, 5], [0, 0, 0, 0]]),
            ({'n': 2}, [[5, 5, 3, 3], [0, 0, f, f]]),
            ({'offx': 2, 'offy': 2}, [[3, 3, 5, 5], [f, f, 0, 0]]),
            ({'incx': 2, 'offy': 2, 'n': 2}, [[5, 3, 5, 3], [f, f, 0, 0]]),
            ({'offx': 2, 'incy': 2, 'n': 2}, [[3, 3, 5, 5], [0, f, 0, f]]),
            ({'offx': 2, 'incx': 2, 'offy': 2, 'incy': 2, 'n': 1},
             [[3, 3, 5, 3], [f, f, 0, f]]),
            ({'incx': -2, 'incy': -2, 'n': 2}, [[5, 3, 5, 3], [0, f, 0, f]]),
        ]
        for kwargs, expected in cases:
            assert_allclose(rot(u, v, c, s, **kwargs), expected, atol=atol)

        # In-place variant must hand back the very same arrays.
        a, b = rot(u, v, c, s, overwrite_x=1, overwrite_y=1)
        assert_(a is u)
        assert_(b is v)
        assert_allclose(a, [5, 5, 5, 5], atol=atol)
        assert_allclose(b, [0, 0, 0, 0], atol=atol)
def lin_comb_Q_T(coeff,Q_T,out=None):
    r""" Computes a linear combination of the Lanczos basis vectors:

    .. math::
        v_j = \sum_{i=1}^{m} c_i \left(Q^T\right)_{ij}

    The first basis vector is scaled into `out`; every further vector is
    accumulated with BLAS ``axpy``. Iteration stops as soon as either the
    coefficients or the basis vectors are exhausted.

    Parameters
    -----------
    coeff : (m,) array_like
        list of coefficients to compute the linear combination of Lanczos basis vectors with.
    Q_T : (m,n) numpy.ndarray, generator
        Lanczos basis vectors or a generator for the Lanczos basis.
    out : (n,) numpy.ndarray, optional
        Array to store the result in. Must match a basis vector in shape,
        have the result dtype of `coeff` and the basis, and be a writable
        C-contiguous array.

    Returns
    --------
    (n,) numpy.ndarray
        Linear combination :math:`v` of Lanczos basis vectors.

    Raises
    ------
    ValueError
        If `out` has the wrong shape or dtype, or is not a writable
        C-contiguous array.

    Examples
    --------
    >>> v = lin_comb_Q_T(coeff,Q_T)

    """
    coeff = _np.asanyarray(coeff)

    vectors = iter(Q_T[:] if isinstance(Q_T,_np.ndarray) else Q_T)
    first = next(vectors)

    dtype = _np.result_type(first.dtype,coeff.dtype)

    if out is None:
        out = _np.zeros(first.shape,dtype=dtype)
    else:
        if out.shape != first.shape:
            raise ValueError("'out' must have same shape as a Lanczos vector.")
        if out.dtype != dtype:
            raise ValueError("argument 'out' has dtype {}, expecting dtype {}".format(out.dtype,dtype))
        if not out.flags["CARRAY"]:
            raise ValueError("argument 'out' must be C-contiguous and writable.")

    # BLAS routine: y <- y + a * x
    axpy = get_blas_funcs('axpy', arrays=(out,first))
    size = first.size

    # out <- c_0 * q_0, then accumulate the remaining terms in place
    _np.multiply(first,coeff[0],out=out)
    for weight,vec in zip(coeff[1:],vectors):
        axpy(vec,out,size,weight)

    return out
def linear_kinship(G, out=None, verbose=True):
    r"""Estimate Kinship matrix via linear kernel.

    The columns of ``G`` are processed in chunks obtained from
    ``_get_chunks(G)``. In each chunk NaN entries are replaced by the column
    mean, columns are centred, divided by their standard deviation and by
    ``sqrt(p)``, and the outer product of the chunk with itself is added to
    the result via BLAS ``gemm``.

    Parameters
    ----------
    G : array_like
        Two-dimensional matrix of shape ``(n, p)``.
    out : ndarray, optional
        Accumulator of shape ``(n, n)``; converted with ``asfortranarray``.
        A zero-filled Fortran-ordered array is created when omitted.
    verbose : bool, optional
        Show a progress bar when ``True``.

    Returns
    -------
    ndarray
        The accumulated ``(n, n)`` matrix.

    Examples
    --------
    .. doctest::

        >>> from numpy.random import RandomState
        >>> from numpy import array_str
        >>> from limix.stats import linear_kinship
        >>>
        >>> random = RandomState(1)
        >>> X = random.randn(4, 100)
        >>> K = linear_kinship(X, verbose=False)
        >>> print(array_str(K, precision=4))
        [[ 0.9131 -0.1928 -0.3413 -0.379 ]
         [-0.1928  0.8989 -0.2356 -0.4704]
         [-0.3413 -0.2356  0.9578 -0.3808]
         [-0.379  -0.4704 -0.3808  1.2302]]
    """
    from numpy import sqrt, zeros, asfortranarray, where, asarray, nanmean, std, isnan
    from scipy.linalg.blas import get_blas_funcs
    from tqdm import tqdm

    n_samples, n_markers = G.shape
    if out is None:
        out = zeros((n_samples, n_samples), order="F")
    else:
        out = asfortranarray(out)

    widths = _get_chunks(G)
    gemm = get_blas_funcs("gemm", [out])
    scale = sqrt(n_markers)

    lo = 0
    for width in tqdm(widths, desc="Kinship", disable=not verbose):
        hi = lo + width

        block = asarray(G[:, lo:hi])
        col_mean = nanmean(block, 0)
        # mean-impute missing entries, then centre
        block = where(isnan(block), col_mean, block) - col_mean
        block /= std(block, 0)
        block /= scale

        # out <- block @ block.T + out, in place
        gemm(1.0, block, block, 1.0, out, 0, 1, 1)

        lo = hi

    return out
def __init__(self, solver):
    """Allocate the work storage this scheme needs for `solver`.

    One `CoeffSystem` is created for the right-hand side and one for `MX0`,
    plus `self.stages` systems each for `LX` and `F`. The BLAS `axpy`
    routine matching `solver.dtype` is looked up once and cached.
    """

    def new_system():
        # Every buffer shares the solver's subproblems and dtype.
        return CoeffSystem(solver.subproblems, dtype=solver.dtype)

    self.solver = solver
    self.RHS = new_system()

    # Per-stage coefficient systems
    self.MX0 = new_system()
    self.LX = [new_system() for _ in range(self.stages)]
    self.F = [new_system() for _ in range(self.stages)]

    self._LHS_params = None
    self.axpy = blas.get_blas_funcs('axpy', dtype=solver.dtype)
def _lanczos_vec_iter_core(A,v0,a,b):
    """Regenerate Lanczos basis vectors from stored recurrence coefficients.

    Yields ``a.size`` vectors. All yielded objects are the *same* read-only
    view onto an internal buffer that is overwritten on every step, so a
    consumer must copy a vector if it is needed after advancing the generator.

    Parameters
    ----------
    A : object
        Operator exposing ``dtype`` and ``dot``. ``A.dot(q, out=r)`` is tried
        first; if that raises ``TypeError`` the plain ``A.dot(q)`` is used.
    v0 : numpy.ndarray
        Starting vector; normalised unless its norm already equals one up to
        machine epsilon.
    a : numpy.ndarray
        Coefficients subtracted as ``a[i] * q_i`` (diagonal of the recurrence).
    b : numpy.ndarray
        Coefficients used to normalise the residual and subtracted as
        ``b[i-1] * q_{i-1}`` (off-diagonal of the recurrence).
    """
    dtype = _np.result_type(A.dtype,v0.dtype)

    q = v0.astype(dtype,copy=True)
    nrm = _np.linalg.norm(q)
    if _np.abs(nrm-1.0) > _np.finfo(dtype).eps:
        _np.divide(q,nrm,out=q)

    # read-only window onto q handed out to the consumer
    q_view = q[:]
    q_view.setflags(write=0,uic=0)

    yield q_view

    n_steps = a.size
    length = q.size
    axpy = get_blas_funcs('axpy', arrays=(q,))

    q_prev = _np.zeros_like(v0,dtype=dtype)
    r = _np.zeros_like(v0,dtype=dtype)

    # probe whether the operator supports writing into a preallocated array
    try:
        A.dot(q,out=r)
    except TypeError:
        r[:] = A.dot(q)
        use_out = False
    else:
        use_out = True

    # r <- A q_0 - a_0 q_0
    axpy(q,r,length,-a[0])

    for step in range(1,n_steps):
        beta = b[step-1]
        q_prev[:] = q
        _np.divide(r,beta,out=q)

        yield q_view

        if use_out:
            A.dot(q,out=r)
        else:
            r[:] = A.dot(q)

        # r <- A q_i - b_{i-1} q_{i-1} - a_i q_i
        axpy(q_prev,r,length,-beta)
        axpy(q,r,length,-a[step])
def linalg_info(method, dtype, method_dict=_linalg_info_base, dtype_dict=_linalg_info_dtype):
    """ Faster BLAS/LAPACK methods to be returned without too many lookups and array checks

    Resolved routines are stored in `method_dict` (keyed by the string that
    `dtype_dict` maps `dtype` to, then by `method`) so that later calls are a
    plain dictionary lookup. LAPACK is queried first; BLAS is the fallback
    when the error message reports a missing LAPACK function.

    Parameters
    ----------
    method : str
       BLAS/LAPACK instance to retrieve
    dtype : numpy.dtype
       matrix corresponding data-type
    method_dict : dict, optional
       cache of already resolved routines, one sub-dictionary per dtype string
    dtype_dict : dict, optional
       mapping from `dtype` to the key used in `method_dict`

    Returns
    -------
    Function to call corresponding to method `method` in precision `dtype`.

    Raises
    ------
    ValueError
       if the corresponding method is not present
    """
    # Cache belonging to this precision
    cache = method_dict[dtype_dict[dtype]]

    if method not in cache:
        # First request: resolve the routine and remember it
        try:
            func = get_lapack_funcs(method, dtype=dtype)
        except ValueError as e:
            if 'LAPACK function' not in str(e):
                raise e
            func = get_blas_funcs(method, dtype=dtype)
        cache[method] = func

    return cache[method]
def __init__(self, solver):
    """Allocate the history buffers this scheme needs for `solver`.

    Creates a right-hand-side `CoeffSystem`, a deque of `self.steps` zero
    timesteps, and deques of `self.amax`, `self.bmax` and `self.cmax`
    coefficient systems for `MX`, `LX` and `F` respectively. The BLAS `axpy`
    routine matching `solver.dtype` is looked up once and cached.
    """

    def new_system():
        # Every buffer shares the solver's subproblems and dtype.
        return CoeffSystem(solver.subproblems, dtype=solver.dtype)

    self.solver = solver
    self.RHS = new_system()

    # Recent timesteps, initialised to zero
    self.dt = deque([0.] * self.steps)

    # Coefficient systems for the multistep history
    self.MX = deque(new_system() for _ in range(self.amax))
    self.LX = deque(new_system() for _ in range(self.bmax))
    self.F = deque(new_system() for _ in range(self.cmax))

    # Bookkeeping
    self._iteration = 0
    self._LHS_params = None
    self.axpy = blas.get_blas_funcs('axpy', dtype=solver.dtype)
def megmres(A, B, m=1000, X0=None, tol=1e-8, maxit=None, M1=None,
            callback=None, plot_ritz=False):
    """Restarted GMRES with right preconditioning for ``A X = B``.

    ``B`` may have any shape; inner products are taken with ``np.vdot`` and
    norms with ``norm``, so the unknown is treated as one long vector. The
    Krylov basis is built with modified Gram-Schmidt and the Hessenberg
    matrix is triangularised on the fly with complex Givens rotations
    (BLAS ``zrotg``).

    Parameters
    ----------
    A : object
        Operator providing ``A.dot(X)``.
    B : numpy.ndarray
        Right-hand side.
    m : int, optional
        Restart length (size of the Krylov basis per cycle).
    X0 : numpy.ndarray, optional
        Initial guess; zeros when omitted. It is copied to a complex array.
    tol : float, optional
        Relative tolerance; iteration stops once the residual norm drops
        below ``tol * norm(B)``.
    maxit : int, optional
        Maximum number of restart cycles; defaults to twice the number of
        entries in ``B``.
    M1 : object, optional
        Preconditioner providing ``M1.solve(X)``; identity when omitted.
    callback : callable, optional
        Called with the current residual norm after every update of it.
    plot_ritz : bool, optional
        If True, ``plot_ritzvals`` is called with the Hessenberg block on
        convergence inside a cycle or when the last cycle has been reached.

    Returns
    -------
    X : numpy.ndarray
        Approximate solution.
    info : int
        0 if the tolerance was reached, 1 otherwise.
    """
    size = B.shape
    if maxit is None:
        maxit = 2 * int(np.prod(size))

    if M1 is None:
        # Identity preconditioner
        class _IdentityPreconditioner(object):
            def solve(self, rhs):
                return rhs
        M1 = _IdentityPreconditioner()

    if X0 is None:
        X0 = np.zeros(size, dtype=complex)
    # Complex working copy: the in-place updates below add complex data,
    # which would fail on a real-typed initial guess.
    X = np.array(X0, dtype=complex)

    bnrm = norm(B)
    info = 1

    # Zero right-hand side: the solution is the null vector.
    if bnrm == 0.0:
        info = 0
        return np.zeros(size), info

    # Initial residual
    R = B - A.dot(X)
    rnrm = norm(R)
    # Absolute threshold derived from the relative tolerance
    tolb = tol * bnrm
    if callback is not None:
        callback(rnrm)

    if rnrm < tolb:
        # Initial guess is already good enough
        info = 0
        return X, info

    # Work storage
    rotmat = get_blas_funcs('rotg', dtype=np.complex128)  # ZROTG
    V = [np.zeros(size, dtype=complex) for _ in range(m + 1)]
    H = np.zeros((m + 1, m), dtype=complex)
    cs = np.zeros(m + 1, dtype=np.float64)
    sn = np.zeros(m + 1, dtype=np.complex128)
    e1 = np.zeros(m + 1, dtype=complex)
    e1[0] = 1.

    for _iter in range(0, maxit):
        # Start of a restart cycle
        V[0] = R / rnrm
        s = rnrm * e1
        for i in range(0, m):
            # Extend the orthonormal basis (modified Gram-Schmidt)
            W = A.dot(M1.solve(V[i]))
            for k in range(0, i + 1):
                H[k, i] = np.vdot(V[k], W)
                W = W - H[k, i] * V[k]
            H[i + 1, i] = norm(W)
            if H[i + 1, i] != 0.0:
                V[i + 1] = W / H[i + 1, i]
            # else: exact breakdown -- the Krylov space is invariant, the
            # rotation below yields a zero residual and V[i + 1] is not used.

            # Apply the previous Givens rotations to the new column
            for k in range(0, i):
                temp = cs[k] * H[k, i] + sn[k] * H[k + 1, i]
                H[k + 1, i] = -np.conj(sn[k]) * H[k, i] + cs[k] * H[k + 1, i]
                H[k, i] = temp

            # New rotation annihilating H[i + 1, i]
            cs_tmp, sn[i] = rotmat(H[i, i], H[i + 1, i])
            cs[i] = cs_tmp.real  # BUGFIX: BLAS wrapper out params
            temp = cs[i] * s[i]
            s[i + 1] = -np.conj(sn[i]) * s[i]
            s[i] = temp
            H[i, i] = cs[i] * H[i, i] + sn[i] * H[i + 1, i]
            H[i + 1, i] = 0.0

            rnrm = abs(s[i + 1])
            if callback is not None:
                callback(rnrm)

            if rnrm < tolb:
                # i + 1 basis vectors have been processed, so the triangular
                # system to solve has order i + 1.
                j = i + 1
                y = solve(H[:j, :j], s[:j])
                Xtmp = np.zeros(size, dtype=complex)
                for k in range(j):
                    Xtmp += y[k] * V[k]
                X += M1.solve(Xtmp)
                info = 0
                if plot_ritz:
                    plot_ritzvals(H[:j, :j])
                return X, info

        # End of cycle without convergence: update with all m basis vectors
        y = solve(H[:m, :m], s[:m])
        Xtmp = np.zeros(size, dtype=complex)
        for k in range(m):
            Xtmp += y[k] * V[k]
        X += M1.solve(Xtmp)

        # True residual for the restart
        R = B - A.dot(X)
        rnrm = norm(R)
        if callback is not None:
            callback(rnrm)
        if rnrm < tolb:
            info = 0
            break

    # `and`, not `&`: the bitwise operator binds tighter than `==`.
    if plot_ritz and maxit > 0 and _iter == maxit - 1:
        plot_ritzvals(H[:m, :m])
    return X, info
def symeig_semidefinite_ldl(A,
                            B=None,
                            eigenvectors=True,
                            turbo="on",
                            rng=None,
                            type=1,
                            overwrite=False,
                            rank_threshold=1e-12,
                            dfc_out=None):
    """LDL-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.

    This can be used if the normal ``symeig()`` call in ``_stop_training()``
    throws ``SymeigException('Covariance matrices may be singular')``.

    This solver uses SciPy's raw LAPACK interface to access LDL decomposition.
    http://www.netlib.org/lapack/lug/node54.html describes how to solve a
    generalized eigenvalue problem with positive definite B using Cholesky/LL
    decomposition. We extend this method to solve for positive semidefinite B
    using LDL decomposition, which is a variant of Cholesky/LL decomposition
    for indefinite Matrices.

    Accessing raw LAPACK's LDL decomposition (sytrf) is challenging. This code
    is partly based on code for SciPy 1.1:
    http://github.com/scipy/scipy/pull/7941/files#diff-9bf9b4b2f0f40415bc0e72143584c889

    We optimized and shortened that code for the real-valued positive
    semidefinite case.

    This procedure is almost as efficient as the ordinary eigh implementation.
    This is because implementations for symmetric generalized eigenvalue
    problems usually perform the Cholesky approach mentioned above. The more
    general LDL decomposition is only slightly more expensive than Cholesky,
    due to pivotization.

    .. note:: This method requires SciPy >= 1.0.

    The signature of this function equals that of ``mdp.utils.symeig``, but
    has two additional parameters:

    :param A: Left-hand-side matrix. Modified in place when ``overwrite`` is
        true, otherwise a copy is permuted and transformed.
    :param B: Right-hand-side matrix that is LDL-factorized. Rows/columns
        reported by ``_find_blank_data_idx`` are removed from ``A`` and ``B``
        before the factorization.
    :param eigenvectors: Accepted for signature compatibility; not read in
        this function.
    :param turbo: Forwarded to ``mdp.utils.symeig``.
    :param rng: Forwarded to ``mdp.utils.symeig``.
    :param type: Must be 1.
    :param overwrite: Passed as ``overwrite_a`` to ``sytrf`` and controls
        whether ``A`` is copied.

    :param rank_threshold: A threshold to determine if an eigenvalue counts
        as zero.
    :type rank_threshold: float

    :param dfc_out: If ``dfc_out`` is not ``None``, ``dfc_out.rank_deficit``
        will be set to an integer indicating how many zero-eigenvalues were
        detected.

    :return: Tuple ``(eg, ev)`` where ``eg`` is the first value returned by
        ``mdp.utils.symeig`` on the reduced problem and ``ev`` the
        back-transformed second value, zero-padded to the original size when
        rows/columns were removed.
    :raises ValueError: If ``type`` is not 1.
    :raises SymeigException: If the required LAPACK/BLAS routines cannot be
        obtained from SciPy.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')

    # LDL-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if not nonzero_idx is None:
        # remember the full size so ev can be padded back at the end
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    # This method has special requirements, which is why we import here
    # rather than module wide.
    from scipy.linalg.lapack import get_lapack_funcs, _compute_lwork
    from scipy.linalg.blas import get_blas_funcs
    try:
        # trtri: triangular inverse, sytrf: LDL factorization,
        # trmm: triangular matrix product
        inv_tri, solver, solver_lwork = get_lapack_funcs(
            ('trtri', 'sytrf', 'sytrf_lwork'), (B, ))
        mult_tri, = get_blas_funcs(('trmm', ), (B, ))
    except ValueError:
        err_msg = ("ldl method for solving symeig with rank deficit B "
                   "requires at least SciPy 1.0.")
        raise SymeigException(err_msg)

    n = B.shape[0]
    arng = numx.arange(n)
    lwork = _compute_lwork(solver_lwork, n, lower=1)
    # LDL factorization of B (lower variant); piv holds LAPACK's 1-based
    # pivot information
    lu, piv, _ = solver(B, lwork=lwork, lower=1, overwrite_a=overwrite)

    # using piv properly requires some postprocessing:
    swap_ = numx.arange(n)
    pivs = numx.zeros(swap_.shape, dtype=int)
    skip_2x2 = False
    for ind in range(n):
        # If previous spin belonged already to a 2x2 block
        if skip_2x2:
            skip_2x2 = False
            continue

        cur_val = piv[ind]
        # do we have a 1x1 block or not?
        if cur_val > 0:
            if cur_val != ind + 1:
                # Index value != array value --> permutation required
                swap_[ind] = swap_[cur_val - 1]
            pivs[ind] = 1
        # Not.
        # NOTE(review): piv[ind + 1] is read without a bounds check; this
        # presumably relies on sytrf never reporting a negative pivot in the
        # last position on its own -- confirm.
        elif cur_val < 0 and cur_val == piv[ind + 1]:
            # first neg entry of 2x2 block identifier
            if -cur_val != ind + 2:
                # Index value != array value --> permutation required
                swap_[ind + 1] = swap_[-cur_val - 1]
            pivs[ind] = 2
            skip_2x2 = True

    # replay the recorded swaps from the last index to the first, on both
    # the factor and the bookkeeping permutation
    full_perm = numx.arange(n)
    for ind in range(n - 1, -1, -1):
        s_ind = swap_[ind]
        if s_ind != ind:
            col_s = ind if pivs[ind] else ind - 1  # 2x2 block
            lu[[s_ind, ind], col_s:] = lu[[ind, s_ind], col_s:]
            full_perm[[s_ind, ind]] = full_perm[[ind, s_ind]]
    # usually only a few indices actually permute, so we reduce perm:
    perm = (full_perm - arng).nonzero()[0]
    perm_idx = full_perm[perm]
    # end of ldl postprocessing
    # perm_idx and perm now describe a permutation as dest and source indexes

    lu[perm_idx, :] = lu[perm, :]

    # diagonal magnitudes decide which directions count as non-zero
    dgd = abs(numx.diag(lu))
    dnz = (dgd > rank_threshold).nonzero()[0]
    dgd_sqrt_I = numx.sqrt(1.0 / dgd[dnz])
    rank_deficit = len(dgd) - len(dnz)  # later used

    # c, lower, unitdiag, overwrite_c
    LI, _ = inv_tri(lu, 1, 1, 1)  # invert triangular
    # we mainly apply tril here, because we need to make a
    # copy of LI anyway, because original result from
    # dtrtri seems to be read-only regarding some operations
    LI = numx.tril(LI, -1)
    LI[arng, arng] = 1
    # scale the retained rows by 1/sqrt(|d|)
    LI[dnz, :] *= dgd_sqrt_I.reshape((dgd_sqrt_I.shape[0], 1))

    # apply the same permutation to rows and columns of A
    A2 = A if overwrite else A.copy()
    A2[perm_idx, :] = A2[perm, :]
    A2[:, perm_idx] = A2[:, perm]
    # alpha, a, b, side 0=left 1=right, lower, trans_a, diag 1=unitdiag,
    # overwrite_b
    A2 = mult_tri(1.0, LI, A2, 1, 1, 1, 0, 1)  # A2 = mult(A2, LI.T)
    A2 = mult_tri(1.0, LI, A2, 0, 1, 0, 0, 1)  # A2 = mult(LI, A2)
    # keep only rows/columns belonging to non-zero diagonal entries
    A2 = A2[dnz, :]
    A2 = A2[:, dnz]

    # overwrite=True is okay here, because at this point A2 is a copy anyway
    eg, ev = mdp.utils.symeig(A2, None, True, turbo, rng, overwrite=True)
    # back-transform: generic product on the reduced rows when rank
    # deficient, triangular product otherwise
    ev = mdp.utils.mult(LI[dnz].T, ev) if rank_deficit \
        else mult_tri(1.0, LI, ev, 0, 1, 1, 0, 1)
    # undo the permutation on the rows of ev
    ev[perm] = ev[perm_idx]

    if not nonzero_idx is None:
        # restore ev to original size
        rank_deficit += orig_shape[0] - len(nonzero_idx)
        ev_tmp = ev
        ev = numx.zeros((orig_shape[0], ev.shape[1]))
        ev[nonzero_idx, :] = ev_tmp

    if not dfc_out is None:
        dfc_out.rank_deficit = rank_deficit
    return eg, ev
""" This module contains a collection of routines for operating on sparse matrices on the scipy.sparse formats, for use internally by other modules throughout QuTiP. """ __all__ = ['sp_fro_norm', 'sp_inf_norm', 'sp_L2_norm', 'sp_max_norm', 'sp_one_norm', 'sp_reshape', 'sp_eigs', 'sp_expm', 'sp_permute', 'sp_reverse_permute', 'sp_bandwidth', 'sp_profile'] import scipy.sparse as sp import scipy.sparse.linalg as spla import numpy as np import scipy.linalg as la from scipy.linalg.blas import get_blas_funcs _dznrm2 = get_blas_funcs("znrm2") from qutip.cy.sparse_utils import (_sparse_profile, _sparse_permute, _sparse_reverse_permute, _sparse_bandwidth, _isdiag) from qutip.settings import debug import qutip.logging_utils logger = qutip.logging_utils.get_logger() if debug: import inspect def sp_fro_norm(data): """ Frobius norm for sparse matrix
import scipy.sparse as sp from scipy.integrate._ode import zvode from scipy.linalg.blas import get_blas_funcs from qutip.qobj import Qobj from qutip.parallel import parfor, parallel_map, serial_map from qutip.cy.spmatfuncs import cy_ode_rhs, cy_expect_psi_csr, spmv, spmv_csr from qutip.cy.codegen import Codegen from qutip.cy.utilities import _cython_build_cleanup from qutip.solver import Options, Result, config from qutip.rhs_generate import _td_format_check, _td_wrap_array_str from qutip.settings import debug from qutip.ui.progressbar import TextProgressBar, BaseProgressBar import qutip.settings dznrm2 = get_blas_funcs("znrm2", dtype=np.float64) if debug: import inspect # # Internal, global variables for storing references to dynamically loaded # cython functions # _cy_col_spmv_func = None _cy_col_expect_func = None _cy_col_spmv_call_func = None _cy_col_expect_call_func = None _cy_rhs_func = None
def merge(self, other, decay = 1.0):
    """
    Merge this Projection with another.

    Content of `other` is destroyed in the process, so pass this function a
    copy if you need it further.

    This is the optimized merge described in algorithm 5.

    Steps performed on the left factors ``u`` and singular values ``s``:

    1. ``c = self.u^T * other.u`` and the component of ``other.u`` orthogonal
       to ``self.u`` are computed with BLAS ``gemm`` (the latter in place).
    2. That component is QR-factorized with LAPACK ``geqrf``/``orgqr``.
    3. The small block matrix ``[[diag(decay * s1), c * s2], [0, r * s2]]``
       is decomposed with a dense SVD and truncated by ``clipSpectrum``.
    4. ``self.u`` is rotated into the new basis and ``self.s`` replaced.

    `other` empty (``other.u is None``) is a no-op; if this projection is
    empty it simply takes copies of `other`'s factors.

    :param other: Projection to merge in; its ``u`` attribute is deleted.
    :param decay: Factor applied to this projection's singular values
        before merging.
    :raises ValueError: If the two projections have a different ``m``.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        self.u = other.u.copy()
        self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError("vector space mismatch: update has %s features, expected %s" %
                         (other.m, self.m))
    logger.info("merging projections: %s + %s" % (str(self.u.shape), str(other.u.shape)))
#    diff = numpy.dot(self.u.T, self.u) - numpy.eye(self.u.shape[1])
#    logger.info('orth error after=%f' % numpy.sum(diff * diff))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with gorgqr.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in numpy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(self.u) # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm, = get_blas_funcs(('gemm',), (self.u,))
    logger.debug("constructing orthogonal component")
    c = gemm(1.0, self.u, other.u, trans_a = True)
    # other.u <- other.u - self.u * c, written in place
    gemm(-1.0, self.u, c, beta = 1.0, c = other.u, overwrite_c = True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf',), (other.u,))
    # first call (lwork = -1) is a workspace query; work[0] is then passed as lwork
    qr, tau, work, info = geqrf(other.u, lwork = -1, overwrite_a = True) # sometimes segfaults with overwrite_a=True...
    qr, tau, work, info = geqrf(other.u, lwork = work[0], overwrite_a = True) # sometimes segfaults with overwrite_a=True...
    del other.u
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2: # rare case...
        qr = qr[:,:m] # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork = -1, overwrite_a = True)
    q, work, info = gorgqr(qr, tau, lwork = work[0], overwrite_a = True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find rotation that diagonalizes r
    # The zero block must have as many rows as r, i.e. min(m, n2): when
    # m < n2 the factor r only has m rows and padding to n2 rows would make
    # the block rows of bmat inconsistent.
    k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s],
                    [matutils.pad(numpy.matrix([]).reshape(0, 0), min(m, n2), n1), r * other.s]])
    logger.debug("computing SVD of %s dense matrix" % str(k.shape))
    u_k, s_k, _ = numpy.linalg.svd(k, full_matrices = False) # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(

    # from here on `k` is the number of factors to keep
    k = clipSpectrum(s_k, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    self.u = gemm(1.0, self.u, u_k[:n1]) # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
    gemm(1.0, q, u_k[n1:], beta = 1.0, c = self.u, overwrite_c = True) # u = [u,u']*u_k
    self.s = s_k
def merge(self, other, decay = 1.0):
    """
    Merge this Projection with another.

    Content of `other` is destroyed in the process, so pass this function a
    copy if you need it further.

    This is the optimized merge described in algorithm 5.

    `other.s is None` signals that `other.u` holds a raw document chunk
    rather than an SVD factor. If this projection is still empty the chunk
    is decomposed directly (dense numpy SVD when ``self.m * self.k < 10000``,
    otherwise `sparsesvd`); if not, the chunk is densified and merged with
    singular values taken as 1.0.

    Merge steps on the left factors ``u`` and singular values ``s``:

    1. ``c = self.u^T * other.u`` and the component of ``other.u`` orthogonal
       to ``self.u`` are computed with BLAS ``gemm`` (the latter in place).
    2. That component is QR-factorized with LAPACK ``geqrf``/``orgqr``.
    3. The small block matrix ``[[diag(decay * s1), c * s2], [0, r * s2]]``
       is decomposed with a dense SVD (falling back to an SVD of ``k * k^T``
       if the first attempt raises ``LinAlgError``) and truncated by
       ``clipSpectrum``.
    4. ``self.u`` is rotated into the new basis and ``self.s`` replaced.

    :param other: Projection to merge in; its ``u`` attribute is deleted
        during a real merge.
    :param decay: Factor applied to this projection's singular values
        before merging.
    :raises ValueError: If the two projections have a different ``m``.
    :raises ImportError: If the sparse SVD path is needed and `sparsesvd`
        cannot be imported.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        if other.s is None:
            # other.u contains a direct document chunk, not svd => perform svd
            docs = other.u
            assert scipy.sparse.issparse(docs)
            if self.m * self.k < 10000:
                # SVDLIBC gives spurious results for small matrices.. run full
                # LAPACK on them instead
                logger.info("computing dense SVD of %s matrix" % str(docs.shape))
                u, s, vt = numpy.linalg.svd(docs.todense(), full_matrices = False)
            else:
                try:
                    import sparsesvd
                except ImportError:
                    raise ImportError("for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`")
                logger.info("computing sparse SVD of %s matrix" % str(docs.shape))
                ut, s, vt = sparsesvd.sparsesvd(docs, self.k + 30) # ask for a few extra factors, because for some reason SVDLIBC sometimes returns fewer factors than requested
                u = ut.T
                del ut
            # only the left factors and singular values are kept
            del vt
            k = clipSpectrum(s, self.k)
            self.u = u[:, :k].copy('F')
            self.s = s[:k]
        else:
            self.u = other.u.copy('F')
            self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError("vector space mismatch: update has %s features, expected %s" %
                         (other.m, self.m))
    logger.info("merging projections: %s + %s" % (str(self.u.shape), str(other.u.shape)))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    if other.s is None:
        # raw document chunk: densify it and use unit singular values
        other.u = other.u.todense()
        other.s = 1.0 # broadcasting will promote this to eye(n2) where needed
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with gorgqr.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in numpy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(self.u) # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm, = get_blas_funcs(('gemm',), (self.u,))
    logger.debug("constructing orthogonal component")
    c = gemm(1.0, self.u, other.u, trans_a = True)
    # other.u <- other.u - self.u * c, written in place
    gemm(-1.0, self.u, c, beta = 1.0, c = other.u, overwrite_c = True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf',), (other.u,))
    # first call (lwork = -1) is a workspace query; work[0] is then passed as lwork
    qr, tau, work, info = geqrf(other.u, lwork = -1, overwrite_a = True) # sometimes segfaults with overwrite_a=True...?
    qr, tau, work, info = geqrf(other.u, lwork = work[0], overwrite_a = True) # sometimes segfaults with overwrite_a=True...?
    del other.u
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2: # rare case, #features < #topics
        qr = qr[:, :m] # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr',), (qr,))
    q, work, info = gorgqr(qr, tau, lwork = -1, overwrite_a = True)
    q, work, info = gorgqr(qr, tau, lwork = work[0], overwrite_a = True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find rotation that diagonalizes r
    # the zero block gets min(m, n2) rows so that it lines up with r
    k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s],
                    [matutils.pad(numpy.matrix([]).reshape(0, 0), min(m, n2), n1), r * other.s]])
    logger.debug("computing SVD of %s dense matrix" % str(k.shape))
    try:
        # in numpy < 1.1.0, running SVD sometimes results in "LinAlgError: SVD did not converge'.
        # for these early versions of numpy, catch the error and try to compute
        # SVD again, but over k*k^T.
        # see http://www.mail-archive.com/numpy-discussion@scipy.org/msg07224.html and
        # bug ticket http://projects.scipy.org/numpy/ticket/706
        u_k, s_k, _ = numpy.linalg.svd(k, full_matrices = False) # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(
    except numpy.linalg.LinAlgError:
        # NOTE(review): this uses the root `logging` module while the rest of
        # the method logs through `logger` -- confirm this is intended.
        logging.error("SVD(A) failed; trying SVD(A * A^T)")
        u_k, s_k, _ = numpy.linalg.svd(numpy.dot(k, k.T), full_matrices = False) # if this fails too, give up
        # singular values of k*k^T are the squares of those of k
        s_k = numpy.sqrt(s_k)

    # from here on `k` is the number of factors to keep
    k = clipSpectrum(s_k, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    self.u = gemm(1.0, self.u, u_k[:n1]) # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
    gemm(1.0, q, u_k[n1:], beta = 1.0, c = self.u, overwrite_c = True) # u = [u,u']*u_k
    self.s = s_k
def merge(self, other, decay=1.0):
    """
    Merge this Projection with another.

    Content of `other` is destroyed in the process, so pass this function a
    copy if you need it further.

    This is the optimized merge described in algorithm 5.

    `other.s is None` signals that `other.u` holds a raw document chunk
    rather than an SVD factor. If this projection is still empty the chunk
    is decomposed directly (dense numpy SVD when ``self.m * self.k < 10000``,
    otherwise `sparsesvd`); if not, the chunk is densified and merged with
    singular values taken as 1.0.

    Merge steps on the left factors ``u`` and singular values ``s``:

    1. ``c = self.u^T * other.u`` and the component of ``other.u`` orthogonal
       to ``self.u`` are computed with BLAS ``gemm`` (the latter in place).
    2. That component is QR-factorized with LAPACK ``geqrf``/``orgqr``.
    3. The small block matrix ``[[diag(decay * s1), c * s2], [0, r * s2]]``
       is decomposed with a dense SVD and truncated by ``clipSpectrum``.
    4. ``self.u`` is rotated into the new basis and ``self.s`` replaced.

    :param other: Projection to merge in; its ``u`` attribute is deleted
        during a real merge.
    :param decay: Factor applied to this projection's singular values
        before merging.
    :raises ValueError: If the two projections have a different ``m``.
    :raises ImportError: If the sparse SVD path is needed and `sparsesvd`
        cannot be imported.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        if other.s is None:
            # other.u contains a direct document chunk, not svd => perform svd
            docs = other.u
            assert scipy.sparse.issparse(docs)
            if self.m * self.k < 10000:
                # SVDLIBC gives spurious results for small matrices.. run full
                # LAPACK on them instead
                logger.info("computing dense SVD of %s matrix" %
                            str(docs.shape))
                u, s, vt = numpy.linalg.svd(docs.todense(), full_matrices=False)
            else:
                try:
                    import sparsesvd
                except ImportError:
                    raise ImportError(
                        "for LSA, the `sparsesvd` module is needed but not found; run `easy_install sparsesvd`"
                    )
                logger.info("computing sparse SVD of %s matrix" %
                            str(docs.shape))
                ut, s, vt = sparsesvd.sparsesvd(
                    docs, self.k + 30
                )  # ask for a few extra factors, because for some reason SVDLIBC sometimes returns fewer factors than requested
                u = ut.T
                del ut
            # only the left factors and singular values are kept
            del vt
            k = clipSpectrum(s, self.k)
            self.u = u[:, :k].copy('F')
            self.s = s[:k]
        else:
            self.u = other.u.copy('F')
            self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError(
            "vector space mismatch: update has %s features, expected %s" %
            (other.m, self.m))
    logger.info("merging projections: %s + %s" % (str(self.u.shape),
                                                  str(other.u.shape)))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    if other.s is None:
        # raw document chunk: densify it and use unit singular values
        other.u = other.u.todense()
        other.s = 1.0  # broadcasting will promote this to eye(n2) where needed
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with gorgqr.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in numpy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in suitable order for matrix products; failing to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(
        self.u)  # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm, = get_blas_funcs(('gemm', ), (self.u, ))
    logger.debug("constructing orthogonal component")
    c = gemm(1.0, self.u, other.u, trans_a=True)
    # other.u <- other.u - self.u * c, written in place
    gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix" % str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
    # first call (lwork=-1) is a workspace query; work[0] is then passed as lwork
    qr, tau, work, info = geqrf(
        other.u, lwork=-1,
        overwrite_a=True)  # sometimes segfaults with overwrite_a=True...?
    qr, tau, work, info = geqrf(
        other.u, lwork=work[0],
        overwrite_a=True)  # sometimes segfaults with overwrite_a=True...?
    del other.u
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2:  # rare case, #features < #topics
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find rotation that diagonalizes r
    # the zero block gets min(m, n2) rows so that it lines up with r
    k = numpy.bmat([[numpy.diag(decay * self.s), c * other.s], [
        matutils.pad(
            numpy.matrix([]).reshape(0, 0), min(m, n2), n1), r * other.s
    ]])
    logger.debug("computing SVD of %s dense matrix" % str(k.shape))
    u_k, s_k, _ = numpy.linalg.svd(
        k, full_matrices=False
    )  # TODO *ugly overkill*!! only need first self.k SVD factors... but there is no LAPACK wrapper for partial svd/eigendecomp in numpy :(

    # from here on `k` is the number of factors to keep
    k = clipSpectrum(s_k, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    self.u = gemm(
        1.0, self.u, u_k[:n1]
    )  # TODO temporarily creates an extra (m,k) dense array in memory. find a way to avoid this!
    gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u,
         overwrite_c=True)  # u = [u,u']*u_k
    self.s = s_k
# Any modifications or derivative works of this code must retain this # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. # pylint: disable=unused-variable, no-name-in-module, protected-access, # pylint: disable=invalid-name, import-error, exec-used """Module for unitary pulse evolution. """ import logging import numpy as np from scipy.integrate import ode from scipy.linalg.blas import get_blas_funcs from ..cy.measure import occ_probabilities, write_shots_memory dznrm2 = get_blas_funcs("znrm2", dtype=np.float64) def unitary_evolution(exp, op_system): """ Calculates evolution when there is no noise, or any measurements that are not at the end of the experiment. Args: exp (dict): Dictionary of experimental pulse and fc op_system (OPSystem): Global OpenPulse system settings Returns: array: Memory of shots.
def symeig_semidefinite_ldl(
        A, B = None, eigenvectors=True, turbo="on", rng=None,
        type=1, overwrite=False, rank_threshold=1e-12, dfc_out=None):
    """
    LDL-based routine to solve generalized symmetric positive semidefinite
    eigenvalue problems.

    This can be used in case the normal symeig() call in _stop_training()
    throws SymeigException ('Covariance matrices may be singular').

    This solver uses SciPy's raw LAPACK interface to access LDL decomposition.
    www.netlib.org/lapack/lug/node54.html describes how to solve a
    generalized eigenvalue problem with positive definite B using Cholesky/LL
    decomposition. We extend this method to solve for positive semidefinite B
    using LDL decomposition, which is a variant of Cholesky/LL decomposition
    for indefinite matrices.
    Accessing raw LAPACK's LDL decomposition (sytrf) is challenging. This code
    is partly based on code for SciPy 1.1:
    github.com/scipy/scipy/pull/7941/files#diff-9bf9b4b2f0f40415bc0e72143584c889
    We optimized and shortened that code for the real-valued positive
    semidefinite case.

    This procedure is almost as efficient as the ordinary eigh implementation.
    This is because implementations for symmetric generalized eigenvalue
    problems usually perform the Cholesky approach mentioned above. The more
    general LDL decomposition is only slightly more expensive than Cholesky,
    due to pivoting.

    The signature of this function equals that of mdp.utils.symeig, but
    has two additional parameters:

    rank_threshold: A threshold to determine if an eigenvalue counts as zero.
        It is also passed to _find_blank_data_idx and compared against the
        absolute diagonal entries of the factorization.

    dfc_out: If dfc_out is not None dfc_out.rank_deficit will be set to an
        integer indicating how many zero-eigenvalues were detected.

    Remarks on the remaining parameters, as far as this body shows:

    A, B: the two matrices of the problem. NOTE(review): B defaults to None
        but is dereferenced unconditionally (B.shape), so it appears to be
        required in practice -- confirm against callers.
    eigenvectors: accepted for signature compatibility; it is never read
        here (symeig is always called with eigenvectors=True).
    turbo, rng: forwarded unchanged to mdp.utils.symeig.
    type: only type=1 is supported; any other value raises ValueError.
    overwrite: passed as overwrite_a to the sytrf call on B; if true, A is
        also permuted and multiplied in place instead of being copied.

    Returns the pair (eg, ev): the eigenvalues delivered by mdp.utils.symeig
    for the reduced problem and the back-transformed eigenvectors. If blank
    rows/columns were removed from B, ev is zero-padded back to the original
    number of rows.

    Raises SymeigException if the required LAPACK/BLAS routines cannot be
    obtained from SciPy.

    Note:
    This method requires SciPy >= 1.0.
    """
    if type != 1:
        raise ValueError('Only type=1 is supported.')

    # LDL-based method appears to be particularly unstable if blank lines
    # and columns exist in B. So we circumvent this case:
    nonzero_idx = _find_blank_data_idx(B, rank_threshold)
    if not nonzero_idx is None:
        # remember the full size so ev can be padded back at the end
        orig_shape = B.shape
        B = B[nonzero_idx, :][:, nonzero_idx]
        A = A[nonzero_idx, :][:, nonzero_idx]

    # This method has special requirements, which is why we import here
    # rather than module wide.
    from scipy.linalg.lapack import get_lapack_funcs, _compute_lwork
    from scipy.linalg.blas import get_blas_funcs
    try:
        inv_tri, solver, solver_lwork = get_lapack_funcs(
                ('trtri', 'sytrf', 'sytrf_lwork'), (B,))
        mult_tri, = get_blas_funcs(('trmm',), (B,))
    except ValueError:
        err_msg = ("ldl method for solving symeig with rank deficit B "
                   "requires at least SciPy 1.0.")
        raise SymeigException(err_msg)

    n = B.shape[0]
    arng = numx.arange(n)
    lwork = _compute_lwork(solver_lwork, n, lower=1)
    lu, piv, _ = solver(B, lwork=lwork, lower=1, overwrite_a=overwrite)

    # using piv properly requires some postprocessing:
    # swap_[i] records the row that position i is exchanged with,
    # pivs[i] records the size (1 or 2) of the block starting at i
    # (0 marks the second row of a 2x2 block).
    swap_ = numx.arange(n)
    pivs = numx.zeros(swap_.shape, dtype=int)
    skip_2x2 = False
    for ind in range(n):
        # If previous spin belonged already to a 2x2 block
        if skip_2x2:
            skip_2x2 = False
            continue

        cur_val = piv[ind]
        # do we have a 1x1 block or not?
        if cur_val > 0:
            if cur_val != ind+1:
                # Index value != array value --> permutation required
                swap_[ind] = swap_[cur_val-1]
            pivs[ind] = 1
        # Not.
        elif cur_val < 0 and cur_val == piv[ind+1]:
            # first neg entry of 2x2 block identifier
            if -cur_val != ind+2:
                # Index value != array value --> permutation required
                swap_[ind+1] = swap_[-cur_val-1]
            pivs[ind] = 2
            skip_2x2 = True
        # NOTE(review): any other piv value (0, or an unpaired negative
        # entry) is silently ignored here -- confirm this cannot occur.

    # replay the recorded swaps from the last row to the first, both on
    # the factor and on the index vector full_perm
    full_perm = numx.arange(n)
    for ind in range(n-1, -1, -1):
        s_ind = swap_[ind]
        if s_ind != ind:
            col_s = ind if pivs[ind] else ind-1 # 2x2 block
            lu[[s_ind, ind], col_s:] = lu[[ind, s_ind], col_s:]
            full_perm[[s_ind, ind]] = full_perm[[ind, s_ind]]
    # usually only a few indices actually permute, so we reduce perm:
    perm = (full_perm-arng).nonzero()[0]
    perm_idx = full_perm[perm]
    # end of ldl postprocessing
    # perm_idx and perm now describe a permutation as dest and source indexes

    lu[perm_idx, :] = lu[perm, :]

    # diagonal entries at or below rank_threshold are treated as zero
    dgd = abs(numx.diag(lu))
    dnz = (dgd > rank_threshold).nonzero()[0]
    dgd_sqrt_I = numx.sqrt(1.0/dgd[dnz])
    rank_deficit = len(dgd) - len(dnz) # later used

    # c, lower, unitdiag, overwrite_c
    LI, _ = inv_tri(lu, 1, 1, 1) # invert triangular

    # we mainly apply tril here, because we need to make a
    # copy of LI anyway, because original result from
    # dtrtri seems to be read-only regarding some operations
    LI = numx.tril(LI, -1)
    LI[arng, arng] = 1
    # scale the rows belonging to non-zero diagonal entries by 1/sqrt(d)
    LI[dnz, :] *= dgd_sqrt_I.reshape((dgd_sqrt_I.shape[0], 1))

    A2 = A if overwrite else A.copy()
    # apply the same permutation to the rows and columns of A
    A2[perm_idx, :] = A2[perm, :]
    A2[:, perm_idx] = A2[:, perm]
    # alpha, a, b, side 0=left 1=right, lower, trans_a, diag 1=unitdiag,
    # overwrite_b
    A2 = mult_tri(1.0, LI, A2, 1, 1, 1, 0, 1) # A2 = mult(A2, LI.T)
    A2 = mult_tri(1.0, LI, A2, 0, 1, 0, 0, 1) # A2 = mult(LI, A2)
    # keep only the rows/columns with non-zero diagonal entries
    A2 = A2[dnz, :]
    A2 = A2[:, dnz]

    # overwrite=True is okay here, because at this point A2 is a copy anyway
    eg, ev = mdp.utils.symeig(A2, None, True, turbo, rng, overwrite=True)
    # back-transform the eigenvectors; with a rank deficit LI is not square
    # any more after row selection, so a general product is used
    ev = mdp.utils.mult(LI[dnz].T, ev) if rank_deficit \
        else mult_tri(1.0, LI, ev, 0, 1, 1, 0, 1)
    # undo the permutation on the eigenvector rows
    ev[perm] = ev[perm_idx]

    if not nonzero_idx is None:
        # restore ev to original size
        rank_deficit += orig_shape[0]-len(nonzero_idx)
        ev_tmp = ev
        ev = numx.zeros((orig_shape[0], ev.shape[1]))
        ev[nonzero_idx, :] = ev_tmp

    if not dfc_out is None:
        dfc_out.rank_deficit = rank_deficit
    return eg, ev
def lanczos_full(A,v0,m,full_ortho=False,out=None,eps=None):
    """ Creates Lanczos basis; diagonalizes Krylov subspace in Lanczos basis.

    Given a hermitian matrix `A` of size :math:`n\\times n` and an integer `m`, the Lanczos algorithm computes

    * an :math:`n\\times m` matrix  :math:`Q`, and
    * a real symmetric tridiagonal matrix :math:`T=Q^\\dagger A Q` of size :math:`m\\times m`. The matrix :math:`T` can be represented via its eigendecomposition `(E,V)`: :math:`T=V\\mathrm{diag}(E)V^T`.

    This function computes the triple :math:`(E,V,Q^T)`.

    :red:`NOTE:` This function returns :math:`Q^T;\\,Q^T` is (in general) different from :math:`Q^\\dagger`.

    Notes
    -----

    * performs classical lanczos algorithm for hermitian matrices and cannot handle degeneracies when calculating eigenvalues.
    * the function allows for full orthogonalization, see `full_ortho`. The resulting :math:`T` will not necessarily be tridiagonal.
    * `V` is always real-valued, since :math:`T` is real and symmetric.
    * `A` must have a 'dot' method to perform calculation,
    * The 'out' argument to pass back the results of the matrix-vector product will be used if the 'dot' function supports this argument.
    * if an off-diagonal element of `T` drops below `eps` at step `i`, the iteration stops and only the first `i` Lanczos vectors (and the corresponding :math:`i\\times i` block of `T`) are returned.

    Parameters
    -----------
    A : LinearOperator, hamiltonian, numpy.ndarray, or object with a 'dot' method and a 'dtype' method.
        Python object representing a linear map to compute the Lanczos approximation to the largest eigenvalues/vectors of. Must contain a dot-product method, used as `A.dot(v)` and a dtype method, used as `A.dtype`, e.g. `hamiltonian`, `quantum_operator`, `quantum_LinearOperator`, sparse or dense matrix.
    v0 : array_like, (n,)
        initial vector to start the Lanczos algorithm from.
    m : int
        Number of Lanczos vectors (size of the Krylov subspace)
    full_ortho : bool, optional
        perform a QR decomposition on Q_T generated from the standard lanczos iteration to remove any loss of orthogonality due to numerical precision.
    out : numpy.ndarray, optional
        Array of shape (m,n) to store the Lanczos vectors in (e.g. `Q`) in a memory efficient way. Must be C-contiguous, writable and of the result dtype of `A` and `v0`.
    eps : float, optional
        Used to cutoff lanczos iteration when off diagonal matrix elements of `T` drops below this value. Defaults to the machine epsilon of the result dtype.

    Returns
    --------
    tuple(E,V,Q_T)
        * E : (m,) numpy.ndarray: eigenvalues of Krylov subspace tridiagonal matrix :math:`T`.
        * V : (m,m) numpy.ndarray: eigenvectors of Krylov subspace tridiagonal matrix :math:`T`.
        * Q_T : (m,n) numpy.ndarray: matrix containing the `m` Lanczos vectors. This is :math:`Q^T` (not :math:`Q^\\dagger`)!

    Raises
    ------
    ValueError
        if `v0` is not one-dimensional, if `m >= n`, or if `out` has the wrong shape, dtype or memory layout.

    Examples
    --------

    >>> E, V, Q_T = lanczos_full(H,v0,20)

    """
    v0 = _np.asanyarray(v0)
    n = v0.size
    dtype = _np.result_type(A.dtype,v0.dtype)

    if v0.ndim != 1:
        raise ValueError("expecting array with ndim=1 for initial Lanczos vector.")

    if m >= n:
        raise ValueError("Requested size of Lanczos basis must be smaller then size of original space (e.g. m < n).")

    if out is not None:
        if out.shape != (m,n):
            raise ValueError("argument 'out' must have shape (m,n), see documentation.")
        if out.dtype != dtype:
            raise ValueError("argument 'out' has dtype {}, expecting dtype {}".format(out.dtype,dtype))
        if not out.flags["CARRAY"]:
            raise ValueError("argument 'out' must be C-contiguous and writable.")

        Q = out
    else:
        Q = _np.zeros((m,n),dtype=dtype)

    Q[0,:] = v0[:]
    v = _np.zeros_like(v0,dtype=dtype)
    r = _np.zeros_like(v0,dtype=dtype)

    # b: off-diagonal, a: diagonal elements of the tridiagonal matrix T
    b = _np.zeros((m,),dtype=v.real.dtype)
    a = _np.zeros((m,),dtype=v.real.dtype)

    # get function : y <- y + a * x
    axpy = get_blas_funcs('axpy', arrays=(r, v))

    if eps is None:
        eps = _np.finfo(dtype).eps

    # normalize the starting vector unless it already has unit norm
    q_norm = _np.linalg.norm(Q[0,:])
    if _np.abs(q_norm-1.0) > eps:
        _np.divide(Q[0,:],q_norm,out=Q[0,:])

    try:
        A.dot(Q[0,:],out=r) # call if operator supports 'out' argument
        use_out = True
    except TypeError:
        r[:] = A.dot(Q[0,:])
        use_out = False

    a[0] = _np.vdot(Q[0,:],r).real
    axpy(Q[0,:],r,n,-a[0])
    b[0] = _np.linalg.norm(r)

    for i in range(1,m,1):
        v[:] = Q[i-1,:]
        _np.divide(r,b[i-1],out=Q[i,:])

        if use_out:
            A.dot(Q[i,:],out=r) # call if operator supports 'out' argument
        else:
            r[:] = A.dot(Q[i,:])

        axpy(v,r,n,-b[i-1])
        a[i] = _np.vdot(Q[i,:],r).real
        axpy(Q[i,:],r,n,-a[i])
        b[i] = _np.linalg.norm(r)

        if b[i] < eps:
            # Krylov subspace exhausted: truncate the basis to i vectors
            m = i
            break

    if full_ortho:
        # re-orthogonalize the computed vectors; the slice is clamped to the
        # allocated number of rows when no early termination happened
        q,_ = _np.linalg.qr(Q[:m+1].T)
        Q[:m+1,:] = q.T[...]

        h = _np.zeros((m,m),dtype=a.dtype)

        for i in range(m):
            if use_out:
                A.dot(Q[i,:],out=r) # call if operator supports 'out' argument
            else:
                r[:] = A.dot(Q[i,:])

            _np.conj(r,out=r)
            # only the first m rows belong to the (possibly truncated) basis;
            # using all rows of Q would not fit into h after an early break
            h[i,i:] = _np.dot(Q[i:m,:],r).real

        E,V = _np.linalg.eigh(h,UPLO="U")

    else:
        E,V = eigh_tridiagonal(a[:m],b[:m-1])

    return E,V,Q[:m]
""" This module contains a collection of routines for operating on sparse matrices on the scipy.sparse formats, for use internally by other modules throughout QuTiP. """ __all__ = ['sp_fro_norm', 'sp_inf_norm', 'sp_L2_norm', 'sp_max_norm', 'sp_one_norm', 'sp_reshape', 'sp_eigs', 'sp_expm', 'sp_permute', 'sp_reverse_permute', 'sp_bandwidth', 'sp_profile'] import scipy.sparse as sp import scipy.sparse.linalg as spla import numpy as np import scipy.linalg as la from scipy.linalg.blas import get_blas_funcs _dznrm2 = get_blas_funcs("znrm2") from qutip.cy.sparse_utils import (_sparse_profile, _sparse_permute, _sparse_reverse_permute, _sparse_bandwidth, _isdiag, zcsr_one_norm, zcsr_inf_norm) from qutip.fastsparse import fast_csr_matrix from qutip.cy.spconvert import zcsr_reshape from qutip.settings import debug, eigh_unsafe if eigh_unsafe: def _orthogonalize(vec, other): cross = np.sum(np.conj(other) * vec) vec -= cross * other norm = np.sum(np.conj(vec) * vec)**0.5 vec /= norm def eigh(mat, eigvals=[]):
def lanczos_iter(A,v0,m,return_vec_iter=True,copy_v0=True,copy_A=False,eps=None):
    """ Creates generator for Lanczos basis; diagonalizes Krylov subspace in Lanczos basis.

    For a hermitian matrix `A` of size :math:`n\\times n` and an integer `m`, the Lanczos algorithm yields

    * an :math:`n\\times m` matrix  :math:`Q`, and
    * a real symmetric tridiagonal matrix :math:`T=Q^\\dagger A Q` of size :math:`m\\times m`, represented here through its eigendecomposition `(E,V)`: :math:`T=V\\mathrm{diag}(E)V^T`.

    The function returns the triple :math:`(E,V,Q^T)`, where the basis is handed back as a generator rather than as an array.

    :red:`NOTE:` This function returns :math:`Q^T;\\,Q^T` is (in general) different from :math:`Q^\\dagger`.

    Parameters
    -----------
    A : LinearOperator, hamiltonian, numpy.ndarray, etc. with a 'dot' method and a 'dtype' method.
        Python object representing a linear map to compute the Lanczos approximation to the largest eigenvalues/vectors of. Must contain a dot-product method, used as `A.dot(v)` and a dtype method, used as `A.dtype`, e.g. `hamiltonian`, `quantum_operator`, `quantum_LinearOperator`, sparse or dense matrix.
    v0 : array_like, (n,)
        initial vector to start the Lanczos algorithm from.
    m : int
        Number of Lanczos vectors (size of the Krylov subspace)
    return_vec_iter : bool, optional
        Toggles whether or not to return the Lanczos basis iterator.
    copy_v0 : bool, optional
        Whether or not to produce a copy of the initial vector `v0` (only relevant if the iterator is returned).
    copy_A : bool, optional
        Whether or not to produce a copy of the linear operator `A` (only relevant if the iterator is returned).
    eps : float, optional
        Used to cutoff lanczos iteration when off diagonal matrix elements of `T` drops below this value. Defaults to the machine epsilon of the result dtype.

    Returns
    --------
    tuple(E,V,Q_T)
        * E : (m,) numpy.ndarray: eigenvalues of Krylov subspace tridiagonal matrix :math:`T`.
        * V : (m,m) numpy.ndarray: eigenvectors of Krylov subspace tridiagonal matrix :math:`T`.
        * Q_T : generator that yields the `m` lanczos basis vectors on the fly, produces the same result as: :code:`iter(Q_T[:])` where `Q_T` is the array generated by `lanczos_full`

        If `return_vec_iter` is false only `(E,V)` is returned.

    Notes
    -----
    * this function is useful to minimize any memory requirements in the calculation of the Lanczos basis.
    * the generator of the lanczos basis performs the calculation 'on the fly'. This means that the lanczos iteration is repeated every time this generator is looped over.
    * this generator `Q_T` can be reused as many times as needed, this relies on the data in both `v0` and `A` remaining unchanged during runtime. If this cannot be guaranteed then it is safer to set both `copy_v0` and `copy_A` to be true.
    * `V` is always real-valued, since :math:`T` is real and symmetric.

    Examples
    --------

    >>> E, V, Q_T_iterator = lanczos_iter(H,v0,20)

    """
    v0 = _np.asanyarray(v0)
    n = v0.size
    dtype = _np.result_type(A.dtype, v0.dtype)

    # private copies are only needed when the generator outlives this call
    if return_vec_iter:
        if copy_v0:
            v0 = v0.copy()
        if copy_A:
            A = deepcopy(A)

    if v0.ndim != 1:
        raise ValueError("expecting array with ndim=1 for initial Lanczos vector.")

    if m >= n:
        raise ValueError("Requested size of Lanczos basis must be smaller then size of original space (e.g. m < n).")

    # working vectors: current basis vector, previous one, and residual
    cur = v0.astype(dtype, copy=True)
    prev = _np.zeros_like(v0, dtype=dtype)
    resid = _np.zeros_like(v0, dtype=dtype)

    # off-diagonal (beta) and diagonal (alpha) entries of T
    real_dtype = cur.real.dtype
    beta = _np.zeros((m,), dtype=real_dtype)
    alpha = _np.zeros((m,), dtype=real_dtype)

    # get function : y <- y + a * x
    axpy = get_blas_funcs('axpy', arrays=(cur, prev))

    if eps is None:
        eps = _np.finfo(dtype).eps

    start_norm = _np.linalg.norm(cur)
    if _np.abs(start_norm - 1.0) > eps:
        _np.divide(cur, start_norm, out=cur)

    # probe once whether the operator accepts an 'out' argument
    try:
        A.dot(cur, out=resid)
    except TypeError:
        resid[:] = A.dot(cur)
        use_out = False
    else:
        use_out = True

    alpha[0] = _np.vdot(cur, resid).real
    axpy(cur, resid, n, -alpha[0])
    beta[0] = _np.linalg.norm(resid)

    last = 0  # index of the last Lanczos step that was carried out
    for step in range(1, m):
        last = step
        prev[:] = cur[:]
        _np.divide(resid, beta[step-1], out=cur)

        if use_out:
            A.dot(cur, out=resid)
        else:
            resid[:] = A.dot(cur)

        axpy(prev, resid, n, -beta[step-1])
        alpha[step] = _np.vdot(cur, resid).real
        axpy(cur, resid, n, -alpha[step])
        beta[step] = _np.linalg.norm(resid)

        if beta[step] < eps:
            break

    # keep only the entries that were actually computed
    alpha = alpha[:last+1].copy()
    beta = beta[:last].copy()
    del cur, resid, prev

    E, V = eigh_tridiagonal(alpha, beta)

    if not return_vec_iter:
        return E, V
    return E, V, _lanczos_vec_iter(A, v0, alpha.copy(), beta.copy())
def merge(self, other, decay=1.0):
    """
    Merge this Projection with another.

    Content of `other` is destroyed in the process, so pass this function a
    copy if you need it further.

    This is the optimized merge described in algorithm 5.

    `other` is the projection to merge in; its `u` attribute is overwritten
    and then deleted. `decay` is a factor applied to this projection's
    singular values `self.s` before they are combined with those of `other`.

    Nothing is returned; `self.u` and `self.s` are updated in place. If
    `other` is empty (`other.u is None`) nothing happens; if `self` is empty
    it simply receives copies of `other.u` and `other.s`.

    Raises ValueError if both projections are non-empty and their feature
    counts (`m`) differ.
    """
    if other.u is None:
        # the other projection is empty => do nothing
        return
    if self.u is None:
        # we are empty => result of merge is the other projection, whatever it is
        self.u = other.u.copy()
        self.s = other.s.copy()
        return
    if self.m != other.m:
        raise ValueError(
            "vector space mismatch: update has %s features, expected %s" %
            (other.m, self.m))
    logger.info("merging projections: %s + %s",
                str(self.u.shape), str(other.u.shape))
    m, n1, n2 = self.u.shape[0], self.u.shape[1], other.u.shape[1]
    # TODO Maybe keep the bases as elementary reflectors, without
    # forming explicit matrices with gorgqr.
    # The only operation we ever need is basis^T*basis and basis*component.
    # But how to do that in numpy? And is it fast(er)?

    # find component of u2 orthogonal to u1
    # IMPORTANT: keep matrices in suitable order for matrix products; failing
    # to do so gives 8x lower performance :(
    self.u = numpy.asfortranarray(self.u)  # does nothing if input already fortran-order array
    other.u = numpy.asfortranarray(other.u)
    gemm, = get_blas_funcs(('gemm', ), (self.u, ))
    logger.debug("constructing orthogonal component")
    c = gemm(1.0, self.u, other.u, trans_a=True)
    # other.u <- other.u - self.u * c, computed in place
    gemm(-1.0, self.u, c, beta=1.0, c=other.u, overwrite_c=True)

    # perform q, r = QR(component); code hacked out of scipy.linalg.qr
    logger.debug("computing QR of %s dense matrix", str(other.u.shape))
    geqrf, = get_lapack_funcs(('geqrf', ), (other.u, ))
    # first call with lwork=-1 is a workspace query, second call does the work
    qr, tau, work, info = geqrf(
        other.u, lwork=-1, overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
    qr, tau, work, info = geqrf(
        other.u, lwork=work[0], overwrite_a=True)  # sometimes segfaults with overwrite_a=True...
    del other.u
    assert info >= 0
    r = triu(qr[:n2, :n2])
    if m < n2:  # rare case...
        qr = qr[:, :m]  # retains fortran order
    gorgqr, = get_lapack_funcs(('orgqr', ), (qr, ))
    q, work, info = gorgqr(qr, tau, lwork=-1, overwrite_a=True)
    q, work, info = gorgqr(qr, tau, lwork=work[0], overwrite_a=True)
    assert info >= 0, "qr failed"
    assert q.flags.f_contiguous

    # find rotation that diagonalizes r
    # The lower-left zero block must have as many rows as `r`. Since `qr`
    # only has m rows, `r` has min(m, n2) rows, not always n2; padding with
    # n2 rows made bmat fail in the m < n2 case.
    k = numpy.bmat([
        [numpy.diag(decay * self.s), c * other.s],
        [matutils.pad(numpy.matrix([]).reshape(0, 0), min(m, n2), n1),
         r * other.s],
    ])
    logger.debug("computing SVD of %s dense matrix", str(k.shape))
    # TODO *ugly overkill*!! only need first self.k SVD factors... but there
    # is no LAPACK wrapper for partial svd/eigendecomp in numpy :(
    u_k, s_k, _ = numpy.linalg.svd(k, full_matrices=False)

    # from here on `k` is the number of factors to keep, not the matrix
    k = clipSpectrum(s_k, self.k)
    u_k, s_k = u_k[:, :k], s_k[:k]

    # update & rotate current basis U
    logger.debug("updating orthonormal basis U")
    # TODO temporarily creates an extra (m,k) dense array in memory. find a
    # way to avoid this!
    self.u = gemm(1.0, self.u, u_k[:n1])
    gemm(1.0, q, u_k[n1:], beta=1.0, c=self.u, overwrite_c=True)  # u = [u,u']*u_k
    self.s = s_k