def _batched_inv(a):
    """Invert every matrix of a stack using the cuBLAS batched LU routines.

    Args:
        a (cupy.ndarray): Stack of square matrices with at least three
            dimensions; the last two axes are the matrix axes.

    Returns:
        cupy.ndarray: Array with the shape of ``a`` holding the inverses, in
        the output dtype reported by ``_util.linalg_common_type``.

    Raises:
        ValueError: If the computation dtype is not one of float32, float64,
            complex64 or complex128.
    """
    assert a.ndim >= 3
    _util._assert_cupy_array(a)
    _util._assert_stacked_square(a)
    dtype, out_dtype = _util.linalg_common_type(a)

    # Select the getrf (LU factorization) / getri (inverse from LU) pair.
    blas = cupy.cuda.cublas
    if dtype == cupy.float32:
        getrf, getri = blas.sgetrfBatched, blas.sgetriBatched
    elif dtype == cupy.float64:
        getrf, getri = blas.dgetrfBatched, blas.dgetriBatched
    elif dtype == cupy.complex64:
        getrf, getri = blas.cgetrfBatched, blas.cgetriBatched
    elif dtype == cupy.complex128:
        getrf, getri = blas.zgetrfBatched, blas.zgetriBatched
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    if 0 in a.shape:
        return cupy.empty_like(a, dtype=out_dtype)

    original_shape = a.shape

    # The copy is necessary to prevent the caller's `a` from being
    # overwritten; the stack is also flattened to (batch, n, n).
    a = a.astype(dtype, order='C').reshape(
        -1, original_shape[-2], original_shape[-1])

    handle = device.get_cublas_handle()
    batch_size, n = a.shape[0], a.shape[1]
    lda = ldc = n

    def _matrix_pointers(stack, ld):
        # Device array with the address of each matrix in `stack`.
        stride = n * ld * stack.itemsize
        first = stack.data.ptr
        last = first + stride * batch_size
        return cupy.arange(first, last, stride, dtype=cupy.uintp)

    a_array = _matrix_pointers(a, lda)
    pivot_array = cupy.empty((batch_size, n), dtype=cupy.int32)
    info_array = cupy.empty((batch_size, ), dtype=cupy.int32)

    getrf(handle, n, a_array.data.ptr, lda, pivot_array.data.ptr,
          info_array.data.ptr, batch_size)
    cupy.linalg._util._check_cublas_info_array_if_synchronization_allowed(
        getrf, info_array)

    c = cupy.empty_like(a)
    c_array = _matrix_pointers(c, ldc)

    getri(handle, n, a_array.data.ptr, lda, pivot_array.data.ptr,
          c_array.data.ptr, ldc, info_array.data.ptr, batch_size)
    cupy.linalg._util._check_cublas_info_array_if_synchronization_allowed(
        getri, info_array)

    return c.reshape(original_shape).astype(out_dtype, copy=False)
def lsqr(A, b):
    """Solves linear system with QR decomposition.

    Find the solution to a large, sparse, linear system of equations.
    The function solves ``Ax = b``. Given two-dimensional matrix ``A`` is
    decomposed into ``Q * R``.

    Args:
        A (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): The input matrix
            with dimension ``(N, N)``
        b (cupy.ndarray): Right-hand side vector.

    Returns:
        tuple:
            Its length must be ten. It has same type elements
            as SciPy. Only the first element, the solution vector ``x``, is
            available and other elements are expressed as ``None`` because
            the implementation of cuSOLVER is different from the one of SciPy.
            You can easily calculate the fourth element by ``norm(b - Ax)``
            and the ninth element by ``norm(x)``.

    Raises:
        RuntimeError: When running on HIP.
        ValueError: If ``b`` is not a 1-d array whose length matches ``A``.

    .. seealso:: :func:`scipy.sparse.linalg.lsqr`
    """
    if runtime.is_hip:
        raise RuntimeError('HIP does not support lsqr')
    if not sparse.isspmatrix_csr(A):
        A = sparse.csr_matrix(A)
    # csr_matrix is 2d
    _util._assert_stacked_square(A)
    _util._assert_cupy_array(b)
    m = A.shape[0]
    if b.ndim != 1 or len(b) != m:
        raise ValueError('b must be 1-d array whose size is same as A')

    # Cast to float32 or float64
    if A.dtype == 'f' or A.dtype == 'd':
        dtype = A.dtype
    else:
        dtype = numpy.promote_types(A.dtype, 'f')
        # The solver below reads A's values through a raw pointer, so the
        # stored data must really have the working dtype.
        A = A.astype(dtype)
    # `b` is also handed over as a raw pointer: it must share the working
    # dtype and be contiguous, otherwise the solver misreads its memory.
    b = cupy.ascontiguousarray(b.astype(dtype, copy=False))
    # NOTE(review): complex input still falls through to the float64 solver
    # below, as it did before -- confirm whether it should be rejected.

    handle = device.get_cusolver_sp_handle()
    nnz = A.nnz
    tol = 1.0
    reorder = 1
    x = cupy.empty(m, dtype=dtype)
    # Host-side output slot filled by the solver; its value is not inspected.
    singularity = numpy.empty(1, numpy.int32)

    if dtype == 'f':
        csrlsvqr = cusolver.scsrlsvqr
    else:
        csrlsvqr = cusolver.dcsrlsvqr
    csrlsvqr(
        handle, m, nnz, A._descr.descriptor, A.data.data.ptr,
        A.indptr.data.ptr, A.indices.data.ptr, b.data.ptr, tol, reorder,
        x.data.ptr, singularity.ctypes.data)

    # The return type of SciPy is always float64. Therefore, x must be casted.
    x = x.astype(numpy.float64)
    ret = (x, None, None, None, None, None, None, None, None, None)
    return ret
def invh(a):
    """Compute the inverse of a Hermitian matrix.

    The inverse of a real symmetric or complex Hermitian positive-definite
    matrix is obtained by solving ``a x = I`` through ``lapack.posv``
    (Cholesky factorization). If ``a`` is not positive definite the
    factorization fails and an error is raised.

    Args:
        a (cupy.ndarray): Real symmetric or complex Hermitian matrix.

    Returns:
        cupy.ndarray: The inverse of matrix ``a``.
    """
    _util._assert_cupy_array(a)
    _util._assert_nd_squareness(a)

    # TODO: Remove this assert once cusolver supports nrhs > 1 for potrsBatched
    _util._assert_rank2(a)

    # Right-hand side: an identity with the shape and dtype of `a`.
    order = a.shape[-1]
    rhs = cupy.empty(a.shape, a.dtype)
    rhs[...] = cupy.eye(order, dtype=a.dtype)
    return lapack.posv(a, rhs)
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``.
            Inputs with more than two dimensions are forwarded to
            ``_potrf_batched``.

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    _util._assert_cupy_array(a)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    if a.ndim > 2:
        return _potrf_batched(a)

    dtype, out_dtype = _util.linalg_common_type(a)

    # Work on a C-ordered copy so the caller's array is left untouched.
    factor = a.astype(dtype, order='C', copy=True)
    size = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Letter prefix of the cuSOLVER routine family for the working dtype.
    if dtype == 'f':
        prefix = 's'
    elif dtype == 'd':
        prefix = 'd'
    elif dtype == 'F':
        prefix = 'c'
    else:  # dtype == 'D'
        prefix = 'z'
    potrf = getattr(cusolver, prefix + 'potrf')
    potrf_bufferSize = getattr(cusolver, prefix + 'potrf_bufferSize')

    uplo = cublas.CUBLAS_FILL_MODE_UPPER
    lwork = potrf_bufferSize(handle, uplo, size, factor.data.ptr, size)
    workspace = cupy.empty(lwork, dtype=dtype)
    potrf(handle, uplo, size, factor.data.ptr, size,
          workspace.data.ptr, lwork, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        potrf, dev_info)

    # Keep only the lower triangle (including the diagonal), in place.
    _util._tril(factor, k=0)
    return factor.astype(out_dtype, copy=False)
def solve(a, b):
    """Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(...,M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.

    Raises:
        ValueError: If the shapes of ``a`` and ``b`` are not compatible.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.solve`
    """
    if a.ndim > 2 and a.shape[-1] <= get_batched_gesv_limit():
        # Note: There is a low performance issue in batched_gesv when matrix is
        # large, so it is not used in such cases.
        return batched_gesv(a, b)

    # TODO(kataoka): Move the checks to the beginning
    _util._assert_cupy_array(a, b)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # `b` has either as many dimensions as `a` or one fewer, and its leading
    # axes must agree with those of `a`.
    ndim_ok = b.ndim in (a.ndim, a.ndim - 1)
    if not ndim_ok or a.shape[:-1] != b.shape[:a.ndim - 1]:
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    dtype, out_dtype = _util.linalg_common_type(a, b)

    if a.ndim == 2:
        # gesv works in place: use copies so 'a' and 'b' are not overwritten
        lhs = a.astype(dtype, copy=True, order='F')
        rhs = b.astype(dtype, copy=True, order='F')
        cupyx.lapack.gesv(lhs, rhs)
        return rhs.astype(out_dtype, copy=False)

    # Stacked case: solve one system per leading index.
    # A single up-front copy keeps 'a' from being overwritten.
    lhs_stack = a.astype(dtype, copy=True, order='C')
    solution = cupy.empty_like(b, dtype=out_dtype)
    batch_shape = lhs_stack.shape[:-2]
    n_systems = numpy.prod(batch_shape)
    for flat in range(n_systems):
        where = numpy.unravel_index(flat, batch_shape)
        # a fresh copy per system keeps 'b' from being overwritten
        rhs = b[where].astype(dtype, copy=True, order='F')
        cupyx.lapack.gesv(lhs_stack[where], rhs)
        solution[where] = rhs
    return solution
def solve(a, b):
    """Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(...,M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.

    Raises:
        ValueError: If the shapes of ``a`` and ``b`` are not compatible.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.solve`
    """
    if a.ndim > 2 and a.shape[-1] <= get_batched_gesv_limit():
        # Note: There is a low performance issue in batched_gesv when matrix is
        # large, so it is not used in such cases.
        return batched_gesv(a, b)

    _util._assert_cupy_array(a, b)
    _util._assert_nd_squareness(a)

    # `b` has either as many dimensions as `a` or one fewer, and its leading
    # axes must agree with those of `a`.
    ndim_ok = b.ndim in (a.ndim, a.ndim - 1)
    if not ndim_ok or a.shape[:-1] != b.shape[:a.ndim - 1]:
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    # The working dtype is derived from `a` alone: float32/float64 are kept,
    # anything else is promoted against float32.
    kind_char = a.dtype.char
    if kind_char == 'f' or kind_char == 'd':
        dtype = a.dtype
    else:
        dtype = numpy.promote_types(kind_char, 'f')

    a = a.astype(dtype)
    b = b.astype(dtype)

    if a.ndim == 2:
        return cupyx.lapack.gesv(a, b)

    # Stacked case: solve one system per leading index.
    solution = cupy.empty_like(b)
    batch_shape = a.shape[:-2]
    n_systems = numpy.prod(batch_shape)
    for flat in range(n_systems):
        where = numpy.unravel_index(flat, batch_shape)
        solution[where] = cupyx.lapack.gesv(a[where], b[where])
    return solution
def lschol(A, b):
    """Solves linear system with cholesky decomposition.

    Find the solution to a large, sparse, linear system of equations.
    The function solves ``Ax = b``. Given two-dimensional matrix ``A`` is
    decomposed into ``L * L^*``.

    Args:
        A (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): The input matrix
            with dimension ``(N, N)``. Must be positive-definite input matrix.
            Only symmetric real matrix is supported currently.
        b (cupy.ndarray): Right-hand side vector.

    Returns:
        ret (cupy.ndarray): The solution vector ``x``, always as float64.

    Raises:
        ValueError: If ``b`` is not a 1-d array whose length matches ``A``.
    """
    if not sparse.isspmatrix_csr(A):
        A = sparse.csr_matrix(A)
    # csr_matrix is 2d
    _util._assert_stacked_square(A)
    _util._assert_cupy_array(b)
    m = A.shape[0]
    if b.ndim != 1 or len(b) != m:
        raise ValueError('b must be 1-d array whose size is same as A')

    # Cast to float32 or float64
    if A.dtype == 'f' or A.dtype == 'd':
        dtype = A.dtype
    else:
        dtype = numpy.promote_types(A.dtype, 'f')
        # The solver below reads A's values through a raw pointer, so the
        # stored data must really have the working dtype.
        A = A.astype(dtype)
    # `b` is also handed over as a raw pointer: it must share the working
    # dtype and be contiguous, otherwise the solver misreads its memory.
    b = cupy.ascontiguousarray(b.astype(dtype, copy=False))

    handle = device.get_cusolver_sp_handle()
    nnz = A.nnz
    tol = 1.0
    reorder = 1
    x = cupy.empty(m, dtype=dtype)
    # Host-side output slot filled by the solver; its value is not inspected.
    singularity = numpy.empty(1, numpy.int32)

    if dtype == 'f':
        csrlsvchol = cusolver.scsrlsvchol
    else:
        csrlsvchol = cusolver.dcsrlsvchol
    csrlsvchol(
        handle, m, nnz, A._descr.descriptor, A.data.data.ptr,
        A.indptr.data.ptr, A.indices.data.ptr, b.data.ptr, tol, reorder,
        x.data.ptr, singularity.ctypes.data)

    # The return type of SciPy is always float64.
    x = x.astype(numpy.float64)
    return x
def pinv(a, rcond=1e-15):
    """Compute the Moore-Penrose pseudoinverse of a matrix.

    The pseudoinverse, a generalization of the inverse matrix, is built from
    the Singular Value Decomposition (SVD) of ``a``. Small singular values
    are discarded for stability.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, N)``
        rcond (float or cupy.ndarray): Cutoff parameter for small singular
            values. With ``s`` denoting the largest singular value, every
            singular value not greater than ``rcond * s`` is treated as
            zero. Broadcasts against the stack of matrices.

    Returns:
        cupy.ndarray: The pseudoinverse of ``a`` with dimension
        ``(..., N, M)``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.pinv`
    """
    _util._assert_cupy_array(a)

    if a.size == 0:
        # Nothing to decompose: return an empty result of transposed shape.
        _, result_dtype = _util.linalg_common_type(a)
        m, n = a.shape[-2:]
        if m == 0 or n == 0:
            result_dtype = a.dtype  # NumPy bug?
        transposed_shape = a.shape[:-2] + (n, m)
        return cupy.empty(transposed_shape, dtype=result_dtype)

    u, s, vt = _decomposition.svd(a.conj(), full_matrices=False)

    # Flag the small singular values before `s` is inverted in place.
    largest = cupy.amax(s, axis=-1)
    cutoff = rcond * largest
    negligible = s <= cutoff[..., None]
    cupy.reciprocal(s, out=s)
    s[negligible] = 0

    v = vt.swapaxes(-2, -1)
    scaled_ut = s[..., None] * u.swapaxes(-2, -1)
    return cupy.matmul(v, scaled_ut)
def matrix_power(M, n):
    """Raise a square matrix to the (integer) power `n`.

    Args:
        M (~cupy.ndarray): Matrix to raise by power n.
        n (~int): Power to raise matrix to. Any object supporting
            ``operator.index`` (e.g. a NumPy integer scalar) is accepted.

    Returns:
        ~cupy.ndarray: Output array. For ``n == 1`` the input ``M`` itself
        is returned.

    Raises:
        TypeError: If ``n`` cannot be interpreted as an integer.

    .. seealso:: :func:`numpy.linalg.matrix_power`
    """
    import operator

    _util._assert_cupy_array(M)
    _util._assert_stacked_2d(M)
    _util._assert_stacked_square(M)

    # Accept every integer-like exponent, not only built-in `int`.
    try:
        n = operator.index(n)
    except TypeError as err:
        raise TypeError('exponent must be an integer') from err

    if n == 0:
        return _util.stacked_identity_like(M)
    elif n < 0:
        # A negative power is the positive power of the inverse.
        M = _solve.inv(M)
        n = -n

    # short-cuts
    if n <= 3:
        if n == 1:
            return M
        elif n == 2:
            return cupy.matmul(M, M)
        else:
            return cupy.matmul(cupy.matmul(M, M), M)

    # binary decomposition to reduce the number of Matrix
    # multiplications for n > 3.
    result, Z = None, None
    for b in cupy.binary_repr(n)[::-1]:
        # Z holds M ** (2 ** k) for the k-th (least significant first) bit.
        Z = M if Z is None else cupy.matmul(Z, Z)
        if b == '1':
            result = Z if result is None else cupy.matmul(result, Z)

    return result
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix. Inputs with three or more
            dimensions are forwarded to ``_batched_inv``.

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    _util._assert_cupy_array(a)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    if a.ndim >= 3:
        return _batched_inv(a)

    dtype, out_dtype = _util.linalg_common_type(a)
    if a.size == 0:
        return cupy.empty(a.shape, out_dtype)

    # Keep the memory layout of the input for both operands.
    if a._f_contiguous:
        layout = 'F'
    else:
        layout = 'C'

    # gesv works in place: the copy prevents 'a' from being overwritten and
    # the identity right-hand side receives the result.
    lhs = a.astype(dtype, copy=True, order=layout)
    rhs = cupy.eye(lhs.shape[0], dtype=dtype, order=layout)

    # For C-ordered operands the transposed views are passed instead.
    operands = (lhs, rhs) if layout == 'F' else (lhs.T, rhs.T)
    cupyx.lapack.gesv(*operands)
    return rhs.astype(out_dtype, copy=False)
def invh(a):
    """Compute the inverse of a Hermitian matrix.

    The inverse of a real symmetric or complex Hermitian positive-definite
    matrix is obtained by solving ``a x = I`` through ``lapack.posv``
    (Cholesky factorization). If ``a`` is not positive definite the
    factorization fails and an error is raised.

    Args:
        a (cupy.ndarray): Real symmetric or complex Hermitian matrix.

    Returns:
        cupy.ndarray: The inverse of matrix ``a``.
    """
    _util._assert_cupy_array(a)
    # TODO: Use `_assert_stacked_2d` instead, once cusolver supports nrhs > 1
    # for potrsBatched
    _util._assert_2d(a)
    _util._assert_stacked_square(a)

    # Solving against the identity yields the inverse.
    identity = _util.stacked_identity_like(a)
    inverse = lapack.posv(a, identity)
    return inverse
def tensorinv(a, ind=2):
    """Computes the inverse of a tensor.

    This function computes tensor ``a_inv`` from tensor ``a`` such that
    ``tensordot(a_inv, a, ind) == I``, where ``I`` denotes the identity tensor.

    Args:
        a (cupy.ndarray):
            The tensor such that
            ``prod(a.shape[:ind]) == prod(a.shape[ind:])``.
        ind (int):
            The positive number used in ``axes`` option of ``tensordot``.

    Returns:
        cupy.ndarray:
            The inverse of a tensor whose shape is equivalent to
            ``a.shape[ind:] + a.shape[:ind]``.

    Raises:
        ValueError: If ``ind`` is not positive.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.tensorinv`
    """
    _util._assert_cupy_array(a)

    if ind <= 0:
        raise ValueError('Invalid ind argument')

    leading, trailing = a.shape[:ind], a.shape[ind:]
    result_shape = trailing + leading

    # Flatten the tensor to a matrix, invert it, then restore the axes in
    # swapped order.
    n_rows = internal.prod(trailing)
    as_matrix = a.reshape(n_rows, -1)
    inverse = inv(as_matrix)
    return inverse.reshape(*result_shape)
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is a one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(..., M, M)`` and ``(..., N, N)``. Otherwise, the dimensions
            of u and v are ``(..., M, K)`` and ``(..., K, N)``, respectively,
            where ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.
            When ``compute_uv`` is ``False`` only ``s`` is returned.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. note::
        On CUDA, when ``a.ndim > 2`` and the matrix dimensions <= 32, a fast
        code path based on the Jacobi method (``gesvdj``) is taken. Otherwise,
        a QR method (``gesvd``) is used.

        On ROCm, there is no such fast code path that switches the
        underlying algorithm.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    _util._assert_cupy_array(a)

    # Cast to float32, float64, complex64 or complex128; the singular values
    # use the matching real dtype.
    a_dtype = numpy.promote_types(a.dtype.char, 'f').char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D':
        a_dtype = 'D'
        s_dtype = 'd'

    # Stacked inputs are handled by the batched implementation.
    if a.ndim > 2:
        return _svd_batched(a, a_dtype, full_matrices, compute_uv)

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd returns matrix U and V^H
    # Note the swapped names: `n` is the row count and `m` the column count
    # of `a` at this point.
    n, m = a.shape

    # Empty input: return empty (or identity) factors without calling gesvd.
    if m == 0 or n == 0:
        s = cupy.empty((0, ), s_dtype)
        if compute_uv:
            if full_matrices:
                u = cupy.eye(n, dtype=a_dtype)
                vt = cupy.eye(m, dtype=a_dtype)
            else:
                u = cupy.empty((n, 0), dtype=a_dtype)
                vt = cupy.empty((0, m), dtype=a_dtype)
            return u, s, vt
        else:
            return s

    # `a` must be copied because xgesvd destroys the matrix
    if m >= n:
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        # More rows than columns: rebind m/n to (rows, columns) and work on
        # a C-ordered copy of the transpose so that m >= n still holds.
        m, n = a.shape
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True

    k = n  # = min(m, n) where m >= n is ensured above
    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            # `vt` is a view into `x`; job_vt='O' is requested below.
            vt = x[:, :n]
            job_u = ord('A')
            job_vt = ord('O')
        else:
            # `u` aliases `x`; job_u='O' is requested below.
            u = x
            vt = cupy.empty((k, n), dtype=a_dtype)
            job_u = ord('O')
            job_vt = ord('S')
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
        job_u = ord('N')
        job_vt = ord('N')
    s = cupy.empty(k, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Pick the gesvd routine pair matching the working dtype.
    if a_dtype == 'f':
        gesvd = cusolver.sgesvd
        gesvd_bufferSize = cusolver.sgesvd_bufferSize
    elif a_dtype == 'd':
        gesvd = cusolver.dgesvd
        gesvd_bufferSize = cusolver.dgesvd_bufferSize
    elif a_dtype == 'F':
        gesvd = cusolver.cgesvd
        gesvd_bufferSize = cusolver.cgesvd_bufferSize
    else:  # a_dtype == 'D':
        gesvd = cusolver.zgesvd
        gesvd_bufferSize = cusolver.zgesvd_bufferSize

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=a_dtype)
    if not runtime.is_hip:
        # rwork can be NULL if the information from superdiagonal isn't needed
        # https://docs.nvidia.com/cuda/cusolver/index.html#cuSolverDN-lt-t-gt-gesvd  # noqa
        rwork_ptr = 0
    else:
        # On HIP a real rwork buffer of min(m, n) - 1 elements is supplied.
        rwork = cupy.empty(min(m, n) - 1, dtype=s_dtype)
        rwork_ptr = rwork.data.ptr
    gesvd(
        handle, job_u, job_vt, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
        vt_ptr, n, workspace.data.ptr, buffersize, rwork_ptr,
        dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvd, dev_info)

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            # Without the explicit transpose the two buffers swap roles.
            return vt, s, u
    else:
        return s
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    Raises:
        ValueError: If ``mode`` is deprecated or unrecognized, or if the
            working dtype is not float32, float64, complex64 or complex128.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    _util._assert_cupy_array(a)
    _util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, n = a.shape
    mn = min(m, n)

    # Empty input: return empty (or identity) factors without calling
    # cuSOLVER.
    if mn == 0:
        if mode == 'reduced':
            return cupy.empty((m, 0), dtype), cupy.empty((0, n), dtype)
        elif mode == 'complete':
            return cupy.identity(m, dtype), cupy.empty((m, n), dtype)
        elif mode == 'r':
            return cupy.empty((0, n), dtype)
        else:  # mode == 'raw'
            # compatibility with numpy.linalg.qr
            dtype = numpy.promote_types(dtype, 'd')
            return cupy.empty((n, m), dtype), cupy.empty((0, ), dtype)

    # C-ordered copy of the transpose, shape (n, m); it is handed to geqrf
    # as an m-by-n matrix with leading dimension m and overwritten in place.
    x = a.transpose().astype(dtype, order='C', copy=True)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # compute working space of geqrf and solve R
    # The leading dimension given to the workspace query must be the one
    # used by the geqrf call itself (m).
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    if mode == 'r':
        r = x[:, :mn].transpose()
        return _util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        elif a.dtype.char == 'F':
            # The same applies to complex64
            return x.astype(numpy.complex128), tau.astype(numpy.complex128)
        return x, tau

    # Number of columns of Q to generate, and the buffer that receives them.
    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # compute working space of orgqr and solve Q
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr

    buffersize = orgqr_bufferSize(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr, workspace.data.ptr,
        buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        orgqr, dev_info)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, _util._triu(r)
def batched_gtsv(dl, d, du, B, algo='cyclic_reduction'):
    """Solves multiple tridiagonal systems (This is a bang method for B.)

    Args:
        dl, d, du (cupy.ndarray): Lower, main and upper diagonal vectors with
            last-dim sizes of N-1, N and N-1, respectively.
            Only two dimensional inputs are supported currently.
            The first dim is the batch dim.
        B (cupy.ndarray): Right-hand side vectors
            The first dim is the batch dim and the second dim is N.
        algo (str): algorithm, choose one from four algorithms;
            cyclic_reduction, cuThomas, LU_w_pivoting and QR.
            cuThomas is numerically unstable, and LU_w_pivoting is the LU
            algorithm with pivoting. Only ``cyclic_reduction`` is implemented
            at the moment.

    Returns:
        cupy.ndarray: The contiguous version of ``B`` that was handed to the
        solver.

    Raises:
        ValueError: If ``algo`` is unknown or the array shapes do not match.
        NotImplementedError: If the working dtype is not float32 or ``algo``
            is not ``cyclic_reduction``.
    """
    if algo not in ["cyclic_reduction", "cuThomas", "LU_w_pivoting", "QR"]:
        raise ValueError(f"Unknown algorithm [{algo}]")

    util._assert_cupy_array(dl)
    util._assert_cupy_array(d)
    util._assert_cupy_array(du)
    util._assert_cupy_array(B)
    if dl.ndim != 2 or d.ndim != 2 or du.ndim != 2 or B.ndim != 2:
        raise ValueError('dl, d, du and B must be 2-d arrays')

    batchsize = d.shape[0]
    if (batchsize != dl.shape[0] or batchsize != du.shape[0]
            or batchsize != B.shape[0]):
        raise ValueError(
            'The first dims of dl, du and B must match that of d.')

    N = d.shape[1]  # the size of the linear system
    if dl.shape[1] != N - 1 or du.shape[1] != N - 1 or B.shape[1] != N:
        raise ValueError(
            'The second dims of dl, du and B must match the second dim of d.')

    # the first element must be zero of dl
    padded_dl = cupy.ascontiguousarray(
        cupy.pad(dl, ((0, 0), (1, 0)), mode='constant', constant_values=0.0))
    # the last element must be zero of du
    padded_du = cupy.ascontiguousarray(
        cupy.pad(du, ((0, 0), (0, 1)), mode='constant', constant_values=0.0))

    # contiguous
    d = cupy.ascontiguousarray(d)
    B = cupy.ascontiguousarray(B)

    # Cast to float32 or float64
    if d.dtype == 'f' or d.dtype == 'd':
        dtype = d.dtype
    else:
        # `numpy.find_common_type` has been removed from NumPy;
        # `promote_types` gives the same answer for two array dtypes.
        dtype = numpy.promote_types(d.dtype, 'f')

    handle = device.get_cusparse_handle()

    if dtype != 'f':
        raise NotImplementedError
    if algo != "cyclic_reduction":
        # The interleaved-batch solvers (cusparse.sgtsvInterleavedBatch with
        # algorithm numbers cuThomas=0, LU_w_pivoting=1, QR=2) are not wired
        # up yet.
        raise NotImplementedError

    gtsv2 = cusparse.sgtsv2StridedBatch
    get_buffer_size = cusparse.sgtsv2StridedBatch_bufferSizeExt

    # cuSPARSE declares the size output as ``size_t*``, so the host slot must
    # be pointer-sized; it is zeroed so no stale bytes can leak into the
    # value read back below.
    buffer_size = numpy.zeros(1, numpy.uintp)
    get_buffer_size(handle, N, padded_dl.data.ptr, d.data.ptr,
                    padded_du.data.ptr, B.data.ptr, batchsize, N,
                    buffer_size.ctypes.data)
    buffer = cupy.zeros((int(buffer_size[0]), ), dtype=cupy.uint8)
    gtsv2(handle, N, padded_dl.data.ptr, d.data.ptr,
          padded_du.data.ptr, B.data.ptr, batchsize, N,
          buffer.data.ptr)

    return B
def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False,
                     overwrite_b=False, check_finite=False):
    """Solve the equation a x = b for x, assuming a is a triangular matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        lower (bool): Use only data contained in the lower triangle of ``a``.
            Default is to use upper triangle.
        trans (0, 1, 2, 'N', 'T' or 'C'): Type of system to solve:

            - *'0'* or *'N'* -- :math:`a x  = b`
            - *'1'* or *'T'* -- :math:`a^T x = b`
            - *'2'* or *'C'* -- :math:`a^H x = b`

        unit_diagonal (bool): If ``True``, diagonal elements of ``a`` are
            assumed to be 1 and will not be referenced.
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    Raises:
        ValueError: If ``a`` is not a square matrix, if ``a`` and ``b`` have
            incompatible lengths, or if ``check_finite`` is set and an input
            contains infs or NaNs.

    .. seealso:: :func:`scipy.linalg.solve_triangular`
    """
    _util._assert_cupy_array(a, b)

    a_shape = a.shape
    if len(a_shape) != 2 or a_shape[0] != a_shape[1]:
        raise ValueError('expected square matrix')
    if len(a) != len(b):
        raise ValueError('incompatible dimensions')

    # The working dtype follows `a`: float32/float64 are kept, everything
    # else is promoted against float32.
    if a.dtype.char in 'fd':
        dtype = a.dtype
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f')

    # Fortran-ordered operands; `b` is copied unless overwriting is allowed.
    a = cupy.array(a, dtype=dtype, order='F', copy=False)
    b = cupy.array(b, dtype=dtype, order='F', copy=(not overwrite_b))

    if check_finite:
        for operand in (a, b):
            if operand.dtype.kind == 'f' and not cupy.isfinite(operand).all():
                raise ValueError('array must not contain infs or NaNs')

    # A 1-d right-hand side is treated as a single column.
    if b.ndim == 1:
        m, n = b.size, 1
    else:
        m, n = b.shape

    cublas_handle = device.get_cublas_handle()

    if dtype == 'f':
        trsm = cublas.strsm
    elif dtype == 'd':
        trsm = cublas.dtrsm
    elif dtype == 'F':
        trsm = cublas.ctrsm
    else:  # dtype == 'D'
        trsm = cublas.ztrsm

    # Host-side scalar multiplier passed to trsm by address.
    alpha = numpy.array(1, dtype=dtype)

    uplo = (cublas.CUBLAS_FILL_MODE_LOWER if lower
            else cublas.CUBLAS_FILL_MODE_UPPER)
    diag = (cublas.CUBLAS_DIAG_UNIT if unit_diagonal
            else cublas.CUBLAS_DIAG_NON_UNIT)

    # Letter codes are translated; any other value is passed through as is.
    if trans == 'N':
        trans = cublas.CUBLAS_OP_N
    elif trans == 'T':
        trans = cublas.CUBLAS_OP_T
    elif trans == 'C':
        trans = cublas.CUBLAS_OP_C

    trsm(
        cublas_handle, cublas.CUBLAS_SIDE_LEFT, uplo,
        trans, diag,
        m, n, alpha.ctypes.data, a.data.ptr, m, b.data.ptr, m)
    return b
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is a one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.
            When ``compute_uv`` is ``False`` only ``s`` is returned.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    _util._assert_cupy_array(a)
    _util._assert_rank2(a)

    # Cast to float32, float64, complex64 or complex128; the singular values
    # use the matching real dtype.
    a_dtype = numpy.promote_types(a.dtype.char, 'f').char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D':
        a_dtype = 'D'
        s_dtype = 'd'

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    # Note the swapped names: `n` is the row count and `m` the column count
    # of `a` at this point.
    n, m = a.shape

    # Empty input: return empty (or identity) factors without calling gesvd.
    if m == 0 or n == 0:
        s = cupy.empty((0, ), s_dtype)
        if compute_uv:
            if full_matrices:
                u = cupy.eye(n, dtype=a_dtype)
                vt = cupy.eye(m, dtype=a_dtype)
            else:
                u = cupy.empty((n, 0), dtype=a_dtype)
                vt = cupy.empty((0, m), dtype=a_dtype)
            return u, s, vt
        else:
            return s

    # `a` must be copied because xgesvd destroys the matrix
    if m >= n:
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        # More rows than columns: rebind m/n to (rows, columns) and work on
        # a C-ordered copy of the transpose so that m >= n still holds.
        m, n = a.shape
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True

    k = n  # = min(m, n) where m >= n is ensured above
    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            # `vt` is a view into `x`; job_vt='O' is requested below.
            vt = x[:, :n]
            job_u = ord('A')
            job_vt = ord('O')
        else:
            # `u` aliases `x`; job_u='O' is requested below.
            u = x
            vt = cupy.empty((k, n), dtype=a_dtype)
            job_u = ord('O')
            job_vt = ord('S')
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
        job_u = ord('N')
        job_vt = ord('N')
    s = cupy.empty(k, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Pick the gesvd routine pair matching the working dtype.
    if a_dtype == 'f':
        gesvd = cusolver.sgesvd
        gesvd_bufferSize = cusolver.sgesvd_bufferSize
    elif a_dtype == 'd':
        gesvd = cusolver.dgesvd
        gesvd_bufferSize = cusolver.dgesvd_bufferSize
    elif a_dtype == 'F':
        gesvd = cusolver.cgesvd
        gesvd_bufferSize = cusolver.cgesvd_bufferSize
    else:  # a_dtype == 'D':
        gesvd = cusolver.zgesvd
        gesvd_bufferSize = cusolver.zgesvd_bufferSize

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=a_dtype)
    # The literal 0 after `buffersize` is passed in the rwork position as a
    # null pointer.
    gesvd(
        handle, job_u, job_vt, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
        vt_ptr, n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvd, dev_info)

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            # Without the explicit transpose the two buffers swap roles.
            return vt, s, u
    else:
        return s
def lstsq(a, b, rcond='warn'):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent rows
    of `a` can be less than, equal to, or greater than its number of
    linearly independent columns). If `a` is square and of full rank, then
    `x` (but for round-off error) is the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float, None or ``'warn'``): Cutoff parameter for small
            singular values. Singular values not larger than ``rcond`` times
            the largest singular value are treated as zero. ``None`` selects
            machine precision times ``max(M, N)``; a value ``<= 0`` or
            ``>= 1`` selects machine precision. The default ``'warn'``
            emits a :class:`FutureWarning` and then behaves like ``-1``.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)``
            depending if ``b`` was two-dimensional. ``residuals`` holds the
            squared Euclidean 2-norm for each column in b - a*x. It is an
            empty array if the rank of a is < N or M <= N; otherwise it has
            shape ``(1,)`` when b is 1-dimensional and ``(K,)`` when b is
            2-dimensional. The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    if rcond == 'warn':
        warnings.warn(
            '`rcond` parameter will change to the default of '
            'machine precision times ``max(M, N)`` where M and N '
            'are the input matrix dimensions.\n'
            'To use the future default and silence this warning '
            'we advise to pass `rcond=None`, to keep using the old, '
            'explicitly pass `rcond=-1`.',
            FutureWarning)
        rcond = -1

    _util._assert_cupy_array(a, b)
    _util._assert_2d(a)
    # TODO(kataoka): Fix 0-dim
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    if m != b.shape[0]:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vh = cupy.linalg.svd(a, full_matrices=False)

    # Resolve the relative cutoff actually applied to the singular values.
    eps = numpy.finfo(s.dtype).eps
    if rcond is None:
        rcond = eps * max(m, n)
    elif rcond <= 0 or rcond >= 1:
        # some doc of gelss/gelsd says "rcond < 0", but it's not true!
        rcond = eps

    # Invert the singular values, zeroing the negligible ones, and count the
    # remaining ones to obtain the rank.
    inv_s = 1 / s
    rank = cupy.array(s.size, numpy.int32)
    if s.size > 0:
        negligible = s <= rcond * s.max()
        inv_s[negligible] = 0
        rank -= negligible.sum(dtype=numpy.int32)

    # Solve the least-squares solution
    # x = vh.T.conj() @ diag(inv_s) @ u.T.conj() @ b
    projected = cupy.dot(b.T, u.conj()) * inv_s
    x = cupy.dot(vh.T.conj(), projected.T)

    # Calculate squared Euclidean 2-norm for each column in b - a*x; they
    # are only reported for over-determined, full-rank problems.
    if m > n and rank == n:
        err = b - a.dot(x)
        resids = cupy.atleast_1d(_nrm2_last_axis(err.T))
    else:
        resids = cupy.empty((0, ), dtype=s.dtype)
    return x, resids, rank, s
def invh(a):
    """Compute the inverse of a Hermitian matrix.

    This function computes the inverse of a real symmetric or complex
    Hermitian positive-definite matrix using Cholesky factorization. If
    matrix ``a`` is not positive definite, Cholesky factorization fails
    and it raises an error.

    Args:
        a (cupy.ndarray): Real symmetric or complex Hermitian matrix.
            Inputs whose dtype is not float32, float64, complex64 or
            complex128 are converted to the promoted floating-point type
            before factorization.

    Returns:
        cupy.ndarray: The inverse of matrix ``a``.

    Raises:
        RuntimeError: If ``potrf`` or ``potrs`` reports a failure (for
            example when ``a`` is not positive definite).
        ValueError: If the computation dtype is not supported.
    """
    _util._assert_cupy_array(a)
    _util._assert_nd_squareness(a)

    # TODO: Remove this assert once cusolver supports nrhs > 1 for potrsBatched
    _util._assert_rank2(a)
    if a.ndim > 2:
        return _batched_invh(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    if dtype == 'f':
        potrf = cusolver.spotrf
        potrf_bufferSize = cusolver.spotrf_bufferSize
        potrs = cusolver.spotrs
    elif dtype == 'd':
        potrf = cusolver.dpotrf
        potrf_bufferSize = cusolver.dpotrf_bufferSize
        potrs = cusolver.dpotrs
    elif dtype == 'F':
        potrf = cusolver.cpotrf
        potrf_bufferSize = cusolver.cpotrf_bufferSize
        potrs = cusolver.cpotrs
    elif dtype == 'D':
        potrf = cusolver.zpotrf
        potrf_bufferSize = cusolver.zpotrf_bufferSize
        potrs = cusolver.zpotrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # Copy so the caller's array is not overwritten, and cast to the
    # computation dtype: the cuSOLVER routine reads the buffer as `dtype`,
    # so an un-cast integer/half buffer would be misinterpreted.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m = a.shape[0]
    uplo = cublas.CUBLAS_FILL_MODE_LOWER

    worksize = potrf_bufferSize(cusolver_handle, uplo, m, a.data.ptr, m)
    workspace = cupy.empty(worksize, dtype=dtype)

    # Cholesky factorization
    potrf(cusolver_handle, uplo, m, a.data.ptr, m, workspace.data.ptr,
          worksize, dev_info.data.ptr)

    info = int(dev_info[0])
    if info != 0:
        if info < 0:
            msg = '\tThe {}-th parameter is wrong'.format(-info)
        else:
            msg = ('\tThe leading minor of order {} is not positive definite'
                   .format(info))
        raise RuntimeError('matrix inversion failed at potrf.\n' + msg)

    b = cupy.eye(m, dtype=dtype)

    # Solve: A * X = B
    potrs(cusolver_handle, uplo, m, m, a.data.ptr, m, b.data.ptr, m,
          dev_info.data.ptr)

    info = int(dev_info[0])
    if info > 0:
        # potrs is not expected to return a positive status.  Raised
        # explicitly (instead of `assert False`) so that the check is not
        # stripped when Python runs with -O.
        raise AssertionError(
            'Unexpected output returned by potrs (actual: {})'.format(info))
    elif info < 0:
        raise RuntimeError('matrix inversion failed at potrs.\n'
                           '\tThe {}-th parameter is wrong'.format(-info))

    return b
def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True):
    """Solve an equation system, ``a * x = b``, given the LU factorization of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    """
    lu, ipiv = lu_and_piv

    _util._assert_cupy_array(lu)
    _util._assert_2d(lu)
    _util._assert_stacked_square(lu)

    m = lu.shape[0]
    if b.shape[0] != m:
        raise ValueError('incompatible dimensions.')

    # Choose the cuSOLVER getrs variant from the dtype of the factorization.
    dtype = lu.dtype
    prefix = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z'}.get(dtype.char)
    if prefix is None:
        msg = 'Only float32, float64, complex64 and complex128 are supported.'
        raise NotImplementedError(msg)
    getrs = getattr(cusolver, prefix + 'getrs')

    # Translate the SciPy-style `trans` code into the cuBLAS operation.
    operations = (cublas.CUBLAS_OP_N, cublas.CUBLAS_OP_T, cublas.CUBLAS_OP_C)
    for code, operation in enumerate(operations):
        if trans == code:
            trans = operation
            break
    else:
        raise ValueError('unknown trans')

    lu = lu.astype(dtype, order='F', copy=False)
    # The pivots are always copied because they are shifted in place below.
    ipiv = ipiv.astype(ipiv.dtype, order='F', copy=True)
    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv += 1
    b = b.astype(dtype, order='F', copy=(not overwrite_b))

    if check_finite:
        if lu.dtype.kind == 'f' and not cupy.isfinite(lu).all():
            raise ValueError(
                'array must not contain infs or NaNs.\n'
                'Note that when a singular matrix is given, unlike '
                'scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor '
                'returns an array containing NaN.')
        if b.dtype.kind == 'f' and not cupy.isfinite(b).all():
            raise ValueError('array must not contain infs or NaNs')

    # A one-dimensional right-hand side is a single column.
    n = b.shape[1] if b.ndim != 1 else 1
    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # solve for the inverse; `b` is overwritten with the solution
    getrs(cusolver_handle,
          trans,
          m, n, lu.data.ptr, m, ipiv.data.ptr, b.data.ptr,
          m, dev_info.data.ptr)

    if not runtime.is_hip and dev_info[0] < 0:
        # rocSOLVER does not inform us this info
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrs (lu_solve)' % -dev_info[0])

    return b
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that minimizes
    the Euclidean 2-norm `|| b - a x ||^2`. The equation may be under-,
    well-, or over-determined (i.e., the number of linearly independent rows
    of `a` can be less than, equal to, or greater than its number of
    linearly independent columns). If `a` is square and of full rank, then
    `x` (but for round-off error) is the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            Singular values not larger than ``rcond`` times the largest
            singular value are treated as zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)``
            depending if ``b`` was two-dimensional. ``residuals`` holds the
            squared Euclidean 2-norm for each column in b - a*x. It is an
            empty array if the rank of a is < N or M <= N; otherwise it has
            shape ``(1,)`` when b is 1-dimensional and ``(K,)`` when b is
            2-dimensional. The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    Raises:
        numpy.linalg.LinAlgError: If ``b`` has more than two dimensions or
            its first dimension does not match the number of rows of ``a``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    _util._assert_cupy_array(a, b)
    _util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)

    # number of singular values and matrix rank
    # (`s` is empty when `a` has a zero-sized dimension; `max()` of an empty
    # array would raise, so no cutoff is applied in that case.)
    cutoff = rcond * s.max() if s.size > 0 else 0
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum()

    if b.ndim == 2:
        # Broadcast over the columns of `b` instead of materializing copies.
        s1 = s1[:, None]

    # Solve the least-squares solution
    # x = vt^H @ diag(s1) @ u^H @ b
    # The conjugate transposes are required for complex input; for real
    # input they reduce to plain transposes.
    z = cupy.dot(u.transpose().conj(), b) * s1
    x = cupy.dot(vt.transpose().conj(), z)

    # Calculate squared Euclidean 2-norm for each column in b - a*x
    if m <= n or rank != n:
        resids = cupy.array([], dtype=s.dtype)
    else:
        e = b - cupy.dot(a, x)
        # |e|^2 keeps the residual real and non-negative for complex input.
        resids = cupy.atleast_1d(cupy.sum(cupy.square(cupy.abs(e)), axis=0))
    return x, resids, rank, s
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix. Inputs whose dtype is not
            float32, float64, complex64 or complex128 are converted to the
            promoted floating-point type before factorization.

    Returns:
        cupy.ndarray: The inverse of a matrix.

    Raises:
        ValueError: If the computation dtype is not supported.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    # Validate before paying for the copy.
    _util._assert_cupy_array(a)
    _util._assert_rank2(a)
    _util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    elif dtype == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs
    elif dtype == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrs = cusolver.cgetrs
    elif dtype == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrs = cusolver.zgetrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # Copy to prevent `a` from being overwritten, and cast to the
    # computation dtype: the cuSOLVER routines read the buffer as `dtype`,
    # so an un-cast integer/half buffer would be misinterpreted.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m = a.shape[0]
    ipiv = cupy.empty((m, 1), dtype=numpy.intc)

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(
        cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
        ipiv.data.ptr, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        getrf, dev_info)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse: the identity in `b` is overwritten in place
    getrs(
        cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr, b.data.ptr, m,
        dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        getrs, dev_info)

    return b
def batched_gesv(a, b):
    """Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.
    """
    _util._assert_cupy_array(a, b)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # TODO(kataoka): Support broadcast
    ndim_compatible = a.ndim == b.ndim or a.ndim == b.ndim + 1
    if not (ndim_compatible and a.shape[:-1] == b.shape[:a.ndim - 1]):
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    dtype, out_dtype = _util.linalg_common_type(a, b)
    if b.size == 0:
        return cupy.empty(b.shape, out_dtype)

    # Map the computation dtype onto the cuBLAS routine prefix.
    for code, t in (('f', 's'), ('d', 'd'), ('F', 'c'), ('D', 'z')):
        if dtype == code:
            break
    else:
        raise TypeError('invalid dtype')
    getrf = getattr(cublas, t + 'getrfBatched')
    getrs = getattr(cublas, t + 'getrsBatched')

    bs = numpy.prod(a.shape[:-2]) if a.ndim > 2 else 1
    n = a.shape[-1]
    nrhs = b.shape[-1] if a.ndim == b.ndim else 1
    b_shape = b.shape

    # Remember where the inputs live so that, if the conversion below did
    # not allocate new memory, an explicit copy is taken before the batched
    # routines overwrite the buffers.
    orig_a_ptr = a.data.ptr
    orig_b_ptr = b.data.ptr
    a = cupy.ascontiguousarray(
        a.reshape(bs, n, n).transpose(0, 2, 1), dtype=dtype)
    b = cupy.ascontiguousarray(
        b.reshape(bs, n, nrhs).transpose(0, 2, 1), dtype=dtype)
    if a.data.ptr == orig_a_ptr:
        a = a.copy()
    if b.data.ptr == orig_b_ptr:
        b = b.copy()

    if n > get_batched_gesv_limit():
        warnings.warn('The matrix size ({}) exceeds the set limit ({})'.
                      format(n, get_batched_gesv_limit()))

    handle = device.get_cublas_handle()

    # Device arrays holding the address of every matrix in the batch.
    lda = n
    a_step = lda * n * a.itemsize
    a_first = a.data.ptr
    a_array = cupy.arange(a_first, a_first + a_step * bs, a_step,
                          dtype=cupy.uintp)
    ldb = n
    b_step = ldb * nrhs * b.itemsize
    b_first = b.data.ptr
    b_array = cupy.arange(b_first, b_first + b_step * bs, b_step,
                          dtype=cupy.uintp)

    pivot = cupy.empty((bs, n), dtype=numpy.int32)
    dinfo = cupy.empty((bs,), dtype=numpy.int32)
    # Host-side status buffer handed to getrsBatched.
    info = numpy.empty((1,), dtype=numpy.int32)

    # LU factorization (A = L * U)
    getrf(handle, n, a_array.data.ptr, lda, pivot.data.ptr, dinfo.data.ptr, bs)
    _util._check_cublas_info_array_if_synchronization_allowed(getrf, dinfo)

    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a_array.data.ptr, lda,
          pivot.data.ptr, b_array.data.ptr, ldb, info.ctypes.data, bs)
    if info[0] != 0:
        msg = 'Error reported by {} in cuBLAS. '.format(getrs.__name__)
        if info[0] < 0:
            msg += 'The {}-th parameter had an illegal value.'.format(-info[0])
        raise linalg.LinAlgError(msg)

    solution = b.transpose(0, 2, 1).reshape(b_shape)
    return solution.astype(out_dtype, copy=False)
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (..., M, N)`` is decomposed
            into ``Q``, ``R`` with dimensions ``(..., M, K)``,
            ``(..., K, N)``, where ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    _util._assert_cupy_array(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
        else:
            msg = 'Unrecognized mode \'{}\''.format(mode)
        raise ValueError(msg)
    if a.ndim > 2:
        return _qr_batched(a, mode)

    # support float32, float64, complex64, and complex128
    dtype, out_dtype = _util.linalg_common_type(a)

    m, n = a.shape
    k = min(m, n)
    if k == 0:
        # Empty input: build the results directly for each mode.
        if mode == 'reduced':
            return cupy.empty((m, 0), out_dtype), cupy.empty((0, n), out_dtype)
        if mode == 'complete':
            return cupy.identity(m, out_dtype), cupy.empty((m, n), out_dtype)
        if mode == 'r':
            return cupy.empty((0, n), out_dtype)
        # mode == 'raw'
        return cupy.empty((n, m), out_dtype), cupy.empty((0,), out_dtype)

    # Working copy of the transposed input; geqrf overwrites it.
    x = a.transpose().astype(dtype, order='C', copy=True)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # One table drives both routine lookups: the geqrf prefix and the name
    # of the routine that later builds Q (orgqr for real, ungqr for complex).
    routine_table = (
        ('f', 's', 'orgqr'),
        ('d', 'd', 'orgqr'),
        ('F', 'c', 'ungqr'),
        ('D', 'z', 'ungqr'),
    )
    for code, prefix, q_routine in routine_table:
        if dtype == code:
            break
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)
    geqrf_bufferSize = getattr(cusolver, prefix + 'geqrf_bufferSize')
    geqrf = getattr(cusolver, prefix + 'geqrf')

    # compute working space of geqrf and solve R
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(k, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    if mode == 'r':
        r = x[:, :k].transpose()
        return _util._triu(r).astype(out_dtype, copy=False)

    if mode == 'raw':
        raw_factors = x.astype(out_dtype, copy=False)
        raw_tau = tau.astype(out_dtype, copy=False)
        return raw_factors, raw_tau

    # Allocate the buffer in which Q is generated; 'complete' mode on a tall
    # matrix needs the full (m, m) factor.
    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = k
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # compute working space of orgqr and solve Q
    orgqr_bufferSize = getattr(cusolver, prefix + q_routine + '_bufferSize')
    orgqr = getattr(cusolver, prefix + q_routine)

    buffersize = orgqr_bufferSize(
        handle, m, mc, k, q.data.ptr, m, tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(
        handle, m, mc, k, q.data.ptr, m, tau.data.ptr, workspace.data.ptr,
        buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        orgqr, dev_info)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    q_out = q.astype(out_dtype, copy=False)
    r_out = _util._triu(r).astype(out_dtype, copy=False)
    return q_out, r_out