def _qr_batched(a, mode):
    """Compute the QR decomposition of every matrix in a stack.

    Zero-size inputs are answered directly with arrays of the expected
    shape; everything else is flattened to a single batch axis and handed
    to ``_geqrf_orgqr_batched``.

    Args:
        a (cupy.ndarray): Input with dimension ``(..., M, N)``.
        mode (str): ``'reduced'``, ``'complete'``, ``'r'`` or ``'raw'``.

    Returns:
        cupy.ndarray or tuple of cupy.ndarray: A single array for
        ``mode='r'`` and a pair of arrays otherwise, with the leading
        batch dimensions of ``a`` restored.
    """
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    m, n = a.shape[-2:]
    k = min(m, n)

    # Zero-size inputs never reach the solver: only the output shapes and
    # the output dtype matter.
    if batch_size == 0 or k == 0:
        # support float32, float64, complex64, and complex128
        _, out_dtype = _util.linalg_common_type(a)

        def _empty(*tail):
            return cupy.empty(batch_shape + tail, out_dtype)

        if mode == 'reduced':
            return (_empty(m, k), _empty(k, n))
        if mode == 'complete':
            q = _util.stacked_identity(batch_shape, m, out_dtype)
            return (q, _empty(m, n))
        if mode == 'r':
            return _empty(k, n)
        if mode == 'raw':
            return (_empty(n, m), _empty(k))
        # Any other mode falls through to the solver path below.

    # Collapse all batch axes into one and delegate the real computation
    # to cuSOLVER/rocSOLVER.
    flat = a.reshape((-1,) + a.shape[-2:])
    out = _geqrf_orgqr_batched(flat, mode)

    if mode == 'r':
        return out.reshape(batch_shape + out.shape[-2:])

    q, r = out
    q = q.reshape(batch_shape + q.shape[-2:])
    # In 'raw' mode the second output is one-dimensional per matrix.
    trailing = 1 if mode == 'raw' else 2
    r = r.reshape(batch_shape + r.shape[-trailing:])
    return (q, r)
def _batched_inv(a):
    """Invert every matrix of a stack with the cuBLAS batched LU routines.

    Args:
        a (cupy.ndarray): Stack of square matrices with at least three
            dimensions, ``(..., N, N)``.

    Returns:
        cupy.ndarray: Array of the same shape as ``a`` holding the
        inverses, in the output dtype chosen by
        ``_util.linalg_common_type``.

    Raises:
        ValueError: If the computation dtype is not float32, float64,
            complex64 or complex128.
    """
    assert a.ndim >= 3
    _util._assert_cupy_array(a)
    _util._assert_stacked_square(a)
    dtype, out_dtype = _util.linalg_common_type(a)

    # Pick the cuBLAS routine pair that matches the computation dtype.
    routine_prefixes = (
        (cupy.float32, 's'),
        (cupy.float64, 'd'),
        (cupy.complex64, 'c'),
        (cupy.complex128, 'z'),
    )
    for candidate, prefix in routine_prefixes:
        if dtype == candidate:
            getrf = getattr(cupy.cuda.cublas, prefix + 'getrfBatched')
            getri = getattr(cupy.cuda.cublas, prefix + 'getriBatched')
            break
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    if 0 in a.shape:
        return cupy.empty_like(a, dtype=out_dtype)

    a_shape = a.shape
    # The copy made by astype is necessary to prevent the caller's `a`
    # from being overwritten by the in-place LU factorization.
    a = a.astype(dtype, order='C').reshape(-1, a_shape[-2], a_shape[-1])

    handle = device.get_cublas_handle()
    batch_size = a.shape[0]
    n = a.shape[1]
    lda = n
    ldc = lda

    def _matrix_pointers(arr, ld):
        # Device array with the address of each matrix in the batch.
        step = n * ld * arr.itemsize
        start = arr.data.ptr
        stop = start + step * batch_size
        return cupy.arange(start, stop, step, dtype=cupy.uintp)

    a_array = _matrix_pointers(a, lda)
    pivot_array = cupy.empty((batch_size, n), dtype=cupy.int32)
    info_array = cupy.empty((batch_size, ), dtype=cupy.int32)

    getrf(handle, n, a_array.data.ptr, lda, pivot_array.data.ptr,
          info_array.data.ptr, batch_size)
    cupy.linalg._util._check_cublas_info_array_if_synchronization_allowed(
        getrf, info_array)

    c = cupy.empty_like(a)
    c_array = _matrix_pointers(c, ldc)

    getri(handle, n, a_array.data.ptr, lda, pivot_array.data.ptr,
          c_array.data.ptr, ldc, info_array.data.ptr, batch_size)
    cupy.linalg._util._check_cublas_info_array_if_synchronization_allowed(
        getri, info_array)

    return c.reshape(a_shape).astype(out_dtype, copy=False)
def solve(a, b):
    """Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.

    Raises:
        ValueError: If the shapes of ``a`` and ``b`` do not match the
            layouts above.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.solve`
    """
    # Stacks of small matrices go to the batched solver.  Large matrices do
    # not, because batched_gesv performs poorly on them.
    if a.ndim > 2 and a.shape[-1] <= get_batched_gesv_limit():
        return batched_gesv(a, b)

    # TODO(kataoka): Move the checks to the beginning
    _util._assert_cupy_array(a, b)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    ndim_matches = b.ndim == a.ndim or b.ndim + 1 == a.ndim
    if not ndim_matches or a.shape[:-1] != b.shape[:a.ndim - 1]:
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    dtype, out_dtype = _util.linalg_common_type(a, b)

    if a.ndim == 2:
        # gesv works in place, so operate on copies of both operands.
        lhs = a.astype(dtype, copy=True, order='F')
        rhs = b.astype(dtype, copy=True, order='F')
        cupyx.lapack.gesv(lhs, rhs)
        return rhs.astype(out_dtype, copy=False)

    # Stacked input: solve one system at a time on a private copy of `a`.
    lhs_stack = a.astype(dtype, copy=True, order='C')
    x = cupy.empty_like(b, dtype=out_dtype)
    for index in numpy.ndindex(*lhs_stack.shape[:-2]):
        # A fresh copy keeps the caller's `b` untouched.
        rhs = b[index].astype(dtype, copy=True, order='F')
        cupyx.lapack.gesv(lhs_stack[index], rhs)
        x[index] = rhs
    return x
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given square matrix into ``L * L.T``, where ``L`` is a
    lower-triangular matrix and ``.T`` is a conjugate transpose operator.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``.
            Inputs with more than two dimensions, ``(..., N, N)``, are
            forwarded to the batched implementation.

    Returns:
        cupy.ndarray: The lower-triangular matrix. An input without any
        element yields an empty array of the same shape.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    _util._assert_cupy_array(a)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    if a.ndim > 2:
        return _potrf_batched(a)

    dtype, out_dtype = _util.linalg_common_type(a)

    # Nothing to factorize for a 0x0 matrix.  Return early instead of
    # calling the solver with n == 0, which would also be passed as the
    # leading dimension.
    if a.size == 0:
        return cupy.empty(a.shape, out_dtype)

    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        potrf = cusolver.spotrf
        potrf_bufferSize = cusolver.spotrf_bufferSize
    elif dtype == 'd':
        potrf = cusolver.dpotrf
        potrf_bufferSize = cusolver.dpotrf_bufferSize
    elif dtype == 'F':
        potrf = cusolver.cpotrf
        potrf_bufferSize = cusolver.cpotrf_bufferSize
    else:  # dtype == 'D':
        potrf = cusolver.zpotrf
        potrf_bufferSize = cusolver.zpotrf_bufferSize

    # `x` is C-ordered, so the solver (which reads it as column-major) sees
    # the transpose: its "upper" triangle is our lower triangle.
    buffersize = potrf_bufferSize(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    potrf(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        potrf, dev_info)

    # Restrict the result to the lower triangle (in place).
    _util._tril(x, k=0)
    return x.astype(out_dtype, copy=False)
def _syevd(a, UPLO, with_eigen_vector):
    """Eigen-decompose one symmetric/Hermitian matrix with cuSOLVER syevd.

    Args:
        a (cupy.ndarray): Two-dimensional square matrix.
        UPLO (str): ``'L'`` or ``'U'``; which triangle of ``a`` is read.
        with_eigen_vector (bool): Whether eigenvectors are requested from
            the solver.

    Returns:
        tuple of cupy.ndarray: ``(w, v)`` where ``w`` holds the eigenvalues
        and ``v`` is the working matrix handed to the solver.

    Raises:
        ValueError: If ``UPLO`` is neither ``'L'`` nor ``'U'``.
        RuntimeError: If the computation dtype is unsupported.
    """
    if UPLO not in ('L', 'U'):
        raise ValueError('UPLO argument must be \'L\' or \'U\'')

    # reject_float16=False for backward compatibility
    dtype, v_dtype = _util.linalg_common_type(a, reject_float16=False)
    real_dtype = dtype.char.lower()
    w_dtype = v_dtype.char.lower()

    # Note that cuSolver assumes fortran array
    v = a.astype(dtype, order='F', copy=True)

    m, lda = a.shape
    w = cupy.empty(m, real_dtype)
    dev_info = cupy.empty((), numpy.int32)
    handle = device.Device().cusolver_handle

    jobz = (cusolver.CUSOLVER_EIG_MODE_VECTOR if with_eigen_vector
            else cusolver.CUSOLVER_EIG_MODE_NOVECTOR)
    uplo = (cublas.CUBLAS_FILL_MODE_LOWER if UPLO == 'L'
            else cublas.CUBLAS_FILL_MODE_UPPER)

    # Real types use the syevd family, complex types the heevd family.
    routine_name = {
        'f': 'ssyevd',
        'd': 'dsyevd',
        'F': 'cheevd',
        'D': 'zheevd',
    }.get(dtype.char)
    if routine_name is None:
        raise RuntimeError('Only float and double and cuComplex and '
                           + 'cuDoubleComplex are supported')
    buffer_size = getattr(cupy.cuda.cusolver, routine_name + '_bufferSize')
    syevd = getattr(cupy.cuda.cusolver, routine_name)

    work_size = buffer_size(
        handle, jobz, uplo, m, v.data.ptr, lda, w.data.ptr)
    work = cupy.empty(work_size, dtype)
    syevd(
        handle, jobz, uplo, m, v.data.ptr, lda,
        w.data.ptr, work.data.ptr, work_size, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        syevd, dev_info)

    return w.astype(w_dtype, copy=False), v.astype(v_dtype, copy=False)
def eigh(a, UPLO='L'):
    """
    Return the eigenvalues and eigenvectors of a complex Hermitian
    (conjugate symmetric) or a real symmetric matrix.

    Returns two objects, a 1-D array containing the eigenvalues of `a`, and
    a 2-D square array or matrix (depending on the input type) of the
    corresponding eigenvectors (in columns).

    Args:
        a (cupy.ndarray): A symmetric 2-D square matrix ``(M, M)`` or a batch
            of symmetric 2-D square matrices ``(..., M, M)``.
        UPLO (str): Select from ``'L'`` or ``'U'``. It specifies which
            part of ``a`` is used. ``'L'`` uses the lower triangular part of
            ``a``, and ``'U'`` uses the upper triangular part of ``a``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            Returns a tuple ``(w, v)``. ``w`` contains eigenvalues and
            ``v`` contains eigenvectors. ``v[:, i]`` is an eigenvector
            corresponding to an eigenvalue ``w[i]``. For batch input,
            ``v[k, :, i]`` is an eigenvector corresponding to an eigenvalue
            ``w[k, i]`` of ``a[k]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.eigh`
    """
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # Empty input: only shapes and dtypes are meaningful.
    if a.size == 0:
        _, v_dtype = _util.linalg_common_type(a)
        w_dtype = v_dtype.char.lower()
        return cupy.empty(a.shape[:-1], w_dtype), cupy.empty(a.shape, v_dtype)

    # A single matrix on CUDA goes through syevd; stacked input, and any
    # input on HIP, goes through syevj.
    if a.ndim <= 2 and not runtime.is_hip:
        return _syevd(a, UPLO, True)

    w, v = cupy.cusolver.syevj(a, UPLO, True)
    return w, v
def _potrf_batched(a):
    """Batched Cholesky decomposition.

    Decompose a given array of two-dimensional square matrices into
    ``L * L.T``, where ``L`` is a lower-triangular matrix and ``.T``
    is a conjugate transpose operator.

    Args:
        a (cupy.ndarray): The input array of matrices
            with dimension ``(..., N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    Raises:
        RuntimeError: If ``potrfBatched`` is reported as unavailable.
    """
    if not check_availability('potrfBatched'):
        raise RuntimeError('potrfBatched is not available')

    dtype, out_dtype = _util.linalg_common_type(a)

    if a.size == 0:
        return cupy.empty(a.shape, out_dtype)

    # Anything that is not float32/float64/complex64 uses the complex128
    # routine.
    prefix = {'f': 's', 'd': 'd', 'F': 'c'}.get(dtype.char, 'z')
    potrfBatched = getattr(cusolver, prefix + 'potrfBatched')

    x = a.astype(dtype, order='C', copy=True)
    matrix_ptrs = cupy._core._mat_ptrs(x)
    n = x.shape[-1]
    # Row stride of one matrix, expressed in elements.
    ldx = x.strides[-2] // x.dtype.itemsize
    handle = device.get_cusolver_handle()
    batch_size = internal.prod(x.shape[:-2])
    dev_info = cupy.empty(batch_size, dtype=numpy.int32)

    potrfBatched(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, matrix_ptrs.data.ptr, ldx,
        dev_info.data.ptr, batch_size)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        potrfBatched, dev_info)

    lower = cupy.tril(x)
    return lower.astype(out_dtype, copy=False)
def pinv(a, rcond=1e-15):
    """Compute the Moore-Penrose pseudoinverse of a matrix.

    It computes a pseudoinverse of a matrix ``a``, which is a generalization
    of the inverse matrix with Singular Value Decomposition (SVD).
    Note that it automatically removes small singular values for stability.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, N)``
        rcond (float or cupy.ndarray): Cutoff parameter for small singular
            values. For stability it computes the largest singular value
            denoted by ``s``, and sets all singular values smaller than
            ``rcond * s`` to zero. Broadcasts against the stack of matrices.

    Returns:
        cupy.ndarray: The pseudoinverse of ``a`` with dimension
        ``(..., N, M)``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.pinv`
    """
    _util._assert_cupy_array(a)

    if a.size == 0:
        _, result_dtype = _util.linalg_common_type(a)
        rows, cols = a.shape[-2:]
        if rows == 0 or cols == 0:
            result_dtype = a.dtype  # NumPy bug?
        return cupy.empty(a.shape[:-2] + (cols, rows), dtype=result_dtype)

    u, s, vt = _decomposition.svd(a.conj(), full_matrices=False)

    # Singular values at or below rcond * (largest singular value) are
    # treated as zero; the others are inverted in place.
    largest = cupy.amax(s, axis=-1)
    cutoff = rcond * largest
    negligible = s <= cutoff[..., None]
    cupy.reciprocal(s, out=s)
    s[negligible] = 0

    v = vt.swapaxes(-2, -1)
    scaled_ut = s[..., None] * u.swapaxes(-2, -1)
    return cupy.matmul(v, scaled_ut)
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    _util._assert_cupy_array(a)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # Stacked input is handled by the batched implementation.
    if a.ndim >= 3:
        return _batched_inv(a)

    dtype, out_dtype = _util.linalg_common_type(a)
    if a.size == 0:
        return cupy.empty(a.shape, out_dtype)

    # Keep the memory layout of the input so the copy below is cheap.
    if a._f_contiguous:
        order = 'F'
    else:
        order = 'C'

    # gesv overwrites its operands: work on a copy of `a` and solve
    # against the identity, which ends up holding the inverse.
    work = a.astype(dtype, copy=True, order=order)
    identity = cupy.eye(work.shape[0], dtype=dtype, order=order)

    if order == 'F':
        lhs, rhs = work, identity
    else:
        lhs, rhs = work.T, identity.T
    cupyx.lapack.gesv(lhs, rhs)

    return identity.astype(out_dtype, copy=False)
def eigvalsh(a, UPLO='L'):
    """
    Compute the eigenvalues of a complex Hermitian or real symmetric matrix.

    Main difference from eigh: the eigenvectors are not computed.

    Args:
        a (cupy.ndarray): A symmetric 2-D square matrix ``(M, M)`` or a batch
            of symmetric 2-D square matrices ``(..., M, M)``.
        UPLO (str): Select from ``'L'`` or ``'U'``. It specifies which
            part of ``a`` is used. ``'L'`` uses the lower triangular part of
            ``a``, and ``'U'`` uses the upper triangular part of ``a``.

    Returns:
        cupy.ndarray:
            Returns eigenvalues as a vector ``w``. For batch input,
            ``w[k]`` is a vector of eigenvalues of matrix ``a[k]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.eigvalsh`
    """
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # Empty input: only the shape and dtype of the result are meaningful.
    if a.size == 0:
        _, v_dtype = _util.linalg_common_type(a)
        return cupy.empty(a.shape[:-1], v_dtype.char.lower())

    # A single matrix on CUDA goes through syevd; stacked input, and any
    # input on HIP, goes through syevj.
    if a.ndim <= 2 and not runtime.is_hip:
        eigenvalues, _ = _syevd(a, UPLO, False)
        return eigenvalues

    return cupy.cusolver.syevj(a, UPLO, False)
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(..., M, M)`` and ``(..., N, N)``. Otherwise, the dimensions
            of u and v are ``(..., M, K)`` and ``(..., K, N)``, respectively,
            where ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. note::
        On CUDA, when ``a.ndim > 2`` and the matrix dimensions <= 32, a fast
        code path based on the Jacobi method (``gesvdj``) is taken.
        Otherwise, a QR method (``gesvd``) is used.

        On ROCm, there is no such a fast code path that switches the
        underlying algorithm.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    _util._assert_cupy_array(a)

    if a.ndim > 2:
        return _svd_batched(a, full_matrices, compute_uv)

    # Cast to float32 or float64
    dtype, uv_dtype = _util.linalg_common_type(a)
    real_dtype = dtype.char.lower()
    s_dtype = uv_dtype.char.lower()

    # Remark 1: gesvd only supports m >= n
    # Remark 2: gesvd returns matrix U and V^H
    n, m = a.shape

    # Empty matrices are answered without calling the solver.
    if m == 0 or n == 0:
        s = cupy.empty((0, ), s_dtype)
        if not compute_uv:
            return s
        if full_matrices:
            u = cupy.eye(n, dtype=uv_dtype)
            vt = cupy.eye(m, dtype=uv_dtype)
        else:
            u = cupy.empty((n, 0), dtype=uv_dtype)
            vt = cupy.empty((0, m), dtype=uv_dtype)
        return u, s, vt

    # `a` must be copied because xgesvd destroys the matrix.  When the
    # solver's m >= n requirement is not met, decompose the transpose
    # instead and swap the factors back on return.
    trans_flag = m < n
    if trans_flag:
        m, n = a.shape
        x = a.transpose().astype(dtype, order='C', copy=True)
    else:
        x = a.astype(dtype, order='C', copy=True)

    k = n  # = min(m, n) where m >= n is ensured above
    if compute_uv:
        if full_matrices:
            # One factor gets its own buffer, the other overwrites `x`.
            u = cupy.empty((m, m), dtype=dtype)
            vt = x[:, :n]
            job_u, job_vt = ord('A'), ord('O')
        else:
            u = x
            vt = cupy.empty((k, n), dtype=dtype)
            job_u, job_vt = ord('O'), ord('S')
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
        job_u, job_vt = ord('N'), ord('N')

    s = cupy.empty(k, dtype=real_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Anything that is not float32/float64/complex64 uses the complex128
    # routine.
    prefix = {'f': 's', 'd': 'd', 'F': 'c'}.get(dtype.char, 'z')
    gesvd = getattr(cusolver, prefix + 'gesvd')
    gesvd_bufferSize = getattr(cusolver, prefix + 'gesvd_bufferSize')

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=dtype)

    if runtime.is_hip:
        rwork = cupy.empty(min(m, n) - 1, dtype=s_dtype)
        rwork_ptr = rwork.data.ptr
    else:
        # rwork can be NULL if the information from superdiagonal isn't
        # needed
        # https://docs.nvidia.com/cuda/cusolver/index.html#cuSolverDN-lt-t-gt-gesvd  # noqa
        rwork_ptr = 0

    gesvd(
        handle, job_u, job_vt, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m,
        vt_ptr, n, workspace.data.ptr, buffersize, rwork_ptr,
        dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvd, dev_info)

    s = s.astype(s_dtype, copy=False)
    if not compute_uv:
        return s

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    u = u.astype(uv_dtype, copy=False)
    vt = vt.astype(uv_dtype, copy=False)
    if trans_flag:
        return u.transpose(), s, vt.transpose()
    return vt, s, u
def _svd_batched(a, full_matrices, compute_uv):
    """Singular value decomposition of a stack of matrices.

    Args:
        a (cupy.ndarray): Input with dimension ``(..., N, M)``.
        full_matrices (bool): Whether the square factors or the reduced
            factors are returned.
        compute_uv (bool): If ``False``, only singular values are returned.

    Returns:
        cupy.ndarray or tuple of cupy.ndarray: ``s`` alone when
        ``compute_uv`` is false, otherwise ``(u, s, vh)``, all with the
        leading batch dimensions of ``a``.
    """
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    n, m = a.shape[-2:]

    dtype, uv_dtype = _util.linalg_common_type(a)
    s_dtype = uv_dtype.char.lower()

    # Zero-size inputs are answered without calling the solver.
    if batch_size == 0 or m == 0 or n == 0:
        empty_batch = batch_size == 0
        k = min(m, n)
        s = cupy.empty(batch_shape + (k, ), s_dtype)
        if not compute_uv:
            return s
        if full_matrices:
            # With an empty batch there is nothing to fill.  Otherwise the
            # square factors of empty matrices are identities.
            u = cupy.empty(batch_shape + (n, n), dtype=uv_dtype)
            if not empty_batch:
                u[...] = cupy.identity(n, dtype=uv_dtype)
            vt = cupy.empty(batch_shape + (m, m), dtype=uv_dtype)
            if not empty_batch:
                vt[...] = cupy.identity(m, dtype=uv_dtype)
        else:
            u = cupy.empty(batch_shape + (n, k), dtype=uv_dtype)
            vt = cupy.empty(batch_shape + (k, m), dtype=uv_dtype)
        return u, s, vt

    # ...then delegate real computation to cuSOLVER
    a = a.reshape((-1,) + a.shape[-2:])
    if runtime.is_hip or (m <= 32 and n <= 32):
        # copy is done in _gesvdj_batched, so let's try not to do it here
        a = a.astype(dtype, order='C', copy=False)
        out = _gesvdj_batched(a, full_matrices, compute_uv, False)
    else:
        # manually loop over cusolverDn<t>gesvd()
        # copy (via possible type casting) is done in _gesvd_batched
        # note: _gesvd_batched returns V, not V^H
        out = _gesvd_batched(a, dtype.char, full_matrices, compute_uv, False)

    if not compute_uv:
        s = out.astype(s_dtype, copy=False)
        return s.reshape(batch_shape + s.shape[-1:])

    u, s, v = out
    u = u.astype(uv_dtype, copy=False)
    u = u.reshape(batch_shape + u.shape[-2:])
    s = s.astype(s_dtype, copy=False)
    s = s.reshape(batch_shape + s.shape[-1:])
    v = v.astype(uv_dtype, copy=False)
    v = v.reshape(batch_shape + v.shape[-2:])
    # The helpers hand back V; callers expect its conjugate transpose.
    return u, s, v.swapaxes(-2, -1).conj()
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    Raises:
        ValueError: If ``mode`` is deprecated or unrecognized, or if the
            computation dtype is not float32, float64, complex64 or
            complex128.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    _util._assert_cupy_array(a)
    _util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # support float32, float64, complex64, and complex128
    dtype, out_dtype = _util.linalg_common_type(a)
    if mode == 'raw':
        # compatibility with numpy.linalg.qr
        out_dtype = numpy.promote_types(out_dtype, 'd')

    m, n = a.shape
    mn = min(m, n)

    # Empty matrices are answered without calling the solver.
    if mn == 0:
        if mode == 'reduced':
            return cupy.empty((m, 0), out_dtype), cupy.empty((0, n), out_dtype)
        elif mode == 'complete':
            return cupy.identity(m, out_dtype), cupy.empty((m, n), out_dtype)
        elif mode == 'r':
            return cupy.empty((0, n), out_dtype)
        else:  # mode == 'raw'
            return cupy.empty((n, m), out_dtype), cupy.empty((0, ), out_dtype)

    # The C-ordered transpose has the same memory layout as a column-major
    # (m, n) matrix with leading dimension m, which is what the solver
    # expects.
    x = a.transpose().astype(dtype, order='C', copy=True)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # compute working space of geqrf and solve R
    # The leading dimension given to the size query must be the one used in
    # the factorization call below (m), not n.
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    if mode == 'r':
        r = x[:, :mn].transpose()
        return _util._triu(r).astype(out_dtype, copy=False)

    if mode == 'raw':
        return (
            x.astype(out_dtype, copy=False),
            tau.astype(out_dtype, copy=False))

    # Q is generated in a buffer that starts with the factored matrix.  In
    # 'complete' mode with m > n the buffer is widened to m columns.
    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # compute working space of orgqr and solve Q
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr

    buffersize = orgqr_bufferSize(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr, workspace.data.ptr,
        buffersize, dev_info.data.ptr)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        orgqr, dev_info)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return (
        q.astype(out_dtype, copy=False),
        _util._triu(r).astype(out_dtype, copy=False))
def batched_gesv(a, b):
    """Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.

    Raises:
        ValueError: If the shapes of ``a`` and ``b`` do not match the
            layouts above.
        TypeError: If the computation dtype is not supported.
        linalg.LinAlgError: If the batched solve reports an error.
    """
    _util._assert_cupy_array(a, b)
    _util._assert_stacked_2d(a)
    _util._assert_stacked_square(a)

    # TODO(kataoka): Support broadcast
    ndim_matches = b.ndim == a.ndim or b.ndim + 1 == a.ndim
    if not ndim_matches or a.shape[:-1] != b.shape[:a.ndim - 1]:
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    dtype, out_dtype = _util.linalg_common_type(a, b)
    if b.size == 0:
        return cupy.empty(b.shape, out_dtype)

    # One-letter routine prefix for the computation dtype.
    t = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z'}.get(dtype.char)
    if t is None:
        raise TypeError('invalid dtype')
    getrf = getattr(cublas, t + 'getrfBatched')
    getrs = getattr(cublas, t + 'getrsBatched')

    bs = numpy.prod(a.shape[:-2]) if a.ndim > 2 else 1
    n = a.shape[-1]
    nrhs = b.shape[-1] if a.ndim == b.ndim else 1
    b_shape = b.shape

    # Lay every matrix out transposed and contiguous, and make sure the
    # working arrays never share memory with the caller's inputs, since
    # the routines below overwrite them.
    a_data_ptr = a.data.ptr
    b_data_ptr = b.data.ptr
    a = cupy.ascontiguousarray(
        a.reshape(bs, n, n).transpose(0, 2, 1), dtype=dtype)
    b = cupy.ascontiguousarray(
        b.reshape(bs, n, nrhs).transpose(0, 2, 1), dtype=dtype)
    if a.data.ptr == a_data_ptr:
        a = a.copy()
    if b.data.ptr == b_data_ptr:
        b = b.copy()

    if n > get_batched_gesv_limit():
        warnings.warn('The matrix size ({}) exceeds the set limit ({})'.
                      format(n, get_batched_gesv_limit()))

    handle = device.get_cublas_handle()

    # Device arrays holding the address of each matrix in the batch.
    lda = n
    a_step = lda * n * a.itemsize
    a_array = cupy.arange(
        a.data.ptr, a.data.ptr + a_step * bs, a_step, dtype=cupy.uintp)
    ldb = n
    b_step = ldb * nrhs * b.itemsize
    b_array = cupy.arange(
        b.data.ptr, b.data.ptr + b_step * bs, b_step, dtype=cupy.uintp)

    pivot = cupy.empty((bs, n), dtype=numpy.int32)
    dinfo = cupy.empty((bs,), dtype=numpy.int32)
    # getrs is given a host-side status buffer.
    info = numpy.empty((1,), dtype=numpy.int32)

    # LU factorization (A = L * U)
    getrf(handle, n, a_array.data.ptr, lda, pivot.data.ptr,
          dinfo.data.ptr, bs)
    _util._check_cublas_info_array_if_synchronization_allowed(getrf, dinfo)

    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a_array.data.ptr, lda,
          pivot.data.ptr, b_array.data.ptr, ldb, info.ctypes.data, bs)
    status = info[0]
    if status != 0:
        msg = 'Error reported by {} in cuBLAS. '.format(getrs.__name__)
        if status < 0:
            msg += 'The {}-th parameter had an illegal value.'.format(-status)
        raise linalg.LinAlgError(msg)

    solution = b.transpose(0, 2, 1).reshape(b_shape)
    return solution.astype(out_dtype, copy=False)
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute
            value of the determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    Raises:
        linalg.LinAlgError: If ``a`` has fewer than two dimensions.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype, sign_dtype = _util.linalg_common_type(a)
    logdet_dtype = numpy.dtype(sign_dtype.char.lower())

    batch_shape = a.shape[:-2]
    n = a.shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        return (cupy.ones(batch_shape, sign_dtype),
                cupy.zeros(batch_shape, logdet_dtype))

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    # log|det| is the sum of the logs of the absolute diagonal of LU.
    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # Count sign flips: one per row exchange (ipiv is 1-origin) and, for
    # real input, one per negative diagonal entry.
    flips = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        flips += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == -1 ** (flips % 2)
    sign = (flips % 2) * -2 + 1
    if dtype.kind == "c":
        # For complex input the product of the diagonal's unit phases is
        # folded in as well.
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    sign = sign.astype(dtype)
    logdet = logdet.astype(logdet_dtype, copy=False)

    # A positive dev_info marks a singular matrix: report sign 0 and
    # logdet -inf for it.
    singular = dev_info > 0
    masked_sign = cupy.where(singular, sign_dtype.type(0), sign)
    masked_logdet = cupy.where(singular, logdet_dtype.type('-inf'), logdet)
    return (
        masked_sign.reshape(batch_shape),
        masked_logdet.reshape(batch_shape),
    )
def _syevd(a, UPLO, with_eigen_vector, overwrite_a=False):
    """Eigen-decompose one symmetric/Hermitian matrix with cuSOLVER.

    Uses the generic ``xsyevd`` entry point when running on CUDA with a
    runtime version of at least 11010, and the typed ``syevd``/``heevd``
    routines otherwise.

    Args:
        a (cupy.ndarray): Two-dimensional square matrix.
        UPLO (str): ``'L'`` or ``'U'``; which triangle of ``a`` is read.
        with_eigen_vector (bool): Whether eigenvectors are requested from
            the solver.
        overwrite_a (bool): If true, the cast to the working array is
            allowed to skip the copy, so ``a`` may be overwritten.

    Returns:
        tuple of cupy.ndarray: ``(w, v)`` where ``w`` holds the eigenvalues
        and ``v`` is the working matrix handed to the solver.

    Raises:
        ValueError: If ``UPLO`` is neither ``'L'`` nor ``'U'``.
        RuntimeError: If the computation dtype is unsupported.
    """
    if UPLO not in ('L', 'U'):
        raise ValueError('UPLO argument must be \'L\' or \'U\'')

    # reject_float16=False for backward compatibility
    dtype, v_dtype = _util.linalg_common_type(a, reject_float16=False)
    real_dtype = dtype.char.lower()
    w_dtype = v_dtype.char.lower()

    # Note that cuSolver assumes fortran array
    v = a.astype(dtype, order='F', copy=not overwrite_a)

    m, lda = a.shape
    w = cupy.empty(m, real_dtype)
    dev_info = cupy.empty((), numpy.int32)
    handle = device.Device().cusolver_handle

    jobz = (cusolver.CUSOLVER_EIG_MODE_VECTOR if with_eigen_vector
            else cusolver.CUSOLVER_EIG_MODE_NOVECTOR)
    uplo = (cublas.CUBLAS_FILL_MODE_LOWER if UPLO == 'L'
            else cublas.CUBLAS_FILL_MODE_UPPER)

    # The runtime version is looked up once and cached at module level.
    global _cuda_runtime_version
    if _cuda_runtime_version < 0:
        _cuda_runtime_version = runtime.runtimeGetVersion()

    if not runtime.is_hip and _cuda_runtime_version >= 11010:
        if dtype.char not in 'fdFD':
            raise RuntimeError('Only float32, float64, complex64, and '
                               'complex128 are supported')
        type_v = _dtype.to_cuda_dtype(dtype)
        type_w = _dtype.to_cuda_dtype(real_dtype)
        params = cusolver.createParams()
        try:
            # The generic API needs a device and a host workspace, both
            # sized in bytes.
            device_bytes, host_bytes = cusolver.xsyevd_bufferSize(
                handle, params, jobz, uplo, m, type_v, v.data.ptr, lda,
                type_w, w.data.ptr, type_v)
            work_device = cupy.empty(device_bytes, 'b')
            work_host = numpy.empty(host_bytes, 'b')
            cusolver.xsyevd(
                handle, params, jobz, uplo, m, type_v, v.data.ptr, lda,
                type_w, w.data.ptr, type_v,
                work_device.data.ptr, device_bytes,
                work_host.ctypes.data, host_bytes, dev_info.data.ptr)
        finally:
            cusolver.destroyParams(params)
        cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            cusolver.xsyevd, dev_info)
    else:
        # Real types use the syevd family, complex types the heevd family.
        routine_name = {
            'f': 'ssyevd',
            'd': 'dsyevd',
            'F': 'cheevd',
            'D': 'zheevd',
        }.get(dtype.char)
        if routine_name is None:
            raise RuntimeError('Only float32, float64, complex64, and '
                               'complex128 are supported')
        buffer_size = getattr(
            cupy.cuda.cusolver, routine_name + '_bufferSize')
        syevd = getattr(cupy.cuda.cusolver, routine_name)

        work_size = buffer_size(
            handle, jobz, uplo, m, v.data.ptr, lda, w.data.ptr)
        work = cupy.empty(work_size, dtype)
        syevd(
            handle, jobz, uplo, m, v.data.ptr, lda,
            w.data.ptr, work.data.ptr, work_size, dev_info.data.ptr)
        cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
            syevd, dev_info)

    return w.astype(w_dtype, copy=False), v.astype(v_dtype, copy=False)