def cholesky(a):
    """Cholesky decomposition.

    Factorize a two-dimensional square matrix as ``L * L.T``, where ``L`` is
    a lower-triangular matrix and ``.T`` is the conjugate transpose operator.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # float32 / float64 are used as they are; anything else is promoted
    # against float32.
    char = a.dtype.char
    if char in ('f', 'd'):
        dtype = char
    else:
        dtype = numpy.promote_types(char, 'f').char

    # cuSOLVER routine prefix: s / d / c, with z for everything else.
    prefix = {'f': 's', 'd': 'd', 'F': 'c'}.get(dtype, 'z')
    potrf = getattr(cusolver, prefix + 'potrf')
    potrf_bufferSize = getattr(cusolver, prefix + 'potrf_bufferSize')

    # Work on a C-contiguous copy so that ``a`` is left untouched.
    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    uplo = cublas.CUBLAS_FILL_MODE_UPPER

    buffersize = potrf_bufferSize(handle, uplo, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    potrf(handle, uplo, n, x.data.ptr, n,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        potrf, dev_info)

    # Only the lower triangle is part of the result.
    util._tril(x, k=0)
    return x
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    The input is validated, then copied into a C-contiguous array of the
    working dtype (float32, float64, complex64 or complex128), so ``a``
    itself is never overwritten by the in-place LU factorization.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        ValueError: If the working dtype is not float32, float64, complex64
            or complex128.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Floating/complex inputs keep their dtype; other dtypes are promoted
    # against float32.
    if a.dtype.char in ('f', 'd', 'F', 'D'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    prefix = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z'}.get(dtype)
    if prefix is None:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)
    getrf = getattr(cusolver, prefix + 'getrf')
    getrf_bufferSize = getattr(cusolver, prefix + 'getrf_bufferSize')
    getrs = getattr(cusolver, prefix + 'getrs')

    # The copy must really have the working dtype: the routines read the
    # buffer as that type, and getrf overwrites it in place.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    # Status word and pivot indices are integers, whatever the matrix dtype.
    dev_info = cupy.empty(1, dtype=numpy.int32)
    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    m = a.shape[0]
    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m,
          workspace.data.ptr, ipiv.data.ptr, dev_info.data.ptr)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse (0 is the "no transpose" operation code)
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m,
          ipiv.data.ptr, b.data.ptr, m, dev_info.data.ptr)

    return b
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator.

    float32, float64, complex64 and complex128 inputs are processed in their
    own dtype; other dtypes are promoted against float32 first.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        cupy.linalg.LinAlgError: If the factorization reports that a leading
            minor is not positive definite, or reports a parameter error.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Pick the working dtype.
    if a.dtype.char in ('f', 'd'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    # Select the routine that matches the working dtype, so that a complex
    # working dtype is not handed to the double-precision real routine.
    prefix = {'f': 's', 'd': 'd', 'F': 'c'}.get(dtype, 'z')
    potrf = getattr(cusolver, prefix + 'potrf')
    potrf_bufferSize = getattr(cusolver, prefix + 'potrf_bufferSize')

    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    buffersize = potrf_bufferSize(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    potrf(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
        workspace.data.ptr, buffersize, dev_info.data.ptr)

    # Reading the status word copies it back to the host.
    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'The leading minor of order {} '
            'is not positive definite'.format(status))
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    util._tril(x, k=0)
    return x
def _slogdet_one(a):
    """Compute the sign and log-magnitude of the determinant of one matrix.

    The matrix is LU-factorized with getrf; the determinant's magnitude is
    the product of the absolute values of the diagonal of the factorized
    matrix, and its sign comes from the row interchanges and the diagonal
    entries.

    Args:
        a (cupy.ndarray): A two-dimensional square matrix.

    Returns:
        tuple: ``(sign, logdet)``. When getrf reports a non-zero status the
        result is ``(0, -inf)``. For a complex matrix ``sign`` is a complex
        number of unit modulus; ``logdet`` is always real.
    """
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    elif dtype == 'd':
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf
    elif dtype == 'F':
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrf = cusolver.cgetrf
    else:
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrf = cusolver.zgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] != 0:
        # Factorization did not complete: report a zero determinant.
        sign = cupy.array(0.0, dtype=dtype)
        # logdet is real-valued, so use the matching real precision.
        if dtype in ['f', 'd']:
            real_dtype = dtype
        elif dtype == 'F':
            real_dtype = cupy.float32
        else:
            real_dtype = cupy.float64
        logdet = cupy.array(float('-inf'), real_dtype)
        return sign, logdet

    diag = cupy.diag(a_copy)
    abs_diag = abs(diag)
    # ipiv is 1-origin: an entry differing from its own row number is a swap
    swaps = cupy.count_nonzero(ipiv != cupy.arange(1, m + 1))

    if dtype.kind == 'c':
        # Complex entries have no "negative" notion; the sign is the product
        # of the unit phases of the diagonal, negated for an odd swap count.
        phase = (diag / abs_diag).prod()
        sign = cupy.where(swaps % 2 == 1, -phase, phase)
    else:
        non_zero = swaps + cupy.count_nonzero(diag < 0)
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs_diag).sum()

    return sign, logdet
def _lu_factor(a, overwrite_a=False, check_finite=True):
    """LU-factorize a two-dimensional matrix with cuSOLVER getrf.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``. Only
            float32, float64, complex64 and complex128 are accepted.
        overwrite_a (bool): When ``True`` the conversion to Fortran order is
            requested without forcing a copy.
        check_finite (bool): Whether to reject inputs containing infs or
            NaNs before factorizing.

    Returns:
        tuple: ``(lu, piv)`` where ``lu`` is the array factorized in place
        by getrf and ``piv`` holds the pivot indices converted to 0-origin.

    Raises:
        NotImplementedError: If the dtype of ``a`` is not supported.
        ValueError: If ``check_finite`` is set and ``a`` contains infs or
            NaNs, or if getrf reports an illegal argument.
    """
    a = cupy.asarray(a)
    util._assert_rank2(a)

    dtype = a.dtype

    if dtype.char == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
    elif dtype.char == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
    elif dtype.char == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
    elif dtype.char == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
    else:
        msg = 'Only float32, float64, complex64 and complex128 are supported.'
        raise NotImplementedError(msg)

    a = a.astype(dtype, order='F', copy=(not overwrite_a))

    # The dtype is real or complex floating here, so the finiteness test
    # applies to every accepted input (complex included).
    if check_finite and not cupy.isfinite(a).all():
        raise ValueError('array must not contain infs or NaNs')

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m, n = a.shape

    ipiv = cupy.empty((min(m, n), ), dtype=numpy.intc)

    buffersize = getrf_bufferSize(cusolver_handle, m, n, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, n, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    # Fetch the status word once instead of once per comparison.
    status = int(dev_info[0])
    if status < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrf (lu_factor)' % -status)
    elif status > 0:
        warn('Diagonal number %d is exactly zero. Singular matrix.'
             % status, RuntimeWarning, stacklevel=2)

    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv -= 1

    return (a, ipiv)
def _slogdet_one(a):
    """Return ``(sign, logdet)`` of one square matrix without synchronizing.

    The matrix is LU-factorized with getrf on a Fortran-ordered copy. The
    sign and log-magnitude are always computed from the factorized diagonal,
    and are replaced by ``0`` and ``-inf`` on the device when the status
    word reported by getrf is positive.

    Args:
        a (cupy.ndarray): A two-dimensional square matrix.

    Returns:
        tuple: ``(sign, logdet)`` as device arrays.
    """
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    dtype = a.dtype
    m = len(a)
    handle = device.get_cusolver_handle()

    # getrf works in place, hence the copy
    lu = a.copy(order='F')
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty((), dtype=numpy.int32)

    prefix = 's' if dtype == 'f' else 'd'
    getrf = getattr(cusolver, prefix + 'getrf')
    getrf_bufferSize = getattr(cusolver, prefix + 'getrf_bufferSize')

    buffersize = getrf_bufferSize(handle, m, m, lu.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, lu.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.)
    # that should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    lu_diag = cupy.diag(lu)
    # ipiv is 1-origin
    row_swaps = cupy.count_nonzero(ipiv != cupy.arange(1, m + 1))
    negatives = cupy.count_nonzero(lu_diag < 0)
    non_zero = row_swaps + negatives
    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs(lu_diag)).sum()

    singular = dev_info > 0
    zero = dtype.type(0)
    minus_inf = dtype.type('-inf')
    return (
        cupy.where(singular, zero, sign),
        cupy.where(singular, minus_inf, logdet),
    )
def _slogdet_one(a):
    """Return ``(sign, logdet)`` of the determinant of one square matrix.

    The matrix is LU-factorized with getrf on a Fortran-ordered copy. If the
    status check raises ``LinAlgError`` the result is ``(0, -inf)``;
    otherwise the sign and log-magnitude are derived from the pivots and the
    factorized diagonal.

    Args:
        a (cupy.ndarray): A two-dimensional square matrix.

    Returns:
        tuple: ``(sign, logdet)``.
    """
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    dtype = a.dtype
    m = len(a)
    handle = device.get_cusolver_handle()

    # getrf works in place, hence the copy
    lu = a.copy(order='F')
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty(1, dtype=numpy.int32)

    prefix = 's' if dtype == 'f' else 'd'
    getrf = getattr(cusolver, prefix + 'getrf')
    getrf_bufferSize = getattr(cusolver, prefix + 'getrf_bufferSize')

    buffersize = getrf_bufferSize(handle, m, m, lu.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, lu.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    try:
        cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
            getrf, dev_info)

        lu_diag = cupy.diag(lu)
        # ipiv is 1-origin
        row_swaps = cupy.count_nonzero(ipiv != cupy.arange(1, m + 1))
        negatives = cupy.count_nonzero(lu_diag < 0)
        non_zero = row_swaps + negatives
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(lu_diag)).sum()
    except linalg.LinAlgError:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
def inv(a):
    '''Computes the inverse of a matrix.

    The inverse ``a_inv`` of the regular matrix ``a`` satisfies
    ``dot(a, a_inv) == eye(n)``. It is obtained by solving ``a x = I`` with
    :func:`solve`, where ``I`` is the identity matrix of the same size and
    dtype as ``a``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. seealso:: :func:`numpy.linalg.inv`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Right-hand side: identity with as many rows as ``a``.
    size = len(a)
    identity = cupy.eye(size, dtype=a.dtype)
    return solve(a, identity)
def invh(a):
    """Compute the inverse of a Hermitian matrix.

    This function computes the inverse of a real symmetric or complex
    Hermitian positive-definite matrix using Cholesky factorization. If
    matrix ``a`` is not positive definite, Cholesky factorization fails and
    it raises an error.

    The input is copied into an array of the working dtype (float32,
    float64, complex64 or complex128) before factorizing, so ``a`` itself is
    not overwritten.

    Args:
        a (cupy.ndarray): Real symmetric or complex Hermitian matrix.

    Returns:
        cupy.ndarray: The inverse of matrix ``a``.

    Raises:
        ValueError: If the working dtype is not supported.
        RuntimeError: If potrf or potrs reports a failure.
    """
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in ('f', 'd', 'F', 'D'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    prefix = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z'}.get(dtype)
    if prefix is None:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)
    potrf = getattr(cusolver, prefix + 'potrf')
    potrf_bufferSize = getattr(cusolver, prefix + 'potrf_bufferSize')
    potrs = getattr(cusolver, prefix + 'potrs')

    # Copy to prevent `a` from being overwritten, and cast so the buffer
    # really holds elements of the dtype the selected routines expect.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m = a.shape[0]
    uplo = cublas.CUBLAS_FILL_MODE_LOWER

    worksize = potrf_bufferSize(cusolver_handle, uplo, m, a.data.ptr, m)
    workspace = cupy.empty(worksize, dtype=dtype)

    # Cholesky factorization
    potrf(cusolver_handle, uplo, m, a.data.ptr, m, workspace.data.ptr,
          worksize, dev_info.data.ptr)

    # Read the status word once, as a Python int.
    info = int(dev_info[0])
    if info != 0:
        if info < 0:
            msg = '\tThe {}-th parameter is wrong'.format(-info)
        else:
            msg = ('\tThe leading minor of order {} is not positive definite'
                   .format(info))
        raise RuntimeError('matrix inversion failed at potrf.\n' + msg)

    b = cupy.eye(m, dtype=dtype)

    # Solve: A * X = B
    potrs(cusolver_handle, uplo, m, m, a.data.ptr, m,
          b.data.ptr, m, dev_info.data.ptr)

    info = int(dev_info[0])
    if info > 0:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError(
            'Unexpected output returned by potrs (actual: {})'.format(info))
    elif info < 0:
        raise RuntimeError('matrix inversion failed at potrs.\n'
                           '\tThe {}-th parameter is wrong'.format(-info))

    return b
def solve(a, b):
    '''Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``
        b (cupy.ndarray): The vector with ``M`` elements, or
            the matrix with dimension ``(M, K)``

    Returns:
        cupy.ndarray:
            The vector with ``M`` elements, or the matrix with dimension
            ``(M, K)``.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        cupy.linalg.LinAlgError: If ``b`` has more than two dimensions or
            its number of rows differs from that of ``a``.

    .. seealso:: :func:`numpy.linalg.solve`
    '''
    # NOTE: Since cusolver in CUDA 8.0 does not support gesv,
    #       we manually solve a linear system with QR decomposition.
    #       For details, please see the following:
    #       https://docs.nvidia.com/cuda/cusolver/index.html#qr_examples
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    if 2 < b.ndim:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be '
                                 'one or two-dimensional'.format(b.ndim))
    if len(a) != len(b):
        raise linalg.LinAlgError('The number of rows of array a must be '
                                 'the same as that of array b')

    # Cast to float32 or float64
    # NOTE(review): a complex input promotes to a complex dtype and then
    # selects the double-precision real routines below - confirm callers
    # never pass complex arrays.
    if a.dtype.char in ('f', 'd'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, k = (b.size, 1) if b.ndim == 1 else b.shape
    # C-ordered copies of the transposes, i.e. the original matrices laid
    # out column by column.
    a = a.transpose().astype(dtype, order='C', copy=True)
    b = b.transpose().astype(dtype, order='C', copy=True)
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf = cusolver.sgeqrf
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        ormqr = cusolver.sormqr
        trsm = cublas.strsm
    else:  # dtype == 'd'
        geqrf = cusolver.dgeqrf
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        ormqr = cusolver.dormqr
        trsm = cublas.dtrsm

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(cusolver_handle, m, m, a.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 2. ormqr (Q^T * B)
    ormqr(cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T,
          m, k, m, a.data.ptr, m, tau.data.ptr, b.data.ptr, m,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
         cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT,
         m, k, 1, a.data.ptr, m, b.data.ptr, m)

    return b.transpose()
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Inputs with three or more dimensions are delegated to ``_batched_inv``.
    A two-dimensional input is copied into a C-contiguous array of the
    working dtype (float32, float64, complex64 or complex128), so ``a``
    itself is not overwritten by the in-place LU factorization.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    Raises:
        ValueError: If the working dtype is not float32, float64, complex64
            or complex128.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in ('f', 'd', 'F', 'D'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    prefix = {'f': 's', 'd': 'd', 'F': 'c', 'D': 'z'}.get(dtype)
    if prefix is None:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)
    getrf = getattr(cusolver, prefix + 'getrf')
    getrf_bufferSize = getattr(cusolver, prefix + 'getrf_bufferSize')
    getrs = getattr(cusolver, prefix + 'getrs')

    # Copy to prevent `a` from being overwritten, and cast so the buffer
    # really holds elements of the dtype the selected routines expect.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse (0 is the "no transpose" operation code)
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr,
          b.data.ptr, m, dev_info.data.ptr)

    return b
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            With ``s_max`` the largest singular value of ``a``, every
            singular value less than or equal to ``rcond * s_max`` is
            treated as zero when the solution is computed.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)`` depending
            if ``b`` was two-dimensional. The sums of ``residuals`` is the
            squared Euclidean 2-norm for each column in b - a*x. The
            ``residuals`` is an empty array if the rank of a is < N or M <= N,
            but iff b is 1-dimensional, this is a (1,) shape array, Otherwise
            the shape is (K,). The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    Raises:
        cupy.linalg.LinAlgError: If ``b`` has more than two dimensions or
            its first dimension differs from the number of rows of ``a``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)
    # number of singular values and matrix rank
    cutoff = rcond * s.max()
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum()

    if b.ndim == 2:
        # Column vector: broadcasts across the K right-hand sides.
        s1 = s1.reshape(-1, 1)

    # Solve the least-squares solution:
    #   x = V * diag(1 / s) * U^H * b
    # The conjugations are no-ops for real input and are required for the
    # pseudo-inverse of a complex matrix.
    z = core.dot(u.conj().transpose(), b) * s1
    x = core.dot(vt.conj().transpose(), z)

    # Calculate squared Euclidean 2-norm for each column in b - a*x
    # (squared magnitudes, so the result is also right for complex data)
    if rank != n or m <= n:
        resids = cupy.array([], dtype=a.dtype)
    elif b.ndim == 2:
        e = b - core.dot(a, x)
        resids = cupy.sum(cupy.square(abs(e)), axis=0)
    else:
        e = b - cupy.dot(a, x)
        resids = cupy.dot(e.conj(), e).real.reshape(-1)
    return x, resids, rank, s
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.
            Only ``s`` is returned when ``compute_uv`` is ``False``.

    Raises:
        cupy.linalg.LinAlgError: If gesvd reports a non-zero status.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32, float64, complex64 or complex128; the singular values
    # use the real dtype of matching precision.
    a_dtype = numpy.promote_types(a.dtype.char, 'f').char
    if a_dtype not in ('f', 'd', 'F'):
        a_dtype = 'D'
    s_dtype, prefix = {
        'f': ('f', 's'),
        'd': ('d', 'd'),
        'F': ('f', 'c'),
        'D': ('d', 'z'),
    }[a_dtype]
    gesvd = getattr(cusolver, prefix + 'gesvd')
    gesvd_bufferSize = getattr(cusolver, prefix + 'gesvd_bufferSize')

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    rows, cols = a.shape

    # `a` must be copied because xgesvd destroys the matrix.
    # The copy is arranged so that the (m, n) problem handed to gesvd
    # always has m >= n.
    if cols >= rows:
        m, n = cols, rows
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = rows, cols
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            vt = cupy.empty((n, n), dtype=a_dtype)
        else:
            u = cupy.empty((mn, m), dtype=a_dtype)
            vt = cupy.empty((mn, n), dtype=a_dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
        job = ord('A') if full_matrices else ord('S')
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
        job = ord('N')
    s = cupy.empty(mn, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=a_dtype)
    gesvd(
        handle, job, job, m, n, x.data.ptr, m,
        s.data.ptr, u_ptr, m, vt_ptr, n,
        workspace.data.ptr, buffersize, 0, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'SVD computation does not converge')
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
def lu_factor(a, overwrite_a=False, check_finite=True):
    """LU decomposition.

    Decompose a given two-dimensional square matrix into ``P * L * U``,
    where ``P`` is a permutation matrix,  ``L`` lower-triangular with
    unit diagonal elements, and ``U`` upper-triangular matrix.
    Note that in the current implementation ``a`` must be
    a real matrix, and only :class:`numpy.float32` and :class:`numpy.float64`
    are supported.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(lu, piv)`` where ``lu`` is a :class:`cupy.ndarray`
            storing ``U`` in its upper triangle, and ``L`` without
            unit diagonal elements in its lower triangle, and ``piv`` is
            a :class:`cupy.ndarray` storing pivot indices representing
            permutation matrix ``P``. For ``0 <= i < min(M,N)``, row
            ``i`` of the matrix was interchanged with row ``piv[i]``

    .. seealso:: :func:`scipy.linalg.lu_factor`

    .. note::

        Current implementation returns result different from SciPy when the
        matrix is singular. SciPy returns an array containing ``0.`` while
        the current implementation returns an array containing ``nan``.

        >>> import numpy as np
        >>> import scipy.linalg
        >>> scipy.linalg.lu_factor(
        ...     np.array([[0, 1], [0, 0]], dtype=np.float32))
        (array([[0., 1.],
               [0., 0.]], dtype=float32), array([0, 1], dtype=int32))

        >>> import cupy as cp
        >>> import cupyx.scipy.linalg
        >>> cupyx.scipy.linalg.lu_factor(
        ...     cp.array([[0, 1], [0, 0]], dtype=cp.float32))
        (array([[ 0.,  1.],
               [nan, nan]], dtype=float32), array([0, 1], dtype=int32))
    """
    a = cupy.asarray(a)
    util._assert_rank2(a)

    dtype = a.dtype
    kind_char = dtype.char
    if kind_char not in ('f', 'd'):
        raise NotImplementedError('Only float32 and float64 are supported.')
    if kind_char == 'f':
        getrf, getrf_bufferSize = cusolver.sgetrf, cusolver.sgetrf_bufferSize
    else:
        getrf, getrf_bufferSize = cusolver.dgetrf, cusolver.dgetrf_bufferSize

    # Fortran order is what getrf works on; copy unless overwriting is allowed
    a = a.astype(dtype, order='F', copy=(not overwrite_a))

    if check_finite and a.dtype.kind == 'f' and not cupy.isfinite(a).all():
        raise ValueError('array must not contain infs or NaNs')

    m, n = a.shape
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    ipiv = cupy.empty((min(m, n), ), dtype=numpy.intc)

    buffersize = getrf_bufferSize(handle, m, n, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(handle, m, n, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrf (lu_factor)' % -status)
    if status > 0:
        warn('Diagonal number %d is exactly zero. Singular matrix.'
             % status, RuntimeWarning, stacklevel=2)

    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv -= 1

    return (a, ipiv)
def qr(a, mode='reduced'):
    '''QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', and decompose a matrix ``A = (M, N)`` into ``Q``,
            ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        ``(q, r)`` for the 'reduced' and 'complete' modes, ``r`` alone for
        the 'r' mode, and the factorized array together with ``tau`` for
        the 'raw' mode.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        ValueError: If ``mode`` is deprecated or unrecognized.
        cupy.linalg.LinAlgError: If geqrf reports a parameter error.

    .. seealso:: :func:`numpy.linalg.qr`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Cast to float32 or float64
    if a.dtype.char in ('f', 'd'):
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    # Single precision for 'f', double precision routines otherwise.
    if dtype == 'f':
        prefix, work_dtype = 's', numpy.float32
    else:  # dtype == 'd'
        prefix, work_dtype = 'd', numpy.float64
    geqrf = getattr(cusolver, prefix + 'geqrf')
    geqrf_bufferSize = getattr(cusolver, prefix + 'geqrf_bufferSize')
    orgqr = getattr(cusolver, prefix + 'orgqr')
    orgqr_bufferSize = getattr(cusolver, prefix + 'orgqr_bufferSize')

    m, n = a.shape
    # C-ordered copy of the transpose: the (m, n) matrix stored column by
    # column with leading dimension m.
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # compute working space of geqrf and ormqr, and solve R
    # The size query uses the same (m, n, lda=m) as the geqrf call itself.
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=work_dtype)
    tau = cupy.empty(mn, dtype=work_dtype)
    geqrf(
        handle, m, n, x.data.ptr, m,
        tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    buffersize = orgqr_bufferSize(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=work_dtype)
    orgqr(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
        workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True):
    """Solve an equation system, ``a * x = b``, given the LU factorization
    of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    """
    lu, ipiv = lu_and_piv

    util._assert_cupy_array(lu)
    util._assert_rank2(lu)
    util._assert_nd_squareness(lu)

    m = lu.shape[0]
    if b.shape[0] != m:
        raise ValueError('incompatible dimensions.')

    # Only the single- and double-precision real solvers are wired up.
    dtype = lu.dtype
    type_char = dtype.char
    if type_char not in ('f', 'd'):
        raise NotImplementedError('Only float32 and float64 are supported.')
    getrs = cusolver.sgetrs if type_char == 'f' else cusolver.dgetrs

    # Translate the SciPy-style flag into the cuBLAS operation constant.
    if trans == 0:
        operation = cublas.CUBLAS_OP_N
    elif trans == 1:
        operation = cublas.CUBLAS_OP_T
    elif trans == 2:
        operation = cublas.CUBLAS_OP_C
    else:
        raise ValueError('unknown trans')

    lu = lu.astype(dtype, order='F', copy=False)
    # cuSolver uses 1-origin while SciPy uses 0-origin, so shift a private
    # copy of the pivots and leave the caller's array untouched.
    ipiv = ipiv.astype(ipiv.dtype, order='F', copy=True)
    ipiv += 1
    b = b.astype(dtype, order='F', copy=(not overwrite_b))

    if check_finite:
        if lu.dtype.kind == 'f' and not cupy.isfinite(lu).all():
            raise ValueError(
                'array must not contain infs or NaNs.\n'
                'Note that when a singular matrix is given, unlike '
                'scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor '
                'returns an array containing NaN.')
        if b.dtype.kind == 'f' and not cupy.isfinite(b).all():
            raise ValueError('array must not contain infs or NaNs')

    # A one-dimensional right-hand side counts as a single column.
    if b.ndim == 1:
        n = 1
    else:
        n = b.shape[1]

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Solve the system; the solution is written into ``b``.
    getrs(cusolver_handle, operation, m, n,
          lu.data.ptr, m, ipiv.data.ptr,
          b.data.ptr, m, dev_info.data.ptr)

    if dev_info[0] < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrs (lu_solve)' % -dev_info[0])

    return b
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    Raises:
        ValueError: If ``mode`` is deprecated or unrecognized, or if the
            promoted dtype is not float32, float64, complex64 or complex128.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, n = a.shape
    mn = min(m, n)
    if mn == 0:
        # Nothing to factorize: build the correctly shaped results directly
        # instead of calling cuSOLVER.
        if mode == 'reduced':
            return cupy.empty((m, 0), dtype), cupy.empty((0, n), dtype)
        elif mode == 'complete':
            return cupy.identity(m, dtype), cupy.empty((m, n), dtype)
        elif mode == 'r':
            return cupy.empty((0, n), dtype)
        else:  # mode == 'raw'
            # compatibility with numpy.linalg.qr
            dtype = numpy.promote_types(dtype, 'd')
            return cupy.empty((n, m), dtype), cupy.empty((0, ), dtype)

    # A C-ordered copy of ``a.T`` is handed to cuSOLVER as an (m, n) matrix
    # with leading dimension m.
    x = a.transpose().astype(dtype, order='C', copy=True)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # Compute the working space of geqrf and solve R.  The size query uses
    # the same (m, n, lda=m) geometry as the factorization itself.
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        elif a.dtype.char == 'F':
            # The same applies to complex64
            return x.astype(numpy.complex128), tau.astype(numpy.complex128)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # Compute the working space of orgqr and solve Q.  ``dtype`` was
    # validated above, so one of these branches always matches.
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr

    buffersize = orgqr_bufferSize(handle, m, mc, mn, q.data.ptr, m,
                                  tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        orgqr, dev_info)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    Raises:
        ValueError: If ``mode`` is deprecated or unrecognized, or if the
            promoted dtype is not float32, float64, complex64 or complex128.
        linalg.LinAlgError: If geqrf reports a negative status.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Support float32, float64, complex64, and complex128.
    # ``numpy.find_common_type`` no longer exists in NumPy 2.0, so
    # ``promote_types`` is used instead.
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, n = a.shape
    # A C-ordered copy of ``a.T`` is handed to cuSOLVER as an (m, n) matrix
    # with leading dimension m.
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # Compute the working space of geqrf and solve R.  The size query uses
    # the same (m, n, lda=m) geometry as the factorization itself.
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        elif a.dtype.char == 'F':
            # The same applies to complex64
            return x.astype(numpy.complex128), tau.astype(numpy.complex128)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # Compute the working space of orgqr and solve Q.  ``dtype`` was
    # validated above, so one of these branches always matches.
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr

    buffersize = orgqr_bufferSize(handle, m, mc, mn, q.data.ptr, m,
                                  tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.
            Only ``s`` is returned when ``compute_uv`` is ``False``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64 (or their complex counterparts); singular
    # values are always real.
    a_dtype = numpy.promote_types(a.dtype.char, 'f').char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D':
        a_dtype = 'D'
        s_dtype = 'd'

    rows, cols = a.shape

    if rows == 0 or cols == 0:
        # There is nothing for gesvd to factorize.  Mirror the shapes
        # numpy.linalg.svd produces for empty input: no singular values,
        # identity matrices for the full factors and empty reduced factors.
        s = cupy.empty((0,), dtype=s_dtype)
        if not compute_uv:
            return s
        if full_matrices:
            u = cupy.eye(rows, dtype=a_dtype)
            vt = cupy.eye(cols, dtype=a_dtype)
        else:
            u = cupy.empty((rows, 0), dtype=a_dtype)
            vt = cupy.empty((0, cols), dtype=a_dtype)
        return u, s, vt

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    #
    # `a` must be copied because xgesvd destroys the matrix.  The copy is
    # arranged so that the (m, n) problem handed to gesvd has m >= n.
    if cols >= rows:
        m, n = cols, rows
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = rows, cols
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            vt = cupy.empty((n, n), dtype=a_dtype)
        else:
            u = cupy.empty((mn, m), dtype=a_dtype)
            vt = cupy.empty((mn, n), dtype=a_dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
    s = cupy.empty(mn, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')

    if a_dtype == 'f':
        gesvd = cusolver.sgesvd
        gesvd_bufferSize = cusolver.sgesvd_bufferSize
    elif a_dtype == 'd':
        gesvd = cusolver.dgesvd
        gesvd_bufferSize = cusolver.dgesvd_bufferSize
    elif a_dtype == 'F':
        gesvd = cusolver.cgesvd
        gesvd_bufferSize = cusolver.cgesvd_bufferSize
    else:  # a_dtype == 'D':
        gesvd = cusolver.zgesvd
        gesvd_bufferSize = cusolver.zgesvd_bufferSize

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=a_dtype)
    gesvd(
        handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m, vt_ptr, n,
        workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvd, dev_info)

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    Raises:
        ValueError: If the promoted dtype is not float32, float64,
            complex64 or complex128.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Support float32, float64, complex64, and complex128.
    # ``numpy.find_common_type`` no longer exists in NumPy 2.0, so
    # ``promote_types`` is used instead.
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    elif dtype == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs
    elif dtype == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrs = cusolver.cgetrs
    elif dtype == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrs = cusolver.zgetrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # Work on a private C-ordered copy so that `a` is not overwritten, and
    # cast it to the dtype of the selected routine: a plain ``a.copy()``
    # would hand e.g. integer data to a floating-point solver.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrf, dev_info)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr, b.data.ptr,
          m, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrs, dev_info)

    return b
def inv_core(a, cholesky=False):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix
        cholesky (bool): Use Cholesky decomposition (potrf/potrs) instead of
            LU decomposition (getrf/getrs). Ignored, with a warning, when
            ``a`` is a :class:`numpy.ndarray`.

    Returns:
        cupy.ndarray: The inverse of a matrix.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    xp = cupy.get_array_module(a)
    if xp is numpy:
        if cholesky:
            warnings.warn(
                "Current fast-inv using cholesky doesn't support "
                "numpy.ndarray.")
        return numpy.linalg.inv(a)

    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # ``numpy.find_common_type`` no longer exists in NumPy 2.0, so
    # ``promote_types`` is used instead.
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    # Work on a private C-ordered copy so that `a` is not overwritten, and
    # cast it to the compute dtype: a plain ``a.copy()`` would hand e.g.
    # integer data to a floating-point solver.
    a = a.astype(dtype, order='C', copy=True)

    cusolver_handle = device.get_cusolver_handle()
    # The status word is allocated as a 32-bit integer.
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m = a.shape[0]

    b = cupy.eye(m, dtype=dtype)

    # NOTE(review): every dtype other than float32 takes the double-precision
    # routines below, including a complex result of the promotion above.
    if not cholesky:
        if dtype == 'f':
            getrf = cusolver.sgetrf
            getrf_bufferSize = cusolver.sgetrf_bufferSize
            getrs = cusolver.sgetrs
        else:  # dtype == 'd'
            getrf = cusolver.dgetrf
            getrf_bufferSize = cusolver.dgetrf_bufferSize
            getrs = cusolver.dgetrs

        buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)

        # TODO(y1r): cache buffer to avoid malloc
        workspace = cupy.empty(buffersize, dtype=dtype)
        # Pivot indices are integers, not values of the matrix dtype.
        ipiv = cupy.empty((m, 1), dtype=numpy.intc)

        # LU Decomposition
        getrf(cusolver_handle, m, m, a.data.ptr, m,
              workspace.data.ptr, ipiv.data.ptr, dev_info.data.ptr)

        # TODO(y1r): check dev_info status

        # solve for the inverse
        getrs(cusolver_handle, 0, m, m, a.data.ptr, m,
              ipiv.data.ptr, b.data.ptr, m, dev_info.data.ptr)

        # TODO(y1r): check dev_info status
    else:
        if dtype == 'f':
            potrf = cusolver.spotrf
            potrf_bufferSize = cusolver.spotrf_bufferSize
            potrs = cusolver.spotrs
        else:  # dtype == 'd'
            potrf = cusolver.dpotrf
            potrf_bufferSize = cusolver.dpotrf_bufferSize
            potrs = cusolver.dpotrs

        buffersize = potrf_bufferSize(
            cusolver_handle, cublas.CUBLAS_FILL_MODE_UPPER, m, a.data.ptr, m)

        # TODO(y1r): cache buffer to avoid malloc
        workspace = cupy.empty(buffersize, dtype=dtype)

        # Cholesky Decomposition
        potrf(cusolver_handle, cublas.CUBLAS_FILL_MODE_UPPER, m,
              a.data.ptr, m, workspace.data.ptr, buffersize,
              dev_info.data.ptr)

        # TODO(y1r): check dev_info status

        # solve for the inverse
        potrs(cusolver_handle, cublas.CUBLAS_FILL_MODE_UPPER, m, m,
              a.data.ptr, m, b.data.ptr, m, dev_info.data.ptr)

        # TODO(y1r): check dev_info status

    return b