def _add_sparse(self, other, alpha, beta):
    """Combine ``self`` and ``other`` with the cuSPARSE csrgeam routine.

    Duplicate entries of ``self`` are summed, ``other`` is converted to CSR
    and canonicalized the same way, and both are passed together with
    ``alpha`` and ``beta`` to ``csrgeam2`` (preferred) or ``csrgeam``.

    Raises:
        NotImplementedError: If neither routine is available.
    """
    self.sum_duplicates()
    rhs = other.tocsr()
    rhs.sum_duplicates()
    # Try the newer routine first, then the legacy one.
    for routine in ('csrgeam2', 'csrgeam'):
        if cusparse.check_availability(routine):
            return getattr(cusparse, routine)(self, rhs, alpha, beta)
    raise NotImplementedError
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Dispatch is by operand type: scalars and 0-dim dense arrays scale the
    stored data; CSR and CSC operands go through ``csrgemm`` (preferred)
    or ``csrgemm2``; any other sparse matrix is converted to CSR and the
    multiplication is retried; 1-D and 2-D dense arrays go through
    ``csrmv``/``spmv`` and ``csrmm2``/``spmm`` respectively, always
    passing ``self.T`` with ``transa=True``.

    Returns:
        The product, or ``NotImplemented`` for unsupported operand types.

    Raises:
        NotImplementedError: If no suitable cuSPARSE routine is available.
        ValueError: If a dense operand has more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if cupyx.scipy.sparse.isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            lhs = self.T
            return cusparse.csrgemm(lhs, other, transa=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            lhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, other)
        raise NotImplementedError

    if isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            lhs = self.T
            rhs = other.T
            return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            rhs = other.tocsr()
            lhs.sum_duplicates()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, rhs)
        raise NotImplementedError

    if cupyx.scipy.sparse.isspmatrix(other):
        # Any other sparse format: convert and dispatch again.
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    ndim = other.ndim
    if ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)
    if ndim == 1:
        self.sum_duplicates()
        for routine in ('csrmv', 'spmv'):
            if cusparse.check_availability(routine):
                matvec = getattr(cusparse, routine)
                return matvec(
                    self.T, cupy.asfortranarray(other), transa=True)
        raise NotImplementedError
    if ndim == 2:
        self.sum_duplicates()
        for routine in ('csrmm2', 'spmm'):
            if cusparse.check_availability(routine):
                matmat = getattr(cusparse, routine)
                return matmat(
                    self.T, cupy.asfortranarray(other), transa=True)
        raise NotImplementedError
    raise ValueError('could not interpret dimensions')
def test_csrmvEx_aligned(self):
    """``csrmvExIsAligned`` is truthy for operands built from the fixtures.

    Skipped when ``csrmvEx`` is not available.
    """
    if not cusparse.check_availability('csrmvEx'):
        pytest.skip('csrmvEx is not available')
    matrix = sparse.csr_matrix(self.a)
    vector = cupy.array(self.x, order='f')
    aligned = cupy.cusparse.csrmvExIsAligned(matrix, vector)
    assert aligned
def _make_matrix(self, dtype):
    """Build the ``(n, n)`` random test matrix used by the csrilu02 tests.

    The matrix is ``shaped_random(scale=0.9) + 0.1`` with a vector of ones
    (in the real dtype matching ``dtype``) added on the diagonal. The test
    is skipped when ``csrilu02`` is not available.
    """
    if not cusparse.check_availability('csrilu02'):
        pytest.skip('csrilu02 is not available')
    size = self.n
    random_part = testing.shaped_random(
        (size, size), cupy, dtype=dtype, scale=0.9)
    shifted = random_part + 0.1
    diagonal = cupy.ones((size,), dtype=dtype.char.lower())
    return shifted + cupy.diag(diagonal)
def _make_fast_matvec(A):
    """Create a ``matvec`` closure that calls cuSPARSE ``spMV`` directly.

    The matrix descriptor, scalar constants and the work buffer are set up
    once here; each call of the returned closure only builds descriptors
    for its input and output vectors.

    Args:
        A: Matrix to multiply with. The fast path is only built for CSR
            matrices.

    Returns:
        A callable ``matvec(x)``, or ``None`` when ``A`` is not CSR or
        ``spmv`` is not available.
    """
    if not (csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')):
        return None

    handle = device.get_cusparse_handle()
    operation = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    scale_ax = numpy.array(1.0, A.dtype)
    scale_y = numpy.array(0.0, A.dtype)
    cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
    algorithm = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Throw-away vectors, only needed to query the work buffer size.
    # NOTE(review): both use A.shape[0] and the closure allocates its
    # output with empty_like(x), so A is presumably square -- confirm.
    probe_in = cupy.empty((A.shape[0], ), dtype=A.dtype)
    probe_out = cupy.empty((A.shape[0], ), dtype=A.dtype)
    desc_A = cusparse.SpMatDescriptor.create(A)
    desc_probe_in = cusparse.DnVecDescriptor.create(probe_in)
    desc_probe_out = cusparse.DnVecDescriptor.create(probe_out)
    work_size = _cusparse.spMV_bufferSize(
        handle, operation, scale_ax.ctypes.data, desc_A.desc,
        desc_probe_in.desc, scale_y.ctypes.data, desc_probe_out.desc,
        cuda_dtype, algorithm)
    work_buffer = cupy.empty(work_size, cupy.int8)
    del probe_in, desc_probe_in, probe_out, desc_probe_out

    def matvec(x):
        result = cupy.empty_like(x)
        desc_in = cusparse.DnVecDescriptor.create(x)
        desc_out = cusparse.DnVecDescriptor.create(result)
        _cusparse.spMV(
            handle, operation, scale_ax.ctypes.data, desc_A.desc,
            desc_in.desc, scale_y.ctypes.data, desc_out.desc,
            cuda_dtype, algorithm, work_buffer.data.ptr)
        return result

    return matvec
def test_csrmm(self):
    """``csrmm`` output matches ``alpha * op_a.dot(b)`` from the fixtures.

    Skipped when ``csrmm`` is not available.
    """
    if not cusparse.check_availability('csrmm'):
        pytest.skip('csrmm is not available')
    matrix = sparse.csr_matrix(self.a)
    dense = cupy.array(self.b, order='f')
    actual = cupy.cusparse.csrmm(
        matrix, dense, alpha=self.alpha, transa=self.transa)
    expected = self.alpha * self.op_a.dot(self.b)
    testing.assert_array_almost_equal(actual, expected)
def setUp(self):
    """Prepare a 1000x1 random CSC matrix with shuffled indices.

    The indices are shuffled in place and the matrix is flagged as not
    having sorted indices. Skipped when ``cscsort`` is not available.
    """
    if not cusparse.check_availability('cscsort'):
        pytest.skip('cscsort is not available')
    matrix = scipy.sparse.random(
        1000, 1, density=0.9, dtype=numpy.float32, format='csc')
    self.a = matrix
    numpy.random.shuffle(matrix.indices)
    matrix.has_sorted_indices = False
def setUp(self):
    """Prepare a 100x100 random COO matrix with shuffled coordinates.

    Row and column index arrays are shuffled in place, independently of
    each other. Skipped when ``coosort`` is not available.
    """
    if not cusparse.check_availability('coosort'):
        pytest.skip('coosort is not available')
    matrix = scipy.sparse.random(
        100, 100, density=0.9, dtype=numpy.float32, format='coo')
    self.a = matrix
    numpy.random.shuffle(matrix.row)
    numpy.random.shuffle(matrix.col)
def test_denseToSparse(self, dtype):
    """``denseToSparse`` round-trips a thresholded random dense array.

    Entries below ``self.density`` are zeroed before conversion; the
    result must report the requested format and densify back to the
    input. Skipped when ``denseToSparse`` is not available.
    """
    if not cusparse.check_availability('denseToSparse'):
        pytest.skip('denseToSparse is not available')
    dense = cupy.random.uniform(0, 1, self.shape).astype(dtype)
    below_threshold = dense < self.density
    dense[below_threshold] = 0
    converted = cusparse.denseToSparse(dense, format=self.format)
    assert converted.format == self.format
    testing.assert_array_equal(dense, converted.todense())
def test_csrmvEx_not_aligned(self):
    """``csrmvExIsAligned`` is falsy for views carved out of one buffer.

    ``x`` and ``y`` are slices of a single stacked array. Skipped when
    ``csrmvEx`` is not available.
    """
    if not cusparse.check_availability('csrmvEx'):
        pytest.skip('csrmvEx is not available')
    matrix = sparse.csr_matrix(self.a)
    stacked = cupy.array(numpy.hstack([self.x, self.y]), order='f')
    split = len(self.x)
    x_view = stacked[0:split]
    y_view = stacked[split:]
    aligned = cupy.cusparse.csrmvExIsAligned(matrix, x_view, y_view)
    assert not aligned
def tocsc(self, copy=False):
    """Converts the matrix to Compressed Sparse Column format.

    Args:
        copy (bool): Accepted for API compatibility but ignored: the
            arrays of a matrix cannot be shared in a CSR to CSC
            conversion.

    Returns:
        cupyx.scipy.sparse.csc_matrix: Converted matrix.

    Raises:
        NotImplementedError: If neither ``csr2csc`` nor ``csr2cscEx2`` is
            available.
    """
    # copy is ignored
    for routine in ('csr2csc', 'csr2cscEx2'):
        if cusparse.check_availability(routine):
            return getattr(cusparse, routine)(self)
    raise NotImplementedError
def test_csrmv_with_y(self):
    """``csrmv`` with ``y=`` writes into and returns the same array.

    The result is compared with ``alpha * op_a.dot(x) + beta * y`` computed
    from the fixtures. Skipped when ``csrmv`` is not available.
    """
    if not cusparse.check_availability('csrmv'):
        pytest.skip('csrmv is not available')
    matrix = sparse.csr_matrix(self.a)
    vec_in = cupy.array(self.x, order='f')
    vec_out = cupy.array(self.y, order='f')
    returned = cupy.cusparse.csrmv(
        matrix, vec_in, y=vec_out,
        alpha=self.alpha, beta=self.beta, transa=self.transa)
    expected = self.alpha * self.op_a.dot(self.x) + self.beta * self.y
    assert vec_out is returned
    testing.assert_array_almost_equal(vec_out, expected)
def tocsr(self, copy=False):
    """Converts the matrix to Compressed Sparse Row format.

    Args:
        copy (bool): Accepted for API compatibility but ignored: the
            arrays of a matrix cannot be shared in a CSC to CSR
            conversion.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Converted matrix.

    Raises:
        NotImplementedError: If neither ``csc2csr`` nor ``csc2csrEx2`` is
            available.
    """
    # copy is ignored
    for routine in ('csc2csr', 'csc2csrEx2'):
        if cusparse.check_availability(routine):
            # don't touch has_sorted_indices, as cuSPARSE made no guarantee
            return getattr(cusparse, routine)(self)
    raise NotImplementedError
def test_csrmm2_with_c(self):
    """``csrmm2`` with ``c=`` accumulates into and returns the same array.

    The result is compared with ``alpha * op_a.dot(op_b) + beta * c``
    computed from the fixtures. Skipped when ``csrmm2`` is not available.
    """
    if not cusparse.check_availability('csrmm2'):
        pytest.skip('csrmm2 is not available')
    matrix = sparse.csr_matrix(self.a)
    dense = cupy.array(self.b, order='f')
    accum = cupy.array(self.c, order='f')
    returned = cupy.cusparse.csrmm2(
        matrix, dense, c=accum, alpha=self.alpha, beta=self.beta,
        transa=self.transa, transb=self.transb)
    expected = self.alpha * self.op_a.dot(self.op_b) + self.beta * self.c
    assert returned is accum
    testing.assert_array_almost_equal(returned, expected)
def test_csrmvEx(self):
    """``csrmvEx`` output matches ``alpha * op_a.dot(x)`` from the fixtures.

    Skipped when ``csrmvEx`` is not available; returns without checking
    anything for parameterizations with ``transa`` set.
    """
    if not cusparse.check_availability('csrmvEx'):
        pytest.skip('csrmvEx is not available')
    if self.transa:
        # no support for transa
        return
    matrix = sparse.csr_matrix(self.a)
    vector = cupy.array(self.x, order='f')
    actual = cupy.cusparse.csrmvEx(matrix, vector, alpha=self.alpha)
    expected = self.alpha * self.op_a.dot(self.x)
    testing.assert_array_almost_equal(actual, expected)
def test_csrmv(self):
    """``csrmv`` output matches ``alpha * op_a.dot(x)`` from the fixtures.

    Skipped when ``csrmv`` is not available; marked xfail on HIP when
    ``transa`` is set.
    """
    if not cusparse.check_availability('csrmv'):
        pytest.skip('csrmv is not available')
    if runtime.is_hip and self.transa:
        pytest.xfail('may be buggy')
    matrix = sparse.csr_matrix(self.a)
    vector = cupy.array(self.x, order='f')
    actual = cupy.cusparse.csrmv(
        matrix, vector, alpha=self.alpha, transa=self.transa)
    expected = self.alpha * self.op_a.dot(self.x)
    testing.assert_array_almost_equal(actual, expected)
def test_csrsm2(self, dtype):
    """``csrsm2`` solves in place and matches the reference solution.

    Skipped when ``csrsm2`` is not available, and for the CSC + complex
    dtype + ``transa == 'H'`` combination.
    """
    if not cusparse.check_availability('csrsm2'):
        raise unittest.SkipTest('csrsm2 is not available')
    if (self.format == 'csc'
            and numpy.dtype(dtype).char in 'FD'
            and self.transa == 'H'):
        raise unittest.SkipTest('unsupported combination')
    self._setup(dtype)
    solution = self.b.copy(order=self.order)
    options = dict(
        alpha=self.alpha,
        lower=self.lower,
        unit_diag=self.unit_diag,
        transa=self.transa,
        blocking=self.blocking,
        level_info=self.level_info,
    )
    # csrsm2 overwrites its right-hand side with the solution.
    cusparse.csrsm2(self.a, solution, **options)
    testing.assert_allclose(
        solution, self.ref_x, atol=self.tol, rtol=self.tol)
def test_sparseToDense(self, dtype):
    """``sparseToDense`` agrees with ``todense()`` of the same matrix.

    A random SciPy matrix is wrapped in the class matching ``self.format``
    (csr, csc or coo) before conversion. Skipped when ``sparseToDense`` is
    not available.
    """
    if not cusparse.check_availability('sparseToDense'):
        pytest.skip('sparseToDense is not available')
    rows, cols = self.shape
    matrix = scipy.sparse.random(
        rows, cols, density=self.density, format=self.format, dtype=dtype)
    wrappers = {
        'csr': sparse.csr_matrix,
        'csc': sparse.csc_matrix,
        'coo': sparse.coo_matrix,
    }
    wrap = wrappers.get(self.format)
    if wrap is not None:
        matrix = wrap(matrix)
    dense = cusparse.sparseToDense(matrix)
    testing.assert_array_equal(matrix.todense(), dense)
def __init__(self, A, V, alpha, beta, update_impl='fast'):
    """Store the operands and, for the fast path, the cuBLAS/cuSPARSE state.

    Args:
        A: 2-D square matrix of size ``(n, n)``.
        V: 2-D array of shape ``(ncv, n)`` with the same dtype as ``A``.
        alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
        beta: 1-D array of length ``ncv`` whose dtype char equals the
            lower-cased dtype char of ``A``.
        update_impl (str): When different from ``'fast'`` only the
            arguments are stored and no library handles are acquired.

    Raises:
        TypeError: If the dtype of ``A`` is not one of ``f``, ``d``, ``F``,
            ``D`` (fast path only).
    """
    assert A.ndim == V.ndim == 2
    assert alpha.ndim == beta.ndim == 1
    assert A.dtype == V.dtype == alpha.dtype
    assert A.dtype.char.lower() == beta.dtype.char
    assert A.shape[0] == A.shape[1] == V.shape[1]
    assert V.shape[0] == alpha.shape[0] == beta.shape[0]

    self.A = A
    self.V = V
    self.alpha = alpha
    self.beta = beta
    self.n = V.shape[1]
    self.ncv = V.shape[0]
    self.update_impl = update_impl
    if self.update_impl != 'fast':
        return

    self.cublas_handle = device.get_cublas_handle()
    self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

    # (dotc, nrm2, gemm) cuBLAS routine names per dtype character.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }
    names = routine_names.get(A.dtype.char)
    if names is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    self.dotc, self.nrm2, self.gemm = (
        getattr(_cublas, name) for name in names)

    use_spmv = csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')
    if use_spmv:
        self.cusparse_handle = device.get_cusparse_handle()
        self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        self.spmv_alpha = numpy.array(1.0, A.dtype)
        self.spmv_beta = numpy.array(0.0, A.dtype)
        self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
        self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT
    else:
        self.cusparse_handle = None

    # Work vectors reused across updates.
    self.v = cupy.empty((self.n, ), dtype=A.dtype)
    self.u = cupy.empty((self.n, ), dtype=A.dtype)
    self.uu = cupy.empty((self.ncv, ), dtype=A.dtype)
def solve(self, rhs, trans='N'):
    """Solves linear system of equations with one or several right-hand sides.

    Args:
        rhs (cupy.ndarray): Right-hand side(s) of equation with dimension
            ``(M)`` or ``(M, K)``.
        trans (str): 'N', 'T' or 'H'.
            'N': Solves ``A * x = rhs``.
            'T': Solves ``A.T * x = rhs``.
            'H': Solves ``A.conj().T * x = rhs``.

    Returns:
        cupy.ndarray: Solution vector(s)

    Raises:
        TypeError: If ``rhs`` is not a ``cupy.ndarray``.
        ValueError: If ``rhs`` has an unsupported number of dimensions, its
            first dimension does not match the factorized matrix, or
            ``trans`` is not one of the accepted values.
        NotImplementedError: If ``csrsm2`` is not available.
    """
    if not isinstance(rhs, cupy.ndarray):
        # The message previously read 'ojb', a typo naming no argument.
        raise TypeError('rhs must be cupy.ndarray')
    if rhs.ndim not in (1, 2):
        raise ValueError('rhs.ndim must be 1 or 2 (actual: {})'.
                         format(rhs.ndim))
    if rhs.shape[0] != self.shape[0]:
        raise ValueError('shape mismatch (self.shape: {}, rhs.shape: {})'
                         .format(self.shape, rhs.shape))
    if trans not in ('N', 'T', 'H'):
        raise ValueError('trans must be \'N\', \'T\', or \'H\'')
    if not cusparse.check_availability('csrsm2'):
        raise NotImplementedError

    # The triangular solves below overwrite ``x`` in place.
    x = rhs.astype(self.L.dtype)
    if trans == 'N':
        # Forward direction: row permutation, L then U, column permutation.
        if self.perm_r is not None:
            x = x[self._perm_r_rev]
        cusparse.csrsm2(self.L, x, lower=True, transa=trans)
        cusparse.csrsm2(self.U, x, lower=False, transa=trans)
        if self.perm_c is not None:
            x = x[self.perm_c]
    else:
        # Transposed system: the factors and permutations apply in the
        # opposite order.
        if self.perm_c is not None:
            x = x[self._perm_c_rev]
        cusparse.csrsm2(self.U, x, lower=False, transa=trans)
        cusparse.csrsm2(self.L, x, lower=True, transa=trans)
        if self.perm_r is not None:
            x = x[self.perm_r]

    if not x._f_contiguous:
        # For compatibility with SciPy
        x = x.copy(order='F')
    return x
def dense2csr(a):
    """Convert a dense 2-D array to a CSR matrix.

    Arrays whose dtype char is one of ``fdFD`` are converted by cuSPARSE
    (``denseToSparse`` when available, otherwise ``dense2csr``). Any other
    dtype is handled by the two-step custom kernels: step 1 fills
    ``indptr`` and ``info``, both are turned into cumulative sums, and
    step 2 fills the index and data arrays.

    Args:
        a: Dense 2-D array.

    Returns:
        csr_matrix: The converted matrix.
    """
    if a.dtype.char in 'fdFD':
        if cusparse.check_availability('denseToSparse'):
            return cusparse.denseToSparse(a, format='csr')
        return cusparse.dense2csr(a)

    n_rows, n_cols = a.shape
    dense = cupy.ascontiguousarray(a)
    row_counts = cupy.zeros(n_rows + 1, dtype=numpy.int32)
    flags = cupy.zeros(n_rows * n_cols + 1, dtype=numpy.int32)
    cupy_dense2csr_step1()(n_rows, n_cols, dense, row_counts, flags)

    indptr = cupy.cumsum(row_counts, dtype=numpy.int32)
    offsets = cupy.cumsum(flags, dtype=numpy.int32)
    # The last entry of the cumulative row pointer is the entry count.
    nnz = int(indptr[-1])

    indices = cupy.empty(nnz, dtype=numpy.int32)
    data = cupy.empty(nnz, dtype=dense.dtype)
    cupy_dense2csr_step2()(n_rows, n_cols, dense, offsets, indices, data)
    return csr_matrix((data, indices, indptr), shape=(n_rows, n_cols))
def test_csrmm_with_c(self):
    """``csrmm`` with ``c=`` accumulates into and returns the same array.

    The result is compared with ``alpha * op_a.dot(b) + beta * c`` computed
    from the fixtures. Skipped when ``csrmm`` is not available; marked
    xfail on HIP when ``transa`` is set.
    """
    if not cusparse.check_availability('csrmm'):
        pytest.skip('csrmm is not available')
    if runtime.is_hip and self.transa:
        pytest.xfail('may be buggy')
    matrix = sparse.csr_matrix(self.a)
    dense = cupy.array(self.b, order='f')
    accum = cupy.array(self.c, order='f')
    returned = cupy.cusparse.csrmm(
        matrix, dense, c=accum, alpha=self.alpha, beta=self.beta,
        transa=self.transa)
    expected = self.alpha * self.op_a.dot(self.b) + self.beta * self.c
    assert returned is accum
    testing.assert_array_almost_equal(returned, expected)
def toarray(self, order=None, out=None):
    """Returns a dense matrix representing the same value.

    Args:
        order ({'C', 'F', None}): Whether to store data in C (row-major)
            order or F (column-major) order. ``None`` means C-order. The
            value is upper-cased before use.
        out: Not supported; the argument is never read.

    Returns:
        cupy.ndarray: Dense array representing the same matrix.

    Raises:
        ValueError: If ``order`` is neither ``'C'`` nor ``'F'`` on the
            cuSPARSE conversion paths.

    .. seealso:: :meth:`scipy.sparse.csr_matrix.toarray`

    """
    if order is None:
        order = 'C'
    else:
        order = order.upper()

    if self.nnz == 0:
        return cupy.zeros(shape=self.shape, dtype=self.dtype, order=order)
    if self.dtype.char not in 'fdFD':
        # The cuSPARSE paths below are only used for these four dtypes.
        return csr2dense(self, order)

    canonical = self.copy()
    canonical.has_canonical_format = False  # need to enforce sum_duplicates
    canonical.sum_duplicates()

    # On HIP, nnz=0 is problematic as of ROCm 4.2.0
    use_generic = (cusparse.check_availability('sparseToDense')
                   and (not runtime.is_hip or canonical.nnz > 0))
    if use_generic:
        dense = cusparse.sparseToDense(canonical)
        if order == 'F':
            return dense
        if order == 'C':
            return cupy.ascontiguousarray(dense)
        raise ValueError('order not understood')

    # csr2dense returns F-contiguous array.
    if order == 'C':
        # To return C-contiguous array, it uses transpose.
        return cusparse.csc2dense(canonical.T).T
    if order == 'F':
        return cusparse.csr2dense(canonical)
    raise ValueError('order not understood')
def test_csrsm2(self, dtype):
    """``csrsm2`` solves in place and matches the reference solution.

    Skipped when ``csrsm2`` is not available and for the CSC + complex
    dtype + ``transa == 'H'`` combination. On HIP the test is marked xfail
    for ``transa == 'H'`` and, for build versions below 400, for the
    csc/'N' and csr/'T' combinations.
    """
    if not cusparse.check_availability('csrsm2'):
        pytest.skip('csrsm2 is not available')
    if runtime.is_hip:
        known_bad = (('csc', 'N'), ('csr', 'T'))
        if (self.transa == 'H'
                or (driver.get_build_version() < 400
                    and (self.format, self.transa) in known_bad)):
            pytest.xfail('may be buggy')
    if (self.format == 'csc'
            and numpy.dtype(dtype).char in 'FD'
            and self.transa == 'H'):
        pytest.skip('unsupported combination')
    self._setup(dtype)
    solution = self.b.copy(order=self.order)
    options = dict(
        alpha=self.alpha,
        lower=self.lower,
        unit_diag=self.unit_diag,
        transa=self.transa,
        blocking=self.blocking,
        level_info=self.level_info,
    )
    # csrsm2 overwrites its right-hand side with the solution.
    cusparse.csrsm2(self.a, solution, **options)
    testing.assert_allclose(
        solution, self.ref_x, atol=self.tol, rtol=self.tol)
def __mul__(self, other):
    """Multiply this CSR matrix by a scalar, a sparse matrix or a dense array.

    Dispatch is by operand type: scalars and 0-dim dense arrays scale the
    stored data; CSR operands use ``csrgemm2`` or ``csrgemm``; CSC
    operands use ``csrgemm`` with ``transb=True`` or ``csrgemm2`` after a
    CSR conversion; other sparse formats are converted to CSR and the
    multiplication is retried; 1-D dense arrays use ``csrmvEx`` (or
    ``device_csrmv`` when CUB is enabled) if the operands are aligned and
    ``nnz > 0``, else ``csrmv``/``spmv``; 2-D dense arrays use
    ``csrmm2``/``spmm``.

    Returns:
        The product, or ``NotImplemented`` for unsupported operand types.

    Raises:
        NotImplementedError: If no suitable cuSPARSE routine is available.
        ValueError: If a dense operand has more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        for routine in ('csrgemm2', 'csrgemm'):
            if cusparse.check_availability(routine):
                return getattr(cusparse, routine)(self, other)
        raise NotImplementedError

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm'):
            return cusparse.csrgemm(self, other.T, transb=True)
        if cusparse.check_availability('csrgemm2'):
            rhs = other.tocsr()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(self, rhs)
        raise NotImplementedError

    if base.isspmatrix(other):
        # Any other sparse format: convert and dispatch again.
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    ndim = other.ndim
    if ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if ndim == 1:
        self.sum_duplicates()
        vector = cupy.asfortranarray(other)
        # csrmvEx does not work if nnz == 0
        if self.nnz > 0 and cusparse.csrmvExIsAligned(self, vector):
            if cupy.cuda.cub_enabled and vector.flags.c_contiguous:
                return device_csrmv(
                    self.shape[0], self.shape[1], self.nnz,
                    self.data, self.indptr, self.indices, vector)
            return cusparse.csrmvEx(self, vector)
        for routine in ('csrmv', 'spmv'):
            if cusparse.check_availability(routine):
                return getattr(cusparse, routine)(self, vector)
        raise NotImplementedError

    if ndim == 2:
        self.sum_duplicates()
        for routine in ('csrmm2', 'spmm'):
            if cusparse.check_availability(routine):
                matmat = getattr(cusparse, routine)
                return matmat(self, cupy.asfortranarray(other))
        raise NotImplementedError

    raise ValueError('could not interpret dimensions')
def __mul__(self, other):
    """Multiply this matrix by a scalar, a sparse matrix or a dense array.

    Same dispatch as the plain cuSPARSE version, except that the routines
    called with ``transa=True`` are bypassed on HIP: ``csrgemm`` and
    ``csrmm2`` fall through to ``csrgemm2`` and ``spmm``, and the 1-D
    dense case has no usable routine on HIP at all.

    Returns:
        The product, or ``NotImplemented`` for unsupported operand types.

    Raises:
        NotImplementedError: If no suitable routine is available.
        ValueError: If a dense operand has more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if cupyx.scipy.sparse.isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            lhs = self.T
            return cusparse.csrgemm(lhs, other, transa=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            lhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, other)
        raise NotImplementedError

    if isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            lhs = self.T
            rhs = other.T
            return cusparse.csrgemm(lhs, rhs, transa=True, transb=True)
        if cusparse.check_availability('csrgemm2'):
            lhs = self.tocsr()
            rhs = other.tocsr()
            lhs.sum_duplicates()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(lhs, rhs)
        raise NotImplementedError

    if cupyx.scipy.sparse.isspmatrix(other):
        # Any other sparse format: convert and dispatch again.
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    ndim = other.ndim
    if ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if ndim == 1:
        self.sum_duplicates()
        # trans=True is buggy as of ROCm 4.2.0 for both routines
        # (spmv was seen to fail with HIPSPARSE_STATUS_INTERNAL_ERROR).
        for routine in ('csrmv', 'spmv'):
            if cusparse.check_availability(routine) and not runtime.is_hip:
                matvec = getattr(cusparse, routine)
                return matvec(
                    self.T, cupy.asfortranarray(other), transa=True)
        raise NotImplementedError

    if ndim == 2:
        self.sum_duplicates()
        if cusparse.check_availability('csrmm2') and not runtime.is_hip:
            # trans=True is buggy as of ROCm 4.2.0
            matmat = cusparse.csrmm2
        elif cusparse.check_availability('spmm'):
            matmat = cusparse.spmm
        else:
            raise NotImplementedError
        return matmat(self.T, cupy.asfortranarray(other), transa=True)

    raise ValueError('could not interpret dimensions')
def _convert_dense(self, x):
    """Convert the dense array ``x`` to CSC and return its three arrays.

    ``denseToSparse`` is used when available, otherwise ``dense2csc``.

    Returns:
        tuple: ``(data, indices, indptr)`` of the converted matrix.
    """
    converted = (
        cusparse.denseToSparse(x, format='csc')
        if cusparse.check_availability('denseToSparse')
        else cusparse.dense2csc(x)
    )
    return converted.data, converted.indices, converted.indptr
def __mul__(self, other):
    """Multiply this CSR matrix by a scalar, a sparse matrix or a dense array.

    Dispatch is by operand type: scalars and 0-dim dense arrays scale the
    stored data; CSR operands use ``csrgemm2`` or ``csrgemm``; CSC
    operands use ``csrgemm`` with ``transb=True`` (not on HIP) or
    ``csrgemm2`` after a CSR conversion; other sparse formats are
    converted to CSR and the multiplication is retried; 1-D dense arrays
    use CUB ``device_csrmv`` when that accelerator is enabled and
    considered safe, otherwise ``csrmvEx``/``csrmv``/``spmv``; 2-D dense
    arrays use ``csrmm2``/``spmm``.

    Returns:
        The product, or ``NotImplemented`` for unsupported operand types.

    Raises:
        NotImplementedError: If no suitable routine is available.
        ValueError: If a dense operand has more than two dimensions.
    """
    if cupy.isscalar(other):
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if isspmatrix_csr(other):
        self.sum_duplicates()
        other.sum_duplicates()
        for routine in ('csrgemm2', 'csrgemm'):
            if cusparse.check_availability(routine):
                return getattr(cusparse, routine)(self, other)
        raise NotImplementedError

    if csc.isspmatrix_csc(other):
        self.sum_duplicates()
        other.sum_duplicates()
        if cusparse.check_availability('csrgemm') and not runtime.is_hip:
            # trans=True is still buggy as of ROCm 4.2.0
            return cusparse.csrgemm(self, other.T, transb=True)
        if cusparse.check_availability('csrgemm2'):
            rhs = other.tocsr()
            rhs.sum_duplicates()
            return cusparse.csrgemm2(self, rhs)
        raise NotImplementedError

    if base.isspmatrix(other):
        # Any other sparse format: convert and dispatch again.
        return self * other.tocsr()

    if not base.isdense(other):
        return NotImplemented

    ndim = other.ndim
    if ndim == 0:
        self.sum_duplicates()
        return self._with_data(self.data * other)

    if ndim == 1:
        self.sum_duplicates()
        vector = cupy.asfortranarray(other)

        # need extra padding to ensure not stepping on the CUB bug,
        # see cupy/cupy#3679 for discussion
        indptr = self.indptr
        has_padding = (
            indptr.data.mem.size > indptr.size * indptr.dtype.itemsize)
        # CUB spmv is buggy since CUDA 11.0, see
        # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637
        old_enough_cuda = cub._get_cuda_build_version() < 11000
        is_cub_safe = has_padding and old_enough_cuda

        for accelerator in _accelerator.get_routine_accelerators():
            if accelerator != _accelerator.ACCELERATOR_CUB:
                continue
            if (runtime.is_hip or not is_cub_safe
                    or not vector.flags.c_contiguous):
                continue
            return cub.device_csrmv(
                self.shape[0], self.shape[1], self.nnz,
                self.data, self.indptr, self.indices, vector)

        if (cusparse.check_availability('csrmvEx') and self.nnz > 0
                and cusparse.csrmvExIsAligned(self, vector)):
            # csrmvEx does not work if nnz == 0
            matvec = cusparse.csrmvEx
        elif cusparse.check_availability('csrmv'):
            matvec = cusparse.csrmv
        elif cusparse.check_availability('spmv'):
            matvec = cusparse.spmv
        else:
            raise NotImplementedError
        return matvec(self, vector)

    if ndim == 2:
        self.sum_duplicates()
        for routine in ('csrmm2', 'spmm'):
            if cusparse.check_availability(routine):
                matmat = getattr(cusparse, routine)
                return matmat(self, cupy.asfortranarray(other))
        raise NotImplementedError

    raise ValueError('could not interpret dimensions')
def _lanczos_fast(A, n, ncv):
    """Build the cuBLAS/cuSPARSE-backed update closure for matrix ``A``.

    Library handles, the dtype-specific cuBLAS routines, host-side scalar
    constants and two work vectors are prepared once here and captured by
    the returned closure.

    Args:
        A: Matrix to multiply with. The cuSPARSE ``spMV`` path is only used
            when ``A`` is CSR and ``spmv`` is available; otherwise the
            closure falls back to ``A @ v``.
        n (int): Length of the work vector ``v``, and the vector length /
            leading dimension passed to the cuBLAS calls.
        ncv (int): Length of the work vector ``uu``.

    Returns:
        callable: ``aux(A, V, u, alpha, beta, i_start, i_end)``. It must be
        called with the very same ``A`` object (asserted). For each ``i``
        in ``range(i_start, i_end)`` it writes one element of ``alpha`` and
        ``beta`` and updates ``u`` and ``V`` in place.

    Raises:
        TypeError: If the dtype of ``A`` is not one of ``f``, ``d``, ``F``,
            ``D``.
    """
    cublas_handle = device.get_cublas_handle()
    # Remembered so it can be restored after each device-pointer-mode call.
    cublas_pointer_mode = _cublas.getPointerMode(cublas_handle)
    # Select the cuBLAS routines matching the dtype of A.
    if A.dtype.char == 'f':
        dotc = _cublas.sdot
        nrm2 = _cublas.snrm2
        gemm = _cublas.sgemm
    elif A.dtype.char == 'd':
        dotc = _cublas.ddot
        nrm2 = _cublas.dnrm2
        gemm = _cublas.dgemm
    elif A.dtype.char == 'F':
        dotc = _cublas.cdotc
        nrm2 = _cublas.scnrm2
        gemm = _cublas.cgemm
    elif A.dtype.char == 'D':
        dotc = _cublas.zdotc
        nrm2 = _cublas.dznrm2
        gemm = _cublas.zgemm
    else:
        raise TypeError('invalid dtype ({})'.format(A.dtype))

    # cusparse_handle stays None when the spMV path is not usable; the
    # closure uses that as the switch between spMV and ``A @ v``.
    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_alpha = numpy.array(1.0, A.dtype)
        spmv_beta = numpy.array(0.0, A.dtype)
        spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Device work vectors shared by every call of the closure.
    v = cupy.empty((n, ), dtype=A.dtype)
    uu = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalars passed by pointer to gemm.
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    mone = numpy.array(-1.0, dtype=A.dtype)

    outer_A = A

    def aux(A, V, u, alpha, beta, i_start, i_end):
        # The captured setup above was derived from this exact matrix.
        assert A is outer_A

        beta_eps = inversion_eps(A.dtype)

        # Get ready for spmv if enabled
        if cusparse_handle is not None:
            # Note: I would like to reuse descriptors and working buffer
            # on the next update, but I gave it up because it sometimes
            # caused illegal memory access error.
            spmv_desc_A = cusparse.SpMatDescriptor.create(A)
            spmv_desc_v = cusparse.DnVecDescriptor.create(v)
            spmv_desc_u = cusparse.DnVecDescriptor.create(u)
            buff_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
                spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
            spmv_buff = cupy.empty(buff_size, cupy.int8)

        v[...] = V[i_start]
        for i in range(i_start, i_end):
            # Matrix-vector multiplication
            if cusparse_handle is None:
                u[...] = A @ v
            else:
                _cusparse.spMV(
                    cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                    spmv_desc_A.desc, spmv_desc_v.desc,
                    spmv_beta.ctypes.data, spmv_desc_u.desc,
                    spmv_cuda_dtype, spmv_alg, spmv_buff.data.ptr)

            # Call dotc
            # The result is written straight into alpha[i] through a
            # device pointer, hence the temporary pointer-mode switch.
            _cublas.setPointerMode(
                cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1,
                     alpha.data.ptr + i * alpha.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Orthogonalize
            # The first gemm writes i + 1 coefficients into ``uu``; the
            # second applies them with a factor of -1 to update ``u``.
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
                 1, i + 1, n,
                 one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 zero.ctypes.data, uu.data.ptr, 1)
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
                 n, 1, i + 1,
                 mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # Call nrm2
            # Same device-pointer pattern as dotc: writes into beta[i].
            _cublas.setPointerMode(
                cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                nrm2(cublas_handle, n, u.data.ptr, 1,
                     beta.data.ptr + i * beta.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Break here as the normalization below touches V[i+1]
            if i >= i_end - 1:
                break

            # beta[i] below the threshold: zero the remaining rows of V
            # and both work vectors, then stop iterating.
            if beta[i] < beta_eps:
                V[i + 1:i_end, :] = 0
                u[...] = 0
                v[...] = 0
                break
            if i == i_start:
                beta_eps *= beta[i]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, i, n, v, V)

    return aux
def spsolve_triangular(A, b, lower=True, overwrite_A=False,
                       overwrite_b=False, unit_diagonal=False):
    """Solves a sparse triangular system ``A x = b``.

    Args:
        A (cupyx.scipy.sparse.spmatrix):
            Sparse matrix with dimension ``(M, M)``.
        b (cupy.ndarray):
            Dense vector or matrix with dimension ``(M)`` or ``(M, K)``.
        lower (bool):
            Whether ``A`` is a lower or upper triangular matrix.
            If True, it is lower triangular, otherwise, upper triangular.
        overwrite_A (bool):
            (not supported)
        overwrite_b (bool):
            Allows overwriting data in ``b``.
        unit_diagonal (bool):
            If True, diagonal elements of ``A`` are assumed to be 1 and will
            not be referenced.

    Returns:
        cupy.ndarray:
            Solution to the system ``A x = b``. The shape is the same as
            ``b``.

    Raises:
        NotImplementedError: If ``csrsm2`` is not available.
        TypeError: If ``A`` is not a sparse matrix, ``b`` is not a
            ``cupy.ndarray``, or the dtype of ``A`` is unsupported.
        ValueError: If ``A`` is not square, ``b`` is not 1-D or 2-D, or the
            shapes of ``A`` and ``b`` do not match.
    """
    if not cusparse.check_availability('csrsm2'):
        raise NotImplementedError

    # Argument validation.
    if not sparse.isspmatrix(A):
        raise TypeError('A must be cupyx.scipy.sparse.spmatrix')
    if not isinstance(b, cupy.ndarray):
        raise TypeError('b must be cupy.ndarray')
    if A.shape[0] != A.shape[1]:
        raise ValueError(
            'A must be a square matrix (A.shape: {})'.format(A.shape))
    if b.ndim not in (1, 2):
        raise ValueError(
            'b must be 1D or 2D array (b.shape: {})'.format(b.shape))
    if A.shape[0] != b.shape[0]:
        raise ValueError('The size of dimensions of A must be equal to the '
                         'size of the first dimension of b '
                         '(A.shape: {}, b.shape: {})'.format(A.shape, b.shape))
    if A.dtype.char not in 'fdFD':
        raise TypeError('unsupported dtype (actual: {})'.format(A.dtype))

    if not sparse.isspmatrix_csr(A) and not sparse.isspmatrix_csc(A):
        warnings.warn('CSR or CSC format is required. Converting to CSR '
                      'format.', sparse.SparseEfficiencyWarning)
        A = A.tocsr()
    A.sum_duplicates()

    # Reuse ``b`` as the in-place work array only when the caller allows
    # it and no dtype or layout conversion is needed.
    reuse_b = (overwrite_b and A.dtype == b.dtype and
               (b._c_contiguous or b._f_contiguous))
    solution = b if reuse_b else b.astype(A.dtype, copy=True)

    cusparse.csrsm2(A, solution, lower=lower, unit_diag=unit_diagonal)

    if solution.dtype.char in 'fF':
        # Note: This is for compatibility with SciPy.
        promoted = numpy.promote_types(solution.dtype, 'float64')
        solution = solution.astype(promoted)
    return solution