Ejemplo n.º 1
0
Archivo: csr.py Proyecto: mnicely/cupy
 def _add_sparse(self, other, alpha, beta):
     """Combines this matrix with ``other`` through cuSPARSE ``csrgeam``.

     Duplicates are summed on ``self`` and on the CSR conversion of
     ``other`` before the call. ``csrgeam2`` is tried first and ``csrgeam``
     is the fallback.

     Args:
         other: Sparse matrix; it is converted with ``tocsr()``.
         alpha: Forwarded unchanged to the cuSPARSE routine (presumably
             the scale applied to ``self`` -- confirm against the wrapper).
         beta: Forwarded unchanged to the cuSPARSE routine (presumably
             the scale applied to ``other`` -- confirm against the wrapper).

     Returns:
         The value returned by the selected ``csrgeam`` routine.

     Raises:
         NotImplementedError: If neither routine is available.
     """
     self.sum_duplicates()
     rhs = other.tocsr()
     rhs.sum_duplicates()
     for routine in ('csrgeam2', 'csrgeam'):
         if cusparse.check_availability(routine):
             return getattr(cusparse, routine)(self, rhs, alpha, beta)
     raise NotImplementedError
Ejemplo n.º 2
0
Archivo: csc.py Proyecto: yoshipon/cupy
 def __mul__(self, other):
     """Multiplies this matrix by a scalar, a sparse matrix or a dense array.

     The matrix is passed to the cuSPARSE CSR routines either as ``self.T``
     together with ``transa=True`` or after an explicit ``tocsr()``.

     Args:
         other: A scalar, a CSR/CSC/other sparse matrix, or a dense array
             with 0, 1 or 2 dimensions.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is none of the
         supported kinds.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense ``other`` has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if cupyx.scipy.sparse.isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self.T, other, transa=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             lhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, other)
         raise NotImplementedError

     if isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(
                 self.T, other.T, transa=True, transb=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             rhs = other.tocsr()
             lhs.sum_duplicates()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, rhs)
         raise NotImplementedError

     if cupyx.scipy.sparse.isspmatrix(other):
         # Any other sparse format goes through the CSR branch above.
         return self * other.tocsr()

     if not base.isdense(other):
         return NotImplemented

     ndim = other.ndim
     if ndim == 0:
         self.sum_duplicates()
         return self._with_data(self.data * other)
     if ndim not in (1, 2):
         raise ValueError('could not interpret dimensions')

     # 1-D and 2-D operands differ only in the routine pair that is probed.
     self.sum_duplicates()
     candidates = ('csrmv', 'spmv') if ndim == 1 else ('csrmm2', 'spmm')
     for routine in candidates:
         if cusparse.check_availability(routine):
             multiply = getattr(cusparse, routine)
             break
     else:
         raise NotImplementedError
     return multiply(self.T, cupy.asfortranarray(other), transa=True)
Ejemplo n.º 3
0
    def test_csrmvEx_aligned(self):
        """Expects ``csrmvExIsAligned`` to be truthy for fresh arrays."""
        if not cusparse.check_availability('csrmvEx'):
            pytest.skip('csrmvEx is not available')
        matrix = sparse.csr_matrix(self.a)
        vector = cupy.array(self.x, order='f')

        aligned = cupy.cusparse.csrmvExIsAligned(matrix, vector)
        assert aligned
Ejemplo n.º 4
0
 def _make_matrix(self, dtype):
     """Builds an ``(n, n)`` test matrix, skipping without ``csrilu02``.

     The matrix is ``shaped_random(..., scale=0.9) + 0.1`` plus a diagonal
     of ones.

     Args:
         dtype: Element type; must expose ``.char`` (for example a
             ``numpy.dtype``), because the diagonal is created with
             ``dtype.char.lower()``.

     Returns:
         The generated array.
     """
     if not cusparse.check_availability('csrilu02'):
         pytest.skip('csrilu02 is not available')
     size = self.n
     random_part = testing.shaped_random(
         (size, size), cupy, dtype=dtype, scale=0.9)
     shifted = random_part + 0.1
     unit_diagonal = cupy.diag(
         cupy.ones((size, ), dtype=dtype.char.lower()))
     return shifted + unit_diagonal
Ejemplo n.º 5
0
def _make_fast_matvec(A):
    """Builds a ``matvec`` closure that calls cuSPARSE SpMV directly.

    The matrix descriptor and the SpMV work buffer are created once here
    and reused by every call of the returned closure.

    Args:
        A: Matrix operand. The fast path is only taken when it is a CSR
            matrix and cuSPARSE ``spmv`` is available.

    Returns:
        function or None: ``matvec(x)`` returning a new array created with
        ``cupy.empty_like(x)`` and filled by SpMV, or ``None`` when the
        fast path does not apply.
    """
    if not (csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')):
        return None

    handle = device.get_cusparse_handle()
    op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    alpha = numpy.array(1.0, A.dtype)
    beta = numpy.array(0.0, A.dtype)
    cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
    alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Placeholder vectors exist only so that the buffer size can be queried.
    # NOTE(review): both have length A.shape[0], which presumably assumes a
    # square A -- confirm against callers.
    probe_x = cupy.empty((A.shape[0], ), dtype=A.dtype)
    probe_y = cupy.empty((A.shape[0], ), dtype=A.dtype)
    desc_A = cusparse.SpMatDescriptor.create(A)
    probe_desc_x = cusparse.DnVecDescriptor.create(probe_x)
    probe_desc_y = cusparse.DnVecDescriptor.create(probe_y)
    workspace_size = _cusparse.spMV_bufferSize(
        handle, op_a, alpha.ctypes.data, desc_A.desc, probe_desc_x.desc,
        beta.ctypes.data, probe_desc_y.desc, cuda_dtype, alg)
    workspace = cupy.empty(workspace_size, cupy.int8)
    del probe_x, probe_desc_x, probe_y, probe_desc_y

    def matvec(x):
        y = cupy.empty_like(x)
        desc_x = cusparse.DnVecDescriptor.create(x)
        desc_y = cusparse.DnVecDescriptor.create(y)
        _cusparse.spMV(
            handle, op_a, alpha.ctypes.data, desc_A.desc, desc_x.desc,
            beta.ctypes.data, desc_y.desc, cuda_dtype, alg,
            workspace.data.ptr)
        return y

    return matvec
Ejemplo n.º 6
0
 def test_csrmm(self):
     """Compares ``csrmm`` with ``alpha * op_a.dot(b)``."""
     if not cusparse.check_availability('csrmm'):
         pytest.skip('csrmm is not available')
     sparse_a = sparse.csr_matrix(self.a)
     dense_b = cupy.array(self.b, order='f')
     actual = cupy.cusparse.csrmm(
         sparse_a, dense_b, alpha=self.alpha, transa=self.transa)
     expected = self.alpha * self.op_a.dot(self.b)
     testing.assert_array_almost_equal(actual, expected)
Ejemplo n.º 7
0
    def setUp(self):
        """Creates a random 1000x1 CSC matrix with shuffled indices.

        The test is skipped when ``cscsort`` is not available.
        """
        if not cusparse.check_availability('cscsort'):
            pytest.skip('cscsort is not available')

        self.a = matrix = scipy.sparse.random(
            1000, 1, density=0.9, dtype=numpy.float32, format='csc')
        # Shuffle the index array in place and mark the matrix as unsorted.
        numpy.random.shuffle(matrix.indices)
        matrix.has_sorted_indices = False
Ejemplo n.º 8
0
    def setUp(self):
        """Creates a random 100x100 COO matrix with shuffled coordinates.

        The test is skipped when ``coosort`` is not available.
        """
        if not cusparse.check_availability('coosort'):
            pytest.skip('coosort is not available')

        self.a = matrix = scipy.sparse.random(
            100, 100, density=0.9, dtype=numpy.float32, format='coo')
        # Row and column index arrays are shuffled independently, in place.
        numpy.random.shuffle(matrix.row)
        numpy.random.shuffle(matrix.col)
Ejemplo n.º 9
0
 def test_denseToSparse(self, dtype):
     """Round-trips a thresholded random array through ``denseToSparse``."""
     if not cusparse.check_availability('denseToSparse'):
         pytest.skip('denseToSparse is not available')
     dense = cupy.random.uniform(0, 1, self.shape).astype(dtype)
     # Zero every entry below the density threshold.
     dense[dense < self.density] = 0
     converted = cusparse.denseToSparse(dense, format=self.format)
     assert converted.format == self.format
     testing.assert_array_equal(dense, converted.todense())
Ejemplo n.º 10
0
 def test_csrmvEx_not_aligned(self):
     """Expects ``csrmvExIsAligned`` to be falsy for slices of one buffer."""
     if not cusparse.check_availability('csrmvEx'):
         pytest.skip('csrmvEx is not available')
     matrix = sparse.csr_matrix(self.a)
     # x and y are two views into one stacked array.
     stacked = cupy.array(numpy.hstack([self.x, self.y]), order='f')
     split = len(self.x)
     x_view = stacked[0:split]
     y_view = stacked[split:]
     assert not cupy.cusparse.csrmvExIsAligned(matrix, x_view, y_view)
Ejemplo n.º 11
0
Archivo: csr.py Proyecto: mnicely/cupy
    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): Accepted for API compatibility but ignored: a
                csr to csc conversion cannot share all arrays of the
                matrix, so the result is always produced by cuSPARSE.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        Raises:
            NotImplementedError: If neither ``csr2csc`` nor ``csr2cscEx2``
                is available.

        """
        # ``copy`` is intentionally unused.
        for routine in ('csr2csc', 'csr2cscEx2'):
            if cusparse.check_availability(routine):
                return getattr(cusparse, routine)(self)
        raise NotImplementedError
Ejemplo n.º 12
0
 def test_csrmv_with_y(self):
     """Checks that ``csrmv`` returns the passed ``y`` with the result."""
     if not cusparse.check_availability('csrmv'):
         pytest.skip('csrmv is not available')
     matrix = sparse.csr_matrix(self.a)
     vec = cupy.array(self.x, order='f')
     out = cupy.array(self.y, order='f')
     returned = cupy.cusparse.csrmv(
         matrix, vec, y=out, alpha=self.alpha, beta=self.beta,
         transa=self.transa)
     expected = self.alpha * self.op_a.dot(self.x) + self.beta * self.y
     # The routine must hand back the very array passed as ``y``.
     assert out is returned
     testing.assert_array_almost_equal(out, expected)
Ejemplo n.º 13
0
    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): Accepted for API compatibility but ignored: a
                csc to csr conversion cannot share all arrays of the
                matrix, so the result is always produced by cuSPARSE.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        Raises:
            NotImplementedError: If neither ``csc2csr`` nor ``csc2csrEx2``
                is available.

        """
        # ``copy`` is intentionally unused.
        # has_sorted_indices is left untouched on the result, as cuSPARSE
        # made no guarantee.
        for routine in ('csc2csr', 'csc2csrEx2'):
            if cusparse.check_availability(routine):
                return getattr(cusparse, routine)(self)
        raise NotImplementedError
Ejemplo n.º 14
0
 def test_csrmm2_with_c(self):
     """Checks that ``csrmm2`` returns the passed ``c`` with the result."""
     if not cusparse.check_availability('csrmm2'):
         pytest.skip('csrmm2 is not available')
     sparse_a = sparse.csr_matrix(self.a)
     dense_b = cupy.array(self.b, order='f')
     out = cupy.array(self.c, order='f')
     returned = cupy.cusparse.csrmm2(
         sparse_a, dense_b, c=out, alpha=self.alpha, beta=self.beta,
         transa=self.transa, transb=self.transb)
     expected = (self.alpha * self.op_a.dot(self.op_b)
                 + self.beta * self.c)
     # The routine must hand back the very array passed as ``c``.
     assert returned is out
     testing.assert_array_almost_equal(returned, expected)
Ejemplo n.º 15
0
    def test_csrmvEx(self):
        """Compares ``csrmvEx`` with ``alpha * op_a.dot(x)``.

        The test is skipped when ``csrmvEx`` is unavailable or when the
        parameterization requests ``transa``, which this test does not
        support.
        """
        if not cusparse.check_availability('csrmvEx'):
            pytest.skip('csrmvEx is not available')
        if self.transa:
            # Report the unsupported case as skipped instead of returning
            # silently, which would count it as a pass.
            pytest.skip('csrmvEx does not support transa')

        a = sparse.csr_matrix(self.a)
        x = cupy.array(self.x, order='f')
        y = cupy.cusparse.csrmvEx(a, x, alpha=self.alpha)
        expect = self.alpha * self.op_a.dot(self.x)
        testing.assert_array_almost_equal(y, expect)
Ejemplo n.º 16
0
    def test_csrmv(self):
        """Compares ``csrmv`` with ``alpha * op_a.dot(x)``.

        On HIP the transposed case is marked as an expected failure.
        """
        if not cusparse.check_availability('csrmv'):
            pytest.skip('csrmv is not available')
        if runtime.is_hip and self.transa:
            pytest.xfail('may be buggy')

        matrix = sparse.csr_matrix(self.a)
        vec = cupy.array(self.x, order='f')
        actual = cupy.cusparse.csrmv(
            matrix, vec, alpha=self.alpha, transa=self.transa)
        expected = self.alpha * self.op_a.dot(self.x)
        testing.assert_array_almost_equal(actual, expected)
Ejemplo n.º 17
0
 def test_csrsm2(self, dtype):
     """Runs ``csrsm2`` in place on a copy of ``b`` and checks ``ref_x``.

     Skipped when ``csrsm2`` is unavailable, and for the combination of
     CSC format, a complex dtype and ``transa == 'H'``.
     """
     if not cusparse.check_availability('csrsm2'):
         raise unittest.SkipTest('csrsm2 is not available')
     is_complex = numpy.dtype(dtype).char in 'FD'
     if self.format == 'csc' and is_complex and self.transa == 'H':
         raise unittest.SkipTest('unsupported combination')
     self._setup(dtype)
     solution = self.b.copy(order=self.order)
     options = dict(
         alpha=self.alpha, lower=self.lower, unit_diag=self.unit_diag,
         transa=self.transa, blocking=self.blocking,
         level_info=self.level_info)
     cusparse.csrsm2(self.a, solution, **options)
     testing.assert_allclose(
         solution, self.ref_x, atol=self.tol, rtol=self.tol)
Ejemplo n.º 18
0
 def test_sparseToDense(self, dtype):
     """Compares ``sparseToDense`` with the matrix's own ``todense()``."""
     if not cusparse.check_availability('sparseToDense'):
         pytest.skip('sparseToDense is not available')
     rows, cols = self.shape
     matrix = scipy.sparse.random(
         rows, cols, density=self.density, format=self.format, dtype=dtype)
     # Wrap the host matrix in the same-format class from ``sparse``;
     # other formats are left as they are.
     if self.format in ('csr', 'csc', 'coo'):
         matrix = getattr(sparse, self.format + '_matrix')(matrix)
     dense = cusparse.sparseToDense(matrix)
     testing.assert_array_equal(matrix.todense(), dense)
Ejemplo n.º 19
0
    def __init__(self, A, V, alpha, beta, update_impl='fast'):
        """Stores the operands and, for the 'fast' mode, prepares cuBLAS.

        Args:
            A: 2-D square operand of shape ``(n, n)``.
            V: 2-D array of shape ``(ncv, n)`` with the same dtype as ``A``.
            alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
            beta: 1-D array of length ``ncv`` whose dtype char equals the
                lower-cased dtype char of ``A``.
            update_impl (str): When it is not ``'fast'``, only the
                attributes are stored and no library state is set up.

        Raises:
            TypeError: In 'fast' mode, if the dtype char of ``A`` is not one
                of ``'f'``, ``'d'``, ``'F'``, ``'D'``.
        """
        assert A.ndim == V.ndim == 2
        assert alpha.ndim == beta.ndim == 1
        assert A.dtype == V.dtype == alpha.dtype
        assert A.dtype.char.lower() == beta.dtype.char
        assert A.shape[0] == A.shape[1] == V.shape[1]
        assert V.shape[0] == alpha.shape[0] == beta.shape[0]

        self.A = A
        self.V = V
        self.alpha = alpha
        self.beta = beta
        self.n = V.shape[1]
        self.ncv = V.shape[0]
        self.update_impl = update_impl
        if self.update_impl != 'fast':
            return

        self.cublas_handle = device.get_cublas_handle()
        self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

        # dtype char -> names of the (dot, nrm2, gemm) cuBLAS routines.
        routine_names = {
            'f': ('sdot', 'snrm2', 'sgemm'),
            'd': ('ddot', 'dnrm2', 'dgemm'),
            'F': ('cdotc', 'scnrm2', 'cgemm'),
            'D': ('zdotc', 'dznrm2', 'zgemm'),
        }
        type_char = A.dtype.char
        if type_char not in routine_names:
            raise TypeError('invalid dtype ({})'.format(A.dtype))
        dot_name, nrm2_name, gemm_name = routine_names[type_char]
        self.dotc = getattr(_cublas, dot_name)
        self.nrm2 = getattr(_cublas, nrm2_name)
        self.gemm = getattr(_cublas, gemm_name)

        use_spmv = (csr.isspmatrix_csr(A)
                    and cusparse.check_availability('spmv'))
        if not use_spmv:
            # A ``None`` handle records that cuSPARSE SpMV is not used.
            self.cusparse_handle = None
        else:
            self.cusparse_handle = device.get_cusparse_handle()
            self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
            self.spmv_alpha = numpy.array(1.0, A.dtype)
            self.spmv_beta = numpy.array(0.0, A.dtype)
            self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
            self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

        # Work vectors: two of length n and one of length ncv.
        self.v = cupy.empty((self.n, ), dtype=A.dtype)
        self.u = cupy.empty((self.n, ), dtype=A.dtype)
        self.uu = cupy.empty((self.ncv, ), dtype=A.dtype)
Ejemplo n.º 20
0
    def solve(self, rhs, trans='N'):
        """Solves linear system of equations with one or several right-hand sides.

        Args:
            rhs (cupy.ndarray): Right-hand side(s) of equation with dimension
                ``(M)`` or ``(M, K)``.
            trans (str): 'N', 'T' or 'H'.
                'N': Solves ``A * x = rhs``.
                'T': Solves ``A.T * x = rhs``.
                'H': Solves ``A.conj().T * x = rhs``.

        Returns:
            cupy.ndarray:
                Solution vector(s)

        Raises:
            TypeError: If ``rhs`` is not a ``cupy.ndarray``.
            ValueError: If ``rhs`` is not 1-D or 2-D, if its first dimension
                differs from ``self.shape[0]``, or if ``trans`` is not one
                of ``'N'``, ``'T'``, ``'H'``.
            NotImplementedError: If cuSPARSE ``csrsm2`` is not available.
        """
        if not isinstance(rhs, cupy.ndarray):
            # The message names the actual parameter (it used to say 'ojb').
            raise TypeError('rhs must be cupy.ndarray')
        if rhs.ndim not in (1, 2):
            raise ValueError('rhs.ndim must be 1 or 2 (actual: {})'.
                             format(rhs.ndim))
        if rhs.shape[0] != self.shape[0]:
            raise ValueError('shape mismatch (self.shape: {}, rhs.shape: {})'
                             .format(self.shape, rhs.shape))
        if trans not in ('N', 'T', 'H'):
            raise ValueError('trans must be \'N\', \'T\', or \'H\'')
        if not cusparse.check_availability('csrsm2'):
            raise NotImplementedError

        # csrsm2 is called without using its return value, so x presumably
        # holds the solution in place after each call.
        x = rhs.astype(self.L.dtype)
        if trans == 'N':
            # Apply the row permutation, solve with L then U, and finish
            # with the column permutation.
            if self.perm_r is not None:
                x = x[self._perm_r_rev]
            cusparse.csrsm2(self.L, x, lower=True, transa=trans)
            cusparse.csrsm2(self.U, x, lower=False, transa=trans)
            if self.perm_c is not None:
                x = x[self.perm_c]
        else:
            # Transposed systems use the reverse order: column permutation,
            # U then L, then the row permutation.
            if self.perm_c is not None:
                x = x[self._perm_c_rev]
            cusparse.csrsm2(self.U, x, lower=False, transa=trans)
            cusparse.csrsm2(self.L, x, lower=True, transa=trans)
            if self.perm_r is not None:
                x = x[self.perm_r]

        if not x._f_contiguous:
            # For compatibility with SciPy
            x = x.copy(order='F')
        return x
Ejemplo n.º 21
0
Archivo: csr.py Proyecto: toslunar/cupy
def dense2csr(a):
    """Converts a dense 2-D array to a CSR matrix.

    Arrays whose dtype char is one of ``'fdFD'`` are converted by cuSPARSE
    (``denseToSparse`` when available, otherwise ``dense2csr``). Every
    other dtype goes through the two-step kernel path below.

    Args:
        a: Dense array with a two-element ``shape``.

    Returns:
        The cuSPARSE result, or a ``csr_matrix`` with int32 index arrays.
    """
    if a.dtype.char in 'fdFD':
        if cusparse.check_availability('denseToSparse'):
            return cusparse.denseToSparse(a, format='csr')
        return cusparse.dense2csr(a)

    rows, cols = a.shape
    dense = cupy.ascontiguousarray(a)
    row_counts = cupy.zeros(rows + 1, dtype=numpy.int32)
    flags = cupy.zeros(rows * cols + 1, dtype=numpy.int32)
    # Step 1 fills row_counts and flags (presumably per-row counts and
    # per-element nonzero flags -- confirm against the kernel).
    cupy_dense2csr_step1()(rows, cols, dense, row_counts, flags)
    indptr = cupy.cumsum(row_counts, dtype=numpy.int32)
    offsets = cupy.cumsum(flags, dtype=numpy.int32)
    nnz = int(indptr[-1])
    indices = cupy.empty(nnz, dtype=numpy.int32)
    data = cupy.empty(nnz, dtype=dense.dtype)
    # Step 2 writes the column indices and values into the output arrays.
    cupy_dense2csr_step2()(rows, cols, dense, offsets, indices, data)
    return csr_matrix((data, indices, indptr), shape=(rows, cols))
Ejemplo n.º 22
0
    def test_csrmm_with_c(self):
        """Checks that ``csrmm`` returns the passed ``c`` with the result.

        On HIP the transposed case is marked as an expected failure.
        """
        if not cusparse.check_availability('csrmm'):
            pytest.skip('csrmm is not available')
        if runtime.is_hip and self.transa:
            pytest.xfail('may be buggy')

        sparse_a = sparse.csr_matrix(self.a)
        dense_b = cupy.array(self.b, order='f')
        out = cupy.array(self.c, order='f')
        returned = cupy.cusparse.csrmm(
            sparse_a, dense_b, c=out, alpha=self.alpha, beta=self.beta,
            transa=self.transa)
        expected = self.alpha * self.op_a.dot(self.b) + self.beta * self.c
        # The routine must hand back the very array passed as ``c``.
        assert returned is out
        testing.assert_array_almost_equal(returned, expected)
Ejemplo n.º 23
0
Archivo: csr.py Proyecto: toslunar/cupy
    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        Args:
            order ({'C', 'F', None}): Whether to store data in C (row-major)
                order or F (column-major) order. Default is C-order. The
                value is upper-cased before use.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same matrix.

        Raises:
            ValueError: If ``order`` is neither ``'C'`` nor ``'F'`` on the
                cuSPARSE conversion paths.

        .. seealso:: :meth:`scipy.sparse.csr_matrix.toarray`

        """
        if order is None:
            order = 'C'
        else:
            order = order.upper()

        if self.nnz == 0:
            return cupy.zeros(shape=self.shape, dtype=self.dtype, order=order)

        # dtypes outside 'fdFD' are handled by csr2dense instead of cuSPARSE.
        if self.dtype.char not in 'fdFD':
            return csr2dense(self, order)

        canonical = self.copy()
        canonical.has_canonical_format = False  # need to enforce sum_duplicates
        canonical.sum_duplicates()

        # On HIP, nnz=0 is problematic as of ROCm 4.2.0
        use_generic_api = (
            cusparse.check_availability('sparseToDense')
            and (not runtime.is_hip or (canonical.nnz > 0)))
        if use_generic_api:
            dense = cusparse.sparseToDense(canonical)
            if order == 'F':
                return dense
            if order == 'C':
                return cupy.ascontiguousarray(dense)
            raise ValueError('order not understood')

        # csr2dense returns F-contiguous array.
        if order == 'C':
            # To return C-contiguous array, it uses transpose.
            return cusparse.csc2dense(canonical.T).T
        if order == 'F':
            return cusparse.csr2dense(canonical)
        raise ValueError('order not understood')
Ejemplo n.º 24
0
    def test_csrsm2(self, dtype):
        """Runs ``csrsm2`` in place on a copy of ``b`` and checks ``ref_x``.

        Skipped when ``csrsm2`` is unavailable, and for the combination of
        CSC format, a complex dtype and ``transa == 'H'``. On HIP some
        configurations are marked as expected failures.
        """
        if not cusparse.check_availability('csrsm2'):
            pytest.skip('csrsm2 is not available')
        if runtime.is_hip:
            known_buggy = self.transa == 'H'
            # The build version is only queried when 'H' did not match.
            if not known_buggy and driver.get_build_version() < 400:
                known_buggy = (self.format, self.transa) in (
                    ('csc', 'N'), ('csr', 'T'))
            if known_buggy:
                pytest.xfail('may be buggy')

        is_complex = numpy.dtype(dtype).char in 'FD'
        if self.format == 'csc' and is_complex and self.transa == 'H':
            pytest.skip('unsupported combination')
        self._setup(dtype)
        solution = self.b.copy(order=self.order)
        options = dict(
            alpha=self.alpha, lower=self.lower, unit_diag=self.unit_diag,
            transa=self.transa, blocking=self.blocking,
            level_info=self.level_info)
        cusparse.csrsm2(self.a, solution, **options)
        testing.assert_allclose(
            solution, self.ref_x, atol=self.tol, rtol=self.tol)
Ejemplo n.º 25
0
Archivo: csr.py Proyecto: mnicely/cupy
 def __mul__(self, other):
     """Multiplies this matrix by a scalar, a sparse matrix or a dense array.

     ``self`` is passed directly to the cuSPARSE CSR routines.

     Args:
         other: A scalar, a CSR/CSC/other sparse matrix, or a dense array
             with 0, 1 or 2 dimensions.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is none of the
         supported kinds.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense ``other`` has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         for routine in ('csrgemm2', 'csrgemm'):
             if cusparse.check_availability(routine):
                 return getattr(cusparse, routine)(self, other)
         raise NotImplementedError

     if csc.isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other.T, transb=True)
         if cusparse.check_availability('csrgemm2'):
             rhs = other.tocsr()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(self, rhs)
         raise NotImplementedError

     if base.isspmatrix(other):
         # Any other sparse format goes through the CSR branch above.
         return self * other.tocsr()

     if not base.isdense(other):
         return NotImplemented

     ndim = other.ndim
     if ndim == 0:
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if ndim == 1:
         self.sum_duplicates()
         vec = cupy.asfortranarray(other)
         # csrmvEx does not work if nnz == 0
         if self.nnz > 0 and cusparse.csrmvExIsAligned(self, vec):
             if cupy.cuda.cub_enabled and vec.flags.c_contiguous:
                 return device_csrmv(
                     self.shape[0], self.shape[1], self.nnz, self.data,
                     self.indptr, self.indices, vec)
             return cusparse.csrmvEx(self, vec)
         for routine in ('csrmv', 'spmv'):
             if cusparse.check_availability(routine):
                 return getattr(cusparse, routine)(self, vec)
         raise NotImplementedError

     if ndim == 2:
         self.sum_duplicates()
         for routine in ('csrmm2', 'spmm'):
             if cusparse.check_availability(routine):
                 matmat = getattr(cusparse, routine)
                 return matmat(self, cupy.asfortranarray(other))
         raise NotImplementedError

     raise ValueError('could not interpret dimensions')
Ejemplo n.º 26
0
 def __mul__(self, other):
     """Multiplies this matrix by a scalar, a sparse matrix or a dense array.

     The matrix is passed to the cuSPARSE CSR routines either as ``self.T``
     together with ``transa=True`` or after an explicit ``tocsr()``.
     Several routines are bypassed on HIP, where ``trans=True`` is noted as
     buggy as of ROCm 4.2.0.

     Args:
         other: A scalar, a CSR/CSC/other sparse matrix, or a dense array
             with 0, 1 or 2 dimensions.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is none of the
         supported kinds.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is usable.
         ValueError: If a dense ``other`` has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if cupyx.scipy.sparse.isspmatrix_csr(other):
         self.sum_duplicates()
         other.sum_duplicates()
         # trans=True is still buggy as of ROCm 4.2.0
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             return cusparse.csrgemm(self.T, other, transa=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             lhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, other)
         raise NotImplementedError

     if isspmatrix_csc(other):
         self.sum_duplicates()
         other.sum_duplicates()
         # trans=True is still buggy as of ROCm 4.2.0
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             return cusparse.csrgemm(
                 self.T, other.T, transa=True, transb=True)
         if cusparse.check_availability('csrgemm2'):
             lhs = self.tocsr()
             rhs = other.tocsr()
             lhs.sum_duplicates()
             rhs.sum_duplicates()
             return cusparse.csrgemm2(lhs, rhs)
         raise NotImplementedError

     if cupyx.scipy.sparse.isspmatrix(other):
         # Any other sparse format goes through the CSR branch above.
         return self * other.tocsr()

     if not base.isdense(other):
         return NotImplemented

     ndim = other.ndim
     if ndim == 0:
         self.sum_duplicates()
         return self._with_data(self.data * other)

     if ndim == 1:
         self.sum_duplicates()
         # trans=True is buggy as of ROCm 4.2.0 for both routines
         # (spmv gave HIPSPARSE_STATUS_INTERNAL_ERROR), so neither one is
         # selected on HIP.
         for routine in ('csrmv', 'spmv'):
             if cusparse.check_availability(routine) and not runtime.is_hip:
                 matvec = getattr(cusparse, routine)
                 break
         else:
             raise NotImplementedError
         return matvec(self.T, cupy.asfortranarray(other), transa=True)

     if ndim == 2:
         self.sum_duplicates()
         # trans=True is buggy as of ROCm 4.2.0 for csrmm2 only.
         if cusparse.check_availability('csrmm2') and not runtime.is_hip:
             matmat = cusparse.csrmm2
         elif cusparse.check_availability('spmm'):
             matmat = cusparse.spmm
         else:
             raise NotImplementedError
         return matmat(self.T, cupy.asfortranarray(other), transa=True)

     raise ValueError('could not interpret dimensions')
Ejemplo n.º 27
0
 def _convert_dense(self, x):
     """Converts a dense array to CSC and returns its three arrays.

     ``denseToSparse`` is used when available, otherwise ``dense2csc``.

     Args:
         x: Dense array handed to the cuSPARSE conversion routine.

     Returns:
         tuple: ``(data, indices, indptr)`` of the converted matrix.
     """
     converted = (
         cusparse.denseToSparse(x, format='csc')
         if cusparse.check_availability('denseToSparse')
         else cusparse.dense2csc(x))
     return converted.data, converted.indices, converted.indptr
Ejemplo n.º 28
0
Archivo: csr.py Proyecto: toslunar/cupy
 def __mul__(self, other):
     """Multiplies this matrix by a scalar, a sparse matrix or a dense array.

     ``self`` is passed directly to the cuSPARSE CSR routines. For 1-D
     dense operands a CUB kernel is tried first when it is enabled and
     considered safe.

     Args:
         other: A scalar, a CSR/CSC/other sparse matrix, or a dense array
             with 0, 1 or 2 dimensions.

     Returns:
         The product, or ``NotImplemented`` when ``other`` is none of the
         supported kinds.

     Raises:
         NotImplementedError: If no suitable cuSPARSE routine is available.
         ValueError: If a dense ``other`` has more than two dimensions.
     """
     if cupy.isscalar(other):
         self.sum_duplicates()
         return self._with_data(self.data * other)
     elif isspmatrix_csr(other):
         # CSR x CSR: prefer csrgemm2 and fall back to csrgemm.
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm2'):
             return cusparse.csrgemm2(self, other)
         elif cusparse.check_availability('csrgemm'):
             return cusparse.csrgemm(self, other)
         else:
             raise NotImplementedError
     elif csc.isspmatrix_csc(other):
         # CSR x CSC: either pass other.T with transb=True, or convert
         # other to CSR first.
         self.sum_duplicates()
         other.sum_duplicates()
         if cusparse.check_availability('csrgemm') and not runtime.is_hip:
             # trans=True is still buggy as of ROCm 4.2.0
             return cusparse.csrgemm(self, other.T, transb=True)
         elif cusparse.check_availability('csrgemm2'):
             b = other.tocsr()
             b.sum_duplicates()
             return cusparse.csrgemm2(self, b)
         else:
             raise NotImplementedError
     elif base.isspmatrix(other):
         # Any other sparse format re-enters through the CSR branch.
         return self * other.tocsr()
     elif base.isdense(other):
         if other.ndim == 0:
             self.sum_duplicates()
             return self._with_data(self.data * other)
         elif other.ndim == 1:
             self.sum_duplicates()
             other = cupy.asfortranarray(other)
             # need extra padding to ensure not stepping on the CUB bug,
             # see cupy/cupy#3679 for discussion
             is_cub_safe = (self.indptr.data.mem.size >
                            self.indptr.size * self.indptr.dtype.itemsize)
             # CUB spmv is buggy since CUDA 11.0, see
             # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637
             is_cub_safe &= (cub._get_cuda_build_version() < 11000)
             # Return from the CUB kernel if it is among the configured
             # routine accelerators and every safety condition holds.
             for accelerator in _accelerator.get_routine_accelerators():
                 if (accelerator == _accelerator.ACCELERATOR_CUB
                         and not runtime.is_hip and is_cub_safe
                         and other.flags.c_contiguous):
                     return cub.device_csrmv(self.shape[0], self.shape[1],
                                             self.nnz, self.data,
                                             self.indptr, self.indices,
                                             other)
             # Otherwise pick a cuSPARSE routine: csrmvEx, csrmv, then spmv.
             if (cusparse.check_availability('csrmvEx') and self.nnz > 0
                     and cusparse.csrmvExIsAligned(self, other)):
                 # csrmvEx does not work if nnz == 0
                 csrmv = cusparse.csrmvEx
             elif cusparse.check_availability('csrmv'):
                 csrmv = cusparse.csrmv
             elif cusparse.check_availability('spmv'):
                 csrmv = cusparse.spmv
             else:
                 raise NotImplementedError
             return csrmv(self, other)
         elif other.ndim == 2:
             # Sparse x dense matrix: csrmm2 first, spmm as the fallback.
             self.sum_duplicates()
             if cusparse.check_availability('csrmm2'):
                 csrmm = cusparse.csrmm2
             elif cusparse.check_availability('spmm'):
                 csrmm = cusparse.spmm
             else:
                 raise NotImplementedError
             return csrmm(self, cupy.asfortranarray(other))
         else:
             raise ValueError('could not interpret dimensions')
     else:
         # Let Python try the reflected operation on ``other``.
         return NotImplemented
Ejemplo n.º 29
0
def _lanczos_fast(A, n, ncv):
    """Builds a Lanczos update closure backed by raw cuBLAS/cuSPARSE calls.

    The cuBLAS dot/nrm2/gemm routines are selected from ``A.dtype.char``,
    the work vectors ``v`` (length ``n``) and ``uu`` (length ``ncv``) are
    allocated once, and cuSPARSE SpMV parameters are prepared when ``A`` is
    a CSR matrix and ``spmv`` is available.

    Args:
        A: Matrix operand; its dtype char must be one of ``'f'``, ``'d'``,
            ``'F'``, ``'D'``.
        n (int): Length of the work vector ``v``; also passed to cuBLAS as
            the vector length and leading dimension.
        ncv (int): Length of the work vector ``uu``.

    Returns:
        function: ``aux(A, V, u, alpha, beta, i_start, i_end)``, which
        updates ``V``, ``u``, ``alpha`` and ``beta`` in place for the
        iterations ``i_start`` to ``i_end - 1``. The ``A`` passed to ``aux``
        must be the same object given here.

    Raises:
        TypeError: If the dtype of ``A`` is not supported.
    """
    cublas_handle = device.get_cublas_handle()
    # Remember the current pointer mode so that it can be restored after
    # each temporary switch to device pointer mode.
    cublas_pointer_mode = _cublas.getPointerMode(cublas_handle)
    if A.dtype.char == 'f':
        dotc = _cublas.sdot
        nrm2 = _cublas.snrm2
        gemm = _cublas.sgemm
    elif A.dtype.char == 'd':
        dotc = _cublas.ddot
        nrm2 = _cublas.dnrm2
        gemm = _cublas.dgemm
    elif A.dtype.char == 'F':
        dotc = _cublas.cdotc
        nrm2 = _cublas.scnrm2
        gemm = _cublas.cgemm
    elif A.dtype.char == 'D':
        dotc = _cublas.zdotc
        nrm2 = _cublas.dznrm2
        gemm = _cublas.zgemm
    else:
        raise TypeError('invalid dtype ({})'.format(A.dtype))

    # A None handle means that the generic ``A @ v`` product is used.
    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_alpha = numpy.array(1.0, A.dtype)
        spmv_beta = numpy.array(0.0, A.dtype)
        spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Work vectors shared by every call of ``aux``, plus host scalars whose
    # addresses are passed to gemm.
    v = cupy.empty((n, ), dtype=A.dtype)
    uu = cupy.empty((ncv, ), dtype=A.dtype)
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    mone = numpy.array(-1.0, dtype=A.dtype)

    outer_A = A

    def aux(A, V, u, alpha, beta, i_start, i_end):
        """Runs the update for iterations ``i_start`` to ``i_end - 1``.

        ``V``, ``u``, ``alpha`` and ``beta`` are modified in place; nothing
        is returned.
        """
        assert A is outer_A
        beta_eps = inversion_eps(A.dtype)

        # Get ready for spmv if enabled
        if cusparse_handle is not None:
            # Note: I would like to reuse descriptors and working buffer
            # on the next update, but I gave it up because it sometimes
            # caused illegal memory access error.
            spmv_desc_A = cusparse.SpMatDescriptor.create(A)
            spmv_desc_v = cusparse.DnVecDescriptor.create(v)
            spmv_desc_u = cusparse.DnVecDescriptor.create(u)
            buff_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
                spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
            spmv_buff = cupy.empty(buff_size, cupy.int8)

        # Start from row i_start of V.
        v[...] = V[i_start]
        for i in range(i_start, i_end):
            # Matrix-vector multiplication
            if cusparse_handle is None:
                u[...] = A @ v
            else:
                _cusparse.spMV(cusparse_handle, spmv_op_a,
                               spmv_alpha.ctypes.data, spmv_desc_A.desc,
                               spmv_desc_v.desc, spmv_beta.ctypes.data,
                               spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg,
                               spmv_buff.data.ptr)

            # Call dotc
            # The result pointer is the address of alpha[i] on the device,
            # hence the temporary switch to device pointer mode.
            _cublas.setPointerMode(cublas_handle,
                                   _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1,
                     alpha.data.ptr + i * alpha.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Orthogonalize
            # The first gemm writes into uu and the second one updates u
            # from V and uu; both use i + 1 as a size argument (presumably
            # covering rows 0..i of V -- confirm against the cuBLAS gemm
            # argument order).
            gemm(cublas_handle, _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N, 1,
                 i + 1, n, one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 zero.ctypes.data, uu.data.ptr, 1)
            gemm(cublas_handle, _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C, n, 1,
                 i + 1, mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # Call nrm2
            # As with dotc, the result pointer is the address of beta[i]
            # on the device.
            _cublas.setPointerMode(cublas_handle,
                                   _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                nrm2(cublas_handle, n, u.data.ptr, 1,
                     beta.data.ptr + i * beta.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Break here as the normalization below touches V[i+1]
            if i >= i_end - 1:
                break

            # A beta below the threshold stops the iteration early: the
            # remaining rows of V and both work vectors are zeroed.
            if beta[i] < beta_eps:
                V[i + 1:i_end, :] = 0
                u[...] = 0
                v[...] = 0
                break
            if i == i_start:
                beta_eps *= beta[i]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, i, n, v, V)

    return aux
Ejemplo n.º 30
0
def spsolve_triangular(A, b, lower=True, overwrite_A=False, overwrite_b=False,
                       unit_diagonal=False):
    """Solve the sparse triangular linear system ``A x = b``.

    Args:
        A (cupyx.scipy.sparse.spmatrix):
            Square sparse matrix of shape ``(M, M)``.
        b (cupy.ndarray):
            Dense right-hand side of shape ``(M,)`` or ``(M, K)``.
        lower (bool):
            If True, ``A`` is treated as lower triangular; otherwise it is
            treated as upper triangular.
        overwrite_A (bool):
            (not supported)
        overwrite_b (bool):
            If True, ``b`` itself may be used as the working/output buffer.
            This only happens when ``b`` already has the dtype of ``A`` and
            is C- or F-contiguous; otherwise a converted copy is used.
        unit_diagonal (bool):
            If True, the diagonal elements of ``A`` are assumed to be 1 and
            are not referenced.

    Returns:
        cupy.ndarray:
            Solution to the system ``A x = b``. The shape is the same as
            ``b``. Single-precision results (``float32`` / ``complex64``)
            are promoted to double precision before being returned.

    Raises:
        NotImplementedError: If the ``csrsm2`` routine is not available.
        TypeError: If ``A`` is not a sparse matrix, ``b`` is not a
            ``cupy.ndarray``, or the dtype of ``A`` is not one of
            ``'f'``, ``'d'``, ``'F'``, ``'D'``.
        ValueError: If ``A`` is not square, ``b`` is not 1D/2D, or the
            leading dimensions of ``A`` and ``b`` differ.
    """
    if not cusparse.check_availability('csrsm2'):
        raise NotImplementedError

    # Validate arguments. The order of these checks decides which error is
    # reported when several conditions are violated at once.
    if not sparse.isspmatrix(A):
        raise TypeError('A must be cupyx.scipy.sparse.spmatrix')
    if not isinstance(b, cupy.ndarray):
        raise TypeError('b must be cupy.ndarray')

    n_rows = A.shape[0]
    if n_rows != A.shape[1]:
        message = 'A must be a square matrix (A.shape: {})'.format(A.shape)
        raise ValueError(message)
    if b.ndim not in (1, 2):
        message = 'b must be 1D or 2D array (b.shape: {})'.format(b.shape)
        raise ValueError(message)
    if n_rows != b.shape[0]:
        message = ('The size of dimensions of A must be equal to the '
                   'size of the first dimension of b '
                   '(A.shape: {}, b.shape: {})').format(A.shape, b.shape)
        raise ValueError(message)
    if A.dtype.char not in 'fdFD':
        raise TypeError('unsupported dtype (actual: {})'.format(A.dtype))

    # Only CSR and CSC inputs are passed through as-is; any other sparse
    # format is converted to CSR (with an efficiency warning).
    is_compressed = sparse.isspmatrix_csr(A) or sparse.isspmatrix_csc(A)
    if not is_compressed:
        warnings.warn('CSR or CSC format is required. Converting to CSR '
                      'format.', sparse.SparseEfficiencyWarning)
        A = A.tocsr()
    # When no conversion happened above, this is called on the caller's own
    # matrix object.
    A.sum_duplicates()

    # The value returned below is ``x`` itself, so ``csrsm2`` is relied on to
    # write the solution into it. Reuse ``b`` as that buffer only when the
    # caller allowed it and no dtype/layout conversion is needed.
    reuse_b = (overwrite_b and A.dtype == b.dtype and
               (b._c_contiguous or b._f_contiguous))
    x = b if reuse_b else b.astype(A.dtype, copy=True)

    cusparse.csrsm2(A, x, lower=lower, unit_diag=unit_diagonal)

    if x.dtype.char in 'fF':
        # Note: This is for compatibility with SciPy.
        x = x.astype(numpy.promote_types(x.dtype, 'float64'))
    return x