Python get_cusparse_handleの例、cupy.cuda.device.get_cusparse_handle Pythonの例

コード例 #1

0

ファイルを表示

ファイル: cusparse.py プロジェクト: anaruse/cupy

def csrmm(a, b, c=None, alpha=1, beta=0, transa=False):
    """Multiply a CSR sparse matrix by a dense matrix.

    .. math::

       C = \\alpha o_a(A) B + \\beta C,

    where :math:`o_a` transposes its argument when ``transa`` is ``True``
    and leaves it unchanged otherwise.

    Args:
        a (cupyx.scipy.sparse.csr): Sparse matrix A.
        b (cupy.ndarray): Dense matrix B. It must be F-contiguous.
        c (cupy.ndarray or None): Dense matrix C. It must be F-contiguous.
            When ``None``, a zero-filled F-ordered array is allocated.
        alpha (float): Coefficient for AB.
        beta (float): Coefficient for C.
        transa (bool): If ``True``, transpose of A is used.

    Returns:
        cupy.ndarray: Calculated C.

    Raises:
        RuntimeError: If ``csrmm`` is reported as unavailable.
        ValueError: If the inner dimensions of ``o_a(A)`` and ``B`` differ.

    """
    if not check_availability('csrmm'):
        raise RuntimeError('csrmm is not available.')

    assert a.ndim == b.ndim == 2
    assert b.flags.f_contiguous
    assert c is None or c.flags.f_contiguous

    # Shape of o_a(A): the stored shape, reversed when transposing.
    op_shape = a.shape[::-1] if transa else a.shape
    if op_shape[1] != b.shape[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    out_rows, inner = op_shape
    out_cols = b.shape[1]

    a, b, c = _cast_common_type(a, b, c)
    if c is None:
        c = cupy.zeros((out_rows, out_cols), a.dtype, 'F')

    # Scalars live in host arrays; the ctypes views give their addresses.
    alpha_host = numpy.array(alpha, a.dtype).ctypes
    beta_host = numpy.array(beta, a.dtype).ctypes

    # The leading dimensions of B and C are ``inner`` and ``out_rows``.
    _call_cusparse(
        'csrmm', a.dtype,
        handle, _transpose_flag(transa),
        a.shape[0], out_cols, a.shape[1], a.nnz,
        alpha_host.data, a._descr.descriptor, a.data.data.ptr,
        a.indptr.data.ptr, a.indices.data.ptr,
        b.data.ptr, inner, beta_host.data, c.data.ptr, out_rows)
    return c

コード例 #2

0

ファイルを表示

def _csr_indptr_to_coo_rows(nnz, Bp):
    """Expand a CSR ``indptr`` array into a COO row-index array.

    Args:
        nnz: Number of entries; length of the returned array.
        Bp: CSR indptr array (``Bp.size - 1`` rows).

    Returns:
        int32 array of length ``nnz`` that ``xcsr2coo`` writes into.
    """
    coo_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Call the raw cusparse routine so that no complete COO matrix
    # object has to be built just to obtain the row array.
    cusparse.xcsr2coo(
        device.get_cusparse_handle(), Bp.data.ptr, nnz, Bp.size - 1,
        coo_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return coo_rows

コード例 #3

0

ファイルを表示

def csrgemm(a, b, transa=False, transb=False):
    """Matrix-matrix product for CSR-matrix.

    .. math::

       C = op(A) op(B),

    where ``op`` is a transpose when the matching ``transa`` / ``transb``
    flag is ``True`` and the identity otherwise.

    Args:
        a (cupy.sparse.csr_matrix): Sparse matrix A.
        b (cupy.sparse.csr_matrix): Sparse matrix B.
        transa (bool): If ``True``, transpose of A is used.
        transb (bool): If ``True``, transpose of B is used.

    Returns:
        cupy.sparse.csr_matrix: Calculated C.

    Raises:
        ValueError: If the inner dimensions of ``op(A)`` and ``op(B)``
            do not match.

    """
    assert a.ndim == b.ndim == 2
    # Effective shapes after the optional transposes.
    a_shape = a.shape if not transa else a.shape[::-1]
    b_shape = b.shape if not transb else b.shape[::-1]
    if a_shape[1] != b_shape[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    m, k = a_shape
    n = b_shape[1]

    a, b = _cast_common_type(a, b)

    op_a = _transpose_flag(transa)
    op_b = _transpose_flag(transb)

    # 0-d host integer whose address is handed to xcsrgemmNnz below; it is
    # read back with int(nnz) afterwards, hence the host pointer mode.
    nnz = numpy.empty((), 'i')
    cusparse.setPointerMode(handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    c_descr = MatDescriptor.create()
    c_indptr = cupy.empty(m + 1, 'i')

    # Phase 1: fill the row pointers of C and obtain its nonzero count.
    cusparse.xcsrgemmNnz(handle, op_a, op_b, m, n, k, a._descr.descriptor,
                         a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
                         b._descr.descriptor, b.nnz, b.indptr.data.ptr,
                         b.indices.data.ptr, c_descr.descriptor,
                         c_indptr.data.ptr, nnz.ctypes.data)

    # Phase 2: with the count known, allocate and fill indices and values.
    c_indices = cupy.empty(int(nnz), 'i')
    c_data = cupy.empty(int(nnz), a.dtype)
    _call_cusparse('csrgemm', a.dtype, handle, op_a, op_b, m, n, k,
                   a._descr.descriptor, a.nnz, a.data.data.ptr,
                   a.indptr.data.ptr, a.indices.data.ptr, b._descr.descriptor,
                   b.nnz, b.data.data.ptr, b.indptr.data.ptr,
                   b.indices.data.ptr, c_descr.descriptor, c_data.data.ptr,
                   c_indptr.data.ptr, c_indices.data.ptr)

    return cupy.sparse.csr_matrix((c_data, c_indices, c_indptr), shape=(m, n))

コード例 #4

0

ファイルを表示

ファイル: cusparse.py プロジェクト: zhaohb/cupy

def coo2csc(x):
    """Convert a COO matrix to CSC format.

    The column indices of ``x`` are compressed into a column pointer array
    with ``xcoo2csr``; ``x.data`` and ``x.row`` are passed on as they are.

    Args:
        x: COO matrix providing ``shape``, ``nnz``, ``data``, ``row``
            and ``col``.

    Returns:
        cupyx.scipy.sparse.csc.csc_matrix: Matrix with the shape of ``x``.
    """
    handle = _device.get_cusparse_handle()
    n_cols = x.shape[1]
    count = x.nnz
    if count != 0:
        col_ptr = _cupy.empty(n_cols + 1, 'i')
        _cusparse.xcoo2csr(
            handle, x.col.data.ptr, count, n_cols, col_ptr.data.ptr,
            _cusparse.CUSPARSE_INDEX_BASE_ZERO)
    else:
        # No entries: the pointer array is all zeros, no library call.
        col_ptr = _cupy.zeros(n_cols + 1, 'i')
    return cupyx.scipy.sparse.csc.csc_matrix(
        (x.data, x.row, col_ptr), shape=x.shape)

コード例 #5

0

ファイルを表示

def csrmm2(a, b, c=None, alpha=1.0, beta=0.0, transa=False, transb=False):
    """Matrix-matrix product for a CSR-matrix and a dense matrix.

    .. math::

       C = \\alpha o_a(A) o_b(B) + \\beta C,

    where :math:`o_a` and :math:`o_b` are transpose functions when ``transa``
    and ``transb`` are ``True`` respectively. And they are identity functions
    otherwise.

    Args:
        a (cupy.sparse.csr): Sparse matrix A.
        b (cupy.ndarray): Dense matrix B. It must be F-contiguous.
        c (cupy.ndarray or None): Dense matrix C. It must be F-contiguous.
            When ``None``, a zero-filled F-ordered array is allocated.
        alpha (float): Coefficient for AB.
        beta (float): Coefficient for C.
        transa (bool): If ``True``, transpose of A is used.
        transb (bool): If ``True``, transpose of B is used.

    Returns:
        cupy.ndarray: Calculated C.

    Raises:
        ValueError: If the inner dimensions of ``o_a(A)`` and ``o_b(B)``
            do not match.

    """
    assert a.ndim == b.ndim == 2
    assert b.flags.f_contiguous
    assert c is None or c.flags.f_contiguous

    a_shape = a.shape if not transa else a.shape[::-1]
    b_shape = b.shape if not transb else b.shape[::-1]
    # The inner dimension must be taken from o_b(B), not from the stored B;
    # otherwise every valid ``transb=True`` call with a non-square B is
    # rejected and mismatched ones slip through.
    if a_shape[1] != b_shape[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    # C has as many rows as o_a(A) and as many columns as o_b(B).
    c_rows = a_shape[0]
    n = b_shape[1]

    a, b, c = _cast_common_type(a, b, c)
    if c is None:
        c = cupy.zeros((c_rows, n), a.dtype, 'F')

    # Leading dimensions are those of the arrays as stored in memory.
    ldb = b.shape[0]
    ldc = c.shape[0]
    op_a = _transpose_flag(transa)
    op_b = _transpose_flag(transb)
    alpha = numpy.array(alpha, a.dtype).ctypes
    beta = numpy.array(beta, a.dtype).ctypes
    # cuSPARSE defines m and k as the row/column counts of the stored A,
    # independent of the transpose flag (csrmm passes them the same way).
    m, k = a.shape
    _call_cusparse('csrmm2', a.dtype, handle, op_a, op_b, m, n, k, a.nnz,
                   alpha.data, a._descr.descriptor, a.data.data.ptr,
                   a.indptr.data.ptr, a.indices.data.ptr, b.data.ptr, ldb,
                   beta.data, c.data.ptr, ldc)
    return c

コード例 #6

0

ファイルを表示

ファイル: cusparse.py プロジェクト: anaruse/cupy

def coo2csr(x):
    """Convert a COO matrix to CSR format.

    The row indices of ``x`` are compressed into a row pointer array with
    ``xcoo2csr``; ``x.data`` and ``x.col`` are passed on as they are.

    Args:
        x: COO matrix providing ``shape``, ``nnz``, ``data``, ``row``
            and ``col``.

    Returns:
        cupyx.scipy.sparse.csr.csr_matrix: Matrix with the shape of ``x``.
    """
    handle = device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = x.nnz
    if count != 0:
        row_ptr = cupy.empty(n_rows + 1, 'i')
        cusparse.xcoo2csr(
            handle, x.row.data.ptr, count, n_rows,
            row_ptr.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    else:
        # No entries: the pointer array is all zeros, no library call.
        row_ptr = cupy.zeros(n_rows + 1, 'i')
    return cupyx.scipy.sparse.csr.csr_matrix(
        (x.data, x.col, row_ptr), shape=x.shape)

コード例 #7

0

ファイルを表示

def check_availability(name):
    """Tell whether the routine ``name`` exists in the loaded cuSPARSE.

    The ``(version_added, version_removed)`` pair registered for ``name`` in
    ``_available_cusparse_version`` is compared with the version reported by
    ``cusparse.getVersion``.

    Args:
        name (str): Key of the routine in ``_available_cusparse_version``.

    Returns:
        bool: ``False`` if the library is older than ``version_added`` or at
        least ``version_removed``; ``True`` otherwise.

    Raises:
        ValueError: If no version information is registered for ``name``.
    """
    if name not in _available_cusparse_version:
        # Format the name into the message; the previous ``'...'.name``
        # attribute access raised AttributeError instead of this error.
        msg = 'No available version information specified for {}'
        raise ValueError(msg.format(name))
    version_added, version_removed = _available_cusparse_version[name]
    version_added = _get_version(version_added)
    version_removed = _get_version(version_removed)
    cusparse_version = cusparse.getVersion(device.get_cusparse_handle())
    # A bound of ``None`` means there is no limit on that side.
    if version_added is not None and cusparse_version < version_added:
        return False
    if version_removed is not None and cusparse_version >= version_removed:
        return False
    return True

コード例 #8

0

ファイルを表示

ファイル: cusparse.py プロジェクト: suryadwar/cupy

def csrgeam(a, b, alpha=1, beta=1):
    """Add two scaled CSR matrices.

    .. math::
        C = \\alpha A + \\beta B

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        alpha (float): Coefficient for A.
        beta (float): Coefficient for B.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix, flagged as being in
        canonical format.

    Raises:
        ValueError: If ``a`` and ``b`` have different shapes.

    """
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape != b.shape:
        raise ValueError('inconsistent shapes')

    handle = device.get_cusparse_handle()
    n_rows, n_cols = a.shape
    a, b = _cast_common_type(a, b)

    # The total count is written to this 0-d host integer, so the handle
    # is switched to host pointer mode.
    total = numpy.empty((), 'i')
    cusparse.setPointerMode(handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    out_descr = MatDescriptor.create()
    out_indptr = cupy.empty(n_rows + 1, 'i')

    # First pass: row pointers of C and its number of stored entries.
    cusparse.xcsrgeamNnz(
        handle, n_rows, n_cols,
        a._descr.descriptor, a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz, b.indptr.data.ptr, b.indices.data.ptr,
        out_descr.descriptor, out_indptr.data.ptr, total.ctypes.data)

    out_indices = cupy.empty(int(total), 'i')
    out_data = cupy.empty(int(total), a.dtype)
    alpha_host = numpy.array(alpha, a.dtype).ctypes
    beta_host = numpy.array(beta, a.dtype).ctypes

    # Second pass: column indices and values of C.
    _call_cusparse(
        'csrgeam', a.dtype, handle, n_rows, n_cols,
        alpha_host.data, a._descr.descriptor, a.nnz, a.data.data.ptr,
        a.indptr.data.ptr, a.indices.data.ptr,
        beta_host.data, b._descr.descriptor, b.nnz, b.data.data.ptr,
        b.indptr.data.ptr, b.indices.data.ptr,
        out_descr.descriptor, out_data.data.ptr, out_indptr.data.ptr,
        out_indices.data.ptr)

    result = cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=a.shape)
    result._has_canonical_format = True
    return result

コード例 #9

0

ファイルを表示

ファイル: _eigen.py プロジェクト: neveroldmilk/cupy

    def __init__(self, A, V, alpha, beta, update_impl='fast'):
        """Store the operands and prepare resources for the 'fast' path.

        Args:
            A: Square 2-d operand whose side equals ``V.shape[1]``.
            V: 2-d array with the same dtype as ``A``.
            alpha: 1-d array of length ``V.shape[0]``, same dtype as ``A``.
            beta: 1-d array of length ``V.shape[0]`` whose dtype char is
                the lower-cased dtype char of ``A``.
            update_impl: Only ``'fast'`` triggers the cuBLAS/cuSPARSE
                setup; for any other value just the operands are stored.

        Raises:
            TypeError: If ``update_impl`` is ``'fast'`` and the dtype char
                of ``A`` is not one of ``f``, ``d``, ``F``, ``D``.
        """
        assert A.ndim == V.ndim == 2
        assert alpha.ndim == beta.ndim == 1
        assert A.dtype == V.dtype == alpha.dtype
        assert A.dtype.char.lower() == beta.dtype.char
        assert A.shape[0] == A.shape[1] == V.shape[1]
        assert V.shape[0] == alpha.shape[0] == beta.shape[0]

        self.A, self.V = A, V
        self.alpha, self.beta = alpha, beta
        self.ncv, self.n = V.shape[0], V.shape[1]
        self.update_impl = update_impl
        if self.update_impl != 'fast':
            return

        self.cublas_handle = device.get_cublas_handle()
        self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

        # cuBLAS routine names (dot, nrm2, gemm) keyed by dtype char.
        routine_names = {
            'f': ('sdot', 'snrm2', 'sgemm'),
            'd': ('ddot', 'dnrm2', 'dgemm'),
            'F': ('cdotc', 'scnrm2', 'cgemm'),
            'D': ('zdotc', 'dznrm2', 'zgemm'),
        }
        type_char = A.dtype.char
        if type_char not in routine_names:
            raise TypeError('invalid dtype ({})'.format(A.dtype))
        dot_name, nrm2_name, gemm_name = routine_names[type_char]
        self.dotc = getattr(_cublas, dot_name)
        self.nrm2 = getattr(_cublas, nrm2_name)
        self.gemm = getattr(_cublas, gemm_name)

        # SpMV settings are prepared only for CSR input when 'spmv' is
        # available; a ``None`` handle marks them as absent.
        use_spmv = (csr.isspmatrix_csr(A)
                    and cusparse.check_availability('spmv'))
        if not use_spmv:
            self.cusparse_handle = None
        else:
            self.cusparse_handle = device.get_cusparse_handle()
            self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
            self.spmv_alpha = numpy.array(1.0, A.dtype)
            self.spmv_beta = numpy.array(0.0, A.dtype)
            self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
            self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

        # Uninitialised work vectors.
        self.v = cupy.empty((self.n, ), dtype=A.dtype)
        self.u = cupy.empty((self.n, ), dtype=A.dtype)
        self.uu = cupy.empty((self.ncv, ), dtype=A.dtype)

コード例 #10

0

ファイルを表示

ファイル: cusparse.py プロジェクト: yutiansut/cupy

def csr2csc(x):
    """Convert a CSR matrix to CSC format.

    Args:
        x: CSR matrix providing ``shape``, ``nnz``, ``dtype``, ``data``,
            ``indptr`` and ``indices``.

    Returns:
        cupy.sparse.csc_matrix: Newly allocated matrix with the shape
        of ``x``.
    """
    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape
    count = x.nnz

    # Output buffers: values, one pointer per column (+1), row indices.
    out_data = cupy.empty(count, x.dtype)
    out_indptr = cupy.empty(n_cols + 1, 'i')
    out_indices = cupy.empty(count, 'i')

    _call_cusparse(
        'csr2csc', x.dtype, handle, n_rows, n_cols, count,
        x.data.data.ptr, x.indptr.data.ptr, x.indices.data.ptr,
        out_data.data.ptr, out_indices.data.ptr, out_indptr.data.ptr,
        cusparse.CUSPARSE_ACTION_NUMERIC,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return cupy.sparse.csc_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)

コード例 #11

0

ファイルを表示

ファイル: _index.py プロジェクト: carterbox/cupy

def _csr_indptr_to_coo_rows(nnz, Bp):
    """Expand a CSR ``indptr`` array into a COO row-index array.

    Args:
        nnz: Number of entries; length of the returned array.
        Bp: CSR indptr array (``Bp.size - 1`` rows).

    Returns:
        int32 array of length ``nnz`` that ``xcsr2coo`` writes into.

    Raises:
        ValueError: On HIP when ``nnz`` is zero.
    """
    coo_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Call the raw cusparse routine so that no complete COO matrix
    # object has to be built just to obtain the row array.
    handle = device.get_cusparse_handle()
    empty_on_hip = runtime.is_hip and nnz == 0
    if empty_on_hip:
        raise ValueError('hipSPARSE currently cannot handle '
                         'sparse matrices with null ptrs')
    cusparse.xcsr2coo(
        handle, Bp.data.ptr, nnz, Bp.size - 1, coo_rows.data.ptr,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return coo_rows

コード例 #12

0

ファイルを表示

ファイル: bench_mri.py プロジェクト: mritools/mrrt.mri

def run_all_bench(
    plot_timings=True,
    include_GPU=True,
    include_CPU=True,
    print_timings=True,
    phasings=None,
    save_dir="/tmp",
):
    """Run every 2D and 3D MRI benchmark with a shared set of options.

    Args:
        plot_timings: Forwarded to each benchmark.
        include_GPU: Forwarded to each benchmark; when true, CuPy is
            imported and initialised first.
        include_CPU: Forwarded to each benchmark.
        print_timings: Forwarded to each benchmark.
        phasings: Forwarded to each benchmark. ``None`` (the default)
            means ``["real", "complex"]``; a fresh list is built per call
            so that no list object is shared between calls.
        save_dir: Forwarded to each benchmark.
    """
    if phasings is None:
        phasings = ["real", "complex"]

    common_kwargs = dict(
        plot_timings=plot_timings,
        include_GPU=include_GPU,
        include_CPU=include_CPU,
        print_timings=print_timings,
        phasings=phasings,
        save_dir=save_dir,
    )
    if include_GPU:
        # Imported lazily so CPU-only runs do not require CuPy.
        import cupy
        from cupy.cuda import device

        # NOTE(review): presumably creates the cuSPARSE handle up front so
        # that its set-up cost is not part of the timings -- confirm.
        device.get_cusparse_handle()
        cupy.fft.cache.enable()

    # 2d cases
    bench_mri_2d_nocoils_nofieldmap(navg_time=(4, 20), **common_kwargs)
    bench_mri_2d_16coils_nofieldmap(navg_time=(2, 8), **common_kwargs)
    bench_mri_2d_16coils_fieldmap(navg_time=(2, 8), **common_kwargs)
    bench_mri_2d_16coils_fieldmap_multispectral(
        navg_time=4, nspectra=2, **common_kwargs
    )

    # 3d cases
    bench_mri_3d_nocoils_nofieldmap(navg_time=(2, 4), **common_kwargs)
    bench_mri_3d_nocoils_fieldmap(navg_time=(2, 4), **common_kwargs)
    bench_mri_3d_16coils_nofieldmap(navg_time=(1, 4), **common_kwargs)
    bench_mri_3d_16coils_fieldmap(navg_time=(1, 4), **common_kwargs)
コード例 #13

0

ファイルを表示

def csrmv(a, x, y=None, alpha=1, beta=0, transa=False):
    """Multiply a CSR sparse matrix by a dense vector.

    .. math::

       y = \\alpha * o_a(A) x + \\beta y,

    where :math:`o_a` transposes its argument when ``transa`` is ``True``
    and leaves it unchanged otherwise.

    Args:
        a (cupy.cusparse.csr_matrix): Matrix A.
        x (cupy.ndarray): Vector x.
        y (cupy.ndarray or None): Vector y. It must be F-contiguous.
            When ``None``, a zero-filled vector is allocated.
        alpha (float): Coefficient for x.
        beta (float): Coefficient for y.
        transa (bool): If ``True``, transpose of ``A`` is used.

    Returns:
        cupy.ndarray: Calculated ``y``.

    Raises:
        RuntimeError: If ``csrmv`` is reported as unavailable.
        ValueError: If the column count of ``o_a(A)`` differs from
            ``len(x)``.

    """
    if not check_availability('csrmv'):
        raise RuntimeError('csrmv is not available.')

    assert y is None or y.flags.f_contiguous

    # Shape of o_a(A): the stored shape, reversed when transposing.
    op_shape = a.shape[::-1] if transa else a.shape
    if op_shape[1] != len(x):
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    out_len, _ = op_shape
    a, x, y = _cast_common_type(a, x, y)
    value_type = a.dtype
    if y is None:
        y = cupy.zeros(out_len, value_type)

    # Scalars live in host arrays; the ctypes views give their addresses.
    alpha_host = numpy.array(alpha, value_type).ctypes
    beta_host = numpy.array(beta, value_type).ctypes

    _call_cusparse(
        'csrmv', value_type,
        handle, _transpose_flag(transa),
        a.shape[0], a.shape[1], a.nnz,
        alpha_host.data, a._descr.descriptor,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        x.data.ptr, beta_host.data, y.data.ptr)
    return y

コード例 #14

0

ファイルを表示

ファイル: cusparse.py プロジェクト: Yugesh299/cupy

def csc2csr(x):
    """Convert a CSC matrix to CSR format.

    Args:
        x: CSC matrix providing ``shape``, ``nnz``, ``dtype``, ``data``,
            ``indptr`` and ``indices``.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Newly allocated matrix with the
        shape of ``x``.

    Raises:
        RuntimeError: If ``csc2csr`` is reported as unavailable.
    """
    if not check_availability('csc2csr'):
        # Name the routine that was actually checked (the message used to
        # say 'csr2csc').
        raise RuntimeError('csc2csr is not available.')

    handle = device.get_cusparse_handle()
    m, n = x.shape
    nnz = x.nnz
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(m + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    # The conversion reuses the 'csr2csc' routine with the row and column
    # counts swapped (n, m), i.e. it treats the CSC arrays as the CSR
    # arrays of the transposed matrix.
    _call_cusparse('csr2csc', x.dtype, handle, n, m, nnz, x.data.data.ptr,
                   x.indptr.data.ptr, x.indices.data.ptr, data.data.ptr,
                   indices.data.ptr, indptr.data.ptr,
                   cusparse.CUSPARSE_ACTION_NUMERIC,
                   cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return cupyx.scipy.sparse.csr_matrix((data, indices, indptr),
                                         shape=x.shape)

コード例 #15

0

ファイルを表示

ファイル: cusparse.py プロジェクト: yutiansut/cupy

def coosort(x):
    """Sort the entries of a COO matrix by row, in place.

    ``x.row`` and ``x.col`` are sorted by ``xcoosortByRow`` and ``x.data``
    is reordered with the resulting permutation. A matrix without stored
    entries is left untouched.

    Args:
        x: COO matrix providing ``shape``, ``nnz``, ``dtype``, ``data``,
            ``row`` and ``col``.
    """
    nnz = x.nnz
    if nnz == 0:
        return
    handle = device.get_cusparse_handle()
    m, n = x.shape

    buffer_size = cusparse.xcoosort_bufferSizeExt(handle, m, n, nnz,
                                                  x.row.data.ptr,
                                                  x.col.data.ptr)
    buf = cupy.empty(buffer_size, 'b')
    P = cupy.empty(nnz, 'i')
    # gthr reads its source at permuted positions while writing the
    # destination, so the two must not be the same buffer: gather from a
    # snapshot of the values instead of from x.data itself.
    data_orig = x.data.copy()
    cusparse.createIdentityPermutation(handle, nnz, P.data.ptr)
    cusparse.xcoosortByRow(handle, m, n, nnz, x.row.data.ptr, x.col.data.ptr,
                           P.data.ptr, buf.data.ptr)
    _call_cusparse('gthr', x.dtype, handle, nnz, data_orig.data.ptr,
                   x.data.data.ptr, P.data.ptr,
                   cusparse.CUSPARSE_INDEX_BASE_ZERO)

コード例 #16

0

ファイルを表示

ファイル: cusparse.py プロジェクト: yutiansut/cupy

def csr2csr_compress(x, tol):
    """Build a compressed copy of a CSR matrix using tolerance ``tol``.

    Args:
        x: CSR matrix whose dtype char is one of ``f``, ``d``, ``F``,
            ``D``.
        tol: Tolerance value handed to both cuSPARSE routines.

    Returns:
        cupy.sparse.csr_matrix: Newly allocated matrix with the shape
        of ``x``.
    """
    assert x.dtype.char in 'fdFD'

    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape

    # First call: per-row counts plus the total that is returned.
    row_counts = cupy.empty(n_rows, 'i')
    kept = _call_cusparse(
        'nnz_compress', x.dtype, handle, n_rows,
        x._descr.descriptor, x.data.data.ptr, x.indptr.data.ptr,
        row_counts.data.ptr, tol)

    out_data = cupy.zeros(kept, x.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.zeros(kept, 'i')

    # Second call: fill the output arrays.
    _call_cusparse(
        'csr2csr_compress', x.dtype, handle, n_rows, n_cols,
        x._descr.descriptor, x.data.data.ptr, x.indices.data.ptr,
        x.indptr.data.ptr, x.nnz, row_counts.data.ptr,
        out_data.data.ptr, out_indices.data.ptr, out_indptr.data.ptr, tol)

    return cupy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)

コード例 #17

0

ファイルを表示

ファイル: cusparse.py プロジェクト: yutiansut/cupy

def csr2coo(x, data, indices):
    """Convert a CSR matrix to COO format.

    Only the row array is computed here (from ``x.indptr``); ``data`` and
    ``indices`` are placed in the result as given.

    Args:
        x (cupy.sparse.csr_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupy.sparse.coo_matrix: A converted matrix.

    """
    handle = device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = len(x.data)
    coo_rows = cupy.empty(count, 'i')
    cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_rows, coo_rows.data.ptr,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)
    # ``data`` and ``indices`` are not copied again at this point.
    return cupy.sparse.coo_matrix(
        (data, (coo_rows, indices)), shape=x.shape)

コード例 #18

0

ファイルを表示

ファイル: cusparse.py プロジェクト: zhaohb/cupy

def csc2coo(x, data, indices):
    """Convert a CSC matrix to COO format.

    Only the column array is computed here (from ``x.indptr``); ``data``
    and ``indices`` are placed in the result as given, with ``indices``
    used as the row array.

    Args:
        x (cupyx.scipy.sparse.csc_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupyx.scipy.sparse.coo_matrix: A converted matrix.

    """
    handle = _device.get_cusparse_handle()
    n_cols = x.shape[1]
    count = x.nnz
    coo_cols = _cupy.empty(count, 'i')
    _cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_cols, coo_cols.data.ptr,
        _cusparse.CUSPARSE_INDEX_BASE_ZERO)
    # ``data`` and ``indices`` are not copied again at this point.
    return cupyx.scipy.sparse.coo_matrix(
        (data, (indices, coo_cols)), shape=x.shape)

コード例 #19

0

ファイルを表示

def _csr_row_index(rows, Ap, Aj, Ax, Bp):
    """Fill the indices and data arrays of a row-indexed CSR matrix.

    Args
        rows : index array of rows to populate
        Ap : indptr array from input sparse matrix
        Aj : indices array from input sparse matrix
        Ax : data array from input sparse matrix
        Bp : indptr array for output sparse matrix

    Returns
        Bj : indices array of output sparse matrix
        Bx : data array of output sparse matrix
    """
    # The last entry of the output indptr is the output entry count.
    out_nnz = int(Bp[-1])
    Bj = cupy.empty(out_nnz, dtype=Aj.dtype)
    Bx = cupy.empty(out_nnz, dtype=Ax.dtype)
    coo_rows = cupy.empty(out_nnz, dtype=rows.dtype)

    # Expand the output indptr into one row number per entry by calling
    # the raw cusparse routine, so that no complete COO matrix object
    # has to be built.
    cusparse.xcsr2coo(
        device.get_cusparse_handle(), Bp.data.ptr, out_nnz, Bp.size - 1,
        coo_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)

    _csr_row_index_ker(
        coo_rows, rows, Ap, Aj, Ax, Bp, Bj, Bx, size=coo_rows.size)

    return Bj, Bx

コード例 #20

0

ファイルを表示

def dense2csr(x):
    """Convert a dense matrix to CSR format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csr_matrix: A converted matrix, flagged as
        being in canonical format.

    Raises:
        RuntimeError: If ``dense2csr`` is reported as unavailable.

    """
    if not check_availability('dense2csr'):
        raise RuntimeError('dense2csr is not available.')

    assert x.ndim == 2
    dense = cupy.asfortranarray(x)
    total = numpy.empty((), dtype='i')
    handle = device.get_cusparse_handle()
    n_rows, n_cols = dense.shape

    descr = MatDescriptor.create()
    row_counts = cupy.empty(n_rows, 'i')
    # Count per row; the overall total lands in the host scalar ``total``.
    _call_cusparse(
        'nnz', dense.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_ROW, n_rows, n_cols,
        descr.descriptor, dense.data.ptr, n_rows,
        row_counts.data.ptr, total.ctypes.data)

    count = int(total)
    out_data = cupy.empty(count, dense.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.empty(count, 'i')

    _call_cusparse(
        'dense2csr', dense.dtype,
        handle, n_rows, n_cols, descr.descriptor,
        dense.data.ptr, n_rows, row_counts.data.ptr,
        out_data.data.ptr, out_indptr.data.ptr, out_indices.data.ptr)
    # The matrix built below does not reuse ``descr``; it gets its own
    # descriptor.
    result = cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=dense.shape)
    result._has_canonical_format = True
    return result

コード例 #21

0

ファイルを表示

def coosort(x, sort_by='r'):
    """Sort the entries of a COO matrix in place.

    Args:
        x (cupyx.scipy.sparse.coo_matrix): A sparse matrix to sort.
        sort_by (str): Sort the indices by row ('r', default) or column ('c').

    Raises:
        RuntimeError: If ``coosort`` is reported as unavailable.
        ValueError: If ``sort_by`` is neither ``'r'`` nor ``'c'``.

    """
    if not check_availability('coosort'):
        raise RuntimeError('coosort is not available.')

    count = x.nnz
    if count == 0:
        return
    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape

    work_size = cusparse.xcoosort_bufferSizeExt(
        handle, n_rows, n_cols, count, x.row.data.ptr, x.col.data.ptr)
    work = cupy.empty(work_size, 'b')
    perm = cupy.empty(count, 'i')
    # Snapshot of the values, used as the gather source further down.
    values_before = x.data.copy()
    cusparse.createIdentityPermutation(handle, count, perm.data.ptr)

    if sort_by == 'r':
        sort_routine = cusparse.xcoosortByRow
    elif sort_by == 'c':
        sort_routine = cusparse.xcoosortByColumn
    else:
        raise ValueError("sort_by must be either 'r' or 'c'")
    sort_routine(
        handle, n_rows, n_cols, count, x.row.data.ptr, x.col.data.ptr,
        perm.data.ptr, work.data.ptr)

    # Reorder the values with the permutation produced by the sort.
    _call_cusparse(
        'gthr', x.dtype,
        handle, count, values_before.data.ptr, x.data.data.ptr,
        perm.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    if sort_by == 'c':
        # Canonical COO order is row-first, which a column sort breaks.
        x._has_canonical_format = False

コード例 #22

0

ファイルを表示

ファイル: cusparse.py プロジェクト: Yugesh299/cupy

def csr2csr_compress(x, tol):
    """Build a compressed copy of a CSR matrix using tolerance ``tol``.

    Args:
        x: CSR matrix whose dtype char is one of ``f``, ``d``, ``F``,
            ``D``.
        tol: Tolerance value handed to both cuSPARSE routines.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Newly allocated matrix with the
        shape of ``x``.

    Raises:
        RuntimeError: If ``csr2csr_compress`` is reported as unavailable.
    """
    if not check_availability('csr2csr_compress'):
        raise RuntimeError('csr2csr_compress is not available.')

    assert x.dtype.char in 'fdFD'

    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape

    # First call: per-row counts plus the total that is returned.
    row_counts = cupy.empty(n_rows, 'i')
    kept = _call_cusparse(
        'nnz_compress', x.dtype, handle, n_rows,
        x._descr.descriptor, x.data.data.ptr, x.indptr.data.ptr,
        row_counts.data.ptr, tol)

    out_data = cupy.zeros(kept, x.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.zeros(kept, 'i')

    # Second call: fill the output arrays.
    _call_cusparse(
        'csr2csr_compress', x.dtype, handle, n_rows, n_cols,
        x._descr.descriptor, x.data.data.ptr, x.indices.data.ptr,
        x.indptr.data.ptr, x.nnz, row_counts.data.ptr,
        out_data.data.ptr, out_indices.data.ptr, out_indptr.data.ptr, tol)

    return cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)

コード例 #23

0

ファイルを表示

ファイル: cusparse.py プロジェクト: zhaohb/cupy

def csr2coo(x, data, indices):
    """Convert a CSR matrix to COO format.

    Only the row array is computed here (from ``x.indptr``); ``data`` and
    ``indices`` are placed in the result as given.

    Args:
        x (cupyx.scipy.sparse.csr_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupyx.scipy.sparse.coo_matrix: A converted matrix.

    Raises:
        RuntimeError: If ``csr2coo`` is reported as unavailable.

    """
    if not check_availability('csr2coo'):
        raise RuntimeError('csr2coo is not available.')

    handle = _device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = x.nnz
    coo_rows = _cupy.empty(count, 'i')
    _cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_rows, coo_rows.data.ptr,
        _cusparse.CUSPARSE_INDEX_BASE_ZERO)
    # ``data`` and ``indices`` are not copied again at this point.
    return cupyx.scipy.sparse.coo_matrix(
        (data, (coo_rows, indices)), shape=x.shape)

コード例 #24

0

ファイルを表示

def cscsort(x):
    """Sorts indices of CSC-matrix in place.

    ``x.indices`` is sorted by ``xcscsort`` and ``x.data`` is reordered
    with the resulting permutation. A matrix without stored entries is
    left untouched.

    Args:
        x (cupy.sparse.csc_matrix): A sparse matrix to sort.

    """
    nnz = x.nnz
    if nnz == 0:
        # Nothing to sort; mirrors the early exit in coosort and avoids
        # handing zero-length buffers to cuSPARSE.
        return
    handle = device.get_cusparse_handle()
    m, n = x.shape

    buffer_size = cusparse.xcscsort_bufferSizeExt(handle, m, n, nnz,
                                                  x.indptr.data.ptr,
                                                  x.indices.data.ptr)
    buf = cupy.empty(buffer_size, 'b')
    P = cupy.empty(nnz, 'i')
    # gthr reads its source at permuted positions while writing the
    # destination, so the two must not be the same buffer: gather from a
    # snapshot of the values instead of from x.data itself.
    data_orig = x.data.copy()
    cusparse.createIdentityPermutation(handle, nnz, P.data.ptr)
    cusparse.xcscsort(handle, m, n, nnz, x._descr.descriptor,
                      x.indptr.data.ptr, x.indices.data.ptr, P.data.ptr,
                      buf.data.ptr)
    _call_cusparse('gthr', x.dtype, handle, nnz, data_orig.data.ptr,
                   x.data.data.ptr, P.data.ptr,
                   cusparse.CUSPARSE_INDEX_BASE_ZERO)

コード例 #25

0

ファイルを表示

def dense2csc(x):
    """Converts a dense matrix in CSC format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csc_matrix: A converted matrix, flagged as
        being in canonical format.

    """
    assert x.ndim == 2
    x = cupy.asfortranarray(x)
    nnz = numpy.empty((), dtype='i')
    handle = device.get_cusparse_handle()
    m, n = x.shape

    descr = MatDescriptor.create()
    # Counting in the column direction yields one entry per column, so
    # the buffer needs n elements; sizing it by the row count m is too
    # small whenever the matrix has more columns than rows.
    nnz_per_col = cupy.empty(n, 'i')
    _call_cusparse(
        'nnz', x.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_COLUMN, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr, nnz.ctypes.data)

    nnz = int(nnz)
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(n + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    _call_cusparse(
        'dense2csc', x.dtype,
        handle, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr,
        data.data.ptr, indices.data.ptr, indptr.data.ptr)
    # The matrix built below does not reuse ``descr``; it gets its own
    # descriptor.
    csc = cupyx.scipy.sparse.csc_matrix((data, indices, indptr), shape=x.shape)
    csc._has_canonical_format = True
    return csc

コード例 #26

0

ファイルを表示

def csrmvEx(a, x, y=None, alpha=1, beta=0, merge_path=True):
    """Multiply a CSR sparse matrix by a dense vector (``csrmvEx``).

    .. math::

       y = \\alpha * A x + \\beta y,

    Args:
        a (cupy.cusparse.csr_matrix): Matrix A.
        x (cupy.ndarray): Vector x.
        y (cupy.ndarray or None): Vector y. It must be F-contiguous.
            When ``None``, a zero-filled vector is allocated.
        alpha (float): Coefficient for x.
        beta (float): Coefficient for y.
        merge_path (bool): If ``True``, merge path algorithm is used.

        All pointers must be aligned with 128 bytes.

    Returns:
        cupy.ndarray: Calculated ``y``.

    Raises:
        RuntimeError: If ``csrmvEx`` is reported as unavailable.
        ValueError: If the column count of ``A`` differs from ``len(x)``.

    """
    if not check_availability('csrmvEx'):
        raise RuntimeError('csrmvEx is not available.')

    assert y is None or y.flags.f_contiguous

    if a.shape[1] != len(x):
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    out_len, _ = a.shape

    a, x, y = _cast_common_type(a, x, y)
    value_type = a.dtype
    if y is None:
        y = cupy.zeros(out_len, value_type)

    cuda_type = _dtype_to_DataType(value_type)
    if merge_path:
        algorithm = cusparse.CUSPARSE_ALG_MERGE_PATH
    else:
        algorithm = cusparse.CUSPARSE_ALG_NAIVE
    no_transpose = cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE

    # Scalars live in host arrays; the ctypes views give their addresses.
    alpha_host = numpy.array(alpha, value_type).ctypes
    beta_host = numpy.array(beta, value_type).ctypes

    assert csrmvExIsAligned(a, x, y)

    # The size query and the product take the same leading arguments; the
    # product call only adds the workspace pointer at the end.
    shared_args = (
        handle, algorithm, no_transpose,
        a.shape[0], a.shape[1], a.nnz, alpha_host.data, cuda_type,
        a._descr.descriptor, a.data.data.ptr, cuda_type,
        a.indptr.data.ptr, a.indices.data.ptr,
        x.data.ptr, cuda_type, beta_host.data, cuda_type,
        y.data.ptr, cuda_type, cuda_type)

    workspace_size = cusparse.csrmvEx_bufferSize(*shared_args)

    workspace = cupy.empty(workspace_size, 'b')
    assert workspace.data.ptr % 128 == 0

    cusparse.csrmvEx(*(shared_args + (workspace.data.ptr,)))
    return y

コード例 #27

0

ファイルを表示

def _lanczos_fast(A, n, ncv):
    """Prepare a cuBLAS/cuSPARSE-backed Lanczos update routine for ``A``.

    The cuBLAS dot, norm and gemm routines are selected from ``A.dtype``.
    When ``A`` is a CSR matrix and cuSPARSE ``spmv`` is available, the
    matrix-vector product inside the returned routine goes through
    ``spMV``; otherwise it falls back to ``A @ v``.

    Args:
        A: Matrix whose dtype selects the BLAS routines. The returned
            routine asserts that it is called with this very object.
        n (int): Length of the internal work vector; also passed to cuBLAS
            as the vector length and leading dimension.
        ncv (int): Length of the scratch vector used by the two ``gemm``
            calls of the orthogonalization step.

    Returns:
        callable: ``aux(A, V, u, alpha, beta, i_start, i_end)``. It iterates
        ``i`` over ``range(i_start, i_end)``, writing the dot product to
        ``alpha[i]`` and the norm to ``beta[i]`` on the device.

    Raises:
        TypeError: If ``A.dtype.char`` is not ``'f'``, ``'d'``, ``'F'`` or
            ``'D'``.

    """
    blas = device.get_cublas_handle()
    # Remembered so that every temporary switch to device pointer mode can
    # be undone again.
    saved_pointer_mode = _cublas.getPointerMode(blas)

    # dtype char -> names of the (dot, norm, gemm) cuBLAS routines.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }.get(A.dtype.char)
    if routine_names is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dotc, nrm2, gemm = [getattr(_cublas, name) for name in routine_names]

    # ``sparse`` stays None when the generic ``A @ v`` product must be used.
    sparse = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        sparse = device.get_cusparse_handle()
        sp_op = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        sp_alpha = numpy.array(1.0, A.dtype)
        sp_beta = numpy.array(0.0, A.dtype)
        sp_dtype = _dtype.to_cuda_dtype(A.dtype)
        sp_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    work = cupy.empty((n, ), dtype=A.dtype)
    scratch = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalars handed to gemm by address.
    plus_one = numpy.array(1.0, dtype=A.dtype)
    nought = numpy.array(0.0, dtype=A.dtype)
    minus_one = numpy.array(-1.0, dtype=A.dtype)

    bound_A = A

    def aux(A, V, u, alpha, beta, i_start, i_end):
        assert A is bound_A
        threshold = inversion_eps(A.dtype)

        if sparse is not None:
            # Descriptors and the work buffer are rebuilt on every call:
            # reusing them across updates sometimes caused illegal memory
            # access errors.
            desc_A = cusparse.SpMatDescriptor.create(A)
            desc_in = cusparse.DnVecDescriptor.create(work)
            desc_out = cusparse.DnVecDescriptor.create(u)
            sp_buffer_size = _cusparse.spMV_bufferSize(
                sparse, sp_op, sp_alpha.ctypes.data,
                desc_A.desc, desc_in.desc, sp_beta.ctypes.data,
                desc_out.desc, sp_dtype, sp_alg)
            sp_buffer = cupy.empty(sp_buffer_size, cupy.int8)

        work[...] = V[i_start]
        for i in range(i_start, i_end):
            # Matrix-vector product: u <- A @ work
            if sparse is not None:
                _cusparse.spMV(
                    sparse, sp_op, sp_alpha.ctypes.data, desc_A.desc,
                    desc_in.desc, sp_beta.ctypes.data, desc_out.desc,
                    sp_dtype, sp_alg, sp_buffer.data.ptr)
            else:
                u[...] = A @ work

            # Dot product, written straight into alpha[i] on the device.
            _cublas.setPointerMode(
                blas, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dotc(blas, n, work.data.ptr, 1, u.data.ptr, 1,
                     alpha.data.ptr + i * alpha.itemsize)
            finally:
                _cublas.setPointerMode(blas, saved_pointer_mode)

            # Orthogonalize: two gemm calls over the first i + 1 rows of V,
            # going through the scratch vector.
            n_basis = i + 1
            gemm(blas, _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
                 1, n_basis, n,
                 plus_one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 nought.ctypes.data, scratch.data.ptr, 1)
            gemm(blas, _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
                 n, 1, n_basis,
                 minus_one.ctypes.data, V.data.ptr, n, scratch.data.ptr, 1,
                 plus_one.ctypes.data, u.data.ptr, n)

            # Norm, written straight into beta[i] on the device.
            _cublas.setPointerMode(
                blas, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                nrm2(blas, n, u.data.ptr, 1,
                     beta.data.ptr + i * beta.itemsize)
            finally:
                _cublas.setPointerMode(blas, saved_pointer_mode)

            # The normalization below touches V[i+1], so stop before it on
            # the final iteration.
            if i >= i_end - 1:
                break

            if beta[i] < threshold:
                # Norm fell below the threshold: zero the remaining rows
                # and both vectors, then stop.
                V[i + 1:i_end, :] = 0
                u[...] = 0
                work[...] = 0
                break
            if i == i_start:
                threshold *= beta[i]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, i, n, work, V)

    return aux

コード例 #28

0

ファイルを表示

def csrgemm2(a, b, d=None, alpha=1, beta=1):
    """Matrix-matrix product for CSR-matrix.

    .. math::
       C = \\alpha A B + \\beta D

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        d (cupyx.scipy.sparse.csr_matrix or None): Sparse matrix D.
        alpha (scalar): Coefficient for the product A B.
        beta (scalar): Coefficient for D. Not used when ``d`` is ``None``
            (a null pointer is passed to cuSPARSE instead).

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix C of shape
        ``(a.shape[0], b.shape[1])``, flagged as being in canonical format.

    Raises:
        RuntimeError: If ``csrgemm2`` is not available.
        TypeError: If ``a``, ``b`` or ``d`` is not a CSR matrix.
        ValueError: If the operand shapes are incompatible.

    """
    if not check_availability('csrgemm2'):
        raise RuntimeError('csrgemm2 is not available.')

    assert a.ndim == b.ndim == 2
    if not isinstance(a, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(a)))
    if not isinstance(b, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(b)))
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape[1] != b.shape[0]:
        raise ValueError('mismatched shape')
    if d is not None:
        assert d.ndim == 2
        if not isinstance(d, cupyx.scipy.sparse.csr_matrix):
            raise TypeError('unsupported type (actual: {})'.format(type(d)))
        assert d.has_canonical_format
        if a.shape[0] != d.shape[0] or b.shape[1] != d.shape[1]:
            raise ValueError('mismatched shape')

    handle = device.get_cusparse_handle()
    m, k = a.shape
    _, n = b.shape

    if d is None:
        a, b = _cast_common_type(a, b)
    else:
        a, b, d = _cast_common_type(a, b, d)

    alpha = numpy.array(alpha, a.dtype).ctypes
    null_ptr = 0
    if d is None:
        # No D operand: pass null pointers and an empty descriptor.
        beta_data = null_ptr
        d_descr = MatDescriptor.create()
        d_nnz = 0
        d_data = null_ptr
        d_indptr = null_ptr
        d_indices = null_ptr
    else:
        beta = numpy.array(beta, a.dtype).ctypes
        beta_data = beta.data
        d_descr = d._descr
        d_nnz = d.nnz
        d_data = d.data.data.ptr
        d_indptr = d.indptr.data.ptr
        d_indices = d.indices.data.ptr

    info = cusparse.createCsrgemm2Info()
    try:
        # Step 1: query the size of the work buffer.
        buff_size = _call_cusparse(
            'csrgemm2_bufferSizeExt', a.dtype,
            handle, m, n, k, alpha.data, a._descr.descriptor, a.nnz,
            a.indptr.data.ptr, a.indices.data.ptr, b._descr.descriptor,
            b.nnz, b.indptr.data.ptr, b.indices.data.ptr, beta_data,
            d_descr.descriptor, d_nnz, d_indptr, d_indices, info)
        buff = cupy.empty(buff_size, numpy.int8)

        # Step 2: compute the row pointers of C and its number of
        # non-zeros. The count is written to host memory, hence host
        # pointer mode.
        c_nnz = numpy.empty((), 'i')
        cusparse.setPointerMode(
            handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

        c_descr = MatDescriptor.create()
        c_indptr = cupy.empty(m + 1, 'i')
        cusparse.xcsrgemm2Nnz(
            handle, m, n, k, a._descr.descriptor, a.nnz, a.indptr.data.ptr,
            a.indices.data.ptr, b._descr.descriptor, b.nnz,
            b.indptr.data.ptr, b.indices.data.ptr, d_descr.descriptor,
            d_nnz, d_indptr, d_indices, c_descr.descriptor,
            c_indptr.data.ptr, c_nnz.ctypes.data, info, buff.data.ptr)

        # Step 3: fill in the column indices and values of C.
        nnz = int(c_nnz)
        c_indices = cupy.empty(nnz, 'i')
        c_data = cupy.empty(nnz, a.dtype)
        _call_cusparse(
            'csrgemm2', a.dtype,
            handle, m, n, k, alpha.data, a._descr.descriptor, a.nnz,
            a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
            b._descr.descriptor, b.nnz, b.data.data.ptr, b.indptr.data.ptr,
            b.indices.data.ptr, beta_data, d_descr.descriptor, d_nnz,
            d_data, d_indptr, d_indices, c_descr.descriptor,
            c_data.data.ptr, c_indptr.data.ptr, c_indices.data.ptr, info,
            buff.data.ptr)

        c = cupyx.scipy.sparse.csr_matrix(
            (c_data, c_indices, c_indptr), shape=(m, n))
        c._has_canonical_format = True
        return c
    finally:
        # Release the info object even when one of the calls above raised.
        cusparse.destroyCsrgemm2Info(info)

コード例 #29

0

ファイルを表示

ファイル: sparse_gtsv.py プロジェクト: TomohikoNakamura/HTFD

def batched_gtsv(dl, d, du, B, algo='cyclic_reduction'):
    """Solve a batch of tridiagonal linear systems.

    Each batch entry is one system of size N described by its lower, main
    and upper diagonals.

    Args:
        dl, d, du (cupy.ndarray): Lower, main and upper diagonals with
            shapes ``(batch, N - 1)``, ``(batch, N)`` and
            ``(batch, N - 1)``, respectively. Only two-dimensional inputs
            are supported; the first dimension is the batch dimension.
        B (cupy.ndarray): Right-hand side vectors with shape
            ``(batch, N)``.
        algo (str): One of ``"cyclic_reduction"``, ``"cuThomas"``,
            ``"LU_w_pivoting"`` and ``"QR"``. Only ``"cyclic_reduction"``
            is implemented; the other names are accepted but raise
            ``NotImplementedError``.

    Returns:
        cupy.ndarray: The buffer handed to cuSPARSE as right-hand side,
        which holds the solution afterwards. It is expected to be ``B``
        itself only when ``B`` is already a C-contiguous float32 array;
        otherwise it is a converted copy, so always use the return value.

    Raises:
        ValueError: If ``algo`` is unknown or the array shapes are
            inconsistent.
        NotImplementedError: If the promoted dtype of the inputs is not
            float32, or ``algo`` is not ``"cyclic_reduction"``.

    """
    if algo not in ["cyclic_reduction", "cuThomas", "LU_w_pivoting", "QR"]:
        raise ValueError(f"Unknown algorithm [{algo}]")

    util._assert_cupy_array(dl)
    util._assert_cupy_array(d)
    util._assert_cupy_array(du)
    util._assert_cupy_array(B)
    if dl.ndim != 2 or d.ndim != 2 or du.ndim != 2 or B.ndim != 2:
        raise ValueError('dl, d, du and B must be 2-d arrays')

    batchsize = d.shape[0]
    if batchsize != dl.shape[0] or batchsize != du.shape[
            0] or batchsize != B.shape[0]:
        raise ValueError(
            'The first dims of dl, du and B must match that of d.')
    N = d.shape[1]  # the size of the linear system
    if dl.shape[1] != N - 1 or du.shape[1] != N - 1 or B.shape[1] != N:
        raise ValueError(
            'The second dims of dl, du and B must match the second dim of d.')

    # Working dtype: promote all four inputs together with float32, so the
    # solver never receives buffers of a different element type than it
    # expects and nothing is silently down-cast.
    dtype = numpy.result_type(
        dl.dtype, d.dtype, du.dtype, B.dtype, numpy.float32)
    if dtype != numpy.float32:
        raise NotImplementedError(
            f"Only float32 systems are supported (got {dtype})")
    if algo != "cyclic_reduction":
        # The interleaved-batch solvers (cuThomas, LU_w_pivoting, QR) are
        # not wired up yet.
        raise NotImplementedError(
            f"Algorithm [{algo}] is not implemented")

    # the first element must be zero of dl
    padded_dl = cupy.ascontiguousarray(
        cupy.pad(dl.astype(dtype, copy=False), ((0, 0), (1, 0)),
                 mode='constant', constant_values=0.0))
    # the last element must be zero of du
    padded_du = cupy.ascontiguousarray(
        cupy.pad(du.astype(dtype, copy=False), ((0, 0), (0, 1)),
                 mode='constant', constant_values=0.0))
    # contiguous, in the working dtype
    d = cupy.ascontiguousarray(d.astype(dtype, copy=False))
    B = cupy.ascontiguousarray(B.astype(dtype, copy=False))

    handle = device.get_cusparse_handle()

    # The cuSPARSE C API reports the buffer size through a ``size_t *``, so
    # the out-parameter is pointer-sized and zero-initialised.
    # NOTE(review): assumes the project's wrapper forwards this pointer
    # unchanged to cuSPARSE -- confirm against the wrapper.
    buffer_size = numpy.zeros(1, dtype=numpy.uintp)
    cusparse.sgtsv2StridedBatch_bufferSizeExt(
        handle, N, padded_dl.data.ptr, d.data.ptr, padded_du.data.ptr,
        B.data.ptr, batchsize, N, buffer_size.ctypes.data)
    buffer = cupy.zeros((int(buffer_size[0]), ), dtype=cupy.uint8)

    # Batch stride is N: the systems are stored back to back.
    cusparse.sgtsv2StridedBatch(
        handle, N, padded_dl.data.ptr, d.data.ptr, padded_du.data.ptr,
        B.data.ptr, batchsize, N, buffer.data.ptr)
    return B

コード例 #30

0

ファイルを表示

def csrgeam2(a, b, alpha=1, beta=1):
    """Weighted sum of two CSR matrices.

    .. math::
        C = \\alpha A + \\beta B

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        alpha (float): Coefficient for A.
        beta (float): Coefficient for B.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix, flagged as being in
        canonical format.

    Raises:
        RuntimeError: If ``csrgeam2`` is not available.
        TypeError: If ``a`` or ``b`` is not a CSR matrix.
        ValueError: If the shapes of ``a`` and ``b`` differ.

    """
    if not check_availability('csrgeam2'):
        raise RuntimeError('csrgeam2 is not available.')

    for operand in (a, b):
        if not isinstance(operand, cupyx.scipy.sparse.csr_matrix):
            raise TypeError(
                'unsupported type (actual: {})'.format(type(operand)))
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape != b.shape:
        raise ValueError('inconsistent shapes')

    handle = device.get_cusparse_handle()
    m, n = a.shape
    a, b = _cast_common_type(a, b)
    dtype = a.dtype

    # The non-zero count of the result is written to host memory.
    out_nnz = numpy.empty((), 'i')
    cusparse.setPointerMode(
        handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    scale_a = numpy.array(alpha, dtype).ctypes
    scale_b = numpy.array(beta, dtype).ctypes
    out_descr = MatDescriptor.create()
    out_indptr = cupy.empty(m + 1, 'i')

    # Leading arguments shared by the buffer-size query and the final call.
    weighted_operands = (
        handle, m, n,
        scale_a.data, a._descr.descriptor, a.nnz, a.data.data.ptr,
        a.indptr.data.ptr, a.indices.data.ptr,
        scale_b.data, b._descr.descriptor, b.nnz, b.data.data.ptr,
        b.indptr.data.ptr, b.indices.data.ptr,
        out_descr.descriptor,
    )

    # Step 1: work buffer size. Values and column indices of the result do
    # not exist yet, so null pointers are passed for them.
    null_ptr = 0
    workspace_size = _call_cusparse(
        'csrgeam2_bufferSizeExt', dtype, *weighted_operands,
        null_ptr, out_indptr.data.ptr, null_ptr)
    workspace = cupy.empty(workspace_size, numpy.int8)

    # Step 2: row pointers and number of non-zeros of the result.
    cusparse.xcsrgeam2Nnz(
        handle, m, n,
        a._descr.descriptor, a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz, b.indptr.data.ptr, b.indices.data.ptr,
        out_descr.descriptor, out_indptr.data.ptr,
        out_nnz.ctypes.data, workspace.data.ptr)

    # Step 3: column indices and values of the result.
    out_indices = cupy.empty(int(out_nnz), 'i')
    out_data = cupy.empty(int(out_nnz), dtype)
    _call_cusparse(
        'csrgeam2', dtype, *weighted_operands,
        out_data.data.ptr, out_indptr.data.ptr, out_indices.data.ptr,
        workspace.data.ptr)

    result = cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=a.shape)
    result._has_canonical_format = True
    return result