def csrmm(a, b, c=None, alpha=1, beta=0, transa=False):
    """Multiply a CSR sparse matrix by a dense matrix.

    .. math::

       C = \\alpha o_a(A) B + \\beta C,

    where :math:`o_a` transposes its argument when ``transa`` is ``True``
    and leaves it unchanged otherwise.

    Args:
        a (cupyx.scipy.sparse.csr): Sparse matrix A.
        b (cupy.ndarray): Dense matrix B. It must be F-contiguous.
        c (cupy.ndarray or None): Dense matrix C. It must be F-contiguous.
            When ``None`` a zero-filled F-ordered matrix is allocated.
        alpha (float): Coefficient for AB.
        beta (float): Coefficient for C.
        transa (bool): If ``True``, transpose of A is used.

    Returns:
        cupy.ndarray: Calculated C.

    Raises:
        RuntimeError: If ``check_availability('csrmm')`` is false.
        ValueError: If the inner dimensions of ``o_a(A)`` and ``B`` differ.
    """
    if not check_availability('csrmm'):
        raise RuntimeError('csrmm is not available.')

    assert a.ndim == b.ndim == 2
    assert b.flags.f_contiguous
    assert c is None or c.flags.f_contiguous

    op_shape = a.shape[::-1] if transa else a.shape
    out_rows, inner = op_shape
    if inner != b.shape[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    out_cols = b.shape[1]

    a, b, c = _cast_common_type(a, b, c)
    if c is None:
        c = cupy.zeros((out_rows, out_cols), a.dtype, 'F')

    # Host-side scalars; their ctypes views supply the pointers passed below.
    alpha_c = numpy.array(alpha, a.dtype).ctypes
    beta_c = numpy.array(beta, a.dtype).ctypes

    # The routine receives the dimensions of A itself (not of o_a(A));
    # the leading dimensions of B and C are ``inner`` and ``out_rows``.
    a_rows, a_cols = a.shape[0], a.shape[1]
    _call_cusparse(
        'csrmm', a.dtype, handle, _transpose_flag(transa),
        a_rows, out_cols, a_cols, a.nnz, alpha_c.data,
        a._descr.descriptor, a.data.data.ptr, a.indptr.data.ptr,
        a.indices.data.ptr, b.data.ptr, inner, beta_c.data,
        c.data.ptr, out_rows)
    return c
def _csr_indptr_to_coo_rows(nnz, Bp):
    """Expand a CSR ``indptr`` array into a COO row-index array.

    Args:
        nnz: Number of entries to allocate for the output array.
        Bp: CSR ``indptr`` array; ``Bp.size - 1`` is passed as the row count.

    Returns:
        ``int32`` array of length ``nnz`` filled by ``cusparse.xcsr2coo``.
    """
    coo_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Use the low-level cuSPARSE binding directly so that no intermediate
    # COO matrix object has to be constructed.
    handle = device.get_cusparse_handle()
    n_rows = Bp.size - 1
    cusparse.xcsr2coo(
        handle, Bp.data.ptr, nnz, n_rows,
        coo_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return coo_rows
def csrgemm(a, b, transa=False, transb=False):
    """Compute the product of two CSR matrices.

    .. math::

       C = op(A) op(B),

    Args:
        a (cupy.sparse.csr_matrix): Sparse matrix A.
        b (cupy.sparse.csr_matrix): Sparse matrix B.
        transa (bool): If ``True``, transpose of A is used.
        transb (bool): If ``True``, transpose of B is used.

    Returns:
        cupy.sparse.csr_matrix: Calculated C.

    Raises:
        ValueError: If the inner dimensions of ``op(A)`` and ``op(B)``
            differ.
    """
    assert a.ndim == b.ndim == 2

    shape_a = a.shape[::-1] if transa else a.shape
    shape_b = b.shape[::-1] if transb else b.shape
    if shape_a[1] != shape_b[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    m, k = shape_a
    n = shape_b[1]

    a, b = _cast_common_type(a, b)
    flag_a = _transpose_flag(transa)
    flag_b = _transpose_flag(transb)

    # Host scalar that receives the nnz count of C from the first pass.
    nnz_host = numpy.empty((), 'i')
    cusparse.setPointerMode(handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    descr_c = MatDescriptor.create()
    indptr_c = cupy.empty(m + 1, 'i')

    # First pass: fill indptr of C and obtain its nnz.
    cusparse.xcsrgemmNnz(
        handle, flag_a, flag_b, m, n, k,
        a._descr.descriptor, a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor, indptr_c.data.ptr, nnz_host.ctypes.data)

    indices_c = cupy.empty(int(nnz_host), 'i')
    data_c = cupy.empty(int(nnz_host), a.dtype)

    # Second pass: fill indices and values of C.
    _call_cusparse(
        'csrgemm', a.dtype, handle, flag_a, flag_b, m, n, k,
        a._descr.descriptor, a.nnz,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz,
        b.data.data.ptr, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor,
        data_c.data.ptr, indptr_c.data.ptr, indices_c.data.ptr)

    return cupy.sparse.csr_matrix(
        (data_c, indices_c, indptr_c), shape=(m, n))
def coo2csc(x):
    """Build a CSC matrix from a COO matrix by compressing ``x.col``.

    Args:
        x: COO matrix providing ``shape``, ``nnz``, ``data``, ``row`` and
            ``col``.

    Returns:
        cupyx.scipy.sparse.csc.csc_matrix: Matrix built from ``x.data``,
        ``x.row`` and the newly computed ``indptr``.
    """
    handle = _device.get_cusparse_handle()
    n_cols = x.shape[1]
    count = x.nnz

    if count != 0:
        indptr = _cupy.empty(n_cols + 1, 'i')
        # Column indices are compressed with the coo2csr routine.
        _cusparse.xcoo2csr(
            handle, x.col.data.ptr, count, n_cols,
            indptr.data.ptr, _cusparse.CUSPARSE_INDEX_BASE_ZERO)
    else:
        # With no entries the library call is skipped; indptr is all zeros.
        indptr = _cupy.zeros(n_cols + 1, 'i')

    return cupyx.scipy.sparse.csc.csc_matrix(
        (x.data, x.row, indptr), shape=x.shape)
def csrmm2(a, b, c=None, alpha=1.0, beta=0.0, transa=False, transb=False):
    """Matrix-matrix product for a CSR-matrix and a dense matrix.

    .. math::

       C = \\alpha o_a(A) o_b(B) + \\beta C,

    where :math:`o_a` and :math:`o_b` are transpose functions when ``transa``
    and ``transb`` are ``True`` respectively. And they are identity functions
    otherwise.

    Args:
        a (cupy.sparse.csr): Sparse matrix A.
        b (cupy.ndarray): Dense matrix B. It must be F-contiguous.
        c (cupy.ndarray or None): Dense matrix C. It must be F-contiguous.
            When ``None`` a zero-filled F-ordered matrix is allocated.
        alpha (float): Coefficient for AB.
        beta (float): Coefficient for C.
        transa (bool): If ``True``, transpose of A is used.
        transb (bool): If ``True``, transpose of B is used.

    Returns:
        cupy.ndarray: Calculated C.

    Raises:
        ValueError: If the inner dimensions of ``o_a(A)`` and ``o_b(B)``
            differ.
    """
    assert a.ndim == b.ndim == 2
    assert b.flags.f_contiguous
    assert c is None or c.flags.f_contiguous

    a_shape = a.shape if not transa else a.shape[::-1]
    b_shape = b.shape if not transb else b.shape[::-1]
    # Compare against the shape of o_b(B), not of B itself; otherwise valid
    # ``transb=True`` inputs are rejected and invalid ones slip through.
    if a_shape[1] != b_shape[0]:
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    # cuSPARSE takes m and k as the row/column counts of A itself (the
    # transpose is conveyed by ``op_a``), consistent with ``csrmm``.
    # NOTE(review): the cuSPARSE reference appears to restrict
    # transa/transb combinations for this routine - confirm before relying
    # on ``transa and transb``.
    m, k = a.shape
    n = b_shape[1]

    a, b, c = _cast_common_type(a, b, c)
    if c is None:
        # C has as many rows as o_a(A).
        c = cupy.zeros((a_shape[0], n), a.dtype, 'F')

    ldb = b.shape[0]
    ldc = c.shape[0]
    op_a = _transpose_flag(transa)
    op_b = _transpose_flag(transb)
    alpha = numpy.array(alpha, a.dtype).ctypes
    beta = numpy.array(beta, a.dtype).ctypes
    _call_cusparse(
        'csrmm2', a.dtype,
        handle, op_a, op_b, m, n, k, a.nnz,
        alpha.data, a._descr.descriptor, a.data.data.ptr,
        a.indptr.data.ptr, a.indices.data.ptr,
        b.data.ptr, ldb, beta.data, c.data.ptr, ldc)
    return c
def coo2csr(x):
    """Build a CSR matrix from a COO matrix by compressing ``x.row``.

    Args:
        x: COO matrix providing ``shape``, ``nnz``, ``data``, ``row`` and
            ``col``.

    Returns:
        cupyx.scipy.sparse.csr.csr_matrix: Matrix built from ``x.data``,
        ``x.col`` and the newly computed ``indptr``.
    """
    handle = device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = x.nnz

    if count != 0:
        indptr = cupy.empty(n_rows + 1, 'i')
        cusparse.xcoo2csr(
            handle, x.row.data.ptr, count, n_rows,
            indptr.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    else:
        # With no entries the library call is skipped; indptr is all zeros.
        indptr = cupy.zeros(n_rows + 1, 'i')

    return cupyx.scipy.sparse.csr.csr_matrix(
        (x.data, x.col, indptr), shape=x.shape)
def check_availability(name):
    """Tell whether the cuSPARSE routine ``name`` is usable at runtime.

    The version range registered for ``name`` in
    ``_available_cusparse_version`` is compared with the version reported
    by ``cusparse.getVersion`` for the current handle.

    Args:
        name (str): Key to look up in ``_available_cusparse_version``.

    Returns:
        bool: ``False`` when the runtime version is lower than the
        "added" version or not lower than the "removed" version (each bound
        is ignored when it is ``None``); ``True`` otherwise.

    Raises:
        ValueError: If ``name`` has no entry in
            ``_available_cusparse_version``.
    """
    if name not in _available_cusparse_version:
        # Previously ``'...{}'.name`` raised AttributeError instead of
        # producing the intended message.
        msg = 'No available version information specified for {}'.format(
            name)
        raise ValueError(msg)

    version_added, version_removed = _available_cusparse_version[name]
    version_added = _get_version(version_added)
    version_removed = _get_version(version_removed)
    cusparse_version = cusparse.getVersion(device.get_cusparse_handle())

    if version_added is not None and cusparse_version < version_added:
        return False
    if version_removed is not None and cusparse_version >= version_removed:
        return False
    return True
def csrgeam(a, b, alpha=1, beta=1):
    """Add two scaled CSR matrices.

    .. math::

       C = \\alpha A + \\beta B

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        alpha (float): Coefficient for A.
        beta (float): Coefficient for B.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix, flagged as having
        canonical format.

    Raises:
        ValueError: If ``a`` and ``b`` have different shapes.
    """
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape != b.shape:
        raise ValueError('inconsistent shapes')

    handle = device.get_cusparse_handle()
    n_rows, n_cols = a.shape
    a, b = _cast_common_type(a, b)

    # Host scalar that receives the nnz count of C from the first pass.
    nnz_host = numpy.empty((), 'i')
    cusparse.setPointerMode(handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    descr_c = MatDescriptor.create()
    indptr_c = cupy.empty(n_rows + 1, 'i')

    # First pass: fill indptr of C and obtain its nnz.
    cusparse.xcsrgeamNnz(
        handle, n_rows, n_cols,
        a._descr.descriptor, a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor, indptr_c.data.ptr, nnz_host.ctypes.data)

    indices_c = cupy.empty(int(nnz_host), 'i')
    data_c = cupy.empty(int(nnz_host), a.dtype)
    alpha_c = numpy.array(alpha, a.dtype).ctypes
    beta_c = numpy.array(beta, a.dtype).ctypes

    # Second pass: fill indices and values of C.
    _call_cusparse(
        'csrgeam', a.dtype, handle, n_rows, n_cols,
        alpha_c.data, a._descr.descriptor, a.nnz,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        beta_c.data, b._descr.descriptor, b.nnz,
        b.data.data.ptr, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor,
        data_c.data.ptr, indptr_c.data.ptr, indices_c.data.ptr)

    result = cupyx.scipy.sparse.csr_matrix(
        (data_c, indices_c, indptr_c), shape=a.shape)
    result._has_canonical_format = True
    return result
def __init__(self, A, V, alpha, beta, update_impl='fast'):
    """Store the operands and, for the 'fast' path, prepare library state.

    Args:
        A: Square 2-D operand; ``A.shape[0] == A.shape[1] == V.shape[1]``.
        V: 2-D array with the same dtype as ``A``; ``V.shape`` is
            ``(ncv, n)``.
        alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
        beta: 1-D array of length ``ncv`` whose dtype char equals the
            lower-cased dtype char of ``A``.
        update_impl (str): When not ``'fast'``, only the attributes above
            (plus ``n``, ``ncv`` and ``update_impl``) are stored.

    Raises:
        TypeError: On the 'fast' path, if ``A.dtype.char`` is not one of
            ``'f'``, ``'d'``, ``'F'``, ``'D'``.
    """
    assert A.ndim == V.ndim == 2
    assert alpha.ndim == beta.ndim == 1
    assert A.dtype == V.dtype == alpha.dtype
    assert A.dtype.char.lower() == beta.dtype.char
    assert A.shape[0] == A.shape[1] == V.shape[1]
    assert V.shape[0] == alpha.shape[0] == beta.shape[0]

    self.A, self.V = A, V
    self.alpha, self.beta = alpha, beta
    self.ncv, self.n = V.shape[0], V.shape[1]
    self.update_impl = update_impl
    if self.update_impl != 'fast':
        return

    self.cublas_handle = device.get_cublas_handle()
    self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

    # dtype char -> names of the (dot, norm, gemm) routines in ``_cublas``.
    # Names are resolved lazily so only the selected routines are looked up.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }
    selected = routine_names.get(A.dtype.char)
    if selected is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dot_name, nrm_name, gemm_name = selected
    self.dotc = getattr(_cublas, dot_name)
    self.nrm2 = getattr(_cublas, nrm_name)
    self.gemm = getattr(_cublas, gemm_name)

    use_spmv = csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')
    if not use_spmv:
        self.cusparse_handle = None
    else:
        self.cusparse_handle = device.get_cusparse_handle()
        self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        self.spmv_alpha = numpy.array(1.0, A.dtype)
        self.spmv_beta = numpy.array(0.0, A.dtype)
        self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
        self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Work vectors.
    work_dtype = A.dtype
    self.v = cupy.empty((self.n, ), dtype=work_dtype)
    self.u = cupy.empty((self.n, ), dtype=work_dtype)
    self.uu = cupy.empty((self.ncv, ), dtype=work_dtype)
def csr2csc(x):
    """Convert a CSR matrix to CSC using the cuSPARSE ``csr2csc`` routine.

    Args:
        x: CSR matrix providing ``shape``, ``nnz``, ``dtype``, ``data``,
            ``indptr`` and ``indices``.

    Returns:
        cupy.sparse.csc_matrix: Matrix with newly allocated ``data``,
        ``indices`` and ``indptr`` arrays and the same shape as ``x``.
    """
    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape
    count = x.nnz

    out_data = cupy.empty(count, x.dtype)
    out_indptr = cupy.empty(n_cols + 1, 'i')
    out_indices = cupy.empty(count, 'i')

    _call_cusparse(
        'csr2csc', x.dtype,
        handle, n_rows, n_cols, count,
        x.data.data.ptr, x.indptr.data.ptr, x.indices.data.ptr,
        out_data.data.ptr, out_indices.data.ptr, out_indptr.data.ptr,
        cusparse.CUSPARSE_ACTION_NUMERIC,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)

    return cupy.sparse.csc_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)
def _csr_indptr_to_coo_rows(nnz, Bp):
    """Expand a CSR ``indptr`` array into a COO row-index array.

    Args:
        nnz: Number of entries to allocate for the output array.
        Bp: CSR ``indptr`` array; ``Bp.size - 1`` is passed as the row count.

    Returns:
        ``int32`` array of length ``nnz`` filled by ``cusparse.xcsr2coo``.

    Raises:
        ValueError: When running on HIP and ``nnz`` is zero.
    """
    coo_rows = cupy.empty(nnz, dtype=numpy.int32)

    # Use the low-level cuSPARSE binding directly so that no intermediate
    # COO matrix object has to be constructed.
    handle = device.get_cusparse_handle()
    empty_on_hip = runtime.is_hip and nnz == 0
    if empty_on_hip:
        raise ValueError('hipSPARSE currently cannot handle '
                         'sparse matrices with null ptrs')

    n_rows = Bp.size - 1
    cusparse.xcsr2coo(
        handle, Bp.data.ptr, nnz, n_rows,
        coo_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return coo_rows
def run_all_bench(
    plot_timings=True,
    include_GPU=True,
    include_CPU=True,
    print_timings=True,
    phasings=None,
    save_dir="/tmp",
):
    """Run every 2-D and 3-D MRI benchmark with a shared set of options.

    Args:
        plot_timings, include_GPU, include_CPU, print_timings, save_dir:
            Forwarded unchanged to each ``bench_mri_*`` function.
        phasings: Forwarded to each benchmark. ``None`` (the default) means
            ``["real", "complex"]``; a fresh list is created per call so the
            default can never be shared or mutated between calls.
    """
    if phasings is None:
        phasings = ["real", "complex"]

    common_kwargs = dict(
        plot_timings=plot_timings,
        include_GPU=include_GPU,
        include_CPU=include_CPU,
        print_timings=print_timings,
        phasings=phasings,
        save_dir=save_dir,
    )

    if include_GPU:
        # Imported lazily so CPU-only runs do not need cupy.
        import cupy
        from cupy.cuda import device

        # Fetch the cuSPARSE handle and enable the FFT cache before any
        # benchmark runs.
        device.get_cusparse_handle()
        cupy.fft.cache.enable()

    # 2d cases
    bench_mri_2d_nocoils_nofieldmap(navg_time=(4, 20), **common_kwargs)
    bench_mri_2d_16coils_nofieldmap(navg_time=(2, 8), **common_kwargs)
    bench_mri_2d_16coils_fieldmap(navg_time=(2, 8), **common_kwargs)
    bench_mri_2d_16coils_fieldmap_multispectral(
        navg_time=4, nspectra=2, **common_kwargs
    )

    # 3d cases
    bench_mri_3d_nocoils_nofieldmap(navg_time=(2, 4), **common_kwargs)
    bench_mri_3d_nocoils_fieldmap(navg_time=(2, 4), **common_kwargs)
    bench_mri_3d_16coils_nofieldmap(navg_time=(1, 4), **common_kwargs)
    bench_mri_3d_16coils_fieldmap(navg_time=(1, 4), **common_kwargs)
def csrmv(a, x, y=None, alpha=1, beta=0, transa=False):
    """Multiply a CSR sparse matrix by a dense vector.

    .. math::

       y = \\alpha * o_a(A) x + \\beta y,

    where :math:`o_a` transposes its argument when ``transa`` is ``True``
    and leaves it unchanged otherwise.

    Args:
        a (cupy.cusparse.csr_matrix): Matrix A.
        x (cupy.ndarray): Vector x.
        y (cupy.ndarray or None): Vector y. It must be F-contiguous.
            When ``None`` a zero-filled vector is allocated.
        alpha (float): Coefficient for x.
        beta (float): Coefficient for y.
        transa (bool): If ``True``, transpose of ``A`` is used.

    Returns:
        cupy.ndarray: Calculated ``y``.

    Raises:
        RuntimeError: If ``check_availability('csrmv')`` is false.
        ValueError: If ``len(x)`` differs from the column count of
            ``o_a(A)``.
    """
    if not check_availability('csrmv'):
        raise RuntimeError('csrmv is not available.')

    assert y is None or y.flags.f_contiguous

    op_shape = a.shape[::-1] if transa else a.shape
    if op_shape[1] != len(x):
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    out_len, _ = op_shape

    a, x, y = _cast_common_type(a, x, y)
    dtype = a.dtype
    if y is None:
        y = cupy.zeros(out_len, dtype)

    # Host-side scalars; their ctypes views supply the pointers passed below.
    alpha_c = numpy.array(alpha, dtype).ctypes
    beta_c = numpy.array(beta, dtype).ctypes

    # The routine receives the dimensions of A itself, not of o_a(A).
    _call_cusparse(
        'csrmv', dtype,
        handle, _transpose_flag(transa),
        a.shape[0], a.shape[1], a.nnz, alpha_c.data, a._descr.descriptor,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        x.data.ptr, beta_c.data, y.data.ptr)
    return y
def csc2csr(x):
    """Convert a CSC matrix to CSR.

    The conversion reuses the cuSPARSE ``csr2csc`` routine with the row and
    column counts swapped.

    Args:
        x: CSC matrix providing ``shape``, ``nnz``, ``dtype``, ``data``,
            ``indptr`` and ``indices``.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Matrix with newly allocated arrays
        and the same shape as ``x``.

    Raises:
        RuntimeError: If ``check_availability('csc2csr')`` is false.
    """
    if not check_availability('csc2csr'):
        # The message now names the routine that was actually checked.
        raise RuntimeError('csc2csr is not available.')

    handle = device.get_cusparse_handle()
    m, n = x.shape
    nnz = x.nnz
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(m + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    # Note the swapped (n, m): the routine is applied with rows and
    # columns exchanged.
    _call_cusparse(
        'csr2csc', x.dtype,
        handle, n, m, nnz, x.data.data.ptr,
        x.indptr.data.ptr, x.indices.data.ptr,
        data.data.ptr, indices.data.ptr, indptr.data.ptr,
        cusparse.CUSPARSE_ACTION_NUMERIC,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)
    return cupyx.scipy.sparse.csr_matrix(
        (data, indices, indptr), shape=x.shape)
def coosort(x):
    """Sort the indices of a COO matrix by row, in place.

    ``x.row`` and ``x.col`` are sorted by the cuSPARSE row sort, and
    ``x.data`` is rearranged with the resulting permutation.

    Args:
        x: COO matrix providing ``nnz``, ``shape``, ``dtype``, ``row``,
            ``col`` and ``data``.

    Returns:
        None. Nothing is done when ``x.nnz`` is zero.
    """
    nnz = x.nnz
    if nnz == 0:
        return

    handle = device.get_cusparse_handle()
    m, n = x.shape

    buffer_size = cusparse.xcoosort_bufferSizeExt(
        handle, m, n, nnz, x.row.data.ptr, x.col.data.ptr)
    buf = cupy.empty(buffer_size, 'b')
    P = cupy.empty(nnz, 'i')
    # Snapshot the values: gathering with the same buffer as both source
    # and destination would read entries that were already overwritten.
    data_orig = x.data.copy()

    cusparse.createIdentityPermutation(handle, nnz, P.data.ptr)
    cusparse.xcoosortByRow(
        handle, m, n, nnz, x.row.data.ptr, x.col.data.ptr,
        P.data.ptr, buf.data.ptr)

    # Apply the permutation to the values: x.data[i] = data_orig[P[i]].
    _call_cusparse(
        'gthr', x.dtype,
        handle, nnz, data_orig.data.ptr, x.data.data.ptr,
        P.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
def csr2csr_compress(x, tol):
    """Compress a CSR matrix with the cuSPARSE ``csr2csr_compress`` routine.

    Args:
        x: CSR matrix whose dtype char is one of ``'f'``, ``'d'``, ``'F'``,
            ``'D'``.
        tol: Threshold forwarded to both cuSPARSE calls.

    Returns:
        cupy.sparse.csr_matrix: Compressed matrix with the shape of ``x``.
    """
    assert x.dtype.char in 'fdFD'

    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape
    descr = x._descr.descriptor

    # First pass: per-row counts and total number of kept entries.
    row_counts = cupy.empty(n_rows, 'i')
    kept = _call_cusparse(
        'nnz_compress', x.dtype,
        handle, n_rows, descr,
        x.data.data.ptr, x.indptr.data.ptr, row_counts.data.ptr, tol)

    out_data = cupy.zeros(kept, x.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.zeros(kept, 'i')

    # Second pass: write the compressed arrays.
    _call_cusparse(
        'csr2csr_compress', x.dtype,
        handle, n_rows, n_cols, descr,
        x.data.data.ptr, x.indices.data.ptr, x.indptr.data.ptr,
        x.nnz, row_counts.data.ptr, out_data.data.ptr,
        out_indices.data.ptr, out_indptr.data.ptr, tol)

    return cupy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)
def csr2coo(x, data, indices):
    """Convert a CSR matrix to COO format.

    Only the row-index array is computed here; ``data`` and ``indices`` are
    placed into the result exactly as passed.

    Args:
        x (cupy.sparse.csr_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupy.sparse.coo_matrix: A converted matrix.
    """
    handle = device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = len(x.data)

    rows = cupy.empty(count, 'i')
    cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_rows, rows.data.ptr,
        cusparse.CUSPARSE_INDEX_BASE_ZERO)

    # No copy of data / indices is made at this point.
    coords = (rows, indices)
    return cupy.sparse.coo_matrix((data, coords), shape=x.shape)
def csc2coo(x, data, indices):
    """Convert a CSC matrix to COO format.

    Only the column-index array is computed here; ``data`` and ``indices``
    are placed into the result exactly as passed.

    Args:
        x (cupyx.scipy.sparse.csc_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupyx.scipy.sparse.coo_matrix: A converted matrix.
    """
    handle = _device.get_cusparse_handle()
    n_cols = x.shape[1]
    count = x.nnz

    cols = _cupy.empty(count, 'i')
    _cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_cols, cols.data.ptr,
        _cusparse.CUSPARSE_INDEX_BASE_ZERO)

    # No copy of data / indices is made at this point.
    coords = (indices, cols)
    return cupyx.scipy.sparse.coo_matrix((data, coords), shape=x.shape)
def _csr_row_index(rows, Ap, Aj, Ax, Bp):
    """Fill the indices and data arrays of a row-indexed CSR result.

    Args:
        rows: index array of rows to populate
        Ap: indptr array from input sparse matrix
        Aj: indices array from input sparse matrix
        Ax: data array from input sparse matrix
        Bp: indptr array for output sparse matrix; its last element is
            taken as the output nnz

    Returns:
        Bj: indices array of output sparse matrix
        Bx: data array of output sparse matrix
    """
    out_nnz = int(Bp[-1])
    out_indices = cupy.empty(out_nnz, dtype=Aj.dtype)
    out_data = cupy.empty(out_nnz, dtype=Ax.dtype)

    # Expand the output indptr into one row id per output entry. The
    # low-level cuSPARSE binding is used so that no COO object is built.
    entry_rows = cupy.empty(out_nnz, dtype=rows.dtype)
    handle = device.get_cusparse_handle()
    n_out_rows = Bp.size - 1
    cusparse.xcsr2coo(
        handle, Bp.data.ptr, out_nnz, n_out_rows,
        entry_rows.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)

    _csr_row_index_ker(
        entry_rows, rows, Ap, Aj, Ax, Bp, out_indices, out_data,
        size=entry_rows.size)
    return out_indices, out_data
def dense2csr(x):
    """Convert a dense matrix to CSR format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csr_matrix: A converted matrix, flagged as
        having canonical format.

    Raises:
        RuntimeError: If ``check_availability('dense2csr')`` is false.
    """
    if not check_availability('dense2csr'):
        raise RuntimeError('dense2csr is not available.')

    assert x.ndim == 2
    x = cupy.asfortranarray(x)

    # Host scalar that receives the total nnz from the counting pass.
    nnz_host = numpy.empty((), dtype='i')
    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape

    descr = MatDescriptor.create()
    row_counts = cupy.empty(n_rows, 'i')
    _call_cusparse(
        'nnz', x.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_ROW, n_rows, n_cols,
        descr.descriptor, x.data.ptr, n_rows, row_counts.data.ptr,
        nnz_host.ctypes.data)

    count = int(nnz_host)
    out_data = cupy.empty(count, x.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.empty(count, 'i')

    _call_cusparse(
        'dense2csr', x.dtype,
        handle, n_rows, n_cols, descr.descriptor,
        x.data.ptr, n_rows, row_counts.data.ptr,
        out_data.data.ptr, out_indptr.data.ptr, out_indices.data.ptr)

    # The descriptor used above is not handed to the new matrix; the
    # constructor creates its own.
    result = cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)
    result._has_canonical_format = True
    return result
def coosort(x, sort_by='r'):
    """Sort the indices of a COO matrix in place.

    Args:
        x (cupyx.scipy.sparse.coo_matrix): A sparse matrix to sort.
        sort_by (str): Sort the indices by row ('r', default) or column
            ('c').

    Raises:
        RuntimeError: If ``check_availability('coosort')`` is false.
        ValueError: If ``sort_by`` is neither ``'r'`` nor ``'c'``.
    """
    if not check_availability('coosort'):
        raise RuntimeError('coosort is not available.')

    count = x.nnz
    if count == 0:
        return

    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape

    work_size = cusparse.xcoosort_bufferSizeExt(
        handle, n_rows, n_cols, count, x.row.data.ptr, x.col.data.ptr)
    work = cupy.empty(work_size, 'b')
    perm = cupy.empty(count, 'i')
    # Keep the unsorted values; the gather below reads from this copy.
    values_before = x.data.copy()
    cusparse.createIdentityPermutation(handle, count, perm.data.ptr)

    by_column = sort_by == 'c'
    if sort_by == 'r':
        cusparse.xcoosortByRow(
            handle, n_rows, n_cols, count, x.row.data.ptr, x.col.data.ptr,
            perm.data.ptr, work.data.ptr)
    elif by_column:
        cusparse.xcoosortByColumn(
            handle, n_rows, n_cols, count, x.row.data.ptr, x.col.data.ptr,
            perm.data.ptr, work.data.ptr)
    else:
        raise ValueError("sort_by must be either 'r' or 'c'")

    # Rearrange the values with the permutation produced by the sort.
    _call_cusparse(
        'gthr', x.dtype,
        handle, count, values_before.data.ptr, x.data.data.ptr,
        perm.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)

    if by_column:
        # Canonical COO is ordered by row first, so drop the flag.
        x._has_canonical_format = False
def csr2csr_compress(x, tol):
    """Compress a CSR matrix with the cuSPARSE ``csr2csr_compress`` routine.

    Args:
        x: CSR matrix whose dtype char is one of ``'f'``, ``'d'``, ``'F'``,
            ``'D'``.
        tol: Threshold forwarded to both cuSPARSE calls.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Compressed matrix with the shape of
        ``x``.

    Raises:
        RuntimeError: If ``check_availability('csr2csr_compress')`` is
            false.
    """
    if not check_availability('csr2csr_compress'):
        raise RuntimeError('csr2csr_compress is not available.')

    assert x.dtype.char in 'fdFD'

    handle = device.get_cusparse_handle()
    n_rows, n_cols = x.shape
    descr = x._descr.descriptor

    # First pass: per-row counts and total number of kept entries.
    row_counts = cupy.empty(n_rows, 'i')
    kept = _call_cusparse(
        'nnz_compress', x.dtype,
        handle, n_rows, descr,
        x.data.data.ptr, x.indptr.data.ptr, row_counts.data.ptr, tol)

    out_data = cupy.zeros(kept, x.dtype)
    out_indptr = cupy.empty(n_rows + 1, 'i')
    out_indices = cupy.zeros(kept, 'i')

    # Second pass: write the compressed arrays.
    _call_cusparse(
        'csr2csr_compress', x.dtype,
        handle, n_rows, n_cols, descr,
        x.data.data.ptr, x.indices.data.ptr, x.indptr.data.ptr,
        x.nnz, row_counts.data.ptr, out_data.data.ptr,
        out_indices.data.ptr, out_indptr.data.ptr, tol)

    return cupyx.scipy.sparse.csr_matrix(
        (out_data, out_indices, out_indptr), shape=x.shape)
def csr2coo(x, data, indices):
    """Convert a CSR matrix to COO format.

    Only the row-index array is computed here; ``data`` and ``indices`` are
    placed into the result exactly as passed.

    Args:
        x (cupyx.scipy.sparse.csr_matrix): A matrix to be converted.
        data (cupy.ndarray): A data array for converted data.
        indices (cupy.ndarray): An index array for converted data.

    Returns:
        cupyx.scipy.sparse.coo_matrix: A converted matrix.

    Raises:
        RuntimeError: If ``check_availability('csr2coo')`` is false.
    """
    if not check_availability('csr2coo'):
        raise RuntimeError('csr2coo is not available.')

    handle = _device.get_cusparse_handle()
    n_rows = x.shape[0]
    count = x.nnz

    rows = _cupy.empty(count, 'i')
    _cusparse.xcsr2coo(
        handle, x.indptr.data.ptr, count, n_rows, rows.data.ptr,
        _cusparse.CUSPARSE_INDEX_BASE_ZERO)

    # No copy of data / indices is made at this point.
    coords = (rows, indices)
    return cupyx.scipy.sparse.coo_matrix((data, coords), shape=x.shape)
def cscsort(x):
    """Sorts indices of CSC-matrix in place.

    ``x.indices`` is sorted by the cuSPARSE CSC sort and ``x.data`` is
    rearranged with the resulting permutation.

    Args:
        x (cupy.sparse.csc_matrix): A sparse matrix to sort.

    Returns:
        None. Nothing is done when ``x.nnz`` is zero.
    """
    nnz = x.nnz
    if nnz == 0:
        # Nothing to sort; also avoids handing empty buffers to cuSPARSE.
        return

    handle = device.get_cusparse_handle()
    m, n = x.shape

    buffer_size = cusparse.xcscsort_bufferSizeExt(
        handle, m, n, nnz, x.indptr.data.ptr, x.indices.data.ptr)
    buf = cupy.empty(buffer_size, 'b')
    P = cupy.empty(nnz, 'i')
    # Snapshot the values: gathering with the same buffer as both source
    # and destination would read entries that were already overwritten.
    data_orig = x.data.copy()

    cusparse.createIdentityPermutation(handle, nnz, P.data.ptr)
    cusparse.xcscsort(
        handle, m, n, nnz, x._descr.descriptor,
        x.indptr.data.ptr, x.indices.data.ptr, P.data.ptr, buf.data.ptr)

    # Apply the permutation to the values: x.data[i] = data_orig[P[i]].
    _call_cusparse(
        'gthr', x.dtype,
        handle, nnz, data_orig.data.ptr, x.data.data.ptr,
        P.data.ptr, cusparse.CUSPARSE_INDEX_BASE_ZERO)
def dense2csc(x):
    """Converts a dense matrix in CSC format.

    Args:
        x (cupy.ndarray): A matrix to be converted.

    Returns:
        cupyx.scipy.sparse.csc_matrix: A converted matrix, flagged as
        having canonical format.
    """
    assert x.ndim == 2
    x = cupy.asfortranarray(x)
    # Host scalar that receives the total nnz from the counting pass.
    nnz = numpy.empty((), dtype='i')
    handle = device.get_cusparse_handle()
    m, n = x.shape

    descr = MatDescriptor.create()
    # Counting along columns yields one entry per column, so the buffer
    # needs n elements (m elements would overflow whenever n > m).
    nnz_per_col = cupy.empty(n, 'i')
    _call_cusparse(
        'nnz', x.dtype,
        handle, cusparse.CUSPARSE_DIRECTION_COLUMN, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr, nnz.ctypes.data)

    nnz = int(nnz)
    data = cupy.empty(nnz, x.dtype)
    indptr = cupy.empty(n + 1, 'i')
    indices = cupy.empty(nnz, 'i')

    _call_cusparse(
        'dense2csc', x.dtype,
        handle, m, n, descr.descriptor,
        x.data.ptr, m, nnz_per_col.data.ptr,
        data.data.ptr, indices.data.ptr, indptr.data.ptr)
    # Note that a descriptor is recreated
    csc = cupyx.scipy.sparse.csc_matrix((data, indices, indptr), shape=x.shape)
    csc._has_canonical_format = True
    return csc
def csrmvEx(a, x, y=None, alpha=1, beta=0, merge_path=True):
    """Multiply a CSR sparse matrix by a dense vector (``csrmvEx``).

    .. math::

       y = \\alpha * A x + \\beta y,

    Args:
        a (cupy.cusparse.csr_matrix): Matrix A.
        x (cupy.ndarray): Vector x.
        y (cupy.ndarray or None): Vector y. It must be F-contiguous.
            When ``None`` a zero-filled vector is allocated.
        alpha (float): Coefficient for x.
        beta (float): Coefficient for y.
        merge_path (bool): If ``True``, merge path algorithm is used.

        All pointers must be aligned with 128 bytes.

    Returns:
        cupy.ndarray: Calculated ``y``.

    Raises:
        RuntimeError: If ``check_availability('csrmvEx')`` is false.
        ValueError: If ``len(x)`` differs from the column count of ``a``.
    """
    if not check_availability('csrmvEx'):
        raise RuntimeError('csrmvEx is not available.')

    assert y is None or y.flags.f_contiguous
    if a.shape[1] != len(x):
        raise ValueError('dimension mismatch')

    handle = device.get_cusparse_handle()
    n_rows, _ = a.shape

    a, x, y = _cast_common_type(a, x, y)
    dtype = a.dtype
    if y is None:
        y = cupy.zeros(n_rows, dtype)

    datatype = _dtype_to_DataType(dtype)
    if merge_path:
        algmode = cusparse.CUSPARSE_ALG_MERGE_PATH
    else:
        algmode = cusparse.CUSPARSE_ALG_NAIVE
    transa_flag = cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE

    alpha_c = numpy.array(alpha, dtype).ctypes
    beta_c = numpy.array(beta, dtype).ctypes

    assert csrmvExIsAligned(a, x, y)

    # The buffer-size query and the actual call take the same leading
    # arguments; the latter additionally receives the work buffer.
    shared_args = (
        handle, algmode, transa_flag,
        a.shape[0], a.shape[1], a.nnz, alpha_c.data, datatype,
        a._descr.descriptor, a.data.data.ptr, datatype,
        a.indptr.data.ptr, a.indices.data.ptr,
        x.data.ptr, datatype, beta_c.data, datatype,
        y.data.ptr, datatype, datatype)

    work_size = cusparse.csrmvEx_bufferSize(*shared_args)
    work = cupy.empty(work_size, 'b')
    assert work.data.ptr % 128 == 0

    cusparse.csrmvEx(*shared_args, work.data.ptr)
    return y
def _lanczos_fast(A, n, ncv):
    """Build and return the update closure ``aux`` bound to ``A``.

    cuBLAS routines are selected from ``A.dtype.char`` and, when ``A`` is a
    CSR matrix and ``cusparse.check_availability('spmv')`` is true, the
    constants for the cuSPARSE SpMV call are prepared. Work vectors ``v``
    (length ``n``) and ``uu`` (length ``ncv``) are allocated once and shared
    by every call of the returned closure.

    Args:
        A: Operand; must be the same object later passed to ``aux``.
        n: Length of the work vector ``v`` and the vector length passed to
            the cuBLAS calls.
        ncv: Length of the work vector ``uu``.

    Returns:
        Callable ``aux(A, V, u, alpha, beta, i_start, i_end)`` which, for
        ``i`` in ``range(i_start, i_end)``, writes ``alpha[i]`` and
        ``beta[i]`` on the device and updates ``u``, ``v`` and ``V`` in
        place. It returns ``None``.

    Raises:
        TypeError: If ``A.dtype.char`` is not one of ``'f'``, ``'d'``,
            ``'F'``, ``'D'``.
    """
    cublas_handle = device.get_cublas_handle()
    # Remember the current pointer mode so it can be restored after each
    # temporary switch to device pointer mode below.
    cublas_pointer_mode = _cublas.getPointerMode(cublas_handle)
    if A.dtype.char == 'f':
        dotc = _cublas.sdot
        nrm2 = _cublas.snrm2
        gemm = _cublas.sgemm
    elif A.dtype.char == 'd':
        dotc = _cublas.ddot
        nrm2 = _cublas.dnrm2
        gemm = _cublas.dgemm
    elif A.dtype.char == 'F':
        dotc = _cublas.cdotc
        nrm2 = _cublas.scnrm2
        gemm = _cublas.cgemm
    elif A.dtype.char == 'D':
        dotc = _cublas.zdotc
        nrm2 = _cublas.dznrm2
        gemm = _cublas.zgemm
    else:
        raise TypeError('invalid dtype ({})'.format(A.dtype))

    # ``cusparse_handle is None`` is the flag for "use ``A @ v`` instead of
    # the cuSPARSE SpMV call" inside ``aux``.
    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_alpha = numpy.array(1.0, A.dtype)
        spmv_beta = numpy.array(0.0, A.dtype)
        spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    v = cupy.empty((n, ), dtype=A.dtype)
    uu = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalar constants handed to gemm by pointer.
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    mone = numpy.array(-1.0, dtype=A.dtype)

    outer_A = A

    def aux(A, V, u, alpha, beta, i_start, i_end):
        # The routines and SpMV settings above were chosen for ``outer_A``,
        # so the closure only accepts that same object.
        assert A is outer_A

        beta_eps = inversion_eps(A.dtype)

        # Get ready for spmv if enabled
        if cusparse_handle is not None:
            # Note: I would like to reuse descriptors and working buffer
            # on the next update, but I gave it up because it sometimes
            # caused illegal memory access error.
            spmv_desc_A = cusparse.SpMatDescriptor.create(A)
            spmv_desc_v = cusparse.DnVecDescriptor.create(v)
            spmv_desc_u = cusparse.DnVecDescriptor.create(u)
            buff_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
                spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
            spmv_buff = cupy.empty(buff_size, cupy.int8)

        v[...] = V[i_start]
        for i in range(i_start, i_end):
            # Matrix-vector multiplication
            if cusparse_handle is None:
                u[...] = A @ v
            else:
                _cusparse.spMV(
                    cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                    spmv_desc_A.desc, spmv_desc_v.desc,
                    spmv_beta.ctypes.data, spmv_desc_u.desc,
                    spmv_cuda_dtype, spmv_alg, spmv_buff.data.ptr)

            # Call dotc
            # The result is written straight into alpha[i] on the device,
            # which requires device pointer mode for the duration of the
            # call; the previous mode is always restored.
            _cublas.setPointerMode(
                cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1,
                     alpha.data.ptr + i * alpha.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Orthogonalize
            # Two gemm calls: the first writes i + 1 values into ``uu``,
            # the second updates ``u`` from ``V`` and ``uu`` with
            # coefficient -1 (``mone``) and accumulates into ``u``.
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
                 1, i + 1, n,
                 one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 zero.ctypes.data, uu.data.ptr, 1)
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
                 n, 1, i + 1,
                 mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # Call nrm2
            # As with dotc, the result goes directly into beta[i] on the
            # device.
            _cublas.setPointerMode(
                cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                nrm2(cublas_handle, n, u.data.ptr, 1,
                     beta.data.ptr + i * beta.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Break here as the normalization below touches V[i+1]
            if i >= i_end - 1:
                break

            # When beta[i] falls below the threshold, zero the remaining
            # rows of V and both work vectors, then stop.
            if beta[i] < beta_eps:
                V[i + 1:i_end, :] = 0
                u[...] = 0
                v[...] = 0
                break
            if i == i_start:
                beta_eps *= beta[i]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, i, n, v, V)

    return aux
def csrgemm2(a, b, d=None, alpha=1, beta=1):
    """Matrix-matrix product for CSR-matrix.

    .. math::

       C = \\alpha A B + \\beta D

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        d (cupyx.scipy.sparse.csr_matrix or None): Sparse matrix D.
            When ``None`` the ``beta D`` term is omitted (null pointers are
            passed for ``beta`` and the arrays of D).
        alpha (scalar): Coefficient for AB.
        beta (scalar): Coefficient for D; unused when ``d`` is ``None``.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix, flagged as having
        canonical format.

    Raises:
        RuntimeError: If ``check_availability('csrgemm2')`` is false.
        TypeError: If ``a``, ``b`` or ``d`` is not a ``csr_matrix``.
        ValueError: If the shapes of ``a``, ``b`` and ``d`` are
            incompatible.
    """
    if not check_availability('csrgemm2'):
        raise RuntimeError('csrgemm2 is not available.')

    assert a.ndim == b.ndim == 2
    if not isinstance(a, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(a)))
    if not isinstance(b, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(b)))
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape[1] != b.shape[0]:
        raise ValueError('mismatched shape')
    if d is not None:
        assert d.ndim == 2
        if not isinstance(d, cupyx.scipy.sparse.csr_matrix):
            raise TypeError('unsupported type (actual: {})'.format(type(d)))
        assert d.has_canonical_format
        if a.shape[0] != d.shape[0] or b.shape[1] != d.shape[1]:
            raise ValueError('mismatched shape')

    handle = device.get_cusparse_handle()
    m, k = a.shape
    _, n = b.shape

    if d is None:
        a, b = _cast_common_type(a, b)
    else:
        a, b, d = _cast_common_type(a, b, d)

    alpha = numpy.array(alpha, a.dtype).ctypes
    null_ptr = 0
    if d is None:
        beta_data = null_ptr
        d_descr = MatDescriptor.create()
        d_nnz = 0
        d_data = null_ptr
        d_indptr = null_ptr
        d_indices = null_ptr
    else:
        beta = numpy.array(beta, a.dtype).ctypes
        beta_data = beta.data
        d_descr = d._descr
        d_nnz = d.nnz
        d_data = d.data.data.ptr
        d_indptr = d.indptr.data.ptr
        d_indices = d.indices.data.ptr

    info = cusparse.createCsrgemm2Info()
    # The info object must be destroyed on every path, including when one
    # of the cuSPARSE calls or allocations below raises.
    try:
        buff_size = _call_cusparse(
            'csrgemm2_bufferSizeExt', a.dtype,
            handle, m, n, k, alpha.data,
            a._descr.descriptor, a.nnz, a.indptr.data.ptr,
            a.indices.data.ptr,
            b._descr.descriptor, b.nnz, b.indptr.data.ptr,
            b.indices.data.ptr,
            beta_data,
            d_descr.descriptor, d_nnz, d_indptr, d_indices,
            info)
        buff = cupy.empty(buff_size, numpy.int8)

        # Host scalar that receives the nnz count of C.
        c_nnz = numpy.empty((), 'i')
        cusparse.setPointerMode(handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

        c_descr = MatDescriptor.create()
        c_indptr = cupy.empty(m + 1, 'i')
        # First pass: fill indptr of C and obtain its nnz.
        cusparse.xcsrgemm2Nnz(
            handle, m, n, k,
            a._descr.descriptor, a.nnz, a.indptr.data.ptr,
            a.indices.data.ptr,
            b._descr.descriptor, b.nnz, b.indptr.data.ptr,
            b.indices.data.ptr,
            d_descr.descriptor, d_nnz, d_indptr, d_indices,
            c_descr.descriptor, c_indptr.data.ptr, c_nnz.ctypes.data,
            info, buff.data.ptr)

        c_indices = cupy.empty(int(c_nnz), 'i')
        c_data = cupy.empty(int(c_nnz), a.dtype)
        # Second pass: fill indices and values of C.
        _call_cusparse(
            'csrgemm2', a.dtype,
            handle, m, n, k, alpha.data,
            a._descr.descriptor, a.nnz, a.data.data.ptr,
            a.indptr.data.ptr, a.indices.data.ptr,
            b._descr.descriptor, b.nnz, b.data.data.ptr,
            b.indptr.data.ptr, b.indices.data.ptr,
            beta_data,
            d_descr.descriptor, d_nnz, d_data, d_indptr, d_indices,
            c_descr.descriptor, c_data.data.ptr, c_indptr.data.ptr,
            c_indices.data.ptr,
            info, buff.data.ptr)

        c = cupyx.scipy.sparse.csr_matrix(
            (c_data, c_indices, c_indptr), shape=(m, n))
        c._has_canonical_format = True
    finally:
        cusparse.destroyCsrgemm2Info(info)
    return c
def batched_gtsv(dl, d, du, B, algo='cyclic_reduction'):
    """Solves multiple tridiagonal systems (This is a bang method for B.)

    Args:
        dl, d, du (cupy.ndarray): Lower, main and upper diagonal vectors with
            last-dim sizes of N-1, N and N-1, respectively.
            Only two dimensional inputs are supported currently.
            The first dim is the batch dim.
        B (cupy.ndarray): Right-hand side vectors
            The first dim is the batch dim and the second dim is N.
        algo (str): algorithm, choose one from four algorithms;
            cyclic_reduction, cuThomas, LU_w_pivoting and QR.
            cuThomas is numerically unstable, and LU_w_pivoting is the LU
            algorithm with pivoting.

    Returns:
        cupy.ndarray: ``B`` as returned by ``cupy.ascontiguousarray``, after
        the solver call.

    Raises:
        ValueError: If ``algo`` is unknown, an input is not 2-d, or the
            shapes of ``dl``, ``du`` and ``B`` do not match ``d``.
        NotImplementedError: If the selected dtype is not ``'f'``, or (as
            laid out below) ``algo`` is not ``"cyclic_reduction"``.
    """
    if algo not in ["cyclic_reduction", "cuThomas", "LU_w_pivoting", "QR"]:
        raise ValueError(f"Unknown algorithm [{algo}]")

    util._assert_cupy_array(dl)
    util._assert_cupy_array(d)
    util._assert_cupy_array(du)
    util._assert_cupy_array(B)
    if dl.ndim != 2 or d.ndim != 2 or du.ndim != 2 or B.ndim != 2:
        raise ValueError('dl, d, du and B must be 2-d arrays')

    batchsize = d.shape[0]
    if batchsize != dl.shape[0] or batchsize != du.shape[
            0] or batchsize != B.shape[0]:
        raise ValueError(
            'The first dims of dl, du and B must match that of d.')
    N = d.shape[1]  # the size of the linear system
    if dl.shape[1] != N - 1 or du.shape[1] != N - 1 or B.shape[1] != N:
        raise ValueError(
            'The second dims of dl, du and B must match the second dim of d.')

    # the first element must be zero of dl
    padded_dl = cupy.ascontiguousarray(
        cupy.pad(dl, ((0, 0), (1, 0)), mode='constant', constant_values=0.0))
    # the last element must be zero of du
    padded_du = cupy.ascontiguousarray(
        cupy.pad(du, ((0, 0), (0, 1)), mode='constant', constant_values=0.0))
    # contiguous
    # NOTE(review): presumably ``ascontiguousarray`` returns a new array for
    # non-contiguous input, in which case the caller's ``B`` would not be
    # updated in place - confirm against the "bang method" claim above.
    d = cupy.ascontiguousarray(d)
    B = cupy.ascontiguousarray(B)

    # Cast to float32 or float64
    # NOTE(review): only a dtype is selected here; no array is converted in
    # this function. ``numpy.find_common_type`` is believed to be deprecated
    # in recent NumPy releases - confirm.
    if d.dtype == 'f' or d.dtype == 'd':
        dtype = d.dtype
    else:
        dtype = numpy.find_common_type((d.dtype, 'f'), ())

    handle = device.get_cusparse_handle()
    if dtype == 'f':
        if algo == "cyclic_reduction":
            gtsv2 = cusparse.sgtsv2StridedBatch
            get_buffer_size = cusparse.sgtsv2StridedBatch_bufferSizeExt
            #
            # One-element host array that receives the buffer size through
            # its ctypes pointer.
            buffer_size = numpy.empty(1, numpy.int32)
            get_buffer_size(handle, N, padded_dl.data.ptr, d.data.ptr,
                            padded_du.data.ptr, B.data.ptr, batchsize, N,
                            buffer_size.ctypes.data)
            buffer_size = int(buffer_size)
            buffer = cupy.zeros((buffer_size, ), dtype=cupy.uint8)
            gtsv2(handle, N, padded_dl.data.ptr, d.data.ptr,
                  padded_du.data.ptr, B.data.ptr, batchsize, N,
                  buffer.data.ptr)
        else:
            raise NotImplementedError
            # NOTE(review): the indentation of this function was
            # reconstructed from whitespace-collapsed text. As laid out
            # here, everything below in this branch is unreachable because
            # of the ``raise`` above - confirm against the original file.
            if algo == "cuThomas":
                algo_num = 0
            elif algo == "LU_w_pivoting":
                algo_num = 1
            elif algo == "QR":
                algo_num = 2
            else:
                raise ValueError
            gtsv2 = cusparse.sgtsvInterleavedBatch
            get_buffer_size = cusparse.sgtsvInterleavedBatch_bufferSizeExt
            #
            buffer_size = get_buffer_size(handle, algo_num, N,
                                          padded_dl.data.ptr, d.data.ptr,
                                          padded_du.data.ptr, B.data.ptr,
                                          batchsize)
            buffer = cupy.zeros((buffer_size, ), dtype=cupy.uint8)
            gtsv2(handle, algo_num, N, padded_dl.data.ptr, d.data.ptr,
                  padded_du.data.ptr, B.data.ptr, batchsize,
                  buffer.data.ptr)
    else:
        # Only the single-precision ('f') path is implemented.
        raise NotImplementedError

    return B
def csrgeam2(a, b, alpha=1, beta=1):
    """Add two scaled CSR matrices (``csrgeam2`` variant).

    .. math::

       C = \\alpha A + \\beta B

    Args:
        a (cupyx.scipy.sparse.csr_matrix): Sparse matrix A.
        b (cupyx.scipy.sparse.csr_matrix): Sparse matrix B.
        alpha (float): Coefficient for A.
        beta (float): Coefficient for B.

    Returns:
        cupyx.scipy.sparse.csr_matrix: Result matrix, flagged as having
        canonical format.

    Raises:
        RuntimeError: If ``check_availability('csrgeam2')`` is false.
        TypeError: If ``a`` or ``b`` is not a ``csr_matrix``.
        ValueError: If ``a`` and ``b`` have different shapes.
    """
    if not check_availability('csrgeam2'):
        raise RuntimeError('csrgeam2 is not available.')

    if not isinstance(a, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(a)))
    if not isinstance(b, cupyx.scipy.sparse.csr_matrix):
        raise TypeError('unsupported type (actual: {})'.format(type(b)))
    assert a.has_canonical_format
    assert b.has_canonical_format
    if a.shape != b.shape:
        raise ValueError('inconsistent shapes')

    handle = device.get_cusparse_handle()
    n_rows, n_cols = a.shape
    a, b = _cast_common_type(a, b)

    # Host scalar that receives the nnz count of C.
    nnz_host = numpy.empty((), 'i')
    cusparse.setPointerMode(
        handle, cusparse.CUSPARSE_POINTER_MODE_HOST)

    alpha_c = numpy.array(alpha, a.dtype).ctypes
    beta_c = numpy.array(beta, a.dtype).ctypes
    descr_c = MatDescriptor.create()
    indptr_c = cupy.empty(n_rows + 1, 'i')

    # Values and indices of C do not exist yet, so null pointers are
    # passed for them in the buffer-size query.
    null_ptr = 0
    work_size = _call_cusparse(
        'csrgeam2_bufferSizeExt', a.dtype,
        handle, n_rows, n_cols,
        alpha_c.data, a._descr.descriptor, a.nnz,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        beta_c.data, b._descr.descriptor, b.nnz,
        b.data.data.ptr, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor, null_ptr, indptr_c.data.ptr, null_ptr)
    work = cupy.empty(work_size, numpy.int8)

    # First pass: fill indptr of C and obtain its nnz.
    cusparse.xcsrgeam2Nnz(
        handle, n_rows, n_cols,
        a._descr.descriptor, a.nnz, a.indptr.data.ptr, a.indices.data.ptr,
        b._descr.descriptor, b.nnz, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor, indptr_c.data.ptr, nnz_host.ctypes.data,
        work.data.ptr)

    indices_c = cupy.empty(int(nnz_host), 'i')
    data_c = cupy.empty(int(nnz_host), a.dtype)

    # Second pass: fill indices and values of C.
    _call_cusparse(
        'csrgeam2', a.dtype,
        handle, n_rows, n_cols,
        alpha_c.data, a._descr.descriptor, a.nnz,
        a.data.data.ptr, a.indptr.data.ptr, a.indices.data.ptr,
        beta_c.data, b._descr.descriptor, b.nnz,
        b.data.data.ptr, b.indptr.data.ptr, b.indices.data.ptr,
        descr_c.descriptor,
        data_c.data.ptr, indptr_c.data.ptr, indices_c.data.ptr,
        work.data.ptr)

    result = cupyx.scipy.sparse.csr_matrix(
        (data_c, indices_c, indptr_c), shape=a.shape)
    result._has_canonical_format = True
    return result