def _make_fast_matvec(A):
    """Build a cuSPARSE-backed ``matvec`` closure for ``A`` when possible.

    The sparse-matrix descriptor and the SpMV working buffer are created
    once here and reused by every call of the returned closure.

    Args:
        A: Matrix to multiply with. The fast path is only taken when
            ``csr.isspmatrix_csr(A)`` is true and
            ``cusparse.check_availability('spmv')`` reports support.

    Returns:
        A callable ``matvec(x)`` returning a new array shaped like ``x``
        that holds the SpMV result, or ``None`` when the fast path is not
        applicable.
    """
    if not (csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')):
        return None

    spmv_handle = device.get_cusparse_handle()
    no_trans = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    # Host-side scalars; the closure below references them, which keeps the
    # memory behind ``.ctypes.data`` alive for as long as it is used.
    one = numpy.array(1.0, A.dtype)
    zero = numpy.array(0.0, A.dtype)
    value_type = _dtype.to_cuda_dtype(A.dtype)
    algo = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Throw-away vectors (length ``A.shape[0]``) used only to query the
    # size of the working buffer.
    probe_in = cupy.empty((A.shape[0], ), dtype=A.dtype)
    probe_out = cupy.empty((A.shape[0], ), dtype=A.dtype)
    mat_desc = cusparse.SpMatDescriptor.create(A)
    probe_in_desc = cusparse.DnVecDescriptor.create(probe_in)
    probe_out_desc = cusparse.DnVecDescriptor.create(probe_out)
    workspace_bytes = _cusparse.spMV_bufferSize(
        spmv_handle, no_trans, one.ctypes.data, mat_desc.desc,
        probe_in_desc.desc, zero.ctypes.data, probe_out_desc.desc,
        value_type, algo)
    workspace = cupy.empty(workspace_bytes, cupy.int8)
    del probe_in, probe_in_desc, probe_out, probe_out_desc

    def matvec(x):
        # Fresh output and fresh dense-vector descriptors for every call;
        # only the matrix descriptor and the workspace are shared.
        out = cupy.empty_like(x)
        in_desc = cusparse.DnVecDescriptor.create(x)
        out_desc = cusparse.DnVecDescriptor.create(out)
        _cusparse.spMV(
            spmv_handle, no_trans, one.ctypes.data, mat_desc.desc,
            in_desc.desc, zero.ctypes.data, out_desc.desc,
            value_type, algo, workspace.data.ptr)
        return out

    return matvec
def _spmv_init(self):
    """Create the SpMV descriptors and working buffer on ``self``.

    Does nothing when ``self.cusparse_handle`` is ``None``. Otherwise it
    stores descriptors for ``self.A``, ``self.v`` and ``self.u`` in
    ``spmv_desc_A``, ``spmv_desc_v`` and ``spmv_desc_u``, queries the
    working-buffer size and allocates ``spmv_buff`` as an ``int8`` array of
    that size.
    """
    handle = self.cusparse_handle
    if handle is None:
        return None

    self.spmv_desc_A = cusparse.SpMatDescriptor.create(self.A)
    self.spmv_desc_v = cusparse.DnVecDescriptor.create(self.v)
    self.spmv_desc_u = cusparse.DnVecDescriptor.create(self.u)

    # Same argument list as the later spMV call, minus the buffer pointer.
    query_args = (
        handle,
        self.spmv_op_a,
        self.spmv_alpha.ctypes.data,
        self.spmv_desc_A.desc,
        self.spmv_desc_v.desc,
        self.spmv_beta.ctypes.data,
        self.spmv_desc_u.desc,
        self.spmv_cuda_dtype,
        self.spmv_alg,
    )
    workspace_bytes = _cusparse.spMV_bufferSize(*query_args)
    self.spmv_buff = cupy.empty(workspace_bytes, cupy.int8)
def aux(A, V, u, alpha, beta, i_start, i_end):
    """Run iterations ``i_start .. i_end - 1`` of the basis update.

    For each ``i`` the loop computes ``u = A @ v``, stores the ``dotc`` of
    ``v`` and ``u`` in ``alpha[i]``, orthogonalizes ``u`` against the first
    ``i + 1`` rows of ``V`` with two ``gemm`` calls, stores the ``nrm2`` of
    ``u`` in ``beta[i]`` and then calls ``_kernel_normalize`` (which, per
    the comment below, touches ``V[i + 1]``).

    This is a closure: besides its arguments it uses names from the
    enclosing scope, including ``outer_A``, ``v``, ``uu``, ``n``, ``one``,
    ``zero``, ``mone``, ``dotc``, ``gemm``, ``nrm2``, ``cublas_handle``,
    ``cublas_pointer_mode``, ``cusparse_handle`` and the ``spmv_*``
    settings.

    Args:
        A: Must be the very same object as ``outer_A`` (asserted).
        V: Array indexed by row (``V[i_start]``, ``V[i + 1:i_end, :]``);
            rows past the current one are zeroed on early termination.
        u: Work vector; overwritten on every iteration.
        alpha: Device array; element ``i`` receives the ``dotc`` result.
        beta: Device array; element ``i`` receives the ``nrm2`` result.
        i_start: First iteration index (inclusive).
        i_end: Last iteration index (exclusive).
    """
    assert A is outer_A

    beta_eps = inversion_eps(A.dtype)

    # Get ready for spmv if enabled
    if cusparse_handle is not None:
        # Note: I would like to reuse descriptors and working buffer
        # on the next update, but I gave it up because it sometimes
        # caused illegal memory access error.
        spmv_desc_A = cusparse.SpMatDescriptor.create(A)
        spmv_desc_v = cusparse.DnVecDescriptor.create(v)
        spmv_desc_u = cusparse.DnVecDescriptor.create(u)
        buff_size = _cusparse.spMV_bufferSize(
            cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
            spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
            spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
        spmv_buff = cupy.empty(buff_size, cupy.int8)

    # Seed the working vector (from the enclosing scope) with the
    # starting row of V.
    v[...] = V[i_start]
    for i in range(i_start, i_end):
        # Matrix-vector multiplication
        if cusparse_handle is None:
            u[...] = A @ v
        else:
            _cusparse.spMV(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc,
                spmv_beta.ctypes.data, spmv_desc_u.desc,
                spmv_cuda_dtype, spmv_alg, spmv_buff.data.ptr)

        # Call dotc
        # The result pointer handed to dotc is a device address
        # (alpha.data.ptr + offset), hence the temporary switch to device
        # pointer mode; the previous mode is restored even on error.
        _cublas.setPointerMode(
            cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
        try:
            dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1,
                 alpha.data.ptr + i * alpha.itemsize)
        finally:
            _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

        # Orthogonalize
        # First gemm: writes a 1 x (i + 1) result into uu -- presumably the
        # coefficients of u along the first i + 1 rows of V.
        gemm(cublas_handle,
             _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
             1, i + 1, n,
             one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
             zero.ctypes.data, uu.data.ptr, 1)
        # Second gemm: accumulates into u with scalars ``mone`` and ``one``
        # -- presumably subtracting that projection (assumes mone == -1;
        # confirm in the enclosing scope).
        gemm(cublas_handle,
             _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
             n, 1, i + 1,
             mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
             one.ctypes.data, u.data.ptr, n)

        # Call nrm2
        # Same device-pointer-mode dance as for dotc; the norm lands in
        # beta[i].
        _cublas.setPointerMode(
            cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
        try:
            nrm2(cublas_handle, n, u.data.ptr, 1,
                 beta.data.ptr + i * beta.itemsize)
        finally:
            _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

        # Break here as the normalization below touches V[i+1]
        if i >= i_end - 1:
            break

        # A norm below the threshold ends the iteration early: the
        # remaining rows of V and both work vectors are zeroed.
        # NOTE(review): this comparison presumably forces a device-to-host
        # read of beta[i] -- confirm.
        if beta[i] < beta_eps:
            V[i + 1:i_end, :] = 0
            u[...] = 0
            v[...] = 0
            break
        if i == i_start:
            beta_eps *= beta[i]  # scale eps to largest beta

        # Normalize
        _kernel_normalize(u, beta, i, n, v, V)
def spmv(a, x, y=None, alpha=1, beta=0, transa=False):
    """Multiplication of sparse matrix and dense vector.

    .. math::

       y = \\alpha * op(A) x + \\beta * y

    Args:
        a (cupyx.scipy.sparse.csr_matrix, csc_matrix or coo_matrix):
            Sparse matrix A
        x (cupy.ndarray): Dense vector x
        y (cupy.ndarray or None): Dense vector y
        alpha (scalar): Coefficient
        beta (scalar): Coefficient
        transa (bool): If ``True``, op(A) = transpose of A.

    Returns:
        cupy.ndarray: Calculated ``y``.

    Raises:
        RuntimeError: If ``spmv`` is not available.
        TypeError: If ``a`` is not a supported sparse matrix type.
        ValueError: If the length of ``x`` or ``y`` does not match the
            shape of ``op(A)``.
    """
    if not check_availability('spmv'):
        raise RuntimeError('spmv is not available.')

    if isinstance(a, cupyx.scipy.sparse.csc_matrix):
        # A CSC matrix is processed through its transpose with the
        # transpose flag flipped.
        aT = a.T
        if not isinstance(aT, cupyx.scipy.sparse.csr_matrix):
            msg = 'aT must be csr_matrix (actual: {})'.format(type(aT))
            raise TypeError(msg)
        a = aT
        transa = not transa
    if not isinstance(a, (cupyx.scipy.sparse.csr_matrix,
                          cupyx.scipy.sparse.coo_matrix)):
        raise TypeError('unsupported type (actual: {})'.format(type(a)))

    a_shape = a.shape if not transa else a.shape[::-1]
    if a_shape[1] != len(x):
        raise ValueError('dimension mismatch')
    # Kept as an assert to preserve the exception type seen by callers.
    assert a.has_canonical_format

    m, n = a_shape
    a, x, y = _cast_common_type(a, x, y)
    if y is None:
        y = cupy.zeros(m, a.dtype)
    elif len(y) != m:
        raise ValueError('dimension mismatch')

    if a.nnz == 0:
        # op(A) is all zeros, so the documented formula reduces to
        # ``beta * y``. The previous code always zeroed ``y`` and thus
        # silently dropped the ``beta * y`` term for a caller-supplied y.
        if beta == 0:
            # Overwrite rather than multiply so that NaN/Inf already in
            # ``y`` cannot leak into the result.
            y[...] = 0
        else:
            # Convert through ``a.dtype`` so that an incompatible ``beta``
            # fails the same way as on the general path below.
            y *= numpy.array(beta, a.dtype).item()
        return y

    desc_a = SpMatDescriptor.create(a)
    desc_x = DnVecDescriptor.create(x)
    desc_y = DnVecDescriptor.create(y)

    handle = device.get_cusparse_handle()
    op_a = _transpose_flag(transa)
    # Host scalars in the matrix dtype, passed to cuSPARSE by pointer.
    alpha = numpy.array(alpha, a.dtype).ctypes
    beta = numpy.array(beta, a.dtype).ctypes
    cuda_dtype = _dtype_to_DataType(a.dtype)
    alg = cusparse.CUSPARSE_MV_ALG_DEFAULT
    buff_size = cusparse.spMV_bufferSize(handle, op_a, alpha.data,
                                         desc_a.desc, desc_x.desc, beta.data,
                                         desc_y.desc, cuda_dtype, alg)
    buff = cupy.empty(buff_size, cupy.int8)
    cusparse.spMV(handle, op_a, alpha.data, desc_a.desc, desc_x.desc,
                  beta.data, desc_y.desc, cuda_dtype, alg, buff.data.ptr)

    return y