def test_cusparseScsrmm2_notranspose():
    """Check cusparseScsrmm2 (no transposes) against a host-side reference.

    A random sparse 5x3 float32 CSR matrix ``A`` and a dense 3x6 float32
    matrix ``B`` are combined on the host as ``A*B + 0.5*C`` (``C`` all
    ones).  The same operands are uploaded to the GPU, the output buffer is
    pre-filled with ones, and the cuSPARSE result must match the host value
    within ``rtol=1e-4``.
    """
    # Host-side operands: small entries are zeroed to make A sparse.
    dense_a = np.random.laplace(size=(5, 3)).astype(np.float32)
    dense_a[dense_a < 0.1] = 0
    A = sparse.csr_matrix(dense_a, dtype=np.float32)
    B = np.random.normal(size=(3, 6)).astype(np.float32, order="f")
    C = np.ones((A.shape[0], B.shape[1]), dtype=np.float32)
    expected = (A * B) + 0.5 * C

    # Device copies of the three CSR buffers and of the dense operand.
    dev_values = gpu.to_gpu(A.data)
    dev_row_ptr = gpu.to_gpu(A.indptr)
    dev_col_idx = gpu.to_gpu(A.indices)
    dev_b = gpu.to_gpu(B)

    handle = cusparse.cusparseCreate()
    descr = cusparse.cusparseCreateMatDescr()

    # The output is allocated with swapped dimensions and transposed after
    # download (presumably because cuSPARSE writes column-major results).
    dev_c = gpu.empty((C.shape[1], C.shape[0]), dtype=A.dtype)
    dev_c.fill(1.0)

    no_trans = cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    n_cols_out, n_rows_out = dev_c.shape
    inner_dim = dev_b.shape[0]
    cusparse.cusparseScsrmm2(
        handle,
        no_trans,
        no_trans,
        n_rows_out,
        n_cols_out,
        inner_dim,
        A.nnz,
        1.0,
        descr,
        dev_values.gpudata,
        dev_row_ptr.gpudata,
        dev_col_idx.gpudata,
        dev_b.gpudata,
        inner_dim,
        0.5,
        dev_c.gpudata,
        n_rows_out,
    )

    assert_allclose(dev_c.get().T, expected, rtol=1e-4)
def test_cusparseScsr2dense():
    """Check cusparseScsr2dense against SciPy's own densification.

    A random sparse 3x5 float32 CSR matrix (with sorted indices) is uploaded
    to the GPU, converted to a dense Fortran-ordered array by cuSPARSE, and
    the downloaded result must match the host-side dense form within
    ``rtol=1e-4``.
    """
    # Small entries are zeroed so the matrix is actually sparse.
    dense_a = np.random.laplace(size=(3, 5)).astype(np.float32)
    dense_a[dense_a < 0.1] = 0
    A = sparse.csr_matrix(dense_a, dtype=np.float32)
    A.sort_indices()

    a_data = gpu.to_gpu(A.data)
    a_indptr = gpu.to_gpu(A.indptr)
    a_indices = gpu.to_gpu(A.indices)
    # Fortran-ordered output; its leading dimension is the row count.
    out = gpu.empty((A.shape[0], A.shape[1]), dtype=A.dtype, order="F")

    h = cusparse.cusparseCreate()
    descrA = cusparse.cusparseCreateMatDescr()
    cusparse.cusparseScsr2dense(
        h,
        A.shape[0],
        A.shape[1],
        descrA,
        a_data.gpudata,
        a_indptr.gpudata,
        a_indices.gpudata,
        out.gpudata,
        out.shape[0],
    )

    # ``toarray()`` instead of the ``.A`` shorthand: ``.A`` was deprecated
    # and later removed from SciPy sparse matrices, while ``toarray()``
    # returns the same dense ndarray on every SciPy version.
    assert_allclose(out.get(), A.toarray(), rtol=1e-4)
def __init__(self, array, dtype=None, allocator=mem_alloc, stream=None):
    """Upload a sparse matrix to the GPU in CSR form.

    Parameters
    ----------
    array
        Sparse matrix exposing ``dtype``, ``nnz`` and ``shape``.  It is
        converted to ``scipy.sparse.csr_matrix`` if it is not one already,
        and its indices are sorted if they are not.
    dtype
        Element type for the stored values; defaults to ``array.dtype``.
    allocator
        Device memory allocator forwarded to the ``gpuarray`` upload calls.
    stream
        When not ``None``, the three CSR buffers are uploaded with
        ``gpuarray.to_gpu_async`` on this stream; otherwise
        ``gpuarray.to_gpu`` is used.

    Notes
    -----
    When ``array.nnz == 0`` only ``dtype``, ``nnz`` and ``shape`` are set;
    ``data``, ``indptr``, ``indices`` and ``descr`` are not created.
    """
    self.dtype = dtype if dtype is not None else array.dtype
    self.nnz = array.nnz
    self.shape = array.shape

    # let's not waste time
    if self.nnz == 0:
        return

    if not sparse.isspmatrix_csr(array):
        array = sparse.csr_matrix(array, dtype=self.dtype)
    if not array.has_sorted_indices:
        array = array.sorted_indices()

    # Pick the transfer routine once so the three uploads share one path.
    if stream is None:
        def upload(host_buffer):
            return gpuarray.to_gpu(host_buffer, allocator=allocator)
    else:
        def upload(host_buffer):
            return gpuarray.to_gpu_async(
                host_buffer, allocator=allocator, stream=stream
            )

    self.data = upload(array.data.astype(dtype=self.dtype))
    self.indptr = upload(array.indptr)
    self.indices = upload(array.indices)
    self.descr = cusparse.cusparseCreateMatDescr()