def _setup_scalar_ptr(handle, a, dtype):
    """Prepare a scalar argument and matching pointer mode for a cuBLAS call.

    The scalar is first normalised by ``_get_scalar_ptr``; the handle's
    pointer mode is then switched to device mode when the normalised scalar
    is a ``cupy.ndarray`` and to host mode otherwise.

    Args:
        handle: cuBLAS handle whose pointer mode is read and then changed.
        a: Scalar value handed to ``_get_scalar_ptr``.
        dtype: Target dtype handed to ``_get_scalar_ptr``.

    Returns:
        tuple: ``(a, a_ptr, mode)`` where ``a`` and ``a_ptr`` are the values
        returned by ``_get_scalar_ptr`` and ``mode`` is the pointer mode that
        was active on ``handle`` before this function changed it (presumably
        so that the caller can restore it afterwards).
    """
    a, a_ptr = _get_scalar_ptr(a, dtype)
    # Remember the current mode before it is overwritten below.
    mode = cublas.getPointerMode(handle)
    new_mode = (
        cublas.CUBLAS_POINTER_MODE_DEVICE
        if isinstance(a, cupy.ndarray)
        else cublas.CUBLAS_POINTER_MODE_HOST
    )
    cublas.setPointerMode(handle, new_mode)
    return a, a_ptr, mode
def _setup_scalar_ptr(handle, a, dtype):
    """Resolve a scalar to a raw pointer and set the matching pointer mode.

    A ``cupy.ndarray`` scalar selects device pointer mode; anything else is
    treated as a host scalar and selects host pointer mode. In both cases the
    value is converted to ``dtype`` when it does not already have it.

    Args:
        handle: cuBLAS handle whose pointer mode is read and then changed.
        a: Scalar value (``cupy.ndarray``, ``numpy.ndarray`` or anything
            ``numpy.array`` accepts).
        dtype: dtype the scalar must have when handed to cuBLAS.

    Returns:
        tuple: ``(a_ptr, mode)`` where ``a_ptr`` is the address of the scalar
        and ``mode`` is the pointer mode that was active on ``handle`` before
        this function changed it.
    """
    # NOTE(review): when a conversion happens the new array is only bound to
    # a local name and is not returned, so the caller has no reference that
    # keeps the buffer behind ``a_ptr`` alive -- confirm callers are safe.
    mode = cublas.getPointerMode(handle)
    if isinstance(a, cupy.ndarray):
        if a.dtype != dtype:
            a = cupy.array(a, dtype=dtype)
        a_ptr = a.data.ptr
        new_mode = cublas.CUBLAS_POINTER_MODE_DEVICE
    else:
        already_ok = isinstance(a, numpy.ndarray) and a.dtype == dtype
        if not already_ok:
            a = numpy.array(a, dtype=dtype)
        a_ptr = a.ctypes.data
        new_mode = cublas.CUBLAS_POINTER_MODE_HOST
    cublas.setPointerMode(handle, new_mode)
    return a_ptr, mode
def sbmv(k, alpha, a, x, beta, y, lower=False):
    """Computes y = alpha*A @ x + beta * y

    Thin wrapper around cuBLAS ``ssbmv`` / ``dsbmv``.

    Args:
        k: Passed to cuBLAS as the ``k`` argument of ``?sbmv``.
        alpha: Scalar multiplier for ``A @ x``; host value or
            ``cupy.ndarray``.
        a (cupy.ndarray): 2-D array of shape ``(m, n)`` handed to cuBLAS with
            leading dimension ``m``. It is copied to Fortran order when it is
            not already F-contiguous.
        x (cupy.ndarray): 1-D array of length ``n``.
        beta: Scalar multiplier for ``y``; host value or ``cupy.ndarray``.
        y (cupy.ndarray): 1-D array of length ``n``; passed to cuBLAS as the
            output vector.
        lower (bool): Selects ``CUBLAS_FILL_MODE_LOWER`` when true and
            ``CUBLAS_FILL_MODE_UPPER`` otherwise.

    Returns:
        cupy.ndarray: ``y``.

    Raises:
        TypeError: If ``a`` is neither float32 nor float64.
    """
    dtype = a.dtype.char
    if dtype == 'f':
        func = cublas.ssbmv
    elif dtype == 'd':
        func = cublas.dsbmv
    else:
        raise TypeError('Complex dtypes not supported')

    assert a.ndim == 2
    assert x.ndim == y.ndim == 1
    assert a.dtype == x.dtype == y.dtype
    m, n = a.shape
    assert x.shape[0] == n
    assert y.shape[0] == n

    if not a._f_contiguous:
        a = a.copy(order='F')

    if lower:
        uplo = cublas.CUBLAS_FILL_MODE_LOWER
    else:
        uplo = cublas.CUBLAS_FILL_MODE_UPPER

    alpha, alpha_ptr = _get_scalar_ptr(alpha, a.dtype)
    beta, beta_ptr = _get_scalar_ptr(beta, a.dtype)
    handle = device.get_cublas_handle()
    orig_mode = cublas.getPointerMode(handle)
    if isinstance(alpha, cupy.ndarray) or isinstance(beta, cupy.ndarray):
        # cuBLAS uses one pointer mode for both scalars, so if either lives
        # on the device the other one is moved there as well.
        if not isinstance(alpha, cupy.ndarray):
            alpha = cupy.array(alpha)
            alpha_ptr = alpha.data.ptr
        if not isinstance(beta, cupy.ndarray):
            beta = cupy.array(beta)
            beta_ptr = beta.data.ptr
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_DEVICE)
    else:
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_HOST)

    # Nothing may run between changing the pointer mode and entering the
    # ``try`` so that the original mode is always restored.
    try:
        func(handle, uplo, n, k,
             alpha_ptr, a.data.ptr, m, x.data.ptr, 1,
             beta_ptr, y.data.ptr, 1)
    finally:
        cublas.setPointerMode(handle, orig_mode)
    return y
def __init__(self, A, V, alpha, beta, update_impl='fast'):
    """Store the problem arrays and, for ``'fast'``, set up GPU resources.

    Args:
        A: Square 2-D operator of shape ``(n, n)``.
        V: 2-D array of shape ``(ncv, n)`` with the same dtype as ``A``.
        alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
        beta: 1-D array of length ``ncv`` whose dtype char equals the
            lower-cased dtype char of ``A``.
        update_impl (str): When it is anything other than ``'fast'`` only the
            plain attributes are stored and no cuBLAS/cuSPARSE state is
            created.

    Raises:
        TypeError: In the ``'fast'`` path, if ``A.dtype`` is not one of
            float32, float64, complex64 or complex128.
    """
    assert A.ndim == V.ndim == 2
    assert alpha.ndim == beta.ndim == 1
    assert A.dtype == V.dtype == alpha.dtype
    assert A.dtype.char.lower() == beta.dtype.char
    assert A.shape[0] == A.shape[1] == V.shape[1]
    assert V.shape[0] == alpha.shape[0] == beta.shape[0]

    self.A = A
    self.V = V
    self.alpha = alpha
    self.beta = beta
    self.n, self.ncv = V.shape[1], V.shape[0]
    self.update_impl = update_impl
    if self.update_impl != 'fast':
        return

    self.cublas_handle = device.get_cublas_handle()
    self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

    # dtype char -> names of the (dot, nrm2, gemm) cuBLAS routines. Names are
    # resolved lazily so only the routines for the active dtype are touched.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }
    selected = routine_names.get(A.dtype.char)
    if selected is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dot_name, nrm2_name, gemm_name = selected
    self.dotc = getattr(_cublas, dot_name)
    self.nrm2 = getattr(_cublas, nrm2_name)
    self.gemm = getattr(_cublas, gemm_name)

    use_spmv = csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')
    if not use_spmv:
        # A ``None`` handle marks that the cuSPARSE SpMV state was not set up.
        self.cusparse_handle = None
    else:
        self.cusparse_handle = device.get_cusparse_handle()
        self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        self.spmv_alpha = numpy.array(1.0, A.dtype)
        self.spmv_beta = numpy.array(0.0, A.dtype)
        self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
        self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Scratch vectors: two of length n and one of length ncv.
    vec_dtype = A.dtype
    self.v = cupy.empty((self.n, ), dtype=vec_dtype)
    self.u = cupy.empty((self.n, ), dtype=vec_dtype)
    self.uu = cupy.empty((self.ncv, ), dtype=vec_dtype)
def _setup_result_ptr(handle, out, dtype):
    """Choose the buffer that receives a scalar result and set pointer mode.

    ``None`` or a ``cupy.ndarray`` selects a device-side result and device
    pointer mode; a ``numpy.ndarray`` selects a host-side result and host
    pointer mode. ``out`` itself is used as the result buffer only when its
    dtype equals ``dtype``; otherwise a fresh 0-dimensional array of
    ``dtype`` is allocated on the same side.

    Args:
        handle: cuBLAS handle whose pointer mode is read and then changed.
        out: ``None``, ``cupy.ndarray`` or ``numpy.ndarray``.
        dtype: dtype of the result buffer.

    Returns:
        tuple: ``(result_ptr, result, mode)`` -- the buffer address, the
        buffer itself, and the pointer mode that was active on ``handle``
        before this function changed it.

    Raises:
        TypeError: If ``out`` is neither ``None`` nor a cupy/numpy ndarray.
    """
    mode = cublas.getPointerMode(handle)

    on_device = out is None or isinstance(out, cupy.ndarray)
    if not on_device and not isinstance(out, numpy.ndarray):
        raise TypeError('out must be either cupy or numpy ndarray')

    if on_device:
        needs_fresh = out is None or out.dtype != dtype
        result = cupy.empty([], dtype=dtype) if needs_fresh else out
        result_ptr = result.data.ptr
        new_mode = cublas.CUBLAS_POINTER_MODE_DEVICE
    else:
        needs_fresh = out.dtype != dtype
        result = numpy.empty([], dtype=dtype) if needs_fresh else out
        result_ptr = result.ctypes.data
        new_mode = cublas.CUBLAS_POINTER_MODE_HOST

    cublas.setPointerMode(handle, new_mode)
    return result_ptr, result, mode
def _lanczos_fast(A, n, ncv):
    """Build a Lanczos update routine that calls cuBLAS/cuSPARSE directly.

    The cuBLAS handle, the dtype-specific routines, optional cuSPARSE SpMV
    settings and the scratch buffers are prepared once here and captured by
    the returned closure.

    Args:
        A: Operator; the returned closure asserts that it is called with
            this very object.
        n (int): Length of the work vectors.
        ncv (int): Length of the projection-coefficient scratch buffer.

    Returns:
        callable: ``aux(A, V, u, alpha, beta, i_start, i_end)`` which runs
        iterations ``i_start`` .. ``i_end - 1``, writing ``alpha[i]`` and
        ``beta[i]`` and updating ``u`` (and, through ``_kernel_normalize``,
        presumably ``V[i + 1]``).

    Raises:
        TypeError: If ``A.dtype`` is not float32/64 or complex64/128.
    """
    cublas_handle = device.get_cublas_handle()
    cublas_pointer_mode = _cublas.getPointerMode(cublas_handle)

    # dtype char -> names of the (dot, nrm2, gemm) cuBLAS routines.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }
    selected = routine_names.get(A.dtype.char)
    if selected is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dotc = getattr(_cublas, selected[0])
    nrm2 = getattr(_cublas, selected[1])
    gemm = getattr(_cublas, selected[2])

    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_alpha = numpy.array(1.0, A.dtype)
        spmv_beta = numpy.array(0.0, A.dtype)
        spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    v = cupy.empty((n, ), dtype=A.dtype)
    uu = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalar constants handed to gemm by address.
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    mone = numpy.array(-1.0, dtype=A.dtype)

    outer_A = A

    def _call_in_device_mode(routine, *args):
        # Run ``routine`` with the handle in device pointer mode (its result
        # is written to device memory) and always restore the captured mode.
        _cublas.setPointerMode(
            cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
        try:
            routine(cublas_handle, *args)
        finally:
            _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

    def aux(A, V, u, alpha, beta, i_start, i_end):
        assert A is outer_A

        beta_eps = inversion_eps(A.dtype)
        use_spmv = cusparse_handle is not None

        if use_spmv:
            # Descriptors and the work buffer are rebuilt on every call: the
            # original author notes that reusing them across updates
            # sometimes caused illegal memory access errors.
            spmv_desc_A = cusparse.SpMatDescriptor.create(A)
            spmv_desc_v = cusparse.DnVecDescriptor.create(v)
            spmv_desc_u = cusparse.DnVecDescriptor.create(u)
            buff_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
                spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
            spmv_buff = cupy.empty(buff_size, cupy.int8)

        v[...] = V[i_start]
        for i in range(i_start, i_end):
            # u = A @ v, through cuSPARSE when it was set up.
            if use_spmv:
                _cusparse.spMV(
                    cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                    spmv_desc_A.desc, spmv_desc_v.desc,
                    spmv_beta.ctypes.data, spmv_desc_u.desc,
                    spmv_cuda_dtype, spmv_alg, spmv_buff.data.ptr)
            else:
                u[...] = A @ v

            # alpha[i] = dotc(v, u), written straight into device memory.
            _call_in_device_mode(
                dotc, n, v.data.ptr, 1, u.data.ptr, 1,
                alpha.data.ptr + i * alpha.itemsize)

            # Orthogonalize u using the first i + 1 vectors stored in V; the
            # scalars are host pointers.
            # NOTE(review): this relies on the handle's ambient pointer mode
            # being host mode -- confirm.
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
                 1, i + 1, n,
                 one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 zero.ctypes.data, uu.data.ptr, 1)
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
                 n, 1, i + 1,
                 mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # beta[i] = nrm2(u), written straight into device memory.
            _call_in_device_mode(
                nrm2, n, u.data.ptr, 1,
                beta.data.ptr + i * beta.itemsize)

            # Stop before normalising on the last step, because the
            # normalisation below touches V[i + 1].
            if i >= i_end - 1:
                break

            if beta[i] < beta_eps:
                # Norm fell below the threshold: zero the remaining rows and
                # the work vectors, then stop.
                V[i + 1:i_end, :] = 0
                u[...] = 0
                v[...] = 0
                break
            if i == i_start:
                beta_eps *= beta[i]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, i, n, v, V)

    return aux
def syrk(trans, a, out=None, alpha=1.0, beta=0.0, lower=False):
    """Computes out := alpha*op1(a)*op2(a) + beta*out

    op1(a) = a if trans is 'N', op2(a) = a.T if trans is 'N'
    op1(a) = a.T if trans is 'T', op2(a) = a if trans is 'T'
    lower specifies whether the upper or lower triangular
    part of the array out is to be referenced

    Args:
        trans: Operation selector, converted by ``_trans_to_cublas_op``.
        a (cupy.ndarray): 2-D input of shape ``(n, k)`` for 'N' and
            ``(k, n)`` otherwise. It is copied when its memory layout cannot
            be handed to cuBLAS directly.
        out (cupy.ndarray, optional): ``(n, n)`` array with the dtype of
            ``a``. When omitted a zero-filled Fortran-order array is created
            and ``beta`` is forced to ``0.0``.
        alpha: Scalar multiplier; host value or ``cupy.ndarray``.
        beta: Scalar multiplier for ``out``; host value or ``cupy.ndarray``.
        lower (bool): Reference the lower triangle of ``out`` when true, the
            upper triangle otherwise.

    Returns:
        cupy.ndarray: ``out``.

    Raises:
        TypeError: If ``a.dtype`` is not float32/64 or complex64/128.
    """
    assert a.ndim == 2
    dtype = a.dtype.char
    if dtype == 'f':
        func = cublas.ssyrk
    elif dtype == 'd':
        func = cublas.dsyrk
    elif dtype == 'F':
        func = cublas.csyrk
    elif dtype == 'D':
        func = cublas.zsyrk
    else:
        raise TypeError('invalid dtype')

    # ``op`` is the operation the caller asked for; it is kept untouched so
    # that the op for a freshly copied ``a`` can be derived from it directly.
    op = _trans_to_cublas_op(trans)
    if op == cublas.CUBLAS_OP_N:
        n, k = a.shape
    else:
        k, n = a.shape
    if out is None:
        out = cupy.zeros((n, n), dtype=dtype, order='F')
        beta = 0.0
    else:
        assert out.ndim == 2
        assert out.shape == (n, n)
        assert out.dtype == dtype

    if lower:
        uplo = cublas.CUBLAS_FILL_MODE_LOWER
    else:
        uplo = cublas.CUBLAS_FILL_MODE_UPPER

    alpha, alpha_ptr = _get_scalar_ptr(alpha, a.dtype)
    beta, beta_ptr = _get_scalar_ptr(beta, a.dtype)
    handle = device.get_cublas_handle()
    orig_mode = cublas.getPointerMode(handle)
    if isinstance(alpha, cupy.ndarray) or isinstance(beta, cupy.ndarray):
        # cuBLAS uses one pointer mode for both scalars, so if either lives
        # on the device the other one is moved there as well.
        if not isinstance(alpha, cupy.ndarray):
            alpha = cupy.array(alpha)
            alpha_ptr = alpha.data.ptr
        if not isinstance(beta, cupy.ndarray):
            beta = cupy.array(beta)
            beta_ptr = beta.data.ptr
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_DEVICE)
    else:
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_HOST)

    # Everything after the pointer-mode switch (layout decisions and copies
    # included) is guarded so that the original mode is always restored.
    try:
        lda, op_a = _decide_ld_and_trans(a, op)
        if out._c_contiguous:
            # A C-contiguous (n, n) buffer is the column-major view of the
            # transposed result. The result is symmetric, so only the
            # referenced triangle has to be swapped.
            c = out
            fill = 1 - uplo
            if not a._c_contiguous:
                # The row-major copy read column-major is a.T, hence the
                # flipped op relative to what the caller requested.
                a = a.copy(order='C')
                op_a = 1 - op
                lda = a.shape[1]
        else:
            fill = uplo
            if not a._f_contiguous:
                # A column-major copy needs exactly the requested op.
                a = a.copy(order='F')
                op_a = op
                lda = a.shape[0]
            # cuBLAS needs a column-major target; work on a copy (which also
            # carries the existing values for the beta term) when ``out``
            # itself is not F-contiguous.
            c = out if out._f_contiguous else out.copy(order='F')
        # ``c`` is a contiguous (n, n) array, so its leading dimension is n.
        func(handle, fill, op_a, n, k,
             alpha_ptr, a.data.ptr, lda,
             beta_ptr, c.data.ptr, n)
    finally:
        cublas.setPointerMode(handle, orig_mode)

    if c is not out:
        out[...] = c
    return out
def geam(transa, transb, alpha, a, beta, b, out=None):
    """Computes alpha * op(a) + beta * op(b)

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.
    op(b) = b if transb is 'N', op(b) = b.T if transb is 'T',
    op(b) = b.T.conj() if transb is 'H'.

    Args:
        transa: Operation selector for ``a``, converted by
            ``_trans_to_cublas_op``.
        transb: Operation selector for ``b``, converted by
            ``_trans_to_cublas_op``.
        alpha: Scalar multiplier for ``op(a)``; host value or
            ``cupy.ndarray``.
        a (cupy.ndarray): 2-D input; ``op(a)`` has shape ``(m, n)``.
        beta: Scalar multiplier for ``op(b)``; host value or
            ``cupy.ndarray``.
        b (cupy.ndarray): 2-D input with the dtype of ``a``; ``op(b)`` has
            shape ``(m, n)``.
        out (cupy.ndarray, optional): ``(m, n)`` array with the dtype of
            ``a``. A new Fortran-order array is allocated when omitted.

    Returns:
        cupy.ndarray: ``out``.

    Raises:
        TypeError: If ``a.dtype`` is not float32/64 or complex64/128.
    """
    assert a.ndim == b.ndim == 2
    assert a.dtype == b.dtype
    dtype = a.dtype.char
    if dtype == 'f':
        func = cublas.sgeam
    elif dtype == 'd':
        func = cublas.dgeam
    elif dtype == 'F':
        func = cublas.cgeam
    elif dtype == 'D':
        func = cublas.zgeam
    else:
        raise TypeError('invalid dtype')

    transa = _trans_to_cublas_op(transa)
    transb = _trans_to_cublas_op(transb)
    if transa == cublas.CUBLAS_OP_N:
        m, n = a.shape
    else:
        n, m = a.shape
    if transb == cublas.CUBLAS_OP_N:
        assert b.shape == (m, n)
    else:
        assert b.shape == (n, m)
    if out is None:
        out = cupy.empty((m, n), dtype=dtype, order='F')
    else:
        assert out.ndim == 2
        assert out.shape == (m, n)
        assert out.dtype == dtype

    alpha, alpha_ptr = _get_scalar_ptr(alpha, a.dtype)
    beta, beta_ptr = _get_scalar_ptr(beta, a.dtype)
    handle = device.get_cublas_handle()
    orig_mode = cublas.getPointerMode(handle)
    if isinstance(alpha, cupy.ndarray) or isinstance(beta, cupy.ndarray):
        # cuBLAS uses one pointer mode for both scalars, so if either lives
        # on the device the other one is moved there as well.
        if not isinstance(alpha, cupy.ndarray):
            alpha = cupy.array(alpha)
            alpha_ptr = alpha.data.ptr
        if not isinstance(beta, cupy.ndarray):
            beta = cupy.array(beta)
            beta_ptr = beta.data.ptr
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_DEVICE)
    else:
        cublas.setPointerMode(handle, cublas.CUBLAS_POINTER_MODE_HOST)

    # Everything after the pointer-mode switch (layout decisions and copies
    # included) is guarded by one try/finally so that the original mode is
    # restored on every exit path.
    try:
        lda, transa = _decide_ld_and_trans(a, transa)
        ldb, transb = _decide_ld_and_trans(b, transb)
        if lda is not None and ldb is not None:
            # Both inputs can be used in place.
            if out._f_contiguous:
                func(handle, transa, transb, m, n,
                     alpha_ptr, a.data.ptr, lda,
                     beta_ptr, b.data.ptr, ldb,
                     out.data.ptr, m)
                return out
            if out._c_contiguous:
                # Computes alpha * a.T + beta * b.T
                func(handle, 1 - transa, 1 - transb, n, m,
                     alpha_ptr, a.data.ptr, lda,
                     beta_ptr, b.data.ptr, ldb,
                     out.data.ptr, n)
                return out

        # General path: let the helper adjust the inputs where needed and
        # compute into a Fortran-order buffer.
        a, lda = _change_order_if_necessary(a, lda)
        b, ldb = _change_order_if_necessary(b, ldb)
        c = out
        if not out._f_contiguous:
            c = out.copy(order='F')
        func(handle, transa, transb, m, n,
             alpha_ptr, a.data.ptr, lda,
             beta_ptr, b.data.ptr, ldb,
             c.data.ptr, m)
    finally:
        cublas.setPointerMode(handle, orig_mode)

    if c is not out:
        # The result was computed in a temporary; copy it into ``out``.
        _core.elementwise_copy(c, out)
    return out
def gemv(transa, alpha, a, x, beta, y):
    """Computes y = alpha * op(a) @ x + beta * y

    op(a) = a if transa is 'N', op(a) = a.T if transa is 'T',
    op(a) = a.T.conj() if transa is 'H'.

    Note: ``y`` is passed to cuBLAS as the output vector and will be updated.

    Args:
        transa: Operation selector, converted by ``_trans_to_cublas_op``.
        alpha: Scalar multiplier; host value or ``cupy.ndarray``.
        a (cupy.ndarray): 2-D matrix of shape ``(m, n)``.
        x (cupy.ndarray): 1-D vector; length ``n`` for 'N', ``m`` otherwise.
        beta: Scalar multiplier for ``y``; host value or ``cupy.ndarray``.
        y (cupy.ndarray): 1-D vector; length ``m`` for 'N', ``n`` otherwise.

    Raises:
        TypeError: If ``a.dtype`` is not float32/64 or complex64/128.
    """
    # dtype char -> name of the cuBLAS routine, resolved lazily.
    routine_by_dtype = {'f': 'sgemv', 'd': 'dgemv', 'F': 'cgemv', 'D': 'zgemv'}
    routine_name = routine_by_dtype.get(a.dtype.char)
    if routine_name is None:
        raise TypeError('invalid dtype')
    func = getattr(cublas, routine_name)

    assert a.ndim == 2
    assert x.ndim == y.ndim == 1
    assert a.dtype == x.dtype == y.dtype
    m, n = a.shape
    transa = _trans_to_cublas_op(transa)
    xlen, ylen = (n, m) if transa == cublas.CUBLAS_OP_N else (m, n)
    assert x.shape[0] == xlen
    assert y.shape[0] == ylen

    alpha, alpha_ptr = _get_scalar_ptr(alpha, a.dtype)
    beta, beta_ptr = _get_scalar_ptr(beta, a.dtype)
    handle = device.get_cublas_handle()
    orig_mode = cublas.getPointerMode(handle)

    alpha_on_device = isinstance(alpha, cupy.ndarray)
    beta_on_device = isinstance(beta, cupy.ndarray)
    if alpha_on_device or beta_on_device:
        # cuBLAS uses one pointer mode for both scalars, so if either lives
        # on the device the other one is moved there as well.
        if not alpha_on_device:
            alpha = cupy.array(alpha)
            alpha_ptr = alpha.data.ptr
        if not beta_on_device:
            beta = cupy.array(beta)
            beta_ptr = beta.data.ptr
        pointer_mode = cublas.CUBLAS_POINTER_MODE_DEVICE
    else:
        pointer_mode = cublas.CUBLAS_POINTER_MODE_HOST
    cublas.setPointerMode(handle, pointer_mode)

    try:
        if a._f_contiguous:
            op, rows, cols = transa, m, n
        elif a._c_contiguous and transa != cublas.CUBLAS_OP_C:
            # A C-contiguous (m, n) buffer is handed over as an (n, m)
            # matrix with the N/T operation swapped.
            if transa == cublas.CUBLAS_OP_N:
                op = cublas.CUBLAS_OP_T
            else:
                op = cublas.CUBLAS_OP_N
            rows, cols = n, m
        else:
            # Neither layout can be used in place: fall back to a
            # Fortran-order copy.
            a = a.copy(order='F')
            op, rows, cols = transa, m, n
        # The leading dimension equals the row count in every case above.
        func(handle, op, rows, cols,
             alpha_ptr, a.data.ptr, rows, x.data.ptr, 1,
             beta_ptr, y.data.ptr, 1)
    finally:
        cublas.setPointerMode(handle, orig_mode)