def _make_fast_matvec(A):
    """Build a cuSPARSE SpMV closure computing ``A @ x`` for a CSR matrix.

    The sparse-matrix descriptor and the SpMV workspace buffer are created
    once here and captured by the returned closure; only the dense-vector
    descriptors are created on each call.

    Args:
        A: Matrix to multiply with. The fast path is only built when
            ``csr.isspmatrix_csr(A)`` is true and
            ``cusparse.check_availability('spmv')`` reports support.

    Returns:
        callable or None: ``matvec(x)`` returning a freshly allocated array
        shaped like ``x``, or ``None`` when the fast path is unavailable.
    """
    if not (csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')):
        return None

    handle = device.get_cusparse_handle()
    operation = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    # Host-side scalars so that SpMV computes y = 1 * (A @ x) + 0 * y.
    one = numpy.array(1.0, A.dtype)
    zero = numpy.array(0.0, A.dtype)
    compute_type = _dtype.to_cuda_dtype(A.dtype)
    algorithm = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Placeholder vectors exist only so that the workspace size can be
    # queried; they are released right after the query.
    probe_in = cupy.empty((A.shape[0], ), dtype=A.dtype)
    probe_out = cupy.empty((A.shape[0], ), dtype=A.dtype)
    mat_desc = cusparse.SpMatDescriptor.create(A)
    probe_in_desc = cusparse.DnVecDescriptor.create(probe_in)
    probe_out_desc = cusparse.DnVecDescriptor.create(probe_out)
    workspace_size = _cusparse.spMV_bufferSize(
        handle, operation, one.ctypes.data, mat_desc.desc,
        probe_in_desc.desc, zero.ctypes.data, probe_out_desc.desc,
        compute_type, algorithm)
    workspace = cupy.empty(workspace_size, cupy.int8)
    del probe_in, probe_in_desc, probe_out, probe_out_desc

    def matvec(x):
        result = cupy.empty_like(x)
        in_desc = cusparse.DnVecDescriptor.create(x)
        out_desc = cusparse.DnVecDescriptor.create(result)
        _cusparse.spMV(
            handle, operation, one.ctypes.data, mat_desc.desc,
            in_desc.desc, zero.ctypes.data, out_desc.desc,
            compute_type, algorithm, workspace.data.ptr)
        return result

    return matvec
def __init__(self, A, V, alpha, beta, update_impl='fast'):
    """Store the Lanczos operands and prepare the ``'fast'`` update path.

    Args:
        A: Square 2-D operator of shape ``(n, n)``.
        V: 2-D array of shape ``(ncv, n)`` with the same dtype as ``A``.
        alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
        beta: 1-D array of length ``ncv`` whose dtype char equals the
            lower-cased dtype char of ``A``.
        update_impl (str): When not ``'fast'``, only the operands and sizes
            are stored; no cuBLAS/cuSPARSE state or work buffers are
            created.

    Raises:
        AssertionError: If the shape or dtype relations above do not hold.
        TypeError: If ``A.dtype`` is not one of 'f', 'd', 'F' or 'D'
            (only checked on the ``'fast'`` path).
    """
    assert A.ndim == V.ndim == 2
    assert alpha.ndim == beta.ndim == 1
    assert A.dtype == V.dtype == alpha.dtype
    assert A.dtype.char.lower() == beta.dtype.char
    assert A.shape[0] == A.shape[1] == V.shape[1]
    assert V.shape[0] == alpha.shape[0] == beta.shape[0]

    self.A = A
    self.V = V
    self.alpha = alpha
    self.beta = beta
    self.n = V.shape[1]
    self.ncv = V.shape[0]
    self.update_impl = update_impl
    if self.update_impl != 'fast':
        return

    self.cublas_handle = device.get_cublas_handle()
    # Remembered so that it can be restored after temporary changes.
    self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle)

    # cuBLAS routine names (dot, nrm2, gemm) per dtype char; resolved
    # lazily so only the routines for the matching dtype are looked up.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }.get(A.dtype.char)
    if routine_names is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dot_name, nrm2_name, gemm_name = routine_names
    self.dotc = getattr(_cublas, dot_name)
    self.nrm2 = getattr(_cublas, nrm2_name)
    self.gemm = getattr(_cublas, gemm_name)

    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        self.cusparse_handle = device.get_cusparse_handle()
        self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        # Host-side scalars for y = 1 * (A @ x) + 0 * y.
        self.spmv_alpha = numpy.array(1.0, A.dtype)
        self.spmv_beta = numpy.array(0.0, A.dtype)
        # Use the same dtype-to-CUDA-type helper as the other SpMV setup
        # code in this module instead of a private cusparse helper.
        self.spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT
    else:
        # A handle of None marks the cuSPARSE SpMV path as unavailable.
        self.cusparse_handle = None

    # Work buffers: two vectors of length n and one of length ncv.
    self.v = cupy.empty((self.n, ), dtype=A.dtype)
    self.u = cupy.empty((self.n, ), dtype=A.dtype)
    self.uu = cupy.empty((self.ncv, ), dtype=A.dtype)
def _lanczos_fast(A, n, ncv):
    """Create the cuBLAS/cuSPARSE-backed Lanczos update routine for ``A``.

    Library handles, host-side scalar constants and scratch vectors are
    prepared once here and captured by the returned closure.

    Args:
        A: Operator to iterate with. The returned function asserts that it
            is called with this very object. Its dtype char must be one of
            'f', 'd', 'F' or 'D'.
        n (int): Length of the work vector allocated here; also the vector
            length passed to the cuBLAS calls.
        ncv (int): Length of the scratch vector that receives the
            projection coefficients during orthogonalization.

    Returns:
        callable: ``aux(A, V, u, alpha, beta, i_start, i_end)``, which
        performs the steps ``i_start .. i_end - 1`` in place.

    Raises:
        TypeError: If ``A.dtype`` is not one of the supported types.
    """
    cublas_handle = device.get_cublas_handle()
    saved_pointer_mode = _cublas.getPointerMode(cublas_handle)

    # cuBLAS routine names (dot, nrm2, gemm) per dtype char.
    routine_names = {
        'f': ('sdot', 'snrm2', 'sgemm'),
        'd': ('ddot', 'dnrm2', 'dgemm'),
        'F': ('cdotc', 'scnrm2', 'cgemm'),
        'D': ('zdotc', 'dznrm2', 'zgemm'),
    }.get(A.dtype.char)
    if routine_names is None:
        raise TypeError('invalid dtype ({})'.format(A.dtype))
    dotc, nrm2, gemm = (getattr(_cublas, name) for name in routine_names)

    # A handle of None means "fall back to ``A @ v``".
    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_one = numpy.array(1.0, A.dtype)
        spmv_zero = numpy.array(0.0, A.dtype)
        spmv_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    work_v = cupy.empty((n, ), dtype=A.dtype)
    coeffs = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalars handed to gemm by address.
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    minus_one = numpy.array(-1.0, dtype=A.dtype)
    outer_A = A

    def call_with_device_result(routine, *args):
        # Run a cuBLAS routine whose scalar result is written to a device
        # address, restoring the previous pointer mode afterwards.
        _cublas.setPointerMode(
            cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE)
        try:
            routine(cublas_handle, *args)
        finally:
            _cublas.setPointerMode(cublas_handle, saved_pointer_mode)

    def aux(A, V, u, alpha, beta, i_start, i_end):
        assert A is outer_A

        beta_eps = inversion_eps(A.dtype)
        use_spmv = cusparse_handle is not None

        if use_spmv:
            # Descriptors and the workspace are rebuilt on every call:
            # keeping them across updates was tried and abandoned because
            # it sometimes caused illegal memory access errors.
            mat_desc = cusparse.SpMatDescriptor.create(A)
            in_desc = cusparse.DnVecDescriptor.create(work_v)
            out_desc = cusparse.DnVecDescriptor.create(u)
            workspace_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op, spmv_one.ctypes.data,
                mat_desc.desc, in_desc.desc, spmv_zero.ctypes.data,
                out_desc.desc, spmv_dtype, spmv_alg)
            workspace = cupy.empty(workspace_size, cupy.int8)

        work_v[...] = V[i_start]
        last_step = i_end - 1
        for step in range(i_start, i_end):
            # u <- A @ v
            if use_spmv:
                _cusparse.spMV(
                    cusparse_handle, spmv_op, spmv_one.ctypes.data,
                    mat_desc.desc, in_desc.desc, spmv_zero.ctypes.data,
                    out_desc.desc, spmv_dtype, spmv_alg,
                    workspace.data.ptr)
            else:
                u[...] = A @ work_v

            # alpha[step] <- dotc(v, u), written directly on the device.
            call_with_device_result(
                dotc, n, work_v.data.ptr, 1, u.data.ptr, 1,
                alpha.data.ptr + step * alpha.itemsize)

            # Orthogonalize u against the first step + 1 rows of V:
            # first gather the coefficients, then subtract the projection.
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N,
                 1, step + 1, n,
                 one.ctypes.data, u.data.ptr, n,
                 V.data.ptr, n,
                 zero.ctypes.data, coeffs.data.ptr, 1)
            gemm(cublas_handle,
                 _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C,
                 n, 1, step + 1,
                 minus_one.ctypes.data, V.data.ptr, n,
                 coeffs.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # beta[step] <- nrm2(u), written directly on the device.
            call_with_device_result(
                nrm2, n, u.data.ptr, 1,
                beta.data.ptr + step * beta.itemsize)

            # Stop before normalizing on the final step, because the
            # normalization below touches V[step + 1].
            if step >= last_step:
                break

            if beta[step] < beta_eps:
                V[step + 1:i_end, :] = 0
                u[...] = 0
                work_v[...] = 0
                break
            if step == i_start:
                beta_eps *= beta[step]  # scale eps to largest beta

            # Normalize
            _kernel_normalize(u, beta, step, n, work_v, V)

    return aux