Esempio n. 1
0
def _make_fast_matvec(A):
    """Build a cuSPARSE-backed matrix-vector product closure for ``A``.

    The fast path is taken only when ``csr.isspmatrix_csr(A)`` is true and
    ``cusparse.check_availability('spmv')`` reports that SpMV is usable.
    In that case the matrix descriptor and the SpMV workspace are created
    once here and reused by every call of the returned closure.

    Args:
        A: Matrix to multiply with.

    Returns:
        A function ``matvec(x)`` that returns a freshly allocated array
        holding the SpMV result, or ``None`` when the fast path is not
        available.

    Note:
        Both probe vectors used for the workspace-size query have length
        ``A.shape[0]`` and the result is allocated with ``empty_like(x)``,
        so the closure is only dimensionally consistent for square ``A``.
    """
    if not (csr.isspmatrix_csr(A) and cusparse.check_availability('spmv')):
        return None

    spmv_handle = device.get_cusparse_handle()
    no_transpose = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
    # Host-side scalars for y = one * A @ x + zero * y. The closure refers
    # to the arrays themselves (not to cached addresses) so that the memory
    # behind ``.ctypes.data`` stays alive for as long as ``matvec`` does.
    one = numpy.array(1.0, A.dtype)
    zero = numpy.array(0.0, A.dtype)
    data_type = _dtype.to_cuda_dtype(A.dtype)
    algorithm = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Throw-away vectors: they exist only so that cuSPARSE can be asked
    # how large the SpMV workspace has to be.
    probe_in = cupy.empty((A.shape[0], ), dtype=A.dtype)
    probe_out = cupy.empty((A.shape[0], ), dtype=A.dtype)
    mat_desc = cusparse.SpMatDescriptor.create(A)
    probe_in_desc = cusparse.DnVecDescriptor.create(probe_in)
    probe_out_desc = cusparse.DnVecDescriptor.create(probe_out)
    workspace_bytes = _cusparse.spMV_bufferSize(
        spmv_handle, no_transpose, one.ctypes.data, mat_desc.desc,
        probe_in_desc.desc, zero.ctypes.data, probe_out_desc.desc,
        data_type, algorithm)
    workspace = cupy.empty(workspace_bytes, cupy.int8)
    del probe_in, probe_in_desc, probe_out, probe_out_desc

    def matvec(x):
        # Dense-vector descriptors are rebuilt per call because they wrap
        # the concrete input/output arrays of this call.
        result = cupy.empty_like(x)
        x_desc = cusparse.DnVecDescriptor.create(x)
        result_desc = cusparse.DnVecDescriptor.create(result)
        _cusparse.spMV(
            spmv_handle, no_transpose, one.ctypes.data, mat_desc.desc,
            x_desc.desc, zero.ctypes.data, result_desc.desc,
            data_type, algorithm, workspace.data.ptr)
        return result

    return matvec
Esempio n. 2
0
    def __init__(self, A, V, alpha, beta, update_impl='fast'):
        """Store the operands and, for the ``'fast'`` path, prepare GPU state.

        Args:
            A: Square 2-D operator of shape ``(n, n)``.
            V: 2-D array of shape ``(ncv, n)`` with the same dtype as ``A``.
            alpha: 1-D array of length ``ncv`` with the same dtype as ``A``.
            beta: 1-D array of length ``ncv`` whose dtype character equals
                the lower-cased dtype character of ``A``.
            update_impl: Implementation selector. Any value other than
                ``'fast'`` makes the constructor return right after the
                arguments have been stored, without touching cuBLAS or
                cuSPARSE.

        Raises:
            TypeError: On the ``'fast'`` path, if the dtype character of
                ``A`` is not one of ``'f'``, ``'d'``, ``'F'`` or ``'D'``.
        """
        assert A.ndim == V.ndim == 2
        assert alpha.ndim == beta.ndim == 1
        assert A.dtype == V.dtype == alpha.dtype
        assert A.dtype.char.lower() == beta.dtype.char
        assert A.shape[0] == A.shape[1] == V.shape[1]
        assert V.shape[0] == alpha.shape[0] == beta.shape[0]

        self.A, self.V = A, V
        self.alpha, self.beta = alpha, beta
        self.n = V.shape[1]
        self.ncv = V.shape[0]
        self.update_impl = update_impl
        if update_impl != 'fast':
            return

        blas_handle = device.get_cublas_handle()
        self.cublas_handle = blas_handle
        self.cublas_pointer_mode = _cublas.getPointerMode(blas_handle)

        # (dot, norm, gemm) cuBLAS entry points keyed by dtype character.
        routines_by_char = {
            'f': (_cublas.sdot, _cublas.snrm2, _cublas.sgemm),
            'd': (_cublas.ddot, _cublas.dnrm2, _cublas.dgemm),
            'F': (_cublas.cdotc, _cublas.scnrm2, _cublas.cgemm),
            'D': (_cublas.zdotc, _cublas.dznrm2, _cublas.zgemm),
        }
        routines = routines_by_char.get(A.dtype.char)
        if routines is None:
            raise TypeError('invalid dtype ({})'.format(A.dtype))
        self.dotc, self.nrm2, self.gemm = routines

        # A ``None`` cuSPARSE handle records that SpMV is not usable for
        # this operator.
        spmv_usable = (csr.isspmatrix_csr(A)
                       and cusparse.check_availability('spmv'))
        if not spmv_usable:
            self.cusparse_handle = None
        else:
            self.cusparse_handle = device.get_cusparse_handle()
            self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
            self.spmv_alpha = numpy.array(1.0, A.dtype)
            self.spmv_beta = numpy.array(0.0, A.dtype)
            self.spmv_cuda_dtype = cusparse._dtype_to_DataType(A.dtype)
            self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

        # Work arrays: two vectors of length n and one of length ncv.
        work_dtype = A.dtype
        self.v = cupy.empty((self.n, ), dtype=work_dtype)
        self.u = cupy.empty((self.n, ), dtype=work_dtype)
        self.uu = cupy.empty((self.ncv, ), dtype=work_dtype)
Esempio n. 3
0
def _lanczos_fast(A, n, ncv):
    """Build the Lanczos update closure that calls cuBLAS/cuSPARSE directly.

    Selects the dot-product, norm and GEMM cuBLAS routines matching
    ``A.dtype`` and, when ``A`` is a CSR matrix and cuSPARSE SpMV is
    available, prepares the SpMV parameters. Work arrays and host-side
    scalar constants are allocated once here and captured by the returned
    closure.

    Args:
        A: Operator applied in every iteration. Its dtype character must be
            one of ``'f'``, ``'d'``, ``'F'`` or ``'D'``.
        n: Length of the work vector ``v``; also passed to cuBLAS as the
            vector length and as the leading dimension of ``u`` and ``V``.
        ncv: Length of the work vector ``uu``.

    Returns:
        The function ``aux(A, V, u, alpha, beta, i_start, i_end)``.

    Raises:
        TypeError: If ``A.dtype`` is not one of the four supported types.
    """
    cublas_handle = device.get_cublas_handle()
    # Remember the caller's pointer mode so it can be restored after every
    # call that temporarily switches to device pointer mode.
    cublas_pointer_mode = _cublas.getPointerMode(cublas_handle)
    if A.dtype.char == 'f':
        dotc = _cublas.sdot
        nrm2 = _cublas.snrm2
        gemm = _cublas.sgemm
    elif A.dtype.char == 'd':
        dotc = _cublas.ddot
        nrm2 = _cublas.dnrm2
        gemm = _cublas.dgemm
    elif A.dtype.char == 'F':
        dotc = _cublas.cdotc
        nrm2 = _cublas.scnrm2
        gemm = _cublas.cgemm
    elif A.dtype.char == 'D':
        dotc = _cublas.zdotc
        nrm2 = _cublas.dznrm2
        gemm = _cublas.zgemm
    else:
        raise TypeError('invalid dtype ({})'.format(A.dtype))

    # ``cusparse_handle is None`` is the flag for "use ``A @ v`` instead of
    # cuSPARSE SpMV" inside the closure.
    cusparse_handle = None
    if csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'):
        cusparse_handle = device.get_cusparse_handle()
        spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE
        spmv_alpha = numpy.array(1.0, A.dtype)
        spmv_beta = numpy.array(0.0, A.dtype)
        spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype)
        spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT

    # Work arrays shared by all calls of ``aux``.
    v = cupy.empty((n, ), dtype=A.dtype)
    uu = cupy.empty((ncv, ), dtype=A.dtype)
    # Host-side scalars handed to GEMM through ``.ctypes.data``.
    one = numpy.array(1.0, dtype=A.dtype)
    zero = numpy.array(0.0, dtype=A.dtype)
    mone = numpy.array(-1.0, dtype=A.dtype)

    outer_A = A

    def aux(A, V, u, alpha, beta, i_start, i_end):
        """Run iterations ``i_start`` .. ``i_end - 1``, updating in place.

        For each ``i`` this computes ``u = A @ v``, writes the dot product
        of ``v`` and ``u`` into ``alpha[i]``, runs two GEMM calls on ``u``,
        ``V`` and ``uu`` (the orthogonalization step), writes the norm of
        ``u`` into ``beta[i]`` and finally calls ``_kernel_normalize``.
        The loop stops early when ``beta[i]`` drops below a threshold.

        Args:
            A: Must be the very same object that was given to
                ``_lanczos_fast``.
            V: Device array whose rows are indexed by the iteration number.
                NOTE(review): the GEMM calls pass ``V.data.ptr`` with
                leading dimension ``n``, so this presumably has to be
                C-contiguous with rows of length ``n`` -- confirm.
            u: Device work vector; overwritten.
            alpha: Device array; element ``i`` receives the dot product.
            beta: Device array; element ``i`` receives the norm.
            i_start: First iteration index.
            i_end: One past the last iteration index.
        """
        assert A is outer_A
        # Threshold below which beta[i] is treated as a breakdown;
        # ``inversion_eps`` is defined elsewhere in the module.
        beta_eps = inversion_eps(A.dtype)

        # Get ready for spmv if enabled
        if cusparse_handle is not None:
            # Note: descriptors and the working buffer are deliberately
            # rebuilt on every call. Reusing them across calls was tried
            # and abandoned because it sometimes caused illegal memory
            # access errors.
            spmv_desc_A = cusparse.SpMatDescriptor.create(A)
            spmv_desc_v = cusparse.DnVecDescriptor.create(v)
            spmv_desc_u = cusparse.DnVecDescriptor.create(u)
            buff_size = _cusparse.spMV_bufferSize(
                cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data,
                spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data,
                spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg)
            spmv_buff = cupy.empty(buff_size, cupy.int8)

        # Seed the work vector with the starting row of V.
        v[...] = V[i_start]
        for i in range(i_start, i_end):
            # Matrix-vector multiplication: u = A @ v
            if cusparse_handle is None:
                u[...] = A @ v
            else:
                _cusparse.spMV(cusparse_handle, spmv_op_a,
                               spmv_alpha.ctypes.data, spmv_desc_A.desc,
                               spmv_desc_v.desc, spmv_beta.ctypes.data,
                               spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg,
                               spmv_buff.data.ptr)

            # Call dotc. The result pointer is the device address of
            # alpha[i], hence the temporary switch to device pointer mode;
            # the previous mode is restored even if the call raises.
            _cublas.setPointerMode(cublas_handle,
                                   _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1,
                     alpha.data.ptr + i * alpha.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Orthogonalize: the first GEMM fills uu[:i + 1] from u and the
            # first i + 1 rows of V; the second one adds -1 times the
            # matching combination of those rows to u. The scalars are host
            # pointers, so these calls run in the saved pointer mode.
            gemm(cublas_handle, _cublas.CUBLAS_OP_C, _cublas.CUBLAS_OP_N, 1,
                 i + 1, n, one.ctypes.data, u.data.ptr, n, V.data.ptr, n,
                 zero.ctypes.data, uu.data.ptr, 1)
            gemm(cublas_handle, _cublas.CUBLAS_OP_N, _cublas.CUBLAS_OP_C, n, 1,
                 i + 1, mone.ctypes.data, V.data.ptr, n, uu.data.ptr, 1,
                 one.ctypes.data, u.data.ptr, n)

            # Call nrm2, writing straight into the device address of
            # beta[i] (same pointer-mode handling as for dotc above).
            _cublas.setPointerMode(cublas_handle,
                                   _cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                nrm2(cublas_handle, n, u.data.ptr, 1,
                     beta.data.ptr + i * beta.itemsize)
            finally:
                _cublas.setPointerMode(cublas_handle, cublas_pointer_mode)

            # Break here as the normalization below touches V[i+1]
            if i >= i_end - 1:
                break

            # Breakdown: zero the remaining rows and both work vectors,
            # then stop iterating.
            if beta[i] < beta_eps:
                V[i + 1:i_end, :] = 0
                u[...] = 0
                v[...] = 0
                break
            if i == i_start:
                beta_eps *= beta[i]  # scale eps to largest beta

            # Normalize. ``_kernel_normalize`` is defined elsewhere;
            # presumably it stores u / beta[i] into v and V[i + 1] --
            # verify against its definition.
            _kernel_normalize(u, beta, i, n, v, V)

    return aux