Example 1: File det.py, Project okuta/chainer
def _det_gpu(b):
    # We do a batched LU decomposition on the GPU and compute the
    # determinant as the product of the diagonal entries.
    # Reshape the array into a size-1 minibatch if necessary.
    # Also copy the matrix, as the elements will be modified in-place.
    a = matmul._as_batch_mat(b).copy()
    n = a.shape[1]
    n_matrices = len(a)
    # Pivot array (cuBLAS returns 1-based pivot indices)
    p = cuda.cupy.zeros((n_matrices, n), dtype='int32')
    # This array holds information on the execution success or
    # whether a matrix was singular; cuBLAS writes 32-bit ints,
    # so use int32 rather than intp.
    info = cuda.cupy.zeros(n_matrices, dtype=numpy.int32)
    # Device pointers to each matrix and the leading dimension,
    # as required by the batched cuBLAS API.
    ap = matmul._mat_ptrs(a)
    _, lda = matmul._get_ld(a)
    if b.dtype == numpy.float32:
        cuda.cublas.sgetrfBatched(cuda.Device().cublas_handle, n, ap.data.ptr,
                                  lda, p.data.ptr, info.data.ptr, n_matrices)
    elif b.dtype == numpy.float64:
        cuda.cublas.dgetrfBatched(cuda.Device().cublas_handle, n, ap.data.ptr,
                                  lda, p.data.ptr, info.data.ptr, n_matrices)
    else:
        assert False
    # The determinant is the product of the diagonal entries of the
    # factored matrix, with the sign flipped once for every pivot
    # that differs from its (1-based) row index, i.e. once per row
    # swap performed during the decomposition.
    det = cuda.cupy.prod(a.diagonal(axis1=1, axis2=2), axis=1)
    rng = cuda.cupy.arange(1, n + 1, dtype='int32')
    parity = cuda.cupy.sum(p != rng, axis=1) % 2
    sign = 1. - 2. * parity.astype(b.dtype, copy=False)
    return det * sign, info
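
As a cross-check of the pivot-parity sign trick above, here is a minimal CPU sketch of the same computation using SciPy (assumed available; not part of the original code). scipy.linalg.lu_factor returns 0-based pivot indices where getrfBatched returns 1-based ones, but the swap-counting is identical:

import numpy
from scipy.linalg import lu_factor

def det_via_lu(m):
    # LU-factor the matrix; `piv` holds 0-based pivot indices.
    lu, piv = lu_factor(m)
    # Every pivot that differs from its row index is one row swap;
    # the permutation contributes a sign of (-1) ** (number of swaps).
    parity = numpy.sum(piv != numpy.arange(m.shape[0])) % 2
    sign = 1.0 - 2.0 * parity
    # The determinant is that sign times the product of U's diagonal.
    return sign * numpy.prod(numpy.diag(lu))

x = numpy.random.randn(5, 5)
print(det_via_lu(x), numpy.linalg.det(x))  # should agree closely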
Example 2
def _inv_gpu(b):
    # We do a batched LU decomposition on the GPU to compute the inverse.
    # Reshape the array into a size-1 minibatch if necessary.
    # Also copy the matrix, as the elements will be modified in-place.
    a = matmul._as_batch_mat(b).copy()
    n = a.shape[1]
    n_matrices = len(a)
    # Pivot array
    p = cuda.cupy.empty((n, n_matrices), dtype=numpy.int32)
    # Output array
    c = cuda.cupy.empty_like(a)
    # This array holds information on the execution success
    # or whether a matrix was singular.
    info = cuda.cupy.empty(n_matrices, dtype=numpy.int32)
    # Device pointers to each input/output matrix and their leading
    # dimensions, as required by the batched cuBLAS API.
    ap = matmul._mat_ptrs(a)
    cp = matmul._mat_ptrs(c)
    _, lda = matmul._get_ld(a)
    _, ldc = matmul._get_ld(c)
    handle = cuda.Device().cublas_handle
    if b.dtype == numpy.float32:
        # getrf factorizes `a` in-place; getri then computes the
        # inverse from the LU factors into `c`.
        cuda.cublas.sgetrfBatched(
            handle, n, ap.data.ptr, lda, p.data.ptr, info.data.ptr, n_matrices)
        cuda.cublas.sgetriBatched(
            handle, n, ap.data.ptr, lda, p.data.ptr, cp.data.ptr, ldc,
            info.data.ptr, n_matrices)
    elif b.dtype == numpy.float64:
        cuda.cublas.dgetrfBatched(
            handle, n, ap.data.ptr, lda, p.data.ptr, info.data.ptr, n_matrices)
        cuda.cublas.dgetriBatched(
            handle, n, ap.data.ptr, lda, p.data.ptr, cp.data.ptr, ldc,
            info.data.ptr, n_matrices)
    else:
        assert False
    return c, info
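
A minimal usage sketch (assuming CuPy with a working GPU; cupy.linalg.inv is the high-level counterpart of the getrfBatched/getriBatched pair and also accepts a batch of matrices), useful for sanity-checking the result against the identity:

import cupy

x = cupy.random.randn(8, 4, 4).astype(cupy.float32)
inv = cupy.linalg.inv(x)  # batched inverse, shape (8, 4, 4)
# Every x[i] @ inv[i] should be close to the 4x4 identity.
err = cupy.abs(cupy.matmul(x, inv) - cupy.eye(4, dtype=cupy.float32)).max()
print(float(err))  # expected to be small float32 rounding noise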
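
Both examples feed the batched cuBLAS routines through matmul._mat_ptrs, whose implementation is not shown here. A hypothetical sketch of what such a helper must produce, an array holding the device address of each matrix in the batch, could look like this:

import numpy
import cupy

def mat_ptrs_sketch(a):
    # Illustrative only: build the pointer array that the batched
    # cuBLAS API consumes, one device address per matrix.
    stride = a.strides[0]  # bytes between consecutive matrices
    ptr = a.data.ptr       # device address of the first matrix
    return cupy.arange(ptr, ptr + stride * len(a), stride,
                       dtype=numpy.uintp)

a = cupy.random.randn(3, 4, 4).astype(cupy.float32)
print(mat_ptrs_sketch(a))  # three addresses, 64 bytes apart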