def perform(self, node, inputs, outputs):
    context = inputs[0][0].context

    # Input matrix.
    A = inputs[0]

    l, n = A.shape
    if l != n:
        raise ValueError('A must be a square matrix')

    lda = max(1, n)

    # cusolver operates on F ordered matrices
    if not self.inplace:
        LU = pygpu.array(A, copy=True, order='F')
    else:
        LU = A.T if A.flags['C_CONTIGUOUS'] else A

    LU_ptr = LU.gpudata

    with context:
        workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
            context.cusolver_handle, n, n, LU_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        pivots = pygpu.zeros(n, dtype='int32', context=context)

        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        pivots_ptr = pivots.gpudata
        dev_info_ptr = dev_info.gpudata

        cusolver.cusolverDnSgetrf(
            context.cusolver_handle, n, n, LU_ptr, lda,
            workspace_ptr, pivots_ptr, dev_info_ptr)

        if self.check_output:
            val_dev_info = np.asarray(dev_info)[0]
            if val_dev_info > 0:
                raise LinAlgError('LU decomposition failed')

    outputs[1][0] = pivots
    outputs[0][0] = LU
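# A minimal host-side sketch (not part of the op above) of how the packed LU
# output and the pivot vector returned by getrf are laid out. scipy's
# lu_factor stands in for cusolverDnSgetrf here, since both follow the LAPACK
# getrf convention; all names below are illustrative only.
import numpy as np
from scipy.linalg import lu_factor

A_host = np.random.rand(4, 4).astype(np.float32)
lu, piv = lu_factor(A_host)          # packed L\U factors and 0-based pivots

L = np.tril(lu, -1) + np.eye(4, dtype=np.float32)   # unit lower triangle
U = np.triu(lu)                                      # upper triangle

# Turn the pivot swaps into a row permutation. cusolver's pivots are 1-based
# while scipy's are 0-based, but the swap semantics are the same: row i was
# interchanged with row piv[i].
perm = np.arange(4)
for i, p in enumerate(piv):
    perm[[i, p]] = perm[[p, i]]

print(np.allclose(A_host[perm], L @ U, atol=1e-5))   # P*A == L*U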
def thunk():
    context = inputs[0][0].context

    # Size of the matrices to invert.
    z = outputs[0]

    # Matrix.
    A = inputs[0][0]

    # Solution vectors.
    b = inputs[1][0]

    assert len(A.shape) == 2
    assert len(b.shape) == 2

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')
    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    # Leading dimension of the F-ordered copy of b made below.
    ldb = max(1, k)

    # We copy A and b as cusolver operates inplace
    b = gpuarray.array(b, copy=True, order='F')
    if not self.inplace:
        A = gpuarray.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects a F ordered matrix, but A is not explicitly
    # converted between C and F order, instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
        cusolver_handle, n, n, A_ptr, lda)

    if (thunk.workspace is None or
            thunk.workspace.size != workspace_size):
        thunk.workspace = gpuarray.zeros((workspace_size,),
                                         dtype='float32',
                                         context=context)

    # getrf returns one integer pivot per row of the n x n matrix.
    if thunk.pivots is None or thunk.pivots.size != n:
        thunk.pivots = gpuarray.zeros((n,), dtype='int32',
                                      context=context)

    if thunk.dev_info is None:
        thunk.dev_info = gpuarray.zeros((1,), dtype='int32',
                                        context=context)

    workspace_ptr = thunk.workspace.gpudata
    pivots_ptr = thunk.pivots.gpudata
    dev_info_ptr = thunk.dev_info.gpudata

    cusolver.cusolverDnSgetrf(
        cusolver_handle, n, n, A_ptr, lda,
        workspace_ptr, pivots_ptr, dev_info_ptr)

    cusolver.cusolverDnSgetrs(
        cusolver_handle, trans, n, m, A_ptr, lda,
        pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b
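# A self-contained sketch (not Theano code) of the buffer-caching idiom used
# by the thunk above: scratch buffers are stored as attributes on the thunk
# closure itself, start out as None, and are only reallocated when the
# required size changes, so repeated calls reuse the same device memory.
import numpy as np

def make_cached_thunk():
    def thunk(n):
        if thunk.workspace is None or thunk.workspace.size != n:
            thunk.workspace = np.zeros(n, dtype='float32')
        return thunk.workspace
    # Attributes must exist before the first call.
    thunk.workspace = None
    return thunk

thunk = make_cached_thunk()
buf1 = thunk(8)
buf2 = thunk(8)
print(buf1 is buf2)   # True: the buffer is reused across calls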
import numpy as np
import scipy as sp
import scipy.linalg
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.cusolver as solver

# cuSOLVER dense handle.
h = solver.cusolverDnCreate()

x = np.asarray([[1.80, 2.88, 2.05, -0.89],
                [5.25, -2.95, -0.95, -3.80],
                [1.58, -2.69, -2.90, -1.04],
                [-1.11, -0.66, -0.59, 0.80]]).astype(np.float32)

# Need to copy transposed matrix because T only returns a view:
m, n = x.shape
x_gpu = gpuarray.to_gpu(x.T.copy())

# Set up work buffers:
Lwork = solver.cusolverDnSgetrf_bufferSize(h, m, n, x_gpu.gpudata, m)
workspace_gpu = gpuarray.zeros(Lwork, np.float32)
devipiv_gpu = gpuarray.zeros(min(m, n), np.int32)
devinfo_gpu = gpuarray.zeros(1, np.int32)

# Compute:
solver.cusolverDnSgetrf(h, m, n, x_gpu.gpudata, m, workspace_gpu.gpudata,
                        devipiv_gpu.gpudata, devinfo_gpu.gpudata)

# Confirm that the solution is correct by checking against the result obtained
# with scipy; adjust the dimensions of the computed lower/upper triangular
# matrices to facilitate comparison if the original matrix was not square:
l_cuda = np.tril(x_gpu.get().T, -1)
u_cuda = np.triu(x_gpu.get().T)
if m < n:
    l_cuda = l_cuda[:, :m]
else:
    u_cuda = u_cuda[:n, :]
p, l, u = sp.linalg.lu(x)

# Only check values in the lower triangle starting from the first off-diagonal:
print('lower triangular matrix is correct:',
      np.allclose(np.tril(l, -1), l_cuda))
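# A possible continuation of the check above (not part of the original
# snippet): verify the factorization directly by rebuilding P*A from the
# packed factors and the 1-based pivot indices returned in devipiv_gpu.
# Assumes the square example matrix used above.
ipiv = devipiv_gpu.get()
perm = np.arange(m)
for i, piv in enumerate(ipiv):
    perm[[i, piv - 1]] = perm[[piv - 1, i]]        # row i was swapped with row piv-1
l_full = l_cuda + np.eye(m, dtype=np.float32)      # getrf stores a unit lower triangle
print('P*A == L*U:', np.allclose(x[perm], l_full.dot(u_cuda), atol=1e-4))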
def perform(self, node, inputs, outputs):
    context = inputs[0][0].context

    # Size of the matrices to invert.
    z = outputs[0]

    # Matrix.
    A = inputs[0]

    # Solution vectors.
    b = inputs[1]

    assert len(A.shape) == 2
    assert len(b.shape) == 2

    if self.trans in ['T', 'C']:
        trans = 1
        l, n = A.shape
        k, m = b.shape
    elif self.trans == 'N':
        trans = 0
        n, l = A.shape
        k, m = b.shape
    else:
        raise ValueError('Invalid value for trans')
    if l != n:
        raise ValueError('A must be a square matrix')
    if n != k:
        raise ValueError('A and b must be aligned.')

    lda = max(1, n)
    ldb = max(1, k)

    # We copy A and b as cusolver operates inplace
    b = pygpu.array(b, copy=True, order='F')
    if not self.inplace:
        A = pygpu.array(A, copy=True)
    A_ptr = A.gpudata
    b_ptr = b.gpudata

    # cusolver expects a F ordered matrix, but A is not explicitly
    # converted between C and F order, instead we switch the
    # "transpose" flag.
    if A.flags['C_CONTIGUOUS']:
        trans = 1 - trans

    if self.A_structure == 'symmetric':
        with context:
            workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                context.cusolver_handle, 0, n, A_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        dev_info_ptr = dev_info.gpudata

        with context:
            cusolver.cusolverDnSpotrf(
                context.cusolver_handle, 0, n, A_ptr, lda,
                workspace_ptr, workspace_size, dev_info_ptr)
            self.check_dev_info(dev_info)

            cusolver.cusolverDnSpotrs(
                context.cusolver_handle, 0, n, m, A_ptr, lda,
                b_ptr, ldb, dev_info_ptr)

    else:
        # general case for A
        with context:
            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                context.cusolver_handle, n, n, A_ptr, lda)

        workspace = pygpu.zeros(workspace_size, dtype='float32',
                                context=context)

        pivots = pygpu.zeros(n, dtype='int32', context=context)

        dev_info = pygpu.zeros((1,), dtype='int32', context=context)

        workspace_ptr = workspace.gpudata
        pivots_ptr = pivots.gpudata
        dev_info_ptr = dev_info.gpudata

        with context:
            cusolver.cusolverDnSgetrf(
                context.cusolver_handle, n, n, A_ptr, lda,
                workspace_ptr, pivots_ptr, dev_info_ptr)
            self.check_dev_info(dev_info)

            cusolver.cusolverDnSgetrs(
                context.cusolver_handle, trans, n, m, A_ptr, lda,
                pivots_ptr, b_ptr, ldb, dev_info_ptr)

    z[0] = b
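# A small CPU-side reference (scipy, illustrative only) of what the two
# branches above compute: the 'symmetric' path is a Cholesky solve
# (Spotrf + Spotrs, lower fill mode since uplo=0), while the general path is
# an LU solve with partial pivoting (Sgetrf + Sgetrs). For a symmetric
# positive definite A both give the same solution.
import numpy as np
from scipy.linalg import cho_factor, cho_solve, lu_factor, lu_solve

rng = np.random.RandomState(0)
M = rng.rand(4, 4).astype(np.float32)
A = M @ M.T + 4 * np.eye(4, dtype=np.float32)   # symmetric positive definite
b = rng.rand(4, 2).astype(np.float32)

x_chol = cho_solve(cho_factor(A, lower=True), b)   # symmetric branch
x_lu = lu_solve(lu_factor(A), b)                   # general branch
print(np.allclose(x_chol, x_lu, atol=1e-4))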