Example #1
0
        def thunk():
            """Solve ``op(A) x = b`` on the GPU with a cuSOLVER LU factorization.

            The matrix ``A`` is read from ``inputs[0][0]`` and the right-hand
            sides ``b`` from ``inputs[1][0]``.  ``A`` is factorized with
            ``cusolverDnSgetrf`` and the system is solved with
            ``cusolverDnSgetrs``; the F-ordered copy of ``b`` that was passed
            to the solver is then stored in ``outputs[0][0]``.  ``A`` itself
            is handed to cuSOLVER (and therefore overwritten) when
            ``self.inplace`` is true, otherwise a copy is used.

            Scratch buffers are cached as attributes of this function
            (``thunk.workspace``, ``thunk.pivots``, ``thunk.dev_info``); the
            attributes must already exist (e.g. set to ``None``) before the
            first call.

            Raises
            ------
            ValueError
                If ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is not
                square, or if the sizes of ``A`` and ``b`` do not match.
            """
            context = inputs[0][0].context

            # Output storage cell; the result is stored in z[0].
            z = outputs[0]

            # Matrix.
            A = inputs[0][0]

            # Right-hand side(s), overwritten with the solution.
            b = inputs[1][0]

            assert (len(A.shape) == 2)
            assert (len(b.shape) == 2)

            if self.trans in ['T', 'C']:
                trans = 1
                l, n = A.shape
                k, m = b.shape
            elif self.trans == 'N':
                trans = 0
                n, l = A.shape
                k, m = b.shape
            else:
                raise ValueError('Invalid value for trans')
            if l != n:
                raise ValueError('A must be a square matrix')
            if n != k:
                raise ValueError('A and b must be aligned.')

            lda = max(1, n)
            # b is stored in F order below, so its leading dimension is its
            # number of rows.  Including m here would make cusolver misread
            # the columns (and run past the buffer) whenever m > k.
            ldb = max(1, k)

            # We copy A and b as cusolver operates inplace
            b = gpuarray.array(b, copy=True, order='F')
            if not self.inplace:
                A = gpuarray.array(A, copy=True)
            A_ptr = A.gpudata
            b_ptr = b.gpudata

            # cusolver expects a F ordered matrix, but A is not explicitly
            # converted between C and F order, instead we switch the
            # "transpose" flag.
            if A.flags['C_CONTIGUOUS']:
                trans = 1 - trans

            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                cusolver_handle, n, n, A_ptr, lda)

            if (thunk.workspace is None
                    or thunk.workspace.size != workspace_size):
                thunk.workspace = gpuarray.zeros((workspace_size, ),
                                                 dtype='float32',
                                                 context=context)

            # cusolver takes the pivot indices and the info flag as C ints,
            # so both buffers are allocated as int32 rather than float32.
            if thunk.pivots is None or thunk.pivots.size != n:
                thunk.pivots = gpuarray.zeros((n, ),
                                              dtype='int32',
                                              context=context)

            if thunk.dev_info is None:
                thunk.dev_info = gpuarray.zeros((1, ),
                                                dtype='int32',
                                                context=context)

            workspace_ptr = thunk.workspace.gpudata
            pivots_ptr = thunk.pivots.gpudata
            dev_info_ptr = thunk.dev_info.gpudata

            # LU-factorize A in place, then solve using the factors.
            cusolver.cusolverDnSgetrf(cusolver_handle, n, n, A_ptr, lda,
                                      workspace_ptr, pivots_ptr, dev_info_ptr)

            cusolver.cusolverDnSgetrs(cusolver_handle, trans, n, m, A_ptr, lda,
                                      pivots_ptr, b_ptr, ldb, dev_info_ptr)

            z[0] = b
Example #2
0
    def perform(self, node, inputs, outputs):
        """Solve ``op(A) x = b`` on the GPU using cuSOLVER.

        Parameters
        ----------
        node
            Not used by this implementation.
        inputs
            ``inputs[0]`` is the 2-d matrix ``A`` and ``inputs[1]`` the 2-d
            right-hand side ``b``; both are GPU arrays.
        outputs
            ``outputs[0][0]`` receives the F-ordered copy of ``b`` that was
            passed to the solver.

        When ``self.A_structure == 'symmetric'`` the potrf/potrs (Cholesky)
        routines are used, otherwise getrf/getrs (LU with pivoting).  ``A``
        is passed to cuSOLVER directly (and therefore overwritten) when
        ``self.inplace`` is true, otherwise a copy is factorized.

        Raises
        ------
        ValueError
            If ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is not square,
            or if the sizes of ``A`` and ``b`` do not match.
        """
        # Here ``inputs`` holds the arrays themselves, so the context is read
        # straight from A.  Indexing ``inputs[0][0]`` would build a throwaway
        # view of A's first row and fail for a matrix with no rows.
        context = inputs[0].context

        # Output storage cell; the result is stored in z[0].
        z = outputs[0]

        # Matrix.
        A = inputs[0]

        # Right-hand side(s), overwritten with the solution.
        b = inputs[1]

        assert (len(A.shape) == 2)
        assert (len(b.shape) == 2)

        if self.trans in ['T', 'C']:
            trans = 1
            l, n = A.shape
            k, m = b.shape
        elif self.trans == 'N':
            trans = 0
            n, l = A.shape
            k, m = b.shape
        else:
            raise ValueError('Invalid value for trans')
        if l != n:
            raise ValueError('A must be a square matrix')
        if n != k:
            raise ValueError('A and b must be aligned.')

        # Leading dimensions of A and of the F-ordered copy of b.
        lda = max(1, n)
        ldb = max(1, k)

        # We copy A and b as cusolver operates inplace
        b = pygpu.array(b, copy=True, order='F')
        if not self.inplace:
            A = pygpu.array(A, copy=True)
        A_ptr = A.gpudata
        b_ptr = b.gpudata

        # cusolver expects a F ordered matrix, but A is not explicitly
        # converted between C and F order, instead we switch the
        # "transpose" flag.
        if A.flags['C_CONTIGUOUS']:
            trans = 1 - trans

        if self.A_structure == 'symmetric':
            # Cholesky path.  ``trans`` is not passed to these routines.
            # The literal 0 is the ``uplo`` argument
            # (CUBLAS_FILL_MODE_LOWER in the cuBLAS enum).
            with context:
                workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                    context.cusolver_handle, 0, n, A_ptr, lda)

            workspace = pygpu.zeros(workspace_size,
                                    dtype='float32',
                                    context=context)

            dev_info = pygpu.zeros((1, ), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            dev_info_ptr = dev_info.gpudata

            with context:
                cusolver.cusolverDnSpotrf(context.cusolver_handle, 0, n, A_ptr,
                                          lda, workspace_ptr, workspace_size,
                                          dev_info_ptr)
                # NOTE(review): presumably raises when the factorization
                # reported a failure through dev_info -- helper not visible
                # here, confirm.
                self.check_dev_info(dev_info)

                cusolverDnSpotrs(context.cusolver_handle, 0, n, m, A_ptr, lda,
                                 b_ptr, ldb, dev_info_ptr)

        else:
            # general case for A
            with context:
                workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                    context.cusolver_handle, n, n, A_ptr, lda)

            workspace = pygpu.zeros(workspace_size,
                                    dtype='float32',
                                    context=context)

            pivots = pygpu.zeros(n, dtype='int32', context=context)

            dev_info = pygpu.zeros((1, ), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            pivots_ptr = pivots.gpudata
            dev_info_ptr = dev_info.gpudata

            with context:
                cusolver.cusolverDnSgetrf(context.cusolver_handle, n, n, A_ptr,
                                          lda, workspace_ptr, pivots_ptr,
                                          dev_info_ptr)
                # NOTE(review): presumably raises when the factorization
                # reported a failure through dev_info -- helper not visible
                # here, confirm.
                self.check_dev_info(dev_info)

                cusolver.cusolverDnSgetrs(context.cusolver_handle, trans, n, m,
                                          A_ptr, lda, pivots_ptr, b_ptr, ldb,
                                          dev_info_ptr)

        z[0] = b
Example #3
0
    def perform(self, node, inputs, outputs):
        """Compute the solution of ``op(A) x = b`` on the GPU via cuSOLVER.

        Parameters
        ----------
        node
            Not used by this implementation.
        inputs
            Two GPU arrays: ``inputs[0]`` is the 2-d matrix ``A`` and
            ``inputs[1]`` the 2-d right-hand side ``b``.
        outputs
            The F-ordered copy of ``b`` handed to the solver is stored in
            ``outputs[0][0]``.

        ``self.A_structure == 'symmetric'`` selects the potrf/potrs
        (Cholesky) routines; any other value selects getrf/getrs (LU with
        pivoting).  Unless ``self.inplace`` is true, ``A`` is copied before
        being factorized in place by cuSOLVER.

        Raises
        ------
        ValueError
            If ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is not square,
            or if the sizes of ``A`` and ``b`` do not match.
        """
        # ``inputs[0]`` already is the matrix, so take its context directly;
        # ``inputs[0][0]`` would slice out the first row just to read the
        # same attribute, and raise on a matrix that has no rows.
        context = inputs[0].context

        # Output storage cell; the result is stored in z[0].
        z = outputs[0]

        # Matrix.
        A = inputs[0]

        # Right-hand side(s), overwritten with the solution.
        b = inputs[1]

        assert(len(A.shape) == 2)
        assert(len(b.shape) == 2)

        if self.trans in ['T', 'C']:
            trans = 1
            l, n = A.shape
            k, m = b.shape
        elif self.trans == 'N':
            trans = 0
            n, l = A.shape
            k, m = b.shape
        else:
            raise ValueError('Invalid value for trans')
        if l != n:
            raise ValueError('A must be a square matrix')
        if n != k:
            raise ValueError('A and b must be aligned.')

        # Leading dimensions of A and of the F-ordered copy of b.
        lda = max(1, n)
        ldb = max(1, k)

        # We copy A and b as cusolver operates inplace
        b = pygpu.array(b, copy=True, order='F')
        if not self.inplace:
            A = pygpu.array(A, copy=True)
        A_ptr = A.gpudata
        b_ptr = b.gpudata

        # cusolver expects a F ordered matrix, but A is not explicitly
        # converted between C and F order, instead we switch the
        # "transpose" flag.
        if A.flags['C_CONTIGUOUS']:
            trans = 1 - trans

        if self.A_structure == 'symmetric':
            # Cholesky path; ``trans`` is not forwarded to these routines.
            # The literal 0 is the ``uplo`` argument
            # (CUBLAS_FILL_MODE_LOWER in the cuBLAS enum).
            with context:
                workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                    context.cusolver_handle, 0, n, A_ptr, lda)

            workspace = pygpu.zeros(workspace_size, dtype='float32',
                                    context=context)

            dev_info = pygpu.zeros((1,), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            dev_info_ptr = dev_info.gpudata

            with context:
                cusolver.cusolverDnSpotrf(
                    context.cusolver_handle, 0, n, A_ptr, lda, workspace_ptr,
                    workspace_size, dev_info_ptr)
                # NOTE(review): helper is defined outside this view;
                # presumably it raises if dev_info signals a failed
                # factorization -- confirm.
                self.check_dev_info(dev_info)

                cusolverDnSpotrs(
                    context.cusolver_handle, 0, n, m, A_ptr, lda,
                    b_ptr, ldb, dev_info_ptr)

        else:
            # general case for A
            with context:
                workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                    context.cusolver_handle, n, n, A_ptr, lda)

            workspace = pygpu.zeros(workspace_size, dtype='float32',
                                    context=context)

            pivots = pygpu.zeros(n, dtype='int32', context=context)

            dev_info = pygpu.zeros((1,), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            pivots_ptr = pivots.gpudata
            dev_info_ptr = dev_info.gpudata

            with context:
                cusolver.cusolverDnSgetrf(
                    context.cusolver_handle, n, n, A_ptr, lda, workspace_ptr,
                    pivots_ptr, dev_info_ptr)
                # NOTE(review): helper is defined outside this view;
                # presumably it raises if dev_info signals a failed
                # factorization -- confirm.
                self.check_dev_info(dev_info)

                cusolver.cusolverDnSgetrs(
                    context.cusolver_handle, trans, n, m, A_ptr, lda,
                    pivots_ptr, b_ptr, ldb, dev_info_ptr)

        z[0] = b
Example #4
0
        def thunk():
            """Run the GPU solve of ``op(A) x = b`` through cuSOLVER's LU routines.

            ``A`` comes from ``inputs[0][0]`` and ``b`` from
            ``inputs[1][0]``.  ``cusolverDnSgetrf`` factorizes ``A`` and
            ``cusolverDnSgetrs`` solves the system; the F-ordered copy of
            ``b`` passed to the solver ends up in ``outputs[0][0]``.  If
            ``self.inplace`` is false ``A`` is copied first, otherwise the
            input matrix itself is given to cuSOLVER and overwritten.

            The workspace, pivot and info buffers live on the function
            object (``thunk.workspace``, ``thunk.pivots``,
            ``thunk.dev_info``) so they can be reused between calls; those
            attributes have to exist (e.g. as ``None``) before the first
            call.

            Raises
            ------
            ValueError
                If ``self.trans`` is not 'N', 'T' or 'C', if ``A`` is not
                square, or if the sizes of ``A`` and ``b`` do not match.
            """
            context = inputs[0][0].context

            # Output storage cell; the result is stored in z[0].
            z = outputs[0]

            # Matrix.
            A = inputs[0][0]

            # Right-hand side(s), overwritten with the solution.
            b = inputs[1][0]

            assert(len(A.shape) == 2)
            assert(len(b.shape) == 2)

            if self.trans in ['T', 'C']:
                trans = 1
                l, n = A.shape
                k, m = b.shape
            elif self.trans == 'N':
                trans = 0
                n, l = A.shape
                k, m = b.shape
            else:
                raise ValueError('Invalid value for trans')
            if l != n:
                raise ValueError('A must be a square matrix')
            if n != k:
                raise ValueError('A and b must be aligned.')

            lda = max(1, n)
            # The copy of b made below is F-ordered, so its leading
            # dimension equals its row count.  Taking m into account would
            # give cusolver a wrong column stride whenever b has more
            # columns than rows.
            ldb = max(1, k)

            # We copy A and b as cusolver operates inplace
            b = gpuarray.array(b, copy=True, order='F')
            if not self.inplace:
                A = gpuarray.array(A, copy=True)
            A_ptr = A.gpudata
            b_ptr = b.gpudata

            # cusolver expects a F ordered matrix, but A is not explicitly
            # converted between C and F order, instead we switch the
            # "transpose" flag.
            if A.flags['C_CONTIGUOUS']:
                trans = 1 - trans

            workspace_size = cusolver.cusolverDnSgetrf_bufferSize(
                cusolver_handle, n, n, A_ptr, lda)

            if (thunk.workspace is None or
                    thunk.workspace.size != workspace_size):
                thunk.workspace = gpuarray.zeros((workspace_size,),
                                                 dtype='float32',
                                                 context=context)

            # Pivot indices and the info flag are C ints on the cusolver
            # side, hence int32 buffers instead of float32.
            if thunk.pivots is None or thunk.pivots.size != n:
                thunk.pivots = gpuarray.zeros((n,),
                                              dtype='int32',
                                              context=context)

            if thunk.dev_info is None:
                thunk.dev_info = gpuarray.zeros((1,),
                                                dtype='int32',
                                                context=context)

            workspace_ptr = thunk.workspace.gpudata
            pivots_ptr = thunk.pivots.gpudata
            dev_info_ptr = thunk.dev_info.gpudata

            # LU-factorize A in place, then solve using the factors.
            cusolver.cusolverDnSgetrf(
                cusolver_handle, n, n, A_ptr, lda, workspace_ptr,
                pivots_ptr, dev_info_ptr)

            cusolver.cusolverDnSgetrs(
                cusolver_handle, trans, n, m, A_ptr, lda,
                pivots_ptr, b_ptr, ldb, dev_info_ptr)

            z[0] = b