Example #1
0
 def __init__(self, ctx, queue, shape):
     '''
     Set up the pThomas (thread-parallel Thomas algorithm) solver.

     Stores the context and queue, records the platform of the
     context's first device, unpacks the grid extents and loads the
     'pThomasKernel' function from 'kernels.cl'.

     Parameters
     ----------
     ctx: context object exposing ``devices`` (presumably a PyOpenCL
         context -- confirm against the caller)
     queue: command queue, stored unchanged
     shape: three-element sequence unpacked as (nz, ny, nx)
     '''
     self.ctx, self.queue = ctx, queue

     first_device = self.ctx.devices[0]
     self.platforms = first_device.platform

     nz, ny, nx = shape
     self.nz, self.ny, self.nx = nz, ny, nx

     # get_funcs is expected to hand back exactly one function here
     loaded = kernels.get_funcs(ctx, 'kernels.cl', 'pThomasKernel')
     (self.pThomas,) = loaded
 def __init__(self, shape):
     '''
     Create context for pThomas (thread-parallel Thomas algorithm)

     Stores the grid extents, loads the 'reducedSolverKernel' function
     from the kernels.cu file located next to this module and declares
     its argument types.

     Parameters
     ----------
     shape: three-element sequence unpacked as (nz, ny, nx)
     '''
     nz, ny, nx = shape
     self.nz, self.ny, self.nx = nz, ny, nx

     module_dir = os.path.dirname(os.path.realpath(__file__))
     kernel_path = '/'.join((module_dir, 'kernels.cu'))
     (self.solver,) = kernels.get_funcs(kernel_path, 'reducedSolverKernel')

     # five pointer-sized arguments followed by three C ints
     arg_types = [np.intp] * 5 + [np.intc] * 3
     self.solver.prepare(arg_types)
Example #3
0
    def __init__(self, shape, coeffs):
        '''
        Create context for the Cyclic Reduction Solver
        that solves a "near-toeplitz"
        tridiagonal system with
        diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

        The precomputed coefficient arrays are transferred with
        gpuarray.to_gpu, and the 'globalForwardReduction' and
        'globalBackSubstitution' functions are loaded from the kernels.cu
        file next to this module and prepared with their argument types.

        Parameters
        ----------
        shape: Three-element sequence unpacked as (nz, ny, nx);
            nx must be a power of 2.
        coeffs: A list of coefficients that make up the tridiagonal matrix:
            [b1, c1, ai, bi, ci, an, bn]

        Raises
        ------
        AssertionError: if nx is not a power of 2.
        '''
        self.nz, self.ny, self.nx = shape
        self.coeffs = coeffs

        # check that system_size is a power of 2.
        # np.int was only an alias of the builtin int and has been removed
        # from NumPy, so the builtin is used directly.  The error is raised
        # explicitly so the check is not stripped under ``python -O``.
        log2_nx = np.log2(self.nx)
        if int(log2_nx) != log2_nx:
            raise AssertionError(
                'nx must be a power of 2, got %r' % (self.nx,))

        # compute coefficients a, b, etc.,
        a, b, c, k1, k2, b_first, k1_first, k1_last = _precompute_coefficients(
            self.nx, self.coeffs)

        # copy coefficients to buffers:
        self.a_d = gpuarray.to_gpu(a)
        self.b_d = gpuarray.to_gpu(b)
        self.c_d = gpuarray.to_gpu(c)
        self.k1_d = gpuarray.to_gpu(k1)
        self.k2_d = gpuarray.to_gpu(k2)
        self.b_first_d = gpuarray.to_gpu(b_first)
        self.k1_first_d = gpuarray.to_gpu(k1_first)
        self.k1_last_d = gpuarray.to_gpu(k1_last)

        # the kernel source lives alongside this module
        self.forward_reduction, self.back_substitution = kernels.get_funcs(
            os.path.dirname(os.path.realpath(__file__)) + '/' + 'kernels.cu',
            'globalForwardReduction', 'globalBackSubstitution')

        # nine pointer-sized arguments followed by four C ints
        self.forward_reduction.prepare([
            np.intp, np.intp, np.intp, np.intp, np.intp, np.intp, np.intp,
            np.intp, np.intp, np.intc, np.intc, np.intc, np.intc
        ])
        # five pointer-sized arguments, five doubles, four C ints
        self.back_substitution.prepare([
            np.intp, np.intp, np.intp, np.intp, np.intp, np.float64,
            np.float64, np.float64, np.float64, np.float64, np.intc, np.intc,
            np.intc, np.intc
        ])
Example #4
0
    def __init__(self, shape, coeffs):
        '''
        Create context for the Cyclic Reduction Solver
        that solves a "near-toeplitz"
        tridiagonal system with
        diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

        The precomputed coefficient arrays are transferred with
        gpuarray.to_gpu.  The kernel source is rendered from
        kernels.jinja2 into kernels.cugen (both next to this module) with
        the grid extents substituted in, and the
        'sharedMemCyclicReduction' function is loaded from the rendered
        file and prepared.

        Parameters
        ----------
        shape: Three-element sequence unpacked as (nz, ny, nx);
            nx must be a power of 2.
        coeffs: A list of coefficients that make up the tridiagonal matrix:
            [b1, c1, ai, bi, ci, an, bn]

        Raises
        ------
        AssertionError: if nx is not a power of 2.
        '''
        self.nz, self.ny, self.nx = shape
        self.coeffs = coeffs

        # check that system_size is a power of 2.
        # np.int was only an alias of the builtin int and has been removed
        # from NumPy, so the builtin is used directly.  The error is raised
        # explicitly so the check is not stripped under ``python -O``.
        log2_nx = np.log2(self.nx)
        if int(log2_nx) != log2_nx:
            raise AssertionError(
                'nx must be a power of 2, got %r' % (self.nx,))

        # compute coefficients a, b, etc.,
        a, b, c, k1, k2, b_first, k1_first, k1_last = _precompute_coefficients(
            self.nx, self.coeffs)

        # copy coefficients to buffers:
        self.a_d = gpuarray.to_gpu(a)
        self.b_d = gpuarray.to_gpu(b)
        self.c_d = gpuarray.to_gpu(c)
        self.k1_d = gpuarray.to_gpu(k1)
        self.k2_d = gpuarray.to_gpu(k2)
        self.b_first_d = gpuarray.to_gpu(b_first)
        self.k1_first_d = gpuarray.to_gpu(k1_first)
        self.k1_last_d = gpuarray.to_gpu(k1_last)

        thisdir = os.path.dirname(os.path.realpath(__file__))
        # Floor division keeps bx an integer: on Python 3 ``nx / 2`` is a
        # float and would reach the template as e.g. 4.0 instead of 4.
        # nx is a power of 2 (checked above), so no remainder is lost.
        kernels.render_kernel(thisdir + '/' + 'kernels.jinja2',
                              thisdir + '/' + 'kernels.cugen',
                              nx=self.nx,
                              ny=self.ny,
                              nz=self.nz,
                              bx=self.nx // 2,
                              by=1)
        # NOTE(review): fixed 5 s pause between rendering and loading the
        # generated file; the reason is not visible here (presumably waits
        # for the rendered source to be available) -- confirm it is needed.
        time.sleep(5)
        self.cyclic_reduction, = kernels.get_funcs(
            thisdir + '/' + 'kernels.cugen', 'sharedMemCyclicReduction')
        # nine 'P' arguments followed by five 'd' arguments
        self.cyclic_reduction.prepare('PPPPPPPPPddddd')
    def __init__(self, ctx, queue, shape, coeffs):
        '''
        Create context for the Cyclic Reduction Solver
        that solves a "near-toeplitz"
        tridiagonal system with
        diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

        The precomputed coefficient arrays are transferred with
        cl_array.to_device on the given queue, and the
        'globalForwardReduction' and 'globalBackSubstitution' functions
        are loaded from 'kernels.cl'.

        Parameters
        ----------
        ctx: PyOpenCL context
        queue: PyOpenCL command queue
        shape: Three-element sequence unpacked as (nz, ny, nx);
            nx must be a power of 2.
        coeffs: A list of coefficients that make up the tridiagonal matrix:
            [b1, c1, ai, bi, ci, an, bn]

        Raises
        ------
        AssertionError: if nx is not a power of 2.
        '''
        self.ctx = ctx
        self.queue = queue
        self.device = self.ctx.devices[0]
        self.platform = self.device.platform
        self.nz, self.ny, self.nx = shape
        self.coeffs = coeffs

        # check that system_size is a power of 2.
        # np.int was only an alias of the builtin int and has been removed
        # from NumPy, so the builtin is used directly.  The error is raised
        # explicitly so the check is not stripped under ``python -O``.
        log2_nx = np.log2(self.nx)
        if int(log2_nx) != log2_nx:
            raise AssertionError(
                'nx must be a power of 2, got %r' % (self.nx,))

        # compute coefficients a, b, etc.,
        a, b, c, k1, k2, b_first, k1_first, k1_last = self._precompute_coefficients(
        )

        # copy coefficients to device arrays:
        self.a_d = cl_array.to_device(queue, a)
        self.b_d = cl_array.to_device(queue, b)
        self.c_d = cl_array.to_device(queue, c)
        self.k1_d = cl_array.to_device(queue, k1)
        self.k2_d = cl_array.to_device(queue, k2)
        self.b_first_d = cl_array.to_device(queue, b_first)
        self.k1_first_d = cl_array.to_device(queue, k1_first)
        self.k1_last_d = cl_array.to_device(queue, k1_last)

        self.forward_reduction, self.back_substitution = kernels.get_funcs(
            self.ctx, 'kernels.cl', 'globalForwardReduction',
            'globalBackSubstitution')
    def __init__(self, ctx, queue, shape, coeffs):
        """
        Create context for the Cyclic Reduction Solver
        that solves a "near-toeplitz"
        tridiagonal system with
        diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

        The precomputed coefficient arrays are transferred with
        cl_array.to_device on the given queue, and the
        "globalForwardReduction" and "globalBackSubstitution" functions
        are loaded from "kernels.cl".

        Parameters
        ----------
        ctx: PyOpenCL context
        queue: PyOpenCL command queue
        shape: Three-element sequence unpacked as (nz, ny, nx);
            nx must be a power of 2.
        coeffs: A list of coefficients that make up the tridiagonal matrix:
            [b1, c1, ai, bi, ci, an, bn]

        Raises
        ------
        AssertionError: if nx is not a power of 2.
        """
        self.ctx = ctx
        self.queue = queue
        self.device = self.ctx.devices[0]
        self.platform = self.device.platform
        self.nz, self.ny, self.nx = shape
        self.coeffs = coeffs

        # check that system_size is a power of 2.
        # np.int was only an alias of the builtin int and has been removed
        # from NumPy, so the builtin is used directly.  The error is raised
        # explicitly so the check is not stripped under ``python -O``.
        log2_nx = np.log2(self.nx)
        if int(log2_nx) != log2_nx:
            raise AssertionError(
                "nx must be a power of 2, got %r" % (self.nx,))

        # compute coefficients a, b, etc.,
        a, b, c, k1, k2, b_first, k1_first, k1_last = self._precompute_coefficients()

        # copy coefficients to device arrays:
        self.a_d = cl_array.to_device(queue, a)
        self.b_d = cl_array.to_device(queue, b)
        self.c_d = cl_array.to_device(queue, c)
        self.k1_d = cl_array.to_device(queue, k1)
        self.k2_d = cl_array.to_device(queue, k2)
        self.b_first_d = cl_array.to_device(queue, b_first)
        self.k1_first_d = cl_array.to_device(queue, k1_first)
        self.k1_last_d = cl_array.to_device(queue, k1_last)

        self.forward_reduction, self.back_substitution = kernels.get_funcs(
            self.ctx, "kernels.cl", "globalForwardReduction", "globalBackSubstitution"
        )
Example #7
0
 def init_cu(self):
     '''
     Load the 'computeRHS', 'sumSolutions' and 'negateAndCopyFaces'
     functions from the kernels.cu file next to this module, prepare
     each with its argument format string, and create the start/end
     events.
     '''
     module_dir = os.path.dirname(os.path.realpath(__file__))
     kernel_file = '/'.join((module_dir, 'kernels.cu'))

     rhs_fn, sum_fn, faces_fn = kernels.get_funcs(
         kernel_file, 'computeRHS', 'sumSolutions', 'negateAndCopyFaces')
     self.compute_RHS_kernel = rhs_fn
     self.sum_solutions_kernel = sum_fn
     self.copy_faces_kernel = faces_fn

     # each function is paired with the argument format it is prepared with
     prepared_with = (
         (self.compute_RHS_kernel, 'PPdii'),
         (self.sum_solutions_kernel, 'PPPPPiii'),
         (self.copy_faces_kernel, 'PPiiiii'),
     )
     for kernel_fn, arg_format in prepared_with:
         kernel_fn.prepare(arg_format)

     self.start, self.end = cuda.Event(), cuda.Event()
Example #8
0
    def init_cl(self):
        '''
        Select an OpenCL device on the first platform, create a context
        and command queue for it, and load the 'computeRHS',
        'sumSolutions' and 'negateAndCopyFaces' functions from
        'kernels.cl'.

        When ``self.use_gpu`` is set the device is chosen by
        ``self.da.rank`` modulo the number of devices; otherwise the
        first device is used.
        '''
        self.platform = cl.get_platforms()[0]

        # enumerate once so the count and the lookup use the same list
        devices = self.platform.get_devices()
        if self.use_gpu:
            # NOTE(review): the list is not filtered by device type, so it
            # may contain non-GPU devices -- confirm this is intended.
            self.device = devices[self.da.rank % len(devices)]
        else:
            self.device = devices[0]
        self.ctx = cl.Context([self.device])
        self.queue = cl.CommandQueue(self.ctx)

        self.compute_RHS_kernel, self.sum_solutions_kernel, self.copy_faces_kernel, = kernels.get_funcs(
            self.ctx, 'kernels.cl', 'computeRHS', 'sumSolutions',
            'negateAndCopyFaces')
 def init_cl(self):
     '''
     Select an OpenCL device on the first platform, create a context
     and command queue for it, and load the 'computeRHS',
     'sumSolutions' and 'negateAndCopyFaces' functions from
     'kernels.cl'.

     When ``self.use_gpu`` is set the device is chosen by
     ``self.da.rank`` modulo the number of devices; otherwise the
     first device is used.
     '''
     self.platform = cl.get_platforms()[0]

     # enumerate once so the count and the lookup use the same list
     devices = self.platform.get_devices()
     if self.use_gpu:
         # NOTE(review): the list is not filtered by device type, so it
         # may contain non-GPU devices -- confirm this is intended.
         self.device = devices[self.da.rank % len(devices)]
     else:
         self.device = devices[0]
     self.ctx = cl.Context([self.device])
     self.queue = cl.CommandQueue(self.ctx)

     self.compute_RHS_kernel, self.sum_solutions_kernel, self.copy_faces_kernel, = kernels.get_funcs(
             self.ctx, 'kernels.cl', 'computeRHS', 'sumSolutions', 'negateAndCopyFaces')