def __init__(self, ctx, queue, shape):
    '''
    Set up the pThomas (thread-parallel Thomas algorithm) solver.

    Parameters
    ----------
    ctx:
        Context object; the platform of its first device is recorded
        (presumably a PyOpenCL context -- confirm against callers).
    queue:
        Command queue, stored for later use.
    shape:
        Sequence of three sizes, unpacked as (nz, ny, nx).
    '''
    self.ctx = ctx
    self.queue = queue

    first_device = self.ctx.devices[0]
    self.platforms = first_device.platform

    (self.nz, self.ny, self.nx) = shape

    # Exactly one function is expected back for the single kernel name.
    [self.pThomas] = kernels.get_funcs(ctx, 'kernels.cl', 'pThomasKernel')
def __init__(self, shape):
    '''
    Load and prepare the 'reducedSolverKernel' function from the
    kernels.cu file located next to this module.

    Parameters
    ----------
    shape:
        Sequence of three sizes, unpacked as (nz, ny, nx).
    '''
    (self.nz, self.ny, self.nx) = shape

    module_dir = os.path.dirname(os.path.realpath(__file__))
    kernel_file = '/'.join((module_dir, 'kernels.cu'))

    # Exactly one function is expected back for the single kernel name.
    [self.solver] = kernels.get_funcs(kernel_file, 'reducedSolverKernel')

    # Argument signature: five np.intp values followed by three np.intc
    # values (presumably device pointers, then integer sizes -- confirm
    # against the kernel source).
    arg_types = [np.intp] * 5 + [np.intc] * 3
    self.solver.prepare(arg_types)
def __init__(self, shape, coeffs):
    '''
    Create context for the Cyclic Reduction Solver
    that solves a "near-toeplitz"
    tridiagonal system with
    diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

    Parameters
    ----------
    shape: Sequence of three sizes, unpacked as (nz, ny, nx).
        nx is the size of the tridiagonal system and must be
        a power of 2.
    coeffs: A list of coefficients that make up the tridiagonal matrix:
        [b1, c1, ai, bi, ci, an, bn]

    Raises
    ------
    AssertionError: if nx is not a power of 2.
    '''
    self.nz, self.ny, self.nx = shape
    self.coeffs = coeffs

    # check that system_size is a power of 2.
    # The builtin int is used because the np.int alias (which was just
    # the builtin int) has been removed from NumPy.
    log2_nx = np.log2(self.nx)
    assert int(log2_nx) == log2_nx, 'nx must be a power of 2'

    # compute coefficients a, b, etc.,
    a, b, c, k1, k2, b_first, k1_first, k1_last = _precompute_coefficients(
        self.nx, self.coeffs)

    # copy coefficients to buffers:
    self.a_d = gpuarray.to_gpu(a)
    self.b_d = gpuarray.to_gpu(b)
    self.c_d = gpuarray.to_gpu(c)
    self.k1_d = gpuarray.to_gpu(k1)
    self.k2_d = gpuarray.to_gpu(k2)
    self.b_first_d = gpuarray.to_gpu(b_first)
    self.k1_first_d = gpuarray.to_gpu(k1_first)
    self.k1_last_d = gpuarray.to_gpu(k1_last)

    # load both kernels from the kernels.cu file next to this module
    thisdir = os.path.dirname(os.path.realpath(__file__))
    self.forward_reduction, self.back_substitution = kernels.get_funcs(
        thisdir + '/' + 'kernels.cu',
        'globalForwardReduction', 'globalBackSubstitution')

    # forward reduction signature: nine np.intp, then four np.intc
    self.forward_reduction.prepare([
        np.intp, np.intp, np.intp, np.intp, np.intp,
        np.intp, np.intp, np.intp, np.intp,
        np.intc, np.intc, np.intc, np.intc
    ])

    # back substitution signature: five np.intp, five np.float64,
    # then four np.intc
    self.back_substitution.prepare([
        np.intp, np.intp, np.intp, np.intp, np.intp,
        np.float64, np.float64, np.float64, np.float64, np.float64,
        np.intc, np.intc, np.intc, np.intc
    ])
def __init__(self, shape, coeffs):
    '''
    Create context for the Cyclic Reduction Solver
    that solves a "near-toeplitz"
    tridiagonal system with
    diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

    The kernel source is rendered from the kernels.jinja2 template
    into kernels.cugen (both next to this module), and the
    'sharedMemCyclicReduction' function is loaded from the result.

    Parameters
    ----------
    shape: Sequence of three sizes, unpacked as (nz, ny, nx).
        nx is the size of the tridiagonal system and must be
        a power of 2.
    coeffs: A list of coefficients that make up the tridiagonal matrix:
        [b1, c1, ai, bi, ci, an, bn]

    Raises
    ------
    AssertionError: if nx is not a power of 2.
    '''
    self.nz, self.ny, self.nx = shape
    self.coeffs = coeffs

    # check that system_size is a power of 2.
    # The builtin int is used because the np.int alias (which was just
    # the builtin int) has been removed from NumPy.
    log2_nx = np.log2(self.nx)
    assert int(log2_nx) == log2_nx, 'nx must be a power of 2'

    # compute coefficients a, b, etc.,
    a, b, c, k1, k2, b_first, k1_first, k1_last = _precompute_coefficients(
        self.nx, self.coeffs)

    # copy coefficients to buffers:
    self.a_d = gpuarray.to_gpu(a)
    self.b_d = gpuarray.to_gpu(b)
    self.c_d = gpuarray.to_gpu(c)
    self.k1_d = gpuarray.to_gpu(k1)
    self.k2_d = gpuarray.to_gpu(k2)
    self.b_first_d = gpuarray.to_gpu(b_first)
    self.k1_first_d = gpuarray.to_gpu(k1_first)
    self.k1_last_d = gpuarray.to_gpu(k1_last)

    thisdir = os.path.dirname(os.path.realpath(__file__))

    # Floor division keeps bx integral; true division would hand the
    # template a float (e.g. 4.0 instead of 4) under Python 3.
    kernels.render_kernel(
        thisdir + '/' + 'kernels.jinja2',
        thisdir + '/' + 'kernels.cugen',
        nx=self.nx, ny=self.ny, nz=self.nz,
        bx=self.nx // 2, by=1)

    # NOTE(review): fixed 5 s pause kept from the original; presumably it
    # waits for the rendered file to become visible -- confirm whether
    # render_kernel already guarantees that, and drop the sleep if so.
    time.sleep(5)

    self.cyclic_reduction, = kernels.get_funcs(
        thisdir + '/' + 'kernels.cugen', 'sharedMemCyclicReduction')
    self.cyclic_reduction.prepare('PPPPPPPPPddddd')
def __init__(self, ctx, queue, shape, coeffs):
    '''
    Create context for the Cyclic Reduction Solver
    that solves a "near-toeplitz"
    tridiagonal system with
    diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

    Parameters
    ----------
    ctx: PyOpenCL context
    queue: PyOpenCL command queue
    shape: Sequence of three sizes, unpacked as (nz, ny, nx).
        nx is the size of the tridiagonal system and must be
        a power of 2.
    coeffs: A list of coefficients that make up the tridiagonal matrix:
        [b1, c1, ai, bi, ci, an, bn]

    Raises
    ------
    AssertionError: if nx is not a power of 2.
    '''
    self.ctx = ctx
    self.queue = queue
    self.device = self.ctx.devices[0]
    self.platform = self.device.platform
    self.nz, self.ny, self.nx = shape
    self.coeffs = coeffs

    # check that system_size is a power of 2.
    # The builtin int is used because the np.int alias (which was just
    # the builtin int) has been removed from NumPy.
    log2_nx = np.log2(self.nx)
    assert int(log2_nx) == log2_nx, 'nx must be a power of 2'

    # compute coefficients a, b, etc.,
    a, b, c, k1, k2, b_first, k1_first, k1_last = \
        self._precompute_coefficients()

    # copy coefficients to device arrays:
    self.a_d = cl_array.to_device(queue, a)
    self.b_d = cl_array.to_device(queue, b)
    self.c_d = cl_array.to_device(queue, c)
    self.k1_d = cl_array.to_device(queue, k1)
    self.k2_d = cl_array.to_device(queue, k2)
    self.b_first_d = cl_array.to_device(queue, b_first)
    self.k1_first_d = cl_array.to_device(queue, k1_first)
    self.k1_last_d = cl_array.to_device(queue, k1_last)

    self.forward_reduction, self.back_substitution = kernels.get_funcs(
        self.ctx, 'kernels.cl',
        'globalForwardReduction', 'globalBackSubstitution')
def __init__(self, ctx, queue, shape, coeffs):
    """
    Create context for the Cyclic Reduction Solver
    that solves a "near-toeplitz"
    tridiagonal system with
    diagonals:
        a = (_, ai, ai .... an)
        b[:] = (b1, bi, bi, bi... bn)
        c[:] = (c1, ci, ci, ... _)

    Parameters
    ----------
    ctx: PyOpenCL context
    queue: PyOpenCL command queue
    shape: Sequence of three sizes, unpacked as (nz, ny, nx).
        nx is the size of the tridiagonal system and must be
        a power of 2.
    coeffs: A list of coefficients that make up the tridiagonal matrix:
        [b1, c1, ai, bi, ci, an, bn]

    Raises
    ------
    AssertionError: if nx is not a power of 2.
    """
    self.ctx = ctx
    self.queue = queue
    self.device = self.ctx.devices[0]
    self.platform = self.device.platform
    self.nz, self.ny, self.nx = shape
    self.coeffs = coeffs

    # check that system_size is a power of 2.
    # The builtin int is used because the np.int alias (which was just
    # the builtin int) has been removed from NumPy.
    log2_nx = np.log2(self.nx)
    assert int(log2_nx) == log2_nx, "nx must be a power of 2"

    # compute coefficients a, b, etc.,
    (
        a, b, c, k1, k2, b_first, k1_first, k1_last
    ) = self._precompute_coefficients()

    # copy coefficients to device arrays:
    self.a_d = cl_array.to_device(queue, a)
    self.b_d = cl_array.to_device(queue, b)
    self.c_d = cl_array.to_device(queue, c)
    self.k1_d = cl_array.to_device(queue, k1)
    self.k2_d = cl_array.to_device(queue, k2)
    self.b_first_d = cl_array.to_device(queue, b_first)
    self.k1_first_d = cl_array.to_device(queue, k1_first)
    self.k1_last_d = cl_array.to_device(queue, k1_last)

    self.forward_reduction, self.back_substitution = kernels.get_funcs(
        self.ctx, "kernels.cl", "globalForwardReduction", "globalBackSubstitution"
    )
def init_cu(self):
    '''
    Load the three kernels from the kernels.cu file next to this
    module, prepare their argument signatures, and create the
    start/end CUDA events.
    '''
    module_dir = os.path.dirname(os.path.realpath(__file__))
    kernel_file = '/'.join((module_dir, 'kernels.cu'))

    # Exactly three functions are expected back, in the requested order.
    loaded = kernels.get_funcs(
        kernel_file, 'computeRHS', 'sumSolutions', 'negateAndCopyFaces')
    (self.compute_RHS_kernel,
     self.sum_solutions_kernel,
     self.copy_faces_kernel) = loaded

    # Argument-signature strings handed to each kernel's prepare().
    signatures = (
        (self.compute_RHS_kernel, 'PPdii'),
        (self.sum_solutions_kernel, 'PPPPPiii'),
        (self.copy_faces_kernel, 'PPiiiii'),
    )
    for kernel, signature in signatures:
        kernel.prepare(signature)

    # Event pair stored for later use (presumably timing -- confirm).
    self.start = cuda.Event()
    self.end = cuda.Event()
def init_cl(self):
    '''
    Pick an OpenCL device on the first platform, create a context and
    command queue for it, and load the three kernels from kernels.cl.

    With self.use_gpu set, the device index is self.da.rank modulo the
    number of devices on the platform; otherwise the first device is
    used.
    '''
    self.platform = cl.get_platforms()[0]

    # NOTE(review): get_devices() is not filtered by device type, so the
    # count may include non-GPU devices -- confirm this is intended.
    devices = self.platform.get_devices()
    index = self.da.rank % len(devices) if self.use_gpu else 0
    self.device = devices[index]

    self.ctx = cl.Context([self.device])
    self.queue = cl.CommandQueue(self.ctx)

    # Exactly three functions are expected back, in the requested order.
    loaded = kernels.get_funcs(
        self.ctx, 'kernels.cl',
        'computeRHS', 'sumSolutions', 'negateAndCopyFaces')
    (self.compute_RHS_kernel,
     self.sum_solutions_kernel,
     self.copy_faces_kernel) = loaded
def init_cl(self):
    '''
    Pick an OpenCL device on the first platform, create a context and
    command queue for it, and load the three kernels from kernels.cl.

    With self.use_gpu set, the device index is self.da.rank modulo the
    number of devices on the platform; otherwise the first device is
    used.
    '''
    self.platform = cl.get_platforms()[0]

    # NOTE(review): get_devices() is not filtered by device type, so the
    # count may include non-GPU devices -- confirm this is intended.
    available = self.platform.get_devices()
    if self.use_gpu:
        chosen = self.da.rank % len(available)
    else:
        chosen = 0
    self.device = available[chosen]

    self.ctx = cl.Context([self.device])
    self.queue = cl.CommandQueue(self.ctx)

    # Exactly three functions are expected back, in the requested order.
    kernel_names = ('computeRHS', 'sumSolutions', 'negateAndCopyFaces')
    (self.compute_RHS_kernel,
     self.sum_solutions_kernel,
     self.copy_faces_kernel) = kernels.get_funcs(
        self.ctx, 'kernels.cl', *kernel_names)