def setUp(self):
    r"""Build the four-particle fixture and, when OpenCL is available, its program.

    The setup consists of four particles placed at the vertices of a
    unit square. The force function to be tested is:

    .. math::

        f_i = \sum_{j=1}^{4} \frac{m_j}{|x_j - x_i|^3 + \eps}(x_j - x_i)

    The mass of each particle is 1.

    Attributes set on ``self``: ``precision``, ``np``, ``pa``, ``func`` and
    ``eps``; additionally ``ctx``, ``q`` and ``prog`` when
    ``solver.HAS_CL`` is true.
    """
    # NOTE: the docstring is a raw string on purpose -- in a normal string
    # ``\f`` (from ``\frac``) is a form feed and ``\s`` / ``\e`` are invalid
    # escape sequences.
    self.precision = "single"
    self.np = 4

    # Vertices of the unit square in the z = 0 plane.
    x = numpy.array([0, 0, 1, 1], numpy.float64)
    y = numpy.array([0, 1, 1, 0], numpy.float64)
    z = numpy.zeros_like(x)

    # Unit mass for every particle.
    m = numpy.ones_like(x)

    # Zero-initialised temporary arrays handed to the particle array.
    tmpx = numpy.zeros_like(x)
    tmpy = numpy.zeros_like(x)
    tmpz = numpy.zeros_like(x)

    self.pa = pa = base.get_particle_array(name="test", x=x, y=y, z=z,
                                           m=m, tmpx=tmpx, tmpy=tmpy,
                                           tmpz=tmpz,
                                           cl_precision=self.precision)

    # The same particle array is passed as both arguments of get_func.
    self.func = func = sph.NBodyForce.get_func(pa, pa)

    self.eps = func.eps

    if solver.HAS_CL:
        self.ctx = ctx = cl.create_some_context()
        self.q = q = cl.CommandQueue(ctx)

        pa.setup_cl(ctx, q)

        pysph_root = solver.get_pysph_root()

        # Read the kernel template for this function at the chosen precision.
        template = solver.cl_read(
            path.join(pysph_root, "sph/funcs/external_force.clt"),
            function_name=func.cl_kernel_function_name,
            precision=self.precision)

        prog_src = solver.create_program(template, func)

        self.prog = cl.Program(ctx, prog_src).build(solver.get_cl_include())
def setup_cl(self):
    """Prepare the OpenCL context, queue and program for ``self.grad_func``.

    Does nothing beyond reading ``self.pa`` when ``solver.HAS_CL`` is false.
    Otherwise it stores a new context in ``self.ctx`` and a command queue in
    ``self.q``, calls ``setup_cl`` on the particle array, and stores the
    built program in ``self.prog``. The kernel template is read from
    ``sph/funcs/pressure_funcs.clt`` under the PySPH root, using
    ``self.precision``.
    """
    particles = self.pa

    # Without OpenCL support there is nothing to set up.
    if not solver.HAS_CL:
        return

    context = cl.create_some_context()
    self.ctx = context

    queue = cl.CommandQueue(context)
    self.q = queue

    particles.setup_cl(context, queue)

    root_dir = solver.get_pysph_root()
    template_path = path.join(root_dir, "sph/funcs/pressure_funcs.clt")
    kernel_name = self.grad_func.cl_kernel_function_name

    kernel_template = solver.cl_read(
        template_path,
        function_name=kernel_name,
        precision=self.precision,
    )

    program_source = solver.create_program(kernel_template, self.grad_func)
    include_options = solver.get_cl_include()

    self.prog = cl.Program(context, program_source).build(include_options)
# Fragment of a larger routine: ``platform``, ``x``, ``y``, ``z`` and ``np``
# are bound before this point and are not visible here. ``np`` is used as the
# bound of ``range`` below.

# Build a context over all devices of the platform; the command queue is
# attached to the first device.
devices = platform.get_devices()
device = devices[0]
ctx = cl.Context(devices)
q = cl.CommandQueue(ctx, device)

mf = cl.mem_flags

# Read-write device buffers created with COPY_HOST_PTR from the host arrays.
xbuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=x)
ybuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=y)
zbuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=z)

# Remaining kernel arguments, passed after xbuf.
args = (ybuf, zbuf)

# Read the common OpenCL source at single precision and build it.
pysph_root = solver.get_pysph_root()
src = solver.cl_read(path.join(pysph_root, 'solver/cl_common.cl'), precision='single')

prog = cl.Program(ctx, src).build(options=solver.get_cl_include())

# Launch the OpenCL kernel; the two tuples are presumably the global and
# local work sizes (PyOpenCL kernel-call convention).
prog.set_tmp_to_zero(q, (16, 16, 16), (1,1,1), xbuf, *args)

# Read the buffer contents back to the host arrays through the solver helper.
solver.enqueue_copy(q, src=xbuf, dst=x)
solver.enqueue_copy(q, src=ybuf, dst=y)
solver.enqueue_copy(q, src=zbuf, dst=z)

# The first ``np`` entries of every array must be exactly zero.
for i in range(np):
    assert x[i] == 0.0
    assert y[i] == 0.0
    assert z[i] == 0.0
# Fragment of a larger routine: ``platform``, ``x``, ``y``, ``z`` and ``np``
# are bound before this point and are not visible here. ``np`` is used as the
# bound of ``range`` below.

# Build a context over all devices of the platform; the command queue is
# attached to the first device.
devices = platform.get_devices()
device = devices[0]
ctx = cl.Context(devices)
q = cl.CommandQueue(ctx, device)

mf = cl.mem_flags

# Read-write device buffers created with COPY_HOST_PTR from the host arrays.
xbuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=x)
ybuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=y)
zbuf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=z)

# Remaining kernel arguments, passed after xbuf.
args = (ybuf, zbuf)

# Read the common OpenCL source at single precision and build it.
pysph_root = solver.get_pysph_root()
src = solver.cl_read(path.join(pysph_root, 'solver/cl_common.cl'), precision='single')

prog = cl.Program(ctx, src).build(options=solver.get_cl_include())

# Launch the OpenCL kernel; the two tuples are presumably the global and
# local work sizes (PyOpenCL kernel-call convention).
prog.set_tmp_to_zero(q, (16, 16, 16), (1, 1, 1), xbuf, *args)

# Read the buffer contents back to the host arrays, waiting on each copy
# before continuing.
cl.enqueue_copy(q, src=xbuf, dest=x).wait()
cl.enqueue_copy(q, src=ybuf, dest=y).wait()
cl.enqueue_copy(q, src=zbuf, dest=z).wait()

# The first ``np`` entries of every array must be exactly zero.
for i in range(np):
    assert x[i] == 0.0
    assert y[i] == 0.0
    assert z[i] == 0.0