from kemp.fdtd3d.gpu import Fields, Core, DirectIncident, GetFields, Pbc
from kemp.fdtd3d.util import common_exchange
from kemp.fdtd3d.util import common_gpu
import pyopencl as cl

# Grid dimensions and time-stepping parameters.
nx, ny, nz = 200, 300, 16
tmax, tgap = 200, 10

# instances
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
fields = Fields(context, gpu_devices[0], nx, ny, nz)
#fields = Fields(nx, ny, nz)
core = Core(fields)

# One Pbc instance is created per axis, in x, y, z order; `pbc` is left bound
# to the last ('z') instance.
for axis in 'xyz':
    pbc = Pbc(fields, axis)
print(fields.instance_list)


# NOTE(review): relies on `np` (numpy) being imported outside this section.
def tfunc(tstep):
    """Return the source amplitude sin(0.05 * tstep) for time step `tstep`."""
    return np.sin(0.05 * tstep)


# Two DirectIncident instances are created; `incident` keeps only the second
# ('ey') one.
incident = DirectIncident(fields, 'ez', (20, 0, 0), (20, ny-1, nz-1), tfunc)
incident = DirectIncident(fields, 'ey', (0, 20, 0), (nx-1, 20, nz-1), tfunc)
#incident = DirectIncident(fields, 'ex', (0, 0, 20), (nx-1, ny-1, 20), tfunc)

# 'ez' is read over the full x-y extent at the middle z index.
getf = GetFields(fields, 'ez', (0, 0, nz // 2), (nx-1, ny-1, nz // 2))

# for verifying the pbc
vpbc = common_exchange.VerifyPbc(fields, 'xyz')

# plot
# Grid size selection. The MB figure after each candidate is the size
# annotation carried over from the existing comments.
#nx, ny, nz = 544, 544, 544  # 5527 MB
#nx, ny, nz = 512, 512, 512  # 4608 MB
#nx, ny, nz = 480, 480, 480  # 3796 MB
nx, ny, nz = 800, 256, 256  # 576 MB
#nx, ny, nz = 128, 128, 128  # 72 MB

coeff_use = 'e'
precision_float = 'single'

# instances
# NOTE(review): common_gpu, cl, cpu, node, Fields, Core, is_plot, rank and
# size are all expected to be defined outside this section.
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]
qtask = cpu.QueueTask()
fields = Fields(context, device, qtask, nx, ny, nz,
                coeff_use, precision_float)
Core(fields)

# Fewer time steps when plotting.
if is_plot:
    tmax = 250
else:
    tmax = 1000

# Exchange direction by rank: the first rank gets '+', the last rank gets
# '-', and every other rank gets both.
direction = '+-'
if rank == 0:
    direction = '+'
elif rank == size - 1:
    direction = '-'

#exch = node.ExchangeMpiNonBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlock(fields, direction)
#exch = node.ExchangeMpiBufferBlockSplit(fields, direction)
exch = node.ExchangeMpiBufferNonBlockSplitEnqueue(fields, direction, tmax)

# A cpu.Core is created for each exchange-side field object that the chosen
# direction uses.
if '+' in direction:
    cpu.Core(exch.cpuf_p)
if '-' in direction:
    cpu.Core(exch.cpuf_m)

is_master = rank == 0
def runTest(self):
    """Check that the OpenCL field update matches the reference update exactly.

    ``self.args`` supplies ``(ufunc, nx, ny, nz, coeff_use, precision_float,
    tmax)``.  ``ufunc`` selects the update under test: ``'e'`` runs
    ``fields.update_e()`` and ``'h'`` runs ``fields.update_h()``, each
    ``tmax`` times, paired step by step with the matching ``common_update``
    routine applied to the host arrays.  Any other ``ufunc`` value performs
    the set-up only and asserts nothing.

    After the time loop every component of the selected kind is copied from
    its device buffer into a host array and compared with the corresponding
    host array in ``ehs``; the norm of the difference must be exactly zero.
    When ``fields.pad`` is non-zero, the trailing cells along the last axis
    must be zero as well.
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    # Device-side objects are built on the first listed GPU device.
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = Fields(context, device, nx, ny, nz, coeff_use, precision_float)
    # The Core instance is never used directly here; presumably constructing
    # it attaches the update routines to `fields` -- TODO confirm.
    Core(fields)

    # allocations
    dtype = fields.dtype
    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    ehs = common_update.generate_random_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_eh_bufs(*ehs)

    ces, chs = common_update.generate_random_cs(coeff_use, nx, ny, nz, dtype)
    if 'e' in coeff_use:
        fields.set_ce_bufs(*ces)
    if 'h' in coeff_use:
        fields.set_ch_bufs(*chs)

    # Host array of shape fields.ns_pitch that receives each device buffer.
    tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

    # Select everything that differs between the 'e' and 'h' checks so the
    # update loop and the verification below are written only once.
    if ufunc == 'e':
        device_update = fields.update_e
        reference_update = common_update.update_e
        coeffs = ces
        checked = slice(0, 3)
        # For 'ex' and 'ey' the zero check starts one cell before the
        # padding; 'ez' is checked over the padding cells only.
        wide_padding = ('ex', 'ey')
    elif ufunc == 'h':
        device_update = fields.update_h
        reference_update = common_update.update_h
        coeffs = chs
        checked = slice(3, None)
        # NOTE(review): the previous code branched on strf == 'hz' here but
        # both arms used the same slice (-fields.pad:); confirm that no h
        # component was meant to be checked over a different range.
        wide_padding = ()
    else:
        return

    # update
    for _ in range(tmax):
        device_update()
        reference_update(ehs, coeffs)

    # The pairs are built after the update loop, as before, so they refer to
    # whatever `ehs` holds once the reference update has run.
    for strf, eh in list(zip(strf_list, ehs))[checked]:
        cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))

        diff = eh - tmpf[:, :, fields.slice_z]
        norm = np.linalg.norm(diff)
        max_diff = np.abs(diff).max()
        self.assertEqual(
            norm, 0,
            '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

        if fields.pad != 0:
            if strf in wide_padding:
                pad_start = -fields.pad - 1
            else:
                pad_start = -fields.pad
            norm2 = np.linalg.norm(tmpf[:, :, pad_start:])
            self.assertEqual(
                norm2, 0,
                '%s, %s, %g, padding' % (self.args, strf, norm2))