def runTest(self):
    """Check that the GPU core update matches the naive reference exactly.

    ``self.args`` is unpacked as
    ``(ufunc, nx, ny, nz, coeff_use, precision_float, tmax)``.  Both
    implementations are loaded with the same random fields and
    coefficients, advanced ``tmax`` times with ``update_e`` (``ufunc ==
    'e'``) or ``update_h`` (``ufunc == 'h'``), and the three updated
    components are compared; the L2 norm of the difference must be 0.
    Any other ``ufunc`` value performs no update and no comparison.
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    # NOTE(review): the leading 0 is presumably the CUDA device id -- confirm
    # against gpu.Fields.
    fields = gpu.Fields(0, nx, ny, nz, coeff_use, precision_float)
    try:
        gpu.Core(fields)
        fields_ref = naive.Fields(nx, ny, nz, precision_float,
                                  segment_nbytes=64)
        naive.Core(fields_ref)

        # allocations
        dtype = fields.dtype
        strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

        ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
        fields.set_eh_bufs(*ehs)
        fields_ref.set_ehs(*ehs)

        ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
        if 'e' in coeff_use:
            fields.set_ce_bufs(*ces)
            fields_ref.set_ces(*ces)
        if 'h' in coeff_use:
            fields.set_ch_bufs(*chs)
            fields_ref.set_chs(*chs)

        # Host staging buffer for device -> host copies.
        tmpf = np.zeros(fields.ns_pitch, dtype=dtype)

        # update
        if ufunc in ('e', 'h'):
            update = getattr(fields, 'update_' + ufunc)
            update_ref = getattr(fields_ref, 'update_' + ufunc)
            for _ in xrange(0, tmax):
                update()
                update_ref()

            # 'e' checks the first three names, 'h' the remaining ones.
            pairs = list(zip(strf_list, ehs))
            checked = pairs[:3] if ufunc == 'e' else pairs[3:]
            for strf, _ in checked:
                cuda.memcpy_dtoh(tmpf, fields.get_buf(strf))
                diff = fields_ref.get(strf) - tmpf
                norm = np.linalg.norm(diff)
                max_diff = np.abs(diff).max()
                self.assertEqual(
                    norm, 0,
                    '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))
    finally:
        # Always release the context, even when an assertion fails, so a
        # failing case cannot leak it into the following tests.
        fields.context.pop()
def runTest(self):
    """Compare the OpenCL core update with the naive reference.

    ``self.args`` is ``(ufunc, nx, ny, nz, coeff_use, precision_float,
    tmax)``.  Identical random fields/coefficients are loaded into both
    implementations; after ``tmax`` updates of the selected kind the
    updated components are copied back from the device and must equal the
    reference (difference norm of exactly 0).
    """
    ufunc, nx, ny, nz, coeff_use, precision_float, tmax = self.args

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    device = gpu_devices[0]
    fields = gpu.Fields(context, device, nx, ny, nz, coeff_use,
                        precision_float)
    gpu.Core(fields)
    fields_ref = naive.Fields(nx, ny, nz, precision_float)
    naive.Core(fields_ref)

    # allocations
    ns = fields.ns
    dtype = fields.dtype
    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    ehs = common_random.generate_ehs(nx, ny, nz, dtype, ufunc)
    fields.set_eh_bufs(*ehs)
    fields_ref.set_ehs(*ehs)

    ces, chs = common_random.generate_cs(nx, ny, nz, dtype, coeff_use)
    if 'e' in coeff_use:
        fields.set_ce_bufs(*ces)
        fields_ref.set_ces(*ces)
    if 'h' in coeff_use:
        fields.set_ch_bufs(*chs)
        fields_ref.set_chs(*chs)

    tmpf = np.zeros(fields.ns, dtype=dtype)

    def assert_matches_reference(names):
        """Copy each named device buffer to the host and compare it."""
        for strf in names:
            cl.enqueue_copy(fields.queue, tmpf, fields.get_buf(strf))
            norm = np.linalg.norm(fields_ref.get(strf) - tmpf)
            max_diff = np.abs(fields_ref.get(strf) - tmpf).max()
            self.assertEqual(
                norm, 0,
                '%s, %s, %g, %g' % (self.args, strf, norm, max_diff))

    # update
    named_arrays = list(zip(strf_list, ehs))
    if ufunc == 'e':
        for tstep in xrange(0, tmax):
            fields.update_e()
            fields_ref.update_e()
        assert_matches_reference([name for name, _ in named_arrays[:3]])
    elif ufunc == 'h':
        for tstep in xrange(0, tmax):
            fields.update_h()
            fields_ref.update_h()
        assert_matches_reference([name for name, _ in named_arrays[3:]])
def runTest(self):
    """Write values through node ``SetFields`` and read them back.

    ``self.args`` is ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  A random
    array (``is_array``) or a random scalar is written into the region
    ``pt0``..``pt1`` of the named field(s); the whole node domain is then
    fetched with ``GetFields`` and must equal a host-side array that had
    the same assignment applied.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    slices = common.slices_two_points(pt0, pt1)
    str_fs = common.convert_to_tuple(str_f)

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = Fields(mainf_list)
    dtype = nodef.dtype
    anx = nodef.accum_nx_list

    whole_node_end = (nodef.nx - 1, ny - 1, nz - 1)
    getf = GetFields(nodef, str_f, (0, 0, 0), whole_node_end)
    setf = SetFields(nodef, str_f, pt0, pt1, is_array)

    # generate random source
    if is_array:
        shape = common.shape_two_points(pt0, pt1, len(str_fs))
        value = np.random.rand(*shape).astype(nodef.dtype)
        split_value = np.split(value, len(str_fs))
        split_value_dict = dict(zip(str_fs, split_value))
    else:
        value = np.random.ranf()

    # host allocations
    field_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    global_ehs = [np.zeros(nodef.ns, dtype) for i in range(6)]
    eh_dict = dict(zip(field_names, global_ehs))

    # expected result on the host
    for name in str_fs:
        eh_dict[name][slices] = split_value_dict[name] if is_array else value

    # verify
    setf.set_fields(value)
    gpu_getf = gpu.GetFields(mainf_list[0], str_fs, (0, 0, 0),
                             (nx - 1, ny - 1, nz - 1))
    gpu_getf.get_event().wait()
    getf.wait()

    for name in str_fs:
        expected = eh_dict[name]
        fetched = getf.get_fields(name)
        norm = np.linalg.norm(expected - fetched)
        self.assertEqual(norm, 0, '%s, %g, %s' % (self.args, norm, name))
def runTest(self):
    """Read a region through ``NodeGetFields`` and compare with host data.

    ``self.args`` is ``(nx, ny, nz, str_f, pt0, pt1)``.  Every device grid
    is filled with random fields; the same values are written into
    node-sized host arrays at the x offsets given by
    ``nodef.accum_nx_list``.  The region fetched by ``NodeGetFields`` must
    equal the corresponding slice of the host arrays.
    """
    nx, ny, nz, str_f, pt0, pt1 = self.args
    slices = common.slice_index_two_points(pt0, pt1)
    str_fs = common.convert_to_tuple(str_f)

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = NodeFields(mainf_list)
    dtype = nodef.dtype
    anx = nodef.accum_nx_list
    getf = NodeGetFields(nodef, str_f, pt0, pt1)

    # generate random source
    global_ehs = [np.zeros(nodef.ns, dtype) for i in range(6)]
    eh_dict = dict(zip(['ex', 'ey', 'ez', 'hx', 'hy', 'hz'], global_ehs))

    # All grids but the last: the final x plane of each is left out of the
    # host copy.
    for i, f in enumerate(mainf_list[:-1]):
        nx, ny, nz = f.ns
        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
        f.set_eh_bufs(*ehs)
        x_begin, x_end = anx[i], anx[i + 1]
        for eh, geh in zip(ehs, global_ehs):
            geh[x_begin:x_end, :, :] = eh[:-1, :, :]

    # Last grid: copied in full.
    f = mainf_list[-1]
    nx, ny, nz = f.ns
    ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
    f.set_ehs(*ehs)
    for eh, geh in zip(ehs, global_ehs):
        geh[anx[-2]:anx[-1] + 1, :, :] = eh[:]

    # verify
    getf.wait()

    for name in str_fs:
        expected = eh_dict[name][slices]
        fetched = getf.get_fields(name)
        norm = np.linalg.norm(expected - fetched)
        self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Check the field produced by ``NodeDirectIncident`` after one step.

    ``self.args`` is ``(nx, ny, nz, str_f, pt0, pt1, is_array)``.  A random
    array or scalar source is attached to region ``pt0``..``pt1``; after
    one ``update_e``/``update_h`` pair the fetched region must equal
    ``value * tfunc(1)`` computed on the host in the node dtype.
    """
    nx, ny, nz, str_f, pt0, pt1, is_array = self.args
    slices = common.slice_index_two_points(pt0, pt1)

    # generate random source
    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        value = np.random.rand(*shape).astype(np.float32)
    else:
        value = np.random.ranf()

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = NodeFields(mainf_list)
    dtype = nodef.dtype
    anx = nodef.accum_nx_list

    def tfunc(tstep):
        return np.sin(0.03 * tstep)

    incident = NodeDirectIncident(nodef, str_f, pt0, pt1, tfunc, value)

    # allocations for verify
    eh = np.zeros(nodef.ns, dtype)
    getf = NodeGetFields(nodef, str_f, pt0, pt1)

    # expected value after the first time step
    eh[slices] = dtype(value) * dtype(tfunc(1))

    # verify
    e_or_h = str_f[0]
    nodef.update_e()
    nodef.update_h()
    getf.wait()

    expected = eh[slices]
    fetched = getf.get_fields(str_f)
    norm = np.linalg.norm(expected - fetched)
    self.assertEqual(norm, 0, '%s, %g' % (self.args, norm))
def runTest(self):
    """Compare x-boundary planes of the main grid with the x buffers.

    ``self.args`` is ``(nx, ny, nz)``.  One GPU grid plus ``'x+'`` and
    ``'x-'`` CPU buffer fields are filled with random data; after one
    ``update_e``/``update_h`` pair, selected planes of the main grid must
    equal the paired planes of each buffer (difference norm of 0).
    """
    nx, ny, nz = self.args

    # instances
    buffer_dict = {}
    buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single')
    buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single')

    import pyopencl as cl
    from kemp.fdtd3d.util import common_gpu
    from kemp.fdtd3d import gpu
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, gpu_devices[0], nx, ny, nz)]
    nodef = node.Fields(mainf_list, buffer_dict)

    # generate random source
    dtype = nodef.dtype
    ehs = common_random.generate_ehs(nx, ny, nz, dtype)
    buf_ehs_p = common_random.generate_ehs(3, ny, nz, dtype)
    buf_ehs_m = common_random.generate_ehs(3, ny, nz, dtype)
    nodef.mainf_list[0].set_eh_bufs(*ehs)
    nodef.buffer_dict['x+'].set_ehs(*buf_ehs_p)
    nodef.buffer_dict['x-'].set_ehs(*buf_ehs_m)
    node.Core(nodef)

    # allocations for verify
    getf_dict = {'x+': {}, 'x-': {}}
    getf_buf_dict = {'x+': {}, 'x-': {}}

    components = [('e', ['ey', 'ez']), ('h', ['hy', 'hz'])]
    # (direction, x planes on the main grid for e/h, x planes in the buffer)
    plane_table = [
        ('x+', (nx - 1, nx - 2), (1, 0)),
        ('x-', (1, 0), (2, 1)),
    ]
    for direction, main_xs, buf_xs in plane_table:
        # Same creation order as before: both main-grid readers first, then
        # both buffer readers.
        for (e_or_h, names), x in zip(components, main_xs):
            getf_dict[direction][e_or_h] = gpu.GetFields(
                nodef.mainf_list[0], list(names),
                (x, 0, 0), (x, ny - 1, nz - 1))
        for (e_or_h, names), x in zip(components, buf_xs):
            getf_buf_dict[direction][e_or_h] = cpu.GetFields(
                nodef.buffer_dict[direction], list(names),
                (x, 0, 0), (x, ny - 1, nz - 1))

    # verify
    nodef.update_e()
    nodef.update_h()
    print('%s %s' % ('nodef, instance_list', nodef.instance_list))
    print('%s %s' % ('mainf_list[0], instance_list',
                     nodef.mainf_list[0].instance_list))

    for direction in ['x+', 'x-']:
        for e_or_h in ['e', 'h']:
            getf = getf_dict[direction][e_or_h]
            getf_buf = getf_buf_dict[direction][e_or_h]

            getf.get_event().wait()
            getf_buf.get_event().wait()

            original = getf.get_fields()
            copy = getf_buf.get_fields()
            norm = np.linalg.norm(original - copy)
            self.assertEqual(
                norm, 0,
                '%s, %g, %s, %s' % (self.args, norm, direction, e_or_h))
def __init__(self, geometry_h5_path, max_tstep, mpi_shape, pbc_axes='',
             target_device='all', precision_float='single', **kargs):
    """Create the node-level field objects for this process.

    Parameters
    ----------
    geometry_h5_path : str
        HDF5 file; its attributes ``coeff_use``, ``nx``, ``ny`` and ``nz``
        are read here.
    max_tstep : int
        Stored as ``self.max_tstep`` and passed to
        ``network.ExchangeMpiSplitNonBlock`` for every buffer.
    mpi_shape : list or tuple of int
        Process grid.  ``(1, 1, 1)`` (as a tuple or a list) selects the
        non-MPI path; anything else imports ``mpi4py`` and requires the
        product of the entries to equal the communicator size.
    pbc_axes : str
        Axes with periodic boundaries.  An axis is handled by ``node.Pbc``
        only when the process grid has extent 1 along it.
    target_device : str
        ``'all'``, or a string containing ``'cpu'`` and/or ``'gpu'``.
    precision_float : {'single', 'double'}
    **kargs
        ``device_nx_list``, ``ny_list`` and ``nz_list`` are read.  They
        are indexed unconditionally, so in practice all three are needed.

    Notes
    -----
    The process terminates through ``sys.exit()`` when the MPI size does
    not match ``mpi_shape``, when ``pyopencl`` cannot be imported and the
    CPU is not a target, when the HDF5 file cannot be read, or when
    ``device_nx_list`` does not have one entry per device.

    Several names (``is_mpi``, ``is_gpu``, the MPI handles and the lazily
    imported modules) are published as module globals, as before.
    """
    # Global declarations are function-wide in Python; they are grouped
    # here so it is obvious which assignments below escape the function.
    global is_mpi, is_gpu
    global network, common_mpi, comm, size, rank, coord
    global cl, gpu, common_gpu

    from functools import reduce  # builtin on Python 2 only

    common.check_type('geometry_h5_path', geometry_h5_path, str)
    common.check_type('max_tstep', max_tstep, int)
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)
    common.check_type('target_device', target_device, str)
    common.check_value('precision_float', precision_float,
                       ['single', 'double'])

    # import modules
    # mpi_shape may legally be a list; compare as a tuple so that
    # [1, 1, 1] is not mistaken for a multi-process layout.
    is_mpi = tuple(mpi_shape) != (1, 1, 1)

    if is_mpi:
        from mpi4py import MPI
        from kemp.fdtd3d import network
        from kemp.fdtd3d.util import common_mpi

        comm = MPI.COMM_WORLD
        size = comm.Get_size()
        rank = comm.Get_rank()
        coord = common_mpi.my_coord(rank, mpi_shape)

    is_master = not (is_mpi and rank != 0)
    is_cpu = target_device == 'all' or 'cpu' in target_device
    is_gpu = target_device == 'all' or 'gpu' in target_device

    if is_mpi:
        if reduce(lambda a, b: a * b, mpi_shape) != size:
            if is_master:
                print("The MPI size %d is not matched the mpi_shape %s"
                      % (size, mpi_shape))
            sys.exit()

    if is_gpu:
        try:
            import pyopencl as cl
            from kemp.fdtd3d import gpu
            from kemp.fdtd3d.util import common_gpu
        except Exception:
            # Best effort: fall back to the CPU when it is also a target.
            # (Exception rather than a bare except, so Ctrl-C and
            # sys.exit() are not swallowed.)
            if is_master:
                print("The 'pyopencl' module is not found.")

            if is_cpu:
                if is_master:
                    print("The CPU is only used.")
                target_device = 'cpu'
                is_gpu = False
            else:
                sys.exit()

    # read from the h5 file
    try:
        h5f = h5py.File(geometry_h5_path, 'r')
        try:
            coeff_use = h5f.attrs['coeff_use']
            # nx/ny/nz are not used below; reading them still validates
            # that the file carries the expected attributes.
            nx = h5f.attrs['nx']
            ny = h5f.attrs['ny']
            nz = h5f.attrs['nz']
        finally:
            h5f.close()
    except Exception:
        if is_master:
            print(repr(sys.exc_info()))
            print("To load the geometry HDF5 file '%s' is failed."
                  % geometry_h5_path)
        sys.exit()

    # local variables
    device_nx_list = kargs.get('device_nx_list')
    ny_list = kargs.get('ny_list')
    nz_list = kargs.get('nz_list')

    # Set the number of device and the device_n_list
    ndev = 1 if is_cpu else 0
    if is_gpu:
        try:
            gpu_devices = common_gpu.gpu_device_list(print_info=False)
            context = cl.Context(gpu_devices)
            ndev += len(gpu_devices)
        except Exception:
            if is_master:
                print(repr(sys.exc_info()))
                print("To get the GPU devices is failed. "
                      "The CPU is only used.")
            target_device = 'cpu'
            is_gpu = False

    # dnx_list is consumed with pop() below, so always work on a private
    # copy and leave the caller's device_nx_list untouched.
    if is_mpi:
        mi, mj, mk = coord
        dnx_list = list(device_nx_list[mi * ndev:(mi + 1) * ndev])
        dny = ny_list[mj]
        dnz = nz_list[mk]
    else:
        dnx_list = list(device_nx_list)
        dny = ny_list[0]
        dnz = nz_list[0]

    total_ndev = mpi_shape[0] * ndev
    if len(device_nx_list) != total_ndev:
        if is_master:
            print("The device_nx_list %s is not matched with the number "
                  "of total devices %d." % (device_nx_list, total_ndev))
        sys.exit()

    # create the mainf_list and the buffer_dict
    buffer_dict = {}
    if is_mpi:
        # create BufferFields instances
        # Node x extent: neighbouring device grids share one plane.
        snx = sum(dnx_list) - ndev + 1
        sny, snz = dny, dnz

        mpi_target_dict = common_mpi.mpi_target_dict(rank, mpi_shape,
                                                     pbc_axes)
        for direction, target_rank in mpi_target_dict.items():
            if target_rank is not None:
                # Extents of the face normal to the buffer's axis.
                n0, n1 = {
                    'x': (sny, snz),
                    'y': (snx, snz),
                    'z': (snx, sny),
                }[direction[0]]
                bufferf = cpu.BufferFields(direction, target_rank, n0, n1,
                                           coeff_use, precision_float)
                buffer_dict[direction] = bufferf

    mainf_list = []
    if is_cpu:
        # The CPU takes the first x extent; the rest go to the GPUs.
        mainf_list += [
            cpu.Fields(dnx_list.pop(0), dny, dnz, coeff_use,
                       precision_float, use_cpu_core=1)
        ]

    if is_gpu:
        mainf_list += [
            gpu.Fields(context, gpu_device, dnx, dny, dnz, coeff_use,
                       precision_float)
            for gpu_device, dnx in zip(gpu_devices, dnx_list)
        ]

    # create node.Fields instance
    nodef = node.Fields(mainf_list, buffer_dict)

    # create nodePbc instance
    node_pbc_axes = ''.join(
        axis for i, axis in enumerate(['x', 'y', 'z'])
        if mpi_shape[i] == 1 and axis in pbc_axes)
    if node_pbc_axes != '':
        node.Pbc(nodef, node_pbc_axes)

    # create update instances
    node.Core(nodef)

    # buffer_dict is empty without MPI, so `network` is only touched when
    # it has been imported above.
    for bufferf in nodef.buffer_dict.values():
        network.ExchangeMpiSplitNonBlock(bufferf, max_tstep)

    # accum_sub_ns_dict, node_pts
    if is_mpi:
        asn_dict = common_mpi.accum_sub_ns_dict(mpi_shape, ndev,
                                                device_nx_list, ny_list,
                                                nz_list)
        axes = ['x', 'y', 'z']
        node_pt0 = [asn_dict[ax][m] for ax, m in zip(axes, coord)]
        node_pt1 = [asn_dict[ax][m + 1] - 1 for ax, m in zip(axes, coord)]

    # global variables
    self.max_tstep = max_tstep
    self.mpi_shape = mpi_shape
    if is_mpi:
        self.ns = (asn_dict['x'][-1], asn_dict['y'][-1],
                   asn_dict['z'][-1])
    else:
        self.ns = nodef.ns
    self.nodef = nodef
    self.is_master = is_master

    if is_mpi:
        self.asn_dict = asn_dict
        self.node_pt0 = node_pt0
        self.node_pt1 = node_pt1

    # for savefields
    self.savef_tag_list = []
    self.savef_list = []
# MPI layout of this process.
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

is_plot = False

# Problem definition.
nx, ny, nz = 240, 256, 256
coeff_use = 'e'
precision_float = 'single'
use_cpu_core = 1

# instances: one field grid on the first GPU
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
device = gpu_devices[0]
gpuf = gpu.Fields(context, device, nx, ny, nz, coeff_use, precision_float)

if is_plot:
    tmax = 250
else:
    tmax = 1000

# The first rank gets only a '+' buffer, the last only a '-' buffer, and
# every other rank gets both.
direction = '+-'
if rank == 0:
    direction = '+'
elif rank == size - 1:
    direction = '-'

buffer_dict = {}
for _sign in ('+', '-'):
    if _sign in direction:
        buffer_dict['x' + _sign] = cpu.Fields(
            cpu.QueueTask(), 2, ny, nz, coeff_use, precision_float,
            use_cpu_core)

nodef = node.Fields([gpuf], buffer_dict)
node.Core(nodef)
from kemp.fdtd3d.util import common_gpu
from kemp.fdtd3d.node import NodeFields, NodeCore, NodeExchange, NodeDirectIncident, NodeGetFields, NodePbc
from kemp.fdtd3d import gpu, cpu

# Grid extents per device kind and run length.
nx_gpu = 120
nx_cpu = 80
ny, nz = 300, 64
tmax, tgap = 200, 10

# instances: one grid per GPU, followed by a single CPU grid
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
mainf_list = []
for device in gpu_devices:
    mainf_list.append(gpu.Fields(context, device, nx_gpu, ny, nz))
mainf_list.append(cpu.Fields(nx_cpu, ny, nz))

nodef = NodeFields(mainf_list)
core = NodeCore(nodef)
exchange = NodeExchange(nodef)
# NOTE(review): `pbc` is bound twice, so afterwards the name refers only to
# the 'z' instance -- confirm the later code does not need the 'y' one.
pbc = NodePbc(nodef, 'y')
pbc = NodePbc(nodef, 'z')
pbc_x = NodePbc(nodef, 'x')


def tfunc(tstep):
    return np.sin(0.1 * tstep)


# Source on the x = 20 plane; output taken on the z = 2 plane.
incident = NodeDirectIncident(nodef, 'ez', (20, 0, 0),
                              (20, ny - 1, nz - 1), tfunc)
getf = NodeGetFields(nodef, 'ez', (0, 0, 2), (nodef.nx - 1, ny - 1, 2))

# plot
import matplotlib.pyplot as plt
def runTest(self): nx, ny, nz = self.args tmax = 10 # instances buffer_dict = {} if rank == 0: buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single') elif rank == 1: buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single') import pyopencl as cl from kemp.fdtd3d.util import common_gpu from kemp.fdtd3d import gpu gpu_devices = common_gpu.gpu_device_list(print_info=False) context = cl.Context(gpu_devices) mainf_list = [ gpu.Fields(context, gpu_devices[0], nx, ny, nz) ] #mainf_list = [ cpu.Fields(nx, ny, nz, use_cpu_core=1) ] nodef = node.Fields(mainf_list, buffer_dict) # generate random source dtype = nodef.dtype ehs = common_random.generate_ehs(nx, ny, nz, dtype) buf_ehs = common_random.generate_ehs(3, ny, nz, dtype) #nodef.cpuf.set_ehs(*ehs) nodef.mainf_list[0].set_eh_bufs(*ehs) other = {0: 1, 1: 0}[rank] if rank == 0: #nodef.buffer_dict['x+'].set_ehs(*buf_ehs) ExchangeMpi(nodef.buffer_dict['x+'], other, tmax) elif rank == 1: #nodef.buffer_dict['x-'].set_ehs(*buf_ehs) ExchangeMpi(nodef.buffer_dict['x-'], other, tmax) node.Core(nodef) # allocations for verify if rank == 0: getf_e = cpu.GetFields(nodef.buffer_dict['x+'], ['ey', 'ez'], (2, 0, 0), (2, ny-1, nz-1)) getf_h = cpu.GetFields(nodef.buffer_dict['x+'], ['hy', 'hz'], (1, 0, 0), (1, ny-1, nz-1)) elif rank == 1: getf_e = cpu.GetFields(nodef.buffer_dict['x-'], ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1)) getf_h = cpu.GetFields(nodef.buffer_dict['x-'], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1)) # verify print 'nodef, instance_list', rank, nodef.instance_list print 'f0, instance_list', rank, nodef.mainf_list[0].instance_list exch = nodef.instance_list[0] main_core = nodef.mainf_list[0].instance_list[0] if rank == 0: #nodef.buffer_dict['x+'].instance_list.pop(0) print 'bufferf x+, instance_list', rank, nodef.buffer_dict['x+'].instance_list core, mpi = nodef.buffer_dict['x+'].instance_list elif rank == 1: #nodef.buffer_dict['x-'].instance_list.pop(0) print 'bufferf x-, instance_list', rank, 
nodef.buffer_dict['x-'].instance_list core, mpi = nodef.buffer_dict['x-'].instance_list for tstep in xrange(1, tmax+1): #if rank == 0: print 'tstep', tstep #nodef.update_e() main_core.update_e() if rank == 0: #print tstep, rank, 'core upE' core.update_e('') #print tstep, rank, 'mpi upE' mpi.update_e('') elif rank == 1: #print tstep, rank, 'core upE pre' core.update_e('pre') #print tstep, rank, 'mpi upE pre' mpi.update_e('pre') #print tstep, rank, 'core upE post' core.update_e('post') #print tstep, rank, 'mpi upE post' mpi.update_e('post') exch.update_e() # verify the buffer #print tstep, rank, 'pre get' getf_h.get_event().wait() #print tstep, rank, 'after get' if rank == 1: #print tstep, rank, 'pre save' np.save('rank1_h_%d' % tstep, getf_h.get_fields()) #print tstep, rank, 'after save' elif rank == 0: no_exist_npy = True while no_exist_npy: try: arr1 = np.load('rank1_h_%d.npy' % tstep) no_exist_npy = False except: sleep(0.5) arr0 = getf_h.get_fields() #print tstep, 'h arr0\n', arr0 #print tstep, 'h arr1\n', arr1 norm = np.linalg.norm(arr0 - arr1) if norm != 0: print tstep, 'h norm', norm #if tstep > 1: self.assertEqual(norm, 0, '%s, %g, h' % (self.args, norm)) #nodef.update_h() main_core.update_h() if rank == 0: #print tstep, rank, 'core upH pre' core.update_h('pre') #print tstep, rank, 'mpi upH pre' mpi.update_h('pre') #print tstep, rank, 'core upH post' core.update_h('post') #print tstep, rank, 'mpi upH post' mpi.update_h('post') elif rank == 1: #print tstep, rank, 'core upH' core.update_h('') #print tstep, rank, 'mpi upH' mpi.update_h('') exch.update_h() getf_e.get_event().wait() if rank == 1: np.save('rank1_e_%d' % tstep, getf_e.get_fields()) elif rank == 0: no_exist_npy = True while no_exist_npy: try: arr1 = np.load('rank1_e_%d.npy' % tstep) no_exist_npy = False except: sleep(0.5) arr0 = getf_e.get_fields() norm = np.linalg.norm(arr0 - arr1) if norm != 0: print tstep, 'e norm', norm #self.assertEqual(norm, 0, '%s, %g, e' % (self.args, norm)) '''
def test_y_pbc_x_exchange(self):
    """Check the x exchange and the y periodic boundary on a GPU+CPU node.

    One GPU grid and one CPU grid are filled with random fields.  After one
    E and one H update (each followed by the PBC and exchange updates), the
    facing x planes of the two grids must be equal, and on each grid the
    first and last y planes must be equal.
    """
    # instance
    nx, ny, nz = 40, 50, 60
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    gpuf = gpu.Fields(context, gpu_devices[0], nx, ny, nz)
    cpuf = cpu.Fields(nx, ny, nz)
    mainf_list = [gpuf, cpuf]
    nodef = NodeFields(mainf_list)
    core = NodeCore(nodef)
    pbc = NodePbc(nodef, 'y')
    exchange = NodeExchange(nodef)

    # generate random source
    names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    ehs_gpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
    gpuf.set_eh_bufs(*ehs_gpu)
    ehs_gpu_dict = dict(zip(names, ehs_gpu))

    ehs_cpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
    cpuf.set_ehs(*ehs_cpu)
    ehs_cpu_dict = dict(zip(names, ehs_cpu))

    # verify
    for mainf in mainf_list:
        mainf.update_e()
    pbc.update_e()
    exchange.update_e()

    for mainf in mainf_list:
        mainf.update_h()
    pbc.update_h()
    exchange.update_h()

    mainf_list[-1].enqueue_barrier()

    getf0, getf1 = {}, {}

    def wait_and_compare(label):
        """Wait for all four readers, then compare the e and h pairs."""
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

        for eh in ['e', 'h']:
            g0 = getf0[eh].get_fields()
            g1 = getf1[eh].get_fields()
            norm = np.linalg.norm(g0 - g1)
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, label, eh))

    # x-axis exchange
    getf0['e'] = gpu.GetFields(gpuf, ['ey', 'ez'],
                               (nx - 1, 0, 0), (nx - 1, ny - 2, nz - 2))
    getf1['e'] = cpu.GetFields(cpuf, ['ey', 'ez'],
                               (0, 0, 0), (0, ny - 2, nz - 2))
    getf0['h'] = gpu.GetFields(gpuf, ['hy', 'hz'],
                               (nx - 1, 1, 1), (nx - 1, ny - 1, nz - 1))
    getf1['h'] = cpu.GetFields(cpuf, ['hy', 'hz'],
                               (0, 1, 1), (0, ny - 1, nz - 1))
    wait_and_compare('x-axis exchange')

    # y-axis pbc, first on the GPU grid and then on the CPU grid
    for module, grid, label in ((gpu, gpuf, 'y-axis pbc gpu'),
                                (cpu, cpuf, 'y-axis pbc cpu')):
        getf0['e'] = module.GetFields(grid, ['ex', 'ez'],
                                      (0, ny - 1, 0),
                                      (nx - 2, ny - 1, nz - 2))
        getf1['e'] = module.GetFields(grid, ['ex', 'ez'],
                                      (0, 0, 0), (nx - 2, 0, nz - 2))
        getf0['h'] = module.GetFields(grid, ['hx', 'hz'],
                                      (1, ny - 1, 1),
                                      (nx - 1, ny - 1, nz - 1))
        getf1['h'] = module.GetFields(grid, ['hx', 'hz'],
                                      (1, 0, 1), (nx - 1, 0, nz - 1))
        wait_and_compare(label)
import numpy as np
import pyopencl as cl

# Grid sizes and indices are integers.  Floor division (//) gives the same
# values as the previous `/` on Python 2 and stays integral when true
# division is in effect (Python 3 or `from __future__ import division`).
nx_gpu = 120
nx_cpu = nx_gpu // 5
ny, nz = 320, 320
tmax, tgap = 2000, 5
divide_axes = 'x'

# GPUs
gpu_devices = common_gpu.get_gpu_devices()
context = cl.Context(gpu_devices)
ngpu = len(gpu_devices)

fdtds = [gpu.Fields(context, device, nx_gpu, ny, nz, coeff_use='')
         for device in gpu_devices]
# One 'ez' reader per grid on the z = nz // 2 plane.
outputs = [gpu.GetFields(fdtd, 'ez', (0, 0, nz // 2),
                         (nx_gpu - 2, ny - 1, nz // 2))
           for fdtd in fdtds]
# The source is attached to the third GPU grid, so this line needs at
# least three GPU devices.
src_e = gpu.DirectSrc(fdtds[2], 'ez',
                      (nx_gpu // 5 * 1, ny // 2, 0),
                      (nx_gpu // 5 * 1, ny // 2, nz - 1),
                      lambda tstep: np.sin(0.1 * tstep))

# CPU
common_cpu.print_cpu_info()
fdtds.append(cpu.Fields(nx_cpu, ny, nz, coeff_use=''))
outputs.append(cpu.GetFields(fdtds[-1], 'ez', (0, 0, nz // 2),
                             (nx_cpu - 2, ny - 1, nz // 2)))

# GPUs-CPU
exch = ExchangeInternal(fdtds, 'x')

# Plot
def runTest(self):
    """Check ``NodePbc`` along one axis on a multi-device node.

    ``self.args`` is ``(axis, nx, ny, nz)``.  All grids are filled with
    random fields and advanced by one E and one H update (each followed by
    the PBC and exchange updates).  For ``'x'`` the first plane of the
    first grid is compared with the last plane of the last grid; for
    ``'y'``/``'z'`` the two opposite planes are compared on every grid.
    """
    axis, nx, ny, nz = self.args
    # Lets a grid's device_type string select the matching module.
    self.gpu, self.cpu = gpu, cpu

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = NodeFields(mainf_list)
    dtype = nodef.dtype
    pbc = NodePbc(nodef, axis)
    exchange = NodeExchange(nodef)

    # generate random source
    for f in mainf_list[:-1]:
        nx, ny, nz = f.ns
        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
        f.set_eh_bufs(*ehs)

    for f in nodef.cpuf_dict.values():
        nx, ny, nz = f.ns
        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
        f.set_ehs(*ehs)

    # verify
    for mainf in mainf_list:
        mainf.update_e()
    pbc.update_e()
    exchange.update_e()

    for mainf in mainf_list:
        mainf.update_h()
    pbc.update_h()
    exchange.update_h()

    mainf_list[-1].enqueue_barrier()

    getf0, getf1 = {}, {}

    def reader(grid, names, pt0, pt1):
        """Create a GetFields from the module matching the grid's device."""
        return getattr(self, grid.device_type).GetFields(grid, names,
                                                         pt0, pt1)

    def wait_and_compare(build_message):
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, build_message(norm, eh))

    if axis == 'x':
        f0, f1 = mainf_list[0], mainf_list[-1]
        getf0['e'] = reader(f0, ['ey', 'ez'],
                            (0, 0, 0), (0, f0.ny - 2, f0.nz - 2))
        getf1['e'] = reader(f1, ['ey', 'ez'],
                            (f1.nx - 1, 0, 0),
                            (f1.nx - 1, f1.ny - 2, f1.nz - 2))
        getf0['h'] = reader(f0, ['hy', 'hz'],
                            (0, 1, 1), (0, f0.ny - 1, f0.nz - 1))
        getf1['h'] = reader(f1, ['hy', 'hz'],
                            (f1.nx - 1, 1, 1),
                            (f1.nx - 1, f1.ny - 1, f1.nz - 1))
        wait_and_compare(
            lambda norm, eh: '%g, %s, %s' % (norm, 'x', eh))

    elif axis == 'y':
        for f in mainf_list:
            getf0['e'] = reader(f, ['ex', 'ez'],
                                (0, 0, 0), (f.nx - 2, 0, f.nz - 2))
            getf1['e'] = reader(f, ['ex', 'ez'],
                                (0, f.ny - 1, 0),
                                (f.nx - 2, f.ny - 1, f.nz - 2))
            getf0['h'] = reader(f, ['hx', 'hz'],
                                (1, 0, 1), (f.nx - 1, 0, f.nz - 1))
            getf1['h'] = reader(f, ['hx', 'hz'],
                                (1, f.ny - 1, 1),
                                (f.nx - 1, f.ny - 1, f.nz - 1))
            wait_and_compare(
                lambda norm, eh: '%g, %s, %s, %s'
                % (norm, 'y', eh, f.device_type))

    elif axis == 'z':
        for f in mainf_list:
            getf0['e'] = reader(f, ['ex', 'ey'],
                                (0, 0, f.nz - 1),
                                (f.nx - 2, f.ny - 2, f.nz - 1))
            getf1['e'] = reader(f, ['ex', 'ey'],
                                (0, 0, 0), (f.nx - 2, f.ny - 2, 0))
            getf0['h'] = reader(f, ['hx', 'hy'],
                                (1, 1, f.nz - 1),
                                (f.nx - 1, f.ny - 1, f.nz - 1))
            getf1['h'] = reader(f, ['hx', 'hy'],
                                (1, 1, 0), (f.nx - 1, f.ny - 1, 0))
            wait_and_compare(
                lambda norm, eh: '%g, %s, %s' % (norm, 'z', eh))
def runTest(self):
    """Check ``NodeExchange`` between grids and towards the buffers.

    ``self.args`` is ``(nx, ny, nz)``.  GPU grids plus one CPU grid form
    the node; CPU buffer fields are appended for ``'x-'``, ``'y+'``,
    ``'y-'``, ``'z+'`` and ``'z-'``.  Everything is filled with random
    data, one ``update_e``/``update_h`` exchange is run, and each pair of
    planes that the exchange should have made identical is compared
    (difference norm of 0).
    """
    nx, ny, nz = self.args

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = NodeFields(mainf_list)
    dtype = nodef.dtype

    # buffer instance
    nodef.append_buffer_fields(cpu.Fields(3, ny, nz, mpi_type='x-'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y+'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y-'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z+'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z-'))

    exchange = NodeExchange(nodef)

    # generate random source
    for f in mainf_list[:-1]:
        nx, ny, nz = f.ns
        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
        f.set_eh_bufs(*ehs)

    for f in nodef.cpuf_dict.values():
        nx, ny, nz = f.ns
        ehs = common_update.generate_random_ehs(nx, ny, nz, dtype)
        f.set_ehs(*ehs)

    # verify
    exchange.update_e()
    exchange.update_h()

    getf0, getf1 = {}, {}

    # Lets a grid's device_type string select the matching module.
    self.gpu, self.cpu = gpu, cpu

    def reader(grid, names, pt0, pt1):
        """Create a GetFields from the module matching the grid's device."""
        return getattr(self, grid.device_type).GetFields(grid, names,
                                                         pt0, pt1)

    def wait_and_compare(build_message):
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, build_message(norm, eh))

    def buffer_message(tag):
        return lambda norm, eh: '%g, %s, %s' % (norm, tag, eh)

    # mainf list: facing x planes of neighbouring grids
    for f0, f1 in zip(mainf_list[:-1], mainf_list[1:]):
        getf0['e'] = reader(f0, ['ey', 'ez'],
                            (f0.nx - 1, 0, 0),
                            (f0.nx - 1, f0.ny - 2, f0.nz - 2))
        getf1['e'] = reader(f1, ['ey', 'ez'],
                            (0, 0, 0), (0, f1.ny - 2, f1.nz - 2))
        getf0['h'] = reader(f0, ['hy', 'hz'],
                            (f0.nx - 1, 1, 1),
                            (f0.nx - 1, f0.ny - 1, f0.nz - 1))
        getf1['h'] = reader(f1, ['hy', 'hz'],
                            (0, 1, 1), (0, f1.ny - 1, f1.nz - 1))
        wait_and_compare(
            lambda norm, eh: '%s, %g, %s, %s, %s'
            % (self.args, norm, 'mainf',
               getf0[eh].mainf.device_type,
               getf1[eh].mainf.device_type))

    # buffer 'x-'
    f0, f1 = nodef.cpuf_dict['x-'], mainf_list[0]
    getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'],
                               (f0.nx - 1, 0, 0),
                               (f0.nx - 1, f0.ny - 2, f0.nz - 2))
    getf1['e'] = gpu.GetFields(f1, ['ey', 'ez'],
                               (1, 0, 0), (1, f1.ny - 2, f1.nz - 2))
    getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'],
                               (f0.nx - 2, 1, 1),
                               (f0.nx - 2, f0.ny - 1, f0.nz - 1))
    getf1['h'] = gpu.GetFields(f1, ['hy', 'hz'],
                               (0, 1, 1), (0, f1.ny - 1, f1.nz - 1))
    wait_and_compare(buffer_message('x-'))

    # The y/z buffers span the whole node in x; each grid is compared with
    # its own x range of the buffer.
    anx_list = nodef.accum_nx_list

    # buffer 'y+'
    f1 = nodef.cpuf_dict['y+']
    for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = reader(f0, ['ex', 'ez'],
                            (0, f0.ny - 1, 0),
                            (f0.nx - 2, f0.ny - 1, f0.nz - 2))
        getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'],
                                   (1, anx0, 0), (1, anx1 - 1, f1.nz - 2))
        getf0['h'] = reader(f0, ['hx', 'hz'],
                            (1, f0.ny - 2, 1),
                            (f0.nx - 1, f0.ny - 2, f0.nz - 1))
        getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'],
                                   (0, anx0 + 1, 1), (0, anx1, f1.nz - 1))
        wait_and_compare(buffer_message('y+'))

    # buffer 'y-'
    f0 = nodef.cpuf_dict['y-']
    for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'],
                                   (f0.nx - 1, anx0, 0),
                                   (f0.nx - 1, anx1 - 1, f0.nz - 2))
        getf1['e'] = reader(f1, ['ex', 'ez'],
                            (0, 1, 0), (f1.nx - 2, 1, f1.nz - 2))
        getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'],
                                   (f0.nx - 2, anx0 + 1, 1),
                                   (f0.nx - 2, anx1, f0.nz - 1))
        getf1['h'] = reader(f1, ['hx', 'hz'],
                            (1, 0, 1), (f1.nx - 1, 0, f1.nz - 1))
        wait_and_compare(buffer_message('y-'))

    # buffer 'z+'
    f1 = nodef.cpuf_dict['z+']
    for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = reader(f0, ['ex', 'ey'],
                            (0, 0, f0.nz - 1),
                            (f0.nx - 2, f0.ny - 2, f0.nz - 1))
        getf1['e'] = cpu.GetFields(f1, ['ey', 'ez'],
                                   (1, anx0, 0), (1, anx1 - 1, f1.nz - 2))
        getf0['h'] = reader(f0, ['hx', 'hy'],
                            (1, 1, f0.nz - 2),
                            (f0.nx - 1, f0.ny - 1, f0.nz - 2))
        getf1['h'] = cpu.GetFields(f1, ['hy', 'hz'],
                                   (0, anx0 + 1, 1), (0, anx1, f1.nz - 1))
        wait_and_compare(buffer_message('z+'))

    # buffer 'z-'
    f0 = nodef.cpuf_dict['z-']
    for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = cpu.GetFields(f0, ['ey', 'ez'],
                                   (f0.nx - 1, anx0, 0),
                                   (f0.nx - 1, anx1 - 1, f0.nz - 2))
        getf1['e'] = reader(f1, ['ex', 'ey'],
                            (0, 0, 1), (f1.nx - 2, f1.ny - 2, 1))
        getf0['h'] = cpu.GetFields(f0, ['hy', 'hz'],
                                   (f0.nx - 2, anx0 + 1, 1),
                                   (f0.nx - 2, anx1, f0.nz - 1))
        getf1['h'] = reader(f1, ['hx', 'hy'],
                            (1, 1, 0), (f1.nx - 1, f1.ny - 1, 0))
        wait_and_compare(buffer_message('z-'))
# Make packages located in the user's home directory importable.
sys.path.append(os.path.expanduser('~'))
from kemp.fdtd3d.util import common_gpu
from kemp.fdtd3d.node import Fields, Core, IncidentDirect, GetFields, Pbc
from kemp.fdtd3d import gpu, cpu

ny, nz = 140, 2
gpu_nx = 141
cpu_nx = 20
tmax, tgap = 150, 10

# instances: the CPU grid comes first, followed by one grid per GPU
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
mainf_list = [cpu.Fields(cpu_nx, ny, nz)]
mainf_list += [gpu.Fields(context, device, gpu_nx, ny, nz)
               for device in gpu_devices]
fields = Fields(mainf_list)
Core(fields)
Pbc(fields, 'xyz')

nx = fields.nx
tfunc = lambda tstep: np.sin(0.05 * tstep)
# Source on the y = 20 plane across the whole node.
IncidentDirect(fields, 'ez', (0, 20, 0), (nx - 1, 20, nz - 1), tfunc)
# The output plane index must be an integer: // gives the same value as the
# previous `/` on Python 2 and stays integral under true division.
getf = GetFields(fields, 'ez', (0, 0, nz // 2), (nx - 1, ny - 1, nz // 2))
from kemp.fdtd3d.util import common_gpu
from kemp.fdtd3d.node import Fields, Core, IncidentDirect, GetFields, Pbc, Pml, ExchangeNode
from kemp.fdtd3d import gpu, cpu

ny, nz = 320, 2
gpu_nx = 101
cpu_nx = 20
tmax, tgap = 800, 10

# instances: one grid per GPU device (no CPU grid in this configuration)
gpu_devices = common_gpu.gpu_device_list(print_info=False)
context = cl.Context(gpu_devices)
mainf_list = []
for device in gpu_devices:
    mainf_list.append(gpu.Fields(context, device, gpu_nx, ny, nz))
fields = Fields(mainf_list)

# Boundary handling and update instances, created in this order.
Pbc(fields, 'z')
Pml(fields, ('+-', '+-', ''), npml=10)
ExchangeNode(fields)
Core(fields)

nx = fields.nx


def tfunc(tstep):
    return 50 * np.sin(0.05 * tstep)


# NOTE(review): the points use fractional and negative coordinates;
# presumably these are relative positions / end-relative indices -- confirm
# against IncidentDirect and GetFields.
IncidentDirect(fields, 'ez', (0.6, 0.7, 0), (0.6, 0.7, -1), tfunc)
getf = GetFields(fields, 'ez', (0, 0, 0.5), (-1, -1, 0.5))

print(fields.instance_list)