def __init__(self, gpuf, direction):
    """Create the CPU buffer fields and the GPU/CPU transfer objects.

    gpuf must be a gpu.Fields instance and direction one of '+', '-' or
    '+-' (both are checked through the common.check_* helpers).  Every
    sign contained in direction enables one of the two set-up branches
    below; both branches share one cpu.QueueTask.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    task_queue = cpu.QueueTask()

    if '+' in direction:
        # CPU-side fields for the '+' side; the first size argument is 3
        # (presumably nx - confirm against cpu.Fields).
        buf_p = cpu.Fields(
            task_queue, 3, gpuf.ny, gpuf.nz,
            gpuf.coeff_use, gpuf.precision_float,
            use_cpu_core=0)
        self.cpuf_p = buf_p

        # getters/setters between gpuf and the '+' buffer
        self.gf_p_h = gpu.GetFields(
            gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(
            buf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)
        self.gf_p_e = cpu.GetFields(
            buf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(
            gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # getter/setter that work on the buffer only
        outgoing_h = cpu.GetFields(
            buf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.gf_h = outgoing_h
        self.sf_e = cpu.SetFields(
            buf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # zero-filled array with the shape of the outgoing h host array
        self.tmp_recv_e = np.zeros(outgoing_h.host_array.shape, gpuf.dtype)

    if '-' in direction:
        # CPU-side fields for the '-' side
        buf_m = cpu.Fields(
            task_queue, 3, gpuf.ny, gpuf.nz,
            gpuf.coeff_use, gpuf.precision_float,
            use_cpu_core=0)
        self.cpuf_m = buf_m

        # getters/setters between gpuf and the '-' buffer
        self.gf_m_e = gpu.GetFields(
            gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(
            buf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)
        self.gf_m_h = cpu.GetFields(
            buf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(
            gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # getter/setter that work on the buffer only
        outgoing_e = cpu.GetFields(
            buf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.gf_e = outgoing_e
        self.sf_h = cpu.SetFields(
            buf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # zero-filled array with the shape of the outgoing e host array
        self.tmp_recv_h = np.zeros(outgoing_e.host_array.shape, gpuf.dtype)

    # global variables
    self.direction = direction
    self.qtask = task_queue
def __init__(self, nodef):
    """Prepare the x-direction exchange objects and persistent MPI requests.

    nodef must be a node.Fields instance.  For each of 'x+' and 'x-' that
    is a key of nodef.buffer_dict, Get/SetFields objects are created on
    the adjacent main fields and on the buffer, and persistent send and
    receive requests are initialised towards the neighbouring rank
    (wrapping around between rank 0 and rank size - 1).  The instance is
    finally registered through nodef.append_instance.
    """
    common.check_type('nodef', nodef, node.Fields)

    self.directions = nodef.buffer_dict.keys()
    # looked up by name through getattr(self, <device_type>) below
    self.gpu = gpu
    self.cpu = cpu

    if 'x+' in self.directions:
        main_last = nodef.mainf_list[-1]
        buf_plus = nodef.buffer_dict['x+']
        device = getattr(self, main_last.device_type)

        self.gf_p_h = device.GetFields(
            main_last, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(
            buf_plus, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)
        self.sf_p_e = device.SetFields(
            main_last, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf_h = cpu.GetFields(
            buf_plus, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_e = cpu.SetFields(
            buf_plus, ['ey', 'ez'], (1, 0, 0), (1, -1, -1), True)

        # the last rank talks to rank 0
        if rank == size - 1:
            neighbour = 0
        else:
            neighbour = rank + 1

        self.req_send_h = comm.Send_init(
            self.gf_h.host_array, neighbour, tag=0)
        self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, nodef.dtype)
        self.req_recv_e = comm.Recv_init(self.tmp_recv_e, neighbour, tag=1)

    if 'x-' in self.directions:
        main_first = nodef.mainf_list[0]
        buf_minus = nodef.buffer_dict['x-']
        device = getattr(self, main_first.device_type)

        self.gf_m_e = device.GetFields(
            main_first, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(
            buf_minus, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)
        self.sf_m_h = device.SetFields(
            main_first, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf_e = cpu.GetFields(
            buf_minus, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.sf_h = cpu.SetFields(
            buf_minus, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # rank 0 talks to the last rank
        if rank == 0:
            neighbour = size - 1
        else:
            neighbour = rank - 1

        self.req_send_e = comm.Send_init(
            self.gf_e.host_array, neighbour, tag=1)
        self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, nodef.dtype)
        self.req_recv_h = comm.Recv_init(self.tmp_recv_h, neighbour, tag=0)

    # append to the update list
    self.priority_type = 'mpi'
    nodef.append_instance(self)
def __init__(self, node_fields, axis):
    """Set up the periodic boundary condition along one axis of a node.

    Args:
        node_fields: a NodeFields instance (checked by common.check_type).
        axis: 'x', 'y' or 'z' (checked by common.check_value).

    Raises:
        ValueError: if node_fields.cpuf_dict holds a '+' or '-' buffer
            entry for any of the three axes.

    When axis is 'x' and the first and last main fields are different
    objects, Get/SetFields pairs are stored on the instance and
    update_e / update_h are bound to the *_actual methods.  In every
    other case per-field Pbc objects are created and update_e / update_h
    become no-ops.
    """
    common.check_type('node_fields', node_fields, NodeFields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    # looked up by name through getattr(self, <device_type>) below
    self.gpu = gpu
    self.cpu = cpu
    mainf_list = node_fields.mainf_list

    # NOTE(review): this rejects buffers on *any* axis, not only on the
    # requested one, although the message names a single axis - confirm
    # that this is intended.
    buffer_keys = set(node_fields.cpuf_dict)
    for ax in ['x', 'y', 'z']:
        if not buffer_keys.isdisjoint([ax + '+', ax + '-']):
            raise ValueError(
                'There are %s-axis buffer instances. The pbc_internal operation along %s-axis is not allowed.'
                % (ax, ax))

    # create pbc instances
    f0 = mainf_list[0]
    f1 = mainf_list[-1]

    if axis == 'x' and f0 is not f1:
        # NOTE(review): cpu/gpu are hard-coded here (f1 handled through
        # cpu, f0 through gpu) while the other branches dispatch on
        # device_type - confirm the assumed device layout.
        self.setf_e = cpu.SetFields(
            f1, ['ey', 'ez'],
            (f1.nx-1, 0, 0), (f1.nx-1, f1.ny-2, f1.nz-2), True)
        self.getf_e = gpu.GetFields(
            f0, ['ey', 'ez'],
            (0, 0, 0), (0, f0.ny-2, f0.nz-2))
        self.setf_h = gpu.SetFields(
            f0, ['hy', 'hz'],
            (0, 1, 1), (0, f0.ny-1, f0.nz-1), True)
        self.getf_h = cpu.GetFields(
            f1, ['hy', 'hz'],
            (f1.nx-1, 1, 1), (f1.nx-1, f1.ny-1, f1.nz-1))

        self.update_e = self.update_e_actual
        self.update_h = self.update_h_actual
    else:
        if axis == 'x':
            # single main field: delegate to its own device Pbc
            getattr(self, f0.device_type).Pbc(f0, axis)
        elif axis in ['y', 'z']:
            for f in mainf_list:
                getattr(self, f.device_type).Pbc(f, axis)

        # nothing to do at node level in these cases
        self.update_e = lambda: None
        self.update_h = lambda: None
def __init__(self, gpuf, direction, tmax):
    """Create CPU buffer fields, transfer objects and persistent MPI requests.

    gpuf must be a gpu.Fields instance and direction one of '+', '-' or
    '+-' (checked through the common.check_* helpers); tmax is stored
    unchanged.  For each sign in direction a cpu.Fields buffer, the
    Get/SetFields objects, one persistent send request and two
    persistent receive requests (each with its own zero-filled array)
    are created.  The '+' side addresses rank + 1, the '-' side rank - 1.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    task_queue = cpu.QueueTask()

    if '+' in direction:
        buf_p = cpu.Fields(
            task_queue, 3, gpuf.ny, gpuf.nz,
            gpuf.coeff_use, gpuf.precision_float,
            use_cpu_core=1)
        self.cpuf_p = buf_p

        # getters/setters between gpuf and the '+' buffer
        self.gf_p_h = gpu.GetFields(
            gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(
            buf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)
        self.gf_p_e = cpu.GetFields(
            buf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(
            gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # getter/setter that work on the buffer only
        outgoing_h = cpu.GetFields(
            buf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.gf_h = outgoing_h
        self.sf_e = cpu.SetFields(
            buf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # persistent requests towards rank + 1
        self.req_send_h = comm.Send_init(
            outgoing_h.host_array, rank + 1, tag=0)
        self.tmp_recv_e_list = [
            np.zeros(outgoing_h.host_array.shape, gpuf.dtype)
            for _ in range(2)
        ]
        self.req_recv_e_list = [
            comm.Recv_init(recv_buf, rank + 1, tag=1)
            for recv_buf in self.tmp_recv_e_list
        ]
        self.switch_e = 0

    if '-' in direction:
        buf_m = cpu.Fields(
            task_queue, 3, gpuf.ny, gpuf.nz,
            gpuf.coeff_use, gpuf.precision_float,
            use_cpu_core=1)
        self.cpuf_m = buf_m

        # getters/setters between gpuf and the '-' buffer
        self.gf_m_e = gpu.GetFields(
            gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(
            buf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)
        self.gf_m_h = cpu.GetFields(
            buf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(
            gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # getter/setter that work on the buffer only
        outgoing_e = cpu.GetFields(
            buf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.gf_e = outgoing_e
        self.sf_h = cpu.SetFields(
            buf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # persistent requests towards rank - 1
        self.req_send_e = comm.Send_init(
            outgoing_e.host_array, rank - 1, tag=1)
        self.tmp_recv_h_list = [
            np.zeros(outgoing_e.host_array.shape, gpuf.dtype)
            for _ in range(2)
        ]
        self.req_recv_h_list = [
            comm.Recv_init(recv_buf, rank - 1, tag=0)
            for recv_buf in self.tmp_recv_h_list
        ]
        self.switch_h = 0

    # global variables
    self.direction = direction
    self.qtask = task_queue
    self.tmax = tmax
    self.tstep = 1
def runTest(self):
    """Compare main-field boundary slices with the 'x+' / 'x-' buffer slices.

    One gpu.Fields main field and two cpu.BufferFields are filled with
    random data, wrapped in node.Fields / node.Core and updated once
    (update_e, then update_h).  For every direction and for e and h the
    slice read from the main field must equal the slice read from the
    buffer (zero norm of the difference).
    """
    nx, ny, nz = self.args

    # instances
    buffer_dict = {}
    buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single')
    buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single')

    import pyopencl as cl
    from kemp.fdtd3d.util import common_gpu
    from kemp.fdtd3d import gpu

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, gpu_devices[0], nx, ny, nz)]
    #mainf_list = [ cpu.Fields(nx, ny, nz) ]
    nodef = node.Fields(mainf_list, buffer_dict)

    # generate random source
    dtype = nodef.dtype
    ehs = common_random.generate_ehs(nx, ny, nz, dtype)
    buf_ehs_p = common_random.generate_ehs(3, ny, nz, dtype)
    buf_ehs_m = common_random.generate_ehs(3, ny, nz, dtype)
    nodef.mainf_list[0].set_eh_bufs(*ehs)
    #nodef.mainf_list[0].set_ehs(*ehs)
    nodef.buffer_dict['x+'].set_ehs(*buf_ehs_p)
    nodef.buffer_dict['x-'].set_ehs(*buf_ehs_m)
    node.Core(nodef)

    # allocations for verify (creation order kept as in the original)
    main_f = nodef.mainf_list[0]
    buf_xp = nodef.buffer_dict['x+']
    buf_xm = nodef.buffer_dict['x-']
    getf_dict = {'x+': {}, 'x-': {}}
    getf_buf_dict = {'x+': {}, 'x-': {}}

    getf_dict['x+']['e'] = gpu.GetFields(
        main_f, ['ey', 'ez'], (nx-1, 0, 0), (nx-1, ny-1, nz-1))
    getf_dict['x+']['h'] = gpu.GetFields(
        main_f, ['hy', 'hz'], (nx-2, 0, 0), (nx-2, ny-1, nz-1))
    getf_buf_dict['x+']['e'] = cpu.GetFields(
        buf_xp, ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1))
    getf_buf_dict['x+']['h'] = cpu.GetFields(
        buf_xp, ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))

    getf_dict['x-']['e'] = gpu.GetFields(
        main_f, ['ey', 'ez'], (1, 0, 0), (1, ny-1, nz-1))
    getf_dict['x-']['h'] = gpu.GetFields(
        main_f, ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))
    getf_buf_dict['x-']['e'] = cpu.GetFields(
        buf_xm, ['ey', 'ez'], (2, 0, 0), (2, ny-1, nz-1))
    getf_buf_dict['x-']['h'] = cpu.GetFields(
        buf_xm, ['hy', 'hz'], (1, 0, 0), (1, ny-1, nz-1))

    # verify
    nodef.update_e()
    nodef.update_h()
    # Single-argument print(...) emits the same text as the former
    # multi-argument print statements and parses on Python 2 and 3.
    print('nodef, instance_list %s' % (nodef.instance_list,))
    print('mainf_list[0], instance_list %s'
          % (nodef.mainf_list[0].instance_list,))

    for direction in ['x+', 'x-']:
        for e_or_h in ['e', 'h']:
            getf = getf_dict[direction][e_or_h]
            getf_buf = getf_buf_dict[direction][e_or_h]
            getf.get_event().wait()
            getf_buf.get_event().wait()

            from_main = getf.get_fields()
            from_buffer = getf_buf.get_fields()
            norm = np.linalg.norm(from_main - from_buffer)
            self.assertEqual(
                norm, 0,
                '%s, %g, %s, %s' % (self.args, norm, direction, e_or_h))
def runTest(self):
    """Two-rank check that the buffer fields agree after each update.

    Rank 0 owns an 'x+' buffer and rank 1 an 'x-' buffer.  At every time
    step rank 1 saves its buffer slice to 'rank1_<h|e>_<tstep>.npy';
    rank 0 polls for that file, loads it and compares it with its own
    slice, printing the norm of the difference when it is non-zero.

    NOTE(review): the files are never removed, so stale files left by an
    earlier run would presumably be picked up by rank 0 - confirm.
    """
    nx, ny, nz = self.args
    tmax = 10

    # instances
    buffer_dict = {}
    if rank == 0:
        buffer_dict['x+'] = cpu.BufferFields('x+', ny, nz, '', 'single')
    elif rank == 1:
        buffer_dict['x-'] = cpu.BufferFields('x-', ny, nz, '', 'single')

    import pyopencl as cl
    from kemp.fdtd3d.util import common_gpu
    from kemp.fdtd3d import gpu

    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, gpu_devices[0], nx, ny, nz)]
    #mainf_list = [ cpu.Fields(nx, ny, nz, use_cpu_core=1) ]
    nodef = node.Fields(mainf_list, buffer_dict)

    # generate random source
    dtype = nodef.dtype
    ehs = common_random.generate_ehs(nx, ny, nz, dtype)
    # Unused, but the call is kept so that the sequence of calls into
    # common_random stays unchanged.
    buf_ehs = common_random.generate_ehs(3, ny, nz, dtype)
    #nodef.cpuf.set_ehs(*ehs)
    nodef.mainf_list[0].set_eh_bufs(*ehs)

    other = {0: 1, 1: 0}[rank]
    if rank == 0:
        #nodef.buffer_dict['x+'].set_ehs(*buf_ehs)
        ExchangeMpi(nodef.buffer_dict['x+'], other, tmax)
    elif rank == 1:
        #nodef.buffer_dict['x-'].set_ehs(*buf_ehs)
        ExchangeMpi(nodef.buffer_dict['x-'], other, tmax)
    node.Core(nodef)

    # allocations for verify
    if rank == 0:
        getf_e = cpu.GetFields(
            nodef.buffer_dict['x+'], ['ey', 'ez'],
            (2, 0, 0), (2, ny-1, nz-1))
        getf_h = cpu.GetFields(
            nodef.buffer_dict['x+'], ['hy', 'hz'],
            (1, 0, 0), (1, ny-1, nz-1))
    elif rank == 1:
        getf_e = cpu.GetFields(
            nodef.buffer_dict['x-'], ['ey', 'ez'],
            (1, 0, 0), (1, ny-1, nz-1))
        getf_h = cpu.GetFields(
            nodef.buffer_dict['x-'], ['hy', 'hz'],
            (0, 0, 0), (0, ny-1, nz-1))

    def load_when_available(path):
        """Poll every 0.5 s until np.load(path) succeeds; return the array."""
        while True:
            try:
                return np.load(path)
            except Exception:
                # Was a bare `except:`; Exception keeps the best-effort
                # retry but no longer swallows KeyboardInterrupt or
                # SystemExit raised while loading.
                sleep(0.5)

    # verify
    # Single-argument print(...) emits the same text as the former
    # multi-argument print statements and parses on Python 2 and 3.
    print('nodef, instance_list %s %s' % (rank, nodef.instance_list))
    print('f0, instance_list %s %s'
          % (rank, nodef.mainf_list[0].instance_list))
    exch = nodef.instance_list[0]
    main_core = nodef.mainf_list[0].instance_list[0]
    if rank == 0:
        #nodef.buffer_dict['x+'].instance_list.pop(0)
        print('bufferf x+, instance_list %s %s'
              % (rank, nodef.buffer_dict['x+'].instance_list))
        core, mpi = nodef.buffer_dict['x+'].instance_list
    elif rank == 1:
        #nodef.buffer_dict['x-'].instance_list.pop(0)
        print('bufferf x-, instance_list %s %s'
              % (rank, nodef.buffer_dict['x-'].instance_list))
        core, mpi = nodef.buffer_dict['x-'].instance_list

    for tstep in range(1, tmax + 1):
        #nodef.update_e()
        main_core.update_e()
        if rank == 0:
            core.update_e('')
            mpi.update_e('')
        elif rank == 1:
            core.update_e('pre')
            mpi.update_e('pre')
            core.update_e('post')
            mpi.update_e('post')
        exch.update_e()

        # verify the buffer
        getf_h.get_event().wait()
        if rank == 1:
            np.save('rank1_h_%d' % tstep, getf_h.get_fields())
        elif rank == 0:
            arr1 = load_when_available('rank1_h_%d.npy' % tstep)
            arr0 = getf_h.get_fields()
            norm = np.linalg.norm(arr0 - arr1)
            if norm != 0:
                print('%s h norm %s' % (tstep, norm))
            # NOTE(review): the collapsed source does not show whether
            # this assertion was live or part of the disabled
            # `if tstep > 1:` line; it is kept disabled like its e-field
            # counterpart below - confirm.
            #if tstep > 1: self.assertEqual(norm, 0, '%s, %g, h' % (self.args, norm))

        #nodef.update_h()
        main_core.update_h()
        if rank == 0:
            core.update_h('pre')
            mpi.update_h('pre')
            core.update_h('post')
            mpi.update_h('post')
        elif rank == 1:
            core.update_h('')
            mpi.update_h('')
        exch.update_h()

        getf_e.get_event().wait()
        if rank == 1:
            np.save('rank1_e_%d' % tstep, getf_e.get_fields())
        elif rank == 0:
            arr1 = load_when_available('rank1_e_%d.npy' % tstep)
            arr0 = getf_e.get_fields()
            norm = np.linalg.norm(arr0 - arr1)
            if norm != 0:
                print('%s e norm %s' % (tstep, norm))
            #self.assertEqual(norm, 0, '%s, %g, e' % (self.args, norm))

# NOTE(review): the original text continues here with an opening ''' whose
# content is not visible.
def test_y_pbc_x_exchange(self):
    """Check the x-axis exchange together with the y-axis PBC on GPU + CPU.

    One gpu.Fields and one cpu.Fields of equal size form the node.  After
    one e update and one h update (main fields, then pbc, then exchange)
    three pairs of slices must be identical: the x faces shared by the
    two fields, the two y faces of the GPU field and the two y faces of
    the CPU field.
    """
    # instance
    nx, ny, nz = 40, 50, 60
    #nx, ny, nz = 3, 4, 5
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    gpuf = gpu.Fields(context, gpu_devices[0], nx, ny, nz)
    cpuf = cpu.Fields(nx, ny, nz)
    mainf_list = [gpuf, cpuf]
    nodef = NodeFields(mainf_list)
    core = NodeCore(nodef)
    pbc = NodePbc(nodef, 'y')
    exchange = NodeExchange(nodef)

    # generate random source
    ehs_gpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
    gpuf.set_eh_bufs(*ehs_gpu)
    ehs_cpu = common_update.generate_random_ehs(nx, ny, nz, nodef.dtype)
    cpuf.set_ehs(*ehs_cpu)

    # verify
    # NOTE(review): the loop extents below were inferred from the
    # collapsed source; pbc/exchange are assumed to run once after all
    # main fields were updated - confirm.
    for mainf in mainf_list:
        mainf.update_e()
    pbc.update_e()
    exchange.update_e()

    for mainf in mainf_list:
        mainf.update_h()
    pbc.update_h()
    exchange.update_h()
    mainf_list[-1].enqueue_barrier()

    getf0, getf1 = {}, {}

    def assert_identical(label):
        """Wait for all four getters, then compare the e and h slices."""
        # list(...) keeps the concatenation valid when values() returns
        # a view (Python 3) as well as a list (Python 2).
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

        for eh in ['e', 'h']:
            g0 = getf0[eh].get_fields()
            g1 = getf1[eh].get_fields()
            norm = np.linalg.norm(g0 - g1)
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, label, eh))

    # x-axis exchange
    getf0['e'] = gpu.GetFields(
        gpuf, ['ey', 'ez'], (nx - 1, 0, 0), (nx - 1, ny - 2, nz - 2))
    getf1['e'] = cpu.GetFields(
        cpuf, ['ey', 'ez'], (0, 0, 0), (0, ny - 2, nz - 2))
    getf0['h'] = gpu.GetFields(
        gpuf, ['hy', 'hz'], (nx - 1, 1, 1), (nx - 1, ny - 1, nz - 1))
    getf1['h'] = cpu.GetFields(
        cpuf, ['hy', 'hz'], (0, 1, 1), (0, ny - 1, nz - 1))
    assert_identical('x-axis exchange')

    # y-axis pbc gpu
    getf0['e'] = gpu.GetFields(
        gpuf, ['ex', 'ez'], (0, ny - 1, 0), (nx - 2, ny - 1, nz - 2))
    getf1['e'] = gpu.GetFields(
        gpuf, ['ex', 'ez'], (0, 0, 0), (nx - 2, 0, nz - 2))
    getf0['h'] = gpu.GetFields(
        gpuf, ['hx', 'hz'], (1, ny - 1, 1), (nx - 1, ny - 1, nz - 1))
    getf1['h'] = gpu.GetFields(
        gpuf, ['hx', 'hz'], (1, 0, 1), (nx - 1, 0, nz - 1))
    assert_identical('y-axis pbc gpu')

    # y-axis pbc cpu
    getf0['e'] = cpu.GetFields(
        cpuf, ['ex', 'ez'], (0, ny - 1, 0), (nx - 2, ny - 1, nz - 2))
    getf1['e'] = cpu.GetFields(
        cpuf, ['ex', 'ez'], (0, 0, 0), (nx - 2, 0, nz - 2))
    getf0['h'] = cpu.GetFields(
        cpuf, ['hx', 'hz'], (1, ny - 1, 1), (nx - 1, ny - 1, nz - 1))
    getf1['h'] = cpu.GetFields(
        cpuf, ['hx', 'hz'], (1, 0, 1), (nx - 1, 0, nz - 1))
    assert_identical('y-axis pbc cpu')
divide_axes = 'x'

# GPUs: one Fields instance and one 'ez' plane getter per device
gpu_devices = common_gpu.get_gpu_devices()
context = cl.Context(gpu_devices)
ngpu = len(gpu_devices)
fdtds = [gpu.Fields(context, device, nx_gpu, ny, nz, coeff_use='')
         for device in gpu_devices]
# `//` keeps the plane/source indices integral on Python 3 as well; for
# the integer grid sizes assumed here it equals the former Python 2 `/`.
outputs = [gpu.GetFields(fdtd, 'ez',
                         (0, 0, nz // 2), (nx_gpu-2, ny-1, nz // 2))
           for fdtd in fdtds]
# NOTE(review): fdtds[2] requires at least three GPU devices - confirm.
src_e = gpu.DirectSrc(fdtds[2], 'ez',
                      (nx_gpu // 5 * 1, ny // 2, 0),
                      (nx_gpu // 5 * 1, ny // 2, nz-1),
                      lambda tstep: np.sin(0.1 * tstep))

# CPU: appended as the last sub-domain
common_cpu.print_cpu_info()
fdtds.append(cpu.Fields(nx_cpu, ny, nz, coeff_use=''))
outputs.append(cpu.GetFields(fdtds[-1], 'ez',
                             (0, 0, nz // 2), (nx_cpu-2, ny-1, nz // 2)))

# GPUs-CPU
exch = ExchangeInternal(fdtds, 'x')

# Plot
import matplotlib.pyplot as plt
plt.ion()

# x offsets of the sub-domains inside the global array
idxs = ([0]
        + [i*nx_gpu - i for i in range(1, ngpu+1)]
        + [ngpu*nx_gpu + nx_cpu - (ngpu+1)])
# one vertical line per sub-domain boundary
for idx in idxs[1:]:
    plt.plot((idx, idx), (0, ny), color='w', linewidth=0.2)

global_ez = np.ones((idxs[-1], ny), dtype=fdtds[0].dtype)
imag = plt.imshow(global_ez.T, cmap=plt.cm.hot, origin='lower',
                  vmin=0, vmax=0.05)
def runTest(self):
    """Check NodeExchange between the main fields and the MPI buffers.

    One gpu.Fields per GPU device plus one cpu.Fields form the main
    field list; cpu.Fields buffers tagged 'x-', 'y+', 'y-', 'z+' and 'z-'
    are appended to the node.  After exchange.update_e() and
    exchange.update_h() the slices on both sides of every interface must
    be identical (zero norm of the difference).
    """
    nx, ny, nz = self.args

    # instance
    gpu_devices = common_gpu.gpu_device_list(print_info=False)
    context = cl.Context(gpu_devices)
    mainf_list = [gpu.Fields(context, device, nx, ny, nz)
                  for device in gpu_devices]
    mainf_list.append(cpu.Fields(nx, ny, nz))
    nodef = NodeFields(mainf_list)
    dtype = nodef.dtype

    # buffer instance
    nodef.append_buffer_fields(cpu.Fields(3, ny, nz, mpi_type='x-'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y+'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, nz, mpi_type='y-'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z+'))
    nodef.append_buffer_fields(cpu.Fields(3, nodef.nx, ny, mpi_type='z-'))

    exchange = NodeExchange(nodef)

    # generate random source
    for f in mainf_list[:-1]:
        fnx, fny, fnz = f.ns
        ehs = common_update.generate_random_ehs(fnx, fny, fnz, dtype)
        f.set_eh_bufs(*ehs)

    for f in nodef.cpuf_dict.values():
        fnx, fny, fnz = f.ns
        ehs = common_update.generate_random_ehs(fnx, fny, fnz, dtype)
        f.set_ehs(*ehs)

    # verify
    exchange.update_e()
    exchange.update_h()

    getf0, getf1 = {}, {}
    # looked up by name through getattr(self, <device_type>) below
    self.gpu, self.cpu = gpu, cpu

    def getf_class(f):
        """Return the GetFields class of the module matching f.device_type."""
        return getattr(self, f.device_type).GetFields

    def wait_all():
        """Block until every getter in getf0 and getf1 has finished."""
        # list(...) keeps the concatenation valid when values() returns
        # a view (Python 3) as well as a list (Python 2).
        for getf in list(getf0.values()) + list(getf1.values()):
            getf.get_event().wait()

    def assert_buffer(label):
        """Wait, then require identical e and h slices for one buffer."""
        wait_all()
        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(norm, 0, '%g, %s, %s' % (norm, label, eh))

    # NOTE(review): the loop extents below were inferred from the
    # collapsed source; every pair is assumed to be verified inside its
    # own loop iteration - confirm.

    # mainf list
    for f0, f1 in zip(mainf_list[:-1], mainf_list[1:]):
        getf0['e'] = getf_class(f0)(
            f0, ['ey', 'ez'],
            (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
        getf1['e'] = getf_class(f1)(
            f1, ['ey', 'ez'],
            (0, 0, 0), (0, f1.ny-2, f1.nz-2))
        getf0['h'] = getf_class(f0)(
            f0, ['hy', 'hz'],
            (f0.nx-1, 1, 1), (f0.nx-1, f0.ny-1, f0.nz-1))
        getf1['h'] = getf_class(f1)(
            f1, ['hy', 'hz'],
            (0, 1, 1), (0, f1.ny-1, f1.nz-1))

        wait_all()
        for eh in ['e', 'h']:
            norm = np.linalg.norm(
                getf0[eh].get_fields() - getf1[eh].get_fields())
            self.assertEqual(
                norm, 0,
                '%s, %g, %s, %s, %s' % (
                    self.args, norm, 'mainf',
                    getf0[eh].mainf.device_type,
                    getf1[eh].mainf.device_type))

    # buffer 'x-'
    f0, f1 = nodef.cpuf_dict['x-'], mainf_list[0]
    getf0['e'] = cpu.GetFields(
        f0, ['ey', 'ez'],
        (f0.nx-1, 0, 0), (f0.nx-1, f0.ny-2, f0.nz-2))
    getf1['e'] = gpu.GetFields(
        f1, ['ey', 'ez'],
        (1, 0, 0), (1, f1.ny-2, f1.nz-2))
    getf0['h'] = cpu.GetFields(
        f0, ['hy', 'hz'],
        (f0.nx-2, 1, 1), (f0.nx-2, f0.ny-1, f0.nz-1))
    getf1['h'] = gpu.GetFields(
        f1, ['hy', 'hz'],
        (0, 1, 1), (0, f1.ny-1, f1.nz-1))
    assert_buffer('x-')

    # buffer 'y+'
    anx_list = nodef.accum_nx_list
    f1 = nodef.cpuf_dict['y+']
    for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = getf_class(f0)(
            f0, ['ex', 'ez'],
            (0, f0.ny-1, 0), (f0.nx-2, f0.ny-1, f0.nz-2))
        getf1['e'] = cpu.GetFields(
            f1, ['ey', 'ez'],
            (1, anx0, 0), (1, anx1-1, f1.nz-2))
        getf0['h'] = getf_class(f0)(
            f0, ['hx', 'hz'],
            (1, f0.ny-2, 1), (f0.nx-1, f0.ny-2, f0.nz-1))
        getf1['h'] = cpu.GetFields(
            f1, ['hy', 'hz'],
            (0, anx0+1, 1), (0, anx1, f1.nz-1))
        assert_buffer('y+')

    # buffer 'y-'
    f0 = nodef.cpuf_dict['y-']
    for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = cpu.GetFields(
            f0, ['ey', 'ez'],
            (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
        getf1['e'] = getf_class(f1)(
            f1, ['ex', 'ez'],
            (0, 1, 0), (f1.nx-2, 1, f1.nz-2))
        getf0['h'] = cpu.GetFields(
            f0, ['hy', 'hz'],
            (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
        getf1['h'] = getf_class(f1)(
            f1, ['hx', 'hz'],
            (1, 0, 1), (f1.nx-1, 0, f1.nz-1))
        assert_buffer('y-')

    # buffer 'z+'
    f1 = nodef.cpuf_dict['z+']
    for f0, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = getf_class(f0)(
            f0, ['ex', 'ey'],
            (0, 0, f0.nz-1), (f0.nx-2, f0.ny-2, f0.nz-1))
        getf1['e'] = cpu.GetFields(
            f1, ['ey', 'ez'],
            (1, anx0, 0), (1, anx1-1, f1.nz-2))
        getf0['h'] = getf_class(f0)(
            f0, ['hx', 'hy'],
            (1, 1, f0.nz-2), (f0.nx-1, f0.ny-1, f0.nz-2))
        getf1['h'] = cpu.GetFields(
            f1, ['hy', 'hz'],
            (0, anx0+1, 1), (0, anx1, f1.nz-1))
        assert_buffer('z+')

    # buffer 'z-'
    f0 = nodef.cpuf_dict['z-']
    for f1, anx0, anx1 in zip(mainf_list, anx_list[:-1], anx_list[1:]):
        getf0['e'] = cpu.GetFields(
            f0, ['ey', 'ez'],
            (f0.nx-1, anx0, 0), (f0.nx-1, anx1-1, f0.nz-2))
        getf1['e'] = getf_class(f1)(
            f1, ['ex', 'ey'],
            (0, 0, 1), (f1.nx-2, f1.ny-2, 1))
        getf0['h'] = cpu.GetFields(
            f0, ['hy', 'hz'],
            (f0.nx-2, anx0+1, 1), (f0.nx-2, anx1, f0.nz-1))
        getf1['h'] = getf_class(f1)(
            f1, ['hx', 'hy'],
            (1, 1, 0), (f1.nx-1, f1.ny-1, 0))
        assert_buffer('z-')
] mainf_list.append(cpu.Fields(cpu_nx, ny, nz)) #mainf_list = [ cpu.Fields(160, ny, nz) ] fields = Fields(mainf_list, pbc='yz', mpi_shape=(2, 1, 1), tmax=tmax) Core(fields) nx = fields.nx tfunc = lambda tstep: np.sin(0.05 * tstep) #IncidentDirect(fields, 'ez', (20, 0, 0), (20, ny-1, nz-1), tfunc) IncidentDirect(fields, 'ez', (0, 20, 0), (nx - 1, 20, nz - 1), tfunc) getf = GetFields(fields, 'ez', (0, 0, nz / 2), (nx - 1, ny - 1, nz / 2)) if rank == 0: buf = fields.buffer_dict['x+'] print 'buf instance_list', buf.instance_list getf_buf = cpu.GetFields(buf, 'ez', (0, 0, nz / 2), (2, ny - 1, nz / 2)) #IncidentDirect(fields, 'ey', (20, 0, 0), (20, ny-1, nz-1), tfunc) #getf = GetFields(fields, 'ey', (0, 0, nz/2), (nx-1, ny-1, nz/2)) #IncidentDirect(fields, 'ex', (0, 20, 0), (nx-1, 20, nz-1), tfunc) #getf = GetFields(fields, 'ex', (0, 0, nz/2), (nx-1, ny-1, nz/2)) #print fields.updatef_list """ # plot if rank == 0: import matplotlib.pyplot as plt plt.ion() fig = plt.figure(figsize=(12,8)) '''
def __init__(self, gpuf, direction, tmax, ny, nz, coeff_use, precision_float):
    """Initialise the buffer fields and their GPU/MPI transfer objects.

    The parent initialiser is called first with a first size argument of
    3 and use_cpu_core=0.  gpuf must be a gpu.Fields instance and
    direction 'x+' or 'x-' (checked through the common.check_* helpers).
    Depending on direction, three getter/setter pairs, one persistent
    send request and two persistent receive requests (each with its own
    zero-filled array) are created, and update_e / update_h are bound to
    the matching *_xp or *_xm methods.  tmax is stored unchanged.
    """
    super(BufferFields, self).__init__(
        3, ny, nz, coeff_use, precision_float, use_cpu_core=0)
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('x+', 'x-'))

    if direction == 'x+':
        # pair 0: gpuf -> buffer (hy, hz)
        get_main = gpu.GetFields(
            gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        set_buf = cpu.SetFields(
            self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # pair 1: buffer -> gpuf (ey, ez)
        get_buf = cpu.GetFields(
            self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        set_main = gpu.SetFields(
            gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # pair 2: getter/setter on the buffer itself
        get_send = cpu.GetFields(
            self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        set_recv = cpu.SetFields(
            self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        peer = rank + 1
        send_tag, recv_tag = 0, 1

    elif direction == 'x-':
        # pair 0: gpuf -> buffer (ey, ez)
        get_main = gpu.GetFields(
            gpuf, ['ey', 'ez'], (2, 0, 0), (2, -1, -1))
        set_buf = cpu.SetFields(
            self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # pair 1: buffer -> gpuf (hy, hz)
        get_buf = cpu.GetFields(
            self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        set_main = gpu.SetFields(
            gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # pair 2: getter/setter on the buffer itself
        get_send = cpu.GetFields(
            self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        set_recv = cpu.SetFields(
            self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        peer = rank - 1
        send_tag, recv_tag = 1, 0

    # persistent MPI requests: one send, two receives
    send_request = comm.Send_init(get_send.host_array, peer, tag=send_tag)
    recv_arrays = [
        np.zeros(get_send.host_array.shape, gpuf.dtype)
        for _ in range(2)
    ]
    recv_requests = [
        comm.Recv_init(recv_array, peer, tag=recv_tag)
        for recv_array in recv_arrays
    ]

    # global variables
    self.direction = direction
    self.gf0 = get_main
    self.sf0 = set_buf
    self.gf1 = get_buf
    self.sf1 = set_main
    self.gf2 = get_send
    self.sf2 = set_recv
    self.req_send = send_request
    self.req_recv_list = recv_requests
    self.tmp_recv_list = recv_arrays
    self.switch = 0
    self.tmax = tmax
    self.tstep = 1

    # global functions
    if direction == 'x+':
        self.update_e = self.update_e_xp
        self.update_h = self.update_h_xp
    elif direction == 'x-':
        self.update_e = self.update_e_xm
        self.update_h = self.update_h_xm