def __init__(self, gpuf, direction):
    """Create the CPU buffer fields and the copy helpers around a GPU fields.

    For each side named in `direction` a cpu.Fields buffer is built on one
    shared cpu.QueueTask, followed by the GetFields/SetFields helpers that
    connect it with `gpuf`, and a zero-filled host array shaped like the
    outgoing host array.

    Args:
        gpuf: gpu.Fields instance (validated with common.check_type).
        direction: one of '+', '-' or '+-' (validated with common.check_value).
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    qtask = cpu.QueueTask()

    def new_buffer():
        # buffer fields reuse the y/z sizes, coefficient flag and precision of gpuf
        return cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                          gpuf.precision_float, use_cpu_core=0)

    # (start, stop) index triples of the x-slabs addressed below
    first_beg, first_end = (0, 0, 0), (0, -1, -1)
    second_beg, second_end = (1, 0, 0), (1, -1, -1)
    last_beg, last_end = (-1, 0, 0), (-1, -1, -1)
    prev_beg, prev_end = (-2, 0, 0), (-2, -1, -1)

    if '+' in direction:
        buf_p = new_buffer()
        self.cpuf_p = buf_p

        # gpu h -> buffer h, then buffer e -> gpu e
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], prev_beg, prev_end)
        self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'], first_beg, first_end, True)
        self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'], second_beg, second_end)
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], last_beg, last_end, True)

        # buffer-side getter/setter plus the scratch array for incoming e
        self.gf_h = cpu.GetFields(buf_p, ['hy', 'hz'], second_beg, second_end)
        self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'], last_beg, last_end, True)
        self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, gpuf.dtype)

    if '-' in direction:
        buf_m = new_buffer()
        self.cpuf_m = buf_m

        # gpu e -> buffer e, then buffer h -> gpu h
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], second_beg, second_end)
        self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'], last_beg, last_end, True)
        self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'], second_beg, second_end)
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], first_beg, first_end, True)

        # buffer-side getter/setter plus the scratch array for incoming h
        self.gf_e = cpu.GetFields(buf_m, ['ey', 'ez'], second_beg, second_end)
        self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'], first_beg, first_end, True)
        self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, gpuf.dtype)

    # global variables
    self.direction = direction
    self.qtask = qtask
def __init__(self, nodef):
    """Prepare the x-direction MPI exchange for the buffers of a node.

    For every 'x+' / 'x-' key found in nodef.buffer_dict this builds the
    GetFields/SetFields helpers between the outermost main fields and the
    buffer fields, plus persistent MPI send/receive requests towards the
    neighbouring rank (ranks wrap around at both ends). The instance is
    finally registered through nodef.append_instance.

    Args:
        nodef: node.Fields instance (validated with common.check_type).
    """
    common.check_type('nodef', nodef, node.Fields)

    self.directions = nodef.buffer_dict.keys()

    # device modules, resolved by name with getattr(self, <device_type>)
    self.gpu = gpu
    self.cpu = cpu

    if 'x+' in self.directions:
        main_f = nodef.mainf_list[-1]
        buf_f = nodef.buffer_dict['x+']
        dev = getattr(self, main_f.device_type)

        self.gf_p_h = dev.GetFields(main_f, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(buf_f, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.sf_p_e = dev.SetFields(main_f, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf_h = cpu.GetFields(buf_f, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_e = cpu.SetFields(buf_f, ['ey', 'ez'], (1, 0, 0), (1, -1, -1), True)

        # the last rank wraps around to rank 0
        if rank == size - 1:
            peer = 0
        else:
            peer = rank + 1

        self.req_send_h = comm.Send_init(self.gf_h.host_array, peer, tag=0)
        self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, nodef.dtype)
        self.req_recv_e = comm.Recv_init(self.tmp_recv_e, peer, tag=1)

    if 'x-' in self.directions:
        main_f = nodef.mainf_list[0]
        buf_f = nodef.buffer_dict['x-']
        dev = getattr(self, main_f.device_type)

        self.gf_m_e = dev.GetFields(main_f, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(buf_f, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.sf_m_h = dev.SetFields(main_f, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf_e = cpu.GetFields(buf_f, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.sf_h = cpu.SetFields(buf_f, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # rank 0 wraps around to the last rank
        if rank == 0:
            peer = size - 1
        else:
            peer = rank - 1

        self.req_send_e = comm.Send_init(self.gf_e.host_array, peer, tag=1)
        self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, nodef.dtype)
        self.req_recv_h = comm.Recv_init(self.tmp_recv_h, peer, tag=0)

    # append to the update list
    self.priority_type = 'mpi'
    nodef.append_instance(self)
def __init__(self, node_fields, axis):
    """Set up the internal periodic boundary condition along one axis.

    Along 'y' or 'z' a device-specific Pbc is created for every entry of
    node_fields.mainf_list. Along 'x' a single Pbc is created when the first
    and last main fields are the same object; otherwise getter/setter helpers
    between the first and last main fields are stored and update_e/update_h
    are bound to the *_actual methods. In all other cases update_e/update_h
    are no-ops.

    Args:
        node_fields: NodeFields instance (validated with common.check_type).
        axis: 'x', 'y' or 'z' (validated with common.check_value).

    Raises:
        ValueError: if node_fields.cpuf_dict has a '<ax>+' or '<ax>-' key for
            any of the three axes.
    """
    common.check_type('node_fields', node_fields, NodeFields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    # local variables
    # device modules, resolved by name with getattr(self, f.device_type)
    self.gpu = gpu
    self.cpu = cpu

    mainf_list = node_fields.mainf_list
    cpuf_dict = node_fields.cpuf_dict

    # NOTE(review): the check rejects buffers on ANY axis, not only on `axis`
    # -- confirm this is the intended restriction.
    set_cpuf = set(cpuf_dict.keys())
    for ax in ['x', 'y', 'z']:
        if not set_cpuf.isdisjoint([ax + '+', ax + '-']):
            # call form of raise: valid on both Python 2 and Python 3
            raise ValueError(
                'There are %s-axis buffer instances. The pbc_internal operation along %s-axis is not allowed.'
                % (ax, ax))

    # create pbc instances
    f0 = mainf_list[0]
    f1 = mainf_list[-1]

    if axis == 'x':
        if f0 is not f1:
            # NOTE(review): f1 is always accessed through cpu.* and f0 through
            # gpu.*, regardless of their device_type -- presumably the list is
            # ordered gpu first, cpu last; verify against NodeFields.
            setf_e = cpu.SetFields(f1, ['ey', 'ez'],
                                   (f1.nx-1, 0, 0), (f1.nx-1, f1.ny-2, f1.nz-2), True)
            getf_e = gpu.GetFields(f0, ['ey', 'ez'],
                                   (0, 0, 0), (0, f0.ny-2, f0.nz-2))

            setf_h = gpu.SetFields(f0, ['hy', 'hz'],
                                   (0, 1, 1), (0, f0.ny-1, f0.nz-1), True)
            getf_h = cpu.GetFields(f1, ['hy', 'hz'],
                                   (f1.nx-1, 1, 1), (f1.nx-1, f1.ny-1, f1.nz-1))
        else:
            getattr(self, f0.device_type).Pbc(f0, axis)

    elif axis in ['y', 'z']:
        for f in mainf_list:
            getattr(self, f.device_type).Pbc(f, axis)

    # global variables and functions
    if axis == 'x' and f0 is not f1:
        self.setf_e = setf_e
        self.getf_e = getf_e
        self.setf_h = setf_h
        self.getf_h = getf_h

        self.update_e = self.update_e_actual
        self.update_h = self.update_h_actual
    else:
        self.update_e = lambda: None
        self.update_h = lambda: None
def __init__(self, gpuf, direction, tmax):
    """Create CPU buffer fields, copy helpers and MPI requests for a GPU fields.

    For each side named in `direction` a cpu.Fields buffer is built on one
    shared cpu.QueueTask, followed by the GetFields/SetFields helpers that
    connect it with `gpuf`, one persistent MPI send request and two
    persistent MPI receive requests (each with its own zero-filled host
    array) towards rank + 1 ('+') or rank - 1 ('-').

    Args:
        gpuf: gpu.Fields instance (validated with common.check_type).
        direction: one of '+', '-' or '+-' (validated with common.check_value).
        tmax: stored unchanged as self.tmax.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    qtask = cpu.QueueTask()

    def new_buffer():
        # buffer fields reuse the y/z sizes, coefficient flag and precision of gpuf
        return cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use,
                          gpuf.precision_float, use_cpu_core=1)

    # (start, stop) index triples of the x-slabs addressed below
    first_beg, first_end = (0, 0, 0), (0, -1, -1)
    second_beg, second_end = (1, 0, 0), (1, -1, -1)
    last_beg, last_end = (-1, 0, 0), (-1, -1, -1)
    prev_beg, prev_end = (-2, 0, 0), (-2, -1, -1)

    if '+' in direction:
        buf_p = new_buffer()
        self.cpuf_p = buf_p

        # gpu h -> buffer h, then buffer e -> gpu e
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], prev_beg, prev_end)
        self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'], first_beg, first_end, True)
        self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'], second_beg, second_end)
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], last_beg, last_end, True)

        out_h = cpu.GetFields(buf_p, ['hy', 'hz'], second_beg, second_end)
        self.gf_h = out_h
        self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'], last_beg, last_end, True)

        # h goes out with tag 0; e comes back with tag 1 into two alternating arrays
        self.req_send_h = comm.Send_init(out_h.host_array, rank + 1, tag=0)
        self.tmp_recv_e_list = [
            np.zeros(out_h.host_array.shape, gpuf.dtype) for _ in range(2)
        ]
        self.req_recv_e_list = [
            comm.Recv_init(recv_arr, rank + 1, tag=1)
            for recv_arr in self.tmp_recv_e_list
        ]
        self.switch_e = 0

    if '-' in direction:
        buf_m = new_buffer()
        self.cpuf_m = buf_m

        # gpu e -> buffer e, then buffer h -> gpu h
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], second_beg, second_end)
        self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'], last_beg, last_end, True)
        self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'], second_beg, second_end)
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], first_beg, first_end, True)

        out_e = cpu.GetFields(buf_m, ['ey', 'ez'], second_beg, second_end)
        self.gf_e = out_e
        self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'], first_beg, first_end, True)

        # e goes out with tag 1; h comes back with tag 0 into two alternating arrays
        self.req_send_e = comm.Send_init(out_e.host_array, rank - 1, tag=1)
        self.tmp_recv_h_list = [
            np.zeros(out_e.host_array.shape, gpuf.dtype) for _ in range(2)
        ]
        self.req_recv_h_list = [
            comm.Recv_init(recv_arr, rank - 1, tag=0)
            for recv_arr in self.tmp_recv_h_list
        ]
        self.switch_h = 0

    # global variables
    self.direction = direction
    self.qtask = qtask
    self.tmax = tmax
    self.tstep = 1
def update_e(s):
    # NOTE(review): everything in this block is an unfinished draft. It does
    # not parse as Python (the call below never closes its parenthesis, several
    # `for`/`def` statements have no body, and there is an `elif I = recv:`
    # near the end), so the indentation shown here is a best guess.
    s.qtask.enqueue(s.program.update_e, *(s.nss.e_args)

    for e_func in e_func_list:


class ExchangeMpi:
    # Draft: holds one BufferFields per boundary direction in the dict s.bfs.
    def __init__(s, fields_list, nx_cpu, nx, ny, nz):
        cuse = fields_list[0].coeff_use

        # one buffer per face, keyed by '<axis><sign>'
        s.bfs = {}
        s.bfs['x+'] = BufferFields(nx_cpu, ny, nz, cuse, 0)
        s.bfs['x-'] = BufferFields(3, ny, nz, cuse, 1)
        s.bfs['y+'] = BufferFields(3, nx, nz, cuse, 1)
        s.bfs['y-'] = BufferFields(3, nx, nz, cuse, 1)
        s.bfs['z+'] = BufferFields(3, nx, ny, cuse, 1)
        s.bfs['z-'] = BufferFields(3, nx, ny, cuse, 1)

    def update_e(s):
        # NOTE(review): nx, ny, nz and s.fields_list are used below, but the
        # __init__ above does not store them on s -- confirm where they are
        # expected to come from.
        for pos, bf in s.bfs.items():
            if pos == 'x+':
                # copy the h slab from the last main fields into the buffer,
                # then enqueue the buffer's own e update
                getf = gpu.GetFields(s.fields_list[-1], ['hy', 'hz'], (nx-2, 0, 0), (nx-2, ny-1, nz-1))
                setf = cpu.SetFields(s.bfs[pos], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))
                setf.set_fields(getf.get_fields(), [getf.get_event()])
                s.bfs[pos].qtask.enqueue(s.bfs[pos].program.update_e, *s.bfs[pos].e_args)

            elif pos == 'y+':
                # NOTE(review): this branch builds `getfs` but then reads
                # `getf`, which is not assigned in this branch; the slab points
                # and the ['hy', 'hz'] setter look copied from the 'x+' branch
                # -- presumably unfinished.
                getfs = [gpu.GetFields(fields, ['hx', 'hz'], (nx-2, 0, 0), (nx-2, ny-1, nz-1)) for fields in s.fields_list]
                setf = cpu.SetFields(s.bfs[pos], ['hy', 'hz'], (0, 0, 0), (0, ny-1, nz-1))
                setf.set_fields(getf.get_fields(), [getf.get_event()])
                s.bfs[pos].qtask.enqueue(s.bfs[pos].program.update_e, *s.bfs[pos].e_args)

    # NOTE(review): no body was written for update_h
    def update_h(s):


# Return (pt0, pt1): two dicts keyed by '-' / '+', giving the start and end
# index triples of the boundary plane perpendicular to `axis` on the low ('-')
# and high ('+') side of an (nx, ny, nz) grid.
def get_pts_for_boundary(axis, nx, ny, nz):
    # start corner of each boundary plane
    pt0 = {
        'x': {'-':(0, 0, 0), '+':(nx-1, 0, 0)},
        'y': {'-':(0, 0, 0), '+':(0, ny-1, 0)},
        'z': {'-':(0, 0, 0), '+':(0, 0, nz-1)} }[axis]

    # end corner of each boundary plane
    pt1 = {
        'x': {'-':(0, ny-1, nz-1), '+':(nx-1, ny-1, nz-1)},
        'y': {'-':(nx-1, 0, nz-1), '+':(nx-1, ny-1, nz-1)},
        'z': {'-':(nx-1, ny-1, 0), '+':(nx-1, ny-1, nz-1)} }[axis]

    return pt0, pt1


class ExchangeInternal:
    # Draft: builds per-fields getter/setter lists for the boundary planes
    # between consecutive entries of a fields list.
    def __init__(s, node_list, axis):
        # NOTE(review): the parameter is `node_list`, but `fields_list` is read
        # here -- it is not defined in this method.
        emf_list = fields_list

        # tangential field names for the chosen axis
        e_strfs = {'x':['ey','ez'], 'y':['ex','ez'], 'z':['ex','ey']}[axis]
        h_strfs = {'x':['hy','hz'], 'y':['hx','hz'], 'z':['hx','hy']}[axis]
        pts = [get_pts_for_boundary(axis, *emf.ns) for emf in emf_list]

        s.e_getfs, s.h_getfs = [], []
        s.e_setfs, s.h_setfs = [], []

        # device modules, resolved by name with getattr(s, emf.device_type)
        s.gpu, s.cpu = gpu, cpu

        # every fields except the first: e getter / h setter on the '-' plane
        # (slicing the result of zip() requires Python 2, where zip returns a list)
        for emf, (pt0, pt1) in zip(emf_list, pts)[1:]:
            s.e_getfs.append(
                getattr(s, emf.device_type).GetFields(emf, e_strfs, pt0['-'], pt1['-']) )
            s.h_setfs.append(
                getattr(s, emf.device_type).SetFields(emf, h_strfs, pt0['-'], pt1['-'], np.ndarray) )

        # every fields except the last: h getter / e setter on the '+' plane
        for emf, (pt0, pt1) in zip(emf_list, pts)[:-1]:
            s.h_getfs.append(
                getattr(s, emf.device_type).GetFields(emf, h_strfs, pt0['+'], pt1['+']) )
            s.e_setfs.append(
                getattr(s, emf.device_type).SetFields(emf, e_strfs, pt0['+'], pt1['+'], np.ndarray) )

        # NOTE(review): three consecutive assignments to s.update; only the
        # last one would survive as written -- presumably a selection between
        # head/body/tail was planned. s.update_e_tail is not defined in this
        # block.
        s.update = s.update_e_head
        s.update = s.update_e_body
        s.update = s.update_e_tail

    # receive only, then store the received host array
    def update_e_head(s):
        s.recv.Start()
        s.recv.Wait()
        s.setf.set_fields(s.e_fhost_recv)

    # start receive, wait for the getter event, send, then complete both
    def update_e_body(s):
        s.e_recv.Start()
        s.e_getf.get_event().wait()
        s.e_send.Start()
        s.e_recv.Wait()
        s.e_setf.set_fields(s.e_fhost_recv)
        s.e_send.Wait()

    # NOTE(review): second definition with the same name; the same start
    # sequence expressed as a list of callables with their argument tuples
    def update_e_body(s):
        f_list = [s.e_recv.Start, s.e_getf.get_event().wait, s.e_send.Start]
        arg_list = [(), (), ()]
        ff = zip(f_list, arg_list)

        for f, arg in ff:
            f(*arg)

        s.e_recv.Wait()
        s.e_setf.set_fields(s.e_fhost_recv)
        s.e_send.Wait()


# NOTE(review): loose scratch fragments follow; they are not valid Python
# (dangling list rows, empty append() calls, a tuple index on a list and
# `elif I = recv:`).
start_list = []
wait_list = []

[s.e_recv.Start], # recv
[s.e_getf.get_event().wait, s.e_send.Start], # send
[s.e_recv.Wait, s.e_setf.set_fields], # recv
[s.e_send.Wait] ] # send

if 'r' in sr:
    start_list.append( )
    wait_list.append( )
if 's' in sr:
    start_list.append( )
    wait_list.append( )

f_list.insert(-1, ff_list[1])
f_list = f_list + [ff_list[3]]
fin_f_list = f_list[0,1,2,3]

elif I = recv:
    f_list = ff_list[0] + ff_list[2]
def __init__(self, gpuf, direction, tmax, ny, nz, coeff_use, precision_float):
    """Initialise the buffer fields attached to one x-side of a GPU fields.

    The parent initialiser is called with (3, ny, nz, coeff_use,
    precision_float, use_cpu_core=0). Three getter/setter pairs are then
    created (gpu -> buffer, buffer -> gpu, and the buffer-side pair whose
    getter's host array is sent over MPI), followed by one persistent send
    request and two persistent receive requests towards the neighbouring
    rank. update_e/update_h are bound to the direction-specific methods.

    Args:
        gpuf: gpu.Fields instance (validated with common.check_type).
        direction: 'x+' or 'x-' (validated with common.check_value).
        tmax: stored unchanged as self.tmax.
        ny, nz, coeff_use, precision_float: forwarded to the parent
            initialiser.
    """
    super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=0)
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('x+', 'x-'))

    if direction == 'x+':
        # gpu h -> buffer h
        from_gpu = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        into_buf = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # buffer e -> gpu e
        from_buf = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        into_gpu = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # buffer h is sent, received e is stored
        mpi_out = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        mpi_in = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        peer = rank + 1
        send_tag, recv_tag = 0, 1
        suffix = 'xp'

    elif direction == 'x-':
        # gpu e -> buffer e
        # NOTE(review): other buffer constructors in this file read the GPU
        # ey/ez slab at x index 1 for the minus side; this one uses 2 --
        # confirm that the offset is intended.
        from_gpu = gpu.GetFields(gpuf, ['ey', 'ez'], (2, 0, 0), (2, -1, -1))
        into_buf = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # buffer h -> gpu h
        from_buf = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        into_gpu = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # buffer e is sent, received h is stored
        mpi_out = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        mpi_in = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        peer = rank - 1
        send_tag, recv_tag = 1, 0
        suffix = 'xm'

    # one persistent send, two persistent receives with separate host arrays
    send_request = comm.Send_init(mpi_out.host_array, peer, tag=send_tag)
    recv_arrays = [
        np.zeros(mpi_out.host_array.shape, gpuf.dtype) for _ in range(2)
    ]
    recv_requests = [
        comm.Recv_init(arr, peer, tag=recv_tag) for arr in recv_arrays
    ]

    # global variables
    self.direction = direction
    self.gf0 = from_gpu
    self.sf0 = into_buf
    self.gf1 = from_buf
    self.sf1 = into_gpu
    self.gf2 = mpi_out
    self.sf2 = mpi_in
    self.req_send = send_request
    self.req_recv_list = recv_requests
    self.tmp_recv_list = recv_arrays
    self.switch = 0
    self.tmax = tmax
    self.tstep = 1

    # global functions
    self.update_e = getattr(self, 'update_e_' + suffix)
    self.update_h = getattr(self, 'update_h_' + suffix)