def __init__(self, gpuf, core, direction):
    """Prepare persistent MPI requests for the x-boundary field exchange.

    gpuf      -- Fields instance the boundary planes are read from / written to
    core      -- stored on the instance as ``self.core``
    direction -- '+', '-' or '+-'; selects which boundary side(s) to set up

    The neighbour rank wraps around: the last rank talks to rank 0 on the
    '+' side and rank 0 talks to the last rank on the '-' side.
    """
    common.check_type('gpuf', gpuf, Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    if '+' in direction:
        # '+' side: send hy/hz (tag 0), receive ey/ez (tag 1)
        getter_h = GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
        self.gf_h = getter_h
        self.sf_e = SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        if rank == size-1:
            neighbour = 0
        else:
            neighbour = rank+1
        self.req_send_h = comm.Send_init(getter_h.host_array, neighbour, tag=0)
        self.tmp_recv_e = np.zeros(getter_h.host_array.shape, gpuf.dtype)
        self.req_recv_e = comm.Recv_init(self.tmp_recv_e, neighbour, tag=1)

    if '-' in direction:
        # '-' side: send ey/ez (tag 1), receive hy/hz (tag 0)
        getter_e = GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.gf_e = getter_e
        self.sf_h = SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        if rank == 0:
            neighbour = size-1
        else:
            neighbour = rank-1
        self.req_send_e = comm.Send_init(getter_e.host_array, neighbour, tag=1)
        self.tmp_recv_h = np.zeros(getter_e.host_array.shape, gpuf.dtype)
        self.req_recv_h = comm.Recv_init(self.tmp_recv_h, neighbour, tag=0)

    # global variables
    self.core = core
    self.direction = direction
def append_buffer_fields(self, cpuf):
    """Register a cpu.Fields buffer instance.

    The instance is stored in ``self.cpuf_dict`` keyed by its ``mpi_type``
    and appended to ``self.updatef_list``.
    """
    common.check_type('cpuf', cpuf, cpu.Fields)
    allowed_mpi_types = ('x+', 'x-', 'y+', 'y-', 'z+', 'z-')
    common.check_value('cpuf.mpi_type', cpuf.mpi_type, allowed_mpi_types)

    self.cpuf_dict[cpuf.mpi_type] = cpuf
    self.updatef_list.append(cpuf)
def __init__(self, nodef, direction):
    """Create the get/set helpers and receive buffers for an MPI exchange.

    nodef     -- node.Fields instance; its first/last main fields are used
    direction -- '+', '-' or '+-'

    The GetFields/SetFields classes are looked up on the ``gpu`` or ``cpu``
    module according to the ``device_type`` of the main field involved.
    The instance registers itself on ``nodef`` with priority type 'mpi'.
    """
    common.check_type('nodef', nodef, node.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    # exposed as attributes so getattr(self, device_type) resolves a module
    self.gpu = gpu
    self.cpu = cpu

    if '+' in direction:
        last_mainf = nodef.mainf_list[-1]
        device = getattr(self, last_mainf.device_type)

        self.getf_h = device.GetFields(last_mainf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
        self.setf_e = device.SetFields(last_mainf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)
        self.tmp_recv_e = np.zeros(self.getf_h.host_array.shape, nodef.dtype)

    if '-' in direction:
        first_mainf = nodef.mainf_list[0]
        device = getattr(self, first_mainf.device_type)

        self.getf_e = device.GetFields(first_mainf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.setf_h = device.SetFields(first_mainf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)
        self.tmp_recv_h = np.zeros(self.getf_e.host_array.shape, nodef.dtype)

    # global variables
    self.direction = direction
    # neighbour ranks wrap around at both ends
    if rank == size-1:
        self.target_p = 0
    else:
        self.target_p = rank+1
    if rank == 0:
        self.target_m = size-1
    else:
        self.target_m = rank-1

    # append to the update list
    self.priority_type = 'mpi'
    nodef.append_instance(self)
def __init__(self, gpuf, core, direction, tmax):
    """Prepare persistent MPI requests using two alternating receive buffers.

    gpuf      -- Fields instance the boundary planes belong to
    core      -- stored as ``self.core``
    direction -- '+', '-' or '+-'
    tmax      -- stored as ``self.tmax``

    For each requested side two receive buffers (and two receive requests)
    are created; ``switch_e`` / ``switch_h`` start at 0.
    """
    common.check_type('gpuf', gpuf, Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    if '+' in direction:
        getter_h = GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
        self.gf_h = getter_h
        self.sf_e = SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.req_send_h = comm.Send_init(getter_h.host_array, rank+1, tag=0)
        recv_bufs_e = []
        for _ in range(2):
            recv_bufs_e.append(np.zeros(getter_h.host_array.shape, gpuf.dtype))
        self.tmp_recv_e_list = recv_bufs_e
        self.req_recv_e_list = [comm.Recv_init(buf, rank+1, tag=1) for buf in recv_bufs_e]
        self.switch_e = 0

    if '-' in direction:
        getter_e = GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.gf_e = getter_e
        self.sf_h = SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.req_send_e = comm.Send_init(getter_e.host_array, rank-1, tag=1)
        recv_bufs_h = []
        for _ in range(2):
            recv_bufs_h.append(np.zeros(getter_e.host_array.shape, gpuf.dtype))
        self.tmp_recv_h_list = recv_bufs_h
        self.req_recv_h_list = [comm.Recv_init(buf, rank-1, tag=0) for buf in recv_bufs_h]
        self.switch_h = 0

    # global variables
    self.gpuf = gpuf
    self.core = core
    self.direction = direction

    self.tmax = tmax
    self.tstep = 1
def get(self, str_f):
    """Return the instance attribute named ``str_f``.

    The six field names are always accepted; the coefficient names are
    accepted only when ``self.ce_on`` / ``self.ch_on`` are set. The value
    is read directly from the instance ``__dict__``.
    """
    allowed_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    if self.ce_on:
        allowed_names.extend(['cex', 'cey', 'cez'])
    if self.ch_on:
        allowed_names.extend(['chx', 'chy', 'chz'])

    common.check_value('str_f', str_f, allowed_names)

    return self.__dict__[str_f]
def set_fields_spatial_value(self, value):
    """Distribute ``value`` over every setter in ``self.setf_list``.

    ``value`` is split into one part per field name, each part is sliced
    with the setter's slices, the pieces are concatenated again and length-1
    axes are dropped before the result is handed to ``set_fields``.
    """
    common.check_value('value.dtype', value.dtype, self.dtype)
    common.check_value('value.shape', value.shape, [self.shape])

    per_field = np.split(value, len(self.str_fs))

    for setter, region in zip(self.setf_list, self.slices_list):
        pieces = [part[region] for part in per_field]
        chunk = np.concatenate(pieces)
        # a lone element keeps its (1,) shape; otherwise drop unit axes
        if chunk.shape != (1,):
            squeezed_shape = [extent for extent in chunk.shape if extent != 1]
            chunk = chunk.reshape(squeezed_shape)
        setter.set_fields(chunk)
def set_fields_spatial_value(self, value):
    """Write (or accumulate) ``value`` into the target region of each field.

    ``value`` is split into one part per name in ``self.str_fs``. Depending
    on ``self.is_overwrite`` each part either replaces or is added to the
    ``self.slice_xyz`` region of the corresponding main-field array.
    """
    common.check_value('value.dtype', value.dtype, self.mainf.dtype)
    common.check_value('value.shape', value.shape, [self.shape])

    parts = np.split(value, len(self.str_fs))

    for field_name, part in zip(self.str_fs, parts):
        target = self.mainf.get(field_name)
        if self.is_overwrite:
            target[self.slice_xyz] = part[:]
        else:
            target[self.slice_xyz] += part[:]
def __init__(self, node_fields, axis):
    """Set up the periodic boundary condition along ``axis`` inside a node.

    node_fields -- NodeFields instance providing ``mainf_list``/``cpuf_dict``
    axis        -- 'x', 'y' or 'z'

    Raises ValueError if ``cpuf_dict`` contains a buffer instance for any
    axis ('x+', 'x-', 'y+', ...).

    For the y and z axes a ``Pbc`` instance is created for every main field.
    For the x axis a single ``Pbc`` is created when the first and last main
    fields are the same object; otherwise get/set helpers are created to
    copy the boundary planes between the two fields and ``update_e`` /
    ``update_h`` are bound to the ``*_actual`` methods. In all other cases
    ``update_e`` / ``update_h`` are no-ops.
    """
    common.check_type('node_fields', node_fields, NodeFields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    # local variables
    # exposed as attributes so getattr(self, device_type) resolves a module
    self.gpu = gpu
    self.cpu = cpu
    mainf_list = node_fields.mainf_list
    cpuf_dict = node_fields.cpuf_dict

    set_cpuf = set(cpuf_dict.keys())
    for ax in ['x', 'y', 'z']:
        if not set_cpuf.isdisjoint([ax+'+', ax+'-']):
            # call form of raise: valid in both Python 2 and Python 3
            raise ValueError('There are %s-axis buffer instances. The pbc_internal operation along %s-axis is not allowed.' % (ax, ax))

    # create pbc instances
    f0 = mainf_list[0]
    f1 = mainf_list[-1]
    spans_two_fields = (axis == 'x' and f0 is not f1)

    if axis == 'x':
        if f0 is not f1:
            setf_e = cpu.SetFields(f1, ['ey', 'ez'],
                    (f1.nx-1, 0, 0), (f1.nx-1, f1.ny-2, f1.nz-2), True)
            getf_e = gpu.GetFields(f0, ['ey', 'ez'],
                    (0, 0, 0), (0, f0.ny-2, f0.nz-2))

            setf_h = gpu.SetFields(f0, ['hy', 'hz'],
                    (0, 1, 1), (0, f0.ny-1, f0.nz-1), True)
            getf_h = cpu.GetFields(f1, ['hy', 'hz'],
                    (f1.nx-1, 1, 1), (f1.nx-1, f1.ny-1, f1.nz-1))
        else:
            getattr(self, f0.device_type).Pbc(f0, axis)

    elif axis in ['y', 'z']:
        for f in mainf_list:
            getattr(self, f.device_type).Pbc(f, axis)

    # global variables and functions
    if spans_two_fields:
        self.setf_e = setf_e
        self.getf_e = getf_e
        self.setf_h = setf_h
        self.getf_h = getf_h

        self.update_e = self.update_e_actual
        self.update_h = self.update_h_actual
    else:
        self.update_e = lambda : None
        self.update_h = lambda : None
def set_fields_spatial_value(self, value):
    """Upload ``value`` to the source buffer and copy it into every target.

    After the host-to-device copy, ``self.kernel_copy`` is launched once per
    entry of ``self.target_bufs``; the entry's position is passed to the
    kernel as an int32 shift index.
    """
    mainf = self.mainf
    common.check_value('value.dtype', value.dtype, mainf.dtype)
    common.check_value('value.shape', value.shape, [self.shape])
    nx, ny, nz_pitch = mainf.ns_pitch

    cuda.memcpy_htod(self.source_buf, value)

    shift_idx = 0
    for target_buf in self.target_bufs:
        self.kernel_copy(
            nx, ny, nz_pitch, np.int32(shift_idx),
            target_buf, self.source_buf,
            grid=mainf.gs, block=mainf.bs)
        shift_idx += 1
def __init__(self, nx, ny, nz, precision_float='single', segment_nbytes=16):
    """Allocate the field and coefficient arrays with a padded z axis.

    nx, ny, nz      -- grid size (ints)
    precision_float -- 'single' (np.float32) or 'double' (np.float64)
    segment_nbytes  -- segment size in bytes; nz is padded up to a multiple
                       of ``segment_nbytes // itemsize`` elements

    Six zero-filled field arrays and two sets of three coefficient arrays
    filled with 0.5 are created with shape ``(nx, ny, nz + pad)``.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_type('segment_nbytes', segment_nbytes, int)
    common.check_value('precision_float', precision_float, ('single', 'double'))

    # local variables
    dtype = {'single': np.float32, 'double': np.float64}[precision_float]

    # padding for the nz which is multi of segment size
    # Floor division keeps align_size an int under true division as well;
    # itemsize is read from the dtype object itself.
    align_size = segment_nbytes // np.dtype(dtype).itemsize
    # Distance from nz up to the next multiple of align_size, computed in
    # exact integer arithmetic (equals ceil(nz/align_size)*align_size - nz).
    pad = (-nz) % align_size
    slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
    nz_pitch = nz + pad

    ns = [nx, ny, nz]
    ns_pitch = [nx, ny, nz_pitch]
    ns_pad = [nx, ny, pad]

    # allocations
    ehs = [np.zeros(ns_pitch, dtype) for _ in range(6)]
    ces = [np.ones(ns_pitch, dtype)*0.5 for _ in range(3)]
    chs = [np.ones(ns_pitch, dtype)*0.5 for _ in range(3)]

    # global variables
    self.dx = 1.
    self.dt = 0.5
    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns
    self.ns_pitch = ns_pitch
    self.ns_pad = ns_pad

    self.align_size = align_size
    self.pad = pad
    self.slice_z = slice_z

    self.precision_float = precision_float
    self.dtype = dtype

    self.ehs = ehs
    self.ex, self.ey, self.ez = ehs[:3]
    self.hx, self.hy, self.hz = ehs[3:]

    self.ces = ces
    self.cex, self.cey, self.cez = ces

    self.chs = chs
    self.chx, self.chy, self.chz = chs

    # update list
    self.instance_list = []
    self.append_instance = lambda instance: \
            common.append_instance(self.instance_list, instance)
def __init__(self, fields, axis):
    """Build the OpenCL programs that copy the periodic boundary planes.

    fields -- Fields instance (provides ``ns``, ``context``,
              ``dtype_str_list`` and ``append_instance``)
    axis   -- 'x', 'y' or 'z'

    Two programs are built from the 'copy_self.cl' template, one for the
    e-fields and one for the h-fields, each with its own pair of index
    expressions. The instance registers itself on ``fields`` with priority
    type 'pbc'.
    """
    common.check_type('fields', fields, Fields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    # local variables
    nx, ny, nz = fields.ns
    dtype_str_list = fields.dtype_str_list

    # program
    replace = lambda lst, idx, val: lst[:idx] + [val] + lst[idx+1:]

    base0 = {'e': [0, 0, 0], 'h': [1, 1, 1]}
    base1 = {'e': [nx-2, ny-2, nz-2], 'h': [nx-1, ny-1, nz-1]}
    axis_id = {'x': 0, 'y': 1, 'z': 2}[axis]
    nn = fields.ns[axis_id]

    # plane indices along the axis, in the order they fill IDX0 / IDX1
    plane_indices = {'e': [0, nn-1], 'h': [nn-1, 0]}

    value_dict = {'e': [], 'h': []}
    for eh in ['e', 'h']:
        for idx in plane_indices[eh]:
            pt0 = replace(base0[eh], axis_id, idx)
            pt1 = replace(base1[eh], axis_id, idx)
            nmax, xid, yid, zid = \
                    common_gpu.macro_replace_list(pt0, pt1)

            value_dict[eh].append(
                    '%s*ny*nz + %s*nz + %s' % (xid, yid, zid))

    macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']
    # nmax holds the value from the last loop iteration, as before
    values_e = [nmax] + value_dict['e'] + dtype_str_list
    values_h = [nmax] + value_dict['h'] + dtype_str_list

    # Read the template once and close the file deterministically.
    with open(common_gpu.src_path + 'copy_self.cl') as template_file:
        template_src = template_file.read()

    ksrc_e = common.replace_template_code(template_src, macros, values_e)
    ksrc_h = common.replace_template_code(template_src, macros, values_h)
    program_e = cl.Program(fields.context, ksrc_e).build()
    program_h = cl.Program(fields.context, ksrc_h).build()

    # global variables
    self.mainf = fields
    self.program_e = program_e
    self.program_h = program_h

    self.strfs_e = {'x': ['ey', 'ez'], 'y': ['ex', 'ez'], 'z': ['ex', 'ey']}[axis]
    self.strfs_h = {'x': ['hy', 'hz'], 'y': ['hx', 'hz'], 'z': ['hx', 'hy']}[axis]

    # append to the update list
    self.priority_type = 'pbc'
    self.mainf.append_instance(self)
def __init__(self, direction, ny, nz, coeff_use, precision_float):
    """Initialise a buffer field three cells thick along x.

    direction -- one of 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'

    The parent initialiser is called with nx=3 and ``use_cpu_core=1``. The
    trailing sign of ``direction`` decides which of the e/h updates is
    done in one part ('') and which in two parts ('pre', 'post').
    """
    super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=1)
    common.check_value('direction', direction, ('x+', 'x-', 'y+', 'y-', 'z+', 'z-'))

    # global variables
    self.direction = direction

    sign = direction[-1]
    # sign -> (part_e_list, part_h_list)
    parts_by_sign = {
        '+': ([''], ['pre', 'post']),
        '-': (['pre', 'post'], ['']),
    }
    self.part_e_list, self.part_h_list = parts_by_sign[sign]
def set_fields_spatial_value(self, value, wait_for=None):
    """Copy ``value`` to the source buffer and scatter it to the targets.

    value    -- array; its dtype and shape are checked against
                ``self.mainf.dtype`` and ``self.shape``
    wait_for -- forwarded to ``self.mainf.enqueue`` for the host-to-device
                copy; defaults to a fresh empty list

    After the copy, ``self.program.copy`` is enqueued once per entry of
    ``self.target_bufs`` with the entry's position as int32 shift index.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between calls.
    if wait_for is None:
        wait_for = []

    common.check_value('value.dtype', value.dtype, self.mainf.dtype)
    common.check_value('value.shape', value.shape, [self.shape])
    nx, ny, nz_pitch = self.mainf.ns_pitch

    self.mainf.enqueue(cl.enqueue_copy,
            [self.mainf.queue, self.source_buf, value],
            wait_for)

    for shift_idx, target_buf in enumerate(self.target_bufs):
        self.mainf.enqueue(self.program.copy,
                [self.mainf.queue, (self.gs,), (self.mainf.ls,),
                 nx, ny, nz_pitch, np.int32(shift_idx),
                 target_buf, self.source_buf])
def set_fields_spatial_value(self, value, wait_for=None):
    """Copy ``value`` to the source buffer and scatter it to the targets.

    value    -- array; its dtype is checked against ``self.mainf.dtype``
    wait_for -- passed to ``cl.enqueue_copy``; defaults to a fresh empty
                list

    The host-to-device copy is enqueued non-blocking, then
    ``self.program.subdomain`` is called once per entry of
    ``self.target_bufs`` with the entry's position as int32 shift index.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between calls.
    if wait_for is None:
        wait_for = []

    mainf = self.mainf
    queue, gs, ls = mainf.queue, mainf.gs, mainf.ls
    nx, ny, nz_pitch = mainf.ns_pitch

    common.check_value('value.dtype', value.dtype, mainf.dtype)

    cl.enqueue_copy(queue, self.source_buf, value,
            is_blocking=False, wait_for=wait_for)

    for shift_idx, target_buf in enumerate(self.target_bufs):
        self.program.subdomain(queue, (gs,), (ls,),
                nx, ny, nz_pitch, np.int32(shift_idx),
                target_buf, self.source_buf)
def __init__(self, fields, axis):
    """Prepare the slice tables for a periodic boundary along ``axis``.

    fields -- Fields instance (provides ``ns``, ``mpi_type``,
              ``split_slices_dict`` and ``append_instance``)
    axis   -- 'x', 'y' or 'z'

    Raises ValueError when ``axis`` is 'x' and ``fields.mpi_type`` is one
    of the buffer types ('x+', 'x-', 'y+', 'y-', 'z+', 'z-').

    The instance registers itself on ``fields`` with priority type 'pbc'.
    """
    common.check_type('fields', fields, Fields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    mtype = fields.mpi_type
    if axis == 'x' and mtype in ['x+', 'x-', 'y+', 'y-', 'z+', 'z-']:
        # call form of raise: valid in both Python 2 and Python 3
        raise ValueError('The fields.mpi_type is \'%s\'. The buffer instance is only permit the pbc operation along y and z axes' % mtype)

    # local variables
    nx, ny, nz = fields.ns
    axis_id = {'x': 0, 'y': 1, 'z': 2}[axis]

    # slice indices
    replace = lambda lst, idx, val: lst[:idx] + [val] + lst[idx+1:]

    slices_e = [slice(None, -1), slice(None, -1), slice(None, nz-1)]
    slices_h = [slice(1, None), slice(1, None), slice(1, nz)]

    first_plane = slice(None, 1)
    # along z the last plane is addressed explicitly through nz
    if axis == 'z':
        last_plane = slice(nz-1, nz)
    else:
        last_plane = slice(-1, None)

    slices_e_src = replace(slices_e, axis_id, first_plane)
    slices_h_dest = replace(slices_h, axis_id, first_plane)
    slices_e_dest = replace(slices_e, axis_id, last_plane)
    slices_h_src = replace(slices_h, axis_id, last_plane)

    # global variables
    self.mainf = fields
    self.slices_dict = {
            'e_src': fields.split_slices_dict('e', slices_e_src),
            'e_dest': fields.split_slices_dict('e', slices_e_dest),
            'h_src': fields.split_slices_dict('h', slices_h_src),
            'h_dest': fields.split_slices_dict('h', slices_h_dest)}

    self.strfs = {
            'x': {'e': ['ey', 'ez'], 'h': ['hy', 'hz']},
            'y': {'e': ['ex', 'ez'], 'h': ['hx', 'hz']},
            'z': {'e': ['ex', 'ey'], 'h': ['hx', 'hy']}}[axis]

    # append to the update list
    self.priority_type = 'pbc'
    self.mainf.append_instance(self)
def __init__(self, fields, target_rank, tmax):
    """Create the getter, setter and persistent MPI requests for a buffer.

    fields      -- Fields instance whose ``mpi_type`` is one of
                   'x+', 'x-', 'y+', 'y-', 'z+', 'z-'
    target_rank -- peer rank for the send/receive requests
    tmax        -- stored as ``self.tmax``

    A '+' type sends hy/hz with tag 1 and receives with tag 2; a '-' type
    sends ey/ez with tag 2 and receives with tag 1. The instance registers
    itself on ``fields`` with priority type 'mpi'.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    nx, ny, nz = fields.ns
    dtype = fields.dtype
    mpi_type = fields.mpi_type

    common.check_value('mpi_type', mpi_type,
            ['x+', 'x-', 'y+', 'y-', 'z+', 'z-'])

    # create instances (getf, setf and mpi requests)
    if '+' in mpi_type:     # split h
        getf = GetFields(fields, ['hy', 'hz'],
                (1, 1, 1), (1, ny-1, nz-1))
        setf = SetFields(fields, ['ey', 'ez'],
                (nx-1, 0, 0), (nx-1, ny-2, nz-2), True)
        send_tag, recv_tag = 1, 2

    elif '-' in mpi_type:   # split e
        getf = GetFields(fields, ['ey', 'ez'],
                (nx-2, 0, 0), (nx-2, ny-2, nz-2))
        setf = SetFields(fields, ['hy', 'hz'],
                (0, 1, 1), (0, ny-1, nz-1), True)
        send_tag, recv_tag = 2, 1

    req_send = comm.Send_init(getf.host_array, target_rank, tag=send_tag)
    tmp_recv = np.zeros(getf.host_array.shape, dtype)
    req_recv = comm.Recv_init(tmp_recv, target_rank, tag=recv_tag)

    # global variables and functions
    self.mainf = fields
    self.getf = getf
    self.setf = setf
    self.tmp_recv = tmp_recv
    self.req_send = req_send
    self.req_recv = req_recv

    self.tmax = tmax
    self.tstep = 1

    # append to the update list
    self.priority_type = 'mpi'
    self.mainf.append_instance(self)
def __init__(self, gpuf, direction, tmax):
    """Set up CPU buffer fields and persistent MPI requests next to a GPU field.

    gpuf      -- gpu.Fields instance
    direction -- '+', '-' or '+-'
    tmax      -- stored as ``self.tmax``

    For each requested side a 3-cell-thick cpu.Fields buffer is created
    (``use_cpu_core=1``) together with get/set helpers between the GPU
    field and the buffer, one persistent send request and two alternating
    receive buffers/requests. ``switch_e`` / ``switch_h`` start at 0.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    qtask = cpu.QueueTask()

    if '+' in direction:
        buf_p = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)
        self.cpuf_p = buf_p

        # GPU <-> buffer transfers
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # buffer <-> MPI transfers
        getter_h = cpu.GetFields(buf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.gf_h = getter_h
        self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.req_send_h = comm.Send_init(getter_h.host_array, rank+1, tag=0)
        recv_bufs_e = []
        for _ in range(2):
            recv_bufs_e.append(np.zeros(getter_h.host_array.shape, gpuf.dtype))
        self.tmp_recv_e_list = recv_bufs_e
        self.req_recv_e_list = [comm.Recv_init(buf, rank+1, tag=1) for buf in recv_bufs_e]
        self.switch_e = 0

    if '-' in direction:
        buf_m = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)
        self.cpuf_m = buf_m

        # GPU <-> buffer transfers
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # buffer <-> MPI transfers
        getter_e = cpu.GetFields(buf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.gf_e = getter_e
        self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.req_send_e = comm.Send_init(getter_e.host_array, rank-1, tag=1)
        recv_bufs_h = []
        for _ in range(2):
            recv_bufs_h.append(np.zeros(getter_e.host_array.shape, gpuf.dtype))
        self.tmp_recv_h_list = recv_bufs_h
        self.req_recv_h_list = [comm.Recv_init(buf, rank-1, tag=0) for buf in recv_bufs_h]
        self.switch_h = 0

    # global variables
    self.direction = direction
    self.qtask = qtask

    self.tmax = tmax
    self.tstep = 1
def __init__(self, gpuf, direction):
    """Create GPU get/set helpers and host receive buffers for an exchange.

    gpuf      -- gpu.Fields instance
    direction -- '+', '-' or '+-'
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    if '+' in direction:
        getter_h = gpu.GetFields(gpuf, ['hy', 'hz'], (-1, 0, 0), (-1, -1, -1))
        self.getf_h = getter_h
        self.setf_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)
        # receive buffer shaped like the outgoing h plane
        self.tmp_recv_e = np.zeros(getter_h.host_array.shape, gpuf.dtype)

    if '-' in direction:
        getter_e = gpu.GetFields(gpuf, ['ey', 'ez'], (0, 0, 0), (0, -1, -1))
        self.getf_e = getter_e
        self.setf_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)
        # receive buffer shaped like the outgoing e plane
        self.tmp_recv_h = np.zeros(getter_e.host_array.shape, gpuf.dtype)

    # global variables
    self.direction = direction
def set_fields_spatial_value(self, value, wait_for=None):
    """Copy ``value`` to the source buffer and scatter it to the targets.

    value    -- array; its dtype and shape are checked against
                ``self.mainf.dtype`` and ``self.shape``
    wait_for -- passed to ``cl.enqueue_copy``; defaults to a fresh empty
                list

    The host-to-device copy is enqueued non-blocking, then
    ``self.program.subdomain`` is called once per entry of
    ``self.target_bufs`` with the entry's position as int32 shift index.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between calls.
    if wait_for is None:
        wait_for = []

    mainf = self.mainf
    common.check_value("value.dtype", value.dtype, mainf.dtype)
    common.check_value("value.shape", value.shape, [self.shape])
    nx, ny, nz_pitch = mainf.ns_pitch

    cl.enqueue_copy(mainf.queue, self.source_buf, value, is_blocking=False, wait_for=wait_for)

    for shift_idx, target_buf in enumerate(self.target_bufs):
        self.program.subdomain(
            mainf.queue,
            (mainf.gs,),
            (mainf.ls,),
            nx,
            ny,
            nz_pitch,
            np.int32(shift_idx),
            target_buf,
            self.source_buf,
        )
def __init__(self, direction, target_rank, ny, nz, coeff_use, precision_float):
    """Initialise a buffer field three cells thick along x with an MPI peer.

    direction   -- one of 'x+', 'x-', 'y+', 'y-', 'z+', 'z-'
    target_rank -- int, stored as ``self.target_rank``

    The parent initialiser is called with nx=3 and ``use_cpu_core=0``. The
    trailing sign of ``direction`` selects the part lists and the
    ``is_split_dict`` flags for the e and h updates.
    """
    super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=0)
    common.check_value('direction', direction, ('x+', 'x-', 'y+', 'y-', 'z+', 'z-'))
    common.check_type('target_rank', target_rank, int)

    # global variables
    self.direction = direction
    self.target_rank = target_rank

    sign = direction[-1]
    # sign -> (part_e_list, part_h_list, is_split_dict)
    settings_by_sign = {
        '+': ([''], ['pre', 'post'], {'e': False, 'h': True}),
        '-': (['pre', 'post'], [''], {'e': True, 'h': False}),
    }
    part_e, part_h, is_split = settings_by_sign[sign]
    self.part_e_list = part_e
    self.part_h_list = part_h
    self.is_split_dict = is_split
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare a setter for the region between ``pt0`` and ``pt1``.

    fields       -- Fields instance
    str_f        -- field name or sequence of field names
    pt0, pt1     -- integer points; each coordinate must lie in range(n)
                    for the corresponding entry of ``fields.ns``
    is_array     -- selects ``set_fields_spatial_value`` (True) or
                    ``set_fields_single_value`` (False) as ``self.func``
    is_overwrite -- stored as ``self.is_overwrite``
    """
    common.check_type("fields", fields, Fields)
    common.check_type("str_f", str_f, (str, list, tuple), str)
    common.check_type("pt0", pt0, (list, tuple), int)
    common.check_type("pt1", pt1, (list, tuple), int)
    common.check_type("is_array", is_array, bool)
    common.check_type("is_overwrite", is_overwrite, bool)

    # local variables
    str_fs = common.convert_to_tuple(str_f)

    valid_names = ["ex", "ey", "ez", "hx", "hy", "hz"]
    for name in str_fs:
        common.check_value("str_f", name, valid_names)

    for axis, extent, lo, hi in zip(["x", "y", "z"], fields.ns, pt0, pt1):
        common.check_value("pt0 %s" % axis, lo, range(extent))
        common.check_value("pt1 %s" % axis, hi, range(extent))

    # global variables and functions
    self.mainf = fields
    self.str_fs = str_fs
    self.slice_xyz = common.slice_index_two_points(pt0, pt1)
    self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
    self.is_overwrite = is_overwrite

    self.func = self.set_fields_spatial_value if is_array else self.set_fields_single_value
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare a getter for the region between ``pt0`` and ``pt1``.

    fields   -- Fields instance
    str_f    -- field name or sequence of field names
    pt0, pt1 -- integer points; each coordinate must lie in range(n) for
                the corresponding entry of ``fields.ns``

    A zero-filled host array is allocated and split into one part per
    field name; the parts are kept in ``split_host_array_dict``.
    """
    common.check_type("fields", fields, Fields)
    common.check_type("str_f", str_f, (str, list, tuple), str)
    common.check_type("pt0", pt0, (list, tuple), int)
    common.check_type("pt1", pt1, (list, tuple), int)

    # local variables
    str_fs = common.convert_to_tuple(str_f)

    valid_names = ["ex", "ey", "ez", "hx", "hy", "hz"]
    for name in str_fs:
        common.check_value("str_f", name, valid_names)

    for axis, extent, lo, hi in zip(["x", "y", "z"], fields.ns, pt0, pt1):
        common.check_value("pt0 %s" % axis, lo, range(extent))
        common.check_value("pt1 %s" % axis, hi, range(extent))

    # allocation
    n_fields = len(str_fs)
    host_array = np.zeros(common.shape_two_points(pt0, pt1, n_fields), dtype=fields.dtype)
    parts = np.split(host_array, n_fields)

    # global variables
    self.mainf = fields
    self.str_fs = str_fs
    self.slice_xyz = common.slice_index_two_points(pt0, pt1)

    self.host_array = host_array
    self.split_host_array_dict = dict(zip(str_fs, parts))
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare a setter for the region between ``pt0`` and ``pt1``.

    fields       -- Fields or BufferFields instance
    str_f        -- field name or sequence of field names
    pt0, pt1     -- integer points; each coordinate must lie in range(n)
                    for the corresponding entry of ``fields.ns``
    is_array     -- selects ``set_fields_spatial_value`` (True) or
                    ``set_fields_single_value`` (False) as ``self.func``
    is_overwrite -- stored as ``self.is_overwrite``
    """
    common.check_type('fields', fields, (Fields, BufferFields))
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    str_fs = common.convert_to_tuple(str_f)

    valid_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for name in str_fs:
        common.check_value('str_f', name, valid_names)

    for axis, extent, lo, hi in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, lo, range(extent))
        common.check_value('pt1 %s' % axis, hi, range(extent))

    # global variables and functions
    self.mainf = fields
    self.str_fs = str_fs
    self.slice_xyz = common.slices_two_points(pt0, pt1)
    self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
    self.is_overwrite = is_overwrite

    self.func = self.set_fields_spatial_value if is_array else self.set_fields_single_value
def set_incident_direct(self, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Create a node.IncidentDirect source for the region ``pt0``..``pt1``.

    str_f         -- one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
    pt0, pt1      -- points (ints or floats), converted with
                     ``common.convert_indices`` against ``self.ns``
    tfunc         -- plain Python function
    spatial_value -- ndarray, numpy scalar, float or int
    is_overwrite  -- bool, forwarded to node.IncidentDirect

    Without MPI the source is created directly on ``self.nodef``. With MPI
    the requested region is intersected with this node's region (widened
    by one cell on each side that has a buffer) and the source is created
    only when the two overlap, using node-local coordinates.
    """
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('tfunc', tfunc, types.FunctionType)
    # builtin float/int are the same objects as the Python 2
    # types.FloatType/types.IntType and also exist on Python 3
    common.check_type('spatial_value', spatial_value,
            (np.ndarray, np.number, float, int))
    common.check_type('is_overwrite', is_overwrite, bool)

    pt0 = list(common.convert_indices(self.ns, pt0))
    pt1 = list(common.convert_indices(self.ns, pt1))

    if not is_mpi:
        node.IncidentDirect(self.nodef, str_f, pt0, pt1, tfunc, spatial_value, is_overwrite)
        return

    node_pt0 = list(self.node_pt0)
    node_pt1 = list(self.node_pt1)
    buffer_dict = self.nodef.buffer_dict

    for i, axis in enumerate(['x', 'y', 'z']):
        # widen the node region by one cell on each side that has a buffer
        if '%s+' % axis in buffer_dict:
            node_pt1[i] += 1
        if '%s-' % axis in buffer_dict:
            node_pt0[i] -= 1

        # stretch the requested region by one cell where it touches the
        # global boundary on the first/last node along this axis
        if coord[i] == 0 and pt0[i] == 0:
            pt0[i] -= 1
        if coord[i] == self.mpi_shape[i]-1 and pt1[i] == self.ns[i]-1:
            pt1[i] += 1

    overlap = common.overlap_two_regions(node_pt0, node_pt1, pt0, pt1)
    if overlap is not None:
        # translate the overlap into coordinates relative to the node origin
        sx0, sy0, sz0 = self.node_pt0
        ox0, oy0, oz0 = overlap[0]
        ox1, oy1, oz1 = overlap[1]

        local_pt0 = (ox0-sx0, oy0-sy0, oz0-sz0)
        local_pt1 = (ox1-sx0, oy1-sy0, oz1-sz0)

        node.IncidentDirect(self.nodef, str_f, local_pt0, local_pt1, tfunc, spatial_value, is_overwrite)
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare a getter for the region between ``pt0`` and ``pt1``.

    fields   -- Fields or BufferFields instance
    str_f    -- field name or sequence of field names
    pt0, pt1 -- integer points; each coordinate must lie in range(n) for
                the corresponding entry of ``fields.ns``

    A zero-filled host array is allocated and split into one part per
    field name; the parts are kept in ``split_host_array_dict``.
    """
    common.check_type('fields', fields, (Fields, BufferFields))
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    # local variables
    str_fs = common.convert_to_tuple(str_f)

    valid_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for name in str_fs:
        common.check_value('str_f', name, valid_names)

    for axis, extent, lo, hi in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, lo, range(extent))
        common.check_value('pt1 %s' % axis, hi, range(extent))

    # allocation
    n_fields = len(str_fs)
    host_array = np.zeros(common.shape_two_points(pt0, pt1, n_fields), dtype=fields.dtype)
    parts = np.split(host_array, n_fields)

    # global variables
    self.mainf = fields
    self.str_fs = str_fs
    self.slice_xyz = common.slices_two_points(pt0, pt1)

    self.host_array = host_array
    self.split_host_array_dict = dict(zip(str_fs, parts))
def split_slices_dict(self, eh, slices):
    """Split ``slices`` into the '', 'pre', 'mid' and 'post' parts along x.

    eh     -- 'e' or 'h'; selects which x ranges form the three parts
    slices -- list/tuple of slice objects

    The '' entry is ``slices`` unchanged; every other entry is the
    intersection of ``slices`` with an x-range (full y and z ranges) as
    computed by ``common.intersection_two_slices``.
    """
    common.check_value('eh', eh, ('e', 'h'))
    common.check_type('slices', slices, (list, tuple), slice)

    everything = slice(None, None)

    def clip_x(x_range):
        return common.intersection_two_slices(
                self.ns, slices, (x_range, everything, everything))

    if eh == 'e':
        slices_dict = {'': slices}
        slices_dict['pre'] = clip_x(slice(-2, None))
        slices_dict['mid'] = clip_x(slice(1, -2))
        slices_dict['post'] = clip_x(slice(None, 1))

    elif eh == 'h':
        slices_dict = {'': slices}
        slices_dict['pre'] = clip_x(slice(None, 2))
        slices_dict['mid'] = clip_x(slice(2, -1))
        slices_dict['post'] = clip_x(slice(-1, None))

    return slices_dict
def __init__(self, fields, str_f, pt0, pt1):
    """Build the OpenCL copy program and buffers for reading a region.

    fields   -- Fields instance (provides ``ns``, ``dtype``, ``context``,
                ``dtype_str_list`` and ``get_buf``)
    str_f    -- field name or sequence of field names
    pt0, pt1 -- integer points; each coordinate must lie in range(n) for
                the corresponding entry of ``fields.ns``

    The kernel source is generated from the 'copy.cl' template. A
    zero-filled host array (split per field name) and a device buffer
    initialised from it are allocated.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
            'ARGS',
            'TARGET', 'SOURCE', 'OVERWRITE',
            'DTYPE', 'PRAGMA_fp64']

    values = common_gpu.macro_replace_list(pt0, pt1) + \
            ['__global DTYPE *source',
             'target[sub_idx]', 'source[idx]', '='] + \
            dtype_str_list

    # Close the template file deterministically instead of leaving the
    # handle to the garbage collector.
    with open(common_gpu.src_path + 'copy.cl') as template_file:
        template_src = template_file.read()

    ksrc = common.replace_template_code(template_src, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # allocation
    # distinct loop name so the str_f parameter is not rebound
    source_bufs = [fields.get_buf(strf) for strf in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    host_array = np.zeros(shape, dtype=fields.dtype)
    split_host_array = np.split(host_array, len(str_fs))
    split_host_array_dict = dict(zip(str_fs, split_host_array))
    target_buf = cl.Buffer(
            fields.context,
            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
            hostbuf=host_array)

    # global variables
    self.mainf = fields
    self.program = program
    self.source_bufs = source_bufs
    self.target_buf = target_buf
    self.host_array = host_array
    self.split_host_array_dict = split_host_array_dict
def split_points_dict(self, eh, pt0, pt1):
    """Split the region ``pt0``..``pt1`` into '', 'pre', 'mid', 'post' parts.

    eh       -- 'e' or 'h'; selects which x ranges form the three parts
    pt0, pt1 -- integer points

    The '' entry is ``(pt0, pt1)``; every other entry is the intersection
    of the region with an x-slab of the grid as computed by
    ``common.intersection_two_regions``.
    """
    common.check_value('eh', eh, ('e', 'h'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    nx, ny, nz = self.ns

    def clip_x(x_lo, x_hi):
        # slab spanning x_lo..x_hi with the full y and z extents
        return common.intersection_two_regions(
                pt0, pt1, (x_lo, 0, 0), (x_hi, ny-1, nz-1))

    if eh == 'e':
        points_dict = {'': (pt0, pt1)}
        points_dict['pre'] = clip_x(nx-2, nx-1)
        points_dict['mid'] = clip_x(1, nx-3)
        points_dict['post'] = clip_x(0, 0)

    elif eh == 'h':
        points_dict = {'': (pt0, pt1)}
        points_dict['pre'] = clip_x(0, 1)
        points_dict['mid'] = clip_x(2, nx-2)
        points_dict['post'] = clip_x(nx-1, nx-1)

    return points_dict
def __init__(self, fields, str_f, pt0, pt1):
    """Build the CUDA copy kernel and buffers for reading a region.

    fields   -- Fields instance (provides ``ns``, ``dtype``,
                ``dtype_str_list`` and ``get_buf``)
    str_f    -- field name or sequence of field names
    pt0, pt1 -- points (ints or floats), converted with
                ``common.convert_indices``; each converted coordinate must
                lie in range(n) for the corresponding entry of ``fields.ns``

    The kernel source is generated from the 'copy.cu' template. A
    zero-filled host array (split per field name) and a device copy of it
    are allocated.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
            'ARGS',
            'TARGET', 'SOURCE', 'OVERWRITE',
            'DTYPE']

    values = common_gpu.macro_replace_list(pt0, pt1) + \
            ['DTYPE *source',
             'target[sub_idx]', 'source[idx]', '='] + \
            dtype_str_list

    # Close the template file deterministically instead of leaving the
    # handle to the garbage collector.
    with open(common_gpu.src_path + 'copy.cu') as template_file:
        template_src = template_file.read()

    ksrc = common.replace_template_code(template_src, macros, values)
    program = SourceModule(ksrc)
    kernel_copy = program.get_function('copy')

    # allocation
    # distinct loop name so the str_f parameter is not rebound
    source_bufs = [fields.get_buf(strf) for strf in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    host_array = np.zeros(shape, fields.dtype)
    split_host_array = np.split(host_array, len(str_fs))
    split_host_array_dict = dict(zip(str_fs, split_host_array))
    target_buf = cuda.to_device(host_array)

    # global variables
    self.mainf = fields
    self.kernel_copy = kernel_copy
    self.source_bufs = source_bufs
    self.target_buf = target_buf
    self.host_array = host_array
    self.split_host_array_dict = split_host_array_dict
def __init__(self, gpuf, direction):
    """Set up CPU buffer fields and get/set helpers next to a GPU field.

    gpuf      -- gpu.Fields instance
    direction -- '+', '-' or '+-'

    For each requested side a 3-cell-thick cpu.Fields buffer is created
    (``use_cpu_core=0``) together with get/set helpers between the GPU
    field and the buffer, helpers on the buffer's outer planes and a host
    receive array.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    qtask = cpu.QueueTask()

    if '+' in direction:
        buf_p = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=0)
        self.cpuf_p = buf_p

        # GPU <-> buffer transfers
        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(buf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf_p_e = cpu.GetFields(buf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        # buffer-side helpers and receive array
        self.gf_h = cpu.GetFields(buf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_e = cpu.SetFields(buf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.tmp_recv_e = np.zeros(self.gf_h.host_array.shape, gpuf.dtype)

    if '-' in direction:
        buf_m = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=0)
        self.cpuf_m = buf_m

        # GPU <-> buffer transfers
        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(buf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf_m_h = cpu.GetFields(buf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        # buffer-side helpers and receive array
        self.gf_e = cpu.GetFields(buf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_h = cpu.SetFields(buf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.tmp_recv_h = np.zeros(self.gf_e.host_array.shape, gpuf.dtype)

    # global variables
    self.direction = direction
    self.qtask = qtask
def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Create a direct incident source backed by a SetFields instance.

    fields        -- Fields instance
    str_f         -- one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
    pt0, pt1      -- integer points; each coordinate must lie in range(n)
                     for the corresponding entry of ``fields.ns``
    tfunc         -- plain Python function, stored as ``self.tfunc``
    spatial_value -- ndarray (shape and dtype must match the region and
                     ``fields.dtype``) or a scalar, which is converted
                     with ``fields.dtype``
    is_overwrite  -- bool, forwarded to SetFields

    Raises AssertionError on an ndarray shape or dtype mismatch.

    For an e-field ``update_e`` is bound to ``self.update`` and
    ``update_h`` is a no-op; for an h-field it is the other way round.
    The instance registers itself on ``fields`` with priority type
    'incident'.
    """
    common.check_type('fields', fields, Fields)
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('tfunc', tfunc, types.FunctionType)
    # builtin float/int are the same objects as the Python 2
    # types.FloatType/types.IntType and also exist on Python 3
    common.check_type('spatial_value', spatial_value,
            (np.ndarray, np.number, float, int))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    e_or_h = str_f[0]
    dtype = fields.dtype
    is_array = isinstance(spatial_value, np.ndarray)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
                'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
                'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # create the SetFields instance
    setf = SetFields(fields, str_f, pt0, pt1, is_array, is_overwrite)

    # global variables
    self.mainf = fields
    self.dtype = dtype
    self.tfunc = tfunc
    self.setf = setf
    self.spatial_value = spatial_value

    self.tstep = 1

    # global functions
    if e_or_h == 'e':
        self.update_e = self.update
        self.update_h = lambda : None
    elif e_or_h == 'h':
        self.update_e = lambda : None
        self.update_h = self.update

    # append to the update list
    self.priority_type = 'incident'
    fields.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Create a direct incident source that writes through stored slices.

    fields        -- Fields instance
    str_f         -- one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'
    pt0, pt1      -- points (ints or floats), converted with
                     ``common.convert_indices``; each converted coordinate
                     must lie in range(n) for the corresponding entry of
                     ``fields.ns``
    tfunc         -- plain Python function, stored as ``self.tfunc``
    spatial_value -- ndarray (shape and dtype must match the region and
                     ``fields.dtype``) or a scalar, which is converted
                     with ``fields.dtype``
    is_overwrite  -- bool, stored as ``self.is_overwrite``

    Raises AssertionError on an ndarray shape or dtype mismatch.

    The instance registers itself on ``fields`` with priority type
    'incident'.
    """
    common.check_type('fields', fields, Fields)
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('tfunc', tfunc, types.FunctionType)
    # builtin float/int are the same objects as the Python 2
    # types.FloatType/types.IntType and also exist on Python 3
    common.check_type('spatial_value', spatial_value,
            (np.ndarray, np.number, float, int))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    pt0 = common.convert_indices(fields.ns, pt0)
    pt1 = common.convert_indices(fields.ns, pt1)
    dtype = fields.dtype
    is_array = isinstance(spatial_value, np.ndarray)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
                'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
                'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # global variables
    self.mainf = fields
    self.str_f = str_f
    self.slices = common.slices_two_points(pt0, pt1)
    self.tfunc = tfunc
    self.spatial_value = spatial_value
    self.is_overwrite = is_overwrite

    self.e_or_h = str_f[0]
    self.tstep = 1

    # append to the update list
    self.priority_type = 'incident'
    fields.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare a setter for a region of one or more field components.

    fields: Fields instance that owns the target components.
    str_f: a field name or a list/tuple of names, each one of
        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints; every index must be in range(n) of its axis.
    is_array: selects set_fields_spatial_value (True) or
        set_fields_single_value (False) as self.func.
    is_overwrite: stored as self.is_overwrite.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    field_names = common.convert_to_tuple(str_f)
    allowed_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    for name in field_names:
        common.check_value('str_f', name, allowed_names)

    for axis_name, npts, idx0, idx1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis_name, idx0, range(npts))
        common.check_value('pt1 %s' % axis_name, idx1, range(npts))

    # global variables and functions
    self.mainf = fields
    self.str_fs = field_names
    self.slice_xyz = common.slice_index_two_points(pt0, pt1)
    self.shape = common.shape_two_points(pt0, pt1, len(field_names))
    self.is_overwrite = is_overwrite

    self.func = (self.set_fields_spatial_value if is_array
                 else self.set_fields_single_value)
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare a getter for a region of one or more field components.

    A zero-filled host array is allocated for the whole request and split
    with np.split into one piece per requested field; the pieces are kept
    in a dict keyed by field name.

    fields: Fields instance that owns the source components.
    str_f: a field name or a list/tuple of names, each one of
        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints or floats, converted by
        common.convert_indices and then range-checked per axis.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    field_names = common.convert_to_tuple(str_f)
    allowed_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']

    for name in field_names:
        common.check_value('str_f', name, allowed_names)

    for axis_name, npts, idx0, idx1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis_name, idx0, range(npts))
        common.check_value('pt1 %s' % axis_name, idx1, range(npts))

    # allocation
    nfields = len(field_names)
    full_shape = common.shape_two_points(pt0, pt1, nfields)
    host_array = np.zeros(full_shape, dtype=fields.dtype)
    pieces = np.split(host_array, nfields)

    # global variables
    self.mainf = fields
    self.str_fs = field_names
    self.slice_xyz = common.slices_two_points(pt0, pt1)
    self.host_array = host_array
    self.split_host_array_dict = dict(zip(field_names, pieces))
def __init__(self, context, device, \
        nx, ny, nz, \
        coeff_use='e', \
        precision_float='single', \
        local_work_size=256, \
        global_work_size=0):
    """Allocate the OpenCL field buffers and build the core program.

    context, device: pyopencl Context and Device to run on.
    nx, ny, nz: grid size (ints).  The z extent is padded so that nz_pitch
        is a multiple of 16 (single) or 8 (double) elements.
    coeff_use: '', 'e', 'h' or 'eh'; selects which coefficient buffers exist.
    precision_float: 'single' or 'double'.
    local_work_size: work-group size, stored as self.ls.
    global_work_size: stored as self.gs; 0 means "ask
        common_gpu.get_optimal_gs(device)".

    Changes from the previous version: the kernel template file is closed
    after reading, and the coefficient buffers are filled with 0.5 instead
    of being left uninitialized.
    """
    common.check_type('context', context, cl.Context)
    common.check_type('device', device, cl.Device)
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_type('global_work_size', global_work_size, int)
    common.check_type('local_work_size', local_work_size, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float, ('single', 'double'))

    self.context = context
    self.device = device
    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ls = local_work_size
    self.gs = global_work_size
    self.coeff_use = coeff_use
    self.dtype = {'single': np.float32, 'double': np.float64}[precision_float]
    self.dtype_str = {'single': 'float', 'double': 'double'}[precision_float]
    self.dtype_str_list = { \
        'single': ['float', ''], \
        'double': ['double', '#pragma OPENCL EXTENSION cl_khr_fp64 : enable']}[precision_float]
    self.device_type = 'gpu'

    # padding for the nz which is multiple of 16 (float32) or 8 (float64)
    self.align_size = a_size = {'single': 16, 'double': 8}[precision_float]  # 64 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad
    # third entry feeds the 'PAD' macro of the kernel template
    self.dtype_str_list.append('' if pad == 0 else '-%s' % pad)

    # ns, queue, global_size
    self.ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
    self.ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
    self.ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]
    self.queue = cl.CommandQueue(self.context, self.device)
    if self.gs == 0:
        self.gs = common_gpu.get_optimal_gs(self.device)

    # on/off the coefficient arrays
    self.ce_on = 'e' in self.coeff_use
    self.ch_on = 'h' in self.coeff_use

    # allocations
    f = np.zeros(self.ns_pitch, dtype=self.dtype)
    cf = np.ones_like(f) * 0.5
    mf = cl.mem_flags

    self.eh_bufs = [cl.Buffer(self.context, mf.READ_WRITE, f.nbytes) \
                    for i in range(6)]
    for eh_buf in self.eh_bufs:
        cl.enqueue_copy(self.queue, eh_buf, f)
    self.ex_buf, self.ey_buf, self.ez_buf = self.eh_bufs[:3]
    self.hx_buf, self.hy_buf, self.hz_buf = self.eh_bufs[3:]

    if self.ce_on:
        self.ce_bufs = [cl.Buffer(self.context, mf.READ_ONLY, cf.nbytes) \
                        for i in range(3)]
        # start from the same 0.5 the kernel uses when the arrays are off
        for ce_buf in self.ce_bufs:
            cl.enqueue_copy(self.queue, ce_buf, cf)
        self.cex_buf, self.cey_buf, self.cez_buf = self.ce_bufs

    if self.ch_on:
        self.ch_bufs = [cl.Buffer(self.context, mf.READ_ONLY, cf.nbytes) \
                        for i in range(3)]
        for ch_buf in self.ch_bufs:
            cl.enqueue_copy(self.queue, ch_buf, cf)
        self.chx_buf, self.chy_buf, self.chz_buf = self.ch_bufs

    del f, cf

    # program
    macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ', \
              'ARGS_CH', 'CHX', 'CHY', 'CHZ', \
              'DX', 'DTYPE', 'PRAGMA_fp64', 'PAD']
    values = ['', '0.5', '0.5', '0.5', \
              '', '0.5', '0.5', '0.5', \
              str(self.ls)] + self.dtype_str_list

    self.e_args = self.ns_pitch + self.eh_bufs
    self.h_args = self.ns_pitch + self.eh_bufs
    if self.ce_on:
        values[:4] = [ \
            ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez', \
            'cex[idx]', 'cey[idx]', 'cez[idx]']
        self.e_args += self.ce_bufs
    if self.ch_on:
        values[4:8] = [ \
            ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz', \
            'chx[idx]', 'chy[idx]', 'chz[idx]']
        self.h_args += self.ch_bufs

    # read the template through a context manager so the handle is released
    with open(common_gpu.src_path + 'core.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = cl.Program(self.context, ksrc).build()
def get(self, str_f):
    """Return the instance attribute named by str_f.

    str_f must be one of the six field names ('ex' .. 'hz') or one of the
    six coefficient names ('cex' .. 'chz'); common.check_value is used to
    enforce this.  The lookup goes through the instance dictionary, so a
    name that was never set on this instance raises KeyError.
    """
    field_names = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    coeff_names = ['cex', 'cey', 'cez', 'chx', 'chy', 'chz']
    common.check_value('str_f', str_f, field_names + coeff_names)

    instance_attrs = vars(self)
    return instance_attrs[str_f]
def __init__(self, nx, ny, nz, \
        coeff_use='', \
        precision_float='single', \
        use_cpu_core=0):
    """Allocate the CPU field arrays and the task queue.

    nx, ny, nz: grid size (ints).  The z extent is padded so that nz_pitch
        is a multiple of 4 (single) or 2 (double) elements (16 bytes).
    coeff_use: '', 'e', 'h' or 'eh'; selects which coefficient arrays exist.
        Coefficient arrays are initialized to 0.5.
    precision_float: 'single' or 'double'.
    use_cpu_core: int, stored as self.use_cpu_core.

    The alignment size is computed with floor division so that it stays an
    integer even when true division is in effect.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float, \
            ('single', 'double'))
    common.check_type('use_cpu_core', use_cpu_core, int)

    # local variables
    dtype = {'single': np.float32, 'double': np.float64}[precision_float]

    # padding for the nz which is multiple of 4 (float32) or 2 (float64)
    segment_nbytes = 16
    align_size = segment_nbytes // np.nbytes[dtype]
    pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
    slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
    nz_pitch = nz + pad

    ns = [nx, ny, nz]
    ns_pitch = [nx, ny, nz_pitch]
    ns_pad = [nx, ny, pad]

    # on/off the coefficient arrays
    ce_on = 'e' in coeff_use
    ch_on = 'h' in coeff_use

    # allocations
    ehs = [np.zeros(ns_pitch, dtype) for i in range(6)]

    if ce_on:
        ces = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]

    if ch_on:
        chs = [np.ones(ns_pitch, dtype) * 0.5 for i in range(3)]

    # global variables and functions
    self.device_type = 'cpu'
    self.qtask = QueueTask()
    self.enqueue = self.qtask.enqueue
    self.enqueue_barrier = self.qtask.enqueue_barrier

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns
    self.ns_pitch = ns_pitch
    self.ns_pad = ns_pad

    self.align_size = align_size
    self.pad = pad
    self.slice_z = slice_z

    self.precision_float = precision_float
    self.use_cpu_core = use_cpu_core
    self.dtype = dtype

    self.coeff_use = coeff_use
    self.ce_on = ce_on
    self.ch_on = ch_on

    self.ehs = ehs
    self.ex, self.ey, self.ez = ehs[:3]
    self.hx, self.hy, self.hz = ehs[3:]
    if ce_on:
        self.ces = ces
        self.cex, self.cey, self.cez = ces
    if ch_on:
        self.chs = chs
        self.chx, self.chy, self.chz = chs

    # update list
    self.instance_list = []
    self.append_instance = lambda instance: \
        common.append_instance(self.instance_list, instance)
def __init__(self, fields, pt0, pt1, ep_inf, drude_freq, gamma, mask_arrays=(1, 1, 1)):
    """Set up a Drude-type material in the region pt0..pt1.

    The coefficients ca/cb/cc are derived from ep_inf, drude_freq, gamma
    and fields.dt, multiplied by the per-component masks, and the ce arrays
    of `fields` are overwritten with `ca` inside the region.  The instance
    then registers itself in the update list with the 'material' priority.

    fields: Fields instance; must provide dt, ns, dtype and get_ces().
    pt0, pt1: list/tuple of ints or floats, converted by
        common.convert_indices and then range-checked per axis.
    ep_inf, drude_freq, gamma: int or float material parameters.
    mask_arrays: list/tuple with one entry per component, each an int or an
        ndarray whose shape equals common.shape_two_points(pt0, pt1).

    Raises AssertionError when a mask array has the wrong shape.  The
    expected shape is computed before the check so the message can report
    it; previously the message referenced a name that was not yet assigned.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('ep_inf', ep_inf, (int, float))
    common.check_type('drude_freq', drude_freq, (int, float))
    common.check_type('gamma', gamma, (int, float))
    common.check_type('mask_arrays', mask_arrays, (list, tuple), (np.ndarray, types.IntType))

    # local variables
    pt0 = common.convert_indices(fields.ns, pt0)
    pt1 = common.convert_indices(fields.ns, pt1)
    dtype = fields.dtype

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    mask_shape = common.shape_two_points(pt0, pt1)
    for mask_array in mask_arrays:
        if isinstance(mask_array, np.ndarray):
            assert mask_shape == mask_array.shape, \
                'shape mismatch : %s, %s' % (mask_shape, mask_array.shape)

    # allocations
    psi_shape = common.shape_two_points(pt0, pt1, is_dummy=True)
    psis = [np.zeros(psi_shape, dtype) for i in range(3)]

    dt = fields.dt
    aa = (2 - gamma * dt) / (2 + gamma * dt)
    bb = drude_freq**2 * dt / (2 + gamma * dt)
    # common denominator of ca, cb and cc
    denom = 2 * ep_inf + bb * dt
    ca = 2 * dt / denom
    cb = -(aa + 3) * bb * dt / denom
    cc = -(aa + 1) * dt / denom

    cas = [ca * mask for mask in mask_arrays]
    cbs = [cb * mask for mask in mask_arrays]
    ccs = [cc * mask for mask in mask_arrays]

    # modify ce arrays
    slices = common.slices_two_points(pt0, pt1)
    for ce, masked_ca in zip(fields.get_ces(), cas):
        ce[slices] = masked_ca

    # global variables
    self.mainf = fields
    self.psis = psis
    self.cbs = cbs
    self.ccs = ccs
    self.pcs = aa, (aa + 1) * bb
    self.slices = slices

    # append to the update list
    self.priority_type = 'material'
    fields.append_instance(self)
def __init__(self, node_fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Distribute an incident source over the devices of a node.

    The x range of the request is intersected with the x range of every
    main field (taken from nodef.accum_nx_list).  For each overlap, a
    device-level DirectIncident is created on the matching gpu/cpu module
    with the local points and the matching part of spatial_value.

    node_fields: NodeFields instance.
    str_f: one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints; every index must be in range(n) of its axis.
    tfunc: plain Python function forwarded to each DirectIncident.
    spatial_value: scalar or ndarray.  An ndarray must match the region
        shape and the node dtype; a scalar is cast to that dtype.
    is_overwrite: forwarded to each DirectIncident.

    The sub-array is selected with a tuple of slices, because newer NumPy
    releases reject a list of slices as a multidimensional index.
    """
    common.check_type('node_fields', node_fields, NodeFields)
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('tfunc', tfunc, types.FunctionType)
    common.check_type('spatial_value', spatial_value, \
            (np.ndarray, np.number, types.FloatType, types.IntType))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    nodef = node_fields
    dtype = nodef.dtype
    is_array = isinstance(spatial_value, np.ndarray)
    mainf_list = nodef.mainf_list
    anx = nodef.accum_nx_list

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
            'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
            'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # allocation
    dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)
    incident_list = []
    self.gpu, self.cpu = gpu, cpu

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1

    for i, mainf in enumerate(mainf_list):
        nx0 = anx[i]
        nx1 = anx[i + 1]
        overlap = common.intersection_two_lines((nx0, nx1), (pt0[0], pt1[0]))
        if overlap is None:
            continue

        # overlap expressed relative to the requested region
        shift_pt0 = (overlap[0] - x0, 0, 0)
        shift_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)

        if is_array:
            shift_slices = tuple(
                slice(p0, p1 + 1) for p0, p1 in zip(shift_pt0, shift_pt1))
            reshaped_value = spatial_value.reshape(dummied_shape)
            dummied_array = reshaped_value[shift_slices]
            overlap_shape = common.shape_two_points(shift_pt0, shift_pt1)
            split_value = dummied_array.reshape(overlap_shape).copy()
        else:
            split_value = spatial_value

        # overlap expressed relative to this device's own grid
        local_pt0 = (overlap[0] - nx0, y0, z0)
        local_pt1 = (overlap[1] - nx0, y1, z1)
        device_module = getattr(self, mainf.device_type)
        incident_list.append( \
            device_module.DirectIncident(mainf, str_f, local_pt0, local_pt1, \
                                         tfunc, split_value, is_overwrite))

    # global variables
    self.incident_list = incident_list
def __init__(self, node_fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Distribute an incident source over the main and buffer fields.

    For every main field and every buffer field of the node, the requested
    region is intersected with the region covered by that field
    (common.overlap_two_regions).  Each non-empty overlap is handed to
    self.create_instance.

    node_fields: node-level Fields instance providing mainf_list,
        buffer_dict, updatef_list, accum_nx_list, ns and dtype.
    str_f: one of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints.  The allowed index range per axis is
        range(n), extended by one cell on a side where a buffer exists
        ('<axis>-' allows -1, '<axis>+' allows n).
    tfunc: plain Python function, stored as self.tfunc.
    spatial_value: scalar or ndarray.  An ndarray must match the region
        shape and the node dtype; a scalar is cast to that dtype.
    is_overwrite: stored as self.is_overwrite.
    """
    common.check_type('node_fields', node_fields, Fields)
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('tfunc', tfunc, types.FunctionType)
    common.check_type('spatial_value', spatial_value, \
            (np.ndarray, np.number, types.FloatType, types.IntType))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    nodef = node_fields
    dtype = nodef.dtype
    is_array = isinstance(spatial_value, np.ndarray)
    mainf_list = nodef.mainf_list
    buffer_dict = nodef.buffer_dict
    anx = nodef.accum_nx_list
    nx, ny, nz = nodef.ns

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
        start, end = 0, n
        if (axis + '+') in buffer_dict:
            end = n + 1
        if (axis + '-') in buffer_dict:
            start = -1
        common.check_value('pt0 %s' % axis, p0, range(start, end))
        common.check_value('pt1 %s' % axis, p1, range(start, end))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
            'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
            'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # global variables
    self.str_f = str_f
    self.pt0 = pt0
    self.pt1 = pt1
    self.tfunc = tfunc
    self.spatial_value = spatial_value
    self.is_overwrite = is_overwrite
    self.is_array = is_array

    self.cpu = cpu
    # the gpu module is imported only when a gpu field takes part
    if 'gpu' in [f.device_type for f in nodef.updatef_list]:
        from kemp.fdtd3d import gpu
        self.gpu = gpu

    # create IncidentDirect instance
    for i, mainf in enumerate(mainf_list):
        fields_pt0 = (anx[i], 0, 0)
        fields_pt1 = (anx[i + 1] - 1, ny - 1, nz - 1)
        overlap = common.overlap_two_regions(fields_pt0, fields_pt1, pt0, pt1)
        if overlap is not None:
            self.create_instance(mainf, fields_pt0, fields_pt1, overlap[0], overlap[1])

    # for buffer
    for direction, bufferf in buffer_dict.items():
        fields_pt0 = { \
            'x+': (anx[-1] - 1, 0, 0), \
            'y+': (0, ny - 2, 0), \
            'z+': (0, 0, nz - 2), \
            'x-': (-1, 0, 0), \
            'y-': (0, -1, 0), \
            'z-': (0, 0, -1)}[direction]

        fields_pt1 = { \
            'x+': (anx[-1] + 1, ny - 1, nz - 1), \
            'y+': (nx - 1, ny, nz - 1), \
            'z+': (nx - 1, ny - 1, nz), \
            'x-': (1, ny - 1, nz - 1), \
            'y-': (nx - 1, 1, nz - 1), \
            'z-': (nx - 1, ny - 1, 1)}[direction]

        overlap = common.overlap_two_regions(fields_pt0, fields_pt1, pt0, pt1)
        if overlap is not None:
            self.create_instance(bufferf, fields_pt0, fields_pt1, overlap[0], overlap[1])
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare node-level setters for a region spanning several devices.

    The x range of the request is intersected with the x range of every
    main field (taken from nodef.accum_nx_list).  For each overlap, a
    device-level SetFields is created on the matching gpu/cpu module,
    together with the slices that select the matching part of the value.
    Axes whose dummied extent is 1 get no slice.

    fields: node-level Fields instance.
    str_f: a field name or a list/tuple of names, each one of
        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints; every index must be in range(n) of its axis.
    is_array: selects set_fields_spatial_value (True) or
        set_fields_single_value (False) as self.set_fields.
    is_overwrite: forwarded to each device-level SetFields.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    nodef = fields
    str_fs = common.convert_to_tuple(str_f)
    mainf_list = nodef.mainf_list
    anx = nodef.accum_nx_list

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # allocation
    dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)

    setf_list = []
    slices_list = []
    self.gpu, self.cpu = gpu, cpu

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1

    for i, mainf in enumerate(mainf_list):
        nx0 = anx[i]
        nx1 = anx[i + 1]
        overlap = common.intersection_two_lines((nx0, nx1), (pt0[0], pt1[0]))
        if overlap is None:
            continue

        # overlap expressed relative to the requested region
        slice_pt0 = (overlap[0] - x0, 0, 0)
        slice_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)

        slices = [slice(p0, p1 + 1)
                  for j, (p0, p1) in enumerate(zip(slice_pt0, slice_pt1))
                  if dummied_shape[j] != 1]
        # a single point leaves no sliced axis; fall back to one element
        slices_list.append(slices if slices else [slice(0, 1)])

        # overlap expressed relative to this device's own grid
        local_pt0 = (overlap[0] - nx0, y0, z0)
        local_pt1 = (overlap[1] - nx0, y1, z1)
        device_module = getattr(self, mainf.device_type)
        setf_list.append( \
            device_module.SetFields(mainf, str_fs, local_pt0, local_pt1, \
                                    is_array, is_overwrite))

    # global variables and functions
    self.str_fs = str_fs
    self.dtype = nodef.dtype
    self.shape = common.shape_two_points(pt0, pt1, len(str_fs))
    self.setf_list = setf_list
    self.slices_list = slices_list

    if is_array:
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, context, device, queue_task, \
        nx, ny, nz, \
        coeff_use='', \
        precision_float='single', \
        local_work_size=256):
    """Allocate the OpenCL field buffers for one device.

    context, device: pyopencl Context and Device to run on.
    queue_task: QueueTask whose enqueue / enqueue_barrier are re-exported.
    nx, ny, nz: grid size (ints).  The z extent is padded so that nz_pitch
        is a multiple of 16 (single) or 8 (double) elements (64 bytes).
    coeff_use: '', 'e', 'h' or 'eh'; selects which coefficient buffers
        exist.  They are initialized to 0.5.
    precision_float: 'single' or 'double'.  'double' silently degrades to
        'single' (with a printed warning) when the device reports neither
        cl_khr_fp64 nor cl_amd_fp64.
    local_work_size: work-group size, stored as self.ls; self.gs is the
        number of padded cells rounded up to a multiple of it.

    The warning is formatted inside the print call, so it works both as a
    print statement and as a print function.  The alignment size uses
    floor division so it stays an integer.
    """
    common.check_type('context', context, cl.Context)
    common.check_type('device', device, cl.Device)
    common.check_type('queue_task', queue_task, QueueTask)
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float, ('single', 'double'))
    common.check_type('local_work_size', local_work_size, int)

    # local variables
    queue = cl.CommandQueue(context, device)
    pragma_fp64 = ''
    if precision_float == 'double':
        extensions = device.get_info(cl.device_info.EXTENSIONS)
        if 'cl_khr_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
        elif 'cl_amd_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
        else:
            precision_float = 'single'
            print('Warning: The %s GPU device is not support the double-precision.' % \
                  device.get_info(cl.device_info.NAME))
            print('The precision is changed to \'single\'.')

    dtype = {'single': np.float32, 'double': np.float64}[precision_float]
    dtype_str_list = { \
        'single': ['float', ''], \
        'double': ['double', pragma_fp64]}[precision_float]

    # padding for the nz which is multiple of 16 (float32) or 8 (float64)
    segment_nbytes = 64
    align_size = segment_nbytes // np.nbytes[dtype]
    pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
    slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
    nz_pitch = nz + pad

    ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
    ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
    ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

    # on/off the coefficient arrays
    ce_on = 'e' in coeff_use
    ch_on = 'h' in coeff_use

    # allocations
    f = np.zeros(ns_pitch, dtype)
    cf = np.ones_like(f) * 0.5
    mflags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    eh_bufs = [cl.Buffer(context, mflags, hostbuf=f) for i in range(6)]

    c_mflags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    if ce_on:
        ce_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for i in range(3)]

    if ch_on:
        ch_bufs = [cl.Buffer(context, c_mflags, hostbuf=cf) for i in range(3)]

    del f, cf

    # global variables
    self.device_type = 'gpu'
    self.context = context
    self.device = device
    self.queue = queue
    self.qtask = queue_task
    self.enqueue = queue_task.enqueue
    self.enqueue_barrier = queue_task.enqueue_barrier

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns
    self.ns_pitch = ns_pitch
    self.ns_pad = ns_pad

    self.align_size = align_size
    self.pad = pad
    self.slice_z = slice_z

    self.precision_float = precision_float
    self.dtype = dtype
    self.dtype_str_list = dtype_str_list

    self.coeff_use = coeff_use
    self.ce_on = ce_on
    self.ch_on = ch_on

    self.eh_bufs = eh_bufs
    self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
    self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
    if ce_on:
        self.ce_bufs = ce_bufs
        self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
    if ch_on:
        self.ch_bufs = ch_bufs
        self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

    # global size: number of padded cells rounded up to a multiple of ls
    self.ls = ls = local_work_size
    nmax = nx * ny * nz_pitch
    remainder = nmax % ls
    self.gs = nmax if remainder == 0 else nmax - remainder + ls

    # create update list
    self.instance_list = []
    self.append_instance = lambda instance: \
        common.append_instance(self.instance_list, instance)
def __init__(self, geometry_h5_path, max_tstep, mpi_shape, pbc_axes='', target_device='all', precision_float='single', **kargs):
    """Build the node-level simulation from a geometry file and device lists.

    geometry_h5_path: path of the geometry HDF5 file; its 'coeff_use',
        'nx', 'ny' and 'nz' attributes are read.
    max_tstep: int, stored and passed to the MPI exchange instances.
    mpi_shape: list/tuple of ints; (1, 1, 1) means "no MPI".
    pbc_axes: string containing the periodic axes, e.g. 'xy'.
    target_device: 'all', or a string containing 'cpu' and/or 'gpu'.
    precision_float: 'single' or 'double'.
    kargs: 'device_nx_list', 'ny_list' and 'nz_list' are read from here.

    The process exits through sys.exit() when the MPI size does not match
    mpi_shape, when no usable device module can be imported, when the
    geometry file cannot be read, or when device_nx_list does not match
    the number of devices.

    Changes from the previous version:
    - mpi_shape is compared as a tuple, so the list [1, 1, 1] is also
      recognized as the non-MPI case;
    - the caller's device_nx_list is copied before it is consumed with
      pop(0), so the caller's list is no longer modified;
    - the HDF5 file is closed after its attributes have been read;
    - bare `except:` clauses were narrowed to `except Exception:`.
    """
    common.check_type('geometry_h5_path', geometry_h5_path, str)
    common.check_type('max_tstep', max_tstep, int)
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)
    common.check_type('target_device', target_device, str)
    common.check_value('precision_float', precision_float, ['single', 'double'])

    # import modules
    global is_mpi, is_gpu
    is_mpi = tuple(mpi_shape) != (1, 1, 1)

    if is_mpi:
        global network, common_mpi, comm, size, rank, coord

        from mpi4py import MPI
        from kemp.fdtd3d import network
        from kemp.fdtd3d.util import common_mpi

        comm = MPI.COMM_WORLD
        size = comm.Get_size()
        rank = comm.Get_rank()
        coord = common_mpi.my_coord(rank, mpi_shape)

    # without MPI the single process is always the master
    is_master = not (is_mpi and rank != 0)
    is_cpu = target_device == 'all' or 'cpu' in target_device
    is_gpu = target_device == 'all' or 'gpu' in target_device

    if is_mpi:
        if reduce(lambda a, b: a * b, mpi_shape) != size:
            if is_master:
                print("The MPI size %d is not matched the mpi_shape %s" % (size, mpi_shape))
            sys.exit()

    if is_gpu:
        global cl, gpu, common_gpu
        try:
            import pyopencl as cl
            from kemp.fdtd3d import gpu
            from kemp.fdtd3d.util import common_gpu
        except Exception:
            if is_master:
                print("The 'pyopencl' module is not found.")

            if is_cpu:
                if is_master:
                    print("The CPU is only used.")
                target_device = 'cpu'
                is_gpu = False
            else:
                sys.exit()

    # read from the h5 file
    # nx, ny, nz are read but not used below; the grid size comes from the
    # per-device lists.  The reads still fail early on an incomplete file.
    try:
        h5f = h5py.File(geometry_h5_path, 'r')
        try:
            coeff_use = h5f.attrs['coeff_use']
            nx = h5f.attrs['nx']
            ny = h5f.attrs['ny']
            nz = h5f.attrs['nz']
        finally:
            h5f.close()
    except Exception:
        if is_master:
            print(repr(sys.exc_info()))
            print("To load the geometry HDF5 file '%s' is failed." % geometry_h5_path)
        sys.exit()

    # local variables
    device_nx_list = kargs.get('device_nx_list')
    ny_list = kargs.get('ny_list')
    nz_list = kargs.get('nz_list')

    # Set the number of device and the device_n_list
    ndev = 1 if is_cpu else 0
    if is_gpu:
        try:
            gpu_devices = common_gpu.gpu_device_list(print_info=False)
            context = cl.Context(gpu_devices)
            ndev += len(gpu_devices)
        except Exception:
            if is_master:
                print(repr(sys.exc_info()))
                print("To get the GPU devices is failed. The CPU is only used.")
            target_device = 'cpu'
            is_gpu = False

    if is_mpi:
        mi, mj, mk = coord
        dnx_list = device_nx_list[mi * ndev:(mi + 1) * ndev]
        dny = ny_list[mj]
        dnz = nz_list[mk]
    else:
        # copy: dnx_list is consumed with pop(0) below
        dnx_list = list(device_nx_list)
        dny = ny_list[0]
        dnz = nz_list[0]

    total_ndev = mpi_shape[0] * ndev
    if len(device_nx_list) != total_ndev:
        if is_master:
            print("The device_nx_list %s is not matched with the number of total devices %d." % (device_nx_list, total_ndev))
        sys.exit()

    # create the mainf_list and the buffer_dict
    buffer_dict = {}
    if is_mpi:
        # create BufferFields instances
        snx = sum(dnx_list) - ndev + 1
        sny, snz = dny, dnz

        mpi_target_dict = common_mpi.mpi_target_dict(rank, mpi_shape, pbc_axes)
        for direction, target_rank in mpi_target_dict.items():
            if target_rank is not None:
                n0, n1 = {'x': (sny, snz),
                          'y': (snx, snz),
                          'z': (snx, sny)}[direction[0]]
                bufferf = cpu.BufferFields(direction, target_rank, n0, n1, coeff_use, precision_float)
                buffer_dict[direction] = bufferf

    mainf_list = []
    if is_cpu:
        mainf_list += [cpu.Fields(dnx_list.pop(0), dny, dnz, coeff_use, precision_float, use_cpu_core=1)]

    if is_gpu:
        mainf_list += [gpu.Fields(context, gpu_device, dnx, dny, dnz, coeff_use, precision_float)
                       for gpu_device, dnx in zip(gpu_devices, dnx_list)]

    # create node.Fields instance
    nodef = node.Fields(mainf_list, buffer_dict)

    # create nodePbc instance
    # only axes that are not split across MPI ranks are handled in-node
    node_pbc_axes = ''.join([axis for i, axis in enumerate(['x', 'y', 'z'])
                             if mpi_shape[i] == 1 and axis in pbc_axes])
    if node_pbc_axes != '':
        node.Pbc(nodef, node_pbc_axes)

    # create update instances
    node.Core(nodef)

    for bufferf in nodef.buffer_dict.values():
        network.ExchangeMpiSplitNonBlock(bufferf, max_tstep)

    # accum_sub_ns_dict, node_pts
    if is_mpi:
        asn_dict = common_mpi.accum_sub_ns_dict(mpi_shape, ndev, device_nx_list, ny_list, nz_list)
        axes = ['x', 'y', 'z']
        node_pt0 = [asn_dict[ax][m] for ax, m in zip(axes, coord)]
        node_pt1 = [asn_dict[ax][m + 1] - 1 for ax, m in zip(axes, coord)]

    # global variables
    self.max_tstep = max_tstep
    self.mpi_shape = mpi_shape
    self.ns = (asn_dict['x'][-1], asn_dict['y'][-1], asn_dict['z'][-1]) if is_mpi else nodef.ns

    self.nodef = nodef
    self.is_master = is_master

    if is_mpi:
        self.asn_dict = asn_dict
        self.node_pt0 = node_pt0
        self.node_pt1 = node_pt1

    # for savefields
    self.savef_tag_list = []
    self.savef_list = []
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Build the OpenCL copy program that writes into a field region.

    The 'copy.cl' template is specialized for the region pt0..pt1, for an
    array or scalar source, and for overwrite ('=') or accumulate ('+=').

    fields: Fields instance providing context, ls, dtype, dtype_str_list,
        ns and get_buf().
    str_f: a field name or a list/tuple of names, each one of
        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints or floats, converted by
        common.convert_indices and then range-checked per axis.
    is_array: True allocates a device source buffer and selects
        set_fields_spatial_value; False selects set_fields_single_value.
    is_overwrite: True -> '=', False -> '+=' in the generated kernel.

    The template file is read through a context manager so its handle is
    released, and the buffer lookup no longer rebinds the str_f parameter.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list
    overwrite_str = {True: '=', False: '+='}[is_overwrite]

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID', \
              'ARGS', \
              'TARGET', 'SOURCE', 'OVERWRITE', \
              'DTYPE', 'PRAGMA_fp64']

    nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
    if is_array:
        values = [nmax_str, xid_str, yid_str, zid_str, \
                  '__global DTYPE *source', \
                  'target[idx]', 'source[sub_idx]', overwrite_str] + \
                 dtype_str_list
    else:
        values = [nmax_str, xid_str, yid_str, zid_str, \
                  'DTYPE source', \
                  'target[idx]', 'source', overwrite_str] + \
                 dtype_str_list

    with open(common_gpu.src_path + 'copy.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # allocation
    target_bufs = [fields.get_buf(strf) for strf in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    if is_array:
        tmp_array = np.zeros(shape, dtype=fields.dtype)
        source_buf = cl.Buffer( \
            fields.context, \
            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, \
            hostbuf=tmp_array)

    # global variables and functions
    self.mainf = fields
    self.program = program
    self.target_bufs = target_bufs
    self.shape = shape

    # global size: NMAX rounded up to a multiple of the local size
    nmax = int(nmax_str)
    remainder = nmax % fields.ls
    self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

    if is_array:
        self.source_buf = source_buf
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, queue_task, \
        nx, ny, nz, \
        precision_float='single', \
        use_cpu_core=0):
    """Allocate the CPU field arrays and the C-template macro values.

    queue_task: QueueTask whose enqueue / enqueue_barrier are re-exported.
    nx, ny, nz: grid size (ints).
    precision_float: 'single' or 'double'.
    use_cpu_core: 0 leaves both OpenMP macros empty, 1 replaces the 'OMP '
        macro with '// ', any other value emits an omp_set_num_threads
        call with that count.

    The 'double' entry of the DTYPE table used to be keyed ' double' (with
    a leading space), so precision_float='double' raised KeyError; the key
    is now 'double'.
    """
    common.check_type('queue_task', queue_task, QueueTask)
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('precision_float', precision_float, ('single', 'double'))
    common.check_type('use_cpu_core', use_cpu_core, int)

    # local variables
    ns = [nx, ny, nz]
    dtype = {'single': np.float32, 'double': np.float64}[precision_float]

    # allocations
    ehs = [np.zeros(ns, dtype) for i in range(6)]

    # common macros for C templates
    dtype_macros = ['DTYPE']
    dtype_values = {'single': ['float'], 'double': ['double']}[precision_float]

    omp_macros = ['OMP ', 'SET_NUM_THREADS']
    if use_cpu_core == 0:
        omp_values = ['', '']
    elif use_cpu_core == 1:
        omp_values = ['// ', '']
    else:
        omp_values = ['', 'omp_set_num_threads(%d);' % use_cpu_core]

    # global variables and functions
    self.device_type = 'cpu'
    self.qtask = queue_task
    self.enqueue = queue_task.enqueue
    self.enqueue_barrier = queue_task.enqueue_barrier

    self.dx = 1.
    self.dt = 0.5
    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns

    self.dtype = dtype
    self.dtype_omp_macros = dtype_macros + omp_macros
    self.dtype_omp_values = dtype_values + omp_values

    self.ehs = ehs
    self.ex, self.ey, self.ez = ehs[:3]
    self.hx, self.hy, self.hz = ehs[3:]

    self.ce_on, self.ch_on = False, False
    self.rd_on = False

    # update list
    self.instance_list = []
    self.append_instance = lambda instance: \
        common.append_instance(self.instance_list, instance)
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Build the CUDA copy kernel that writes into a field region.

    The 'copy.cu' template is specialized for the region pt0..pt1, for an
    array or scalar source, and for overwrite ('=') or accumulate ('+=').
    The compiled module's 'copy' function is kept as self.kernel_copy.

    fields: Fields instance providing dtype, dtype_str_list, ns and
        get_buf().
    str_f: a field name or a list/tuple of names, each one of
        'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
    pt0, pt1: list/tuple of ints or floats, converted by
        common.convert_indices and then range-checked per axis.
    is_array: True allocates a device source buffer and selects
        set_fields_spatial_value; False selects set_fields_single_value.
    is_overwrite: True -> '=', False -> '+=' in the generated kernel.

    The template file is read through a context manager so its handle is
    released, and the buffer lookup no longer rebinds the str_f parameter.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list
    overwrite_str = {True: '=', False: '+='}[is_overwrite]

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID', \
              'ARGS', \
              'TARGET', 'SOURCE', 'OVERWRITE', \
              'DTYPE']

    if is_array:
        values = common_gpu.macro_replace_list(pt0, pt1) + \
                 ['DTYPE *source', \
                  'target[idx]', 'source[sub_idx]', overwrite_str] + \
                 dtype_str_list
    else:
        values = common_gpu.macro_replace_list(pt0, pt1) + \
                 ['DTYPE source', \
                  'target[idx]', 'source', overwrite_str] + \
                 dtype_str_list

    with open(common_gpu.src_path + 'copy.cu') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = SourceModule(ksrc)
    kernel_copy = program.get_function('copy')

    # allocation
    target_bufs = [fields.get_buf(strf) for strf in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    if is_array:
        tmp_array = np.zeros(shape, fields.dtype)
        source_buf = cuda.to_device(tmp_array)

    # global variables and functions
    self.mainf = fields
    self.kernel_copy = kernel_copy
    self.target_bufs = target_bufs
    self.shape = shape

    if is_array:
        self.source_buf = source_buf
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, context, device,
             nx, ny, nz,
             precision_float='single',
             local_work_size=256):
    """Create the command queue and six zero-filled field buffers.

    Args:
        context: pyopencl Context the buffers are created in.
        device: pyopencl Device used for the command queue and queried
            for double-precision extensions.
        nx, ny, nz: Grid size along each axis (ints).
        precision_float: 'single' or 'double'. If 'double' is requested
            and the device lists neither 'cl_khr_fp64' nor 'cl_amd_fp64',
            a warning is printed and 'single' is used instead.
        local_work_size: Stored as self.ls.
    """
    common.check_type('context', context, cl.Context)
    common.check_type('device', device, cl.Device)
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('precision_float', precision_float,
                       ('single', 'double'))
    common.check_type('local_work_size', local_work_size, int)

    # local variables
    ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
    queue = cl.CommandQueue(context, device)
    pragma_fp64 = ''
    if precision_float == 'double':
        extensions = device.get_info(cl.device_info.EXTENSIONS)
        if 'cl_khr_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
        elif 'cl_amd_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
        else:
            precision_float = 'single'
            # Format inside the call: 'print(...) % x' applies '%' to the
            # return value of print() once print is a function, which
            # raises TypeError instead of showing the warning.
            print('Warning: The %s GPU device is not support the double-precision.'
                  % device.get_info(cl.device_info.NAME))
            print('The precision is changed to \'single\'.')

    dtype = {'single': np.float32, 'double': np.float64}[precision_float]
    dtype_str_list = {
        'single': ['float', ''],
        'double': ['double', pragma_fp64]}[precision_float]

    # allocations
    f = np.zeros(ns, dtype)
    eh_bufs = [cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
               for _ in range(6)]
    for eh_buf in eh_bufs:
        # Initialise every device buffer with zeros.
        cl.enqueue_copy(queue, eh_buf, f)

    # global variables
    self.device_type = 'gpu'
    self.context = context
    self.device = device
    self.queue = queue

    self.dx = 1.
    self.dt = 0.5
    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns

    self.precision_float = precision_float
    self.dtype = dtype
    self.dtype_str_list = dtype_str_list

    self.eh_bufs = eh_bufs
    self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
    self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]

    self.ce_on, self.ch_on = False, False
    self.rd_on = False

    self.ls = local_work_size

    # create update list
    self.instance_list = []
    self.append_instance = lambda instance: \
        common.append_instance(self.instance_list, instance)
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare a host array and per-device getters for a region.

    The region is intersected, along x only, with the x-range of every
    entry in fields.mainf_list. For each entry that overlaps, a GetFields
    object of the matching device module is created together with the
    slices locating its part inside the region.

    Args:
        fields: Node-level Fields instance (provides mainf_list,
            accum_nx_list, ns and dtype).
        str_f: One field name, or a list/tuple of names, each taken from
            'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0: First corner of the region as three ints, each in range(n)
            for the corresponding axis.
        pt1: Second corner of the region, same constraints as pt0.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    # local variables
    nodef = fields
    str_fs = common.convert_to_tuple(str_f)
    mainf_list = nodef.mainf_list
    # presumably the cumulative x offsets of the sub-domains -- confirm
    anx = nodef.accum_nx_list

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], nodef.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # allocation
    shape = common.shape_two_points(pt0, pt1, len(str_fs))
    dummied_shape = common.shape_two_points(pt0, pt1, is_dummy=True)
    host_array = np.zeros(shape, dtype=nodef.dtype)

    # One equally sized piece of host_array per requested field name.
    split_host_array = np.split(host_array, len(str_fs))
    split_host_array_dict = dict(zip(str_fs, split_host_array))

    getf_list = []
    slices_list = []
    # Looked up by name below through mainf.device_type.
    self.gpu, self.cpu = gpu, cpu
    last_index = len(mainf_list) - 1
    for i, mainf in enumerate(mainf_list):
        nx0 = anx[i]
        # Every sub-domain except the last stops one cell before the
        # next offset.
        nx1 = anx[i + 1] - 1 if i < last_index else anx[i + 1]
        overlap = common.intersection_two_lines((nx0, nx1),
                                                (pt0[0], pt1[0]))

        # Identity test: None is a singleton, and '!=' would defer to
        # whatever comparison the returned object defines.
        if overlap is not None:
            x0, y0, z0 = pt0
            x1, y1, z1 = pt1

            # Corners of the overlap relative to the requested region.
            slice_pt0 = (overlap[0] - x0, 0, 0)
            slice_pt1 = (overlap[1] - x0, y1 - y0, z1 - z0)
            slices = []
            for j, (p0, p1) in enumerate(zip(slice_pt0, slice_pt1)):
                # Axes whose dummied extent is 1 get no slice.
                if dummied_shape[j] != 1:
                    slices.append(slice(p0, p1 + 1))
            slices_list.append(slices or [slice(0, 1)])

            # Corners of the overlap relative to this sub-domain.
            local_pt0 = (overlap[0] - nx0, y0, z0)
            local_pt1 = (overlap[1] - nx0, y1, z1)
            device_module = getattr(self, mainf.device_type)
            getf_list.append(
                device_module.GetFields(mainf, str_fs, local_pt0, local_pt1))

    # global variables
    self.str_fs = str_fs
    self.host_array = host_array
    self.split_host_array_dict = split_host_array_dict
    self.getf_list = getf_list
    self.slices_list = slices_list
def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Set up a source that writes into one field component.

    For a BufferFields target the region is intersected with each of the
    '', 'pre' and 'post' parts taken from common_buffer.slice_dict and one
    SetFields object is created per overlapping part. For a plain Fields
    target a single SetFields object is stored under the key ''.

    Args:
        fields: Fields or BufferFields instance to write into.
        str_f: One of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0: First corner of the region as three ints, each in range(n)
            for the corresponding axis.
        pt1: Second corner of the region, same constraints as pt0.
        tfunc: Plain Python function, stored as self.tfunc.
        spatial_value: A scalar, converted to fields.dtype, or an ndarray
            whose shape and dtype must match the region and fields.dtype.
        is_overwrite: Forwarded to SetFields.

    Raises:
        AssertionError: If an array spatial_value has the wrong shape or
            dtype.
    """
    common.check_type('fields', fields, (Fields, BufferFields))
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('tfunc', tfunc, types.FunctionType)
    # float/int are the objects types.FloatType/types.IntType alias in
    # Python 2; those aliases do not exist in Python 3.
    common.check_type('spatial_value', spatial_value,
                      (np.ndarray, np.number, float, int))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    e_or_h = str_f[0]
    dtype = fields.dtype
    is_buffer = isinstance(fields, BufferFields)
    is_array = isinstance(spatial_value, np.ndarray)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
            'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
            'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # create the SetFields instances
    is_update_dict = {}
    setf_dict = {}
    svalue_dict = {}

    if is_buffer:
        for part in ['', 'pre', 'post']:
            sl0 = common.slices_two_points(pt0, pt1)
            sl1 = common_buffer.slice_dict[e_or_h][part]
            overlap = common.overlap_two_slices(fields.ns, sl0, sl1)

            # Identity test: None is a singleton.
            if overlap is None:
                # No svalue_dict entry is created for a part that the
                # region does not touch.
                setf_dict[part] = None
            else:
                opt0, opt1 = common.two_points_slices(fields.ns, overlap)
                setf_dict[part] = SetFields(fields, str_f, opt0, opt1,
                                            is_array, is_overwrite)
                svalue_dict[part] = self.overlap_svalue(
                    pt0, pt1, opt0, opt1, spatial_value, is_array)
    else:
        setf_dict[''] = SetFields(fields, str_f, pt0, pt1,
                                  is_array, is_overwrite)
        svalue_dict[''] = spatial_value

    # global variables
    self.mainf = fields
    self.tfunc = tfunc
    self.setf_dict = setf_dict
    self.svalue_dict = svalue_dict

    self.e_or_h = e_or_h
    self.tstep = 1

    # append to the update list
    self.priority_type = 'incident'
    fields.append_instance(self)
def __init__(self, gpuf, direction, tmax, ny, nz, coeff_use, precision_float):
    """Build a three-cell-thick buffer field attached to one x face.

    The parent initialiser is called with nx=3 and use_cpu_core=0. Three
    getter/setter pairs are then created (gf0/sf0, gf1/sf1, gf2/sf2)
    together with one persistent MPI send request and two persistent
    receive requests aimed at the neighbouring rank.

    Args:
        gpuf: gpu.Fields instance this buffer exchanges data with.
        direction: 'x+' (neighbour is rank + 1) or 'x-' (rank - 1).
        tmax: Stored as self.tmax.
        ny, nz, coeff_use, precision_float: Forwarded to the parent
            initialiser.
    """
    super(BufferFields, self).__init__(3, ny, nz, coeff_use, precision_float, use_cpu_core=0)
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('x+', 'x-'))

    if direction == 'x+':
        self.gf0 = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf0 = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf1 = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf1 = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf2 = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf2 = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        neighbour = rank + 1
        send_tag, recv_tag = 0, 1

        # global functions
        self.update_e = self.update_e_xp
        self.update_h = self.update_h_xp

    elif direction == 'x-':
        self.gf0 = gpu.GetFields(gpuf, ['ey', 'ez'], (2, 0, 0), (2, -1, -1))
        self.sf0 = cpu.SetFields(self, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf1 = cpu.GetFields(self, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf1 = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf2 = cpu.GetFields(self, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf2 = cpu.SetFields(self, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        neighbour = rank - 1
        send_tag, recv_tag = 1, 0

        # global functions
        self.update_e = self.update_e_xm
        self.update_h = self.update_h_xm

    # The gf2 host array is the buffer that gets sent; the two receive
    # buffers mirror its shape.
    outgoing = self.gf2.host_array
    self.req_send = comm.Send_init(outgoing, neighbour, tag=send_tag)

    self.tmp_recv_list = []
    self.req_recv_list = []
    for _ in range(2):
        landing = np.zeros(outgoing.shape, gpuf.dtype)
        self.tmp_recv_list.append(landing)
        self.req_recv_list.append(
            comm.Recv_init(landing, neighbour, tag=recv_tag))

    # global variables
    self.direction = direction
    self.switch = 0
    self.tmax = tmax
    self.tstep = 1
def __init__(self, context, device,
             nx, ny, nz,
             coeff_use='',
             precision_float='single',
             local_work_size=256,
             global_work_size=0):
    """Create the queue, z-padded field buffers and coefficient buffers.

    Args:
        context: pyopencl Context the buffers are created in.
        device: pyopencl Device used for the command queue and queried
            for double-precision extensions.
        nx, ny, nz: Grid size along each axis (ints).
        coeff_use: '', 'e', 'h' or 'eh'. An 'e' allocates three ce
            buffers, an 'h' allocates three ch buffers.
        precision_float: 'single' or 'double'. If 'double' is requested
            and the device lists neither 'cl_khr_fp64' nor 'cl_amd_fp64',
            a warning is printed and 'single' is used instead.
        local_work_size: Stored as self.ls.
        global_work_size: Stored as self.gs; 0 means "ask
            common_gpu.get_optimal_gs(device)".
    """
    common.check_type('context', context, cl.Context)
    common.check_type('device', device, cl.Device)
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_type('global_work_size', global_work_size, int)
    common.check_type('local_work_size', local_work_size, int)

    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float,
                       ('single', 'double'))

    # local variables
    queue = cl.CommandQueue(context, device)
    pragma_fp64 = ''
    if precision_float == 'double':
        extensions = device.get_info(cl.device_info.EXTENSIONS)
        if 'cl_khr_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_khr_fp64 : enable'
        elif 'cl_amd_fp64' in extensions:
            pragma_fp64 = '#pragma OPENCL EXTENSION cl_amd_fp64 : enable'
        else:
            precision_float = 'single'
            # Format inside the call: 'print(...) % x' applies '%' to the
            # return value of print() once print is a function, which
            # raises TypeError instead of showing the warning.
            print('Warning: The %s GPU device is not support the double-precision.'
                  % device.get_info(cl.device_info.NAME))
            print('The precision is changed to \'single\'.')

    dtype = {'single': np.float32, 'double': np.float64}[precision_float]
    dtype_str_list = {
        'single': ['float', ''],
        'double': ['double', pragma_fp64]}[precision_float]

    # padding for the nz which is multiple of 16 (float32) or 8 (float64)
    # This uses precision_float after the possible fallback above.
    align_size = {'single': 16, 'double': 8}[precision_float]  # 64 Bytes
    pad = int(np.ceil(float(nz) / align_size) * align_size) - nz
    slice_z = slice(None, None) if pad == 0 else slice(None, -pad)
    nz_pitch = nz + pad

    ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
    ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
    ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

    # on/off the coefficient arrays
    ce_on = 'e' in coeff_use
    ch_on = 'h' in coeff_use

    # allocations
    f = np.zeros(ns_pitch, dtype)
    cf = np.ones_like(f) * 0.5

    mflags = cl.mem_flags.READ_WRITE
    eh_bufs = [cl.Buffer(context, mflags, f.nbytes) for _ in range(6)]
    for eh_buf in eh_bufs:
        # Initialise every field buffer with zeros.
        cl.enqueue_copy(queue, eh_buf, f)

    # The coefficient buffers are only allocated here; nothing is copied
    # into them in this method.
    if ce_on:
        mflags = cl.mem_flags.READ_ONLY
        ce_bufs = [cl.Buffer(context, mflags, cf.nbytes) for _ in range(3)]

    if ch_on:
        mflags = cl.mem_flags.READ_ONLY
        ch_bufs = [cl.Buffer(context, mflags, cf.nbytes) for _ in range(3)]

    # Drop the host-side staging arrays.
    del f, cf

    # global variables
    self.device_type = 'gpu'
    self.context = context
    self.device = device
    self.queue = queue

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ns = ns
    self.ns_pitch = ns_pitch
    self.ns_pad = ns_pad

    self.align_size = align_size
    self.pad = pad
    self.slice_z = slice_z

    self.precision_float = precision_float
    self.dtype = dtype
    self.dtype_str_list = dtype_str_list

    self.coeff_use = coeff_use
    self.ce_on = ce_on
    self.ch_on = ch_on

    self.eh_bufs = eh_bufs
    self.ex_buf, self.ey_buf, self.ez_buf = eh_bufs[:3]
    self.hx_buf, self.hy_buf, self.hz_buf = eh_bufs[3:]
    if ce_on:
        self.ce_bufs = ce_bufs
        self.cex_buf, self.cey_buf, self.cez_buf = ce_bufs
    if ch_on:
        self.ch_bufs = ch_bufs
        self.chx_buf, self.chy_buf, self.chz_buf = ch_bufs

    self.ls = local_work_size
    self.gs = global_work_size
    if self.gs == 0:
        self.gs = common_gpu.get_optimal_gs(device)

    # create update list
    self.instance_list = []
    self.append_instance = lambda instance: \
        common.append_instance(self.instance_list, instance)
def __init__(self, nx, ny, nz,
             coeff_use='e',
             precision_float='single',
             use_cpu_core=0):
    """Allocate the host field arrays and build the C update library.

    Args:
        nx, ny, nz: Grid size along each axis (ints).
        coeff_use: '', 'e', 'h' or 'eh'. An 'e' allocates the ce arrays
            and passes them to update_e; an 'h' does the same for ch and
            update_h. Without them the kernels use the constant 0.5.
        precision_float: 'single' or 'double'.
        use_cpu_core: When non-zero, an omp_set_num_threads call with
            this value is substituted into the C source.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float,
                       ('single', 'double'))

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.coeff_use = coeff_use
    self.dtype = {'single': np.float32,
                  'double': np.float64}[precision_float]
    self.dtype_str_list = {
        'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
        'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1']
    }[precision_float]

    self.device_type = 'cpu'

    # padding for the nz which is multiple of 4 (float32) or 2 (float64)
    a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad

    mask_arr = np.ones(a_size, 'i')
    mask_arr[-(pad + 1):] = 0
    # Render plain ints so the generated text ('1, 1, 1, 0') does not
    # depend on how NumPy prints its scalar types.
    self.dtype_str_list.append(', '.join(str(int(bit)) for bit in mask_arr))

    # ns, qtask, enqueue
    self.ns = [nx, ny, nz]
    self.ns_pitch = [nx, ny, nz_pitch]
    self.qtask = QueueTask()
    self.enqueue = self.qtask.enqueue
    self.enqueue_barrier = self.qtask.enqueue_barrier

    # on/off the coefficient arrays
    self.ce_on = 'e' in self.coeff_use
    self.ch_on = 'h' in self.coeff_use

    # allocations
    self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype)
                for _ in range(6)]
    self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

    if self.ce_on:
        self.ces = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for _ in range(3)]
        self.cex, self.cey, self.cez = self.ces

    if self.ch_on:
        self.chs = [np.ones(self.ns_pitch, dtype=self.dtype) * 0.5
                    for _ in range(3)]
        self.chx, self.chy, self.chz = self.chs

    # program
    macros = [
        'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
        'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
        'OMP_SET_NUM_THREADS',
        'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

    values = [
        '', 'ce=SET1(0.5)', '', '', '', '',
        '', 'ch=SET1(0.5)', '', '', '', '',
        ''] + self.dtype_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if self.ce_on:
        values[:6] = [
            ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
            'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if self.ch_on:
        values[6:12] = [
            ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
            'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Close the template file deterministically.
    with open(common_cpu.src_path + 'core.c') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
                                  shape=(nx, ny, nz_pitch),
                                  flags='C_CONTIGUOUS, ALIGNED')

    # Each C function gets its own argtypes list. Sharing one list and
    # extending it with '+=' mutates it in place, so with coeff_use='eh'
    # the coefficient pointers added for update_e could also end up in
    # the signature of update_h.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = list(e_argtypes)

    self.e_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs
    self.h_args = self.ns_pitch + [0, nx * ny * nz_pitch] + self.ehs

    if self.ce_on:
        e_argtypes += [carg] * 3
        self.e_args += self.ces
    if self.ch_on:
        h_argtypes += [carg] * 3
        self.h_args += self.chs

    self.program.update_e.argtypes = e_argtypes
    self.program.update_e.restype = None
    self.program.update_h.argtypes = h_argtypes
    self.program.update_h.restype = None
def __init__(self, fields, str_f, pt0, pt1, tfunc, spatial_value=1., is_overwrite=False):
    """Set up a source that writes into one field component.

    The region is split with fields.split_points_dict. For every part
    that has points, the set_fields method of a new SetFields object is
    stored; parts without points get a no-op callable.

    Args:
        fields: Fields instance to write into.
        str_f: One of 'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0: First corner of the region as three ints, each in range(n)
            for the corresponding axis.
        pt1: Second corner of the region, same constraints as pt0.
        tfunc: Plain Python function, stored as self.tfunc.
        spatial_value: A scalar, converted to fields.dtype, or an ndarray
            whose shape and dtype must match the region and fields.dtype.
            An array is cut into one sub-array per part.
        is_overwrite: Forwarded to SetFields.

    Raises:
        AssertionError: If an array spatial_value has the wrong shape or
            dtype.
    """
    common.check_type('fields', fields, Fields)
    common.check_value('str_f', str_f, ('ex', 'ey', 'ez', 'hx', 'hy', 'hz'))
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('tfunc', tfunc, types.FunctionType)
    # float/int are the objects types.FloatType/types.IntType alias in
    # Python 2; those aliases do not exist in Python 3.
    common.check_type('spatial_value', spatial_value,
                      (np.ndarray, np.number, float, int))
    common.check_type('is_overwrite', is_overwrite, bool)

    # local variables
    e_or_h = str_f[0]
    dtype = fields.dtype
    is_array = isinstance(spatial_value, np.ndarray)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    if is_array:
        shape = common.shape_two_points(pt0, pt1)
        assert shape == spatial_value.shape, \
            'shape mismatch : %s, %s' % (shape, spatial_value.shape)
        assert dtype == spatial_value.dtype, \
            'dtype mismatch : %s, %s' % (dtype, spatial_value.dtype)
    else:
        spatial_value = dtype(spatial_value)

    # create the SetFields instance
    func_dict = {}
    pts_dict = fields.split_points_dict(e_or_h, pt0, pt1)
    for part, pts in pts_dict.items():
        # Identity test: None is a singleton.
        if pts is None:
            func_dict[part] = lambda a='': None
        else:
            func_dict[part] = SetFields(
                fields, str_f, pts[0], pts[1],
                is_array, is_overwrite).set_fields

    if is_array:
        spatial_array_dict = {}
        for part, pts in pts_dict.items():
            if pts is None:
                spatial_array_dict[part] = 0
            else:
                slices0 = [slice(p0, p1 + 1) for p0, p1 in zip(pt0, pt1)]
                slices1 = [slice(p0, p1 + 1)
                           for p0, p1 in zip(pts[0], pts[1])]
                overlap_slices = common.intersection_two_slices(
                    fields.ns, slices0, slices1)

                # Re-express the overlap relative to the region origin.
                shift_slices = []
                for sl, p0 in zip(overlap_slices, pt0):
                    shift_slices.append(slice(sl.start - p0, sl.stop - p0))

                dummied_shape = common.shape_two_points(pt0, pt1,
                                                        is_dummy=True)
                reshaped_value = spatial_value.reshape(dummied_shape)
                # Multi-axis indexing needs a tuple; newer NumPy no
                # longer accepts a list of slices here.
                dummied_array = reshaped_value[tuple(shift_slices)]

                overlap_shape = common.shape_two_points(pts[0], pts[1])
                spatial_array_dict[part] = \
                    dummied_array.reshape(overlap_shape)

    # global variables and functions
    self.mainf = fields
    self.dtype = dtype
    self.tfunc = tfunc
    self.func_dict = func_dict

    self.e_or_h = e_or_h
    self.tstep = 1

    if is_array:
        self.spatial_array_dict = spatial_array_dict
        self.update = self.update_spatial_value
    else:
        self.spatial_value = spatial_value
        self.update = self.update_single_value

    # append to the update list
    self.priority_type = 'incident'
    fields.append_instance(self)
def __init__(self, gpuf, direction, tmax):
    """Create CPU buffer fields and MPI requests for each requested side.

    For every side named in direction a three-cell-thick cpu.Fields is
    created, plus getter/setter pairs between it and gpuf, one persistent
    send request and two persistent receive requests.

    Args:
        gpuf: gpu.Fields instance whose boundary is exchanged.
        direction: '+', '-' or '+-'. A '+' talks to rank + 1, a '-'
            talks to rank - 1.
        tmax: Stored as self.tmax.
    """
    common.check_type('gpuf', gpuf, gpu.Fields)
    common.check_value('direction', direction, ('+', '-', '+-'))

    # Both buffer fields share a single task queue.
    qtask = cpu.QueueTask()

    if '+' in direction:
        upper = rank + 1
        cpuf_p = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)
        self.cpuf_p = cpuf_p

        self.gf_p_h = gpu.GetFields(gpuf, ['hy', 'hz'], (-2, 0, 0), (-2, -1, -1))
        self.sf_p_h = cpu.SetFields(cpuf_p, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.gf_p_e = cpu.GetFields(cpuf_p, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_p_e = gpu.SetFields(gpuf, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        getter_h = cpu.GetFields(cpuf_p, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.gf_h = getter_h
        self.sf_e = cpu.SetFields(cpuf_p, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.req_send_h = comm.Send_init(getter_h.host_array, upper, tag=0)

        # Two receive buffers with the shape of the outgoing array.
        self.tmp_recv_e_list = []
        for _ in range(2):
            self.tmp_recv_e_list.append(
                np.zeros(getter_h.host_array.shape, gpuf.dtype))

        self.req_recv_e_list = []
        for landing in self.tmp_recv_e_list:
            self.req_recv_e_list.append(
                comm.Recv_init(landing, upper, tag=1))

        self.switch_e = 0

    if '-' in direction:
        lower = rank - 1
        cpuf_m = cpu.Fields(qtask, 3, gpuf.ny, gpuf.nz, gpuf.coeff_use, gpuf.precision_float, use_cpu_core=1)
        self.cpuf_m = cpuf_m

        self.gf_m_e = gpu.GetFields(gpuf, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.sf_m_e = cpu.SetFields(cpuf_m, ['ey', 'ez'], (-1, 0, 0), (-1, -1, -1), True)

        self.gf_m_h = cpu.GetFields(cpuf_m, ['hy', 'hz'], (1, 0, 0), (1, -1, -1))
        self.sf_m_h = gpu.SetFields(gpuf, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        getter_e = cpu.GetFields(cpuf_m, ['ey', 'ez'], (1, 0, 0), (1, -1, -1))
        self.gf_e = getter_e
        self.sf_h = cpu.SetFields(cpuf_m, ['hy', 'hz'], (0, 0, 0), (0, -1, -1), True)

        self.req_send_e = comm.Send_init(getter_e.host_array, lower, tag=1)

        # Two receive buffers with the shape of the outgoing array.
        self.tmp_recv_h_list = []
        for _ in range(2):
            self.tmp_recv_h_list.append(
                np.zeros(getter_e.host_array.shape, gpuf.dtype))

        self.req_recv_h_list = []
        for landing in self.tmp_recv_h_list:
            self.req_recv_h_list.append(
                comm.Recv_init(landing, lower, tag=0))

        self.switch_h = 0

    # global variables
    self.direction = direction
    self.qtask = qtask

    self.tmax = tmax
    self.tstep = 1
def __init__(self, fields, pt0, pt1, ep_inf, drude_freq, gamma, mask_arrays=(1, 1, 1)):
    """Set up the Drude update for a region of an OpenCL field set.

    Allocates three psi buffers and three mask buffers, overwrites the
    masked part of the ce arrays returned by fields.get_ces(), builds the
    'drude.cl' program and registers this object with fields.

    Args:
        fields: Fields instance (provides context, queue, dtype, dt, ns,
            ls, eh_bufs, dtype_str_list and get_ces()).
        pt0: First corner of the region (three indices). It is passed
            through common.convert_indices before being range-checked.
        pt1: Second corner of the region, handled like pt0.
        ep_inf, drude_freq, gamma: Numbers from which the update
            coefficients are computed together with fields.dt.
        mask_arrays: Three masks, one per ce array in the order returned
            by get_ces(). Each is an int or an ndarray with the region's
            shape. Non-zero entries select the cells that receive the
            new coefficient.

    Raises:
        AssertionError: If an array mask does not have the region's
            shape.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('ep_inf', ep_inf, (int, float))
    common.check_type('drude_freq', drude_freq, (int, float))
    common.check_type('gamma', gamma, (int, float))
    common.check_type('mask_arrays', mask_arrays, (list, tuple),
                      (np.ndarray, int))

    # local variables
    pt0 = common.convert_indices(fields.ns, pt0)
    pt1 = common.convert_indices(fields.ns, pt1)
    context = fields.context
    queue = fields.queue
    dtype = fields.dtype
    shape = common.shape_two_points(pt0, pt1, is_dummy=True)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    for mask_array in mask_arrays:
        if isinstance(mask_array, np.ndarray):
            # Report the shape that is actually compared.
            expected_shape = common.shape_two_points(pt0, pt1)
            assert expected_shape == mask_array.shape, \
                'shape mismatch : %s, %s' % (expected_shape,
                                             mask_array.shape)

    # coefficients ('denom' avoids shadowing the MPI communicator name)
    dt = fields.dt
    aa = (2 - gamma * dt) / (2 + gamma * dt)
    bb = drude_freq**2 * dt / (2 + gamma * dt)
    denom = 2 * ep_inf + bb * dt
    ca = 2 * dt / denom
    cb = -(aa + 3) * bb * dt / denom
    cc = -(aa + 1) * dt / denom

    # allocations
    # The psi buffers are created once; an earlier, identical set used
    # to be allocated and immediately discarded.
    f = np.zeros(shape, dtype)
    psi_bufs = [cl.Buffer(context, cl.mem_flags.READ_WRITE, f.nbytes)
                for _ in range(3)]
    for psi_buf in psi_bufs:
        cl.enqueue_copy(queue, psi_buf, f)

    cf = np.ones(shape, dtype)
    mask_bufs = [cl.Buffer(context, cl.mem_flags.READ_ONLY, cf.nbytes)
                 for _ in range(3)]
    for mask_buf, mask in zip(mask_bufs, mask_arrays):
        cl.enqueue_copy(queue, mask_buf, cf * mask)

    # modify ce arrays
    # Each component uses its own mask. The mask is read as a boolean
    # selection, so an int mask of 1 means "replace everywhere" and 0
    # means "leave untouched" (bitwise inversion of an int 1 is -2,
    # which is not a mask).
    slices = common.slices_two_points(pt0, pt1)
    for ce, mask in zip(fields.get_ces(), mask_arrays):
        selected = np.asarray(mask, dtype=bool)
        ce[slices] = np.where(selected, ca, ce[slices])

    # program
    nmax_str, xid_str, yid_str, zid_str = \
        common_gpu.macro_replace_list(pt0, pt1)
    macros = ['NMAX', 'XID', 'YID', 'ZID', 'DX', 'DTYPE', 'PRAGMA_fp64']
    values = [nmax_str, xid_str, yid_str, zid_str, str(fields.ls)] \
        + fields.dtype_str_list

    # Close the template file deterministically.
    with open(common_gpu.src_path + 'drude.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # arguments
    pca = aa
    pcb = (aa + 1) * bb
    args = fields.ns + [dtype(cb), dtype(cc), dtype(pca), dtype(pcb)] \
        + fields.eh_bufs[:3] + psi_bufs + mask_bufs

    # global variables
    self.mainf = fields
    self.program = program
    self.args = args

    # Round the work size up to a multiple of fields.ls.
    nmax = int(nmax_str)
    remainder = nmax % fields.ls
    self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

    # append to the update list
    self.priority_type = 'material'
    fields.append_instance(self)