def __init__(self, fields, axis):
    """Build the 'copy_self.cl' programs for the E and H fields along one axis.

    Args:
        fields: a Fields instance (checked with common.check_type). Its
            ``ns``, ``dtype_str_list`` and ``context`` attributes are read.
        axis: one of 'x', 'y', 'z' (checked with common.check_value).

    Side effects:
        Builds two OpenCL programs and registers this instance on
        ``fields`` through ``append_instance`` with priority type 'pbc'
        (presumably "periodic boundary condition" -- TODO confirm).
    """
    common.check_type('fields', fields, Fields)
    common.check_value('axis', axis, ['x', 'y', 'z'])

    # local variables
    nx, ny, nz = fields.ns
    dtype_str_list = fields.dtype_str_list

    # program
    def replace(lst, idx, val):
        # Copy of lst with the element at idx swapped for val.
        return lst[:idx] + [val] + lst[idx+1:]

    base0 = {'e': [0, 0, 0], 'h': [1, 1, 1]}
    base1 = {'e': [nx-2, ny-2, nz-2], 'h': [nx-1, ny-1, nz-1]}
    axis_id = {'x': 0, 'y': 1, 'z': 2}[axis]
    nn = fields.ns[axis_id]
    # For each field type, the two index values visited along the axis;
    # the order is reversed between 'e' and 'h'.
    plane_ids = {'e': [0, nn-1], 'h': [nn-1, 0]}

    value_dict = {'e': [], 'h': []}
    for eh in ['e', 'h']:
        for idx in plane_ids[eh]:
            pt0 = replace(base0[eh], axis_id, idx)
            pt1 = replace(base1[eh], axis_id, idx)
            nmax, xid, yid, zid = \
                    common_gpu.macro_replace_list(pt0, pt1)
            value_dict[eh].append(
                    '%s*ny*nz + %s*nz + %s' % (xid, yid, zid))

    macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']
    # nmax is whatever the last loop iteration produced (the second 'h'
    # plane); it is used for both programs, exactly as before.
    values_e = [nmax] + value_dict['e'] + dtype_str_list
    values_h = [nmax] + value_dict['h'] + dtype_str_list

    # Read the template once and close the file deterministically.
    with open(common_gpu.src_path + 'copy_self.cl') as template_file:
        template = template_file.read()

    ksrc_e = common.replace_template_code(template, macros, values_e)
    ksrc_h = common.replace_template_code(template, macros, values_h)
    program_e = cl.Program(fields.context, ksrc_e).build()
    program_h = cl.Program(fields.context, ksrc_h).build()

    # global variables
    self.mainf = fields
    self.program_e = program_e
    self.program_h = program_h

    # The two field components that are not along `axis`.
    self.strfs_e = {'x': ['ey', 'ez'], 'y': ['ex', 'ez'], 'z': ['ex', 'ey']}[axis]
    self.strfs_h = {'x': ['hy', 'hz'], 'y': ['hx', 'hz'], 'z': ['hx', 'hy']}[axis]

    # append to the update list
    self.priority_type = 'pbc'
    self.mainf.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare a CUDA 'copy' kernel and buffers for reading a field region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names, each one of
            'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1: list/tuple of int or float, one entry per axis. They
            are passed through common.convert_indices and each converted
            entry must lie in range(n) for the matching fields.ns entry.

    The kernel source comes from the 'copy.cu' template with
    'target[sub_idx] = source[idx]' substituted in. A zeroed host array
    of shape common.shape_two_points(pt0, pt1, len(str_fs)) is created,
    split per field name, and uploaded as the device target buffer.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
              'ARGS',
              'TARGET', 'SOURCE', 'OVERWRITE',
              'DTYPE']
    values = common_gpu.macro_replace_list(pt0, pt1) + \
            ['DTYPE *source',
             'target[sub_idx]', 'source[idx]', '='] + \
            dtype_str_list

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'copy.cu') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = SourceModule(ksrc)
    kernel_copy = program.get_function('copy')

    # allocation
    # The loop variable is named `name` so it does not shadow the `str_f`
    # parameter (list-comprehension variables leak in Python 2).
    source_bufs = [fields.get_buf(name) for name in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    host_array = np.zeros(shape, fields.dtype)
    # One equal slice of the host array per requested field name.
    split_host_array = np.split(host_array, len(str_fs))
    split_host_array_dict = dict(zip(str_fs, split_host_array))
    target_buf = cuda.to_device(host_array)

    # global variables
    self.mainf = fields
    self.kernel_copy = kernel_copy
    self.source_bufs = source_bufs
    self.target_buf = target_buf
    self.host_array = host_array
    self.split_host_array_dict = split_host_array_dict
def __init__(s, fields, axis):
    """Build the E and H 'copy.cl' programs for one axis.

    Args:
        fields: object providing a ``context`` attribute for cl.Program.
        axis: 'x', 'y' or 'z'; any other key raises KeyError from the
            lookup tables below.
    """
    s.emf = fields

    # The two field components that are not along `axis`.
    s.e_strfs = {'x': ['ey', 'ez'], 'y': ['ex', 'ez'], 'z': ['ex', 'ey']}[axis]
    s.h_strfs = {'x': ['hy', 'hz'], 'y': ['hx', 'hz'], 'z': ['hx', 'hy']}[axis]

    macros = ['NMAX', 'IDX1', 'IDX2']

    # Per axis: [NMAX, IDX1, IDX2]. The H table is the E table with IDX1
    # and IDX2 swapped.
    # NOTE(review): the 'y' and 'z' expressions add the whole `gid`
    # (`+ gid`, `gid*nz`) rather than a remainder such as gid % nz --
    # confirm against copy.cl that this is the intended indexing.
    e_vals = {
        'x': ['ny*nz', '(nx-1)*ny*nz + gid', 'gid'],
        'y': ['nx*nz', '(gid/nz)*ny*nz + (ny-1)*nz + gid', '(gid/nz)*ny*nz + gid'],
        'z': ['nx*ny', '(gid/ny)*ny*nz + gid*nz + (nz-1)', '(gid/ny)*ny*nz + gid*nz'],
    }[axis]

    h_vals = {
        'x': ['ny*nz', 'gid', '(nx-1)*ny*nz + gid'],
        'y': ['nx*nz', '(gid/nz)*ny*nz + gid', '(gid/nz)*ny*nz + (ny-1)*nz + gid'],
        'z': ['nx*ny', '(gid/ny)*ny*nz + gid*nz', '(gid/ny)*ny*nz + gid*nz + (nz-1)'],
    }[axis]

    # Read the template once and close the file deterministically.
    with open(common_gpu.src_path + '/copy.cl') as template_file:
        template = template_file.read()

    e_ksrc = common.replace_template_code(template, macros, e_vals)
    h_ksrc = common.replace_template_code(template, macros, h_vals)
    s.program_e = cl.Program(s.emf.context, e_ksrc).build()
    s.program_h = cl.Program(s.emf.context, h_ksrc).build()
def __init__(self, fields, axes):
    """Build one 'copy_self.cl' program per (axis, field type) pair.

    Args:
        fields: a Fields instance (checked with common.check_type). Its
            ``ns``, ``dtype_str_list``, ``context`` and ``ls`` attributes
            are read.
        axes: a string; the assertion only requires that it shares at
            least one character with 'xyz'.

    Attributes set:
        program_dict[axis]['e' | 'h']: the built OpenCL program.
        gs_dict[axis]: NMAX rounded up to a multiple of ``fields.ls``.

    The instance registers itself on ``fields`` with priority type 'pbc'.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('axes', axes, str)

    assert len( set(axes).intersection(set('xyz')) ) > 0, 'axes option is wrong: %s is given' % repr(axes)

    # local variables
    nx, ny, nz = fields.ns
    dtype_str_list = fields.dtype_str_list

    # program
    macros = ['NMAX', 'IDX0', 'IDX1', 'DTYPE', 'PRAGMA_fp64']

    # The template is identical for every program: read it once and close
    # the file instead of re-opening it on each iteration.
    with open(common_gpu.src_path + 'copy_self.cl') as template_file:
        template = template_file.read()

    program_dict = {}
    gs_dict = {}
    for axis in list(axes):
        program_dict[axis] = {}

        for e_or_h in ['e', 'h']:
            # IDX0 is built from the 'get' points.
            pt0 = common_exchange.pt0_dict(nx, ny, nz)[axis][e_or_h]['get']
            pt1 = common_exchange.pt1_dict(nx, ny, nz)[axis][e_or_h]['get']
            nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
            idx0_str = '%s*ny*nz + %s*nz + %s' % (xid_str, yid_str, zid_str)

            # IDX1 is built from the 'set' points; this NMAX is the one used.
            pt0 = common_exchange.pt0_dict(nx, ny, nz)[axis][e_or_h]['set']
            pt1 = common_exchange.pt1_dict(nx, ny, nz)[axis][e_or_h]['set']
            nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)
            idx1_str = '%s*ny*nz + %s*nz + %s' % (xid_str, yid_str, zid_str)

            values = [nmax_str, idx0_str, idx1_str] + dtype_str_list

            ksrc = common.replace_template_code(template, macros, values)
            program_dict[axis][e_or_h] = cl.Program(fields.context, ksrc).build()

        # Work size for this axis, taken from the last ('h', 'set') NMAX
        # and rounded up to a multiple of fields.ls.
        nmax = int(nmax_str)
        remainder = nmax % fields.ls
        gs_dict[axis] = nmax if remainder == 0 else nmax - remainder + fields.ls

    # global variables
    self.mainf = fields
    self.axes = axes
    self.program_dict = program_dict
    self.gs_dict = gs_dict

    # append to the update list
    self.priority_type = 'pbc'
    self.mainf.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1):
    """Prepare an OpenCL 'copy.cl' program and buffers for reading a region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names, each one of
            "ex", "ey", "ez", "hx", "hy", "hz".
        pt0, pt1: list/tuple of int, one entry per axis; each entry must
            lie in range(n) for the matching fields.ns entry.

    The program is generated with 'target[sub_idx] = source[idx]'
    substituted in. A zeroed host array of shape
    common.shape_two_points(pt0, pt1, len(str_fs)) is created, split per
    field name, and copied into a READ_WRITE device buffer.
    """
    common.check_type("fields", fields, Fields)
    common.check_type("str_f", str_f, (str, list, tuple), str)
    common.check_type("pt0", pt0, (list, tuple), int)
    common.check_type("pt1", pt1, (list, tuple), int)

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list

    strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]
    for strf in str_fs:
        common.check_value("str_f", strf, strf_list)

    for axis, n, p0, p1 in zip(["x", "y", "z"], fields.ns, pt0, pt1):
        common.check_value("pt0 %s" % axis, p0, range(n))
        common.check_value("pt1 %s" % axis, p1, range(n))

    # program
    macros = ["NMAX", "XID", "YID", "ZID", "ARGS", "TARGET", "SOURCE", "OVERWRITE", "DTYPE", "PRAGMA_fp64"]
    values = (
        common_gpu.macro_replace_list(pt0, pt1)
        + ["__global DTYPE *source", "target[sub_idx]", "source[idx]", "="]
        + dtype_str_list
    )

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + "copy.cl") as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # allocation
    # `name` avoids shadowing the `str_f` parameter inside the comprehension.
    source_bufs = [fields.get_buf(name) for name in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    host_array = np.zeros(shape, dtype=fields.dtype)
    # One equal slice of the host array per requested field name.
    split_host_array = np.split(host_array, len(str_fs))
    split_host_array_dict = dict(zip(str_fs, split_host_array))
    target_buf = cl.Buffer(
        fields.context,
        cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=host_array,
    )

    # global variables
    self.mainf = fields
    self.program = program
    self.source_bufs = source_bufs
    self.target_buf = target_buf
    self.host_array = host_array
    self.split_host_array_dict = split_host_array_dict
def __init__(self, fields, str_f, pt0, pt1, source_is_array=False):
    """Prepare a 'subdomain.cl' program that writes into a field region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names.
        pt0, pt1: list/tuple of int.
        source_is_array: when True the kernel takes a ``__global DTYPE *``
            source indexed by sub_idx, a device source buffer is
            allocated, and ``set_fields`` is bound to
            ``set_fields_spatial_value``. When False the kernel takes a
            single ``DTYPE`` source and ``set_fields`` is bound to
            ``set_fields_single_value``.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)
    common.check_type('source_is_array', source_is_array, bool)

    self.mainf = mainf = fields
    str_fs = common.convert_to_tuple(str_f)

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
              'ARGS',
              'TARGET', 'SOURCE',
              'DTYPE', 'PRAGMA_fp64']

    # Only the argument declaration and the source expression differ
    # between the array and the single-value variants.
    if source_is_array:
        source_values = ['__global DTYPE *source',
                         'target[idx]', 'source[sub_idx]']
    else:
        source_values = ['DTYPE source',
                         'target[idx]', 'source']
    # Only the first two dtype strings are used, matching the DTYPE and
    # PRAGMA_fp64 macros.
    values = macro_replace_list(pt0, pt1) + \
            source_values + mainf.dtype_str_list[:2]

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'subdomain.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = cl.Program(mainf.context, ksrc).build()

    # allocation
    # `name` avoids shadowing the `str_f` parameter inside the comprehension.
    self.target_bufs = [mainf.get_buf(name) for name in str_fs]
    shape = list(common.shape_two_points(pt0, pt1))
    # Room for one region per field name along the first dimension.
    shape[0] *= len(str_fs)

    if source_is_array:
        tmp_array = np.zeros(shape, dtype=mainf.dtype)
        self.source_buf = cl.Buffer(
                mainf.context,
                cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                hostbuf=tmp_array)
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, fields, axes):
    """Build one 'copy_self.cl' program per (axis, field type) pair.

    Args:
        fields: a Fields instance (checked with common.check_type). Its
            ``ns``, ``dtype_str_list`` and ``context`` attributes are read.
        axes: a string; the assertion only requires that it shares at
            least one character with "xyz".

    Attributes set:
        program_dict[axis]["e" | "h"]: the built OpenCL program.

    The instance registers itself on ``fields`` with priority type "pbc".
    """
    common.check_type("fields", fields, Fields)
    common.check_type("axes", axes, str)

    assert len(set(axes).intersection(set("xyz"))) > 0, "axes option is wrong: %s is given" % repr(axes)

    # local variables
    nx, ny, nz = fields.ns
    dtype_str_list = fields.dtype_str_list

    # program
    macros = ["NMAX", "IDX0", "IDX1", "DTYPE", "PRAGMA_fp64"]

    # The template is identical for every program: read it once and close
    # the file instead of re-opening it on each iteration.
    with open(common_gpu.src_path + "copy_self.cl") as template_file:
        template = template_file.read()

    program_dict = {}
    for axis in list(axes):
        program_dict[axis] = {}

        for e_or_h in ["e", "h"]:
            # IDX0 is built from the "get" points.
            pt0 = common_exchange.pt0_dict(nx, ny, nz)[axis][e_or_h]["get"]
            pt1 = common_exchange.pt1_dict(nx, ny, nz)[axis][e_or_h]["get"]
            nmax, xid, yid, zid = common_gpu.macro_replace_list(pt0, pt1)
            idx0_str = "%s*ny*nz + %s*nz + %s" % (xid, yid, zid)

            # IDX1 is built from the "set" points; this nmax is the one
            # substituted for NMAX.
            pt0 = common_exchange.pt0_dict(nx, ny, nz)[axis][e_or_h]["set"]
            pt1 = common_exchange.pt1_dict(nx, ny, nz)[axis][e_or_h]["set"]
            nmax, xid, yid, zid = common_gpu.macro_replace_list(pt0, pt1)
            idx1_str = "%s*ny*nz + %s*nz + %s" % (xid, yid, zid)

            values = [nmax, idx0_str, idx1_str] + dtype_str_list

            ksrc = common.replace_template_code(template, macros, values)
            program_dict[axis][e_or_h] = cl.Program(fields.context, ksrc).build()

    # global variables
    self.mainf = fields
    self.axes = axes
    self.program_dict = program_dict

    # append to the update list
    self.priority_type = "pbc"
    self.mainf.append_instance(self)
def __init__(self, fields):
    """Compile the CPU 'core.c' template and prepare update_e/update_h calls.

    Args:
        fields: a Fields instance (checked with common.check_type). Reads
            ``precision_float``, ``use_cpu_core``, ``dtype``, ``ns_pitch``,
            ``align_size``, ``pad``, ``ce_on``, ``ch_on``, ``ehs`` and,
            when the matching flag is on, ``ces`` / ``chs``.

    Attributes set:
        program: the library returned by common_cpu.build_clib.
        e_args_dict / h_args_dict: argument lists keyed by '', 'pre' and
            'post'; the keyed variants differ only in the 4th and 5th
            arguments (named idx0 and nmax here).

    The instance registers itself on ``fields`` with priority type 'core'.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    precision_float = fields.precision_float
    use_cpu_core = fields.use_cpu_core
    dtype = fields.dtype

    nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
    align_size = fields.align_size
    pad = fields.pad

    ce_on = fields.ce_on
    ch_on = fields.ch_on

    ehs = fields.ehs

    # pad_str_list
    def mask_to_str(mask):
        # tolist() yields plain Python ints, so the text is "1, 1, 0"
        # regardless of how the installed numpy prints its scalars.
        return str(mask.tolist()).strip('[]')

    mask0 = np.ones(align_size, 'i')

    mask_h = mask0.copy()
    mask_h[0] = 0

    mask_exy = mask0.copy()
    mask_exy[-(pad+1):] = 0

    mask = mask0.copy()
    if pad != 0:
        mask[-pad:] = 0

    # Order matches the MASK_H, MASK_EXY, MASK macros.
    pad_str_list = [mask_to_str(mask_h), mask_to_str(mask_exy), mask_to_str(mask)]

    # program
    dtype_str_list = {
            'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
            'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2']}[precision_float]

    macros = [
            'OMP_HEADER', 'OMP_FOR_E', 'OMP_FOR_H',
            'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
            'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
            'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
            'MASK_H', 'MASK_EXY', 'MASK']

    # Defaults: no OpenMP, constant 0.5 coefficients.
    values = ['', '', '',
              '', 'ce=SET1(0.5)', '', '', '', '',
              '', 'ch=SET1(0.5)', '', '', '', '',
              ] + dtype_str_list + pad_str_list

    if use_cpu_core != 1:
        values[0] = '#include <omp.h>'

        # With 0 no thread count is set explicitly.
        omp_str = '' if use_cpu_core == 0 else 'omp_set_num_threads(%d);\n\t' % use_cpu_core
        values[1] = omp_str + '#pragma omp parallel for private(idx, i, j, k, hx0, hy0, hz0, h1, h2, e PRIVATE_CE)'
        values[2] = omp_str + '#pragma omp parallel for private(idx, i, j, k, ex0, ey0, ez0, e1, e2, h PRIVATE_CH)'

    if ce_on:
        values[3:9] = [
                ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if ch_on:
        values[9:15] = [
                ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Close the template file as soon as it has been read.
    with open(common_cpu.src_path + 'core.c') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype, ndim=3,
            shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

    # arguments
    nyz_pitch = ny * nz_pitch
    e_args = ns_pitch + [0, nx*nyz_pitch] + ehs
    h_args = ns_pitch + [0, nx*nyz_pitch] + ehs

    # Each function gets its OWN argtypes list. Sharing one list and
    # extending it with += would leak the E coefficient entries into
    # update_h's signature when both ce_on and ch_on are set.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if ce_on:
        e_argtypes += [carg] * 3
        e_args += fields.ces
    if ch_on:
        h_argtypes += [carg] * 3
        h_args += fields.chs

    program.update_e.argtypes = e_argtypes
    program.update_e.restype = None
    program.update_h.argtypes = h_argtypes
    program.update_h.restype = None

    def set_args(args, idx0, nmax):
        # Copy of args with the 4th and 5th entries replaced.
        return args[:3] + [idx0, nmax] + args[5:]

    e_args_dict = {
            '': e_args,
            'pre': set_args(e_args, nyz_pitch, 3*nyz_pitch),
            'post': set_args(e_args, 0, nyz_pitch)}

    h_args_dict = {
            '': h_args,
            'pre': set_args(h_args, 0, 2*nyz_pitch),
            'post': set_args(h_args, 2*nyz_pitch, 3*nyz_pitch)}

    # global variables
    self.mainf = fields
    self.program = program
    self.e_args_dict = e_args_dict
    self.h_args_dict = h_args_dict

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare a CUDA 'copy' kernel that writes into a field region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names, each one of
            'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1: list/tuple of int or float, one entry per axis. They
            are passed through common.convert_indices and each converted
            entry must lie in range(n) for the matching fields.ns entry.
        is_array: when True the kernel takes a ``DTYPE *`` source indexed
            by sub_idx and a device source buffer is allocated; when
            False it takes a single ``DTYPE`` source value.
        is_overwrite: True substitutes '=' for OVERWRITE, False '+='.

    ``set_fields`` is bound to ``set_fields_spatial_value`` or
    ``set_fields_single_value`` according to ``is_array``.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list
    overwrite_str = {True: '=', False: '+='}[is_overwrite]

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
              'ARGS',
              'TARGET', 'SOURCE', 'OVERWRITE',
              'DTYPE']

    # Only the argument declaration and the source expression differ
    # between the array and the single-value variants.
    if is_array:
        source_values = ['DTYPE *source',
                         'target[idx]', 'source[sub_idx]', overwrite_str]
    else:
        source_values = ['DTYPE source',
                         'target[idx]', 'source', overwrite_str]
    values = common_gpu.macro_replace_list(pt0, pt1) + \
            source_values + \
            dtype_str_list

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'copy.cu') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = SourceModule(ksrc)
    kernel_copy = program.get_function('copy')

    # allocation
    # `name` avoids shadowing the `str_f` parameter inside the comprehension.
    target_bufs = [fields.get_buf(name) for name in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    # global variables and functions
    self.mainf = fields
    self.kernel_copy = kernel_copy
    self.target_bufs = target_bufs
    self.shape = shape

    if is_array:
        tmp_array = np.zeros(shape, fields.dtype)
        self.source_buf = cuda.to_device(tmp_array)
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(
    self,
    context,
    device,
    nx,
    ny,
    nz,
    coeff_use="e",
    precision_float="single",
    local_work_size=256,
    global_work_size=0,
):
    """Allocate the OpenCL field buffers and build the 'core.cl' program.

    Args:
        context: cl.Context.
        device: cl.Device.
        nx, ny, nz: int grid sizes.
        coeff_use: "", "e", "h" or "eh"; selects which coefficient
            buffers are allocated and passed to the kernels.
        precision_float: "single" or "double".
        local_work_size: int, stored as ``ls``.
        global_work_size: int, stored as ``gs``; 0 means "ask
            common_gpu.get_optimal_gs(device)".

    The z size is padded up to a multiple of ``align_size`` (16 for
    single, 8 for double precision); ``nz_pitch`` is the padded size and
    ``slz`` is a slice that drops the padding.
    """
    common.check_type("context", context, cl.Context)
    common.check_type("device", device, cl.Device)
    common.check_type("nx", nx, int)
    common.check_type("ny", ny, int)
    common.check_type("nz", nz, int)
    common.check_type("global_work_size", global_work_size, int)
    common.check_type("local_work_size", local_work_size, int)

    common.check_value("coeff_use", coeff_use, ("", "e", "h", "eh"))
    common.check_value("precision_float", precision_float, ("single", "double"))

    self.context = context
    self.device = device
    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.ls = local_work_size
    self.gs = global_work_size
    self.coeff_use = coeff_use
    self.dtype = {"single": np.float32, "double": np.float64}[precision_float]
    self.dtype_str = {"single": "float", "double": "double"}[precision_float]
    # [DTYPE, PRAGMA_fp64]; the PAD entry is appended below.
    self.dtype_str_list = {
        "single": ["float", ""],
        "double": ["double", "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"],
    }[precision_float]

    self.device_type = "gpu"

    # padding for the nz which is multiple of 16 (float32) or 8 (float64)
    self.align_size = a_size = {"single": 16, "double": 8}[precision_float]  # 64 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad

    self.dtype_str_list.append("" if pad == 0 else "-%s" % pad)

    # ns, queue, global_size
    self.ns = [np.int32(nx), np.int32(ny), np.int32(nz)]
    self.ns_pitch = [np.int32(nx), np.int32(ny), np.int32(nz_pitch)]
    self.ns_pad = [np.int32(nx), np.int32(ny), np.int32(pad)]

    self.queue = cl.CommandQueue(self.context, self.device)
    if self.gs == 0:
        self.gs = common_gpu.get_optimal_gs(self.device)

    # on/off the coefficient arrays
    self.ce_on = "e" in self.coeff_use
    self.ch_on = "h" in self.coeff_use

    # allocations
    f = np.zeros(self.ns_pitch, dtype=self.dtype)
    # Coefficient buffers have the same shape and dtype as the fields, so
    # f.nbytes gives their size without allocating a second host array.
    nbytes = f.nbytes

    mf = cl.mem_flags
    self.eh_bufs = [cl.Buffer(self.context, mf.READ_WRITE, nbytes) for i in range(6)]
    # The six field buffers start out zero-filled.
    for eh_buf in self.eh_bufs:
        cl.enqueue_copy(self.queue, eh_buf, f)
    self.ex_buf, self.ey_buf, self.ez_buf = self.eh_bufs[:3]
    self.hx_buf, self.hy_buf, self.hz_buf = self.eh_bufs[3:]

    # NOTE(review): the coefficient buffers are only allocated here; no
    # initial contents are copied into them in this constructor.
    if self.ce_on:
        self.ce_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) for i in range(3)]
        self.cex_buf, self.cey_buf, self.cez_buf = self.ce_bufs

    if self.ch_on:
        self.ch_bufs = [cl.Buffer(self.context, mf.READ_ONLY, nbytes) for i in range(3)]
        self.chx_buf, self.chy_buf, self.chz_buf = self.ch_bufs

    del f

    # program
    macros = ["ARGS_CE", "CEX", "CEY", "CEZ", "ARGS_CH", "CHX", "CHY", "CHZ", "DX", "DTYPE", "PRAGMA_fp64", "PAD"]

    # Defaults: no coefficient arguments, constant 0.5 coefficients.
    values = ["", "0.5", "0.5", "0.5", "", "0.5", "0.5", "0.5", str(self.ls)] + self.dtype_str_list

    self.e_args = self.ns_pitch + self.eh_bufs
    self.h_args = self.ns_pitch + self.eh_bufs
    if self.ce_on:
        values[:4] = [
            ", __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez",
            "cex[idx]",
            "cey[idx]",
            "cez[idx]",
        ]
        self.e_args += self.ce_bufs

    if self.ch_on:
        values[4:8] = [
            ", __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz",
            "chx[idx]",
            "chy[idx]",
            "chz[idx]",
        ]
        self.h_args += self.ch_bufs

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + "core.cl") as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = cl.Program(self.context, ksrc).build()
def __init__(self, nx, ny, nz,
             coeff_use='e',
             precision_float='single',
             use_cpu_core=0):
    """Allocate the CPU field arrays and compile the 'core.c' template.

    Args:
        nx, ny, nz: int grid sizes.
        coeff_use: '', 'e', 'h' or 'eh'; selects which coefficient arrays
            are allocated and passed to update_e / update_h.
        precision_float: 'single' or 'double'.
        use_cpu_core: when non-zero, 'omp_set_num_threads(<n>);' is
            substituted for the OMP_SET_NUM_THREADS macro.

    The z size is padded up to a multiple of 4 (single) or 2 (double);
    ``nz_pitch`` is the padded size and ``slz`` is a slice that drops the
    padding. Coefficient arrays are initialised to 0.5.
    """
    common.check_type('nx', nx, int)
    common.check_type('ny', ny, int)
    common.check_type('nz', nz, int)
    common.check_value('coeff_use', coeff_use, ('', 'e', 'h', 'eh'))
    common.check_value('precision_float', precision_float, ('single', 'double'))

    self.nx = nx
    self.ny = ny
    self.nz = nz
    self.coeff_use = coeff_use
    self.dtype = {'single': np.float32, 'double': np.float64}[precision_float]
    # [DTYPE, MM_HEADER, PSD, TYPE128, INCRE, MASK_H]; MASK_E is appended
    # below once the padding is known.
    self.dtype_str_list = {
            'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4', '0, 1, 1, 1'],
            'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2', '0, 1']}[precision_float]

    self.device_type = 'cpu'

    # padding for the nz which is multiple of 4 (float32) or 2 (float64)
    a_size = {'single': 4, 'double': 2}[precision_float]  # 16 Bytes
    self.pad = pad = int(np.ceil(float(nz) / a_size) * a_size) - nz
    self.slz = slice(None, None) if pad == 0 else slice(None, -pad)
    self.nz_pitch = nz_pitch = nz + pad

    mask_arr = np.ones(a_size, 'i')
    mask_arr[-(pad+1):] = 0
    # tolist() yields plain Python ints, so the text is "1, 1, 0"
    # regardless of how the installed numpy prints its scalars.
    self.dtype_str_list.append(str(mask_arr.tolist()).strip('[]'))

    # ns, qtask, enqueue
    self.ns = [nx, ny, nz]
    self.ns_pitch = [nx, ny, nz_pitch]
    self.qtask = QueueTask()
    self.enqueue = self.qtask.enqueue
    self.enqueue_barrier = self.qtask.enqueue_barrier

    # on/off the coefficient arrays
    self.ce_on = 'e' in self.coeff_use
    self.ch_on = 'h' in self.coeff_use

    # allocations
    self.ehs = [np.zeros(self.ns_pitch, dtype=self.dtype) for i in range(6)]
    self.ex, self.ey, self.ez, self.hx, self.hy, self.hz = self.ehs

    if self.ce_on:
        self.ces = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
        self.cex, self.cey, self.cez = self.ces

    if self.ch_on:
        self.chs = [np.ones(self.ns_pitch, dtype=self.dtype)*0.5 for i in range(3)]
        self.chx, self.chy, self.chz = self.chs

    # program
    macros = [
            'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
            'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
            'OMP_SET_NUM_THREADS',
            'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE', 'MASK_H', 'MASK_E']

    # Defaults: constant 0.5 coefficients, no explicit thread count.
    values = [
            '', 'ce=SET1(0.5)', '', '', '', '',
            '', 'ch=SET1(0.5)', '', '', '', '',
            ''] + self.dtype_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if self.ce_on:
        values[:6] = [
                ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if self.ch_on:
        values[6:12] = [
                ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Close the template file as soon as it has been read.
    with open(common_cpu.src_path + 'core.c') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    self.program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype=self.dtype, ndim=3,
            shape=(nx, ny, nz_pitch), flags='C_CONTIGUOUS, ALIGNED')

    self.e_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs
    self.h_args = self.ns_pitch + [0, nx*ny*nz_pitch] + self.ehs

    # Each function gets its OWN argtypes list. Sharing one list and
    # extending it with += would leak the E coefficient entries into
    # update_h's signature when coeff_use is 'eh'.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if self.ce_on:
        e_argtypes += [carg] * 3
        self.e_args += self.ces
    if self.ch_on:
        h_argtypes += [carg] * 3
        self.h_args += self.chs

    self.program.update_e.argtypes = e_argtypes
    self.program.update_e.restype = None
    self.program.update_h.argtypes = h_argtypes
    self.program.update_h.restype = None
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare an OpenCL 'copy.cl' program that writes into a field region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names, each one of
            'ex', 'ey', 'ez', 'hx', 'hy', 'hz'.
        pt0, pt1: list/tuple of int or float, one entry per axis. They
            are passed through common.convert_indices and each converted
            entry must lie in range(n) for the matching fields.ns entry.
        is_array: when True the kernel takes a ``__global DTYPE *``
            source indexed by sub_idx and a device source buffer is
            allocated; when False it takes a single ``DTYPE`` source.
        is_overwrite: True substitutes '=' for OVERWRITE, False '+='.

    Attributes set include ``gs``: NMAX rounded up to a multiple of
    ``fields.ls``. ``set_fields`` is bound to ``set_fields_spatial_value``
    or ``set_fields_single_value`` according to ``is_array``.
    """
    common.check_type('fields', fields, Fields)
    common.check_type('str_f', str_f, (str, list, tuple), str)
    common.check_type('pt0', pt0, (list, tuple), (int, float))
    common.check_type('pt1', pt1, (list, tuple), (int, float))
    common.check_type('is_array', is_array, bool)
    common.check_type('is_overwrite', is_overwrite, bool)

    pt0 = list(common.convert_indices(fields.ns, pt0))
    pt1 = list(common.convert_indices(fields.ns, pt1))

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list
    overwrite_str = {True: '=', False: '+='}[is_overwrite]

    strf_list = ['ex', 'ey', 'ez', 'hx', 'hy', 'hz']
    for strf in str_fs:
        common.check_value('str_f', strf, strf_list)

    for axis, n, p0, p1 in zip(['x', 'y', 'z'], fields.ns, pt0, pt1):
        common.check_value('pt0 %s' % axis, p0, range(n))
        common.check_value('pt1 %s' % axis, p1, range(n))

    # program
    macros = ['NMAX', 'XID', 'YID', 'ZID',
              'ARGS',
              'TARGET', 'SOURCE', 'OVERWRITE',
              'DTYPE', 'PRAGMA_fp64']

    nmax_str, xid_str, yid_str, zid_str = common_gpu.macro_replace_list(pt0, pt1)

    # Only the argument declaration and the source expression differ
    # between the array and the single-value variants.
    if is_array:
        source_values = ['__global DTYPE *source',
                         'target[idx]', 'source[sub_idx]', overwrite_str]
    else:
        source_values = ['DTYPE source',
                         'target[idx]', 'source', overwrite_str]
    values = [nmax_str, xid_str, yid_str, zid_str] + \
            source_values + \
            dtype_str_list

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'copy.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # allocation
    # `name` avoids shadowing the `str_f` parameter inside the comprehension.
    target_bufs = [fields.get_buf(name) for name in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    # global variables and functions
    self.mainf = fields
    self.program = program
    self.target_bufs = target_bufs
    self.shape = shape

    # Round NMAX up to a multiple of fields.ls.
    nmax = int(nmax_str)
    remainder = nmax % fields.ls
    self.gs = nmax if remainder == 0 else nmax - remainder + fields.ls

    if is_array:
        tmp_array = np.zeros(shape, dtype=fields.dtype)
        self.source_buf = cl.Buffer(
                fields.context,
                cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
                hostbuf=tmp_array)
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, fields):
    """Build the 'core_split.cl' program and its per-part argument lists.

    Args:
        fields: a Fields instance (checked with common.check_type). Reads
            ``context``, ``ns_pitch``, ``pad``, ``precision_float``,
            ``dtype_str_list``, ``ce_on``, ``ch_on``, ``eh_bufs``, ``ls``
            and, when the matching flag is on, ``ce_bufs`` / ``ch_bufs``.

    Attributes set:
        program: the built OpenCL program.
        e_args_dict / h_args_dict: kernel argument lists keyed by '',
            'pre', 'mid' and 'post'; each begins with two np.int32 values
            (a start offset and an end bound, judging by the values).
        gs_dict: element counts for the same keys, rounded up to a
            multiple of ``fields.ls``.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    context = fields.context
    ns_pitch = fields.ns_pitch
    pad = fields.pad
    precision_float = fields.precision_float
    dtype_str_list = fields.dtype_str_list

    ce_on = fields.ce_on
    ch_on = fields.ch_on
    eh_bufs = fields.eh_bufs
    ls = fields.ls

    # program
    str_pad = '' if pad == 0 else '-%s' % pad
    coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

    macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ',
              'ARGS_CH', 'CHX', 'CHY', 'CHZ',
              'DX', 'PAD', 'DTYPE', 'PRAGMA_fp64']

    # Defaults: no coefficient arguments, constant coefficients.
    values = ['', coeff_constant, coeff_constant, coeff_constant,
              '', coeff_constant, coeff_constant, coeff_constant,
              str(ls), str_pad] + dtype_str_list

    # arguments (new lists, so the += below never touches fields' lists)
    e_args = ns_pitch + eh_bufs
    h_args = ns_pitch + eh_bufs

    if ce_on:
        values[:4] = [
                ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez',
                'cex[idx]', 'cey[idx]', 'cez[idx]']
        e_args += fields.ce_bufs

    if ch_on:
        values[4:8] = [
                ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz',
                'chx[idx]', 'chy[idx]', 'chz[idx]']
        h_args += fields.ch_bufs

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'core_split.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(context, ksrc).build()

    nx, ny, nz_pitch = ns_pitch
    nyzp = ny * nz_pitch

    e_args_dict = {
            '': [np.int32(0), np.int32(nx*nyzp)] + e_args,
            'pre': [np.int32(nyzp), np.int32(2*nyzp)] + e_args,
            'mid': [np.int32(2*nyzp), np.int32(nx*nyzp)] + e_args,
            'post': [np.int32(0), np.int32(nyzp)] + e_args}

    h_args_dict = {
            '': [np.int32(0), np.int32(nx*nyzp)] + h_args,
            'pre': [np.int32((nx-2)*nyzp), np.int32((nx-1)*nyzp)] + h_args,
            'mid': [np.int32(0), np.int32((nx-2)*nyzp)] + h_args,
            'post': [np.int32((nx-1)*nyzp), np.int32(nx*nyzp)] + h_args}

    def gs(n):
        # n rounded up to the next multiple of fields.ls, as a Python int.
        remainder = n % fields.ls
        if remainder == 0:
            return int(n)
        return int(n - remainder + fields.ls)

    gs_dict = {
            '': gs(nx*nyzp),
            'pre': gs(nyzp),
            'mid': gs((nx-2)*nyzp),
            'post': gs(nyzp)}

    # global variables and functions
    self.mainf = fields
    self.program = program
    self.e_args_dict = e_args_dict
    self.h_args_dict = h_args_dict
    self.gs_dict = gs_dict
def __init__(self, fields):
    """Build the 'core.cl' program and the update_e / update_h arguments.

    Args:
        fields: a Fields instance (checked with common.check_type). Reads
            ``context``, ``ns_pitch``, ``pad``, ``precision_float``,
            ``dtype_str_list``, ``ce_on``, ``ch_on``, ``eh_bufs``, ``ls``
            and, when the matching flag is on, ``ce_bufs`` / ``ch_bufs``.

    Attributes set:
        program: the built OpenCL program.
        e_args / h_args: ``ns_pitch + eh_bufs``, followed by the
            coefficient buffers when enabled.

    The instance registers itself on ``fields`` with priority type 'core'.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    context = fields.context
    ns_pitch = fields.ns_pitch
    pad = fields.pad
    precision_float = fields.precision_float
    dtype_str_list = fields.dtype_str_list

    ce_on = fields.ce_on
    ch_on = fields.ch_on
    eh_bufs = fields.eh_bufs
    ls = fields.ls

    # program
    str_pad = '' if pad == 0 else '-%s' % pad
    coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

    macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ',
              'ARGS_CH', 'CHX', 'CHY', 'CHZ',
              'DX', 'PAD', 'DTYPE', 'PRAGMA_fp64']

    # Defaults: no coefficient arguments, constant coefficients.
    values = ['', coeff_constant, coeff_constant, coeff_constant,
              '', coeff_constant, coeff_constant, coeff_constant,
              str(ls), str_pad] + dtype_str_list

    # arguments (new lists, so the += below never touches fields' lists)
    e_args = ns_pitch + eh_bufs
    h_args = ns_pitch + eh_bufs

    if ce_on:
        values[:4] = [
                ', __global DTYPE *cex, __global DTYPE *cey, __global DTYPE *cez',
                'cex[idx]', 'cey[idx]', 'cez[idx]']
        e_args += fields.ce_bufs

    if ch_on:
        values[4:8] = [
                ', __global DTYPE *chx, __global DTYPE *chy, __global DTYPE *chz',
                'chx[idx]', 'chy[idx]', 'chz[idx]']
        h_args += fields.ch_bufs

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'core.cl') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(context, ksrc).build()

    # global variables and functions
    self.mainf = fields
    self.e_args = e_args
    self.h_args = h_args
    self.program = program

    # append to the update list
    self.priority_type = 'core'
    self.mainf.append_instance(self)
def __init__(self, fields, str_f, pt0, pt1, is_array=False, is_overwrite=True):
    """Prepare an OpenCL 'copy.cl' program that writes into a field region.

    Args:
        fields: a Fields instance (checked with common.check_type).
        str_f: a field name or a list/tuple of names, each one of
            "ex", "ey", "ez", "hx", "hy", "hz".
        pt0, pt1: list/tuple of int, one entry per axis; each entry must
            lie in range(n) for the matching fields.ns entry.
        is_array: when True the kernel takes a ``__global DTYPE *``
            source indexed by sub_idx and a device source buffer is
            allocated; when False it takes a single ``DTYPE`` source.
        is_overwrite: True substitutes "=" for OVERWRITE, False "+=".

    ``set_fields`` is bound to ``set_fields_spatial_value`` or
    ``set_fields_single_value`` according to ``is_array``.
    """
    common.check_type("fields", fields, Fields)
    common.check_type("str_f", str_f, (str, list, tuple), str)
    common.check_type("pt0", pt0, (list, tuple), int)
    common.check_type("pt1", pt1, (list, tuple), int)
    common.check_type("is_array", is_array, bool)
    common.check_type("is_overwrite", is_overwrite, bool)

    # local variables
    str_fs = common.convert_to_tuple(str_f)
    dtype_str_list = fields.dtype_str_list
    overwrite_str = {True: "=", False: "+="}[is_overwrite]

    strf_list = ["ex", "ey", "ez", "hx", "hy", "hz"]
    for strf in str_fs:
        common.check_value("str_f", strf, strf_list)

    for axis, n, p0, p1 in zip(["x", "y", "z"], fields.ns, pt0, pt1):
        common.check_value("pt0 %s" % axis, p0, range(n))
        common.check_value("pt1 %s" % axis, p1, range(n))

    # program
    macros = ["NMAX", "XID", "YID", "ZID", "ARGS", "TARGET", "SOURCE", "OVERWRITE", "DTYPE", "PRAGMA_fp64"]

    # Only the argument declaration and the source expression differ
    # between the array and the single-value variants.
    if is_array:
        source_values = ["__global DTYPE *source", "target[idx]", "source[sub_idx]", overwrite_str]
    else:
        source_values = ["DTYPE source", "target[idx]", "source", overwrite_str]
    values = (
        common_gpu.macro_replace_list(pt0, pt1)
        + source_values
        + dtype_str_list
    )

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + "copy.cl") as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = cl.Program(fields.context, ksrc).build()

    # allocation
    # `name` avoids shadowing the `str_f` parameter inside the comprehension.
    target_bufs = [fields.get_buf(name) for name in str_fs]
    shape = common.shape_two_points(pt0, pt1, len(str_fs))

    # global variables and functions
    self.mainf = fields
    self.program = program
    self.target_bufs = target_bufs
    self.shape = shape

    if is_array:
        tmp_array = np.zeros(shape, dtype=fields.dtype)
        self.source_buf = cl.Buffer(
            fields.context,
            cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
            hostbuf=tmp_array,
        )
        self.set_fields = self.set_fields_spatial_value
    else:
        self.set_fields = self.set_fields_single_value
def __init__(self, fields):
    """Compile the CUDA 'core.cu' template and prepare both update kernels.

    Args:
        fields: a Fields instance (checked with common.check_type). Reads
            ``ns_pitch``, ``pad``, ``precision_float``, ``dtype_str_list``,
            ``ce_on``, ``ch_on``, ``eh_bufs``, ``bs`` and, when the
            matching flag is on, ``ce_bufs`` / ``ch_bufs``.

    Attributes set:
        kernel_update_e / kernel_update_h: the 'update_e' and 'update_h'
            functions, prepared with the Python types of their arguments.
        e_args / h_args: ``ns_pitch + eh_bufs``, followed by the
            coefficient buffers when enabled.

    The instance registers itself on ``fields`` with priority type 'core'.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    ns_pitch = fields.ns_pitch
    pad = fields.pad
    precision_float = fields.precision_float
    dtype_str_list = fields.dtype_str_list

    ce_on = fields.ce_on
    ch_on = fields.ch_on

    eh_bufs = fields.eh_bufs
    bs = fields.bs

    # program
    str_pad = '' if pad == 0 else '-%s' % pad
    coeff_constant = {'single': '0.5f', 'double': '0.5'}[precision_float]

    macros = ['ARGS_CE', 'CEX', 'CEY', 'CEZ',
              'ARGS_CH', 'CHX', 'CHY', 'CHZ',
              'DX', 'PAD', 'DTYPE']

    # Defaults: no coefficient arguments, constant coefficients.
    values = ['', coeff_constant, coeff_constant, coeff_constant,
              '', coeff_constant, coeff_constant, coeff_constant,
              str(bs[0]), str_pad] + dtype_str_list

    if ce_on:
        values[:4] = [
                ', DTYPE *cex, DTYPE *cey, DTYPE *cez',
                'cex[idx]', 'cey[idx]', 'cez[idx]']

    if ch_on:
        values[4:8] = [
                ', DTYPE *chx, DTYPE *chy, DTYPE *chz',
                'chx[idx]', 'chy[idx]', 'chz[idx]']

    # Close the template file as soon as it has been read.
    with open(common_gpu.src_path + 'core.cu') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = SourceModule(ksrc)
    kernel_update_e = program.get_function('update_e')
    kernel_update_h = program.get_function('update_h')

    # arguments
    # The coefficient buffers are read from `fields` only when the
    # matching flag is on, so a Fields object that does not define them
    # while the flag is off still works.
    args = ns_pitch + eh_bufs
    e_args = args + fields.ce_bufs if ce_on else args
    h_args = args + fields.ch_bufs if ch_on else args

    kernel_update_e.prepare([type(arg) for arg in e_args])
    kernel_update_h.prepare([type(arg) for arg in h_args])

    # global variables and functions
    self.mainf = fields
    self.e_args = e_args
    self.h_args = h_args
    self.kernel_update_e = kernel_update_e
    self.kernel_update_h = kernel_update_h

    # append to the update list
    self.priority_type = 'core'
    self.mainf.append_instance(self)
def __init__(self, fields):
    """Compile the CPU 'core.c' template and prepare split update calls.

    Args:
        fields: a Fields instance (checked with common.check_type). Reads
            ``precision_float``, ``use_cpu_core``, ``dtype``, ``ns_pitch``,
            ``align_size``, ``pad``, ``ce_on``, ``ch_on``, ``ehs`` and,
            when the matching flag is on, ``ces`` / ``chs``.

    Attributes set:
        program: the library returned by common_cpu.build_clib.
        e_args / h_args: the full-range argument lists.
        e_args_dict / h_args_dict: argument lists keyed by '', 'pre',
            'mid' and 'post'; the keyed variants differ only in the 4th
            and 5th arguments (presumably a start offset and an end
            bound -- TODO confirm against core.c).

    The instance registers itself on ``fields`` with priority type 'core'.
    """
    common.check_type('fields', fields, Fields)

    # local variables
    precision_float = fields.precision_float
    use_cpu_core = fields.use_cpu_core
    dtype = fields.dtype

    nx, ny, nz_pitch = ns_pitch = fields.ns_pitch
    align_size = fields.align_size
    pad = fields.pad

    ce_on = fields.ce_on
    ch_on = fields.ch_on

    ehs = fields.ehs

    # program
    dtype_str_list = {
            'single': ['float', 'xmmintrin.h', 'ps', '__m128', '4'],
            'double': ['double', 'emmintrin.h', 'pd', '__m128d', '2']}[precision_float]

    def mask_to_str(mask):
        # tolist() yields plain Python ints, so the text is "1, 1, 0"
        # regardless of how the installed numpy prints its scalars.
        return str(mask.tolist()).strip('[]')

    mask0 = np.ones(align_size, 'i')

    mask_h = mask0.copy()
    mask_h[0] = 0

    mask_exy = mask0.copy()
    mask_exy[-(pad+1):] = 0

    mask = mask0.copy()
    if pad != 0:
        mask[-pad:] = 0

    # Order matches the MASK_H, MASK_EXY, MASK macros.
    pad_str_list = [mask_to_str(mask_h), mask_to_str(mask_exy), mask_to_str(mask)]

    macros = [
            'ARGS_CE', 'INIT_CE', 'PRIVATE_CE', 'CEX', 'CEY', 'CEZ',
            'ARGS_CH', 'INIT_CH', 'PRIVATE_CH', 'CHX', 'CHY', 'CHZ',
            'OMP_SET_NUM_THREADS',
            'DTYPE', 'MM_HEADER', 'PSD', 'TYPE128', 'INCRE',
            'MASK_H', 'MASK_EXY', 'MASK']

    # Defaults: constant 0.5 coefficients, no explicit thread count.
    values = [
            '', 'ce=SET1(0.5)', '', '', '', '',
            '', 'ch=SET1(0.5)', '', '', '', '',
            ''] + dtype_str_list + pad_str_list

    if use_cpu_core != 0:
        values[12] = 'omp_set_num_threads(%d);' % use_cpu_core

    if ce_on:
        values[:6] = [
                ', DTYPE *cex, DTYPE *cey, DTYPE *cez', 'ce', ', ce',
                'ce = LOAD(cex+idx);', 'ce = LOAD(cey+idx);', 'ce = LOAD(cez+idx);']
    if ch_on:
        values[6:12] = [
                ', DTYPE *chx, DTYPE *chy, DTYPE *chz', 'ch', ', ch',
                'ch = LOAD(chx+idx);', 'ch = LOAD(chy+idx);', 'ch = LOAD(chz+idx);']

    # Close the template file as soon as it has been read.
    with open(common_cpu.src_path + 'core.c') as template_file:
        template = template_file.read()
    ksrc = common.replace_template_code(template, macros, values)
    program = common_cpu.build_clib(ksrc)

    carg = np.ctypeslib.ndpointer(dtype, ndim=3,
            shape=tuple(ns_pitch), flags='C_CONTIGUOUS, ALIGNED')

    # arguments
    nyz_pitch = ny * nz_pitch
    e_args = ns_pitch + [0, nx*nyz_pitch] + ehs
    h_args = ns_pitch + [0, nx*nyz_pitch] + ehs

    # Each function gets its OWN argtypes list. Sharing one list and
    # extending it with += would leak the E coefficient entries into
    # update_h's signature when both ce_on and ch_on are set.
    e_argtypes = [c_int] * 5 + [carg] * 6
    h_argtypes = [c_int] * 5 + [carg] * 6
    if ce_on:
        e_argtypes += [carg] * 3
        e_args += fields.ces
    if ch_on:
        h_argtypes += [carg] * 3
        h_args += fields.chs

    program.update_e.argtypes = e_argtypes
    program.update_e.restype = None
    program.update_h.argtypes = h_argtypes
    program.update_h.restype = None

    def with_range(args, first, second):
        # Copy of args with the 4th and 5th entries replaced.
        return args[:3] + [first, second] + args[5:]

    pre_e_args = with_range(e_args, (nx-2)*nyz_pitch, nx*nyz_pitch)
    mid_e_args = with_range(e_args, nyz_pitch, (nx-2)*nyz_pitch)
    post_e_args = with_range(e_args, 0, nyz_pitch)

    pre_h_args = with_range(h_args, 0, 2*nyz_pitch)
    mid_h_args = with_range(h_args, 2*nyz_pitch, (nx-1)*nyz_pitch)
    post_h_args = with_range(h_args, (nx-1)*nyz_pitch, nx*nyz_pitch)

    # global variables
    self.mainf = fields
    self.e_args = e_args
    self.h_args = h_args
    self.program = program

    self.e_args_dict = {'': e_args,
            'pre': pre_e_args, 'mid': mid_e_args, 'post': post_e_args}
    self.h_args_dict = {'': h_args,
            'pre': pre_h_args, 'mid': mid_h_args, 'post': post_h_args}

    # append to the update list
    self.priority_type = 'core'
    fields.append_instance(self)