def _generate_kernel_arg_decls(self):
    """Build the kernel/library argument declarations and the per-dat
    struct typedefs, storing the results in ``self._components``."""
    arg_decls = []
    lib_arg_decls = []
    struct_typedefs = cgen.Module(
        [cgen.Comment('#### Structs generated per ParticleDat ####')])

    # Static (constant) kernel arguments are declared const by-value.
    if self._kernel.static_args is not None:
        for name, ctype in self._kernel.static_args.items():
            arg_decls.append(
                cgen.Const(cgen.Value(host.ctypes_map[ctype], name)))

    for name, dat_pair in self._dat_dict.items():
        assert type(dat_pair) is tuple, "Access descriptors not found"
        obj = dat_pair[0]     # backing data object (Array / Matrix)
        access = dat_pair[1]  # its access descriptor

        lib_arg = cgen.Pointer(
            cgen.Value(host.ctypes_map[obj.dtype],
                       Restrict(self._cc.restrict_keyword, name)))

        if issubclass(type(obj), host._Array):
            # Plain arrays pass straight through as (const) restrict pointers.
            arr_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[obj.dtype],
                           Restrict(self._cc.restrict_keyword, name)))
            if not access.write:
                arr_arg = cgen.Const(arr_arg)
            arg_decls.append(arr_arg)

        elif issubclass(type(obj), host.Matrix):
            # Matrices are exposed to the kernel through a generated
            # struct typedef `_<name>_t` holding row pointers i and j.
            dtype = obj.dtype
            ptr_i = cgen.Pointer(
                cgen.Value(ctypes_map(dtype),
                           Restrict(self._cc.restrict_keyword, 'i')))
            ptr_j = cgen.Pointer(
                cgen.Value(ctypes_map(dtype),
                           Restrict(self._cc.restrict_keyword, 'j')))
            if not access.write:
                ptr_i = cgen.Const(ptr_i)
                ptr_j = cgen.Const(ptr_j)
            typename = '_' + name + '_t'
            struct_typedefs.append(
                cgen.Typedef(cgen.Struct('', [ptr_i, ptr_j], typename)))
            # Struct instance is the actual kernel argument.
            arg_decls.append(cgen.Value(typename, name))

        if not access.write:
            lib_arg = cgen.Const(lib_arg)
        lib_arg_decls.append(lib_arg)

    self._components['KERNEL_ARG_DECLS'] = arg_decls
    self._components['KERNEL_LIB_ARG_DECLS'] = lib_arg_decls
    self._components['KERNEL_STRUCT_TYPEDEFS'] = struct_typedefs
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Generate the complete C source string for a particle loop:
    headers, the particle struct typedef, the caller-supplied kernel,
    and the time-stepping driver function ``particle_loop``."""
    ccode = []

    # Headers required by every generated module.
    ccode.append(str(c.Include("parcels.h", system=False)))
    ccode.append(str(c.Include("math.h", system=False)))

    # Particle struct typedef; 64-bit unsigned fields become void pointers.
    vdecl = [c.Pointer(c.POD(np.void, v.name)) if v.dtype == np.uint64
             else c.POD(v.dtype, v.name)
             for v in self.ptype.variables]
    ccode.append(str(c.Typedef(
        c.GenerableStruct("", vdecl, declname=self.ptype.name))))

    # Optional user-supplied C snippet, then the kernel body itself.
    if c_include:
        ccode.append(c_include)
    ccode.append(str(kernel_ast))

    # Signature of the outer driver function.
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"),
            c.Value("float", "dt")]
    for field in field_args:
        args.append(c.Pointer(c.Value("CField", "%s" % field)))
    for const in const_args:
        args.append(c.Value("float", const))
    fargs_str = ", ".join(['particles[p].time', 'sign_dt * __dt']
                          + list(field_args.keys())
                          + list(const_args.keys()))

    # Statement fragments composed into the loop nest below.
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    sign_end_part = c.Assign(
        "sign_end_part", "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))

    # Inner time loop: run the kernel, store its status on the particle,
    # then either advance time (SUCCESS), retry (REPEAT) or bail out.
    body = [
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str)),
        c.Assign("particles[p].state", "res"),  # store return code
        c.If("res == SUCCESS",
             c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                      dt_pos, dt_0_break, c.Statement("continue")])),
        c.If("res == REPEAT",
             c.Block([dt_pos, c.Statement("continue")]),
             c.Statement("break")),
    ]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                        c.Block(body))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))

    fbody = c.Block([c.Value("int", "p, sign_dt, sign_end_part"),
                     c.Value("ErrorCode", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"),
                     sign_dt, part_loop])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode.append(str(c.FunctionBody(fdecl, fbody)))
    return "\n\n".join(ccode)
def generate(self, funcname, field_args, kernel_ast, adaptive=False):
    """Generate C source: headers, the particle struct typedef, the
    kernel, and a ``particle_loop`` driver with separate forward and
    backward time-integration loop nests."""
    ccode = [str(c.Include("parcels.h", system=False)),
             str(c.Include("math.h", system=False))]

    # Particle struct typedef built from the ptype's variable table.
    vdecl = [c.POD(dtype, var)
             for var, dtype in self.ptype.var_types.items()]
    ccode.append(str(c.Typedef(
        c.GenerableStruct("", vdecl, declname=self.ptype.name))))

    # Kernel code supplied by the caller.
    ccode.append(str(kernel_ast))

    # Driver signature.
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"),
            c.Value("float", "dt")]
    for field in field_args:
        args.append(c.Pointer(c.Value("CField", "%s" % field)))
    fargs_str = ", ".join(['particles[p].time', 'particles[p].dt']
                          + list(field_args.keys()))

    # Forward-time loop nest.
    dt_fwd = c.Statement(
        "__dt = fmin(particles[p].dt, endtime - particles[p].time)")
    body_fwd = [c.Statement("res = %s(&(particles[p]), %s)"
                            % (funcname, fargs_str)),
                c.If("res == SUCCESS",
                     c.Statement("particles[p].time += __dt")),
                dt_fwd]
    time_fwd = c.While("__dt > __tol", c.Block(body_fwd))
    part_fwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_fwd, time_fwd]))

    # Backward-time loop nest (mirror image: fmax and negated tolerance).
    dt_bwd = c.Statement(
        "__dt = fmax(particles[p].dt, endtime - particles[p].time)")
    body_bwd = [c.Statement("res = %s(&(particles[p]), %s)"
                            % (funcname, fargs_str)),
                c.If("res == SUCCESS",
                     c.Statement("particles[p].time += __dt")),
                dt_bwd]
    time_bwd = c.While("__dt < -1. * __tol", c.Block(body_bwd))
    part_bwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_bwd, time_bwd]))

    # Select direction at runtime from the sign of dt.
    time_if = c.If("dt > 0.0", c.Block([part_fwd]), c.Block([part_bwd]))
    fbody = c.Block([c.Value("int", "p"),
                     c.Value("KernelOp", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"),
                     time_if])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode.append(str(c.FunctionBody(fdecl, fbody)))
    return "\n\n".join(ccode)
def _generate_kernel_arg_decls(self):
    # Build three component lists consumed later by the code generator:
    #  - KERNEL_ARG_DECLS:       parameters of the generated kernel
    #  - KERNEL_LIB_ARG_DECLS:   parameters of the wrapping library call
    #  - KERNEL_STRUCT_TYPEDEFS: per-ParticleDat struct typedefs
    _kernel_arg_decls = []
    _kernel_lib_arg_decls = []
    _kernel_structs = cgen.Module([
        cgen.Comment('#### Structs generated per ParticleDat ####')
    ])

    # Static (constant) kernel arguments: the same const declaration is
    # reused for both the kernel and the library signature.
    if self._kernel.static_args is not None:
        for i, dat in enumerate(self._kernel.static_args.items()):
            arg = cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0]))
            _kernel_arg_decls.append(arg)
            _kernel_lib_arg_decls.append(arg)

    for i, dat in enumerate(self._dat_dict.items()):
        assert type(dat[1]) is tuple, "Access descriptors not found"
        obj = dat[1][0]     # data object (Array / Matrix / GlobalArray)
        mode = dat[1][1]    # access descriptor
        symbol = dat[0]     # kernel-visible symbol name

        kernel_lib_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype],
                                      Restrict(self._cc.restrict_keyword, symbol))
                                      )

        # Classic global arrays are passed pointer-to-pointer at the
        # library level (one buffer per thread — presumably; confirm
        # against data.GlobalArrayClassic).
        if issubclass(type(obj), data.GlobalArrayClassic):
            kernel_lib_arg = cgen.Pointer(kernel_lib_arg)

        if issubclass(type(obj), host._Array):
            # Plain arrays pass through as (const) restrict pointers.
            kernel_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype],
                                      Restrict(self._cc.restrict_keyword, symbol))
                                      )
            if not mode.write:
                kernel_arg = cgen.Const(kernel_arg)

            _kernel_arg_decls.append(kernel_arg)

            # Write access to a plain array is only permitted for the
            # thread-safe global-array type.
            if mode.write is True:
                assert issubclass(type(obj), data.GlobalArrayClassic),\
                    "global array must be a thread safe type for \
                write access. Type is:" + str(type(obj))

        elif issubclass(type(dat[1][0]), host.Matrix):

            # MAKE STRUCT TYPE: matrices are exposed via a generated
            # struct `_<name>_t` holding row pointers i and j.
            dtype = dat[1][0].dtype
            ti = cgen.Pointer(cgen.Value(ctypes_map(dtype),
                                         Restrict(self._cc.restrict_keyword,'i')))
            tj = cgen.Pointer(cgen.Value(ctypes_map(dtype),
                                         Restrict(self._cc.restrict_keyword,'j')))
            if not dat[1][1].write:
                ti = cgen.Const(ti)
                tj = cgen.Const(tj)
            typename = '_'+dat[0]+'_t'
            _kernel_structs.append(cgen.Typedef(cgen.Struct('', [ti,tj], typename)))

            # MAKE STRUCT ARG: the struct instance is the kernel argument.
            _kernel_arg_decls.append(cgen.Value(typename, dat[0]))

        if not dat[1][1].write:
            kernel_lib_arg = cgen.Const(kernel_lib_arg)

        _kernel_lib_arg_decls.append(kernel_lib_arg)

    self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls
    self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls
    self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Generate the C source for a structure-of-arrays particle loop:
    module globals, both particle struct typedefs, backup/restore
    helpers, the kernel and the ``particle_loop`` driver."""
    ccode = []
    pname = self.ptype.name + 'p'

    # ==== Add include for Parcels header ==== #
    # math.h / stdbool.h are intentionally not included here — per an
    # earlier review note they are already provided via parcels.h.
    ccode += [str(c.Include("parcels.h", system=False))]

    # Module-level globals used by update_next_dt() below.
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    ccode += [
        str(
            c.Assign(
                'const int ngrid',
                str(self.fieldset.gridset.size if self.
                    fieldset is not None else 1)))
    ]

    # ==== Generate type definition for particle type ==== #
    # SoA variant: one pointer per particle variable.
    vdeclp = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
    ]
    # Generate type definition for a single (scalar-fields) particle.
    vdecl = [
        c.POD(v.dtype, v.name) for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    ccode += [
        str(
            c.Typedef(
                c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]

    # set_particle_backup(): snapshot one particle's fields (except dt
    # and state) so a failed kernel invocation can be rolled back.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particles->%s[pnum]" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]

    # get_particle_backup(): restore the snapshot.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particles->%s[pnum]" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]

    # update_next_dt(): only emitted when the kernel references it.
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]

    if c_include:
        ccode += [c_include]

    # ==== Insert kernel code ==== #
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("double", const)]
    fargs_str = ", ".join(['particles->time[pnum]'] +
                          list(field_args.keys()) +
                          list(const_args.keys()))

    # ==== statement clusters used to compose 'body', 'time_loop' and 'part_loop' ==== #
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign(
        "sign_end_part",
        "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    # Apply a dt that the kernel requested via update_next_dt(), if any.
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt")
        ]))

    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )  # original

    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    # Flag REPEAT when the kernel altered its own dt mid-invocation.
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))

    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))

    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue")
        ]))

    # ==== main computation body ==== #
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]")
    ]
    body += [
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
    ]
    # Propagate a state change made by the kernel itself.
    body += [
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]"))
    ]
    body += [check_pdt]
    # On SUCCESS/DELETE commit the step; otherwise roll back from the
    # backup and stop iterating this particle.
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement(
                    "particles->time[pnum] += particles->dt[pnum]"),
                update_pdt, dt_pos, sign_end_part,
                c.If(
                    "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                    c.Assign("res", "EVALUATE")),
                c.If("sign_dt != sign_end_part",
                     c.Assign("__dt", "0")), update_state, dt_0_break
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, particles, pnum)"
                ), dt_pos, sign_end_part,
                c.If("sign_dt != sign_end_part",
                     c.Assign("__dt", "0")), update_state,
                c.Statement("break")
            ]))
    ]

    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part, reset_res_state, dt_pos, notstarted_continue,
            time_loop
        ]))
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),
        # 1e-8 = built-in tolerance for np.isclose()
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def setup(self, g):
    """Emit the file-level preamble for graph *g* into ``self._lines``.

    For a parent graph this writes the accumulated includes, #defines,
    typedefs, inline function definitions and struct typedefs. For every
    graph it then writes the graph's own constants and includes, and for
    leaf graphs (no subgraphs) the function declaration plus the opening
    of its inline definition. Graph constants are also recorded in
    ``self._structs`` for later struct generation.
    """
    if g.is_parent:
        # Default output path: <cwd>/<graph name>.<language extension>.
        if len(self._path) < 1:
            self._path = '%s/%s.%s' % (os.getcwd(), g.name,
                                       self._lang.lower())

        if len(self.includes) > 0:
            for incfile in self.includes:
                self._lines.append(str(cg.Include('%s.h' % incfile)))
            self._lines.append('')

        if len(self.defines) > 0:
            for lhs, rhs in self.defines:
                self._lines.append(str(cg.Define(lhs, rhs)))
            self._lines.append('')

        if len(self._typedefs) > 0:
            for lhs, rhs in self._typedefs:
                self._lines.append(str(cg.Typedef(cg.Value(lhs, rhs))))
            self._lines.append('')

        if len(self._functions) > 0:
            for lhs, rhs in self._functions:
                self._lines.append('inline %s { %s }' % (lhs, rhs))
            self._lines.append('')

        if len(self._structs) > 0:
            for structname in self._structs:
                members = self._structs[structname]
                fields = [cg.Value(members[name], name) for name in members]
                typedef = cg.Typedef(
                    cg.Value(cg.Struct('', fields),
                             '%s%s' % (structname, StructNode.suffix())))
                # cgen emits '} ;' — normalise to '}'.
                self._lines.append(str(typedef).replace('} ;', '}'))
                self._lines.append('')

    if len(g.constants) > 0:
        for const in g.constants:
            self._lines.append(str(cg.Define(const.name, const.value)))
        self._lines.append('')

    if len(g.includes) > 0:
        for incfile in g.includes:
            self._lines.append(str(cg.Include('%s.h' % incfile)))
        self._lines.append('')

    # Leaf graphs: forward declaration plus the opening line of an
    # inline definition.
    if len(g.subgraphs) < 1:
        decl = str(CGenHelper.functionDecl(g.returntype, g.name, g.params))
        self._lines.append(decl)
        self._lines.append('inline %s' % decl.replace(';', ' {'))

    # Record constants in a struct for later use.
    if len(g.constants) > 0:
        structname = self._structname
        if len(structname) < 1:
            structname = g.name.split('_')[0]
        if structname not in self._structs:
            self._structs[structname] = OrderedDict()
        for const in g.constants:
            self._structs[structname][const.name] = const.type
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Generate the C source for an array-of-structs particle loop with
    rollback support: headers, globals, the particle typedef, the
    backup/restore helpers, the kernel and ``particle_loop``."""
    ccode = []

    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]

    # Globals used by update_next_dt() to hand a dt change from inside
    # the kernel back to the loop driver.
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]

    # Generate type definition for particle type;
    # 64-bit unsigned fields become void pointers.
    vdecl = []
    for v in self.ptype.variables:
        if v.dtype == np.uint64:
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]

    # set_particle_backup(): snapshot a particle's fields (except dt and
    # state) so a failed kernel invocation can be rolled back.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(self.ptype.name, "particle"))
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particle->%s" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]

    # get_particle_backup(): restore the snapshot.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(self.ptype.name, "particle"))
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle->%s" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]

    # update_next_dt(): only emitted when the kernel references it.
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]

    if c_include:
        ccode += [c_include]

    # Insert kernel code
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(self.ptype.name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    fargs_str = ", ".join(['particles[p].time'] + list(field_args.keys())
                          + list(const_args.keys()))

    # Inner loop nest for forward runs
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles[p].dt", "__pdt_prekernels")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))

    # Main loop body: backup, run kernel, check dt consistency, then
    # either commit the step or roll back.
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, &(particles[p]))")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))
    ]
    # Flag REPEAT when the kernel shrank its own dt mid-invocation.
    check_pdt = c.If(
        "(res == SUCCESS) & (__pdt_prekernels != particles[p].dt)",
        c.Assign("res", "REPEAT"))
    body += [check_pdt]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    # Apply a dt requested via update_next_dt(), if any.
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles[p].dt", "_next_dt")
        ]))
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement("particles[p].time += particles[p].dt"),
                update_pdt, dt_pos, dt_0_break,
                c.Statement("continue")
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, &(particles[p]))"
                ), dt_pos,
                c.Statement("break")
            ]))
    ]

    time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                        c.Block(body))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
    fbody = c.Block([
        c.Value("int", "p, sign_dt, sign_end_part"),
        c.Value("ErrorCode", "res"),
        c.Value("float", "__pdt_prekernels"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"), sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def _generate_per_dat(self):
    """Populate ``self._components`` with per-ParticleDat declarations,
    gather/scatter code and the CUDA kernel launch module."""
    # =================== DICT INIT ===============================
    self._components['KERNEL_ARG_DECLS'] = [
        cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL'))
    ]
    self._components['KERNEL_LIB_ARG_DECLS'] = []
    self._components['KERNEL_STRUCT_TYPEDEFS'] = cgen.Module(
        [cgen.Comment('#### Structs generated per ParticleDat ####')])
    self._components['LIB_KERNEL_CALL'] = cgen.Module(
        [cgen.Comment('#### Kernel call ####')])
    kernel_call_symbols = ['_H_N_LOCAL']
    self._components['KERNEL_SCATTER'] = cgen.Module(
        [cgen.Comment('#### kernel scatter ####')])
    self._components['KERNEL_GATHER'] = cgen.Module(
        [cgen.Comment('#### kernel gather ####')])
    self._components['IF_SCATTER'] = cgen.Module(
        [cgen.Comment('#### if scatter ####')])
    self._components['IF_GATHER'] = cgen.Module(
        [cgen.Comment('#### if gather ####')])
    self._components['KERNEL_MAPPING'] = cgen.Module(
        [cgen.Comment('#### kernel symbol mapping ####')])

    # =================== Static Args ===============================
    if self._kernel.static_args is not None:
        for i, datt in enumerate(self._kernel.static_args.items()):
            ksym = datt[0]
            ktype = datt[1]

            # Add to kernel args (same const decl for kernel and lib).
            g = cgen.Const(cgen.Value(host.ctypes_map[ktype], ksym))
            self._components['KERNEL_ARG_DECLS'].append(g)
            self._components['KERNEL_LIB_ARG_DECLS'].append(g)
            kernel_call_symbols.append(ksym)

    # =================== Dynamic Args ===============================
    for i, datt in enumerate(self._dat_dict.items()):
        assert type(datt[1]) is tuple, "Access descriptors not found"
        dati = datt[1][0]       # backing data object
        ksym = datt[0]          # kernel-visible symbol
        dsym = 'd_' + ksym      # device-pointer symbol
        kacc = datt[1][1]       # access descriptor

        # add to lib args
        kernel_lib_arg = cgen.Pointer(
            cgen.Value(host.ctypes_map[dati.dtype],
                       Restrict(self._cc.restrict_keyword, ksym)))

        if type(dati) is cuda_data.GlobalArray or \
                issubclass(type(dati), cuda_base.Array):

            # KERNEL ARGS DECLS -----------------------------
            kernel_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[dati.dtype],
                           Restrict(self._cc.restrict_keyword, dsym)))
            if not kacc.write:
                kernel_arg = cgen.Const(kernel_arg)
            self._components['KERNEL_ARG_DECLS'].append(kernel_arg)

            # KERNEL CALL SYMS -----------------------------
            kernel_call_symbols.append(ksym)

            # KERNEL GATHER/SCATTER START ------------------
            if not kacc.incremented:
                # Non-incremented access: alias the device pointer.
                a = cgen.Pointer(
                    cgen.Value(host.ctypes_map[dati.dtype], ksym))
                a = cgen.Const(a)
                a = cgen.Initializer(a, dsym)
                self._components['IF_GATHER'].append(a)
            else:
                # Incremented access: gather into a zero-initialised
                # local array, reduce back on scatter.
                a = cgen.Initializer(
                    cgen.Value(host.ctypes_map[dati.dtype],
                               ksym + '[' + str(dati.ncomp) + ']'), '{0}')
                self._components['IF_GATHER'].append(a)

                # add the scatter code
                self._components['IF_SCATTER'].append(
                    cgen.Line(
                        generate_reduction_final_stage(dsym, ksym, dati)))
            # KERNEL GATHER/SCATTER END ------------------

        elif issubclass(type(dati), cuda_base.Matrix):

            # KERNEL ARGS DECLS, STRUCT DECLS ----------------
            dtype = dati.dtype
            ti = cgen.Pointer(
                cgen.Value(ctypes_map(dtype),
                           Restrict(self._cc.restrict_keyword, 'i')))
            if not kacc.write:
                ti = cgen.Const(ti)
            typename = '_' + ksym + '_t'
            self._components['KERNEL_STRUCT_TYPEDEFS'].append(
                cgen.Typedef(cgen.Struct('', [ti], typename)))

            # add to kernel args
            kernel_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[dati.dtype],
                           Restrict(self._cc.restrict_keyword, dsym)))
            if not kacc.write:
                kernel_arg = cgen.Const(kernel_arg)
            self._components['KERNEL_ARG_DECLS'].append(kernel_arg)

            # KERNEL CALL SYMS -----------------------------
            kernel_call_symbols.append(ksym)

            # KERNEL GATHER/SCATTER START ------------------
            # Map the particle's row pointer into the struct using the
            # pair-loop index offset.
            nc = str(dati.ncomp)
            _ishift = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + nc
            isym = dsym + _ishift
            g = cgen.Value(typename, ksym)
            g = cgen.Initializer(g, '{ ' + isym + '}')
            self._components['KERNEL_MAPPING'].append(g)
            # KERNEL GATHER/SCATTER END ------------------

        # END OF IF ------------------------

        # add to lib args
        if not kacc.write:
            kernel_lib_arg = cgen.Const(kernel_lib_arg)
        self._components['KERNEL_LIB_ARG_DECLS'].append(kernel_lib_arg)

    # KERNEL CALL SYMS -----------------------------
    # Comma-joined launch argument list.
    kernel_call_symbols_s = ''
    for sx in kernel_call_symbols:
        kernel_call_symbols_s += sx + ','
    kernel_call_symbols_s = kernel_call_symbols_s[:-1]

    # Launch configuration from host-side block/thread size arrays,
    # then the <<<_B,_T>>> launch and a device synchronise.
    self._components['LIB_KERNEL_CALL'].append(
        cgen.Module([
            cgen.Value('dim3', '_B'),
            cgen.Value('dim3', '_T'),
            cgen.Assign('_B.x', '_H_BLOCKSIZE[0]'),
            cgen.Assign('_B.y', '_H_BLOCKSIZE[1]'),
            cgen.Assign('_B.z', '_H_BLOCKSIZE[2]'),
            cgen.Assign('_T.x', '_H_THREADSIZE[0]'),
            cgen.Assign('_T.y', '_H_THREADSIZE[1]'),
            cgen.Assign('_T.z', '_H_THREADSIZE[2]')
        ]))
    self._components['LIB_KERNEL_CALL'].append(
        cgen.Line('k_' + self._kernel.name + '<<<_B,_T>>>(' +
                  kernel_call_symbols_s + ');'))
    self._components['LIB_KERNEL_CALL'].append(
        cgen.Line('checkCudaErrors(cudaDeviceSynchronize());'))