def _generate_kernel_arg_decls(self): _kernel_arg_decls = [] _kernel_lib_arg_decls = [] _kernel_structs = cgen.Module( [cgen.Comment('#### Structs generated per ParticleDat ####')]) if self._kernel.static_args is not None: for i, dat in enumerate(self._kernel.static_args.items()): _kernel_arg_decls.append( cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0]))) for i, dat in enumerate(self._dat_dict.items()): assert type(dat[1]) is tuple, "Access descriptors not found" kernel_lib_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dat[1][0].dtype], Restrict(self._cc.restrict_keyword, dat[0]))) # print host.ctypes_map[dat[1][0].dtype], dat[1][0].dtype if issubclass(type(dat[1][0]), host._Array): kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dat[1][0].dtype], Restrict(self._cc.restrict_keyword, dat[0]))) if not dat[1][1].write: kernel_arg = cgen.Const(kernel_arg) _kernel_arg_decls.append(kernel_arg) elif issubclass(type(dat[1][0]), host.Matrix): # MAKE STRUCT TYPE dtype = dat[1][0].dtype ti = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'i'))) tj = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'j'))) if not dat[1][1].write: ti = cgen.Const(ti) tj = cgen.Const(tj) typename = '_' + dat[0] + '_t' _kernel_structs.append( cgen.Typedef(cgen.Struct('', [ti, tj], typename))) # MAKE STRUCT ARG _kernel_arg_decls.append(cgen.Value(typename, dat[0])) if not dat[1][1].write: kernel_lib_arg = cgen.Const(kernel_lib_arg) _kernel_lib_arg_decls.append(kernel_lib_arg) self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
def _generate_kernel_arg_decls(self): _kernel_arg_decls = [] _kernel_lib_arg_decls = [] _kernel_structs = cgen.Module([ cgen.Comment('#### Structs generated per ParticleDat ####') ]) if self._kernel.static_args is not None: for i, dat in enumerate(self._kernel.static_args.items()): arg = cgen.Const(cgen.Value(host.ctypes_map[dat[1]], dat[0])) _kernel_arg_decls.append(arg) _kernel_lib_arg_decls.append(arg) for i, dat in enumerate(self._dat_dict.items()): assert type(dat[1]) is tuple, "Access descriptors not found" obj = dat[1][0] mode = dat[1][1] symbol = dat[0] kernel_lib_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], Restrict(self._cc.restrict_keyword, symbol)) ) if issubclass(type(obj), data.GlobalArrayClassic): kernel_lib_arg = cgen.Pointer(kernel_lib_arg) if issubclass(type(obj), host._Array): kernel_arg = cgen.Pointer(cgen.Value(host.ctypes_map[obj.dtype], Restrict(self._cc.restrict_keyword, symbol)) ) if not mode.write: kernel_arg = cgen.Const(kernel_arg) _kernel_arg_decls.append(kernel_arg) if mode.write is True: assert issubclass(type(obj), data.GlobalArrayClassic),\ "global array must be a thread safe type for \ write access. Type is:" + str(type(obj)) elif issubclass(type(dat[1][0]), host.Matrix): # MAKE STRUCT TYPE dtype = dat[1][0].dtype ti = cgen.Pointer(cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword,'i'))) tj = cgen.Pointer(cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword,'j'))) if not dat[1][1].write: ti = cgen.Const(ti) tj = cgen.Const(tj) typename = '_'+dat[0]+'_t' _kernel_structs.append(cgen.Typedef(cgen.Struct('', [ti,tj], typename))) # MAKE STRUCT ARG _kernel_arg_decls.append(cgen.Value(typename, dat[0])) if not dat[1][1].write: kernel_lib_arg = cgen.Const(kernel_lib_arg) _kernel_lib_arg_decls.append(kernel_lib_arg) self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
def _generate_per_dat(self): # =================== DICT INIT =============================== self._components['KERNEL_ARG_DECLS'] = [ cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL')) ] self._components['KERNEL_LIB_ARG_DECLS'] = [] self._components['KERNEL_STRUCT_TYPEDEFS'] = cgen.Module( [cgen.Comment('#### Structs generated per ParticleDat ####')]) self._components['LIB_KERNEL_CALL'] = cgen.Module( [cgen.Comment('#### Kernel call ####')]) kernel_call_symbols = ['_H_N_LOCAL'] self._components['KERNEL_SCATTER'] = cgen.Module( [cgen.Comment('#### kernel scatter ####')]) self._components['KERNEL_GATHER'] = cgen.Module( [cgen.Comment('#### kernel gather ####')]) self._components['IF_SCATTER'] = cgen.Module( [cgen.Comment('#### if scatter ####')]) self._components['IF_GATHER'] = cgen.Module( [cgen.Comment('#### if gather ####')]) self._components['KERNEL_MAPPING'] = cgen.Module( [cgen.Comment('#### kernel symbol mapping ####')]) # =================== Static Args =============================== if self._kernel.static_args is not None: for i, datt in enumerate(self._kernel.static_args.items()): ksym = datt[0] ktype = datt[1] # Add to kernel args g = cgen.Const(cgen.Value(host.ctypes_map[ktype], ksym)) self._components['KERNEL_ARG_DECLS'].append(g) self._components['KERNEL_LIB_ARG_DECLS'].append(g) kernel_call_symbols.append(ksym) # =================== Dynamic Args =============================== for i, datt in enumerate(self._dat_dict.items()): assert type(datt[1]) is tuple, "Access descriptors not found" dati = datt[1][0] ksym = datt[0] dsym = 'd_' + ksym kacc = datt[1][1] # add to lib args kernel_lib_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, ksym))) if type(dati) is cuda_data.GlobalArray or \ issubclass(type(dati), cuda_base.Array): # KERNEL ARGS DECLS ----------------------------- kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, dsym))) if not kacc.write: kernel_arg = cgen.Const(kernel_arg) self._components['KERNEL_ARG_DECLS'].append(kernel_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols.append(ksym) # KERNEL GATHER/SCATTER START ------------------ if not kacc.incremented: a = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], ksym)) a = cgen.Const(a) a = cgen.Initializer(a, dsym) self._components['IF_GATHER'].append(a) else: a = cgen.Initializer( cgen.Value(host.ctypes_map[dati.dtype], ksym + '[' + str(dati.ncomp) + ']'), '{0}') self._components['IF_GATHER'].append(a) # add the scatter code self._components['IF_SCATTER'].append( cgen.Line( generate_reduction_final_stage(dsym, ksym, dati))) # KERNEL GATHER/SCATTER END ------------------ elif issubclass(type(dati), cuda_base.Matrix): # KERNEL ARGS DECLS, STRUCT DECLS ---------------- dtype = dati.dtype ti = cgen.Pointer( cgen.Value(ctypes_map(dtype), Restrict(self._cc.restrict_keyword, 'i'))) if not kacc.write: ti = cgen.Const(ti) typename = '_' + ksym + '_t' self._components['KERNEL_STRUCT_TYPEDEFS'].append( cgen.Typedef(cgen.Struct('', [ti], typename))) # add to kernel args kernel_arg = cgen.Pointer( cgen.Value(host.ctypes_map[dati.dtype], Restrict(self._cc.restrict_keyword, dsym))) if not kacc.write: kernel_arg = cgen.Const(kernel_arg) self._components['KERNEL_ARG_DECLS'].append(kernel_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols.append(ksym) # KERNEL GATHER/SCATTER START ------------------ nc = str(dati.ncomp) _ishift = '+' + self._components['LIB_PAIR_INDEX_0'] + '*' + nc isym = dsym + _ishift g = cgen.Value(typename, ksym) g = cgen.Initializer(g, '{ ' + isym + '}') self._components['KERNEL_MAPPING'].append(g) # KERNEL GATHER/SCATTER END ------------------ # END OF IF ------------------------ # add to lib args if not kacc.write: kernel_lib_arg = cgen.Const(kernel_lib_arg) self._components['KERNEL_LIB_ARG_DECLS'].append(kernel_lib_arg) # KERNEL CALL SYMS ----------------------------- kernel_call_symbols_s = '' for sx in kernel_call_symbols: kernel_call_symbols_s += sx + ',' kernel_call_symbols_s = kernel_call_symbols_s[:-1] self._components['LIB_KERNEL_CALL'].append( cgen.Module([ cgen.Value('dim3', '_B'), cgen.Value('dim3', '_T'), cgen.Assign('_B.x', '_H_BLOCKSIZE[0]'), cgen.Assign('_B.y', '_H_BLOCKSIZE[1]'), cgen.Assign('_B.z', '_H_BLOCKSIZE[2]'), cgen.Assign('_T.x', '_H_THREADSIZE[0]'), cgen.Assign('_T.y', '_H_THREADSIZE[1]'), cgen.Assign('_T.z', '_H_THREADSIZE[2]') ])) self._components['LIB_KERNEL_CALL'].append( cgen.Line('k_' + self._kernel.name + '<<<_B,_T>>>(' + kernel_call_symbols_s + ');')) self._components['LIB_KERNEL_CALL'].append( cgen.Line('checkCudaErrors(cudaDeviceSynchronize());'))