def visit_NestedFieldEvalNode(self, node):
    """Generate the C loop that evaluates a nested field.

    ``node.fields`` and ``node.args`` are visited first. For every field in
    ``node.fields.obj`` the generated ``while (1==1)`` body assigns the
    field evaluation to ``err``, multiplies ``node.var`` by the field's
    conversion expression and, unless ``err`` equals
    ``ERROR_OUT_OF_BOUNDS``, runs ``CHECKERROR(err)`` and breaks. After the
    last field the same ``CHECKERROR``/``break`` pair is emitted
    unconditionally. The loop is stored on ``node.ccode``.
    """
    self.visit(node.fields)
    self.visit(node.args)

    def check_and_leave():
        # Fresh statement objects on every call, as in the unrolled form.
        return [c.Statement("CHECKERROR(err)"), c.Statement("break")]

    loop_body = []
    for field in node.fields.obj:
        eval_code = field.ccode_eval(node.var, *node.args.ccode)
        convert_code = field.ccode_convert(*node.args.ccode)
        scale = c.Statement("%s *= %s" % (node.var, convert_code))
        loop_body.extend([
            c.Assign("err", eval_code),
            scale,
            c.If("err != ERROR_OUT_OF_BOUNDS ", c.Block(check_and_leave())),
        ])
    loop_body.extend(check_and_leave())

    node.ccode = c.While("1==1", c.Block(loop_body))
def _generate_kernel_func(self):
    """Build the ``__global__`` kernel function and store it as ``KERNEL_FUNC``.

    The generated function initialises the first pair index from
    ``threadIdx.x + blockIdx.x*blockDim.x`` and emits the ``IF_GATHER``
    component. Guarded by ``<index 0> < _D_N_LOCAL`` it then emits the
    gather component, a loop over ``_k`` from 1 to ``_D_NMATRIX[<index 0>]``
    inclusive (each iteration reads the second pair index from
    ``_D_NMATRIX`` and runs the mapping component followed by the kernel
    code) and the scatter component. ``IF_SCATTER`` closes the function.
    """
    parts = self._components
    idx0 = parts['LIB_PAIR_INDEX_0']

    # const <int32> <index 1> = _D_NMATRIX[<index 0> + _D_N_LOCAL * _k ];
    second_index_init = cgen.Initializer(
        cgen.Const(cgen.Value(host.int32_str, parts['LIB_PAIR_INDEX_1'])),
        '_D_NMATRIX[' + idx0 + ' + _D_N_LOCAL * _k ]'
    )

    neighbour_loop = cgen.For(
        'int _k=1',
        '_k<=_D_NMATRIX[' + idx0 + ']',
        '_k++',
        cgen.Block([
            second_index_init,
            parts['KERNEL_MAPPING'],
            cgen.Line(self._kernel.code),
        ])
    )

    guarded = cgen.If(
        idx0 + '<_D_N_LOCAL',
        cgen.Block([
            parts['KERNEL_GATHER'],
            neighbour_loop,
            parts['KERNEL_SCATTER'],
        ])
    )

    thread_index_init = cgen.Initializer(
        cgen.Const(cgen.Value(host.int32_str, idx0)),
        'threadIdx.x + blockIdx.x*blockDim.x'
    )

    function_body = cgen.Block([
        thread_index_init,
        parts['IF_GATHER'],
        guarded,
        parts['IF_SCATTER'],
    ])

    signature = cgen.FunctionDeclaration(
        cgen.DeclSpecifier(
            cgen.Value("void", 'k_' + self._kernel.name),
            '__global__'
        ),
        parts['KERNEL_ARG_DECLS']
    )

    parts['KERNEL_FUNC'] = cgen.FunctionBody(signature, function_body)
def visit_If(self, node):
    """Generate C code for an ``if`` node and store it on ``node.ccode``.

    The test, every body statement and every ``orelse`` statement are
    visited first. The number of occurrences of ``'tmp'`` in the test's C
    code decides how many leading body statements are emitted in a block
    *before* the ``if``; the remaining body statements form the ``if``
    branch. An else branch is only generated when ``node.orelse`` is
    non-empty.
    """
    self.visit(node.test)
    for stmt in node.body:
        self.visit(stmt)
    for stmt in node.orelse:
        self.visit(stmt)

    # Field evaluations are replaced by a tmp variable that is pushed onto
    # the stack, so evaluations used in node.test show up at the start of
    # node.body. They are split off here and placed ahead of the if.
    hoisted_count = node.test.ccode.count('tmp')
    hoisted = c.Block([stmt.ccode for stmt in node.body[:hoisted_count]])
    then_block = c.Block([stmt.ccode for stmt in node.body[hoisted_count:]])

    if len(node.orelse) > 0:
        else_block = c.Block([stmt.ccode for stmt in node.orelse])
    else:
        else_block = None

    conditional = c.If(node.test.ccode, then_block, else_block)
    node.ccode = c.Block([hoisted, conditional])
def eval(self, generator):
    """Return the collection of statements that packs this member.

    The member is addressed as ``data.<name>``. A non-optional member is
    packed directly. For an optional member a presence flag
    (``static_cast<bool>`` of the member) is streamed into ``*packet``
    first, and the dereferenced value is packed only inside an ``if`` on
    the member itself.
    """
    out = c.Collection()
    member = f'data.{self.base.name.eval()}'

    if not self.base.optional:
        out.append(self._pack(generator, member))
        return out

    dereferenced = f'*{member}'
    out.append(c.Statement(f'*packet << static_cast<bool>({member})'))
    packed_value = c.Block([self._pack(generator, dereferenced)])
    out.append(c.If(member, packed_value))
    return out
def _unpack_vector(self, generator, variable):
    """Return the block that reads one vector element into ``variable``.

    For element types that ``generator.is_trivial`` accepts, the block
    contains a size guard for ``sizeof(<spec>)`` followed by an ``Add`` of
    the value returned by the matching ``packet.Data.Read*`` call (suffix
    taken from ``READ_MAP``). For any other element type the block
    constructs a new element, adds it to ``variable`` and returns ``false``
    when ``unpack_<spec>`` fails.
    """
    spec = self.base.dtype.spec.eval()

    if not generator.is_trivial(spec):
        return c.Block([
            c.Statement(f'{spec} var = new {spec}()'),
            c.Statement(f'{variable}.Add(var)'),
            c.If(f'!unpack_{spec}(packet, var)',
                 c.Block([c.Statement('return false')])),
        ])

    size_guard = self._guard(f'sizeof({spec})')
    read_element = c.Statement(
        f'{variable}.Add(packet.Data.Read{READ_MAP[spec]}())')
    return c.Block([size_guard, read_element])
def eval(self, generator):
    """Return the collection of statements that packs this member.

    The member is addressed as ``data.<name>``. A non-optional member is
    packed directly. For an optional member one byte holding
    ``Convert.ToInt32(<member>.HasValue)`` is written first, and
    ``<member>.Value`` is packed only inside an ``if`` on ``HasValue``.
    """
    out = c.Collection()
    member = f'data.{self.base.name.eval()}'

    if not self.base.optional:
        out.append(self._pack(generator, member))
        return out

    value = f'{member}.Value'
    presence_flag = c.Statement(
        f'packet.Data.WriteByte((byte)Convert.ToInt32({member}.HasValue))')
    out.append(presence_flag)

    packed_value = c.Block([self._pack(generator, value)])
    out.append(c.If(f'{member}.HasValue', packed_value))
    return out
def visit_NestedVectorFieldEvalNode(self, node):
    """Generate the C loop that evaluates a nested vector field.

    ``node.fields`` and ``node.args`` are visited first. For every field in
    ``node.fields.obj`` the generated ``while (1==1)`` body assigns the
    vector evaluation to ``err``, applies the unit conversions in a nested
    block and, unless ``err`` equals ``ERROR_OUT_OF_BOUNDS``, runs
    ``CHECKERROR(err)`` and breaks. The U and V conversions are skipped
    when ``fld.U.interp_method`` is ``'cgrid_velocity'``; the W conversion
    is added when ``fld.vector_type`` is ``'3D'``. After the last field the
    ``CHECKERROR``/``break`` pair is emitted unconditionally. The loop is
    stored on ``node.ccode``.
    """
    self.visit(node.fields)
    self.visit(node.args)

    def check_and_leave():
        # Fresh statement objects on every call, as in the unrolled form.
        return [c.Statement("CHECKERROR(err)"), c.Statement("break")]

    loop_body = []
    for field in node.fields.obj:
        eval_code = field.ccode_eval(node.var, node.var2, node.var3,
                                     field.U, field.V, field.W,
                                     *node.args.ccode)

        conversions = []
        if field.U.interp_method != 'cgrid_velocity':
            convert_u = field.U.ccode_convert(*node.args.ccode)
            convert_v = field.V.ccode_convert(*node.args.ccode)
            conversions.append(
                c.Statement("%s *= %s" % (node.var, convert_u)))
            conversions.append(
                c.Statement("%s *= %s" % (node.var2, convert_v)))
        if field.vector_type == '3D':
            convert_w = field.W.ccode_convert(*node.args.ccode)
            conversions.append(
                c.Statement("%s *= %s" % (node.var3, convert_w)))

        loop_body.extend([
            c.Assign("err", eval_code),
            c.Block(conversions),
            c.If("err != ERROR_OUT_OF_BOUNDS ", c.Block(check_and_leave())),
        ])
    loop_body.extend(check_and_leave())

    node.ccode = c.While("1==1", c.Block(loop_body))
def map_If(self, node):
    """Map an ``If`` node to a ``cgen.If``.

    The condition is ``node.expr`` passed through ``transform_expr``; the
    branch body is the recursive mapping of the first entry of
    ``node.content`` only.
    """
    condition = self.transform_expr(node.expr)
    then_branch = self.rec(node.content[0])
    return cgen.If(condition, then_branch)
def _guard(size):
    """Return an ``if`` that makes the generated code ``return false`` when
    ``packet->size() + <size>`` exceeds ``packet->length()``.

    ``size`` is interpolated verbatim into the generated condition.
    """
    overflow_condition = f'packet->size() + {size} > packet->length()'
    bail_out = c.Block([c.Statement('return false')])
    return c.If(overflow_condition, bail_out)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Return the complete C source for a kernel and its particle loop.

    The result is the ``"\\n\\n"``-joined sequence of: the ``parcels.h`` and
    ``math.h`` includes, the particle struct typedef, the inline
    ``set_particle_backup`` / ``get_particle_backup`` helpers, ``c_include``
    (only when truthy), ``str(kernel_ast)`` and the ``particle_loop``
    function.

    :param funcname: name of the kernel function called for each particle.
    :param field_args: mapping whose keys become ``CField*`` parameters of
        ``particle_loop`` and arguments of the kernel call.
    :param const_args: mapping whose keys become ``float`` parameters of
        ``particle_loop`` and arguments of the kernel call.
    :param kernel_ast: object whose ``str()`` is the kernel code.
    :param c_include: extra C code inserted before the kernel, or a falsy
        value to insert nothing.
    """
    ptype_name = self.ptype.name

    def is_backed_up(var):
        # uint64 members and the dt/state members are left out of backups.
        return var.dtype != np.uint64 and var.name not in ['dt', 'state']

    def backup_helper(c_name, assignments):
        # static inline void <c_name>(<ptype>* particle_backup, <ptype>* particle)
        params = [c.Pointer(c.Value(ptype_name, "particle_backup")),
                  c.Pointer(c.Value(ptype_name, "particle"))]
        decl = c.FunctionDeclaration(
            c.Static(c.DeclSpecifier(c.Value("void", c_name), spec='inline')),
            params)
        return str(c.FunctionBody(decl, c.Block(assignments)))

    # Includes for Parcels and the math header.
    sections = [
        str(c.Include("parcels.h", system=False)),
        str(c.Include("math.h", system=False)),
    ]

    # Type definition for the particle type; uint64 members become void*.
    members = [
        c.Pointer(c.POD(np.void, var.name)) if var.dtype == np.uint64
        else c.POD(var.dtype, var.name)
        for var in self.ptype.variables
    ]
    sections.append(
        str(c.Typedef(c.GenerableStruct("", members, declname=ptype_name))))

    sections.append(backup_helper("set_particle_backup", [
        c.Assign("particle_backup->%s" % var.name, "particle->%s" % var.name)
        for var in self.ptype.variables if is_backed_up(var)
    ]))
    sections.append(backup_helper("get_particle_backup", [
        c.Assign("particle->%s" % var.name, "particle_backup->%s" % var.name)
        for var in self.ptype.variables if is_backed_up(var)
    ]))

    if c_include:
        sections.append(c_include)

    # Kernel code.
    sections.append(str(kernel_ast))

    # Signature of the outer loop used for repeated kernel invocation.
    loop_params = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(ptype_name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt"),
    ]
    loop_params.extend(
        c.Pointer(c.Value("CField", "%s" % field)) for field in field_args)
    loop_params.extend(c.Value("float", const) for const in const_args)
    kernel_call_args = ", ".join(
        ['particles[p].time'] + list(field_args) + list(const_args))

    # Building blocks of the inner loop nest for forward runs.
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % ptype_name)
    sign_end_part = c.Assign(
        "sign_end_part", "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    pdt_eq_dt_pos = c.Assign("particles[p].dt", "__dt * sign_dt")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))

    on_success = c.If("res == SUCCESS", c.Block([
        c.Statement("particles[p].time += sign_dt * __dt"),
        dt_pos,
        dt_0_break,
        c.Statement("continue"),
    ]))
    on_repeat_or_error = c.If(
        "res == REPEAT",
        c.Block([
            c.Statement("get_particle_backup(&particle_backup, &(particles[p]))"),
            dt_pos,
            c.Statement("break"),
        ]),
        c.Statement("break"))

    time_loop_body = [
        c.Statement("set_particle_backup(&particle_backup, &(particles[p]))"),
        pdt_eq_dt_pos,
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, kernel_call_args)),
        c.Assign("particles[p].state", "res"),  # Store return code on particle
        on_success,
        on_repeat_or_error,
    ]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                        c.Block(time_loop_body))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))

    loop_function_body = c.Block([
        c.Value("int", "p, sign_dt, sign_end_part"),
        c.Value("ErrorCode", "res"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"),
        sign_dt,
        particle_backup,
        part_loop,
    ])
    loop_decl = c.FunctionDeclaration(
        c.Value("void", "particle_loop"), loop_params)
    sections.append(str(c.FunctionBody(loop_decl, loop_function_body)))

    return "\n\n".join(sections)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Return the complete C source for a kernel and its particle loop.

    Particles are passed to the generated code as a struct of per-variable
    pointers (typedef ``<ptype name>p``) indexed by ``pnum``; a second
    typedef (``<ptype name>``) describes a single particle and is used for
    the backup copy.

    The returned string is the ``"\\n\\n"``-joined sequence of: the
    ``parcels.h`` include, the ``_next_dt`` / ``_next_dt_set`` / ``ngrid``
    globals, both typedefs, the inline ``set_particle_backup`` and
    ``get_particle_backup`` helpers, the inline ``update_next_dt`` helper
    (only when that name occurs in ``str(kernel_ast)``), ``c_include``
    (only when truthy), ``str(kernel_ast)`` and the ``particle_loop``
    function.

    :param funcname: name of the kernel function called for each particle.
    :param field_args: mapping whose keys become ``CField*`` parameters of
        ``particle_loop`` and arguments of the kernel call.
    :param const_args: mapping whose keys become ``double`` parameters of
        ``particle_loop`` and arguments of the kernel call.
    :param kernel_ast: object whose ``str()`` is the kernel code.
    :param c_include: extra C code inserted before the kernel, or a falsy
        value to insert nothing.
    """
    ccode = []
    # Name of the struct-of-pointers type that holds all particles.
    pname = self.ptype.name + 'p'
    # ==== Add include for Parcels and math header ==== #
    ccode += [str(c.Include("parcels.h", system=False))]
    #ccode += [str(c.Include("math.h", system=False))] # removed by Lyc because it is already in parcels.h ???
    #ccode += [str(c.Include("stdbool.h", system=False))] # added by Luc to accommodate crossdike.h booleans
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    # ngrid falls back to 1 when no fieldset is attached.
    ccode += [
        str(
            c.Assign(
                'const int ngrid',
                str(self.fieldset.gridset.size if self.
                    fieldset is not None else 1)))
    ]
    # ==== Generate type definition for particle type ==== #
    # Every variable becomes a pointer member of the struct-of-pointers type.
    vdeclp = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
    ]
    # Generate type definition for single particle type
    # (uint64 variables are left out of the single-particle struct).
    vdecl = [
        c.POD(v.dtype, v.name) for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    ccode += [
        str(
            c.Typedef(
                c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]
    # set_particle_backup: copy particles-><var>[pnum] into the backup.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        # uint64 variables and dt/state are not backed up.
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particles->%s[pnum]" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]
    # get_particle_backup: copy the backup back into particles-><var>[pnum].
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particles->%s[pnum]" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    # The helper is only emitted when the kernel source mentions it.
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]
    if c_include:
        ccode += [c_include]
    # ==== Insert kernel code ==== #
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("double", const)]
    # Argument list appended to "particles, pnum" in the kernel call below.
    fargs_str = ", ".join(['particles->time[pnum]'] +
                          list(field_args.keys()) +
                          list(const_args.keys()))
    # ==== statement clusters used to compose the 'body' variable and the variables 'time_loop' and 'part_loop' ==== ##
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign(
        "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    # Apply a dt requested through update_next_dt, then clear the request flag.
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt")
        ]))
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )  # original
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    # A kernel that returned SUCCESS but changed the particle's dt is turned
    # into a REPEAT.
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))
    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue")
        ]))
    # ==== main computation body ==== #
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]")
    ]
    body += [
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
    ]
    # If the kernel returned SUCCESS but changed the particle state, use the
    # new state as the result.
    body += [
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]"))
    ]
    body += [check_pdt]
    # SUCCESS/DELETE: advance time and recompute the remaining step.
    # Anything else: restore the backup and leave the time loop.
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement(
                    "particles->time[pnum] += particles->dt[pnum]"),
                update_pdt, dt_pos, sign_end_part,
                c.If(
                    "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                    c.Assign("res", "EVALUATE")),
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state, dt_0_break
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, particles, pnum)"
                ),
                dt_pos, sign_end_part,
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state,
                c.Statement("break")
            ]))
    ]
    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part, reset_res_state, dt_pos, notstarted_continue,
            time_loop
        ]))
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def _cgen(self):
    """Return a ``cgen.If`` built from this node.

    The condition is the string form of ``self.cond``; the two branches are
    the ``_cgen()`` results of ``self.if_block`` and ``self.else_block``.
    """
    condition = str(self.cond)
    then_code = self.if_block._cgen()
    else_code = self.else_block._cgen()
    return cgen.If(condition, then_code, else_code)
def ifnothalo(b):
    """Return a block holding ``b`` guarded by ``<iif>[<i>] < <nloc>``.

    ``iif``, ``i`` and ``nloc`` are not parameters; they are resolved from
    the surrounding scope when the function is called.
    """
    condition = iif + '[' + i + ']<' + nloc
    guarded = cgen.If(condition, b)
    return cgen.Block((guarded,))
def _generate_kernel_gather(self):
    """Build the pre-kernel gather code and store it as ``LIB_KERNEL_GATHER``.

    The generated module computes the linear cell index, reads the OpenMP
    thread number, initialises the per-thread temporary space partition
    (stored as ``PARTICLE_DAT_PARTITION``), declares per-thread pointers
    for ``GlobalArrayClassic`` arguments, iterates the cell list to record
    local particles and stride-gather every ``ParticleDat``, and finally
    emits a ``continue`` when the local-particle counter is zero.

    Every symbol in ``self._dat_dict`` is also appended to
    ``OMP_SHARED_SYMS``.
    """
    cp = self._components
    cx, cy, cz = cp['LIB_CELL_CX'], cp['LIB_CELL_CY'], cp['LIB_CELL_CZ']
    # ncz is looked up but does not appear in the index expression below.
    ncx, ncy, ncz = cp['N_CELL_X'], cp['N_CELL_Y'], cp['N_CELL_Z']
    cell_index_sym = cp['LIB_CELL_INDEX_0']

    linear_cell_index = cgen.Initializer(
        cgen.Const(cgen.Value('INT64', cell_index_sym)),
        cx + '+' + ncx + '*(' + cy + '+' + ncy + '*' + cz + ')')
    thread_index = cgen.Initializer(
        cgen.Const(cgen.Value('int', cp['OMP_THREAD_INDEX_SYM'])),
        'omp_get_thread_num()')
    kernel_gather = cgen.Module([
        cgen.Comment('#### Pre kernel gather ####'),
        linear_cell_index,
        thread_index,
    ])

    # Partition this thread's space for temporary variables.
    cp['PARTICLE_DAT_PARTITION'] = DSLPartitionTempSpace(
        self._dat_dict,
        cp['CCC_MAX'],
        '_GATHER_SPACE[_threadid]',
        extras=((cp['TMP_INDEX'], 1, INT64),))
    partition = cp['PARTICLE_DAT_PARTITION']
    kernel_gather.append(partition.ptr_init)

    src_sym = '__tmp_gpx'
    dst_sym = cp['CCC_0']

    record_local = DSLRecordLocal(
        ind_sym=src_sym,
        nlocal_sym=cp['N_LOCAL'],
        store_sym=partition.idict[cp['TMP_INDEX']],
        store_ind_sym=dst_sym,
        count_sym=cp['I_LOCAL_SYM'])
    kernel_gather.append(record_local[0])

    # Statements executed for every particle visited by the cell iteration.
    per_particle = [record_local[1]]

    # Add dats to the OpenMP shared list and set up per-thread pointers
    # for global array reductions.
    shared_syms = cp['OMP_SHARED_SYMS']
    for symbol, entry in self._dat_dict.items():
        obj = entry[0]
        mode = entry[1]
        shared_syms.append(symbol)

        if issubclass(type(obj), data.GlobalArrayClassic):
            thread_slot = symbol + '[' + cp['OMP_THREAD_INDEX_SYM'] + ']'
            pointer = cgen.Pointer(
                cgen.Value(host.ctypes_map[obj.dtype], symbol + '_c'))
            if not mode.write:
                pointer = cgen.Const(pointer)
            kernel_gather.append(cgen.Initializer(pointer, thread_slot))

        if issubclass(type(obj), data.ParticleDat):
            per_particle.append(
                DSLStrideGather(symbol, partition.idict[symbol], obj.ncomp,
                                src_sym, dst_sym, cp['CCC_MAX']))

    per_particle.append(cgen.Line(dst_sym + '++;'))
    cell_iteration = cp['CELL_LIST_ITER'](
        src_sym, cell_index_sym, cgen.Module(per_particle))

    kernel_gather.append(
        cgen.Initializer(cgen.Value('INT64', dst_sym), '0'))
    kernel_gather.append(cell_iteration)

    # Skip the cell if there are no local particles.
    kernel_gather.append(
        cgen.If(cp['I_LOCAL_SYM'] + '==0',
                cgen.Block((cgen.Line('continue;'), ))))

    cp['LIB_KERNEL_GATHER'] = kernel_gather
def get_idl_dtype_size(idl_dtype, spec_type, variable, optional):
    """Return the statements that add the wire size of ``data.<variable>``
    to the generated ``size`` accumulator.

    ``self`` is not a parameter; it is resolved from the surrounding scope.

    :param idl_dtype: IDL type name of the member (a trivial type,
        ``'string'``, a message name or ``'vector'``).
    :param spec_type: element type when ``idl_dtype`` is ``'vector'``.
    :param variable: member name, interpolated as ``data.<variable>``.
    :param optional: when truthy, ``sizeof(bool)`` is added first and the
        payload size is only added inside an ``if`` on ``HasValue``.
    :returns: a ``c.Collection`` with the generated statements.
    :raises NotImplementedError: for vectors of strings, vectors of
        unsupported element types and unrecognized ``idl_dtype`` values.
    """
    # ``ret`` is what the caller receives; ``col`` is where statements are
    # appended (it is redirected into the HasValue block for optionals).
    ret = col = c.Collection([])

    # Optional data is a special case: presence flag plus guarded payload.
    if optional:
        col.append(c.Statement('size += sizeof(bool)'))
        inner = c.Block([])
        col.append(c.If(f'data.{variable}.HasValue', inner))
        col = inner

    dtype = self.get_dtype(idl_dtype)
    length_prefix = f'sizeof({self.get_dtype("uint8")})'

    if self.is_trivial(idl_dtype):
        col.append(c.Statement(f'size += sizeof({dtype})'))
    elif idl_dtype == 'string':
        col.append(
            c.Statement(
                f'size += (byte)({length_prefix} + '
                + self._str_len(f'data.{variable}') + ')'))
    elif self.is_message(idl_dtype):
        col.append(
            c.Statement(f'size += {idl_dtype}_size(data.{variable})'))
    elif idl_dtype == 'vector':
        if self.is_trivial(spec_type):
            # TODO(gpascualg): No vector of optionals yet
            # The per-element size must be an expression. Interpolating
            # the result of a recursive call here would embed a complete
            # "size += sizeof(T);" statement inside the expression.
            element_size = f'sizeof({self.get_dtype(spec_type)})'
            col.append(
                c.Statement(
                    f'size += (byte)({length_prefix} + '
                    f'data.{variable}.Count * {element_size})'))
        elif self.is_message(spec_type):
            if self._is_trivial(spec_type,
                                self.user_defined_messages[spec_type],
                                include_messages=False):
                # Fixed-size messages: measure the first element and
                # multiply, unless the vector is empty.
                col.append(
                    c.Collection([
                        c.If(
                            f'data.{variable}.Count == 0',
                            c.Block([
                                c.Statement(f'size += {length_prefix}')
                            ]),
                            c.Block([
                                c.Statement(
                                    f'size += (byte)({length_prefix} + '
                                    f'data.{variable}.Count * '
                                    f'{spec_type}_size(data.{variable}[0]))')
                            ]))
                    ]))
            else:
                # Variable-size messages: sum the size of every element.
                col.append(
                    c.Collection([
                        c.Statement(f'size += {length_prefix}'),
                        c.For(
                            'int i = 0', f'i < data.{variable}.Count', '++i',
                            c.Block([
                                c.Statement(
                                    f'size += {spec_type}_size(data.{variable}[i])')
                            ]))
                    ]))
        elif spec_type == 'string':
            raise NotImplementedError('Vector of strings is a WIP')
        else:
            raise NotImplementedError(
                f'Unsupported vector type {spec_type}')
    else:
        raise NotImplementedError(f'Unrecognized type {idl_dtype}')

    return ret
def _generate_kernel_func(self):
    """Build the ``__global__`` kernel function and store it as ``KERNEL_FUNC``.

    The generated function initialises the first pair index from
    ``threadIdx.x + blockIdx.x*blockDim.x`` and emits the ``IF_GATHER``
    component. Guarded by ``<index 0> < _D_N_LOCAL`` it emits the gather
    component, reads the cell ``_CX`` from ``_D_CRL``, loops over the
    entries of that cell (running the mapping and kernel code only when the
    two pair indices differ), then loops over ``_D_N_OFFSETS`` offset cells
    ``_CY`` and their entries (running the mapping and kernel code for
    every entry), and ends with the scatter component. ``IF_SCATTER``
    closes the function.
    """
    parts = self._components
    ix = parts['LIB_PAIR_INDEX_0']
    iy = parts['LIB_PAIR_INDEX_1']
    cell_x = '_CX'
    cell_y = '_CY'

    def const_int32(symbol, value):
        # const <int32> <symbol> = <value>;
        return cgen.Initializer(
            cgen.Const(cgen.Value(host.int32_str, symbol)), value)

    # Entries of the thread's own cell; identical indices are skipped.
    own_cell_loop = cgen.For(
        'int _jk=0', '_jk<_D_CCC[' + cell_x + ']', '_jk++',
        cgen.Block([
            const_int32(iy, '_D_L_MATRIX[' + cell_x + '*_D_N_LAYERS' + '+_jk]'),
            cgen.If(
                ix + '!=' + iy,
                cgen.Block([
                    parts['KERNEL_MAPPING'],
                    cgen.Line(self._kernel.code),
                ])
            ),
        ])
    )

    # Entries of each offset cell; no index comparison is applied here.
    offset_entries_loop = cgen.For(
        'int _jk=0', '_jk<_D_CCC[' + cell_y + ']', '_jk++',
        cgen.Block([
            const_int32(iy, '_D_L_MATRIX[' + cell_y + '*_D_N_LAYERS' + '+_jk]'),
            parts['KERNEL_MAPPING'],
            cgen.Line(self._kernel.code),
        ])
    )
    offset_cells_loop = cgen.For(
        'int _k=0', '_k<_D_N_OFFSETS', '_k++',
        cgen.Block([
            const_int32(cell_y, cell_x + '+ _D_OFFSETS[_k]'),
            offset_entries_loop,
        ])
    )

    guarded = cgen.If(
        ix + '<_D_N_LOCAL',
        cgen.Block([
            parts['KERNEL_GATHER'],
            const_int32(cell_x, '_D_CRL[' + ix + ']'),
            own_cell_loop,
            offset_cells_loop,
            parts['KERNEL_SCATTER'],
        ])
    )

    function_body = cgen.Block([
        const_int32(parts['LIB_PAIR_INDEX_0'],
                    'threadIdx.x + blockIdx.x*blockDim.x'),
        parts['IF_GATHER'],
        guarded,
        parts['IF_SCATTER'],
    ])

    signature = cgen.FunctionDeclaration(
        cgen.DeclSpecifier(
            cgen.Value("void", 'k_' + self._kernel.name),
            '__global__'
        ),
        parts['KERNEL_ARG_DECLS']
    )

    parts['KERNEL_FUNC'] = cgen.FunctionBody(signature, function_body)
def _guard(size):
    """Return an ``if`` that makes the generated code ``return false`` when
    ``packet.Data.Size + <size>`` exceeds ``packet.length()``.

    ``size`` is interpolated verbatim into the generated condition.
    """
    overflow_condition = f'packet.Data.Size + {size} > packet.length()'
    bail_out = c.Block([c.Statement('return false')])
    return c.If(overflow_condition, bail_out)
c.LineComment( 'Calculate size of each step for dimension and time.'), c.Assign('dx', '(double)xSize / (double)xIntervals'), c.Assign('dy', '(double)ySize / (double)yIntervals'), c.Assign('dt', '(double)tTotal / (double)tIntervals'), c.Line(), c.Statement('printf("dx=%lf | dy=%lf | dt=%lf\\n", dx, dy, dt)'), c.Statement( 'printf("nx=%d | ny=%d | nT=%d | borders=%d\\n", xIntervals, yIntervals, tIntervals, BORDER_SIZE)' ), c.Line(), c.LineComment('Check CFL convergency conditions.'), c.If( 'dt / dx > 1 && dt / dy > 1', c.block_if_necessary([ c.Statement( 'cout << "Does not comply with CFL conditions." << endl' ), c.Statement('return -1') ])), c.Line(), c.Statement('ops_decl_const2("dx", 1, "double", &dx)'), c.Statement('ops_decl_const2("dy", 1, "double", &dy)'), c.Statement('ops_decl_const2("dt", 1, "double", &dt)'), c.Statement('ops_decl_const2("nx", 1, "double", &nx)'), c.Statement('ops_decl_const2("ny", 1, "double", &ny)'), c.Line(), c.Initializer( c.Value('int', 'range_CPML[]'), '{1 - BORDER_SIZE, xIntervals + BORDER_SIZE - 1, 1 - BORDER_SIZE, yIntervals + BORDER_SIZE - 1}' ), c.Initializer(
"return output" ] diff = [ c.Template( "typename T", CudaGlobal( c.FunctionDeclaration(c.Value("void", "diffKernel"), [ c.Value("T*", "inputPtr"), c.Value("int", "length"), c.Value("T*", "outputPtr") ]))), c.Block([ c.Statement(global_index), c.If( "index == 0", c.Statement("outputPtr[0] = inputPtr[0]"), c.If("index < length", c.Statement(compute_diff), c.Statement(""))) ]), c.Template( "typename T", c.FunctionDeclaration( c.Value("CUdeviceptr", "difference"), [c.Value("CUdeviceptr", "inputPtr"), c.Value("int", "length")])), c.Block([c.Statement(x) for x in launch]) ] cuda_mod.add_to_module(diff) diff_instance = c.FunctionBody( c.FunctionDeclaration( c.Value("CUdeviceptr", "diffInstance"),
'return output' ] diff = [ c.Template( 'typename T', CudaGlobal( c.FunctionDeclaration(c.Value('void', 'diffKernel'), [ c.Value('T*', 'inputPtr'), c.Value('int', 'length'), c.Value('T*', 'outputPtr') ]))), c.Block([ c.Statement(global_index), c.If( 'index == 0', c.Statement('outputPtr[0] = inputPtr[0]'), c.If('index < length', c.Statement(compute_diff), c.Statement(''))) ]), c.Template( 'typename T', c.FunctionDeclaration( c.Value('CUdeviceptr', 'difference'), [c.Value('CUdeviceptr', 'inputPtr'), c.Value('int', 'length')])), c.Block([c.Statement(x) for x in launch]) ] cuda_mod.add_to_module(diff) diff_instance = c.FunctionBody( c.FunctionDeclaration( c.Value('CUdeviceptr', 'diffInstance'),