def generate(self, funcname, field_args, const_args, kernel_ast):
    """Generate the full C source for a JIT-compiled particle kernel.

    :param funcname: name of the inner kernel function to invoke per particle
    :param field_args: dict of field name -> field, passed as CField pointers
    :param const_args: dict of constant name -> value, passed as floats
    :param kernel_ast: cgen AST of the kernel function body
    :returns: complete C source as a single string
    """
    ccode = []

    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]

    # Generate type definition for particle type
    vdecl = []
    for v in self.ptype.variables:
        # BUG FIX: the original used `v.name is 'CGridIndexSet'`, which tests
        # object identity and only matched by accident of string interning;
        # `==` is the correct string comparison.
        if v.name == 'CGridIndexSet':
            # opaque per-particle grid-index set, stored as a void pointer
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))
    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]

    # Insert kernel code
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    fargs_str = ", ".join(['particles[p].time', 'sign * __dt']
                          + list(field_args.keys()) + list(const_args.keys()))

    # Inner loop nest for repeated kernel invocation until endtime is reached
    sign = c.Assign("sign", "dt > 0. ? 1. : -1.")
    dt_pos = c.Assign("__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    body = [c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    body += [c.If("res == SUCCESS",
                  c.Block([c.Statement("particles[p].time += sign * __dt"),
                           dt_pos, dt_0_break, c.Statement("continue")]))]
    body += [c.If("res == REPEAT",
                  c.Block([dt_pos, c.Statement("continue")]),
                  c.Statement("break"))]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
    part_loop = c.For("p = 0", "p < num_particles", "++p",
                      c.Block([dt_pos, time_loop]))
    fbody = c.Block([c.Value("int", "p"), c.Value("ErrorCode", "res"),
                     c.Value("double", "__dt, __tol, sign"),
                     c.Assign("__tol", "1.e-6"),
                     sign, part_loop])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def visit_Call(self, o, nested_call=False):
    """Lower a Call node to either a bare multiline call or, when the call
    captures its result, an assignment of the call to the return object."""
    params = self._args_call(o.arguments)
    if o.retobj is None:
        return MultilineCall(o.name, params, nested_call, o.is_indirect)
    # Result is captured: emit `<retobj> = <name>(<params>)`.
    return c.Assign(ccode(o.retobj),
                    MultilineCall(o.name, params, True, o.is_indirect))
def map_Assignment(self, node):
    """Translate a Fortran assignment node into a cgen Assign, inserting
    explicit real<->complex conversion calls where the two sides differ."""
    from pymbolic.primitives import Subscript
    from pymbolic import var

    target = self.parse_expr(node.variable)
    # For indexed targets the scope tracks the aggregate's name, not the element.
    target_name = target.aggregate.name if isinstance(target, Subscript) else target.name

    scope = self.scope_stack[-1]
    scope.use_name(target_name)
    infer_type = scope.get_type_inference_mapper()

    value = self.parse_expr(node.expr)
    target_dtype = infer_type(target)
    value_dtype = infer_type(value)

    # check for silent truncation of complex: take the real part explicitly
    if target_dtype.kind != 'c' and value_dtype.kind == 'c':
        value = var("real")(value)
    # check for silent widening of real: convert to complex explicitly
    if target_dtype.kind == 'c' and value_dtype.kind != 'c':
        value = var("fromreal")(value)

    return cgen.Assign(self.gen_expr(target), self.gen_expr(value))
def visit_NestedVectorFieldEvalNode(self, node):
    """Emit a try-each-field loop for a NestedField of vector fields: each
    candidate field is sampled in turn until one does not report
    out-of-bounds, at which point the loop breaks."""
    self.visit(node.fields)
    self.visit(node.args)
    loop_body = []
    for fld in node.fields.obj:
        eval_code = fld.ccode_eval(node.var, node.var2, node.var3,
                                   fld.U, fld.V, fld.W, *node.args.ccode)
        # C-grid velocities are returned pre-converted; otherwise apply
        # per-component unit conversion after the sample.
        if fld.U.interp_method == 'cgrid_velocity':
            conversions = []
        else:
            conversions = [
                c.Statement(f"{node.var} *= {fld.U.ccode_convert(*node.args.ccode)}"),
                c.Statement(f"{node.var2} *= {fld.V.ccode_convert(*node.args.ccode)}"),
            ]
        if fld.vector_type == '3D':
            conversions.append(
                c.Statement(f"{node.var3} *= {fld.W.ccode_convert(*node.args.ccode)}"))
        loop_body += [
            c.Assign("err", eval_code),
            c.Block(conversions),
            c.If("err != ERROR_OUT_OF_BOUNDS ",
                 c.Block([c.Statement("CHECKSTATUS(err)"),
                          c.Statement("break")])),
        ]
    # Fell through every field: surface the last status and leave the loop.
    loop_body += [c.Statement("CHECKSTATUS(err)"), c.Statement("break")]
    node.ccode = c.While("1==1", c.Block(loop_body))
def visit_VectorFieldEvalNode(self, node):
    """Emit the sample + unit-conversion + status-check block for a single
    vector-field evaluation."""
    self.visit(node.field)
    self.visit(node.args)
    fld = node.field.obj
    eval_code = fld.ccode_eval(node.var, node.var2, node.var3,
                               fld.U, fld.V, fld.W, *node.args.ccode)
    conversions = []
    # C-grid velocities need no post-sample conversion.
    if fld.U.interp_method != 'cgrid_velocity':
        conversions.append(
            c.Statement(f"{node.var} *= {fld.U.ccode_convert(*node.args.ccode)}"))
        conversions.append(
            c.Statement(f"{node.var2} *= {fld.V.ccode_convert(*node.args.ccode)}"))
    if fld.vector_type == '3D':
        conversions.append(
            c.Statement(f"{node.var3} *= {fld.W.ccode_convert(*node.args.ccode)}"))
    node.ccode = c.Block([c.Assign("err", eval_code),
                          c.Block(conversions),
                          c.Statement("CHECKSTATUS(err)")])
def visit_SummedVectorFieldEvalNode(self, node):
    """Emit one sample + conversion + error-check cluster per component
    field of a SummedField of vector fields."""
    self.visit(node.field)
    self.visit(node.args)
    # A missing W component means a 2D field list; pad with Nones so zip works.
    wlist = node.field.obj.W if node.field.obj.W else [None] * len(node.field.obj.U)
    stmts = []
    for U, V, W, var, var2, var3 in zip(node.field.obj.U, node.field.obj.V, wlist,
                                        node.var, node.var2, node.var3):
        vfld = VectorField(node.field.obj.name, U, V, W)
        eval_code = vfld.ccode_eval(var, var2, var3, U, V, W, *node.args.ccode)
        conversions = []
        if U.interp_method != 'cgrid_velocity':
            conversions = [
                c.Statement(f"{var} *= {U.ccode_convert(*node.args.ccode)}"),
                c.Statement(f"{var2} *= {V.ccode_convert(*node.args.ccode)}"),
            ]
        if var3:
            conversions.append(
                c.Statement(f"{var3} *= {W.ccode_convert(*node.args.ccode)}"))
        stmts += [c.Assign("err", eval_code),
                  c.Block(conversions),
                  c.Statement("CHECKERROR(err)")]
    node.ccode = c.Block(stmts)
def _cgen(self):
    """Return the cgen node for this declaration: an initialised assignment
    (`<type> <name> = <expr>`) when an expression is attached, otherwise a
    plain value declaration.

    Note: uses the idiomatic `str(...)` instead of calling `.__str__()`
    directly; behaviour is identical.
    """
    typ = str(self.ctyp)
    name = self.cname.name
    if self.expr is not None:
        # inline(True) renders the declaration inline so it can serve as the
        # left-hand side of the Assign.
        return cgen.Assign(cgen.Value(typ, name).inline(True), str(self.expr))
    return cgen.Value(typ, name)
def _generate_kernel_scatter(self):
    """Build the post-kernel scatter code: copy gathered per-pair matrix
    rows back into the global data arrays for every written Matrix dat that
    was small enough to have been gathered.

    Stores the resulting cgen.Module in
    `self._components['LIB_KERNEL_SCATTER']`.
    """
    kernel_scatter = cgen.Module(
        [cgen.Comment('#### Post kernel scatter ####')])

    # CLEANUP: the original also looped over self._kernel.static_args doing
    # nothing, and bound unused locals (`ncb`, `dtype`); both removed.
    for sym, (obj, access) in self._dat_dict.items():
        if issubclass(type(obj), host._Array):
            # Arrays are accessed in place; nothing to scatter back.
            continue
        if issubclass(type(obj), host.Matrix) \
                and access.write \
                and obj.ncomp <= self._gather_size_limit:
            isym = sym + 'i'  # name of the gathered (local) copy
            nc = obj.ncomp
            ix = self._components['LIB_PAIR_INDEX_0']
            # global[nc*i + _tx] = local[_tx] for each component
            copy_back = cgen.Assign(
                sym + '[' + str(nc) + '*' + ix + '+_tx]',
                isym + '[_tx]')
            loop = cgen.For('int _tx=0', '_tx<' + str(nc), '_tx++',
                            cgen.Block([copy_back]))
            kernel_scatter.append(loop)

    self._components['LIB_KERNEL_SCATTER'] = kernel_scatter
def visit_SummedVectorFieldEvalNode(self, node):
    """Emit a sample + conversion cluster per field of a SummedField of
    vector fields, with a single status check after the last one."""
    self.visit(node.fields)
    self.visit(node.args)
    sampling_args = self._check_FieldSamplingArguments(node.args.ccode)
    stmts = []
    for fld, var, var2, var3 in zip(node.fields.obj, node.var,
                                    node.var2, node.var3):
        eval_code = fld.ccode_eval(var, var2, var3,
                                   fld.U, fld.V, fld.W, *sampling_args)
        conversions = []
        # C-grid velocities need no post-sample unit conversion.
        if fld.U.interp_method != 'cgrid_velocity':
            conversions = [
                c.Statement(f"{var} *= {fld.U.ccode_convert(*sampling_args)}"),
                c.Statement(f"{var2} *= {fld.V.ccode_convert(*sampling_args)}"),
            ]
        if fld.vector_type == '3D':
            conversions.append(
                c.Statement(f"{var3} *= {fld.W.ccode_convert(*sampling_args)}"))
        stmts += [c.Assign("err", eval_code), c.Block(conversions)]
    stmts += [c.Statement("CHECKSTATUS(err)")]
    node.ccode = c.Block(stmts)
def visit_Assign(self, node):
    """Lower a Python assignment to C. A list literal on the right-hand
    side becomes an in-place C array initialiser (validated to be a
    regular, rectangular nesting of lists); anything else becomes a plain
    C assignment."""
    target = node.targets[0]
    self.visit(target)
    self.visit(node.value)

    if not isinstance(node.value, ast.List):
        node.ccode = c.Assign(target.ccode, node.value.ccode)
        return

    # Detect in-place initialisation of multi-dimensional arrays: descend
    # the nested lists, wrapping the declaration in one ArrayOf per level.
    decl = c.Value('float', target.id)
    level = node.value
    while isinstance(level, ast.List):
        decl = c.ArrayOf(decl, len(level.elts))
        if isinstance(level.elts[0], ast.List):
            # All siblings must also be lists, and of equal length.
            if not all(isinstance(e, ast.List) for e in level.elts):
                raise TypeError(
                    "Non-list element discovered in array declaration")
            if not all(len(e.elts) == len(level.elts[0].elts)
                       for e in level.elts):
                raise TypeError(
                    "Irregular array length not allowed in array declaration"
                )
        level = level.elts[0]
    node.ccode = c.Initializer(decl, node.value.ccode)
    self.array_vars += [target.id]
def visit_FieldEvalNode(self, node):
    """Emit sample + unit conversion + error check for a scalar field
    evaluation."""
    self.visit(node.field)
    self.visit(node.args)
    eval_code = node.field.obj.ccode_eval(node.var, *node.args.ccode)
    conv = node.field.obj.ccode_convert(*node.args.ccode)
    node.ccode = c.Block([
        c.Assign("err", eval_code),
        c.Statement(f"{node.var} *= {conv}"),
        c.Statement("CHECKERROR(err)"),
    ])
def execute_parallel_block(self):
    """Assemble the `omp parallel` region: optional profiling prologue
    (PAPI counters or FLOP counting), grid initialisation, the time loop,
    and an optional profiling epilogue that reduces per-thread results
    under `omp critical`."""
    body = []
    if self.profiling:
        if self.numevents_papi > 0:
            body += [self.grid.define_papi_events]
            body.append(cgen.Statement(
                "opesci_papi_start_counters(numevents, events)"))
        else:
            # FLOP-counting path: declare the locals the epilogue reads.
            for ctype, cname in (("float", "real_time"),
                                 ("float", "proc_time"),
                                 ("float", "mflops"),
                                 ("long long", "flpins")):
                body.append(cgen.Value(ctype, cname))
            body.append(cgen.Statement(
                "opesci_flops(&real_time, &proc_time, &flpins, &mflops)"))
    body.append(self.grid.initialise)
    body.append(self.execute_time_loop())
    if self.profiling:
        if self.numevents_papi > 0:
            body.append(cgen.Statement(
                "opesci_papi_read_counters(numevents, counters)"))
            body.append(cgen.Pragma("omp critical"))
            body.append(cgen.Block(self.grid.sum_papi_events()))
        else:
            body.append(cgen.Statement(
                "opesci_flops(&real_time, &proc_time, &flpins, &mflops)"))
            body.append(cgen.Pragma("omp critical"))
            # Reduce the per-thread timings/flops into the shared struct.
            body.append(cgen.Block([
                cgen.Assign("profiling->g_rtime",
                            "fmax(profiling->g_rtime, real_time)"),
                cgen.Assign("profiling->g_ptime",
                            "fmax(profiling->g_ptime, proc_time)"),
                cgen.Statement("profiling->g_mflops += mflops;"),
            ]))
    return [cgen.Pragma("omp parallel"), cgen.Block(body)]
def visit_SummedFieldEvalNode(self, node):
    """Emit a sample + conversion + error-check cluster for each component
    field of a SummedField of scalar fields."""
    self.visit(node.fields)
    self.visit(node.args)
    stmts = []
    for fld, var in zip(node.fields.obj, node.var):
        eval_code = fld.ccode_eval(var, *node.args.ccode)
        conv = fld.ccode_convert(*node.args.ccode)
        stmts += [c.Assign("err", eval_code),
                  c.Statement(f"{var} *= {conv}"),
                  c.Statement("CHECKERROR(err)")]
    node.ccode = c.Block(stmts)
def visit_SummedFieldEvalNode(self, node):
    """Emit a sample + conversion + status-check cluster for each component
    field of a SummedField, using validated sampling arguments."""
    self.visit(node.fields)
    self.visit(node.args)
    sampling_args = self._check_FieldSamplingArguments(node.args.ccode)
    stmts = []
    for fld, var in zip(node.fields.obj, node.var):
        eval_code = fld.ccode_eval(var, *sampling_args)
        conv = fld.ccode_convert(*sampling_args)
        stmts += [c.Assign("err", eval_code),
                  c.Statement(f"{var} *= {conv}"),
                  c.Statement("CHECKSTATUS(err)")]
    node.ccode = c.Block(stmts)
def execute_function_body(self):
    """Assemble the body of the generated entry function: optional PAPI
    initialisation, constants, field declarations, the parallel block,
    field store-back, and the return statement."""
    body = []
    if self.profiling:
        body.append(cgen.Assign(cgen.Value("int", "assign"),
                                "opesci_papi_init()"))
    body.append(self.grid.define_constants)
    body.append(self.grid.declare_fields)
    body.extend(self.execute_parallel_block())
    body.append(self.grid.store_fields)
    body.append(cgen.Statement("return 0"))
    return cgen.Block(body)
def visit_Expression(self, o):
    """Lower an Expression node: an Initializer when it declares its
    left-hand side, otherwise a plain Assign; pragmas, if any, are
    prepended in a Module."""
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    if o.init:
        stmt = c.Initializer(c.Value(o.expr.lhs._C_typename, lhs), rhs)
    else:
        stmt = c.Assign(lhs, rhs)
    return c.Module(list(o.pragmas) + [stmt]) if o.pragmas else stmt
def visit_FieldEvalNode(self, node):
    """Emit sample + optional unit conversion + status check for a scalar
    field evaluation."""
    self.visit(node.field)
    self.visit(node.args)
    eval_code = node.field.obj.ccode_eval(node.var, *node.args.ccode)
    stmts = [c.Assign("err", eval_code)]
    if node.convert:
        # Conversion is optional on this node type.
        conv = node.field.obj.ccode_convert(*node.args.ccode)
        stmts.append(c.Statement(f"{node.var} *= {conv}"))
    stmts.append(c.Statement("CHECKSTATUS(err)"))
    node.ccode = c.Block(stmts)
def visit_NestedFieldEvalNode(self, node):
    """Emit a try-each-field loop for a NestedField of scalar fields: each
    candidate is sampled in turn until one does not report out-of-bounds."""
    self.visit(node.fields)
    self.visit(node.args)
    loop_body = []
    for fld in node.fields.obj:
        eval_code = fld.ccode_eval(node.var, *node.args.ccode)
        conv = fld.ccode_convert(*node.args.ccode)
        loop_body += [
            c.Assign("err", eval_code),
            c.Statement(f"{node.var} *= {conv}"),
            c.If("err != ERROR_OUT_OF_BOUNDS ",
                 c.Block([c.Statement("CHECKERROR(err)"),
                          c.Statement("break")])),
        ]
    # Fell through every field: surface the last error and leave the loop.
    loop_body += [c.Statement("CHECKERROR(err)"), c.Statement("break")]
    node.ccode = c.While("1==1", c.Block(loop_body))
def visit_FieldEvalNode(self, node):
    """Emit sample + conversion + error check for either a UV vector
    evaluation (when a second target variable is present) or a plain
    scalar field evaluation."""
    self.visit(node.field)
    self.visit(node.args)
    fld = node.field.obj
    if node.var2:
        # evaluation UV Field: sample both components, convert each with
        # its own fieldset velocity conversion.
        eval_code = fld.ccode_evalUV(node.var, node.var2, *node.args.ccode)
        conv_stat = c.Block([
            c.Statement(f"{node.var} *= {fld.fieldset.U.ccode_convert(*node.args.ccode)}"),
            c.Statement(f"{node.var2} *= {fld.fieldset.V.ccode_convert(*node.args.ccode)}"),
        ])
    else:
        eval_code = fld.ccode_eval(node.var, *node.args.ccode)
        conv_stat = c.Statement(
            f"{node.var} *= {fld.ccode_convert(*node.args.ccode)}")
    node.ccode = c.Block([c.Assign("err", eval_code), conv_stat,
                          c.Statement("CHECKERROR(err)")])
def visit_VectorFieldEvalNode(self, node):
    """Emit sample + per-component conversion + error check for a vector
    field evaluation; the W component is converted only when a third
    target variable exists."""
    self.visit(node.field)
    self.visit(node.args)
    fld = node.field.obj
    eval_code = fld.ccode_eval(node.var, node.var2, node.var3,
                               fld.U, fld.V, fld.W, *node.args.ccode)
    conversions = [
        c.Statement(f"{node.var} *= {fld.U.ccode_convert(*node.args.ccode)}"),
        c.Statement(f"{node.var2} *= {fld.V.ccode_convert(*node.args.ccode)}"),
    ]
    if node.var3:
        conversions.append(
            c.Statement(f"{node.var3} *= {fld.W.ccode_convert(*node.args.ccode)}"))
    node.ccode = c.Block([c.Assign("err", eval_code),
                          c.Block(conversions),
                          c.Statement("CHECKERROR(err)")])
def generate(self, funcname, field_args, kernel_ast, adaptive=False):
    """Generate the full C source for a JIT particle kernel with separate
    forward- and backward-in-time driver loops.

    :param funcname: name of the inner kernel function invoked per particle
    :param field_args: dict of field name -> field, passed as CField pointers
    :param kernel_ast: cgen AST of the kernel function
    :param adaptive: accepted for interface compatibility; not used here
    :returns: complete C source as a single string
    """
    ccode = []
    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # Generate type definition for particle type
    vdecl = [c.POD(dtype, var) for var, dtype in self.ptype.var_types.items()]
    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]
    # Insert kernel code
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    fargs_str = ", ".join(['particles[p].time', 'particles[p].dt'] + list(field_args.keys()))
    # Inner loop nest for forward runs: step until the remaining time
    # (endtime - time) drops below the tolerance __tol
    dt_fwd = c.Statement("__dt = fmin(particles[p].dt, endtime - particles[p].time)")
    body_fwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                dt_fwd]
    time_fwd = c.While("__dt > __tol", c.Block(body_fwd))
    part_fwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_fwd, time_fwd]))
    # Inner loop nest for backward runs: __dt is negative, so loop while it
    # is below -__tol
    dt_bwd = c.Statement("__dt = fmax(particles[p].dt, endtime - particles[p].time)")
    body_bwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                dt_bwd]
    time_bwd = c.While("__dt < -1. * __tol", c.Block(body_bwd))
    part_bwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_bwd, time_bwd]))
    # Choose forward or backward loop at runtime from the sign of dt
    time_if = c.If("dt > 0.0", c.Block([part_fwd]), c.Block([part_bwd]))
    fbody = c.Block([c.Value("int", "p"), c.Value("KernelOp", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"), time_if])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def gen_shape_fn(output_shapes):
    """Generate a TensorFlow shape-inference C++ lambda for an op.

    :param output_shapes: iterable of (output_index, dims) pairs, where dims
        is a sequence of integer dimension sizes
    :returns: the lambda source as a string, e.g.
        ``[](InferenceContext* c) { auto s0 = c->MakeShape({...}); ... }``

    CLEANUP: removed the stray statement-terminating semicolons (a C-ism)
    and the intermediate throwaway list of dim strings.
    """
    shape_fn = [c.Line(
        "[](::tensorflow::shape_inference::InferenceContext* c)")]
    func_body = []
    for out, dims in output_shapes:
        # C++ brace initializer list for the dimensions, e.g. "{2,3}"
        dim_init_list = "{" + ",".join(str(d) for d in dims) + "}"
        # make a shape variable per output, named s<index>
        shape_name = "s" + str(out)
        func_body.append(c.Assign("auto " + shape_name,
                                  "c->MakeShape(" + dim_init_list + ")"))
        func_body.append(c.Statement(
            "c->set_output(" + str(out) + ", " + shape_name + ")"))
    func_body.append(c.Statement("return Status::OK()"))
    shape_fn.append(c.Block(func_body))
    return str(c.Module(shape_fn))
def visit_Expression(self, o):
    """Lower an Expression node to a C assignment statement."""
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Assign(lhs, rhs)
def to_ops_dat(function, block):
    """Generate the `ops_decl_dat` declarations for a Devito function.

    For a TimeFunction, one ops_dat is declared per time slice (stored in a
    C array); otherwise a single ops_dat is declared. Also emits the dim /
    base / d_p / d_m metadata array initialisations that `ops_decl_dat`
    consumes.

    :param function: the Devito (Time)Function being wrapped
    :param block: the ops_block the dat belongs to
    :returns: (res, dats) where res is the list of generated nodes and dats
        maps names to the ops_dat access used to refer to them later
    """
    # Spatial rank only: the time dimension is handled separately below.
    ndim = function.ndim - (1 if function.is_TimeFunction else 0)
    # Metadata arrays expected by ops_decl_dat.
    dim = SymbolicArray(name="%s_dim" % function.name, dimensions=(ndim, ), dtype=np.int32)
    base = SymbolicArray(name="%s_base" % function.name, dimensions=(ndim, ), dtype=np.int32)
    d_p = SymbolicArray(name="%s_d_p" % function.name, dimensions=(ndim, ), dtype=np.int32)
    d_m = SymbolicArray(name="%s_d_m" % function.name, dimensions=(ndim, ), dtype=np.int32)
    res = []
    dats = {}
    ops_decl_dat_call = []
    if function.is_TimeFunction:
        # Strip the time axis out of shape/padding/halo; declare one
        # ops_dat per time slice.
        time_pos = function._time_position
        time_index = function.indices[time_pos]
        time_dims = function.shape[time_pos]
        dim_shape = function.shape[:time_pos] + function.shape[time_pos + 1:]
        padding = function.padding[:time_pos] + function.padding[time_pos + 1:]
        halo = function.halo[:time_pos] + function.halo[time_pos + 1:]
        base_val = [0 for i in range(ndim)]
        # Positive/negative boundary depths: padding plus halo on each side
        # (negative side is negated, as OPS expects).
        d_p_val = tuple([p[0] + h[0] for p, h in zip(padding, halo)])
        d_m_val = tuple([-(p[1] + h[1]) for p, h in zip(padding, halo)])
        ops_dat_array = SymbolicArray(
            name="%s_dat" % function.name,
            dimensions=[time_dims],
            dtype="ops_dat",
        )
        # C array holding one ops_dat handle per time slice.
        ops_decl_dat_call.append(
            Element(
                cgen.Statement(
                    "%s %s[%s]" %
                    (ops_dat_array.dtype, ops_dat_array.name, time_dims))))
        for i in range(time_dims):
            access = FunctionTimeAccess(function, i)
            ops_dat_access = ArrayAccess(ops_dat_array, i)
            call = Call("ops_decl_dat", [
                block, 1, dim, base, d_m, d_p, access,
                String(function._C_typedata),
                String("%s%s%s" % (function.name, time_index, i))
            ], False)
            # Later references use the symbolic time index (e.g. t0),
            # not the literal slice number.
            dats["%s%s%s" % (function.name, time_index, i)] = ArrayAccess(
                ops_dat_array, Symbol("%s%s" % (time_index, i)))
            ops_decl_dat_call.append(Element(cgen.Assign(ops_dat_access, call)))
    else:
        # Plain Function: a single ops_dat handle.
        ops_dat = OPSDat("%s_dat" % function.name)
        dats[function.name] = ops_dat
        d_p_val = tuple(
            [p[0] + h[0] for p, h in zip(function.padding, function.halo)])
        d_m_val = tuple(
            [-(p[1] + h[1]) for p, h in zip(function.padding, function.halo)])
        dim_shape = function.shape
        base_val = [0 for i in function.shape]
        ops_decl_dat_call.append(
            Element(
                cgen.Initializer(
                    ops_dat,
                    Call("ops_decl_dat", [
                        block, 1, dim, base, d_m, d_p,
                        FunctionTimeAccess(function, 0),
                        String(function._C_typedata),
                        String(function.name)
                    ], False))))
    # Metadata initialisations must precede the ops_decl_dat calls that
    # reference them.
    res.append(Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_shape)))))
    res.append(Expression(ClusterizedEq(Eq(base, ListInitializer(base_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val)))))
    res.extend(ops_decl_dat_call)
    return res, dats
def visit_Expression(self, o):
    """Lower an Expression node to a C assignment, wrapped in a Module
    together with its pragmas when any are attached."""
    assign = c.Assign(ccode(o.expr.lhs, dtype=o.dtype),
                      ccode(o.expr.rhs, dtype=o.dtype))
    if not o.pragmas:
        return assign
    return c.Module(list(o.pragmas) + [assign])
def visit_Expression(self, o):
    """Lower an Expression node to a plain C assignment (no dtype hint)."""
    lhs = ccode(o.expr.lhs)
    rhs = ccode(o.expr.rhs)
    return c.Assign(lhs, rhs)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Generate the full C source for a JIT particle kernel, including
    particle backup/restore helpers used to roll back a particle when the
    kernel requests a REPEAT.

    :param funcname: name of the inner kernel function invoked per particle
    :param field_args: dict of field name -> field, passed as CField pointers
    :param const_args: dict of constant name -> value, passed as floats
    :param kernel_ast: cgen AST of the kernel function
    :param c_include: optional extra C source to splice in before the kernel
    :returns: complete C source as a single string
    """
    ccode = []
    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # Generate type definition for particle type
    vdecl = []
    for v in self.ptype.variables:
        if v.dtype == np.uint64:
            # uint64 variables hold opaque pointers (stored as void*)
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))
    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]
    # set_particle_backup: snapshot all plain variables (not dt/state,
    # not pointer-typed) so a failed kernel call can be rolled back.
    args = [c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                 spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [c.Assign(("particle_backup->%s" % v.name),
                              ("particle->%s" % v.name))]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]
    # get_particle_backup: restore the snapshot (mirror of the setter).
    args = [c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                 spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [c.Assign(("particle->%s" % v.name),
                              ("particle_backup->%s" % v.name))]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]
    if c_include:
        ccode += [c_include]
    # Insert kernel code
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    fargs_str = ", ".join(['particles[p].time'] + list(field_args.keys())
                          + list(const_args.keys()))
    # Inner loop nest for forward runs
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    # __dt = remaining step, clipped to the distance from endtime
    dt_pos = c.Assign("__dt",
                      "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    pdt_eq_dt_pos = c.Assign("particles[p].dt", "__dt * sign_dt")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    # Skip particles whose start time lies beyond endtime in the run direction
    notstarted_continue = c.If("(sign_end_part != sign_dt) && (particles[p].dt != 0)",
                               c.Statement("continue"))
    body = [c.Statement("set_particle_backup(&particle_backup, &(particles[p]))")]
    body += [pdt_eq_dt_pos]
    body += [c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    body += [c.If("res == SUCCESS",
                  c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                           dt_pos, dt_0_break, c.Statement("continue")]))]
    # On REPEAT, roll the particle back to the snapshot; otherwise abort
    body += [c.If("res == REPEAT",
                  c.Block([c.Statement("get_particle_backup(&particle_backup, &(particles[p]))"),
                           dt_pos, c.Statement("break")]),
                  c.Statement("break"))]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
    part_loop = c.For("p = 0", "p < num_particles", "++p",
                      c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
    fbody = c.Block([c.Value("int", "p, sign_dt, sign_end_part"),
                     c.Value("ErrorCode", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"),
                     sign_dt, particle_backup, part_loop])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def _cgen(self):
    """Return a cgen Assign node rendering `<lvalue> = <rvalue>;`.

    Uses the idiomatic `str(...)` instead of calling `.__str__()` directly;
    behaviour is identical.
    """
    return cgen.Assign(str(self.lvalue), str(self.rvalue))
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Generate the full C source for a JIT particle kernel over a
    structure-of-arrays particle set (one pointer per particle variable),
    including backup/restore helpers, optional `update_next_dt` support,
    and the state-machine driver loop (EVALUATE/REPEAT/SUCCESS/DELETE).

    :param funcname: name of the inner kernel function invoked per particle
    :param field_args: dict of field name -> field, passed as CField pointers
    :param const_args: dict of constant name -> value, passed as doubles
    :param kernel_ast: cgen AST of the kernel function
    :param c_include: optional extra C source spliced in before the kernel
    :returns: complete C source as a single string
    """
    ccode = []
    pname = self.ptype.name + 'p'

    # ==== Add include for Parcels and math header ==== #
    ccode += [str(c.Include("parcels.h", system=False))]
    #ccode += [str(c.Include("math.h", system=False))]  # removed by Lyc because it is already in parcels.h ???
    #ccode += [str(c.Include("stdbool.h", system=False))]  # added by Luc to accomodate crossdike.h booleans
    # Globals backing the optional update_next_dt() helper below.
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    ccode += [
        str(
            c.Assign(
                'const int ngrid',
                str(self.fieldset.gridset.size if self.
                    fieldset is not None else 1)))
    ]

    # ==== Generate type definition for particle type ==== #
    # SoA layout: one pointer per variable.
    vdeclp = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
    ]
    # Generate type definition for single particle type (used for backups;
    # pointer-typed uint64 variables are excluded)
    vdecl = [
        c.POD(v.dtype, v.name) for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    ccode += [
        str(
            c.Typedef(
                c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]

    # set_particle_backup: snapshot all plain variables (not dt/state)
    # so a failed kernel call can be rolled back.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particles->%s[pnum]" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]

    # get_particle_backup: restore the snapshot (mirror of the setter).
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particles->%s[pnum]" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]

    # update_next_dt: only emitted when the kernel source actually calls it.
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]

    if c_include:
        ccode += [c_include]

    # ==== Insert kernel code ==== #
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("double", const)]
    fargs_str = ", ".join(['particles->time[pnum]'] +
                          list(field_args.keys()) +
                          list(const_args.keys()))

    # ==== statement clusters use to compose 'body' variable and variables 'time_loop' and 'part_loop' ==== ##
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign(
        "sign_end_part",
        "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    # If the kernel called update_next_dt(), apply the deferred dt now.
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt")
        ]))
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )  # original

    # Detect a kernel that changed dt mid-step: force a REPEAT.
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))

    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))

    # Skip particles whose integration would run against the run direction;
    # mark them SUCCESS once they are at/past endtime.
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue")
        ]))

    # ==== main computation body ==== #
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]")
    ]
    body += [
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
    ]
    # Prefer a state the kernel itself set over the call's return value.
    body += [
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]"))
    ]
    body += [check_pdt]
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement(
                    "particles->time[pnum] += particles->dt[pnum]"),
                update_pdt, dt_pos, sign_end_part,
                c.If(
                    "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                    c.Assign("res", "EVALUATE")),
                c.If("sign_dt != sign_end_part",
                     c.Assign("__dt", "0")), update_state, dt_0_break
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, particles, pnum)"
                ), dt_pos, sign_end_part,
                c.If("sign_dt != sign_end_part",
                     c.Assign("__dt", "0")), update_state,
                c.Statement("break")
            ]))
    ]
    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part, reset_res_state, dt_pos, notstarted_continue,
            time_loop
        ]))
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double",
                "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def visit_Call(self, node):
    """Generate C code for simple C-style function calls.

    Please note that starred and keyword arguments are currently not
    supported. `print(...)` calls get a hand-rolled printf translation;
    everything else is rendered as `name(args)`, optionally wrapped in an
    err/CHECKSTATUS block for Parcels' custom C functions.

    BUG FIX: the final handler was a bare ``except:`` which swallowed the
    original traceback (and even BaseExceptions); it now catches
    ``Exception`` and chains the cause onto the raised RuntimeError.
    """
    pointer_args = False
    parcels_customed_Cfunc = False
    if isinstance(node.func, PrintNode):
        # Write our own Print parser because Python3-AST does not seem to have one
        if isinstance(node.args[0], ast.Str):
            node.ccode = str(
                c.Statement('printf("%s\\n")' % (node.args[0].s)))
        elif isinstance(node.args[0], ast.Name):
            node.ccode = str(
                c.Statement('printf("%%f\\n", %s)' % (node.args[0].id)))
        elif isinstance(node.args[0], ast.BinOp):
            # print("fmt" % args): collect the argument list from the RHS,
            # which may be a single value, a name, or a tuple of elements.
            if hasattr(node.args[0].right, 'ccode'):
                args = node.args[0].right.ccode
            elif hasattr(node.args[0].right, 'id'):
                args = node.args[0].right.id
            elif hasattr(node.args[0].right, 'elts'):
                args = []
                for a in node.args[0].right.elts:
                    if hasattr(a, 'ccode'):
                        args.append(a.ccode)
                    elif hasattr(a, 'id'):
                        args.append(a.id)
            else:
                args = []
            s = 'printf("%s\\n"' % node.args[0].left.s
            if isinstance(args, str):
                s = s + (", %s)" % args)
            else:
                for arg in args:
                    s = s + (", %s" % arg)
                s = s + ")"
            node.ccode = str(c.Statement(s))
        else:
            raise RuntimeError(
                "This print statement is not supported in Python3 version of Parcels"
            )
    else:
        for a in node.args:
            self.visit(a)
            # Sentinel ccodes injected upstream flag Parcels' custom C
            # functions and whether their args are passed by pointer.
            if a.ccode == 'parcels_customed_Cfunc_pointer_args':
                pointer_args = True
                parcels_customed_Cfunc = True
            elif a.ccode == 'parcels_customed_Cfunc':
                parcels_customed_Cfunc = True
            elif isinstance(a, FieldNode) or isinstance(
                    a, VectorFieldNode):
                a.ccode = a.obj.ccode_name
            elif isinstance(a, ParticleNode):
                continue
            elif pointer_args:
                a.ccode = "&%s" % a.ccode
        # bool slice: when pointer_args is True the leading sentinel
        # argument is skipped (True == 1).
        ccode_args = ", ".join([a.ccode for a in node.args[pointer_args:]])
        try:
            if isinstance(node.func, str):
                node.ccode = node.func + '(' + ccode_args + ')'
            else:
                self.visit(node.func)
                rhs = "%s(%s)" % (node.func.ccode, ccode_args)
                if parcels_customed_Cfunc:
                    # Custom C functions return a status that must be checked.
                    node.ccode = str(
                        c.Block([
                            c.Assign("err", rhs),
                            c.Statement("CHECKSTATUS(err)")
                        ]))
                else:
                    node.ccode = rhs
        except Exception as e:
            raise RuntimeError(
                "Error in converting Kernel to C. See http://oceanparcels.org/#writing-parcels-kernels for hints and tips"
            ) from e