def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the full C source for a particle-loop program as a string.

    The emitted source contains: the ``parcels.h``/``math.h`` includes, a
    struct typedef for the particle type, the optional user-supplied C
    include text, the kernel function (from ``kernel_ast``), and an outer
    ``particle_loop`` driver that repeatedly invokes ``funcname`` on each
    particle until ``endtime`` is reached.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping of field names passed as ``CField*`` arguments
    :param const_args: mapping of constant names passed as ``float`` arguments
    :param kernel_ast: cgen AST of the kernel function body
    :param c_include: optional extra C source text inserted verbatim
    :returns: complete C source code as a single string
    """
    ccode = []
    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # Generate type definition for particle type; uint64 variables are
    # emitted as void* pointers, everything else as plain PODs.
    vdecl = []
    for v in self.ptype.variables:
        if v.dtype == np.uint64:
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))
    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]
    if c_include:
        ccode += [c_include]
    # Insert kernel code
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    # Arguments forwarded to the kernel call inside the time loop
    fargs_str = ", ".join(['particles[p].time', 'sign_dt * __dt']
                          + list(field_args.keys()) + list(const_args.keys()))
    # Inner loop nest for forward runs; sign_dt distinguishes forward from
    # backward integration, __dt is clamped so the particle never steps
    # past endtime.
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign("__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    # Skip particles that are not due to start yet (release after endtime
    # in the current integration direction)
    notstarted_continue = c.If("(sign_end_part != sign_dt) && (particles[p].dt != 0)",
                               c.Statement("continue"))
    body = [c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    body += [c.If("res == SUCCESS",
                  c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                           dt_pos, dt_0_break, c.Statement("continue")]))]
    body += [c.If("res == REPEAT",
                  c.Block([dt_pos, c.Statement("continue")]),
                  c.Statement("break"))]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
    part_loop = c.For("p = 0", "p < num_particles", "++p",
                      c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
    fbody = c.Block([c.Value("int", "p, sign_dt, sign_end_part"),
                     c.Value("ErrorCode", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"),
                     sign_dt, part_loop])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def push_stack(self, scope, obj):
    """Record a cgen stack allocation for ``obj`` under ``scope``."""
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    decl = "%s%s %s" % (obj.name, dims, "__attribute__((aligned(64)))")
    entries = self.stack.setdefault(scope, OrderedDict())
    entries[obj] = c.POD(obj.dtype, decl)
def push_stack(self, scope, obj):
    """Generate a cgen object that allocates ``obj`` on the stack."""
    entries = self.stack.setdefault(scope, OrderedDict())
    if obj.is_LocalObject:
        # Composite/local objects carry their own C type name
        entries[obj] = c.Value(obj._C_typename, obj.name)
        return
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    aligned = "__attribute__((aligned(%d)))" % obj._data_alignment
    entries[obj] = c.POD(obj.dtype, "%s%s %s" % (obj.name, dims, aligned))
def _alloc_array_on_low_lat_mem(self, site, obj, storage):
    """Allocate an Array in the low latency memory."""
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    aligned = self.lang['aligned'](obj._data_alignment)
    declaration = c.POD(obj.dtype, "%s%s %s" % (obj.name, dims, aligned))
    storage.update(obj, site, allocs=declaration)
def push_array_on_stack(self, scope, obj):
    """Define an Array on the stack."""
    entries = self.stack.setdefault(scope, OrderedDict())
    # Skip objects already defined in any scope
    if obj in flatten(self.stack.values()):
        return
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    aligned = "__attribute__((aligned(%d)))" % obj._data_alignment
    declaration = "%s%s %s" % (obj.name, dims, aligned)
    entries[obj] = Element(c.POD(obj.dtype, declaration))
def generate_optimmat_code(self, pos, name=None):
    """Generates the code for computing the local optimization matrix
    for the optimization over site nr. `pos`

    The function has the following signature:

        DTYPE const *const A, DTYPE const *const X_0, ...,
        DTYPE const *const X_N, DTYPE *const result

    :param pos: The local tensor to copy (should be `< len(X)`)
    :param name: Name of the C function (default: get_optimmat_%(pos))
    :returns: cgen.FunctionBody with given name
    """
    name = 'get_optimmat_%i' % pos if name is None else name
    finalization_src = '''
    if (mid < {nr_meas:d}) {{
        for (uint i = 0; i < {pdim:d}; ++i) {{
            for (uint k_l = 0; k_l < {rank_l:d}; ++k_l) {{
                for (uint k_r = 0; k_r < {rank_r:d}; ++k_r) {{
                    result[mid * {rank_l:d} * {pdim:d} * {rank_r:d} + k_l * {pdim:d} * {rank_r:d} + i * {rank_r:d} + k_r] = left_c[k_l] * current_row[{offset:d} + i] * right_c[k_r];
                }}
            }}
        }}
    }}
    '''.format(nr_meas=self._meas,
               pdim=self._dims[pos],
               # boundary sites have a virtual rank of 1 on the open side
               rank_l=1 if pos == 0 else self._ranks[pos - 1],
               rank_r=1 if pos == self._sites - 1 else self._ranks[pos],
               offset=sum(self._dims[:pos]))
    finalization = c.LiteralLines(finalization_src)
    arg_decls = [ConstPointerToConstDecl(self._dtype, 'A')]
    arg_decls += [ConstPointerToConstDecl(self._dtype, 'X%i' % i)
                  for i in range(self._sites)]
    arg_decls += [c.Pointer(c.Const(c.POD(self._dtype, 'result')))]
    # BUGFIX: the declaration previously hard-coded 'get_optimmat_%i' % pos,
    # silently ignoring a caller-supplied `name`; use `name` as documented.
    return c.FunctionBody(
        ccu.CudaGlobal(
            c.FunctionDeclaration(c.Value('void', name),
                                  arg_decls=arg_decls)),
        c.Block(
            self.declaration(pos) + self.left_contractions(pos) +
            self.right_contractions(pos) + [finalization]))
def visit_ArrayCast(self, o):
    """Build a cgen type cast for an :class:`AbstractFunction`."""
    f = o.function
    # Drop the outermost (slowest-varying) dimension from the cast shape
    dims = ''.join("[%s]" % ccode(extent) for extent in f.symbolic_shape[1:])
    lhs = c.POD(f.dtype, '(*restrict %s)%s %s'
                % (f.name, dims, "__attribute__((aligned(64)))"))
    rhs = '(%s (*)%s) %s' % (c.dtype_to_ctype(f.dtype), dims, '%s_vec' % f.name)
    return c.Initializer(lhs, rhs)
def _alloc_array_on_low_lat_mem(self, scope, obj, storage):
    """Allocate an Array in the low latency memory."""
    entries = storage._low_lat_mem.setdefault(scope, OrderedDict())
    # Skip objects already allocated in any scope
    if obj in flatten(storage._low_lat_mem.values()):
        return
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    aligned = "__attribute__((aligned(%d)))" % obj._data_alignment
    declaration = "%s%s %s" % (obj.name, dims, aligned)
    entries[obj] = Element(c.POD(obj.dtype, declaration))
def push_object_on_stack(self, scope, obj):
    """Define an Array or a composite type (e.g., a struct) on the stack."""
    entries = self.stack.setdefault(scope, OrderedDict())
    if obj.is_LocalObject:
        # Composite/local objects carry their own C type name
        entries[obj] = Element(c.Value(obj._C_typename, obj.name))
        return
    dims = "".join("[%s]" % ccode(extent) for extent in obj.symbolic_shape)
    aligned = "__attribute__((aligned(%d)))" % obj._data_alignment
    entries[obj] = Element(c.POD(obj.dtype, "%s%s %s" % (obj.name, dims, aligned)))
def push_stack(self, scope, obj):
    """Generate a cgen statement that allocates ``obj`` on the stack."""
    ctype = c.dtype_to_ctype(obj.dtype)
    dims = "".join("[%d]" % extent for extent in obj.shape)
    declaration = c.POD(ctype, "%s%s %s"
                        % (obj.name, dims, "__attribute__((aligned(64)))"))
    entries = self.stack.setdefault(scope, [])
    # Avoid duplicate declarations within the same scope
    if declaration not in entries:
        entries.append(declaration)
def ccode(self):
    """Returns the C code generated by this kernel.

    This function generates the internal code block from Iteration
    and Expression objects, and adds the necessary template code
    around it.
    """
    header_vars = [c.Pointer(c.POD(v.dtype, '%s_vec' % v.name))
                   for v in self.signature]
    header = c.Extern(
        "C", c.FunctionDeclaration(c.Value('int', self.name), header_vars))
    # Cast each flat *_vec pointer to a multi-dimensional array pointer
    casts = []
    for v in self.signature:
        dims = ''.join('[%d]' % d for d in v.shape[1:])
        lhs = c.POD(v.dtype, '(*%s)%s' % (v.name, dims))
        rhs = '(%s (*)%s) %s' % (c.dtype_to_ctype(v.dtype), dims,
                                 '%s_vec' % v.name)
        casts.append(c.Initializer(lhs, rhs))
    body = [e.ccode for e in self.expressions]
    return c.FunctionBody(header,
                          c.Block(casts + body + [c.Statement("return 0")]))
def _ccasts(self):
    """Generate data casts."""
    aligned = "__attribute__((aligned(64)))"
    symbols = [f for f in self.parameters
               if isinstance(f, (SymbolicData, TensorFunction))]
    # Cast each flat *_vec pointer to an aligned multi-dimensional
    # array pointer, dropping the outermost dimension
    casts = []
    for v in symbols:
        dims = ''.join("[%s]" % i.ccode for i in v.indices[1:])
        lhs = c.POD(v.dtype, '(*restrict %s)%s %s' % (v.name, dims, aligned))
        rhs = '(%s (*)%s) %s' % (c.dtype_to_ctype(v.dtype), dims,
                                 '%s_vec' % v.name)
        casts.append(c.Initializer(lhs, rhs))
    return casts
def _args_cast(self, args):
    """Build cgen type casts for an iterable of :class:`Argument`."""
    casts = []
    for arg in args:
        if arg.is_TensorArgument:
            # Flat *_vec pointer -> aligned multi-dimensional array pointer
            dims = ''.join("[%s]" % ccode(extent)
                           for extent in arg.provider.symbolic_shape[1:])
            lhs = c.POD(arg.dtype, '(*restrict %s)%s %s'
                        % (arg.name, dims, "__attribute__((aligned(64)))"))
            rhs = '(%s (*)%s) %s' % (c.dtype_to_ctype(arg.dtype), dims,
                                     '%s_vec' % arg.name)
            casts.append(c.Initializer(lhs, rhs))
        elif arg.is_PtrArgument:
            # Opaque pointer argument: cast from the underscore-prefixed name
            ctype = ctypes_to_C(arg.dtype)
            lhs = c.Pointer(c.Value(ctype, arg.name))
            rhs = '(%s*) %s' % (ctype, '_%s' % arg.name)
            casts.append(c.Initializer(lhs, rhs))
    return casts
def generate(self, funcname, field_args, kernel_ast, adaptive=False):
    """Assemble the full C source for a particle-loop program as a string.

    Emits includes, a particle-struct typedef, the kernel function, and a
    ``particle_loop`` driver with two separate loop nests: one for forward
    integration (dt > 0) and one for backward integration.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping of field names passed as ``CField*`` arguments
    :param kernel_ast: cgen AST of the kernel function body
    :param adaptive: unused in this body — presumably reserved for adaptive
        time stepping; TODO confirm with callers
    :returns: complete C source code as a single string
    """
    ccode = []
    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # Generate type definition for particle type
    vdecl = [c.POD(dtype, var) for var, dtype in self.ptype.var_types.items()]
    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]
    # Insert kernel code
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    fargs_str = ", ".join(['particles[p].time', 'particles[p].dt']
                          + list(field_args.keys()))
    # Inner loop nest for forward runs: __dt is clamped so the particle
    # never steps past endtime
    dt_fwd = c.Statement("__dt = fmin(particles[p].dt, endtime - particles[p].time)")
    body_fwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                dt_fwd]
    time_fwd = c.While("__dt > __tol", c.Block(body_fwd))
    part_fwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_fwd, time_fwd]))
    # Inner loop nest for backward runs (negative dt, clamped with fmax)
    dt_bwd = c.Statement("__dt = fmax(particles[p].dt, endtime - particles[p].time)")
    body_bwd = [c.Statement("res = %s(&(particles[p]), %s)" % (funcname, fargs_str)),
                c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                dt_bwd]
    time_bwd = c.While("__dt < -1. * __tol", c.Block(body_bwd))
    part_bwd = c.For("p = 0", "p < num_particles", "++p",
                     c.Block([dt_bwd, time_bwd]))
    # Select forward or backward loop nest at runtime based on dt sign
    time_if = c.If("dt > 0.0", c.Block([part_fwd]), c.Block([part_bwd]))
    fbody = c.Block([c.Value("int", "p"), c.Value("KernelOp", "res"),
                     c.Value("double", "__dt, __tol"),
                     c.Assign("__tol", "1.e-6"), time_if])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def declaration(self, pos):
    """Generates the declarative instructions for the optimizations
    over sites nr. `pos`

    Buffer sizes are taken as maxima over all sites so the same
    declarations fit any site's local tensors and contractions.

    :param pos: The local tensor to copy (should be `< len(X)`)
    :returns: List containing cgen Statements
    """
    max_ltens_size = max(self._ltens_sizes)
    # Virtual rank 1 on the open boundary of the chain
    max_left_size = 1 if pos == 0 else max(self._ranks[:pos])
    max_right_size = 1 if pos == self._sites - 1 else max(self._ranks[pos:])
    # NOTE(review): empty for self._sites <= 2 — max() would raise; presumably
    # chains are always longer than 2 sites, verify against callers
    max_tmat_size = max(self._ranks[i] * self._ranks[i + 1]
                        for i in range(self._sites - 2))
    init_statements = [
        c.LineComment("Define the row number the current thread is operating on"),
        c.Initializer(c.Const(c.POD(np.int32, 'mid')),
                      'threadIdx.x + blockIdx.x * blockDim.x'),
        c.LineComment("Allocate shared memory for the local tensors"),
        ccu.CudaShared(c.ArrayOf(c.POD(self._dtype, 'x_shared'),
                                 max_ltens_size)),
        c.LineComment("Allocate the left-, right-, and transfer contractions"),
        c.ArrayOf(c.POD(self._dtype, 'left_c'), max_left_size),
        c.ArrayOf(c.POD(self._dtype, 'right_c'), max_right_size),
        c.ArrayOf(c.POD(self._dtype, 'tmat_c'), max_tmat_size),
        c.ArrayOf(c.POD(self._dtype, 'buf_c'),
                  max(max_right_size, max_left_size)),
        c.LineComment("Shortcut for current row of design matrix"),
        c.LineComment("Carefull, current_row might be out of bounds!"),
        ConstPointerToConst(self._dtype, 'current_row',
                            'A + (mid * %i)' % sum(self._dims))
    ]
    return init_statements
def ConstPointerToConstDecl(dtype, name):
    """Returns a cgen variable declaration of a constant pointer to a
    constant of type `dtype`
    """
    pointee = c.Const(c.POD(dtype, name))
    return c.Const(c.Pointer(pointee))
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the full C source for a particle-loop program as a string.

    Compared to the simpler variants, this generator additionally emits:
    global ``_next_dt``/``_next_dt_set`` state, inline
    ``set_particle_backup``/``get_particle_backup`` helpers that snapshot
    and restore a particle (all variables except ``dt`` and ``state``), and
    an optional ``update_next_dt`` helper (only when the kernel source
    references it). The driver restores the backup when a kernel invocation
    fails, so unsuccessful steps leave the particle unchanged.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping of field names passed as ``CField*`` arguments
    :param const_args: mapping of constant names passed as ``float`` arguments
    :param kernel_ast: cgen AST of the kernel function body
    :param c_include: optional extra C source text inserted verbatim
    :returns: complete C source code as a single string
    """
    ccode = []
    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # Globals used by update_next_dt to defer a dt change to the next step
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    # Generate type definition for particle type; uint64 variables are
    # emitted as void* pointers, everything else as plain PODs.
    vdecl = []
    for v in self.ptype.variables:
        if v.dtype == np.uint64:
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]
    # ---- static inline set_particle_backup(): snapshot particle state ----
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(self.ptype.name, "particle"))
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                 spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        # dt and state are deliberately excluded from the backup
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particle->%s" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]
    # ---- static inline get_particle_backup(): restore particle state ----
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(self.ptype.name, "particle"))
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                 spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle->%s" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]
    # ---- optional static inline update_next_dt(): only emitted if the
    # kernel source actually references it ----
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(c.DeclSpecifier(c.Value("void", "update_next_dt"),
                                 spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]
    if c_include:
        ccode += [c_include]
    # Insert kernel code
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(self.ptype.name, "particles")),
        c.Value("double", "endtime"),
        c.Value("float", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    fargs_str = ", ".join(['particles[p].time'] + list(field_args.keys())
                          + list(const_args.keys()))
    # Inner loop nest for forward runs; sign_dt distinguishes forward from
    # backward integration, __dt is clamped so the particle never steps
    # past endtime
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign("sign_end_part",
                             "endtime - particles[p].time > 0 ? 1 : -1")
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles[p].dt", "__pdt_prekernels")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    # Skip particles that are not due to start yet in this direction
    notstarted_continue = c.If(
        "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
        c.Statement("continue"))
    body = [
        c.Statement("set_particle_backup(&particle_backup, &(particles[p]))")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))
    ]
    # A kernel that changed particles[p].dt triggers a REPEAT of the step
    check_pdt = c.If(
        "(res == SUCCESS) & (__pdt_prekernels != particles[p].dt)",
        c.Assign("res", "REPEAT"))
    body += [check_pdt]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    # Apply a deferred dt change requested via update_next_dt()
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles[p].dt", "_next_dt")
        ]))
    # On success/delete: advance time; otherwise restore the backup
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement("particles[p].time += particles[p].dt"),
                update_pdt, dt_pos, dt_0_break,
                c.Statement("continue")
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, &(particles[p]))"),
                dt_pos,
                c.Statement("break")
            ]))
    ]
    time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
    part_loop = c.For(
        "p = 0", "p < num_particles", "++p",
        c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
    fbody = c.Block([
        c.Value("int", "p, sign_dt, sign_end_part"),
        c.Value("ErrorCode", "res"),
        c.Value("float", "__pdt_prekernels"),
        c.Value("double", "__dt, __tol"),
        c.Assign("__tol", "1.e-6"),
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the full C source for a particle-loop program as a string.

    This variant uses a structure-of-arrays particle layout: the kernel and
    driver receive a pointer-of-arrays struct (``self.ptype.name + 'p'``)
    and index particles as ``particles->field[pnum]``. A separate scalar
    struct (``self.ptype.name``) is emitted only for the backup/restore
    helpers. Like the backup variant, it emits ``_next_dt`` globals,
    ``set_particle_backup``/``get_particle_backup``, and an optional
    ``update_next_dt`` helper, and the driver loops while a particle's
    state is EVALUATE or REPEAT.

    :param funcname: name of the generated kernel function to invoke
    :param field_args: mapping of field names passed as ``CField*`` arguments
    :param const_args: mapping of constant names passed as ``double`` arguments
    :param kernel_ast: cgen AST of the kernel function body
    :param c_include: optional extra C source text inserted verbatim
    :returns: complete C source code as a single string
    """
    ccode = []
    # Name of the pointer-of-arrays (SoA) particle struct
    pname = self.ptype.name + 'p'
    # ==== Add include for Parcels and math header ==== #
    ccode += [str(c.Include("parcels.h", system=False))]
    #ccode += [str(c.Include("math.h", system=False))]  # removed by Lyc because it is already in parcels.h ???
    #ccode += [str(c.Include("stdbool.h", system=False))]  # added by Luc to accomodate crossdike.h booleans
    # Globals used by update_next_dt to defer a dt change to the next step
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    ccode += [
        str(
            c.Assign(
                'const int ngrid',
                str(self.fieldset.gridset.size if self.
                    fieldset is not None else 1)))
    ]
    # ==== Generate type definition for particle type ==== #
    # SoA struct: one pointer per particle variable
    vdeclp = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
    ]
    # Generate type definition for single particle type (used for backups)
    vdecl = [
        c.POD(v.dtype, v.name) for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    ccode += [
        str(
            c.Typedef(
                c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]
    # ---- static inline set_particle_backup(): snapshot particle pnum ----
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        # dt and state are deliberately excluded from the backup
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particles->%s[pnum]" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]
    # ---- static inline get_particle_backup(): restore particle pnum ----
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particles->%s[pnum]" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]
    # ---- optional static inline update_next_dt(): only emitted if the
    # kernel source actually references it ----
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]
    if c_include:
        ccode += [c_include]
    # ==== Insert kernel code ==== #
    ccode += [str(kernel_ast)]
    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("double", const)]
    fargs_str = ", ".join(['particles->time[pnum]'] + list(field_args.keys())
                          + list(const_args.keys()))
    # ==== statement clusters use to compose 'body' variable and variables 'time_loop' and 'part_loop' ==== ##
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign(
        "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    # Apply a deferred dt change requested via update_next_dt()
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt")
        ]))
    # __dt is clamped so the particle never steps past endtime
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )  # original
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    # A kernel that changed particles->dt[pnum] triggers a REPEAT of the step
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))
    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))
    # Skip particles not due to start yet; mark finished ones SUCCESS
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue")
        ]))
    # ==== main computation body ==== #
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]")
    ]
    body += [
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
    ]
    # A state change made by the kernel itself overrides the return code
    body += [
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]"))
    ]
    body += [check_pdt]
    # On success/delete: advance time; otherwise restore the backup
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            c.Block([
                c.Statement(
                    "particles->time[pnum] += particles->dt[pnum]"),
                update_pdt, dt_pos, sign_end_part,
                c.If(
                    "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                    c.Assign("res", "EVALUATE")),
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state, dt_0_break
            ]),
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, particles, pnum)"
                ), dt_pos, sign_end_part,
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state,
                c.Statement("break")
            ]))
    ]
    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part, reset_res_state, dt_pos, notstarted_continue,
            time_loop
        ]))
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def _cgen(self):
    """Build an inline cgen declarator: a reference to `self.typ`,
    wrapped in `self.dim` levels of pointer indirection."""
    decl = cgen.Reference(cgen.POD(self.typ, ''))
    for _ in range(self.dim):
        decl = cgen.Pointer(decl)
    return decl.inline(True)
def _cgen(self):
    """Build an inline cgen POD declarator for `self.typ` with no name."""
    pod = cgen.POD(self.typ, '')
    return pod.inline(True)
def ConstPointerToConst(dtype, name, value):
    """Returns a cgen variable declaration & assignment of a constant
    pointer to a constant of type `dtype`
    """
    decl = c.Pointer(c.Const(c.POD(dtype, name)))
    return c.Constant(decl, value)