from gg.ast import * from gg.lib.graph import Graph from gg.lib.wl import Worklist from gg.ast.params import GraphParam import cgen G = Graph("graph") WL = Worklist() ast = Module([ CBlock([cgen.Include("sssp_push_cuda.cuh", system=False)], parse=False), Kernel("InitializeGraph", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('const uint32_t ', 'local_infinity'), ('unsigned long long', 'local_src_node'), ('uint32_t *', 'p_dist_current'), ('uint32_t *', 'p_dist_old') ], [ ForAll("src", G.nodes("__begin", "__end"), [ CDecl([("bool", "pop", " = src < __end")]), If("pop", [ CBlock([ "p_dist_current[src] = (graph.node_data[src] == local_src_node) ? 0 : local_infinity" ]), CBlock([ "p_dist_old[src] = (graph.node_data[src] == local_src_node) ? 0 : local_infinity" ]), ]), ]), ]), Kernel("FirstItr_SSSP", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('uint32_t *', 'p_dist_current'), ('uint32_t *', 'p_dist_old'), ('DynamicBitset&', 'bitset_dist_current')
from gg.ast import * from gg.lib.graph import Graph from gg.lib.wl import Worklist from gg.ast.params import GraphParam import cgen G = Graph("graph") WL = Worklist() ast = Module([ CBlock([cgen.Include("kernels/reduce.cuh", system=False)], parse=False), CBlock([cgen.Include("gen_cuda.cuh", system=False)], parse=False), CDeclGlobal([("unsigned int *", "P_CURRENT_DEGREE", "")]), CDeclGlobal([("bool *", "P_FLAG", "")]), CDeclGlobal([("unsigned int *", "P_TRIM", "")]), Kernel("InitializeGraph2", [ G.param(), ('unsigned int', '__nowned'), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('unsigned int *', 'p_current_degree') ], [ ForAll("src", G.nodes("__begin", "__end"), [ CDecl([("bool", "pop", " = src < __end")]), If("pop", []), UniformConditional(If("!pop", [CBlock("continue")]), uniform_only=False, _only_if_np=True), ClosureHint( ForAll("current_edge", G.edges("src"), [ CDecl([("index_type", "dest_node", "")]), CBlock( ["dest_node = graph.getAbsDestination(current_edge)"]), CBlock([ "atomicAdd(&p_current_degree[dest_node], (unsigned int)1)" ]),
'PyObject* newShape = Py_BuildValue("(i)", intLength)', 'PyObject* kwargs = Py_BuildValue(' '"{sOsOs%s}", "shape", newShape, "dtype", type, "gpudata", diffResult)' % ptr_sz_uint_conv, 'PyObject* GPUArrayClass = PyObject_GetAttrString(gpuArray, "__class__")', 'PyObject* remoteResult = PyObject_Call(GPUArrayClass, args, kwargs)', 'return remoteResult' ] host_mod.add_function( c.FunctionBody( c.FunctionDeclaration( c.Pointer(c.Value("PyObject", "adjacentDifference")), [c.Pointer(c.Value("PyObject", "gpuArray"))]), c.Block([c.Statement(x) for x in statements]))) host_mod.add_to_preamble([c.Include('boost/python/extract.hpp')]) cuda_mod = CudaModule(host_mod) cuda_mod.add_to_preamble([c.Include('cuda.h')]) globalIndex = 'int index = blockIdx.x * blockDim.x + threadIdx.x' compute_diff = 'outputPtr[index] = inputPtr[index] - inputPtr[index-1]' launch = [ 'CUdeviceptr output', 'cuMemAlloc(&output, sizeof(T) * length)', 'int bSize = 256', 'int gSize = (length-1)/bSize + 1', 'diffKernel<<<gSize, bSize>>>((T*)inputPtr, length, (T*)output)', 'return output' ] diff = [ c.Template(
def get_cpp_headers_ast(self):
    """Build the include directive for the required header.

    Returns:
        A ``cgen.Include`` node for ``chrono``.
    """
    chrono_header = cgen.Include('chrono')
    return chrono_header
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the C source that drives a kernel over all particles.

    The pieces are collected as strings in ``ccode`` in the order in which
    they must appear in the C file and are joined with blank lines at the end.

    Args:
        funcname: Name of the kernel function; it is called in the emitted
            loop as ``funcname(particles, pnum, <time>, <fields>, <consts>)``.
        field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        const_args: Mapping whose keys become ``double`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        kernel_ast: Object whose ``str()`` is inserted as the kernel code. Its
            string form is also searched for ``'update_next_dt'``.
        c_include: Extra code inserted verbatim before the kernel when truthy.

    Returns:
        str: The generated source, sections separated by ``"\\n\\n"``.
    """
    ccode = []
    # Name of the struct whose members are pointers (one array per variable).
    pname = self.ptype.name + 'p'

    # ==== Add include for Parcels and math header ==== #
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]
    # File-level state written by update_next_dt() and read back in the loop.
    ccode += [str(c.Assign('double _next_dt', '0'))]
    ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
    # ngrid falls back to 1 when no fieldset is attached.
    ccode += [
        str(
            c.Assign(
                'const int ngrid',
                str(self.fieldset.gridset.size if self.
                    fieldset is not None else 1)))
    ]

    # ==== Generate type definition for particle type ==== #
    # Every variable becomes a pointer member of the '<name>p' struct.
    vdeclp = [
        c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
    ]
    ccode += [
        str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
    ]
    # Generate type definition for single particle type
    # (uint64 variables are left out of this by-value struct).
    vdecl = [
        c.POD(v.dtype, v.name) for v in self.ptype.variables
        if v.dtype != np.uint64
    ]
    ccode += [
        str(
            c.Typedef(
                c.GenerableStruct("", vdecl, declname=self.ptype.name)))
    ]

    # static inline set_particle_backup(): copy element pnum of every backed-up
    # variable into the single-particle struct. 'dt' and 'state' are skipped.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_set_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particle_backup->%s" % v.name),
                         ("particles->%s[pnum]" % v.name))
            ]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]

    # static inline get_particle_backup(): the reverse copy, restoring element
    # pnum from the backup struct.
    args = [
        c.Pointer(c.Value(self.ptype.name, "particle_backup")),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("int", "pnum")
    ]
    p_back_get_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                            spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [
                c.Assign(("particles->%s[pnum]" % v.name),
                         ("particle_backup->%s" % v.name))
            ]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]

    # update_next_dt() is only emitted when the kernel text mentions it; the
    # declaration object itself is built unconditionally.
    update_next_dt_decl = c.FunctionDeclaration(
        c.Static(
            c.DeclSpecifier(c.Value("void", "update_next_dt"),
                            spec='inline')), [c.Value('double', 'dt')])
    if 'update_next_dt' in str(kernel_ast):
        body = []
        body += [c.Assign("_next_dt", "dt")]
        body += [c.Assign("_next_dt_set", "1")]
        update_next_dt_body = c.Block(body)
        update_next_dt = str(
            c.FunctionBody(update_next_dt_decl, update_next_dt_body))
        ccode += [update_next_dt]

    # Caller-supplied code goes in as-is (no str() conversion), ahead of the
    # kernel.
    if c_include:
        ccode += [c_include]

    # ==== Insert kernel code ==== #
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [
        c.Value("int", "num_particles"),
        c.Pointer(c.Value(pname, "particles")),
        c.Value("double", "endtime"),
        c.Value("double", "dt")
    ]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("double", const)]
    # Trailing arguments of the kernel call: particle time, then fields, then
    # constants, in the same order as the particle_loop parameters above.
    fargs_str = ", ".join(['particles->time[pnum]'] +
                          list(field_args.keys()) +
                          list(const_args.keys()))
    # ==== statement clusters use to compose 'body' variable and variables 'time_loop' and 'part_loop' ==== ##
    # The nodes below are built once and reused in several places of the
    # emitted loop.
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign(
        "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
    reset_res_state = c.Assign("res", "particles->state[pnum]")
    update_state = c.Assign("particles->state[pnum]", "res")
    # Apply a dt requested through update_next_dt() and clear the request flag.
    update_pdt = c.If(
        "_next_dt_set == 1",
        c.Block([
            c.Assign("_next_dt_set", "0"),
            c.Assign("particles->dt[pnum]", "_next_dt")
        ]))
    # __dt is the smaller magnitude of the particle's dt and the time left to
    # endtime.
    dt_pos = c.Assign(
        "__dt",
        "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
    )  # original
    pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
    partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
    # If the kernel returned SUCCESS but changed the particle's dt, mark the
    # step as REPEAT.
    check_pdt = c.If(
        "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
        c.Assign("res", "REPEAT"))
    dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                      c.Statement("break"))
    # Skip this particle (setting it to SUCCESS once |time| >= |endtime|) when
    # the time direction does not match or no time is left, unless dt is zero.
    notstarted_continue = c.If(
        "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
        c.Block([
            c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                 c.Assign("particles->state[pnum]", "SUCCESS")),
            c.Statement("continue")
        ]))

    # ==== main computation body ==== #
    # One iteration: back up the particle, set its dt, run the kernel, then
    # either advance the time or restore the backup depending on 'res'.
    body = [
        c.Statement(
            "set_particle_backup(&particle_backup, particles, pnum)")
    ]
    body += [pdt_eq_dt_pos]
    body += [partdt]
    body += [
        c.Value("StatusCode", "state_prev"),
        c.Assign("state_prev", "particles->state[pnum]")
    ]
    body += [
        c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
    ]
    # A state change made inside the kernel overrides a SUCCESS return code.
    body += [
        c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
             c.Assign("res", "particles->state[pnum]"))
    ]
    body += [check_pdt]
    body += [
        c.If(
            "res == SUCCESS || res == DELETE",
            # Accepted step: advance time and recompute the remaining __dt.
            c.Block([
                c.Statement(
                    "particles->time[pnum] += particles->dt[pnum]"),
                update_pdt, dt_pos, sign_end_part,
                c.If(
                    "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                    c.Assign("res", "EVALUATE")),
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state, dt_0_break
            ]),
            # Any other result: restore the backup and leave the time loop.
            c.Block([
                c.Statement(
                    "get_particle_backup(&particle_backup, particles, pnum)"
                ), dt_pos, sign_end_part,
                c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                update_state,
                c.Statement("break")
            ]))
    ]

    time_loop = c.While(
        "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
        c.Block(body))
    part_loop = c.For(
        "pnum = 0", "pnum < num_particles", "++pnum",
        c.Block([
            sign_end_part, reset_res_state, dt_pos, notstarted_continue,
            time_loop
        ]))
    # NOTE(review): the tolerance remark on the '__dt' line below does not
    # match any value used in this function -- confirm whether it is stale.
    fbody = c.Block([
        c.Value("int", "pnum, sign_dt, sign_end_part"),
        c.Value("StatusCode", "res"),
        c.Value("double", "__pdt_prekernels"),
        c.Value("double", "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
        sign_dt, particle_backup, part_loop
    ])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
    """Assemble the C source that drives a kernel over an array of particles.

    The pieces are collected as strings in ``ccode`` in the order in which
    they must appear in the C file and are joined with blank lines at the end.

    Args:
        funcname: Name of the kernel function; it is called in the emitted
            loop as ``funcname(&(particles[p]), <time>, <dt>, <fields>,
            <consts>)``.
        field_args: Mapping whose keys become ``CField *`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        const_args: Mapping whose keys become ``float`` parameters of
            ``particle_loop`` and are forwarded to the kernel call.
        kernel_ast: Object whose ``str()`` is inserted as the kernel code.
        c_include: Extra code inserted verbatim before the kernel when truthy.

    Returns:
        str: The generated source, sections separated by ``"\\n\\n"``.
    """
    ccode = []

    # Add include for Parcels and math header
    ccode += [str(c.Include("parcels.h", system=False))]
    ccode += [str(c.Include("math.h", system=False))]

    # Generate type definition for particle type
    # uint64 variables are declared as pointers of dtype np.void; everything
    # else is declared by value with its own dtype.
    vdecl = []
    for v in self.ptype.variables:
        if v.dtype == np.uint64:
            vdecl.append(c.Pointer(c.POD(np.void, v.name)))
        else:
            vdecl.append(c.POD(v.dtype, v.name))

    ccode += [str(c.Typedef(c.GenerableStruct("", vdecl, declname=self.ptype.name)))]

    # static inline set_particle_backup(): copy every backed-up member from
    # 'particle' into 'particle_backup'. 'dt' and 'state' are skipped.
    args = [c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))]
    p_back_set_decl = c.FunctionDeclaration(c.Static(c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                                     spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [c.Assign(("particle_backup->%s" % v.name), ("particle->%s" % v.name))]
    p_back_set_body = c.Block(body)
    p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
    ccode += [p_back_set]

    # static inline get_particle_backup(): the reverse copy, restoring
    # 'particle' from 'particle_backup'.
    args = [c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(self.ptype.name, "particle"))]
    p_back_get_decl = c.FunctionDeclaration(c.Static(c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                                     spec='inline')), args)
    body = []
    for v in self.ptype.variables:
        if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
            body += [c.Assign(("particle->%s" % v.name), ("particle_backup->%s" % v.name))]
    p_back_get_body = c.Block(body)
    p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
    ccode += [p_back_get]

    # Caller-supplied code goes in as-is (no str() conversion), ahead of the
    # kernel.
    if c_include:
        ccode += [c_include]

    # Insert kernel code
    ccode += [str(kernel_ast)]

    # Generate outer loop for repeated kernel invocation
    args = [c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"), c.Value("float", "dt")]
    for field, _ in field_args.items():
        args += [c.Pointer(c.Value("CField", "%s" % field))]
    for const, _ in const_args.items():
        args += [c.Value("float", const)]
    # Trailing arguments of the kernel call: particle time, signed time step,
    # then fields and constants in the same order as the parameters above.
    fargs_str = ", ".join(['particles[p].time', 'sign_dt * __dt'] + list(field_args.keys())
                          + list(const_args.keys()))
    # Inner loop nest for forward runs
    sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
    particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
    sign_end_part = c.Assign("sign_end_part", "endtime - particles[p].time > 0 ? 1 : -1")
    # __dt is the smaller magnitude of the particle's dt and the time left to
    # endtime.
    dt_pos = c.Assign("__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
    dt_0_break = c.If("particles[p].dt == 0", c.Statement("break"))
    # Skip particles whose direction towards endtime differs from the sign of
    # dt, unless their own dt is zero.
    notstarted_continue = c.If("(sign_end_part != sign_dt) && (particles[p].dt != 0)",
                               c.Statement("continue"))
    # One iteration: back up the particle, run the kernel, then act on 'res'.
    body = [c.Statement("set_particle_backup(&particle_backup, &(particles[p]))")]
    body += [c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, fargs_str))]
    body += [c.Assign("particles[p].state", "res")]  # Store return code on particle
    # SUCCESS: advance time, recompute __dt, and go to the next iteration
    # (or stop when the particle's dt is zero).
    body += [c.If("res == SUCCESS", c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                                             dt_pos, dt_0_break, c.Statement("continue")]))]
    # REPEAT: restore the backup before leaving the loop; any other code
    # leaves the loop directly.
    body += [c.If("res == REPEAT",
                  c.Block([c.Statement("get_particle_backup(&particle_backup, &(particles[p]))"),
                           dt_pos, c.Statement("break")]), c.Statement("break"))]

    time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(body))
    part_loop = c.For("p = 0", "p < num_particles", "++p",
                      c.Block([sign_end_part, notstarted_continue, dt_pos, time_loop]))
    # Local declarations of particle_loop; __tol is the threshold used by the
    # while condition above.
    fbody = c.Block([c.Value("int", "p, sign_dt, sign_end_part"), c.Value("ErrorCode", "res"),
                     c.Value("double", "__dt, __tol"), c.Assign("__tol", "1.e-6"),
                     sign_dt, particle_backup, part_loop])
    fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
    ccode += [str(c.FunctionBody(fdecl, fbody))]
    return "\n\n".join(ccode)
from gg.ast import * from gg.lib.graph import Graph from gg.lib.wl import Worklist from gg.ast.params import GraphParam import cgen G = Graph("graph") WL = Worklist() ast = Module([ CBlock([cgen.Include("kernels/reduce.cuh", system=False)], parse=False), CBlock([cgen.Include("pagerank_push_cuda.cuh", system=False)], parse=False), Kernel("ResetGraph", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('float *', 'p_delta'), ('uint32_t *', 'p_nout'), ('float *', 'p_residual'), ('float *', 'p_value') ], [ ForAll("src", G.nodes("__begin", "__end"), [ CDecl([("bool", "pop", " = src < __end")]), If("pop", [ CBlock(["p_value[src] = 0"]), CBlock(["p_nout[src] = 0"]), CBlock(["p_residual[src] = 0"]), CBlock(["p_delta[src] = 0"]), ]), ]), ]), Kernel("InitializeGraph", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('const float ', 'local_alpha'), ('uint32_t *', 'p_nout'), ('float *', 'p_residual'), ('DynamicBitset&', 'bitset_nout') ], [
from gg.ast import * from gg.lib.graph import Graph from gg.lib.wl import Worklist from gg.ast.params import GraphParam import cgen G = Graph("graph") WL = Worklist() ast = Module([ CBlock([cgen.Include("kcore_pull_cuda.cuh", system=False)], parse=False), Kernel("DegreeCounting", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('uint32_t *', 'p_current_degree'), ('DynamicBitset&', 'bitset_current_degree') ], [ ForAll("src", G.nodes("__begin", "__end"), [ CDecl([("bool", "pop", " = src < __end")]), If("pop", [ CBlock(["p_current_degree[src] = graph.getOutDegree(src)"]), CBlock(["bitset_current_degree.set(src)"]), ]), ]), ]), Kernel("InitializeGraph", [ G.param(), ('unsigned int', '__begin'), ('unsigned int', '__end'), ('uint32_t *', 'p_current_degree'), ('uint8_t *', 'p_flag'), ('uint8_t *', 'p_pull_flag'), ('uint32_t *', 'p_trim') ], [ ForAll("src", G.nodes("__begin", "__end"), [ CDecl([("bool", "pop", " = src < __end")]), If("pop", [
# BEGIN: global variable declarations
tempString = c.Assign('int ny', '2')
tempString = append(tempString, c.Assign('int nx', '80'))
tempString = append(tempString, c.Assign('int nt', '100'))
# 'dx' is derived from nx the same way 'dy' is derived from ny. It must not be
# named 'nx': that would redeclare the integer above and leave the 'dx' that
# 'dt' depends on undeclared in the generated code.
tempString = append(tempString, c.Assign('double dx', '2.0 / (nx + 1.0)'))
tempString = append(tempString, c.Assign('double dy', '2.0 / (ny + 1.0)'))
tempString = append(tempString, c.Assign('double sigma', '0.2'))
tempString = append(tempString, c.Assign('double dt', 'sigma * dx'))
# END: global variable declarations

# Store the declarations in the main output string
CodeOutput = tempString

# BEGIN: includes and defines
tempString = c.Define("OPS_2D", '')
tempString = append(tempString, c.Include('ops_seq.h', system=True))
tempString = append(tempString, c.Include('iostream', system=True))
tempString = append(tempString, c.Include('fstream', system=True))
tempString = append(tempString, c.Include('convec.h', system=False))
tempString = append(tempString, c.Value('using namespace std', ''))
# END: includes and defines

CodeOutput = append(CodeOutput, tempString)

# Declaration node for ops_init with the arguments argc, argv and 1
temp = c.FunctionDeclaration(
    c.Value("", "ops_init"),
    [c.Value("", "argc"),
     c.Value("char", "argv"),
     c.Value("", "1")])

# List that collects the operations of main
blockMain = []
def profiling_include():
    """Return the include directives needed for profiling support.

    Returns:
        list: One ``cgen.Include`` per header, each created with ``False`` as
        its second positional argument.
    """
    statements = []
    for header in ('opesciProfiling.h',):
        statements.append(cgen.Include(header, False))
    return statements
def io_include():
    """Return the include directives needed for the I/O helpers.

    Returns:
        list: ``cgen.Include`` nodes for ``opesciIO.h`` and ``opesciHandy.h``,
        in that order, each created with ``False`` as its second positional
        argument.
    """
    return [
        cgen.Include('opesciIO.h', False),
        cgen.Include('opesciHandy.h', False),
    ]