Ejemplo n.º 1
0
    def _generate_kernel_arg_decls(self):
        """Collect the C argument declarations for the kernel and its wrapper.

        Static kernel arguments become ``const`` value parameters of the
        kernel.  Each entry of ``self._dat_dict`` adds a restrict-qualified
        pointer to the library signature and, depending on the type of the
        data object, adds to the kernel signature either

        * an equivalent pointer (subclasses of ``host._Array``), or
        * a value of a generated struct type ``_<symbol>_t`` holding an
          ``i`` and a ``j`` pointer (subclasses of ``host.Matrix``).

        Declarations for data that is not opened for writing are wrapped in
        ``cgen.Const``.  The three results are stored in
        ``self._components`` under ``KERNEL_ARG_DECLS``,
        ``KERNEL_LIB_ARG_DECLS`` and ``KERNEL_STRUCT_TYPEDEFS``.
        """

        def _restrict_pointer(ctype, name):
            # Pointer declaration to ``ctype`` whose name carries the
            # compiler specific restrict keyword.
            return cgen.Pointer(
                cgen.Value(ctype, Restrict(self._cc.restrict_keyword, name)))

        arg_decls = []
        lib_arg_decls = []
        struct_typedefs = cgen.Module(
            [cgen.Comment('#### Structs generated per ParticleDat ####')])

        static_args = self._kernel.static_args
        if static_args is not None:
            for name, ctype in static_args.items():
                arg_decls.append(
                    cgen.Const(cgen.Value(host.ctypes_map[ctype], name)))

        for symbol, access in self._dat_dict.items():

            assert type(access) is tuple, "Access descriptors not found"
            obj = access[0]

            # Every dat is handed to the library wrapper as a raw pointer.
            lib_arg = _restrict_pointer(host.ctypes_map[obj.dtype], symbol)

            obj_type = type(obj)
            if issubclass(obj_type, host._Array):
                arg = _restrict_pointer(host.ctypes_map[obj.dtype], symbol)
                if not access[1].write:
                    arg = cgen.Const(arg)
                arg_decls.append(arg)

            elif issubclass(obj_type, host.Matrix):
                # Struct type with one pointer per particle of the pair.
                dtype = obj.dtype
                ptr_i = _restrict_pointer(ctypes_map(dtype), 'i')
                ptr_j = _restrict_pointer(ctypes_map(dtype), 'j')
                if not access[1].write:
                    ptr_i = cgen.Const(ptr_i)
                    ptr_j = cgen.Const(ptr_j)
                typename = '_' + symbol + '_t'
                struct_typedefs.append(
                    cgen.Typedef(cgen.Struct('', [ptr_i, ptr_j], typename)))

                # The kernel receives the struct by value.
                arg_decls.append(cgen.Value(typename, symbol))

            if not access[1].write:
                lib_arg = cgen.Const(lib_arg)
            lib_arg_decls.append(lib_arg)

        self._components['KERNEL_ARG_DECLS'] = arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = struct_typedefs
Ejemplo n.º 2
0
    def generate(self, funcname, field_args, const_args, kernel_ast, c_include):
        """Return the C source wrapping a kernel in a particle loop.

        The returned string consists of the following pieces, separated by
        blank lines: the ``parcels.h`` and ``math.h`` includes, a typedef of
        the particle struct built from ``self.ptype.variables``, the
        optional ``c_include`` text, the rendered ``kernel_ast`` and a
        ``particle_loop`` function that repeatedly calls ``funcname`` for
        every particle.

        :param funcname: name of the kernel function called in the loop.
        :param field_args: mapping whose keys name the ``CField*`` arguments.
        :param const_args: mapping whose keys name the ``float`` arguments.
        :param kernel_ast: object whose ``str()`` is the kernel C code.
        :param c_include: extra C code inserted before the kernel; skipped
            when falsy.
        """
        # Includes for Parcels and math header
        sections = [str(c.Include(header, system=False))
                    for header in ("parcels.h", "math.h")]

        # Particle struct: uint64 variables are declared as void pointers,
        # all others as plain members of their own dtype.
        members = [
            c.Pointer(c.POD(np.void, var.name)) if var.dtype == np.uint64
            else c.POD(var.dtype, var.name)
            for var in self.ptype.variables
        ]
        sections.append(
            str(c.Typedef(c.GenerableStruct("", members, declname=self.ptype.name))))

        if c_include:
            sections.append(c_include)

        # Kernel code
        sections.append(str(kernel_ast))

        # Signature of the outer loop function
        loop_args = [c.Value("int", "num_particles"),
                     c.Pointer(c.Value(self.ptype.name, "particles")),
                     c.Value("double", "endtime"), c.Value("float", "dt")]
        for field in field_args.keys():
            loop_args.append(c.Pointer(c.Value("CField", "%s" % field)))
        for const in const_args.keys():
            loop_args.append(c.Value("float", const))
        call_args = ", ".join(['particles[p].time', 'sign_dt * __dt']
                              + list(field_args.keys())
                              + list(const_args.keys()))

        # Building blocks shared by the loops below
        set_sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        set_sign_end = c.Assign("sign_end_part",
                                "endtime - particles[p].time > 0 ? 1 : -1")
        set_dt = c.Assign(
            "__dt", "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
        break_on_zero_dt = c.If("particles[p].dt == 0", c.Statement("break"))
        skip_not_started = c.If(
            "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
            c.Statement("continue"))

        on_success = c.Block([c.Statement("particles[p].time += sign_dt * __dt"),
                              set_dt, break_on_zero_dt, c.Statement("continue")])
        on_repeat = c.Block([set_dt, c.Statement("continue")])
        steps = [
            c.Assign("res", "%s(&(particles[p]), %s)" % (funcname, call_args)),
            # Store return code on particle
            c.Assign("particles[p].state", "res"),
            c.If("res == SUCCESS", on_success),
            c.If("res == REPEAT", on_repeat, c.Statement("break")),
        ]

        time_loop = c.While("__dt > __tol || particles[p].dt == 0", c.Block(steps))
        particle_loop = c.For(
            "p = 0", "p < num_particles", "++p",
            c.Block([set_sign_end, skip_not_started, set_dt, time_loop]))
        function_body = c.Block([
            c.Value("int", "p, sign_dt, sign_end_part"),
            c.Value("ErrorCode", "res"),
            c.Value("double", "__dt, __tol"),
            c.Assign("__tol", "1.e-6"),
            set_sign_dt,
            particle_loop,
        ])
        declaration = c.FunctionDeclaration(c.Value("void", "particle_loop"),
                                            loop_args)
        sections.append(str(c.FunctionBody(declaration, function_body)))
        return "\n\n".join(sections)
Ejemplo n.º 3
0
    def generate(self, funcname, field_args, kernel_ast, adaptive=False):
        """Return the C source wrapping a kernel in a particle loop.

        The returned string holds, separated by blank lines, the
        ``parcels.h`` and ``math.h`` includes, a typedef of the particle
        struct built from ``self.ptype.var_types``, the rendered
        ``kernel_ast`` and a ``particle_loop`` function.  The generated
        function contains one loop nest for ``dt > 0.0`` and a mirrored one
        for all other values of ``dt``.

        :param funcname: name of the kernel function called in the loop.
        :param field_args: mapping whose keys name the ``CField*`` arguments.
        :param kernel_ast: object whose ``str()`` is the kernel C code.
        :param adaptive: accepted but not used by this method.
        """
        # Includes for Parcels and math header
        sections = [str(c.Include(header, system=False))
                    for header in ("parcels.h", "math.h")]

        # Type definition for the particle type
        members = [c.POD(dtype, var)
                   for var, dtype in self.ptype.var_types.items()]
        sections.append(
            str(c.Typedef(c.GenerableStruct("", members, declname=self.ptype.name))))

        # Kernel code
        sections.append(str(kernel_ast))

        # Signature of the outer loop function
        loop_args = [c.Value("int", "num_particles"),
                     c.Pointer(c.Value(self.ptype.name, "particles")),
                     c.Value("double", "endtime"), c.Value("float", "dt")]
        for field in field_args.keys():
            loop_args.append(c.Pointer(c.Value("CField", "%s" % field)))
        call_args = ", ".join(['particles[p].time', 'particles[p].dt']
                              + list(field_args.keys()))

        def _direction_loop(clamp, keep_running):
            # Loop over all particles; each one is stepped while
            # ``keep_running`` holds, with __dt recomputed through ``clamp``
            # (fmin or fmax) before the loop and after every kernel call.
            update_dt = c.Statement(
                "__dt = %s(particles[p].dt, endtime - particles[p].time)" % clamp)
            steps = [
                c.Statement("res = %s(&(particles[p]), %s)" % (funcname, call_args)),
                c.If("res == SUCCESS", c.Statement("particles[p].time += __dt")),
                update_dt,
            ]
            stepping = c.While(keep_running, c.Block(steps))
            return c.For("p = 0", "p < num_particles", "++p",
                         c.Block([update_dt, stepping]))

        forward = _direction_loop("fmin", "__dt > __tol")
        backward = _direction_loop("fmax", "__dt < -1. * __tol")

        direction_switch = c.If("dt > 0.0", c.Block([forward]), c.Block([backward]))
        function_body = c.Block([
            c.Value("int", "p"),
            c.Value("KernelOp", "res"),
            c.Value("double", "__dt, __tol"),
            c.Assign("__tol", "1.e-6"),
            direction_switch,
        ])
        declaration = c.FunctionDeclaration(c.Value("void", "particle_loop"),
                                            loop_args)
        sections.append(str(c.FunctionBody(declaration, function_body)))
        return "\n\n".join(sections)
Ejemplo n.º 4
0
    def _generate_kernel_arg_decls(self):
        """Collect the C argument declarations for the kernel and its wrapper.

        Static kernel arguments are declared as ``const`` values and added
        to both the kernel and the library signature.  Each entry of
        ``self._dat_dict`` (``symbol -> (object, access mode)``) adds a
        restrict-qualified pointer to the library signature (a pointer to
        pointer for ``data.GlobalArrayClassic`` subclasses) and adds to the
        kernel signature either

        * an equivalent pointer (subclasses of ``host._Array``), or
        * a value of a generated struct type ``_<symbol>_t`` holding an
          ``i`` and a ``j`` pointer (subclasses of ``host.Matrix``).

        Declarations for data that is not opened for writing are wrapped in
        ``cgen.Const``.  The results are stored in ``self._components``
        under ``KERNEL_ARG_DECLS``, ``KERNEL_LIB_ARG_DECLS`` and
        ``KERNEL_STRUCT_TYPEDEFS``.

        :raises AssertionError: if an entry of ``self._dat_dict`` is not a
            tuple, or if a ``host._Array`` that is not a
            ``data.GlobalArrayClassic`` is opened with write access.
        """

        _kernel_arg_decls = []
        _kernel_lib_arg_decls = []
        _kernel_structs = cgen.Module([
            cgen.Comment('#### Structs generated per ParticleDat ####')
        ])

        if self._kernel.static_args is not None:
            # Static arguments are passed by value; the same declaration is
            # used for the kernel and for the library wrapper.
            for name, ctype in self._kernel.static_args.items():
                arg = cgen.Const(cgen.Value(host.ctypes_map[ctype], name))
                _kernel_arg_decls.append(arg)
                _kernel_lib_arg_decls.append(arg)

        for symbol, access in self._dat_dict.items():

            assert type(access) is tuple, "Access descriptors not found"
            obj = access[0]
            mode = access[1]

            kernel_lib_arg = cgen.Pointer(
                cgen.Value(host.ctypes_map[obj.dtype],
                           Restrict(self._cc.restrict_keyword, symbol)))

            # GlobalArrayClassic instances get one more level of indirection
            # in the library signature.
            if issubclass(type(obj), data.GlobalArrayClassic):
                kernel_lib_arg = cgen.Pointer(kernel_lib_arg)

            if issubclass(type(obj), host._Array):
                kernel_arg = cgen.Pointer(
                    cgen.Value(host.ctypes_map[obj.dtype],
                               Restrict(self._cc.restrict_keyword, symbol)))
                if not mode.write:
                    kernel_arg = cgen.Const(kernel_arg)
                _kernel_arg_decls.append(kernel_arg)

                if mode.write is True:
                    # The message is built from adjacent literals: a
                    # backslash continuation inside a single literal would
                    # embed the source indentation in the runtime text.
                    assert issubclass(type(obj), data.GlobalArrayClassic), (
                        "global array must be a thread safe type for "
                        "write access. Type is:" + str(type(obj)))

            elif issubclass(type(obj), host.Matrix):
                # MAKE STRUCT TYPE
                dtype = obj.dtype
                ti = cgen.Pointer(
                    cgen.Value(ctypes_map(dtype),
                               Restrict(self._cc.restrict_keyword, 'i')))
                tj = cgen.Pointer(
                    cgen.Value(ctypes_map(dtype),
                               Restrict(self._cc.restrict_keyword, 'j')))
                if not mode.write:
                    ti = cgen.Const(ti)
                    tj = cgen.Const(tj)
                typename = '_' + symbol + '_t'
                _kernel_structs.append(
                    cgen.Typedef(cgen.Struct('', [ti, tj], typename)))

                # MAKE STRUCT ARG
                _kernel_arg_decls.append(cgen.Value(typename, symbol))

            if not mode.write:
                kernel_lib_arg = cgen.Const(kernel_lib_arg)

            _kernel_lib_arg_decls.append(kernel_lib_arg)

        self._components['KERNEL_ARG_DECLS'] = _kernel_arg_decls
        self._components['KERNEL_LIB_ARG_DECLS'] = _kernel_lib_arg_decls
        self._components['KERNEL_STRUCT_TYPEDEFS'] = _kernel_structs
Ejemplo n.º 5
0
    def generate(self, funcname, field_args, const_args, kernel_ast,
                 c_include):
        """Return the C source wrapping a kernel in a particle loop.

        The returned string is made of the following pieces, joined by
        blank lines and emitted in this order:

        1. the ``parcels.h`` include and the file-scope variables
           ``_next_dt``, ``_next_dt_set`` and ``ngrid``;
        2. a typedef named ``<ptype name>p`` whose members are pointers,
           one per particle variable, and a typedef named after the
           particle type with one plain member per non-uint64 variable;
        3. the inline helpers ``set_particle_backup`` and
           ``get_particle_backup``;
        4. the inline helper ``update_next_dt``, only when the rendered
           kernel mentions that name;
        5. ``c_include`` (when truthy) and the rendered ``kernel_ast``;
        6. the ``particle_loop`` function that calls ``funcname`` for every
           particle index.

        :param funcname: name of the kernel function called in the loop.
        :param field_args: mapping whose keys name the ``CField*`` arguments.
        :param const_args: mapping whose keys name the ``double`` arguments.
        :param kernel_ast: object whose ``str()`` is the kernel C code.
        :param c_include: extra C code inserted before the kernel; skipped
            when falsy.
        :return: the generated C code as a single string.
        """
        ccode = []

        # Name of the struct type whose members are per-variable pointers.
        pname = self.ptype.name + 'p'

        # ==== Add include for Parcels and math header ==== #
        ccode += [str(c.Include("parcels.h", system=False))]
        # NOTE(review): the two includes below were disabled by an earlier
        # author; the original notes claim math.h is already pulled in by
        # parcels.h and that stdbool.h was needed for crossdike.h booleans.
        # Neither claim can be confirmed from this method.
        #ccode += [str(c.Include("math.h", system=False))]
        #ccode += [str(c.Include("stdbool.h", system=False))]
        # File-scope state written by update_next_dt() and read back in the
        # particle loop (see update_pdt below).
        ccode += [str(c.Assign('double _next_dt', '0'))]
        ccode += [str(c.Assign('size_t _next_dt_set', '0'))]
        # ngrid is the grid set size of the field set, or 1 without one.
        ccode += [
            str(
                c.Assign(
                    'const int ngrid',
                    str(self.fieldset.gridset.size if self.
                        fieldset is not None else 1)))
        ]

        # ==== Generate type definition for particle type ==== #
        # Every variable becomes a pointer member, indexed by particle
        # number in the generated code (particles->name[pnum]).
        vdeclp = [
            c.Pointer(c.POD(v.dtype, v.name)) for v in self.ptype.variables
        ]
        ccode += [
            str(c.Typedef(c.GenerableStruct("", vdeclp, declname=pname)))
        ]
        # Generate type definition for single particle type
        # (uint64 variables are left out of this struct).
        vdecl = [
            c.POD(v.dtype, v.name) for v in self.ptype.variables
            if v.dtype != np.uint64
        ]
        ccode += [
            str(
                c.Typedef(
                    c.GenerableStruct("", vdecl, declname=self.ptype.name)))
        ]

        # ==== set_particle_backup: copy particle pnum into the backup ==== #
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("int", "pnum")
        ]
        p_back_set_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "set_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            # 'dt', 'state' and uint64 variables are not backed up.
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particle_backup->%s" % v.name),
                             ("particles->%s[pnum]" % v.name))
                ]
        p_back_set_body = c.Block(body)
        p_back_set = str(c.FunctionBody(p_back_set_decl, p_back_set_body))
        ccode += [p_back_set]

        # ==== get_particle_backup: restore particle pnum from the backup ==== #
        args = [
            c.Pointer(c.Value(self.ptype.name, "particle_backup")),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("int", "pnum")
        ]
        p_back_get_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "get_particle_backup"),
                                spec='inline')), args)
        body = []
        for v in self.ptype.variables:
            # Same variable selection as in set_particle_backup.
            if v.dtype != np.uint64 and v.name not in ['dt', 'state']:
                body += [
                    c.Assign(("particles->%s[pnum]" % v.name),
                             ("particle_backup->%s" % v.name))
                ]
        p_back_get_body = c.Block(body)
        p_back_get = str(c.FunctionBody(p_back_get_decl, p_back_get_body))
        ccode += [p_back_get]

        # ==== update_next_dt: record a dt to apply after the kernel ==== #
        update_next_dt_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "update_next_dt"),
                                spec='inline')), [c.Value('double', 'dt')])
        # Only emitted when the rendered kernel text contains the name.
        if 'update_next_dt' in str(kernel_ast):
            body = []
            body += [c.Assign("_next_dt", "dt")]
            body += [c.Assign("_next_dt_set", "1")]
            update_next_dt_body = c.Block(body)
            update_next_dt = str(
                c.FunctionBody(update_next_dt_decl, update_next_dt_body))
            ccode += [update_next_dt]

        if c_include:
            ccode += [c_include]

        # ==== Insert kernel code ==== #
        ccode += [str(kernel_ast)]

        # Generate outer loop for repeated kernel invocation
        args = [
            c.Value("int", "num_particles"),
            c.Pointer(c.Value(pname, "particles")),
            c.Value("double", "endtime"),
            c.Value("double", "dt")
        ]
        # One CField pointer per field and one double per constant, in the
        # iteration order of the two mappings.
        for field, _ in field_args.items():
            args += [c.Pointer(c.Value("CField", "%s" % field))]
        for const, _ in const_args.items():
            args += [c.Value("double", const)]
        # Trailing arguments of the kernel call: particle time, then the
        # field names, then the constant names.
        fargs_str = ", ".join(['particles->time[pnum]'] +
                              list(field_args.keys()) +
                              list(const_args.keys()))
        # ==== statement clusters used to compose 'body', 'time_loop' and 'part_loop' ==== #
        sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        particle_backup = c.Statement("%s particle_backup" % self.ptype.name)
        sign_end_part = c.Assign(
            "sign_end_part", "(endtime - particles->time[pnum]) > 0 ? 1 : -1")
        reset_res_state = c.Assign("res", "particles->state[pnum]")
        update_state = c.Assign("particles->state[pnum]", "res")
        # Apply a dt requested through update_next_dt() and clear the flag.
        update_pdt = c.If(
            "_next_dt_set == 1",
            c.Block([
                c.Assign("_next_dt_set", "0"),
                c.Assign("particles->dt[pnum]", "_next_dt")
            ]))

        # __dt is the smaller of |particle dt| and |time left until endtime|.
        dt_pos = c.Assign(
            "__dt",
            "fmin(fabs(particles->dt[pnum]), fabs(endtime - particles->time[pnum]))"
        )  # original

        # Signed step that is written to the particle before the kernel runs.
        pdt_eq_dt_pos = c.Assign("__pdt_prekernels", "__dt * sign_dt")
        partdt = c.Assign("particles->dt[pnum]", "__pdt_prekernels")
        # A successful kernel whose particle dt no longer equals the value
        # set before the call is turned into REPEAT.
        check_pdt = c.If(
            "(res == SUCCESS) & !is_equal_dbl(__pdt_prekernels, particles->dt[pnum])",
            c.Assign("res", "REPEAT"))

        dt_0_break = c.If("is_zero_dbl(particles->dt[pnum])",
                          c.Statement("break"))

        # Skip the time loop for this particle when the direction towards
        # endtime differs from the sign of dt, or __dt is close to zero,
        # while the particle dt itself is non-zero.  The state is set to
        # SUCCESS first when |time| >= |endtime|.
        notstarted_continue = c.If(
            "(( sign_end_part != sign_dt) || is_close_dbl(__dt, 0) ) && !is_zero_dbl(particles->dt[pnum])",
            c.Block([
                c.If("fabs(particles->time[pnum]) >= fabs(endtime)",
                     c.Assign("particles->state[pnum]", "SUCCESS")),
                c.Statement("continue")
            ]))

        # ==== main computation body ==== #
        # Order matters: back up, set the particle dt, remember the state,
        # call the kernel, then post-process the result.
        body = [
            c.Statement(
                "set_particle_backup(&particle_backup, particles, pnum)")
        ]
        body += [pdt_eq_dt_pos]
        body += [partdt]
        body += [
            c.Value("StatusCode", "state_prev"),
            c.Assign("state_prev", "particles->state[pnum]")
        ]
        body += [
            c.Assign("res", "%s(particles, pnum, %s)" % (funcname, fargs_str))
        ]
        # A state changed by the kernel overrides a SUCCESS return value.
        body += [
            c.If("(res==SUCCESS) && (particles->state[pnum] != state_prev)",
                 c.Assign("res", "particles->state[pnum]"))
        ]
        body += [check_pdt]
        body += [
            c.If(
                "res == SUCCESS || res == DELETE",
                # Step accepted: advance time, refresh __dt and decide
                # whether another evaluation is needed.
                c.Block([
                    c.Statement(
                        "particles->time[pnum] += particles->dt[pnum]"),
                    update_pdt, dt_pos, sign_end_part,
                    c.If(
                        "(res != DELETE) && !is_close_dbl(__dt, 0) && (sign_dt == sign_end_part)",
                        c.Assign("res", "EVALUATE")),
                    c.If("sign_dt != sign_end_part",
                         c.Assign("__dt", "0")), update_state, dt_0_break
                ]),
                # Any other result: restore the backup, store the result as
                # the particle state and leave the time loop.
                c.Block([
                    c.Statement(
                        "get_particle_backup(&particle_backup, particles, pnum)"
                    ), dt_pos, sign_end_part,
                    c.If("sign_dt != sign_end_part", c.Assign("__dt", "0")),
                    update_state,
                    c.Statement("break")
                ]))
        ]

        # The time loop keeps running while the particle state is EVALUATE
        # or REPEAT, or while the particle dt is zero.
        time_loop = c.While(
            "(particles->state[pnum] == EVALUATE || particles->state[pnum] == REPEAT) || is_zero_dbl(particles->dt[pnum])",
            c.Block(body))
        part_loop = c.For(
            "pnum = 0", "pnum < num_particles", "++pnum",
            c.Block([
                sign_end_part, reset_res_state, dt_pos, notstarted_continue,
                time_loop
            ]))
        # Local declarations of particle_loop followed by the loop itself.
        fbody = c.Block([
            c.Value("int", "pnum, sign_dt, sign_end_part"),
            c.Value("StatusCode", "res"),
            c.Value("double", "__pdt_prekernels"),
            c.Value("double",
                    "__dt"),  # 1e-8 = built-in tolerance for np.isclose()
            sign_dt,
            particle_backup,
            part_loop
        ])
        fdecl = c.FunctionDeclaration(c.Value("void", "particle_loop"), args)
        ccode += [str(c.FunctionBody(fdecl, fbody))]
        return "\n\n".join(ccode)
Ejemplo n.º 6
0
    def setup(self, g):
        """Append the declarations that precede the code of graph ``g``.

        For a parent graph the output path is defaulted when empty and the
        following groups are written to ``self._lines``, each followed by an
        empty line: includes, defines, typedefs, inline functions, struct
        typedefs (one empty line after every struct), the constants of
        ``g`` as defines and the includes of ``g``.

        A graph without subgraphs additionally gets its function
        declaration and the opening line of the inline definition.

        Finally the constants of ``g`` are recorded in ``self._structs``
        (name -> type) under ``self._structname``, or under the part of the
        graph name before the first underscore when that is empty.
        """

        def emit(generable):
            # Render one generable and add it to the output lines.
            self._lines.append(str(generable))

        def end_group():
            # Groups are separated by an empty line.
            self._lines.append('')

        if g.is_parent:
            if len(self._path) < 1:
                self._path = '%s/%s.%s' % (os.getcwd(), g.name, self._lang.lower())

            if len(self.includes) > 0:
                for header in self.includes:
                    emit(cg.Include('%s.h' % header))
                end_group()

            if len(self.defines) > 0:
                for (symbol, value) in self.defines:
                    emit(cg.Define(symbol, value))
                end_group()

            if len(self._typedefs) > 0:
                for (typename, alias) in self._typedefs:
                    emit(cg.Typedef(cg.Value(typename, alias)))
                end_group()

            if len(self._functions) > 0:
                for (signature, code) in self._functions:
                    self._lines.append('inline %s { %s }' % (signature, code))
                end_group()

            if len(self._structs) > 0:
                for structname, members in self._structs.items():
                    fields = [cg.Value(ctype, name)
                              for name, ctype in members.items()]
                    alias = '%s%s' % (structname, StructNode.suffix())
                    typedef = cg.Typedef(cg.Value(cg.Struct('', fields), alias))
                    self._lines.append(str(typedef).replace('} ;', '}'))
                    end_group()

            if len(g.constants) > 0:
                for const in g.constants:
                    emit(cg.Define(const.name, const.value))
                end_group()

            if len(g.includes) > 0:
                for header in g.includes:
                    emit(cg.Include('%s.h' % header))
                end_group()

        if len(g.subgraphs) < 1:
            # Declaration first, then the same text reused as the opening
            # line of the inline definition.
            prototype = str(CGenHelper.functionDecl(g.returntype, g.name, g.params))
            self._lines.append(prototype)
            self._lines.append('inline %s' % prototype.replace(';', ' {'))

        if len(g.constants) > 0:
            structname = self._structname
            if len(structname) < 1:
                structname = g.name.split('_')[0]
            if structname not in self._structs:
                self._structs[structname] = OrderedDict()
            members = self._structs[structname]
            for const in g.constants:
                # Add const to struct for later use...
                members[const.name] = const.type
Ejemplo n.º 7
0
    def generate(self, funcname, field_args, const_args, kernel_ast,
                 c_include):
        """Return the C source wrapping a kernel in a particle loop.

        The returned string is made of the following pieces, joined by
        blank lines and emitted in this order: the ``parcels.h`` and
        ``math.h`` includes, the file-scope variables ``_next_dt`` and
        ``_next_dt_set``, a typedef of the particle struct, the inline
        helpers ``set_particle_backup`` and ``get_particle_backup``, the
        inline helper ``update_next_dt`` (only when the rendered kernel
        mentions that name), ``c_include`` (when truthy), the rendered
        ``kernel_ast`` and the ``particle_loop`` function.

        :param funcname: name of the kernel function called in the loop.
        :param field_args: mapping whose keys name the ``CField*`` arguments.
        :param const_args: mapping whose keys name the ``float`` arguments.
        :param kernel_ast: object whose ``str()`` is the kernel C code.
        :param c_include: extra C code inserted before the kernel.
        """

        def _backup_function(func_name, target, source):
            # Inline C function assigning ``target-><var>`` from
            # ``source-><var>`` for every particle variable that is neither
            # uint64 nor one of 'dt' / 'state'.
            params = [
                c.Pointer(c.Value(self.ptype.name, "particle_backup")),
                c.Pointer(c.Value(self.ptype.name, "particle"))
            ]
            declaration = c.FunctionDeclaration(
                c.Static(
                    c.DeclSpecifier(c.Value("void", func_name),
                                    spec='inline')), params)
            copies = [
                c.Assign(target + "->%s" % var.name,
                         source + "->%s" % var.name)
                for var in self.ptype.variables
                if var.dtype != np.uint64 and var.name not in ['dt', 'state']
            ]
            return str(c.FunctionBody(declaration, c.Block(copies)))

        # Includes for Parcels and math header, then the file-scope state
        # used to pass a new dt out of the kernel.
        sections = [
            str(c.Include("parcels.h", system=False)),
            str(c.Include("math.h", system=False)),
            str(c.Assign('double _next_dt', '0')),
            str(c.Assign('size_t _next_dt_set', '0')),
        ]

        # Particle struct: uint64 variables are declared as void pointers,
        # all others as plain members of their own dtype.
        members = [
            c.Pointer(c.POD(np.void, var.name)) if var.dtype == np.uint64
            else c.POD(var.dtype, var.name)
            for var in self.ptype.variables
        ]
        sections.append(
            str(
                c.Typedef(
                    c.GenerableStruct("", members,
                                      declname=self.ptype.name))))

        sections.append(
            _backup_function("set_particle_backup", "particle_backup",
                             "particle"))
        sections.append(
            _backup_function("get_particle_backup", "particle",
                             "particle_backup"))

        update_next_dt_decl = c.FunctionDeclaration(
            c.Static(
                c.DeclSpecifier(c.Value("void", "update_next_dt"),
                                spec='inline')), [c.Value('double', 'dt')])
        if 'update_next_dt' in str(kernel_ast):
            setter = c.Block([
                c.Assign("_next_dt", "dt"),
                c.Assign("_next_dt_set", "1")
            ])
            sections.append(str(c.FunctionBody(update_next_dt_decl, setter)))

        if c_include:
            sections.append(c_include)

        # Kernel code
        sections.append(str(kernel_ast))

        # Signature of the outer loop function
        loop_args = [
            c.Value("int", "num_particles"),
            c.Pointer(c.Value(self.ptype.name, "particles")),
            c.Value("double", "endtime"),
            c.Value("float", "dt")
        ]
        for field in field_args.keys():
            loop_args.append(c.Pointer(c.Value("CField", "%s" % field)))
        for const in const_args.keys():
            loop_args.append(c.Value("float", const))
        call_args = ", ".join(['particles[p].time'] +
                              list(field_args.keys()) +
                              list(const_args.keys()))

        # Building blocks shared by the loops below
        set_sign_dt = c.Assign("sign_dt", "dt > 0 ? 1 : -1")
        declare_backup = c.Statement("%s particle_backup" % self.ptype.name)
        set_sign_end = c.Assign("sign_end_part",
                                "endtime - particles[p].time > 0 ? 1 : -1")
        set_dt = c.Assign(
            "__dt",
            "fmin(fabs(particles[p].dt), fabs(endtime - particles[p].time))")
        break_on_zero_dt = c.If("particles[p].dt == 0", c.Statement("break"))
        skip_not_started = c.If(
            "(sign_end_part != sign_dt) && (particles[p].dt != 0)",
            c.Statement("continue"))
        # A successful kernel that changed the particle dt is repeated.
        demand_repeat = c.If(
            "(res == SUCCESS) & (__pdt_prekernels != particles[p].dt)",
            c.Assign("res", "REPEAT"))
        # Apply a dt requested through update_next_dt() and clear the flag.
        apply_next_dt = c.If(
            "_next_dt_set == 1",
            c.Block([
                c.Assign("_next_dt_set", "0"),
                c.Assign("particles[p].dt", "_next_dt")
            ]))

        accepted = c.Block([
            c.Statement("particles[p].time += particles[p].dt"),
            apply_next_dt, set_dt, break_on_zero_dt,
            c.Statement("continue")
        ])
        rejected = c.Block([
            c.Statement(
                "get_particle_backup(&particle_backup, &(particles[p]))"),
            set_dt,
            c.Statement("break")
        ])
        steps = [
            c.Statement(
                "set_particle_backup(&particle_backup, &(particles[p]))"),
            c.Assign("__pdt_prekernels", "__dt * sign_dt"),
            c.Assign("particles[p].dt", "__pdt_prekernels"),
            c.Assign("res",
                     "%s(&(particles[p]), %s)" % (funcname, call_args)),
            demand_repeat,
            # Store return code on particle
            c.Assign("particles[p].state", "res"),
            c.If("res == SUCCESS || res == DELETE", accepted, rejected),
        ]

        time_loop = c.While("__dt > __tol || particles[p].dt == 0",
                            c.Block(steps))
        particle_loop = c.For(
            "p = 0", "p < num_particles", "++p",
            c.Block([set_sign_end, skip_not_started, set_dt, time_loop]))
        function_body = c.Block([
            c.Value("int", "p, sign_dt, sign_end_part"),
            c.Value("ErrorCode", "res"),
            c.Value("float", "__pdt_prekernels"),
            c.Value("double", "__dt, __tol"),
            c.Assign("__tol", "1.e-6"),
            set_sign_dt,
            declare_backup,
            particle_loop
        ])
        declaration = c.FunctionDeclaration(
            c.Value("void", "particle_loop"), loop_args)
        sections.append(str(c.FunctionBody(declaration, function_body)))
        return "\n\n".join(sections)
Ejemplo n.º 8
0
    def _generate_per_dat(self):
        """Fill ``self._components`` with the code generated per argument.

        The method (re)initialises the entries ``KERNEL_ARG_DECLS``,
        ``KERNEL_LIB_ARG_DECLS``, ``KERNEL_STRUCT_TYPEDEFS``,
        ``LIB_KERNEL_CALL``, ``KERNEL_SCATTER``, ``KERNEL_GATHER``,
        ``IF_SCATTER``, ``IF_GATHER`` and ``KERNEL_MAPPING``, then walks the
        static kernel arguments and the entries of ``self._dat_dict`` to
        populate them.  It reads ``self._components['LIB_PAIR_INDEX_0']``,
        which therefore has to be set beforehand when a
        ``cuda_base.Matrix`` is present.  Finally the launch configuration,
        the ``k_<kernel name><<<_B,_T>>>(...)`` call and a device
        synchronisation are appended to ``LIB_KERNEL_CALL``.
        """

        def _restrict_pointer(ctype, name):
            # Pointer declaration to ``ctype`` whose name carries the
            # compiler specific restrict keyword.
            return cgen.Pointer(
                cgen.Value(ctype, Restrict(self._cc.restrict_keyword, name)))

        components = self._components

        # =================== DICT INIT ===============================
        components['KERNEL_ARG_DECLS'] = [
            cgen.Const(cgen.Value(host.int32_str, '_D_N_LOCAL'))
        ]
        components['KERNEL_LIB_ARG_DECLS'] = []

        # Symbols passed to the kernel launch, in call order.
        call_symbols = ['_H_N_LOCAL']

        for key, banner in (
                ('KERNEL_STRUCT_TYPEDEFS',
                 '#### Structs generated per ParticleDat ####'),
                ('LIB_KERNEL_CALL', '#### Kernel call ####'),
                ('KERNEL_SCATTER', '#### kernel scatter ####'),
                ('KERNEL_GATHER', '#### kernel gather ####'),
                ('IF_SCATTER', '#### if scatter ####'),
                ('IF_GATHER', '#### if gather ####'),
                ('KERNEL_MAPPING', '#### kernel symbol mapping ####')):
            components[key] = cgen.Module([cgen.Comment(banner)])

        # =================== Static Args ===============================
        if self._kernel.static_args is not None:
            for ksym, ktype in self._kernel.static_args.items():
                # The same const declaration serves kernel and library.
                static_decl = cgen.Const(
                    cgen.Value(host.ctypes_map[ktype], ksym))
                components['KERNEL_ARG_DECLS'].append(static_decl)
                components['KERNEL_LIB_ARG_DECLS'].append(static_decl)
                call_symbols.append(ksym)

        # =================== Dynamic Args ===============================
        for ksym, access in self._dat_dict.items():
            assert type(access) is tuple, "Access descriptors not found"

            dati = access[0]
            kacc = access[1]
            dsym = 'd_' + ksym
            ctype = host.ctypes_map[dati.dtype]
            dat_type = type(dati)

            # Library side declaration, completed at the end of the loop.
            lib_arg = _restrict_pointer(ctype, ksym)

            if dat_type is cuda_data.GlobalArray or \
                    issubclass(dat_type, cuda_base.Array):

                # KERNEL ARGS DECLS -----------------------------
                kernel_arg = _restrict_pointer(ctype, dsym)
                if not kacc.write:
                    kernel_arg = cgen.Const(kernel_arg)
                components['KERNEL_ARG_DECLS'].append(kernel_arg)

                # KERNEL CALL SYMS -----------------------------
                call_symbols.append(ksym)

                # KERNEL GATHER/SCATTER -------------------------
                if kacc.incremented:
                    # Zero-initialised local array of ncomp entries that is
                    # combined into the device array by the scatter code.
                    local_decl = cgen.Value(
                        ctype, ksym + '[' + str(dati.ncomp) + ']')
                    components['IF_GATHER'].append(
                        cgen.Initializer(local_decl, '{0}'))
                    components['IF_SCATTER'].append(
                        cgen.Line(
                            generate_reduction_final_stage(dsym, ksym, dati)))
                else:
                    # Const pointer named ksym, initialised with dsym.
                    alias = cgen.Const(cgen.Pointer(cgen.Value(ctype, ksym)))
                    components['IF_GATHER'].append(
                        cgen.Initializer(alias, dsym))

            elif issubclass(dat_type, cuda_base.Matrix):

                # KERNEL ARGS DECLS, STRUCT DECLS ----------------
                dtype = dati.dtype
                member_i = _restrict_pointer(ctypes_map(dtype), 'i')
                if not kacc.write:
                    member_i = cgen.Const(member_i)
                typename = '_' + ksym + '_t'
                components['KERNEL_STRUCT_TYPEDEFS'].append(
                    cgen.Typedef(cgen.Struct('', [member_i], typename)))

                kernel_arg = _restrict_pointer(ctype, dsym)
                if not kacc.write:
                    kernel_arg = cgen.Const(kernel_arg)
                components['KERNEL_ARG_DECLS'].append(kernel_arg)

                # KERNEL CALL SYMS -----------------------------
                call_symbols.append(ksym)

                # KERNEL MAPPING --------------------------------
                # The struct is initialised with the device pointer shifted
                # by <pair index 0> * ncomp.
                shift = ('+' + components['LIB_PAIR_INDEX_0'] + '*' +
                         str(dati.ncomp))
                mapping = cgen.Initializer(
                    cgen.Value(typename, ksym), '{ ' + (dsym + shift) + '}')
                components['KERNEL_MAPPING'].append(mapping)

            # add to lib args
            if not kacc.write:
                lib_arg = cgen.Const(lib_arg)
            components['KERNEL_LIB_ARG_DECLS'].append(lib_arg)

        # =================== Kernel call ===============================
        launch_config = [cgen.Value('dim3', '_B'), cgen.Value('dim3', '_T')]
        for dim, source in (('_B', '_H_BLOCKSIZE'), ('_T', '_H_THREADSIZE')):
            for index, axis in enumerate('xyz'):
                launch_config.append(
                    cgen.Assign(dim + '.' + axis,
                                source + '[' + str(index) + ']'))
        components['LIB_KERNEL_CALL'].append(cgen.Module(launch_config))

        components['LIB_KERNEL_CALL'].append(
            cgen.Line('k_' + self._kernel.name + '<<<_B,_T>>>(' +
                      ','.join(call_symbols) + ');'))
        components['LIB_KERNEL_CALL'].append(
            cgen.Line('checkCudaErrors(cudaDeviceSynchronize());'))