Ejemplo n.º 1
0
    def prepare_loopy_kernel(self, loopy_knl):
        """Register sumpy's Bessel mangler and preamble generator.

        :arg loopy_knl: the kernel to register on.
        :returns: the kernel returned by loopy after ``bessel_mangler`` has
            been registered as a function mangler and
            ``bessel_preamble_generator`` as a preamble generator.
        """
        from sumpy.codegen import bessel_mangler, bessel_preamble_generator

        with_manglers = lp.register_function_manglers(
            loopy_knl, [bessel_mangler])
        return lp.register_preamble_generators(
            with_manglers, [bessel_preamble_generator])
Ejemplo n.º 2
0
    def prepare_loopy_kernel(self, loopy_knl):
        """Return *loopy_knl* with the Bessel helpers from
        :mod:`sumpy.codegen` registered.

        The function mangler is registered first, then the preamble
        generator; the resulting kernel is returned.
        """
        from sumpy.codegen import bessel_mangler, bessel_preamble_generator

        manglers = [bessel_mangler]
        preamble_generators = [bessel_preamble_generator]

        knl = lp.register_function_manglers(loopy_knl, manglers)
        knl = lp.register_preamble_generators(knl, preamble_generators)
        return knl
Ejemplo n.º 3
0
    def get_kernel(self, **kwargs):
        """Build the ``eval_expr`` loopy kernel.

        The kernel loops over ``itgt`` in ``[0, n_targets)``, runs the
        variable-assignment code supplied by
        ``self.get_variable_assignment_code()``, evaluates
        ``self.get_normalised_expr()`` into the scalar temporary
        ``expr_val`` and stores it in ``result[itgt]``.

        :arg extra_kernel_kwarg_types: optional keyword; an iterable of
            extra kernel argument declarations, inserted before the
            trailing ``"..."`` entry of the kernel data.
        :returns: the kernel with ``dim`` fixed to ``self.dim``, the
            options ``write_cl=False`` and ``return_dict=True`` set, and
            ``self.function_manglers`` / ``self.preamble_generators``
            registered whenever they are not *None*.
        """
        extra_arg_decls = kwargs.get("extra_kernel_kwarg_types", ())

        # The expression evaluation itself; it is placed inside the itgt loop
        # by extending its within_inames.
        expr_insn = lp.Assignment(
            id=None,
            assignee="expr_val",
            expression=self.get_normalised_expr(),
            temp_var_type=None,
        )
        expr_insn = expr_insn.copy(
            within_inames=expr_insn.within_inames | frozenset(["itgt"]))

        instructions = (
            [
                """
                for itgt
                    VAR_ASSIGNMENT
                end
                """.replace("VAR_ASSIGNMENT",
                            self.get_variable_assignment_code())
            ]
            + [expr_insn]
            + [
                """
                for itgt
                    result[itgt] = expr_val
                end
                """
            ]
        )

        kernel_data = [
            lp.ValueArg("dim, n_targets", np.int32),
            lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
            lp.TemporaryVariable("expr_val", None, ()),
        ]
        kernel_data = kernel_data + list(extra_arg_decls) + ["..."]

        knl = lp.make_kernel(
            "{ [itgt]: 0<=itgt<n_targets }",
            instructions,
            kernel_data,
            name="eval_expr",
            lang_version=(2018, 2),
        )

        knl = lp.fix_parameters(knl, dim=self.dim)
        knl = lp.set_options(knl, write_cl=False)
        knl = lp.set_options(knl, return_dict=True)

        manglers = self.function_manglers
        if manglers is not None:
            knl = lp.register_function_manglers(knl, manglers)

        preamble_generators = self.preamble_generators
        if preamble_generators is not None:
            knl = lp.register_preamble_generators(knl, preamble_generators)

        return knl
Ejemplo n.º 4
0
def test_call_with_no_returned_value(ctx_factory):
    """Run a kernel whose only instruction is a call to ``f`` with no
    assignees, with the mangler and preamble generator for ``f`` taken from
    ``library_for_test``.
    """
    from pymbolic.primitives import Call, Variable

    queue = cl.CommandQueue(ctx_factory())

    # A call instruction with an empty assignee tuple: f() returns nothing.
    call_f = lp.CallInstruction((), Call(Variable("f"), ()))
    knl = lp.make_kernel("{:}", [call_f])

    from library_for_test import no_ret_f_mangler, no_ret_f_preamble_gen
    knl = lp.register_function_manglers(knl, [no_ret_f_mangler])
    knl = lp.register_preamble_generators(knl, [no_ret_f_preamble_gen])

    evt, _ = knl(queue)
Ejemplo n.º 5
0
def test_symbol_mangler_in_call(ctx_factory):
    """Check that a mangled symbol can be used as a function-call argument.

    The kernel computes ``y = sin(X)`` where ``X`` is resolved through the
    ``symbol_x`` symbol mangler and declared by the ``preamble_for_x``
    preamble generator (both from ``library_for_test``). The result is
    compared against ``np.sin(10)``.

    :arg ctx_factory: callable returning the OpenCL context to run on.
    """
    from library_for_test import (symbol_x, preamble_for_x)

    # Use the context handed in by the fixture instead of
    # cl.create_some_context(), so the test runs on the device the test
    # harness selected, like the other tests taking ctx_factory.
    ctx = ctx_factory()
    cq = cl.CommandQueue(ctx)

    knl = lp.make_kernel("{:}", """
        y = sin(X)
        """, [lp.GlobalArg("y", shape=lp.auto)])

    knl = lp.register_symbol_manglers(knl, [symbol_x])

    knl = lp.register_preamble_generators(knl, [preamble_for_x])

    evt, (out, ) = knl(cq)
    np.testing.assert_allclose(out.get(), np.sin(10))
Ejemplo n.º 6
0
    def __call__(self, queue, target_points, **kwargs):
        """Evaluate the cached kernel at *target_points*.

        :arg queue: passed through as the first argument of the kernel
            invocation.
        :arg target_points: an array whose first axis has length
            ``self.dim``. A :mod:`numpy` object array whose entries are
            ``cl.array.Array`` instances is also accepted; it is
            concatenated and reshaped to ``[self.dim, -1]`` first.
        :arg extra_kernel_kwargs: optional keyword; a dict of additional
            keyword arguments forwarded to the kernel invocation.
        :returns: the ``"result"`` entry of the dict returned by the kernel.
        """
        # handle target_points given as an obj_array of coords
        # NOTE: compare against the builtin ``object``; the ``np.object``
        # alias was removed in NumPy 1.24 and raises AttributeError there.
        if (isinstance(target_points, np.ndarray)
                and target_points.dtype == object
                and isinstance(target_points[0], cl.array.Array)):
            target_points = cl.array.concatenate(target_points).reshape(
                [self.dim, -1])

        assert target_points.shape[0] == self.dim

        # every coordinate axis must describe the same number of targets
        n_tgt_points = target_points[0].shape[0]
        for tgt_d in target_points:
            assert len(tgt_d) == n_tgt_points

        extra_kernel_kwargs = kwargs.get("extra_kernel_kwargs", {})

        knl = self.get_cached_optimized_kernel()

        # FIXME: caching loses function mangler information
        if self.function_manglers is not None:
            knl = lp.register_function_manglers(knl, self.function_manglers)

        if self.preamble_generators is not None:
            knl = lp.register_preamble_generators(knl,
                                                  self.preamble_generators)

        evt, res = knl(queue,
                       target_points=target_points,
                       n_targets=n_tgt_points,
                       result=np.zeros(n_tgt_points, dtype=self.dtype),
                       **extra_kernel_kwargs)

        return res["result"]
Ejemplo n.º 7
0
def generate(builder, wrapper_name=None):
    """Generate the loopy wrapper kernel described by *builder*.

    The instructions emitted by the builder are passed through
    ``replace_materialise``, have their indices merged and their
    numbered names renumbered from zero, and are then translated into loopy
    statements. These are assembled into a kernel for ``loopy.CTarget``.
    The wrapped kernel (``builder.kernel``) is inlined when its code is a
    ``loopy.LoopKernel``; otherwise it is registered through
    ``PyOP2KernelLookup`` and its source is appended to the preamble.

    :arg builder: the wrapper builder. Attributes read here include
        ``layer_index``, ``_loop_index``, ``wrapper_args``,
        ``layer_extents``, ``kernel``, ``single_cell``, ``extruded`` and
        ``argument_accesses``.
    :arg wrapper_name: name of the generated kernel. Defaults to
        ``"wrap_" + builder.kernel.name``.
    :returns: the resulting loopy kernel.
    """
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # mutable state shared with the statement translation via context below
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # one slot per wrapper argument; slots still None later get a GlobalArg
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # the initialisers get their own merger, built from the initialisers only
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    # one independent counter per (prefix, postfix) pair
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node,
                      (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # the layer extents go through the same mapping and renaming, so the
    # names stored above are refreshed here
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # pin the domain of the outer loop index to a single value
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            # likewise pin the layer index
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set,
                                  0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(
                        d.add_constraint(
                            isl.Constraint.eq_from_names(
                                d.space, {
                                    "layer": 1,
                                    t1: -1,
                                    1: 1
                                })))
                else:
                    new_domains.append(d)
        domains = new_domains

    # intersect all collected assumptions; the tuple-unpacking requires the
    # result to consist of exactly one basic set
    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # extend the assumptions: start >= 0, start < end for a single cell
    # (start <= end otherwise), and layer_start <= layer_end when extruded
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    # sorted so that the preamble text does not depend on set ordering
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # the wrapped kernel is itself a loopy kernel: register and inline it
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
Ejemplo n.º 8
0
    def map_insn_assign(self, insn):
        """Translate an assignment instruction into a loopy kernel
        instruction where possible.

        *insn* is returned unchanged when it consists of a single
        ``OperatorBinding`` or external call, or when the translated
        expressions contain no non-scalar variables. Otherwise a
        ``LoopyKernelInstruction`` is returned that wraps a kernel over the
        inames ``iel`` and ``idof`` together with its input and output
        mappings.
        """
        from grudge.symbolic.primitives import OperatorBinding

        if len(insn.exprs) == 1:
            sole_expr = insn.exprs[0]
            if (isinstance(sole_expr, OperatorBinding)
                    or is_external_call(sole_expr, self.function_registry)):
                return insn

        import loopy as lp
        from pymbolic import var

        # FIXME: These names and the size names could clash with user-given names.
        # Need better metadata tracking in loopy.
        iel = "iel"
        idof = "idof"

        # names flagged do_not_return are treated as temporaries
        temp_names = []
        for name, is_temp in zip(insn.names, insn.do_not_return):
            if is_temp:
                temp_names.append(name)

        expr_mapper = ToLoopyExpressionMapper(
                self.dd_inference_mapper, temp_names, (var(iel), var(idof)))

        no_sync = frozenset([("*", "any")])
        insns = [
                lp.Assignment(
                    expr_mapper(var(name)),
                    expr_mapper(expr),
                    temp_var_type=(
                        lp.Optional(None) if is_temp else lp.Optional()),
                    no_sync_with=no_sync,
                    )
                for name, expr, is_temp in zip(
                    insn.names, insn.exprs, insn.do_not_return)]

        if not expr_mapper.non_scalar_vars:
            return insn

        domain = (
                "{[%(iel)s, %(idof)s]: "
                "0 <= %(iel)s < nelements and 0 <= %(idof)s < nunit_dofs}"
                % {"iel": iel, "idof": idof})
        kernel_name = "grudge_assign_%d" % self.insn_count

        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        kernel_options = lp.Options(
                check_dep_resolution=False,
                return_dict=True,
                no_numpy=True,
                )

        knl = lp.make_kernel(
                domain, insns, name=kernel_name, options=kernel_options)

        self.insn_count += 1

        from pytools import single_valued
        governing_dd = single_valued(
                self.dd_inference_mapper(rhs) for rhs in insn.exprs)

        knl = lp.register_preamble_generators(
                knl, [bessel_preamble_generator])
        knl = lp.register_function_manglers(
                knl, [bessel_function_mangler])

        from grudge.symbolic.mappers import DependencyMapper
        dep_mapper = DependencyMapper(composite_leaves=False)

        # An expression counts as an output when its (at most one)
        # dependency is one of the names this instruction assigns.
        input_mappings = {}
        output_mappings = {}
        for mapped_expr, knl_name in expr_mapper.expr_to_name.items():
            deps = dep_mapper(mapped_expr)
            assert len(deps) <= 1
            is_output = any(dep.name in insn.names for dep in deps)

            target = output_mappings if is_output else input_mappings
            target[knl_name] = mapped_expr

        descriptor = LoopyKernelDescriptor(
                loopy_kernel=knl,
                input_mappings=input_mappings,
                output_mappings=output_mappings,
                fixed_arguments={},
                governing_dd=governing_dd)
        return LoopyKernelInstruction(descriptor)
Ejemplo n.º 9
0
    def map_insn_assign(self, insn):
        """Translate an assignment instruction into a loopy kernel
        instruction where possible.

        *insn* is returned unchanged when it consists of a single
        ``OperatorBinding`` or external call, or when the translated
        expressions contain no non-scalar variables. Otherwise a
        ``LoopyKernelInstruction`` is returned wrapping a kernel over the
        iname ``grdg_i`` (bounded by ``grdg_n``), split by 128 with the tags
        ``g.0`` / ``l.0``.
        """
        from grudge.symbolic.primitives import OperatorBinding

        if len(insn.exprs) == 1:
            only_expr = insn.exprs[0]
            if (isinstance(only_expr, OperatorBinding)
                    or is_external_call(only_expr, self.function_registry)):
                return insn

        iname = "grdg_i"
        size_name = "grdg_n"

        # names flagged do_not_return are treated as temporaries
        temp_names = []
        for name, is_temp in zip(insn.names, insn.do_not_return):
            if is_temp:
                temp_names.append(name)

        expr_mapper = ToLoopyExpressionMapper(
            self.dd_inference_mapper, temp_names, iname)

        import loopy as lp
        from pymbolic import var

        no_sync = frozenset([("*", "any")])
        insns = [
            lp.Assignment(
                expr_mapper(var(name)),
                expr_mapper(expr),
                temp_var_type=lp.Optional(None) if is_temp else lp.Optional(),
                no_sync_with=no_sync,
            )
            for name, expr, is_temp in zip(
                insn.names, insn.exprs, insn.do_not_return)
        ]

        if not expr_mapper.non_scalar_vars:
            return insn

        domain = "{[%s]: 0 <= %s < %s}" % (iname, iname, size_name)
        kernel_name = "grudge_assign_%d" % self.insn_count

        # Single-insn kernels may have their no_sync_with resolve to an
        # empty set, that's OK.
        kernel_options = lp.Options(check_dep_resolution=False)

        knl = lp.make_kernel(
            domain,
            insns,
            default_offset=lp.auto,
            name=kernel_name,
            options=kernel_options)
        knl = lp.set_options(knl, return_dict=True)
        knl = lp.split_iname(knl, iname, 128, outer_tag="g.0", inner_tag="l.0")

        self.insn_count += 1

        from pytools import single_valued
        governing_dd = single_valued(
            self.dd_inference_mapper(rhs) for rhs in insn.exprs)

        knl = lp.register_preamble_generators(knl, [bessel_preamble_generator])
        knl = lp.register_function_manglers(knl, [bessel_function_mangler])

        from grudge.symbolic.mappers import DependencyMapper
        dep_mapper = DependencyMapper(composite_leaves=False)

        # An expression counts as an output when its (at most one)
        # dependency is one of the names this instruction assigns.
        input_mappings = {}
        output_mappings = {}
        for mapped_expr, knl_name in six.iteritems(expr_mapper.expr_to_name):
            deps = dep_mapper(mapped_expr)
            assert len(deps) <= 1
            is_output = any(dep.name in insn.names for dep in deps)

            target = output_mappings if is_output else input_mappings
            target[knl_name] = mapped_expr

        descriptor = LoopyKernelDescriptor(
            loopy_kernel=knl,
            input_mappings=input_mappings,
            output_mappings=output_mappings,
            fixed_arguments={},
            governing_dd=governing_dd)
        return LoopyKernelInstruction(descriptor)
Ejemplo n.º 10
0
def generate(builder, wrapper_name=None):
    """Generate the loopy wrapper kernel described by *builder*.

    The instructions emitted by the builder are passed through
    ``replace_materialise``, have their indices merged and their
    numbered names renumbered from zero, and are then translated into loopy
    statements. These are assembled into a kernel for ``loopy.CTarget``.
    The wrapped kernel (``builder.kernel``) is inlined when its code is a
    ``loopy.LoopKernel``; otherwise it is registered through
    ``PyOP2KernelLookup`` and its source is appended to the preamble.

    :arg builder: the wrapper builder. Attributes read here include
        ``layer_index``, ``_loop_index``, ``wrapper_args``,
        ``layer_extents``, ``kernel``, ``single_cell``, ``extruded`` and
        ``argument_accesses``.
    :arg wrapper_name: name of the generated kernel. Defaults to
        ``"wrap_" + builder.kernel.name``.
    :returns: the resulting loopy kernel.
    """
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # mutable state shared with the statement translation via context below
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # one slot per wrapper argument; slots still None later get a GlobalArg
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # the initialisers get their own merger, built from the initialisers only
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    # one independent counter per (prefix, postfix) pair
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements) for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, replacements)
    # the layer extents go through the same mapping and renaming, so the
    # names stored above are refreshed here
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # pin the domain of the outer loop index to a single value
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            # likewise pin the layer index
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"layer": 1, t1: -1, 1: 1})))
                else:
                    new_domains.append(d)
        domains = new_domains

    # intersect all collected assumptions; the tuple-unpacking requires the
    # result to consist of exactly one basic set
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True, ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # extend the assumptions: start >= 0, start < end for a single cell
    # (start <= end otherwise), and layer_start <= layer_end when extruded
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    # sorted so that the preamble text does not depend on set ordering
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # the wrapped kernel is itself a loopy kernel: register and inline it
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code, tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper, [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(wrapper, petsc_function_lookup)

    return wrapper