Example #1
0
def augment_domain_for_temporary_promotion(
        kernel, domain, promoted_temporary, mode, name_gen):
    """
    Add new axes to the domain corresponding to the dimensions of
    `promoted_temporary`.

    :arg kernel: object whose ``iname_to_tag`` mapping supplies the tag of
        each hardware iname that gets duplicated.
    :arg domain: the isl set to extend.
    :arg promoted_temporary: read for ``orig_temporary`` (its ``name`` and
        ``shape``) and for ``hw_inames``.
    :arg mode: value formatted into the generated iname names.
    :arg name_gen: callable mapping a proposed name to the name to use.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
        where *domain* is a single basic set, *hw_inames* are the names of
        the duplicated hardware axes, *dim_inames* are the names of the
        per-dimension axes and *iname_to_tag* maps the new hardware inames
        to the tags of the inames they duplicate.
    """
    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    orig_temporary = promoted_temporary.orig_temporary
    orig_dim = domain.dim(isl.dim_type.set)

    iname_to_tag = {}

    # Add dimension-dependent inames.
    dim_inames = []

    domain = domain.add(isl.dim_type.set, len(orig_temporary.shape))
    for t_idx, size in enumerate(orig_temporary.shape):
        new_iname = name_gen("{name}_{mode}_dim_{dim}".
            format(name=orig_temporary.name,
                   mode=mode,
                   dim=orig_dim + t_idx))
        domain = domain.set_dim_name(
            isl.dim_type.set, orig_dim + t_idx, new_iname)
        dim_inames.append(new_iname)

        # Add size information: 0 <= new_iname < size. The upper bound has
        # to be strict, since an axis of extent `size` is indexed by
        # 0 .. size - 1; a non-strict bound would admit one index too many.
        aff = isl.affs_from_space(domain.space)
        domain &= aff[0].le_set(aff[new_iname])
        domain &= aff[new_iname].lt_set(aff_from_expr(domain.space, size))

    hw_inames = []

    # Add hardware inames duplicates.
    for t_idx, hw_iname in enumerate(promoted_temporary.hw_inames):
        new_iname = name_gen("{name}_{mode}_hw_dim_{dim}".
            format(name=orig_temporary.name,
                   mode=mode,
                   dim=t_idx))
        hw_inames.append(new_iname)
        # The duplicate carries the same tag as the iname it mirrors.
        iname_to_tag[new_iname] = kernel.iname_to_tag[hw_iname]

    domain = duplicate_axes(
        domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    domain_list = domain.get_basic_set_list()
    assert domain_list.n_basic_set() == 1
    domain = domain_list.get_basic_set(0)
    return domain, hw_inames, dim_inames, iname_to_tag
Example #2
0
def augment_domain_for_temporary_promotion(kernel, domain, promoted_temporary,
                                           mode, name_gen):
    """
    Extend `domain` with one new set axis per dimension of the temporary
    behind `promoted_temporary`, and with duplicates of its hardware inames.

    Each per-dimension axis is constrained to ``0 <= iname < extent``. If the
    temporary is local, the new axis is tagged with an
    ``AutoFitLocalIndexTag``. Each duplicated hardware iname receives the tag
    that `kernel` records for the iname it duplicates.

    Returns ``(domain, hw_inames, dim_inames, iname_to_tag)``, with `domain`
    reduced to its single basic set.
    """
    import islpy as isl
    from loopy.symbolic import aff_from_expr

    temporary = promoted_temporary.orig_temporary
    first_new_axis = domain.dim(isl.dim_type.set)
    extents = temporary.shape

    tags = {}
    axis_inames = []
    axis_template = "{name}_{mode}_dim_{dim}"

    # One fresh set dimension per axis of the temporary.
    domain = domain.add(isl.dim_type.set, len(extents))
    for axis, extent in enumerate(extents):
        iname = name_gen(
            axis_template.format(name=temporary.name, mode=mode, dim=axis))
        domain = domain.set_dim_name(
            isl.dim_type.set, first_new_axis + axis, iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can be
            # done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            tags[iname] = AutoFitLocalIndexTag()

        axis_inames.append(iname)

        # Bound the new axis: 0 <= iname < extent. The affs are rebuilt on
        # every pass because the space has just gained a dimension name.
        affs = isl.affs_from_space(domain.space)
        lower = affs[0].le_set(affs[iname])
        domain = domain & lower
        upper = affs[iname].lt_set(aff_from_expr(domain.space, extent))
        domain = domain & upper

    # Name a duplicate for every hardware iname and copy its tag over.
    hw_template = "{name}_{mode}_hw_dim_{dim}"
    hw_copies = []
    for position, source_iname in enumerate(promoted_temporary.hw_inames):
        copy_name = name_gen(
            hw_template.format(name=temporary.name, mode=mode, dim=position))
        hw_copies.append(copy_name)
        tags[copy_name] = kernel.iname_to_tag[source_iname]

    from loopy.isl_helpers import duplicate_axes
    domain = duplicate_axes(domain, promoted_temporary.hw_inames, hw_copies)

    # The steps above yield a Set, yet the result is expected to consist of
    # exactly one BasicSet; unwrap it.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    return pieces.get_basic_set(0), hw_copies, axis_inames, tags
Example #3
0
def test_affs_from_space():
    """Combine affine expressions from a set's space into a new set and
    print it."""
    base = isl.Set("[n] -> {[i,j,k]: 0<=i,j,k<n}")
    affs = isl.affs_from_space(base.space)

    # 0 <= i + j
    sum_lower = affs[0].le_set(affs["i"] + affs["j"])
    # i + j < n
    sum_upper = (affs["i"] + affs["j"]).lt_set(affs["n"])
    # 0 <= i
    i_lower = affs[0].le_set(affs["i"])
    # i <= n + 13
    i_upper = affs["i"].le_set(13 + affs["n"])

    myset = sum_lower & sum_upper & i_lower & i_upper

    print(myset)
Example #4
0
def test_affs_from_space():
    """Intersect several constraints built from a space's affine
    expressions and print the resulting set."""
    source = isl.Set("[n] -> {[i,j,k]: 0<=i,j,k<n}")
    affs = isl.affs_from_space(source.space)

    constraints = (
        # 0 <= i + j
        affs[0].le_set(affs["i"] + affs["j"]),
        # i + j < n
        (affs["i"] + affs["j"]).lt_set(affs["n"]),
        # 0 <= i
        affs[0].le_set(affs["i"]),
        # i <= n + 13
        affs["i"].le_set(13 + affs["n"]),
    )

    # Fold the constraints together left to right.
    myset = constraints[0]
    for constraint in constraints[1:]:
        myset = myset & constraint

    print(myset)
Example #5
0
    def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
                                          mode, subkernel):
        """
        Extend `domain` with the axes used by the save/reload stage of
        `promoted_temporary`.

        One axis is added per entry of ``non_hw_dims`` and one per entry of
        ``hw_dims``; they are named at set-dimension indices starting at the
        number of set dimensions `domain` had on entry. Every new axis is
        bounded by ``0 <= iname < extent``.

        Tags of the hardware axes are recorded in
        ``self.updated_iname_to_tag``. The returned mapping only holds the
        ``AutoFitLocalIndexTag`` entries created for local temporaries.

        Returns ``(domain, hw_inames, dim_inames, iname_to_tag)``.
        """
        assert mode in ("save", "reload")
        import islpy as isl
        from loopy.symbolic import aff_from_expr

        temporary = self.kernel.temporary_variables[
            promoted_temporary.orig_temporary_name]
        first_new_axis = domain.dim(isl.dim_type.set)

        plain_extents = promoted_temporary.non_hw_dims
        hw_extents = promoted_temporary.hw_dims
        set_dims = isl.dim_type.set

        # FIXME: Restrict size of new inames to access footprint.

        # Make room for all new axes at once; names follow below.
        domain = domain.add(set_dims, len(plain_extents) + len(hw_extents))

        # Tags for newly added (non-hardware) inames.
        new_tags = {}
        axis_inames = []
        for axis, extent in enumerate(plain_extents):
            iname = self.insn_name_gen(
                "{name}_{mode}_axis_{dim}_{sk}".format(
                    name=temporary.name, mode=mode, dim=axis, sk=subkernel))
            domain = domain.set_dim_name(set_dims, first_new_axis + axis,
                                         iname)

            if temporary.is_local:
                # Loads / stores of a local-scope temporary can be done in
                # parallel.
                from loopy.kernel.data import AutoFitLocalIndexTag
                new_tags[iname] = AutoFitLocalIndexTag()

            axis_inames.append(iname)

            # Size information: 0 <= iname < extent.
            affs = isl.affs_from_space(domain.space)
            domain = domain & affs[0].le_set(affs[iname])
            domain = domain & affs[iname].lt_set(
                aff_from_expr(domain.space, extent))

        # Hardware axes are named right after the non-hardware ones.
        hw_first_axis = first_new_axis + len(plain_extents)

        hw_axis_inames = []
        hw_pairs = zip(promoted_temporary.hw_tags, hw_extents)
        for position, (tag, extent) in enumerate(hw_pairs):
            iname = self.insn_name_gen(
                "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                    name=temporary.name, mode=mode, dim=position,
                    sk=subkernel))
            domain = domain.set_dim_name(set_dims, hw_first_axis + position,
                                         iname)

            affs = isl.affs_from_space(domain.space)
            upper = affs[iname].lt_set(aff_from_expr(domain.space, extent))
            domain &= affs[0].le_set(affs[iname])
            domain &= upper

            self.updated_iname_to_tag[iname] = tag
            hw_axis_inames.append(iname)

        # The steps above yield a Set, yet the result is expected to consist
        # of exactly one BasicSet; unwrap it.
        pieces = domain.get_basic_set_list()
        assert pieces.n_basic_set() == 1
        return pieces.get_basic_set(0), hw_axis_inames, axis_inames, new_tags
Example #6
0
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    The instructions emitted by the builder are post-processed (Materialise
    replacement, index merging, renaming), scheduled into loop nests and
    translated to loopy statements. The resulting kernel is created with a
    C target, and the builder's kernel is then either registered and inlined
    as a callable (when its code is a ``loopy.LoopKernel``) or appended to
    the preamble as source code.

    :arg builder: the wrapper builder supplying instructions, arguments,
        layer extents and the kernel to call.
    :arg wrapper_name: name of the generated kernel; defaults to
        ``"wrap_<kernel name>"`` when *None*.
    :returns: the loopy kernel produced by ``loopy.make_kernel`` after all
        registrations.
    """
    # Inames every instruction sits inside: the loop index, plus the layer
    # index when the builder has one.
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable state shared with the statement translation below.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    # Only names of the form <letters/underscores><digits>[_offset] are
    # renumbered; one counter is kept per (prefix, postfix) pair.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node,
                      (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # The layer extents go through the same mapping and renaming, so their
    # names are refreshed here.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the loop index of the matching domains to a single value.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set,
                                  0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(
                        d.add_constraint(
                            isl.Constraint.eq_from_names(
                                d.space, {
                                    "layer": 1,
                                    t1: -1,
                                    1: 1
                                })))
                else:
                    new_domains.append(d)
        # When not extruded, new_domains still holds the single-cell result,
        # so this assignment is a no-op in that case.
        domains = new_domains

    # Intersect all recorded assumptions; the unpacking requires the
    # parameter projection to consist of exactly one basic set.
    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Extra assumptions on the iteration bounds: start >= 0, start < end
    # (single cell) or start <= end, and layer_start <= layer_end when
    # extruded.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    # Sorted so the preamble text does not depend on set iteration order.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The kernel is itself a loopy kernel: register it as a callable,
        # match argument dimensions, then inline it into the wrapper.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper
Example #7
0
def domain_for_shape(
    dim_names: Tuple[str, ...],
    shape: ShapeType,
    reductions: Dict[str, Tuple[ScalarExpression, ScalarExpression]],
) -> isl.BasicSet:  # noqa
    """Return an :class:`islpy.BasicSet` describing the index domain of an
    array with (potentially symbolic) shape *shape* and reduction dimensions
    *reductions*.

    :arg dim_names: A tuple of strings naming the axes. Each name becomes a
        set dimension of the returned domain.

    :arg shape: A tuple of constant or quasi-affine :mod:`pymbolic`
        expressions, of the same length as *dim_names*. Variables occurring
        in these expressions become parameter dimensions of the returned
        set.

    :arg reductions: A map from reduction inames to (lower, upper) bounds,
        interpreted as half-open integer ranges. Variables occurring in the
        bounds become parameter dimensions of the returned set.
    """
    assert len(dim_names) == len(shape)

    # Every variable used by an extent or a reduction bound is a parameter.
    param_names_set: Set[str] = set()
    for extent in shape:
        param_names_set |= scalar_expr.get_dependencies(extent)

    for bounds in reductions.values():
        for bound in bounds:
            # FIXME: Assumes that reduction bounds are not data-dependent.
            param_names_set |= scalar_expr.get_dependencies(bound)

    # Start from the unconstrained set over the sorted axis/parameter names.
    space = isl.Space.create_from_names(
        isl.DEFAULT_CONTEXT,
        set=sorted(tuple(dim_names) + tuple(reductions)),
        params=sorted(param_names_set))
    dom = isl.BasicSet.universe(space)

    from loopy.symbolic import aff_from_expr
    affs = isl.affs_from_space(dom.space)
    zero = affs[0]

    # Array axes: 0 <= iname < extent.
    for iname, extent in zip(dim_names, shape):
        axis = affs[iname]
        dom = dom & zero.le_set(axis)
        dom = dom & axis.lt_set(aff_from_expr(dom.space, extent))

    # Reduction axes: lower <= iname < upper.
    for iname, (lower, upper) in reductions.items():
        axis = affs[iname]
        dom = dom & aff_from_expr(dom.space, lower).le_set(axis)
        dom = dom & axis.lt_set(aff_from_expr(dom.space, upper))

    pieces = dom.get_basic_sets()

    if not pieces:
        # No basic sets at all means the domain is empty.
        return isl.BasicSet.empty(dom.get_space())

    # Otherwise exactly one basic set is expected.
    (only_piece,) = pieces
    return only_piece
Example #8
0
    def augment_domain_for_save_or_reload(self,
            domain, promoted_temporary, mode, subkernel):
        """
        Extend `domain` with the axes used by the save/reload stage of
        `promoted_temporary`.

        One axis is added per entry of ``non_hw_dims`` and one per entry of
        ``hw_dims``; they are named at set-dimension indices starting at the
        number of set dimensions `domain` had on entry. Every new axis is
        bounded by ``0 <= iname < extent``.

        Tags of the hardware axes are recorded in
        ``self.updated_iname_to_tags``. The returned mapping only holds the
        ``AutoFitLocalIndexTag`` entries created for temporaries in the
        local address space.

        Returns ``(domain, hw_inames, dim_inames, iname_to_tags)``.
        """
        assert mode in ("save", "reload")
        import islpy as isl
        from loopy.symbolic import aff_from_expr

        temporary = self.kernel.temporary_variables[
            promoted_temporary.orig_temporary_name]
        first_new_axis = domain.dim(isl.dim_type.set)

        plain_extents = promoted_temporary.non_hw_dims
        hw_extents = promoted_temporary.hw_dims
        set_dims = isl.dim_type.set

        # FIXME: Restrict size of new inames to access footprint.

        # Make room for all new axes at once; names follow below.
        domain = domain.add_dims(
            set_dims, len(plain_extents) + len(hw_extents))

        # Tags for newly added (non-hardware) inames.
        new_tags = {}
        axis_inames = []
        for axis, extent in enumerate(plain_extents):
            iname = self.insn_name_gen(
                "{name}_{mode}_axis_{dim}_{sk}".format(
                    name=temporary.name, mode=mode, dim=axis, sk=subkernel))
            domain = domain.set_dim_name(
                set_dims, first_new_axis + axis, iname)

            if temporary.address_space == AddressSpace.LOCAL:
                # Loads / stores of a local-scope temporary can be done in
                # parallel.
                from loopy.kernel.data import AutoFitLocalIndexTag
                new_tags[iname] = frozenset([AutoFitLocalIndexTag()])

            axis_inames.append(iname)

            # Size information: 0 <= iname < extent.
            affs = isl.affs_from_space(domain.space)
            domain = domain & affs[0].le_set(affs[iname])
            domain = domain & affs[iname].lt_set(
                aff_from_expr(domain.space, extent))

        # Hardware axes are named right after the non-hardware ones.
        hw_first_axis = first_new_axis + len(plain_extents)

        hw_axis_inames = []
        hw_pairs = zip(promoted_temporary.hw_tags, hw_extents)
        for position, (tag, extent) in enumerate(hw_pairs):
            iname = self.insn_name_gen(
                "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                    name=temporary.name, mode=mode, dim=position,
                    sk=subkernel))
            domain = domain.set_dim_name(
                set_dims, hw_first_axis + position, iname)

            affs = isl.affs_from_space(domain.space)
            upper = affs[iname].lt_set(aff_from_expr(domain.space, extent))
            domain &= affs[0].le_set(affs[iname])
            domain &= upper

            self.updated_iname_to_tags[iname] = frozenset([tag])
            hw_axis_inames.append(iname)

        # The steps above yield a Set, yet the result is expected to consist
        # of exactly one BasicSet; unwrap it.
        pieces = domain.get_basic_set_list()
        assert pieces.n_basic_set() == 1
        return pieces.get_basic_set(0), hw_axis_inames, axis_inames, new_tags
Example #9
0
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    The instructions emitted by the builder are post-processed (Materialise
    replacement, index merging, renaming), scheduled into loop nests and
    translated to loopy statements. The resulting kernel is created with a
    C target, and the builder's kernel is then either registered and inlined
    as a callable (when its code is a ``loopy.LoopKernel``) or appended to
    the preamble as source code.

    :arg builder: the wrapper builder supplying instructions, arguments,
        layer extents and the kernel to call.
    :arg wrapper_name: name of the generated kernel; defaults to
        ``"wrap_<kernel name>"`` when *None*.
    :returns: the loopy kernel produced by ``loopy.make_kernel`` after all
        registrations.
    """
    # Inames every instruction sits inside: the loop index, plus the layer
    # index when the builder has one.
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable state shared with the statement translation below.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    # Only names of the form <letters/underscores><digits>[_offset] are
    # renumbered; one counter is kept per (prefix, postfix) pair.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements) for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, replacements)
    # The layer extents go through the same mapping and renaming, so their
    # names are refreshed here.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the loop index of the matching domains to a single value.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"layer": 1, t1: -1, 1: 1})))
                else:
                    new_domains.append(d)
        # When not extruded, new_domains still holds the single-cell result,
        # so this assignment is a no-op in that case.
        domains = new_domains

    # Intersect all recorded assumptions; the unpacking requires the
    # parameter projection to consist of exactly one basic set.
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True, ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Extra assumptions on the iteration bounds: start >= 0, start < end
    # (single cell) or start <= end, and layer_start <= layer_end when
    # extruded.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    # Sorted so the preamble text does not depend on set iteration order.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The kernel is itself a loopy kernel: register it as a callable,
        # match argument dimensions, then inline it into the wrapper.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code, tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper, [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(wrapper, petsc_function_lookup)

    return wrapper