Exemple #1
0
    def map_substitution(self, name, tag, arguments, expn_state):
        if not (
                name == self.subst_name
                and self.within(
                    expn_state.kernel,
                    expn_state.instruction,
                    expn_state.stack)
                and (self.subst_tag is None or self.subst_tag == tag)):
            return super(RuleInvocationReplacer, self).map_substitution(
                    name, tag, arguments, expn_state)

        # {{{ check if in footprint

        rule = self.rule_mapping_context.old_subst_rules[name]
        arg_context = self.make_new_arg_context(
                    name, rule.arguments, arguments, expn_state.arg_context)
        args = [arg_context[arg_name] for arg_name in rule.arguments]

        accdesc = AccessDescriptor(
                storage_axis_exprs=storage_axis_exprs(
                    self.storage_axis_sources, args))

        if not self.array_base_map.is_access_descriptor_in_footprint(accdesc):
            return super(RuleInvocationReplacer, self).map_substitution(
                    name, tag, arguments, expn_state)

        # }}}

        assert len(arguments) == len(rule.arguments)

        abm = self.array_base_map

        stor_subscript = []
        for sax_name, sax_source, sax_base_idx in zip(
                self.storage_axis_names,
                self.storage_axis_sources,
                abm.storage_base_indices):
            if sax_name not in self.non1_storage_axis_names:
                continue

            if isinstance(sax_source, int):
                # an argument
                ax_index = arguments[sax_source]
            else:
                # an iname
                ax_index = var(sax_source)

            from loopy.isl_helpers import simplify_via_aff
            ax_index = simplify_via_aff(ax_index - sax_base_idx)
            stor_subscript.append(ax_index)

        new_outer_expr = var(self.temporary_name)
        if stor_subscript:
            new_outer_expr = new_outer_expr.index(tuple(stor_subscript))

        # Can't possibly be nested, and no need to traverse
        # further as compute expression has already been seen
        # by rule_mapping_context.

        return new_outer_expr
Exemple #2
0
    def map_array_access(self, index, expn_state):
        accdesc = AccessDescriptor(identifier=None, storage_axis_exprs=index)

        if not self.array_base_map.is_access_descriptor_in_footprint(accdesc):
            return None

        abm = self.array_base_map

        index = expn_state.apply_arg_context(index)

        assert len(index) == len(abm.non1_storage_axis_flags)

        access_subscript = []
        for i in range(len(index)):
            if not abm.non1_storage_axis_flags[i]:
                continue

            ax_index = index[i]
            from loopy.isl_helpers import simplify_via_aff
            ax_index = simplify_via_aff(ax_index - abm.storage_base_indices[i])

            access_subscript.append(ax_index)

        result = self.buf_var
        if access_subscript:
            result = result.index(tuple(access_subscript))

        # Can't possibly be nested, but recurse anyway to
        # make sure substitution rules referenced below here
        # do not get thrown away.
        self.rec(result, expn_state.copy(arg_context={}))

        return result
Exemple #3
0
    def map_array_access(self, index, expn_state):
        accdesc = AccessDescriptor(identifier=None, storage_axis_exprs=index)

        if not self.array_base_map.is_access_descriptor_in_footprint(accdesc):
            return None

        abm = self.array_base_map

        index = expn_state.apply_arg_context(index)

        assert len(index) == len(abm.non1_storage_axis_flags)

        access_subscript = []
        for i in range(len(index)):
            if not abm.non1_storage_axis_flags[i]:
                continue

            ax_index = index[i]
            from loopy.isl_helpers import simplify_via_aff

            ax_index = simplify_via_aff(ax_index - abm.storage_base_indices[i])

            access_subscript.append(ax_index)

        result = self.buf_var
        if access_subscript:
            result = result.index(tuple(access_subscript))

        # Can't possibly be nested, but recurse anyway to
        # make sure substitution rules referenced below here
        # do not get thrown away.
        self.rec(result, expn_state.copy(arg_context={}))

        return result
Exemple #4
0
    def map_substitution(self, name, tag, arguments, expn_state):
        if not (name == self.subst_name and self.within(
                expn_state.kernel, expn_state.instruction, expn_state.stack)
                and (self.subst_tag is None or self.subst_tag == tag)):
            return super(RuleInvocationReplacer,
                         self).map_substitution(name, tag, arguments,
                                                expn_state)

        # {{{ check if in footprint

        rule = self.rule_mapping_context.old_subst_rules[name]
        arg_context = self.make_new_arg_context(name, rule.arguments,
                                                arguments,
                                                expn_state.arg_context)
        args = [arg_context[arg_name] for arg_name in rule.arguments]

        accdesc = AccessDescriptor(storage_axis_exprs=storage_axis_exprs(
            self.storage_axis_sources, args))

        if not self.array_base_map.is_access_descriptor_in_footprint(accdesc):
            return super(RuleInvocationReplacer,
                         self).map_substitution(name, tag, arguments,
                                                expn_state)

        # }}}

        assert len(arguments) == len(rule.arguments)

        abm = self.array_base_map

        stor_subscript = []
        for sax_name, sax_source, sax_base_idx in zip(
                self.storage_axis_names, self.storage_axis_sources,
                abm.storage_base_indices):
            if sax_name not in self.non1_storage_axis_names:
                continue

            if isinstance(sax_source, int):
                # an argument
                ax_index = arguments[sax_source]
            else:
                # an iname
                ax_index = var(sax_source)

            from loopy.isl_helpers import simplify_via_aff
            ax_index = simplify_via_aff(ax_index - sax_base_idx)
            stor_subscript.append(ax_index)

        new_outer_expr = var(self.temporary_name)
        if stor_subscript:
            new_outer_expr = new_outer_expr.index(tuple(stor_subscript))

        # Can't possibly be nested, and no need to traverse
        # further as compute expression has already been seen
        # by rule_mapping_context.

        self.replaced_something = True

        return new_outer_expr
Exemple #5
0
def test_simplify_via_aff_reproducibility():
    # See https://github.com/inducer/loopy/pull/349
    from loopy.symbolic import parse
    from loopy.isl_helpers import simplify_via_aff

    expr = parse("i+i_0")

    assert simplify_via_aff(expr) == expr
Exemple #6
0
    def map_subscript(self, expr, expn_state):
        if expr.aggregate.name in self.callee_knl.arg_dict:
            from loopy.symbolic import get_start_subscript_from_sar
            from loopy.isl_helpers import simplify_via_aff
            from pymbolic.primitives import Subscript, Variable

            sar = self.callee_arg_to_call_param[
                expr.aggregate.name]  # SubArrayRef

            callee_arg = self.callee_knl.arg_dict[expr.aggregate.name]
            if sar.subscript.aggregate.name in self.caller_knl.arg_dict:
                caller_arg = self.caller_knl.arg_dict[
                    sar.subscript.aggregate.name]
            else:
                caller_arg = self.caller_knl.temporary_variables[
                    sar.subscript.aggregate.name]

            flatten_index = 0
            for i, idx in enumerate(
                    get_start_subscript_from_sar(sar,
                                                 self.caller_knl).index_tuple):
                flatten_index += idx * caller_arg.dim_tags[i].stride

            flatten_index += sum(idx * tag.stride for idx, tag in zip(
                self.rec(expr.index_tuple, expn_state), callee_arg.dim_tags))

            flatten_index = simplify_via_aff(flatten_index)

            new_indices = []
            for dim_tag in caller_arg.dim_tags:
                ind = flatten_index // dim_tag.stride
                flatten_index -= (dim_tag.stride * ind)
                new_indices.append(ind)

            new_indices = tuple(simplify_via_aff(i) for i in new_indices)

            return Subscript(Variable(sar.subscript.aggregate.name),
                             new_indices)
        else:
            return super().map_subscript(expr, expn_state)
Exemple #7
0
    def reshape(self, new_shape, order='C'):
        """
        Registers a substitution rule to reshape array with the shape
        ``new_shape``. Mimics :func:`numpy.ndarray.reshape`.

        :arg new_shape: An instance of :class:`tuple` of :class:`int`.
        :arg order: Either 'C' or 'F'
        """
        # need an error here complain if there is a shape mismatch
        # how to do this:
        # look at how loopy sets its dim tags, from shape and order.
        subst_name = self.stack.name_generator(based_on='subst')
        inames = tuple(
            self.stack.name_generator(based_on="i") for _ in new_shape)
        new_arg = self.copy(stack=self.stack,
                            name=subst_name,
                            shape=new_shape,
                            dim_tags=None,
                            order=order)

        linearized_idx = sum(
            Variable(iname) * dim_tag.stride
            for iname, dim_tag in zip(inames, new_arg.dim_tags))
        strides = tuple(dim_tag.stride for dim_tag in self.dim_tags)
        if self.dim_tags[0].stride == 1:
            pass
        elif self.dim_tags[-1].stride == 1:
            strides = strides[::-1]
        else:
            raise ValueError()

        indices = []
        for stride in strides[::-1]:
            current_idx = linearized_idx // stride
            indices.append(simplify_via_aff(current_idx))
            linearized_idx -= current_idx * stride

        # should we assert that the linearized index is 0?

        rule = lp.SubstitutionRule(subst_name,
                                   inames,
                                   expression=Variable(
                                       self.name)(*tuple(indices)))

        self.stack.register_substitution(rule)

        return ArraySymbol(stack=self.stack, name=subst_name, shape=new_shape)
def pack_and_unpack_args_for_call_for_single_kernel(kernel,
                                                    callables_table,
                                                    call_name,
                                                    args_to_pack=None,
                                                    args_to_unpack=None):
    """
    Returns a a copy of *kernel* with instructions appended to copy the
    arguments in *args* to match the alignment expected by the *call_name* in
    the kernel. The arguments are copied back to *args* with the appropriate
    data layout.

    :arg call_name: An instance of :class:`str` denoting the function call in
        the *kernel*.
    :arg args_to_unpack: A list of the arguments as instances of :class:`str` which
        must be packed. If set *None*, it is interpreted that all the array
        arguments would be packed.
    :arg args_to_unpack: A list of the arguments as instances of :class:`str`
        which must be unpacked. If set *None*, it is interpreted that
        all the array arguments should be unpacked.
    """
    assert isinstance(kernel, LoopKernel)
    new_domains = []
    new_tmps = kernel.temporary_variables.copy()
    old_insn_to_new_insns = {}

    for insn in kernel.instructions:
        if not isinstance(insn, CallInstruction):
            # pack and unpack call only be done for CallInstructions.
            continue
        if insn.expression.function.name not in callables_table:
            continue

        in_knl_callable = callables_table[insn.expression.function.name]

        if in_knl_callable.name != call_name:
            # not the function we're looking for.
            continue
        in_knl_callable = in_knl_callable.with_packing_for_args()

        vng = kernel.get_var_name_generator()
        ing = kernel.get_instruction_id_generator()

        parameters = insn.expression.parameters
        if args_to_pack is None:
            args_to_pack = [
                par.subscript.aggregate.name
                for par in parameters + insn.assignees
                if isinstance(par, SubArrayRef) and (par.swept_inames)
            ]
        if args_to_unpack is None:
            args_to_unpack = [
                par.subscript.aggregate.name
                for par in parameters + insn.assignees
                if isinstance(par, SubArrayRef) and (par.swept_inames)
            ]

        # {{{ sanity checks for args

        assert isinstance(args_to_pack, list)
        assert isinstance(args_to_unpack, list)

        for arg in args_to_pack:
            found_sub_array_ref = False

            for par in parameters + insn.assignees:
                # checking that the given args is a sub array ref
                if isinstance(par,
                              SubArrayRef) and (par.subscript.aggregate.name
                                                == arg):
                    found_sub_array_ref = True
                    break
            if not found_sub_array_ref:
                raise LoopyError(
                    "No match found for packing arg '%s' of call '%s' "
                    "at insn '%s'." % (arg, call_name, insn.id))
        for arg in args_to_unpack:
            if arg not in args_to_pack:
                raise LoopyError("Argument %s should be packed in order to be "
                                 "unpacked." % arg)

        # }}}

        packing_insns = []
        unpacking_insns = []

        # {{{ handling ilp tags

        from loopy.kernel.data import IlpBaseTag, VectorizeTag
        import islpy as isl
        from pymbolic import var

        dim_type = isl.dim_type.set
        ilp_inames = {
            iname
            for iname in insn.within_inames if all(
                isinstance(tag, (IlpBaseTag, VectorizeTag))
                for tag in kernel.iname_to_tags.get(iname, []))
        }
        new_ilp_inames = set()
        ilp_inames_map = {}
        for iname in ilp_inames:
            new_iname_name = vng(iname + "_ilp")
            ilp_inames_map[var(iname)] = var(new_iname_name)
            new_ilp_inames.add(new_iname_name)
        for iname in ilp_inames:
            new_domain = kernel.get_inames_domain(iname).copy()
            for i in range(new_domain.n_dim()):
                old_iname = new_domain.get_dim_name(dim_type, i)
                if old_iname in ilp_inames:
                    new_domain = new_domain.set_dim_name(
                        dim_type, i, ilp_inames_map[var(old_iname)].name)
            new_domains.append(new_domain)

        # }}}

        from pymbolic.mapper.substitutor import make_subst_func
        from loopy.symbolic import SubstitutionMapper

        # dict to store the new assignees and parameters, the mapping pattern
        # from arg_id to parameters is identical to InKernelCallable.arg_id_to_dtype
        id_to_parameters = tuple(enumerate(parameters)) + tuple(
            (-i - 1, assignee) for i, assignee in enumerate(insn.assignees))
        new_id_to_parameters = {}

        for arg_id, p in id_to_parameters:
            if isinstance(p, SubArrayRef) and (p.subscript.aggregate.name
                                               in args_to_pack):
                new_pack_inames = ilp_inames_map.copy(
                )  # packing-specific inames
                new_unpack_inames = ilp_inames_map.copy(
                )  # unpacking-specific iname

                new_pack_inames = {
                    iname: var(vng(iname.name + "_pack"))
                    for iname in p.swept_inames
                }
                new_unpack_inames = {
                    iname: var(vng(iname.name + "_unpack"))
                    for iname in p.swept_inames
                }

                # Updating the domains corresponding to the new inames.
                for iname in p.swept_inames:
                    new_domain_pack = kernel.get_inames_domain(
                        iname.name).copy()
                    new_domain_unpack = kernel.get_inames_domain(
                        iname.name).copy()
                    for i in range(new_domain_pack.n_dim()):
                        old_iname = new_domain_pack.get_dim_name(dim_type, i)
                        if var(old_iname) in new_pack_inames:
                            new_domain_pack = new_domain_pack.set_dim_name(
                                dim_type, i,
                                new_pack_inames[var(old_iname)].name)
                            new_domain_unpack = new_domain_unpack.set_dim_name(
                                dim_type, i,
                                new_unpack_inames[var(old_iname)].name)
                    new_domains.append(new_domain_pack)
                    new_domains.append(new_domain_unpack)

                arg = p.subscript.aggregate.name
                pack_name = vng(arg + "_pack")

                from loopy.kernel.data import (TemporaryVariable,
                                               temp_var_scope)

                if arg in kernel.arg_dict:
                    arg_in_caller = kernel.arg_dict[arg]
                else:
                    arg_in_caller = kernel.temporary_variables[arg]

                pack_tmp = TemporaryVariable(
                    name=pack_name,
                    dtype=arg_in_caller.dtype,
                    dim_tags=in_knl_callable.arg_id_to_descr[arg_id].dim_tags,
                    shape=in_knl_callable.arg_id_to_descr[arg_id].shape,
                    scope=temp_var_scope.PRIVATE,
                )

                new_tmps[pack_name] = pack_tmp

                from loopy import Assignment
                pack_subst_mapper = SubstitutionMapper(
                    make_subst_func(new_pack_inames))
                unpack_subst_mapper = SubstitutionMapper(
                    make_subst_func(new_unpack_inames))

                # {{{ getting the lhs for packing and rhs for unpacking

                from loopy.isl_helpers import simplify_via_aff, make_slab

                flatten_index = simplify_via_aff(
                    sum(dim_tag.stride * idx for dim_tag, idx in zip(
                        arg_in_caller.dim_tags, p.subscript.index_tuple)))

                new_indices = []
                for dim_tag in in_knl_callable.arg_id_to_descr[
                        arg_id].dim_tags:
                    ind = flatten_index // dim_tag.stride
                    flatten_index -= (dim_tag.stride * ind)
                    new_indices.append(ind)

                new_indices = tuple(simplify_via_aff(i) for i in new_indices)

                pack_lhs_assignee = pack_subst_mapper(
                    var(pack_name).index(new_indices))
                unpack_rhs = unpack_subst_mapper(
                    var(pack_name).index(new_indices))

                # }}}

                packing_insns.append(
                    Assignment(
                        assignee=pack_lhs_assignee,
                        expression=pack_subst_mapper.map_subscript(
                            p.subscript),
                        within_inames=insn.within_inames - ilp_inames
                        | {new_pack_inames[i].name
                           for i in p.swept_inames} | (new_ilp_inames),
                        depends_on=insn.depends_on,
                        id=ing(insn.id + "_pack"),
                        depends_on_is_final=True))

                if p.subscript.aggregate.name in args_to_unpack:
                    unpacking_insns.append(
                        Assignment(
                            expression=unpack_rhs,
                            assignee=unpack_subst_mapper.map_subscript(
                                p.subscript),
                            within_inames=insn.within_inames - ilp_inames | {
                                new_unpack_inames[i].name
                                for i in p.swept_inames
                            } | (new_ilp_inames),
                            id=ing(insn.id + "_unpack"),
                            depends_on=frozenset([insn.id]),
                            depends_on_is_final=True))

                # {{{ creating the sweep inames for the new sub array refs

                updated_swept_inames = []

                for _ in in_knl_callable.arg_id_to_descr[arg_id].shape:
                    updated_swept_inames.append(var(vng("i_packsweep_" + arg)))

                ctx = kernel.isl_context
                space = isl.Space.create_from_names(
                    ctx, set=[iname.name for iname in updated_swept_inames])
                iname_set = isl.BasicSet.universe(space)
                for iname, axis_length in zip(
                        updated_swept_inames,
                        in_knl_callable.arg_id_to_descr[arg_id].shape):
                    iname_set = iname_set & make_slab(space, iname.name, 0,
                                                      axis_length)
                new_domains = new_domains + [iname_set]

                # }}}

                new_id_to_parameters[arg_id] = SubArrayRef(
                    tuple(updated_swept_inames),
                    (var(pack_name).index(tuple(updated_swept_inames))))
            else:
                new_id_to_parameters[arg_id] = p

        if packing_insns:
            subst_mapper = SubstitutionMapper(make_subst_func(ilp_inames_map))
            new_call_insn = insn.with_transformed_expressions(subst_mapper)
            new_params = tuple(
                subst_mapper(new_id_to_parameters[i])
                for i, _ in enumerate(parameters))
            new_assignees = tuple(
                subst_mapper(new_id_to_parameters[-i - 1])
                for i, _ in enumerate(insn.assignees))
            new_call_insn = new_call_insn.copy(
                depends_on=new_call_insn.depends_on
                | {pack.id
                   for pack in packing_insns},
                within_inames=new_call_insn.within_inames - ilp_inames |
                (new_ilp_inames),
                expression=new_call_insn.expression.function(*new_params),
                assignees=new_assignees)
            old_insn_to_new_insns[insn.id] = (packing_insns + [new_call_insn] +
                                              unpacking_insns)

    if old_insn_to_new_insns:
        new_instructions = []
        for insn in kernel.instructions:
            if insn.id in old_insn_to_new_insns:
                # Replacing the current instruction with the group of
                # instructions including the packing and unpacking instructions
                new_instructions.extend(old_insn_to_new_insns[insn.id])
            else:
                # for the instructions that depend on the call instruction that
                # are to be packed and unpacked, we need to add the complete
                # instruction block as a dependency for them.
                new_depends_on = insn.depends_on
                if insn.depends_on & set(old_insn_to_new_insns):
                    # need to add the unpack instructions on dependencies.
                    for old_insn_id in insn.depends_on & set(
                            old_insn_to_new_insns):
                        new_depends_on |= frozenset(
                            i.id for i in old_insn_to_new_insns[old_insn_id])
                new_instructions.append(insn.copy(depends_on=new_depends_on))
        kernel = kernel.copy(domains=kernel.domains + new_domains,
                             instructions=new_instructions,
                             temporary_variables=new_tmps)

    return kernel