Beispiel #1
0
def augment_domain_for_temporary_promotion(
        kernel, domain, promoted_temporary, mode, name_gen):
    """
    Add new axes to the domain corresponding to the dimensions of
    `promoted_temporary`.

    One axis is appended per entry of
    ``promoted_temporary.orig_temporary.shape`` and constrained to
    ``0 <= iname < size``. In addition, every iname listed in
    ``promoted_temporary.hw_inames`` is duplicated under a freshly
    generated name that is given the tag of the iname it duplicates.

    :arg kernel: only ``kernel.iname_to_tag`` is read here
    :arg domain: the isl set to extend
    :arg promoted_temporary: provides ``orig_temporary`` (with ``name`` and
        ``shape``) and ``hw_inames``
    :arg mode: string that becomes part of the generated iname names
    :arg name_gen: callable mapping a proposed name to the name to use
    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
    """
    import islpy as isl
    from loopy.isl_helpers import duplicate_axes
    from loopy.symbolic import aff_from_expr

    orig_temporary = promoted_temporary.orig_temporary
    orig_dim = domain.dim(isl.dim_type.set)

    iname_to_tag = {}

    # Add dimension-dependent inames.
    dim_inames = []

    domain = domain.add(isl.dim_type.set, len(orig_temporary.shape))
    for t_idx, size in enumerate(orig_temporary.shape):
        new_iname = name_gen("{name}_{mode}_dim_{dim}".
            format(name=orig_temporary.name,
                   mode=mode,
                   dim=orig_dim + t_idx))
        domain = domain.set_dim_name(
            isl.dim_type.set, orig_dim + t_idx, new_iname)
        dim_inames.append(new_iname)

        # Add size information: 0 <= new_iname < size. The upper bound must
        # be strict, because the valid indices along an axis of length
        # 'size' are 0, ..., size - 1.
        aff = isl.affs_from_space(domain.space)
        domain &= aff[0].le_set(aff[new_iname])
        domain &= aff[new_iname].lt_set(aff_from_expr(domain.space, size))

    hw_inames = []

    # Add hardware inames duplicates.
    for t_idx, hw_iname in enumerate(promoted_temporary.hw_inames):
        new_iname = name_gen("{name}_{mode}_hw_dim_{dim}".
            format(name=orig_temporary.name,
                   mode=mode,
                   dim=t_idx))
        hw_inames.append(new_iname)
        # The duplicate is tagged like the iname it duplicates.
        iname_to_tag[new_iname] = kernel.iname_to_tag[hw_iname]

    domain = duplicate_axes(
        domain, promoted_temporary.hw_inames, hw_inames)

    # The operations on the domain above return a Set object, but the
    # underlying domain should be expressible as a single BasicSet.
    domain_list = domain.get_basic_set_list()
    assert domain_list.n_basic_set() == 1
    domain = domain_list.get_basic_set(0)
    return domain, hw_inames, dim_inames, iname_to_tag
Beispiel #2
0
def augment_domain_for_temporary_promotion(kernel, domain, promoted_temporary,
                                           mode, name_gen):
    """
    Extend `domain` with one axis per dimension of the temporary wrapped by
    `promoted_temporary`, and with duplicates of its hardware inames.

    :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
        holding the extended domain (as a single basic set), the names of
        the duplicated hardware inames, the names of the per-dimension
        inames, and a mapping from new iname names to tags.
    """
    import islpy as isl
    from loopy.symbolic import aff_from_expr

    temporary = promoted_temporary.orig_temporary
    shape = temporary.shape
    first_new_dim = domain.dim(isl.dim_type.set)

    iname_to_tag = {}
    dim_inames = []

    # {{{ one new axis per dimension of the temporary

    domain = domain.add(isl.dim_type.set, len(shape))

    for axis, extent in enumerate(shape):
        axis_iname = name_gen("{name}_{mode}_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=axis))
        domain = domain.set_dim_name(
            isl.dim_type.set, first_new_dim + axis, axis_iname)

        if temporary.is_local:
            # If the temporary has local scope, then loads / stores can be
            # done in parallel.
            from loopy.kernel.data import AutoFitLocalIndexTag
            iname_to_tag[axis_iname] = AutoFitLocalIndexTag()

        dim_inames.append(axis_iname)

        # Bound the new axis: 0 <= axis_iname < extent.
        affs = isl.affs_from_space(domain.space)
        domain &= affs[0].le_set(affs[axis_iname])
        extent_aff = aff_from_expr(domain.space, extent)
        domain &= affs[axis_iname].lt_set(extent_aff)

    # }}}

    # {{{ duplicates of the hardware inames

    hw_inames = []

    for hw_idx, orig_hw_iname in enumerate(promoted_temporary.hw_inames):
        dup_iname = name_gen("{name}_{mode}_hw_dim_{dim}".format(
            name=temporary.name, mode=mode, dim=hw_idx))
        hw_inames.append(dup_iname)
        # The duplicate inherits the tag of the iname it is copied from.
        iname_to_tag[dup_iname] = kernel.iname_to_tag[orig_hw_iname]

    from loopy.isl_helpers import duplicate_axes
    domain = duplicate_axes(domain, promoted_temporary.hw_inames, hw_inames)

    # }}}

    # The steps above produce a Set; it is expected to consist of exactly
    # one BasicSet, which is what gets returned.
    pieces = domain.get_basic_set_list()
    assert pieces.n_basic_set() == 1
    return pieces.get_basic_set(0), hw_inames, dim_inames, iname_to_tag
Beispiel #3
0
def make_reduction_inames_unique(kernel, inames=None, within=None):
    """
    Map the kernel through ``_ReductionInameUniquifier`` and duplicate, in
    the kernel's domains, each old iname reported by the uniquifier under
    its new name.

    :arg inames: if not *None*, only apply to these inames
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :returns: the transformed kernel

    .. versionadded:: 2016.2
    """

    var_name_gen = kernel.get_var_name_generator()

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    # {{{ change kernel

    mapping_context = SubstitutionRuleMappingContext(
        kernel.substitutions, var_name_gen)
    uniquifier = _ReductionInameUniquifier(
        mapping_context, inames, within=within)

    mapped_kernel = uniquifier.map_kernel(kernel)
    kernel = mapping_context.finish_kernel(mapped_kernel)

    # }}}

    # {{{ duplicate the inames

    from loopy.kernel.tools import DomainChanger
    from loopy.isl_helpers import duplicate_axes

    # old_to_new is consumed as an iterable of (old, new) name pairs.
    for source_iname, fresh_iname in uniquifier.old_to_new:
        changer = DomainChanger(kernel, frozenset([source_iname]))
        widened_domain = duplicate_axes(
            changer.domain, [source_iname], [fresh_iname])
        kernel = kernel.copy(
            domains=changer.get_domains_with(widened_domain))

    # }}}

    return kernel
Beispiel #4
0
    def __init__(self, kernel, domain, sweep_inames, access_descriptors,
                 storage_axis_count):
        """
        Set up the storage-to-sweep map and the augmented domain.

        The sweep inames are duplicated under primed names, the
        storage-to-sweep map and the storage bounds are computed from the
        result, and an augmented domain is built that contains one new,
        base-index-shifted axis for every storage axis whose length is
        not 1.

        Attributes set: ``kernel``, ``sweep_inames``, ``storage_axis_names``,
        ``primed_sweep_inames``, ``prime_sweep_inames``, ``stor2sweep``,
        ``non1_storage_axis_flags``, ``aug_domain``,
        ``storage_base_indices`` and ``non1_storage_shape``.
        """
        from loopy.isl_helpers import duplicate_axes
        from loopy.symbolic import aff_from_expr

        self.kernel = kernel
        self.sweep_inames = sweep_inames

        storage_axis_names = [
            "_loopy_storage_%d" % axis_nr
            for axis_nr in range(storage_axis_count)]
        self.storage_axis_names = storage_axis_names

        # {{{ duplicate sweep inames

        # The duplication is necessary, otherwise the storage fetch
        # inames remain weirdly tied to the original sweep inames.

        primed_sweep_inames = [iname + "'" for iname in sweep_inames]
        self.primed_sweep_inames = primed_sweep_inames

        # Index at which the duplicated sweep inames start.
        dup_sweep_index = domain.space.dim(dim_type.out)
        domain_dup_sweep = duplicate_axes(
            domain, sweep_inames, primed_sweep_inames)

        primed_vars = [var(primed) for primed in primed_sweep_inames]
        self.prime_sweep_inames = SubstitutionMapper(
            make_subst_func(dict(zip(sweep_inames, primed_vars))))

        # }}}

        self.stor2sweep = build_global_storage_to_sweep_map(
            kernel, access_descriptors, domain_dup_sweep, dup_sweep_index,
            storage_axis_names, sweep_inames, primed_sweep_inames,
            self.prime_sweep_inames)

        storage_base_indices, storage_shape = compute_bounds(
            kernel, domain, self.stor2sweep, primed_sweep_inames,
            storage_axis_names)

        # compute augmented domain

        # {{{ filter out unit-length dimensions

        non1_storage_axis_flags = []
        non1_storage_shape = []

        for axis_len in storage_shape:
            is_non1 = axis_len != 1
            non1_storage_axis_flags.append(is_non1)

            if is_non1:
                non1_storage_shape.append(axis_len)

        # }}}

        # {{{ subtract off the base indices
        # add the new, base-0 indices as new in dimensions

        stor_idx = self.stor2sweep.get_space().dim(dim_type.out)

        n_stor = storage_axis_count
        nn1_stor = len(non1_storage_shape)

        moved_map = self.stor2sweep.move_dims(
            dim_type.out, stor_idx, dim_type.in_, 0, n_stor)
        aug_domain = moved_map.range()

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes']

        aug_domain = aug_domain.insert_dims(dim_type.set, stor_idx, nn1_stor)

        # Name the freshly inserted dimensions after the non-unit-length
        # storage axes, in order.
        non1_axis_names = [
            name for pos, name in enumerate(storage_axis_names)
            if non1_storage_axis_flags[pos]]

        for offset, name in enumerate(non1_axis_names):
            aug_domain = aug_domain.set_dim_name(
                dim_type.set, stor_idx + offset, name)

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes'][n1_stor_axes]

        axis_info = zip(
            storage_axis_names, storage_base_indices, storage_shape)

        for axis_name, base_index, axis_len in axis_info:
            if axis_len != 1:
                # axis == axis' - base_index
                shifted = var(axis_name + "'") - base_index
                equality = isl.Constraint.equality_from_aff(
                    aff_from_expr(aug_domain.get_space(),
                                  var(axis_name) - shifted))

                aug_domain = aug_domain.add_constraint(equality)

        # }}}

        # eliminate (primed) storage axes with non-zero base indices
        aug_domain = aug_domain.project_out(
            dim_type.set, stor_idx + nn1_stor, n_stor)

        # eliminate duplicated sweep_inames
        aug_domain = aug_domain.project_out(
            dim_type.set, dup_sweep_index, len(sweep_inames))

        self.non1_storage_axis_flags = non1_storage_axis_flags
        self.aug_domain = aug_domain
        self.storage_base_indices = storage_base_indices
        self.non1_storage_shape = non1_storage_shape
Beispiel #5
0
    def augment_domain_for_save_or_reload(self, domain, promoted_temporary,
                                          mode, subkernel):
        """
        Extend `domain` with the axes used in the save/reload stage for
        `promoted_temporary`.

        One axis is appended per entry of
        ``promoted_temporary.non_hw_dims``, and each iname in
        ``promoted_temporary.hw_inames`` is duplicated under a new name.

        :arg mode: either ``"save"`` or ``"reload"``
        :arg subkernel: becomes part of the generated iname names
        :returns: a tuple ``(domain, hw_inames, dim_inames, iname_to_tag)``
        """
        assert mode in ("save", "reload")
        import islpy as isl
        from loopy.symbolic import aff_from_expr

        temporary = promoted_temporary.orig_temporary
        non_hw_dims = promoted_temporary.non_hw_dims
        first_new_dim = domain.dim(isl.dim_type.set)

        # Tags for newly added inames
        iname_to_tag = {}

        # FIXME: Restrict size of new inames to access footprint.

        # {{{ dimension-dependent inames

        dim_inames = []
        domain = domain.add(isl.dim_type.set, len(non_hw_dims))

        for axis, extent in enumerate(non_hw_dims):
            axis_iname = self.insn_name_gen(
                "{name}_{mode}_axis_{dim}_{sk}".format(
                    name=temporary.name,
                    mode=mode,
                    dim=axis,
                    sk=subkernel))
            domain = domain.set_dim_name(
                isl.dim_type.set, first_new_dim + axis, axis_iname)

            if temporary.is_local:
                # If the temporary has local scope, then loads / stores can
                # be done in parallel.
                from loopy.kernel.data import AutoFitLocalIndexTag
                iname_to_tag[axis_iname] = AutoFitLocalIndexTag()

            dim_inames.append(axis_iname)

            # Bound the new axis: 0 <= axis_iname < extent.
            affs = isl.affs_from_space(domain.space)
            domain &= affs[0].le_set(affs[axis_iname])
            extent_aff = aff_from_expr(domain.space, extent)
            domain &= affs[axis_iname].lt_set(extent_aff)

        # }}}

        # {{{ hardware iname duplicates

        # FIXME: Use promoted_temporary.hw_inames
        hw_inames = []

        for hw_idx, orig_hw_iname in enumerate(promoted_temporary.hw_inames):
            dup_iname = self.insn_name_gen(
                "{name}_{mode}_hw_dim_{dim}_{sk}".format(
                    name=temporary.name,
                    mode=mode,
                    dim=hw_idx,
                    sk=subkernel))
            hw_inames.append(dup_iname)
            # The duplicate inherits the tag of the iname it is copied from.
            iname_to_tag[dup_iname] = self.kernel.iname_to_tag[orig_hw_iname]

        from loopy.isl_helpers import duplicate_axes
        domain = duplicate_axes(
            domain, promoted_temporary.hw_inames, hw_inames)

        # }}}

        # The steps above produce a Set; it is expected to consist of
        # exactly one BasicSet, which is what gets returned.
        pieces = domain.get_basic_set_list()
        assert pieces.n_basic_set() == 1
        return pieces.get_basic_set(0), hw_inames, dim_inames, iname_to_tag
Beispiel #6
0
def link_inames(knl, inames, new_iname, within=None, tag=None):
    """
    Replace the inames in *inames* by a single new iname.

    The new iname is created in the kernel's domain as a duplicate of the
    first entry of *inames*, every entry of *inames* is substituted by it
    in the kernel, and ``remove_unused_inames`` is then applied to
    *inames*.

    :arg inames: a list of iname names, or a comma-separated string of
        them. Note that entries of a string are split on "," only and are
        not stripped of surrounding whitespace.
    :arg new_iname: the proposed name of the linked iname. It is passed
        through the kernel's variable name generator, and the generator's
        result is the name that is used.
    :arg within: passed to ``parse_stack_match``; the result is handed to
        the substitution mapper.
    :arg tag: if not *None*, the new iname is tagged with it via
        ``tag_inames``.
    :raises LoopyError: if the projections of the domain computed for the
        individual inames are not all equal.
    :returns: the transformed kernel
    """
    # {{{ normalize arguments

    if isinstance(inames, str):
        inames = inames.split(",")

    var_name_gen = knl.get_var_name_generator()
    new_iname = var_name_gen(new_iname)

    # }}}

    # {{{ ensure that each iname is used at most once in each instruction

    inames_set = set(inames)

    # NOTE: this check is disabled ('if 0'), so the body below never runs.
    if 0:
        # FIXME!
        for insn in knl.instructions:
            insn_inames = knl.insn_inames(insn.id) | insn.reduction_inames()

            if len(insn_inames & inames_set) > 1:
                raise LoopyError("To-be-linked inames '%s' are used in "
                        "instruction '%s'. No more than one such iname can "
                        "be used in one instruction."
                        % (", ".join(insn_inames & inames_set), insn.id))

    # }}}

    from loopy.kernel.tools import DomainChanger
    domch = DomainChanger(knl, tuple(inames))

    # {{{ ensure that projections are identical

    # All set dimensions of the domain that are not being linked.
    unrelated_dom_inames = list(
            set(domch.domain.get_var_names(dim_type.set))
            - inames_set)

    domain = domch.domain

    # move all inames to be linked to end to prevent shuffly confusion
    # NOTE: the reordered 'domain' built by this loop is not used further;
    # it is deleted below and the projections are taken from domch.domain.
    # What remains observable is the lookup of each iname (a KeyError if
    # it is not in the domain) and the assertion that it is a set
    # dimension.
    for iname in inames:
        dt, index = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        # move to tail of param dim_type
        domain = domain.move_dims(
                    dim_type.param, domain.dim(dim_type.param),
                    dt, index, 1)
        # move to tail of set dim_type
        domain = domain.move_dims(
                    dim_type.set, domain.dim(dim_type.set),
                    dim_type.param, domain.dim(dim_type.param)-1, 1)

    # One projection per iname to be linked: everything except that iname
    # and the unrelated inames is projected out of the set dimensions.
    projections = [
            domch.domain.project_out_except(
                unrelated_dom_inames + [iname], [dim_type.set])
            for iname in inames]

    # Two projections count as equal if each is '<=' the other.
    # NOTE: projections[0] raises IndexError if 'inames' is empty.
    all_equal = True
    first_proj = projections[0]
    for proj in projections[1:]:
        all_equal = all_equal and (proj <= first_proj and first_proj <= proj)

    if not all_equal:
        raise LoopyError("Inames cannot be linked because their domain "
                "constraints are not the same.")

    del domain  # messed up for testing, do not use

    # }}}

    # change the domain
    # The new iname is introduced as a duplicate of the first linked iname.
    from loopy.isl_helpers import duplicate_axes
    knl = knl.copy(
            domains=domch.get_domains_with(
                duplicate_axes(domch.domain, [inames[0]], [new_iname])))

    # {{{ change the code

    # Every linked iname is substituted by the new iname.
    from pymbolic import var
    subst_dict = dict((iname, var(new_iname)) for iname in inames)

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    from pymbolic.mapper.substitutor import make_subst_func
    rule_mapping_context = SubstitutionRuleMappingContext(
            knl.substitutions, var_name_gen)
    ijoin = RuleAwareSubstitutionMapper(rule_mapping_context,
                    make_subst_func(subst_dict), within)

    knl = rule_mapping_context.finish_kernel(
            ijoin.map_kernel(knl))

    # }}}

    knl = remove_unused_inames(knl, inames)

    if tag is not None:
        knl = tag_inames(knl, {new_iname: tag})

    return knl
Beispiel #7
0
def duplicate_inames(knl, inames, within, new_inames=None, suffix=None,
        tags=None):
    """
    Duplicate each iname in *inames* in the kernel's domains and map the
    kernel through ``_InameDuplicator`` with the resulting old-to-new
    mapping.

    :arg inames: a list of iname names, or a comma-separated string of them
    :arg within: a stack match as understood by
        :func:`loopy.context_matching.parse_stack_match`.
    :arg new_inames: *None*, a comma-separated string, or a sequence with
        one entry per iname. An entry of *None* requests a generated name
        (the old name plus *suffix*, passed through the kernel's name
        generator). The sequence passed in is not modified.
    :arg suffix: if not *None*, appended to the old name when a name is
        generated
    :arg tags: an optional mapping from *old* iname names to the tag that
        the corresponding new iname should receive
    :raises ValueError: if *new_inames* and *inames* differ in length, or
        if an explicitly given new name conflicts with an existing name
    :returns: the transformed kernel
    """

    # {{{ normalize arguments, find unique new_inames

    if isinstance(inames, str):
        inames = [iname.strip() for iname in inames.split(",")]

    if isinstance(new_inames, str):
        new_inames = [iname.strip() for iname in new_inames.split(",")]

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    if new_inames is None:
        new_inames = [None] * len(inames)
    else:
        # Work on a private copy: entries are overwritten below, which must
        # not leak into the caller's list (and would fail for a tuple).
        new_inames = list(new_inames)

    if tags is None:
        tags = {}

    if len(new_inames) != len(inames):
        raise ValueError("new_inames must have the same number of entries as inames")

    name_gen = knl.get_var_name_generator()

    for i, iname in enumerate(inames):
        new_iname = new_inames[i]

        if new_iname is None:
            new_iname = iname

            if suffix is not None:
                new_iname += suffix

            new_iname = name_gen(new_iname)

        else:
            if name_gen.is_name_conflicting(new_iname):
                raise ValueError("new iname '%s' conflicts with existing names"
                        % new_iname)

            # Reserve the explicitly requested name.
            name_gen.add_name(new_iname)

        new_inames[i] = new_iname

    # }}}

    # {{{ duplicate the inames

    from loopy.kernel.tools import DomainChanger
    from loopy.isl_helpers import duplicate_axes

    for old_iname, new_iname in zip(inames, new_inames):
        domch = DomainChanger(knl, frozenset([old_iname]))

        knl = knl.copy(
                domains=domch.get_domains_with(
                    duplicate_axes(domch.domain, [old_iname], [new_iname])))

    # }}}

    # {{{ change the inames in the code

    rule_mapping_context = SubstitutionRuleMappingContext(
            knl.substitutions, name_gen)
    indup = _InameDuplicator(rule_mapping_context,
            old_to_new=dict(zip(inames, new_inames)),
            within=within)

    knl = rule_mapping_context.finish_kernel(
            indup.map_kernel(knl))

    # }}}

    # {{{ realize tags

    # Tags are looked up by the *old* iname and applied to the new one.
    for old_iname, new_iname in zip(inames, new_inames):
        new_tag = tags.get(old_iname)
        if new_tag is not None:
            knl = tag_inames(knl, {new_iname: new_tag})

    # }}}

    return knl
Beispiel #8
0
    def __init__(self, kernel, domain, sweep_inames, access_descriptors,
            storage_axis_count):
        """
        Set up the storage-to-sweep map and the augmented domain.

        The sweep inames are duplicated under primed names, the
        storage-to-sweep map and the storage bounds are computed from the
        result, and an augmented domain is built that contains one new,
        base-index-shifted axis for every storage axis whose length is
        not 1.
        """
        self.kernel = kernel
        self.sweep_inames = sweep_inames

        self.storage_axis_names = [
                "_loopy_storage_%d" % axis_nr
                for axis_nr in range(storage_axis_count)]
        storage_axis_names = self.storage_axis_names

        # {{{ duplicate sweep inames

        # The duplication is necessary, otherwise the storage fetch
        # inames remain weirdly tied to the original sweep inames.

        self.primed_sweep_inames = [name + "'" for name in sweep_inames]

        from loopy.isl_helpers import duplicate_axes
        # Index at which the duplicated sweep inames start.
        dup_sweep_index = domain.space.dim(dim_type.out)
        domain_dup_sweep = duplicate_axes(
                domain, sweep_inames, self.primed_sweep_inames)

        # Substitution: sweep iname -> variable of its primed duplicate.
        sweep_to_primed = {}
        for name, primed in zip(sweep_inames, self.primed_sweep_inames):
            sweep_to_primed[name] = var(primed)

        self.prime_sweep_inames = SubstitutionMapper(
                make_subst_func(sweep_to_primed))

        # }}}

        self.stor2sweep = build_global_storage_to_sweep_map(
                kernel, access_descriptors,
                domain_dup_sweep, dup_sweep_index,
                storage_axis_names,
                sweep_inames, self.primed_sweep_inames,
                self.prime_sweep_inames)

        storage_base_indices, storage_shape = compute_bounds(
                kernel, domain, self.stor2sweep, self.primed_sweep_inames,
                storage_axis_names)

        # compute augmented domain

        # {{{ filter out unit-length dimensions

        non1_storage_axis_flags = []
        non1_storage_shape = []

        # Only the length is needed here; the three-way zip is kept so that
        # the number of iterations is unchanged.
        for _axis_name, _base_index, axis_len in zip(
                storage_axis_names, storage_base_indices, storage_shape):
            is_non1 = axis_len != 1
            non1_storage_axis_flags.append(is_non1)

            if is_non1:
                non1_storage_shape.append(axis_len)

        # }}}

        # {{{ subtract off the base indices
        # add the new, base-0 indices as new in dimensions

        stor_idx = self.stor2sweep.get_space().dim(dim_type.out)

        n_stor = storage_axis_count
        nn1_stor = len(non1_storage_shape)

        moved_map = self.stor2sweep.move_dims(
                dim_type.out, stor_idx,
                dim_type.in_, 0,
                n_stor)
        aug_domain = moved_map.range()

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes']

        aug_domain = aug_domain.insert_dims(dim_type.set, stor_idx, nn1_stor)

        # Name the freshly inserted dimensions after the non-unit-length
        # storage axes, in order.
        next_new_dim = stor_idx
        for pos, name in enumerate(storage_axis_names):
            if not non1_storage_axis_flags[pos]:
                continue

            aug_domain = aug_domain.set_dim_name(
                    dim_type.set, next_new_dim, name)
            next_new_dim += 1

        # aug_domain space now:
        # [domain](dup_sweep_index)[dup_sweep](stor_idx)[stor_axes'][n1_stor_axes]

        from loopy.symbolic import aff_from_expr
        for axis_name, base_index, axis_len in zip(
                storage_axis_names, storage_base_indices, storage_shape):
            if axis_len != 1:
                # axis == axis' - base_index
                shifted = var(axis_name+"'") - base_index
                equality = isl.Constraint.equality_from_aff(
                        aff_from_expr(aug_domain.get_space(),
                            var(axis_name) - shifted))

                aug_domain = aug_domain.add_constraint(equality)

        # }}}

        # eliminate (primed) storage axes with non-zero base indices
        aug_domain = aug_domain.project_out(
                dim_type.set, stor_idx+nn1_stor, n_stor)

        # eliminate duplicated sweep_inames
        aug_domain = aug_domain.project_out(
                dim_type.set, dup_sweep_index, len(sweep_inames))

        self.non1_storage_axis_flags = non1_storage_axis_flags
        self.aug_domain = aug_domain
        self.storage_base_indices = storage_base_indices
        self.non1_storage_shape = non1_storage_shape
Beispiel #9
0
def duplicate_inames(knl,
                     inames,
                     within,
                     new_inames=None,
                     suffix=None,
                     tags=None):
    """
    Duplicate each iname in *inames* in the kernel's domains and map the
    kernel through ``_InameDuplicator`` with the resulting old-to-new
    mapping.

    :arg inames: a list of iname names, or a comma-separated string of them
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg new_inames: *None*, a comma-separated string, or a sequence with
        one entry per iname. An entry of *None* requests a generated name
        (the old name plus *suffix*, passed through the kernel's name
        generator). The sequence passed in is not modified.
    :arg suffix: if not *None*, appended to the old name when a name is
        generated
    :arg tags: an optional mapping from *old* iname names to the tag that
        the corresponding new iname should receive
    :raises ValueError: if *new_inames* and *inames* differ in length, or
        if an explicitly given new name conflicts with an existing name
    :returns: the transformed kernel
    """

    # {{{ normalize arguments, find unique new_inames

    if isinstance(inames, str):
        inames = [iname.strip() for iname in inames.split(",")]

    if isinstance(new_inames, str):
        new_inames = [iname.strip() for iname in new_inames.split(",")]

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    # Always work on a private list: entries are overwritten below, which
    # must not leak into the caller's list (and would fail for a tuple).
    if new_inames is None:
        new_inames = [None] * len(inames)
    else:
        new_inames = list(new_inames)

    if tags is None:
        tags = {}

    if len(new_inames) != len(inames):
        raise ValueError(
            "new_inames must have the same number of entries as inames")

    name_gen = knl.get_var_name_generator()

    for i, iname in enumerate(inames):
        new_iname = new_inames[i]

        if new_iname is None:
            new_iname = iname

            if suffix is not None:
                new_iname += suffix

            new_iname = name_gen(new_iname)

        else:
            if name_gen.is_name_conflicting(new_iname):
                raise ValueError(
                    "new iname '%s' conflicts with existing names" % new_iname)

            # Reserve the explicitly requested name.
            name_gen.add_name(new_iname)

        new_inames[i] = new_iname

    # }}}

    # {{{ duplicate the inames

    from loopy.kernel.tools import DomainChanger
    from loopy.isl_helpers import duplicate_axes

    for old_iname, new_iname in zip(inames, new_inames):
        domch = DomainChanger(knl, frozenset([old_iname]))

        knl = knl.copy(domains=domch.get_domains_with(
            duplicate_axes(domch.domain, [old_iname], [new_iname])))

    # }}}

    # {{{ change the inames in the code

    rule_mapping_context = SubstitutionRuleMappingContext(
        knl.substitutions, name_gen)
    indup = _InameDuplicator(rule_mapping_context,
                             old_to_new=dict(zip(inames, new_inames)),
                             within=within)

    knl = rule_mapping_context.finish_kernel(indup.map_kernel(knl))

    # }}}

    # {{{ realize tags

    # Tags are looked up by the *old* iname and applied to the new one.
    for old_iname, new_iname in zip(inames, new_inames):
        new_tag = tags.get(old_iname)
        if new_tag is not None:
            knl = tag_inames(knl, {new_iname: new_tag})

    # }}}

    return knl