Esempio n. 1
0
def test_inf_support(ctx_factory, target, dtype):
    from loopy.symbolic import parse
    import math
    # See: https://github.com/inducer/loopy/issues/443 for some laughs
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel("{:}", [
        lp.Assignment(parse("out_inf"), math.inf),
        lp.Assignment(parse("out_neginf"), -math.inf)
    ], [
        lp.GlobalArg("out_inf", shape=lp.auto, dtype=dtype),
        lp.GlobalArg("out_neginf", shape=lp.auto, dtype=dtype)
    ],
                         target=target())

    knl = lp.set_options(knl, "return_dict")

    if target == lp.PyOpenCLTarget:
        _, out_dict = knl(queue)
        out_dict = {k: v.get() for k, v in out_dict.items()}
    elif target == lp.ExecutableCTarget:
        _, out_dict = knl()
    else:
        raise NotImplementedError("unsupported target")

    assert np.isinf(out_dict["out_inf"])
    assert np.isneginf(out_dict["out_neginf"])
Esempio n. 2
0
    def __init__(self,
            assignees, expression,
            id=None,
            depends_on=None,
            depends_on_is_final=None,
            groups=None,
            conflicts_with_groups=None,
            no_sync_with=None,
            within_inames_is_final=None,
            within_inames=None,
            tags=None,
            temp_var_types=None,
            priority=0, predicates=frozenset()):

        super().__init__(
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                groups=groups,
                conflicts_with_groups=conflicts_with_groups,
                no_sync_with=no_sync_with,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                priority=priority,
                predicates=predicates,
                tags=tags)

        from pymbolic.primitives import Call
        from loopy.symbolic import Reduction
        if not isinstance(expression, (Call, Reduction)) and expression is not None:
            raise LoopyError("'expression' argument to CallInstruction "
                    "must be a function call")

        from loopy.symbolic import parse
        if isinstance(assignees, str):
            assignees = parse(assignees)
        if not isinstance(assignees, tuple):
            raise LoopyError("'assignees' argument to CallInstruction "
                    "must be a tuple or a string parseable to a tuple"
                    "--got '%s'" % type(assignees).__name__)

        if isinstance(expression, str):
            expression = parse(expression)

        from pymbolic.primitives import Variable, Subscript
        from loopy.symbolic import LinearSubscript
        for assignee in assignees:
            if not isinstance(assignee, (Variable, Subscript, LinearSubscript)):
                raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignees = assignees
        self.expression = expression

        if temp_var_types is None:
            self.temp_var_types = (Optional(),) * len(self.assignees)
        else:
            self.temp_var_types = tuple(
                    _check_and_fix_temp_var_type(tvt, stacklevel=3)
                    for tvt in temp_var_types)
Esempio n. 3
0
    def __init__(self,
                 assignee,
                 expression,
                 id=None,
                 depends_on=None,
                 depends_on_is_final=None,
                 groups=None,
                 conflicts_with_groups=None,
                 no_sync_with=None,
                 within_inames_is_final=None,
                 within_inames=None,
                 boostable=None,
                 boostable_into=None,
                 tags=None,
                 temp_var_type=None,
                 atomicity=(),
                 priority=0,
                 predicates=frozenset(),
                 insn_deps=None,
                 insn_deps_is_final=None,
                 forced_iname_deps=None,
                 forced_iname_deps_is_final=None):

        super(Assignment, self).__init__(
            id=id,
            depends_on=depends_on,
            depends_on_is_final=depends_on_is_final,
            groups=groups,
            conflicts_with_groups=conflicts_with_groups,
            no_sync_with=no_sync_with,
            within_inames_is_final=within_inames_is_final,
            within_inames=within_inames,
            boostable=boostable,
            boostable_into=boostable_into,
            priority=priority,
            predicates=predicates,
            tags=tags,
            insn_deps=insn_deps,
            insn_deps_is_final=insn_deps_is_final,
            forced_iname_deps=forced_iname_deps,
            forced_iname_deps_is_final=forced_iname_deps_is_final)

        from loopy.symbolic import parse
        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            expression = parse(expression)

        from pymbolic.primitives import Variable, Subscript, Lookup
        from loopy.symbolic import LinearSubscript
        if not isinstance(assignee,
                          (Variable, Subscript, LinearSubscript, Lookup)):
            raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignee = assignee
        self.expression = expression
        self.temp_var_type = temp_var_type
        self.atomicity = atomicity
Esempio n. 4
0
    def __init__(self,
            assignee, expression,
            id=None,
            depends_on=None,
            depends_on_is_final=None,
            groups=None,
            conflicts_with_groups=None,
            no_sync_with=None,
            within_inames_is_final=None,
            within_inames=None,
            boostable=None, boostable_into=None, tags=None,
            temp_var_type=Optional(), atomicity=(),
            priority=0, predicates=frozenset(),
            insn_deps=None, insn_deps_is_final=None,
            forced_iname_deps=None, forced_iname_deps_is_final=None):

        super(Assignment, self).__init__(
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                groups=groups,
                conflicts_with_groups=conflicts_with_groups,
                no_sync_with=no_sync_with,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                boostable=boostable,
                boostable_into=boostable_into,
                priority=priority,
                predicates=predicates,
                tags=tags,
                insn_deps=insn_deps,
                insn_deps_is_final=insn_deps_is_final,
                forced_iname_deps=forced_iname_deps,
                forced_iname_deps_is_final=forced_iname_deps_is_final)

        from loopy.symbolic import parse
        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            expression = parse(expression)

        from pymbolic.primitives import Variable, Subscript, Lookup
        from loopy.symbolic import LinearSubscript
        if not isinstance(assignee, (Variable, Subscript, LinearSubscript, Lookup)):
            raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignee = assignee
        self.expression = expression

        self.temp_var_type = _check_and_fix_temp_var_type(temp_var_type)
        self.atomicity = atomicity
Esempio n. 5
0
def test_nan_support(ctx_factory):
    from loopy.symbolic import parse
    ctx = ctx_factory()
    knl = lp.make_kernel("{:}", [
        lp.Assignment(parse("a"), np.nan),
        lp.Assignment(parse("b"), parse("isnan(a)")),
        lp.Assignment(parse("c"), parse("isnan(3.14)"))
    ],
                         seq_dependencies=True)

    knl = lp.set_options(knl, "return_dict")

    evt, out_dict = knl(cl.CommandQueue(ctx))
    assert np.isnan(out_dict["a"].get())
    assert out_dict["b"] == 1
    assert out_dict["c"] == 0
Esempio n. 6
0
def test_simplify_via_aff_reproducibility():
    # See https://github.com/inducer/loopy/pull/349
    from loopy.symbolic import parse
    from loopy.isl_helpers import simplify_via_aff

    expr = parse("i+i_0")

    assert simplify_via_aff(expr) == expr
Esempio n. 7
0
    def parse_equation(eqn):
        if isinstance(eqn, str):
            eqn_match = eqn_re.match(eqn)
            if not eqn_match:
                raise ValueError("invalid equation: %s" % eqn)

            from loopy.symbolic import parse
            lhs = parse(eqn_match.group(1))
            rhs = parse(eqn_match.group(2))
            return (lhs, rhs)
        elif isinstance(eqn, tuple):
            if len(eqn) != 2:
                raise ValueError("unexpected length of equation tuple, "
                                 "got %d, should be 2" % len(eqn))
            return eqn
        else:
            raise ValueError("unexpected type of equation"
                             "got %d, should be string or tuple" %
                             type(eqn).__name__)
Esempio n. 8
0
    def __init__(self,
            assignee, expression,
            id=None,
            insn_deps=None,
            insn_deps_is_final=None,
            groups=None,
            conflicts_with_groups=None,
            forced_iname_deps_is_final=None,
            forced_iname_deps=frozenset(),
            boostable=None, boostable_into=None, tags=None,
            temp_var_type=None, priority=0, predicates=frozenset()):

        InstructionBase.__init__(self,
                id=id,
                insn_deps=insn_deps,
                insn_deps_is_final=insn_deps_is_final,
                groups=groups,
                conflicts_with_groups=conflicts_with_groups,
                forced_iname_deps_is_final=forced_iname_deps_is_final,
                forced_iname_deps=forced_iname_deps,
                boostable=boostable,
                boostable_into=boostable_into,
                priority=priority,
                predicates=predicates,
                tags=tags)

        from loopy.symbolic import parse
        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            assignee = parse(expression)

        # FIXME: It may be worth it to enable this check eventually.
        # For now, it causes grief with certain 'checky' uses of the
        # with_transformed_expressions(). (notably the access checker)
        #
        # from pymbolic.primitives import Variable, Subscript
        # if not isinstance(assignee, (Variable, Subscript)):
        #     raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignee = assignee
        self.expression = expression
        self.temp_var_type = temp_var_type
Esempio n. 9
0
    def parse_equation(eqn):
        if isinstance(eqn, str):
            eqn_match = eqn_re.match(eqn)
            if not eqn_match:
                raise ValueError("invalid equation: %s" % eqn)

            from loopy.symbolic import parse
            lhs = parse(eqn_match.group(1))
            rhs = parse(eqn_match.group(2))
            return (lhs, rhs)
        elif isinstance(eqn, tuple):
            if len(eqn) != 2:
                raise ValueError("unexpected length of equation tuple, "
                        "got %d, should be 2" % len(eqn))
            return eqn
        else:
            raise ValueError("unexpected type of equation"
                    "got %d, should be string or tuple"
                    % type(eqn).__name__)
Esempio n. 10
0
    def __init__(self,
            assignee, expression,
            id=None,
            depends_on=None,
            depends_on_is_final=None,
            groups=None,
            conflicts_with_groups=None,
            no_sync_with=None,
            within_inames_is_final=None,
            within_inames=None,
            tags=None,
            temp_var_type=Optional(), atomicity=(),
            priority=0, predicates=frozenset()):

        super().__init__(
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                groups=groups,
                conflicts_with_groups=conflicts_with_groups,
                no_sync_with=no_sync_with,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                priority=priority,
                predicates=predicates,
                tags=tags)

        from loopy.symbolic import parse
        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            expression = parse(expression)

        from pymbolic.primitives import Variable, Subscript, Lookup
        from loopy.symbolic import LinearSubscript
        if not isinstance(assignee, (Variable, Subscript, LinearSubscript, Lookup)):
            raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignee = assignee
        self.expression = expression

        self.temp_var_type = _check_and_fix_temp_var_type(temp_var_type)
        self.atomicity = atomicity
Esempio n. 11
0
 def subst_func(var):
     if isinstance(var, Variable):
         try:
             var_value = defines[var.name]
         except KeyError:
             return None
         else:
             return parse(str(var_value))
     else:
         return None
Esempio n. 12
0
    def __init__(
        self,
        assignee,
        expression,
        id=None,
        forced_iname_deps_is_final=None,
        forced_iname_deps=frozenset(),
        insn_deps=None,
        insn_deps_is_final=None,
        boostable=None,
        boostable_into=None,
        tags=None,
        temp_var_type=None,
        priority=0,
        predicates=frozenset(),
    ):

        InstructionBase.__init__(
            self,
            id=id,
            forced_iname_deps_is_final=forced_iname_deps_is_final,
            forced_iname_deps=forced_iname_deps,
            insn_deps=insn_deps,
            insn_deps_is_final=insn_deps_is_final,
            boostable=boostable,
            boostable_into=boostable_into,
            priority=priority,
            predicates=predicates,
            tags=tags,
        )

        from loopy.symbolic import parse

        if isinstance(assignee, str):
            assignee = parse(assignee)
        if isinstance(expression, str):
            assignee = parse(expression)

        self.assignee = assignee
        self.expression = expression
        self.temp_var_type = temp_var_type
Esempio n. 13
0
def test_extract_subst(ctx_factory):
    knl = lp.make_kernel(
        "{[i]: 0<=i<n}", """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """)

    knl = lp.extract_subst(knl, "bsquare", "alpha*b[i]**2", "alpha")

    print(knl)

    from loopy.symbolic import parse

    insn, = knl.instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
Esempio n. 14
0
def test_extract_subst(ctx_factory):
    knl = lp.make_kernel(
            "{[i]: 0<=i<n}",
            """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """)

    knl = lp.extract_subst(knl, "bsquare", "alpha*b[i]**2", "alpha")

    print(knl)

    from loopy.symbolic import parse

    insn, = knl.instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
Esempio n. 15
0
def test_extract_subst(ctx_factory):
    prog = lp.make_kernel("{[i]: 0<=i<n}",
                          """
                a[i] = 23*b[i]**2 + 25*b[i]**2
                """,
                          name="extract_subst")

    prog = lp.extract_subst(prog, "bsquare", "alpha*b[i]**2", "alpha")

    print(prog)

    from loopy.symbolic import parse

    insn, = prog["extract_subst"].instructions
    assert insn.expression == parse("bsquare(23) + bsquare(25)")
Esempio n. 16
0
File: data.py Progetto: shwina/loopy
def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,

        rule_name=None,
        temporary_name=None,
        temporary_scope=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False,
        fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string, the name of the variable being prefetched.
        This may be a 'tagged variable name' (such as ``field$mytag``
        to restrict the effect of the operation to only variable accesses
        with a matching tag.

        This may also be a subscripted version of the variable, in which
        case this access dictates the footprint that is prefetched,
        e.g. ``A[:,:]`` or ``field[i,j,:,:]``. In this case, accesses
        in the kernel are disregarded.

    :arg sweep_inames: A list of inames, or a comma-separated string of them.
        This routine 'sweeps' all accesses to *var_name* through all allowed
        values of the *sweep_inames* to generate a footprint. All values
        in this footprint are then stored in a temporary variable, and
        the original variable accesses replaced with accesses to this
        temporary.

    :arg dim_arg_names: List of names representing each fetch axis.
        These names show up as inames in the generated fetch code

    :arg default_tag: The :ref:`implementation tag <iname-tags>` to
        assign to the inames driving the prefetch code. Use *None* to
        leave them undefined (to assign them later by hand). The current
        default will make them local axes and automatically split them to
        fit the work group size, but this default will disappear in favor
        of simply leaving them untagged in 2019.x. For 2018.x, a warning
        will be issued if no *default_tag* is specified.

    :arg rule_name: base name of the generated temporary variable.
    :arg temporary_name: The name of the temporary to be used.
    :arg temporary_scope: The :class:`temp_var_scope` to use for the
        temporary.
    :arg temporary_is_local: Deprecated, use *temporary_scope* instead.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    :arg fetch_bounding_box: To fit within :mod:`loopy`'s execution model,
        the 'footprint' of the fetch currently has to be a convex set.
        Sometimes this is not the case, e.g. for a high-order stencil::

              o
              o
            ooooo
              o
              o

        The footprint of the stencil when 'swept' over a base domain
        would look like this, and because of the 'missing corners',
        this set is not convex::

              oooooooooo
              oooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
              oooooooooo
              oooooooooo

        Passing ``fetch_bounding_box=True`` gives :mod:`loopy` permission
        to instead fetch the 'bounding box' of the footprint, i.e.
        this set in the stencil example::

            OOooooooooooOO
            OOooooooooooOO
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            OOooooooooooOO
            OOooooooooooOO

        Note the added corners marked with "``O``". The resulting footprint is
        guaranteed to be convex.


    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    This function internally uses :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    if isinstance(parsed_var_name, TaggedVariable):
        var_name = parsed_var_name.name
        tag = parsed_var_name.tag
    else:
        var_name = parsed_var_name.name
        tag = None

    # }}}

    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        par_name = var_name_gen(based_on=based_on)
        parameters.append(par_name)

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    # Our _not_provided is actually a different object from the one in the
    # precompute module, but precompute acutally uses that to adjust its
    # warning message.

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_scope=temporary_scope, temporary_is_local=temporary_is_local,
            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel
Esempio n. 17
0
def _pymbolic_parse_if_necessary(x):
    if isinstance(x, str):
        from pymbolic import parse
        return parse(x)
    else:
        return x
Esempio n. 18
0
def _parse_array_dim_tag(tag, default_target_axis, nesting_levels):
    if isinstance(tag, ArrayDimImplementationTag):
        return False, False, tag

    if not isinstance(tag, str):
        raise TypeError("arg dimension implementation tag must be "
                        "string or tag object")

    tag = tag.strip()
    is_optional = False
    if tag.endswith("?"):
        tag = tag[:-1]
        is_optional = True

    orig_tag = tag

    if tag == "sep":
        return False, is_optional, SeparateArrayArrayDimTag()
    elif tag == "vec":
        return False, is_optional, VectorArrayDimTag()

    nesting_level_match = NESTING_LEVEL_RE.match(tag)

    if nesting_level_match is not None:
        nesting_level = int(nesting_level_match.group(1))
        tag = nesting_level_match.group(2)
        if tag is None:
            tag = ""
    else:
        nesting_level = None

    has_explicit_nesting_level = nesting_level is not None

    target_axis_match = TARGET_AXIS_RE.search(tag)

    if target_axis_match is not None:
        target_axis = int(target_axis_match.group(1))
        tag = tag[:target_axis_match.start()]
    else:
        target_axis = default_target_axis

    ta_nesting_levels = nesting_levels.get(target_axis, [])

    if tag.startswith("stride:"):
        fixed_stride_descr = tag[7:]
        if fixed_stride_descr.strip() == "auto":
            import loopy as lp
            return (has_explicit_nesting_level, is_optional,
                    FixedStrideArrayDimTag(lp.auto,
                                           target_axis,
                                           layout_nesting_level=nesting_level))
        else:
            from loopy.symbolic import parse
            return (has_explicit_nesting_level, is_optional,
                    FixedStrideArrayDimTag(parse(fixed_stride_descr),
                                           target_axis,
                                           layout_nesting_level=nesting_level))

    else:
        padded_stride_match = PADDED_STRIDE_TAG_RE.match(tag)
        if padded_stride_match is not None:
            tag = padded_stride_match.group(1)

            from loopy.symbolic import parse
            pad_to = parse(padded_stride_match.group(2))
        else:
            pad_to = None

        if tag in ["c", "C"]:
            if nesting_level is not None:
                raise LoopyError(
                    "may not specify 'C' array order with explicit "
                    "layout nesting level")

            if ta_nesting_levels:
                nesting_level = min(ta_nesting_levels) - 1
            else:
                nesting_level = 0

        elif tag in ["f", "F"]:
            if nesting_level is not None:
                raise LoopyError(
                    "may not specify 'C' array order with explicit "
                    "layout nesting level")

            if ta_nesting_levels:
                nesting_level = max(ta_nesting_levels) + 1
            else:
                nesting_level = 0

        elif tag == "":
            if nesting_level is None:
                raise LoopyError("invalid dim tag: '%s'" % orig_tag)

        else:
            raise LoopyError("invalid dim tag: '%s'" % orig_tag)

        return (has_explicit_nesting_level, is_optional,
                ComputedStrideArrayDimTag(nesting_level,
                                          pad_to=pad_to,
                                          target_axis=target_axis))
Esempio n. 19
0
def parse_insn(insn):
    """
    :return: a tuple ``(insn, inames_to_dup)``, where insn is a
        :class:`ExpressionInstruction` or a :class:`SubstitutionRule`
        and *inames_to_dup* is None or a list of tuples `(old, new)`.
    """

    insn_match = INSN_RE.match(insn)
    subst_match = SUBST_RE.match(insn)
    if insn_match is not None and subst_match is not None:
        raise RuntimeError("instruction parse error: %s" % insn)

    if insn_match is not None:
        groups = insn_match.groupdict()
    elif subst_match is not None:
        groups = subst_match.groupdict()
    else:
        raise RuntimeError("instruction parse error at '%s'" % insn)

    from loopy.symbolic import parse
    try:
        lhs = parse(groups["lhs"])
    except:
        print("While parsing left hand side '%s', "
                "the following error occurred:" % groups["lhs"])
        raise

    try:
        rhs = parse(groups["rhs"])
    except:
        print("While parsing right hand side '%s', "
                "the following error occurred:" % groups["rhs"])
        raise

    if insn_match is not None:
        insn_deps = None
        insn_deps_is_final = False
        insn_id = None
        inames_to_dup = []
        priority = 0
        forced_iname_deps_is_final = False
        forced_iname_deps = frozenset()
        predicates = frozenset()
        tags = ()

        if groups["options"] is not None:
            for option in groups["options"].split(","):
                option = option.strip()
                if not option:
                    raise RuntimeError("empty option supplied")

                equal_idx = option.find("=")
                if equal_idx == -1:
                    opt_key = option
                    opt_value = None
                else:
                    opt_key = option[:equal_idx].strip()
                    opt_value = option[equal_idx+1:].strip()

                if opt_key == "id":
                    insn_id = opt_value
                elif opt_key == "id_prefix":
                    insn_id = UniqueName(opt_value)
                elif opt_key == "priority":
                    priority = int(opt_value)
                elif opt_key == "dup":
                    for value in opt_value.split(":"):
                        arrow_idx = value.find("->")
                        if arrow_idx >= 0:
                            inames_to_dup.append(
                                    (value[:arrow_idx], value[arrow_idx+2:]))
                        else:
                            inames_to_dup.append((value, None))

                elif opt_key == "dep":
                    if opt_value.startswith("*"):
                        insn_deps_is_final = True
                        opt_value = (opt_value[1:]).strip()

                    insn_deps = frozenset(dep.strip() for dep in opt_value.split(":")
                            if dep.strip())

                elif opt_key == "inames":
                    if opt_value.startswith("+"):
                        forced_iname_deps_is_final = False
                        opt_value = (opt_value[1:]).strip()
                    else:
                        forced_iname_deps_is_final = True

                    forced_iname_deps = frozenset(opt_value.split(":"))

                elif opt_key == "if":
                    predicates = frozenset(opt_value.split(":"))

                elif opt_key == "tags":
                    tags = tuple(
                            tag.strip() for tag in opt_value.split(":")
                            if tag.strip())

                else:
                    raise ValueError("unrecognized instruction option '%s'"
                            % opt_key)

        if groups["temp_var_type"] is not None:
            if groups["temp_var_type"]:
                temp_var_type = np.dtype(groups["temp_var_type"])
            else:
                import loopy as lp
                temp_var_type = lp.auto
        else:
            temp_var_type = None

        from pymbolic.primitives import Variable, Subscript
        if not isinstance(lhs, (Variable, Subscript)):
            raise RuntimeError("left hand side of assignment '%s' must "
                    "be variable or subscript" % lhs)

        return ExpressionInstruction(
                    id=insn_id,
                    insn_deps=insn_deps,
                    insn_deps_is_final=insn_deps_is_final,
                    forced_iname_deps_is_final=forced_iname_deps_is_final,
                    forced_iname_deps=forced_iname_deps,
                    assignee=lhs, expression=rhs,
                    temp_var_type=temp_var_type,
                    priority=priority,
                    predicates=predicates,
                    tags=tags), inames_to_dup

    elif subst_match is not None:
        from pymbolic.primitives import Variable, Call

        if isinstance(lhs, Variable):
            subst_name = lhs.name
            arg_names = []
        elif isinstance(lhs, Call):
            if not isinstance(lhs.function, Variable):
                raise RuntimeError("Invalid substitution rule left-hand side")
            subst_name = lhs.function.name
            arg_names = []

            for i, arg in enumerate(lhs.parameters):
                if not isinstance(arg, Variable):
                    raise RuntimeError("Invalid substitution rule "
                                    "left-hand side: %s--arg number %d "
                                    "is not a variable" % (lhs, i))
                arg_names.append(arg.name)
        else:
            raise RuntimeError("Invalid substitution rule left-hand side")

        return SubstitutionRule(
                name=subst_name,
                arguments=tuple(arg_names),
                expression=rhs), []
Esempio n. 20
0
def buffer_array(
    kernel,
    var_name,
    buffer_inames,
    init_expression=None,
    store_expression=None,
    within=None,
    default_tag="l.auto",
    temporary_is_local=None,
    fetch_bounding_box=False,
):
    """
    :arg init_expression: Either *None* (indicating the prior value of the buffered
        array should be read) or an expression optionally involving the
        variable 'base' (which references the associated location in the array
        being buffered).
    :arg store_expression: Either *None* or an expression involving
        variables 'base' and 'buffer' (without array indices).
    """

    # {{{ process arguments

    if isinstance(init_expression, str):
        from loopy.symbolic import parse

        init_expression = parse(init_expression)

    if isinstance(store_expression, str):
        from loopy.symbolic import parse

        store_expression = parse(store_expression)

    if isinstance(buffer_inames, str):
        buffer_inames = [s.strip() for s in buffer_inames.split(",") if s.strip()]

    for iname in buffer_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname" % iname)

    buffer_inames = list(buffer_inames)
    buffer_inames_set = frozenset(buffer_inames)

    from loopy.context_matching import parse_stack_match

    within = parse_stack_match(within)

    if var_name in kernel.arg_dict:
        var_descr = kernel.arg_dict[var_name]
    elif var_name in kernel.temporary_variables:
        var_descr = kernel.temporary_variables[var_name]
    else:
        raise ValueError("variable '%s' not found" % var_name)

    from loopy.kernel.data import ArrayBase

    if isinstance(var_descr, ArrayBase):
        var_shape = var_descr.shape
    else:
        var_shape = ()

    if temporary_is_local is None:
        import loopy as lp

        temporary_is_local = lp.auto

    # }}}

    var_name_gen = kernel.get_var_name_generator()
    within_inames = set()

    access_descriptors = []
    for insn in kernel.instructions:
        if not within(kernel, insn.id, ()):
            continue

        for assignee, index in insn.assignees_and_indices():
            if assignee == var_name:
                within_inames.update((get_dependencies(index) & kernel.all_inames()) - buffer_inames_set)
                access_descriptors.append(AccessDescriptor(identifier=insn.id, storage_axis_exprs=index))

    # {{{ find fetch/store inames

    init_inames = []
    store_inames = []
    new_iname_to_tag = {}

    for i in range(len(var_shape)):
        init_iname = var_name_gen("%s_init_%d" % (var_name, i))
        store_iname = var_name_gen("%s_store_%d" % (var_name, i))

        new_iname_to_tag[init_iname] = default_tag
        new_iname_to_tag[store_iname] = default_tag

        init_inames.append(init_iname)
        store_inames.append(store_iname)

    # }}}

    # {{{ modify loop domain

    non1_init_inames = []
    non1_store_inames = []

    if var_shape:
        # {{{ find domain to be changed

        from loopy.kernel.tools import DomainChanger

        domch = DomainChanger(kernel, buffer_inames_set | within_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in buffer_inames_set:
                if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
                    raise RuntimeError("buffer iname '%s' is not 'at home' in the " "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, buffer_inames, access_descriptors, len(var_shape))

        for i in range(len(var_shape)):
            if abm.non1_storage_axis_flags[i]:
                non1_init_inames.append(init_inames[i])
                non1_store_inames.append(store_inames[i])
            else:
                del new_iname_to_tag[init_inames[i]]
                del new_iname_to_tag[store_inames[i]]

        new_domain = domch.domain
        new_domain = abm.augment_domain_with_sweep(new_domain, non1_init_inames, boxify_sweep=fetch_bounding_box)
        new_domain = abm.augment_domain_with_sweep(new_domain, non1_store_inames, boxify_sweep=fetch_bounding_box)
        new_kernel_domains = domch.get_domains_with(new_domain)
        del new_domain

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        abm = NoOpArrayToBufferMap()

    # }}}

    # {{{ set up temp variable

    import loopy as lp

    buf_var_name = var_name_gen(based_on=var_name + "_buf")

    new_temporary_variables = kernel.temporary_variables.copy()
    temp_var = lp.TemporaryVariable(
        name=buf_var_name,
        dtype=var_descr.dtype,
        base_indices=(0,) * len(abm.non1_storage_shape),
        shape=tuple(abm.non1_storage_shape),
        is_local=temporary_is_local,
    )

    new_temporary_variables[buf_var_name] = temp_var

    # }}}

    new_insns = []

    buf_var = var(buf_var_name)

    # {{{ generate init instruction

    buf_var_init = buf_var
    if non1_init_inames:
        buf_var_init = buf_var_init.index(tuple(var(iname) for iname in non1_init_inames))

    init_base = var(var_name)

    init_subscript = []
    init_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_init_inames[init_iname_idx])
                init_iname_idx += 1
            init_subscript.append(ax_subscript)

    if init_subscript:
        init_base = init_base.index(tuple(init_subscript))

    if init_expression is None:
        init_expression = init_base
    else:
        init_expression = init_expression
        init_expression = SubstitutionMapper(make_subst_func({"base": init_base}))(init_expression)

    init_insn_id = kernel.make_unique_instruction_id(based_on="init_" + var_name)
    from loopy.kernel.data import ExpressionInstruction

    init_instruction = ExpressionInstruction(
        id=init_insn_id,
        assignee=buf_var_init,
        expression=init_expression,
        forced_iname_deps=frozenset(within_inames),
        insn_deps=frozenset(),
        insn_deps_is_final=True,
    )

    # }}}

    rule_mapping_context = SubstitutionRuleMappingContext(kernel.substitutions, kernel.get_var_name_generator())
    aar = ArrayAccessReplacer(rule_mapping_context, var_name, within, abm, buf_var)
    kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel))

    did_write = False
    for insn_id in aar.modified_insn_ids:
        insn = kernel.id_to_insn[insn_id]
        if any(assignee_name == buf_var_name for assignee_name, _ in insn.assignees_and_indices()):
            did_write = True

    # {{{ add init_insn_id to insn_deps

    new_insns = []

    def none_to_empty_set(s):
        if s is None:
            return frozenset()
        else:
            return s

    for insn in kernel.instructions:
        if insn.id in aar.modified_insn_ids:
            new_insns.append(insn.copy(insn_deps=(none_to_empty_set(insn.insn_deps) | frozenset([init_insn_id]))))
        else:
            new_insns.append(insn)

    # }}}

    # {{{ generate store instruction

    buf_var_store = buf_var
    if non1_store_inames:
        buf_var_store = buf_var_store.index(tuple(var(iname) for iname in non1_store_inames))

    store_subscript = []
    store_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_store_inames[store_iname_idx])
                store_iname_idx += 1
            store_subscript.append(ax_subscript)

    store_target = var(var_name)
    if store_subscript:
        store_target = store_target.index(tuple(store_subscript))

    if store_expression is None:
        store_expression = buf_var_store
    else:
        store_expression = SubstitutionMapper(make_subst_func({"base": store_target, "buffer": buf_var_store}))(
            store_expression
        )

    from loopy.kernel.data import ExpressionInstruction

    store_instruction = ExpressionInstruction(
        id=kernel.make_unique_instruction_id(based_on="store_" + var_name),
        insn_deps=frozenset(aar.modified_insn_ids),
        assignee=store_target,
        expression=store_expression,
        forced_iname_deps=frozenset(within_inames),
    )

    # }}}

    new_insns.append(init_instruction)
    if did_write:
        new_insns.append(store_instruction)

    kernel = kernel.copy(
        domains=new_kernel_domains, instructions=new_insns, temporary_variables=new_temporary_variables
    )

    from loopy import tag_inames

    kernel = tag_inames(kernel, new_iname_to_tag)

    return kernel
Esempio n. 21
0
def precompute(kernel, subst_use, sweep_inames=[], within=None,
        storage_axes=None, temporary_name=None, precompute_inames=None,
        storage_axis_to_tag={}, default_tag="l.auto", dtype=None,
        fetch_bounding_box=False, temporary_is_local=None,
        compute_insn_id=None):
    """Precompute the expression described in the substitution rule determined by
    *subst_use* and store it in a temporary array. A precomputation needs two
    things to operate, a list of *sweep_inames* (order irrelevant) and an
    ordered list of *storage_axes* (whose order will describe the axis ordering
    of the temporary array).

    :arg subst_use: Describes what to prefetch.

    The following objects may be given for *subst_use*:

    * The name of the substitution rule.

    * The tagged name ("name$tag") of the substitution rule.

    * A list of invocations of the substitution rule.
      This list of invocations, when swept across *sweep_inames*, then serves
      to define the footprint of the precomputation.

      Invocations may be tagged ("name$tag") to filter out a subset of the
      usage sites of the substitution rule. (Namely those usage sites that
      use the same tagged name.)

      Invocations may be given as a string or as a
      :class:`pymbolic.primitives.Expression` object.

      If only one invocation is to be given, then the only entry of the list
      may be given directly.

    If the list of invocations generating the footprint is not given,
    all (tag-matching, if desired) usage sites of the substitution rule
    are used to determine the footprint.

    The following cases can arise for each sweep axis:

    * The axis is an iname that occurs within arguments specified at
      usage sites of the substitution rule. This case is assumed covered
      by the storage axes provided for the argument.

    * The axis is an iname that occurs within the *value* of the rule, but not
      within its arguments. A new, dedicated storage axis is allocated for
      such an axis.

    :arg sweep_inames: A :class:`list` of inames and/or rule argument
        names to be swept.
        May also equivalently be a comma-separated string.
    :arg storage_axes: A :class:`list` of inames and/or rule argument
        names/indices to be used as storage axes.
        May also equivalently be a comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.context_matching.parse_stack_match`.
    :arg temporary_name:
        The temporary variable name to use for storing the precomputed data.
        If it does not exist, it will be created. If it does exist, its properties
        (such as size, type) are checked (and updated, if possible) to match
        its use.
    :arg precompute_inames:
        A tuple of inames to be used to carry out the precomputation.
        If the specified inames do not already exist, they will be
        created. If they do already exist, their loop domain is verified
        against the one required for this precomputation. This tuple may
        be shorter than the (provided or automatically found) *storage_axes*
        tuple, in which case names will be automatically created.
        May also equivalently be a comma-separated string.

    :arg compute_insn_id: The ID of the instruction performing the precomputation.

    If `storage_axes` is not specified, it defaults to the arrangement
    `<direct sweep axes><arguments>` with the direct sweep axes being the
    slower-varying indices.

    Trivial storage axes (i.e. axes of length 1 with respect to the sweep) are
    eliminated.
    """

    # {{{ check, standardize arguments

    if isinstance(sweep_inames, str):
        sweep_inames = [iname.strip() for iname in sweep_inames.split(",")]

    for iname in sweep_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname"
                    % iname)

    sweep_inames = list(sweep_inames)
    sweep_inames_set = frozenset(sweep_inames)

    if isinstance(storage_axes, str):
        storage_axes = [ax.strip() for ax in storage_axes.split(",")]

    if isinstance(precompute_inames, str):
        precompute_inames = [iname.strip() for iname in precompute_inames.split(",")]

    if isinstance(subst_use, str):
        subst_use = [subst_use]

    footprint_generators = None

    subst_name = None
    subst_tag = None

    from pymbolic.primitives import Variable, Call
    from loopy.symbolic import parse, TaggedVariable

    for use in subst_use:
        if isinstance(use, str):
            use = parse(use)

        if isinstance(use, Call):
            if footprint_generators is None:
                footprint_generators = []

            footprint_generators.append(use)
            subst_name_as_expr = use.function
        else:
            subst_name_as_expr = use

        if isinstance(subst_name_as_expr, TaggedVariable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = subst_name_as_expr.tag
        elif isinstance(subst_name_as_expr, Variable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = None
        else:
            raise ValueError("unexpected type of subst_name")

        if (subst_name, subst_tag) == (None, None):
            subst_name, subst_tag = new_subst_name, new_subst_tag
        else:
            if (subst_name, subst_tag) != (new_subst_name, new_subst_tag):
                raise ValueError("not all uses in subst_use agree "
                        "on rule name and tag")

    from loopy.context_matching import parse_stack_match
    within = parse_stack_match(within)

    from loopy.kernel.data import parse_tag
    default_tag = parse_tag(default_tag)

    subst = kernel.substitutions[subst_name]
    c_subst_name = subst_name.replace(".", "_")

    # }}}

    # {{{ process invocations in footprint generators, start access_descriptors

    if footprint_generators:
        from pymbolic.primitives import Variable, Call

        access_descriptors = []

        for fpg in footprint_generators:
            if isinstance(fpg, Variable):
                args = ()
            elif isinstance(fpg, Call):
                args = fpg.parameters
            else:
                raise ValueError("footprint generator must "
                        "be substitution rule invocation")

            access_descriptors.append(
                    RuleAccessDescriptor(
                        identifier=access_descriptor_id(args, None),
                        args=args
                        ))

    # }}}

    # {{{ gather up invocations in kernel code, finish access_descriptors

    if not footprint_generators:
        rule_mapping_context = SubstitutionRuleMappingContext(
                kernel.substitutions, kernel.get_var_name_generator())
        invg = RuleInvocationGatherer(
                rule_mapping_context, kernel, subst_name, subst_tag, within)
        del rule_mapping_context

        import loopy as lp
        for insn in kernel.instructions:
            if isinstance(insn, lp.Assignment):
                invg(insn.assignee, kernel, insn)
                invg(insn.expression, kernel, insn)

        access_descriptors = invg.access_descriptors
        if not access_descriptors:
            raise RuntimeError("no invocations of '%s' found" % subst_name)

    # }}}

    # {{{ find inames used in arguments

    expanding_usage_arg_deps = set()

    for accdesc in access_descriptors:
        for arg in accdesc.args:
            expanding_usage_arg_deps.update(
                    get_dependencies(arg) & kernel.all_inames())

    # }}}

    var_name_gen = kernel.get_var_name_generator()

    # {{{ use given / find new storage_axes

    # extra axes made necessary because they don't occur in the arguments
    extra_storage_axes = set(sweep_inames_set - expanding_usage_arg_deps)

    from loopy.symbolic import SubstitutionRuleExpander
    submap = SubstitutionRuleExpander(kernel.substitutions)

    value_inames = get_dependencies(
            submap(subst.expression)
            ) & kernel.all_inames()
    if value_inames - expanding_usage_arg_deps < extra_storage_axes:
        raise RuntimeError("unreferenced sweep inames specified: "
                + ", ".join(extra_storage_axes
                    - value_inames - expanding_usage_arg_deps))

    new_iname_to_tag = {}

    if storage_axes is None:
        storage_axes = []

        # Add sweep_inames (in given--rather than arbitrary--order) to
        # storage_axes *if* they are part of extra_storage_axes.
        for iname in sweep_inames:
            if iname in extra_storage_axes:
                extra_storage_axes.remove(iname)
                storage_axes.append(iname)

        if extra_storage_axes:
            if (precompute_inames is not None
                    and len(storage_axes) < len(precompute_inames)):
                raise LoopyError("must specify a sufficient number of "
                        "storage_axes to uniquely determine the meaning "
                        "of the given precompute_inames. (%d storage_axes "
                        "needed)" % len(precompute_inames))
            storage_axes.extend(sorted(extra_storage_axes))

        storage_axes.extend(range(len(subst.arguments)))

    del extra_storage_axes

    prior_storage_axis_name_dict = {}

    storage_axis_names = []
    storage_axis_sources = []  # number for arg#, or iname

    # {{{ check for pre-existing precompute_inames

    if precompute_inames is not None:
        preexisting_precompute_inames = (
                set(precompute_inames) & kernel.all_inames())
    else:
        preexisting_precompute_inames = set()

    # }}}

    for i, saxis in enumerate(storage_axes):
        tag_lookup_saxis = saxis

        if saxis in subst.arguments:
            saxis = subst.arguments.index(saxis)

        storage_axis_sources.append(saxis)

        if isinstance(saxis, int):
            # argument index
            name = old_name = subst.arguments[saxis]
        else:
            old_name = saxis
            name = "%s_%s" % (c_subst_name, old_name)

        if (precompute_inames is not None
                and i < len(precompute_inames)
                and precompute_inames[i]):
            name = precompute_inames[i]
            tag_lookup_saxis = name
            if (name not in preexisting_precompute_inames
                    and var_name_gen.is_name_conflicting(name)):
                raise RuntimeError("new storage axis name '%s' "
                        "conflicts with existing name" % name)
        else:
            name = var_name_gen(name)

        storage_axis_names.append(name)
        if name not in preexisting_precompute_inames:
            new_iname_to_tag[name] = storage_axis_to_tag.get(
                    tag_lookup_saxis, default_tag)

        prior_storage_axis_name_dict[name] = old_name

    del storage_axis_to_tag
    del storage_axes
    del precompute_inames

    # }}}

    # {{{ fill out access_descriptors[...].storage_axis_exprs

    access_descriptors = [
            accdesc.copy(
                storage_axis_exprs=storage_axis_exprs(
                    storage_axis_sources, accdesc.args))
            for accdesc in access_descriptors]

    # }}}

    expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps)
    assert expanding_inames <= kernel.all_inames()

    if storage_axis_names:
        # {{{ find domain to be changed

        change_inames = expanding_inames | preexisting_precompute_inames

        from loopy.kernel.tools import DomainChanger
        domch = DomainChanger(kernel, change_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in sweep_inames_set:
                if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
                    raise RuntimeError("sweep iname '%s' is not 'at home' in the "
                            "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, sweep_inames,
                access_descriptors, len(storage_axis_names))

        non1_storage_axis_names = []
        for i, saxis in enumerate(storage_axis_names):
            if abm.non1_storage_axis_flags[i]:
                non1_storage_axis_names.append(saxis)
            else:
                del new_iname_to_tag[saxis]

                if saxis in preexisting_precompute_inames:
                    raise LoopyError("precompute axis %d (1-based) was "
                            "eliminated as "
                            "having length 1 but also mapped to existing "
                            "iname '%s'" % (i+1, saxis))

        mod_domain = domch.domain

        # {{{ modify the domain, taking into account preexisting inames

        # inames may already exist in mod_domain, add them primed to start
        primed_non1_saxis_names = [
                iname+"'" for iname in non1_storage_axis_names]

        mod_domain = abm.augment_domain_with_sweep(
            domch.domain, primed_non1_saxis_names,
            boxify_sweep=fetch_bounding_box)

        check_domain = mod_domain

        for i, saxis in enumerate(non1_storage_axis_names):
            var_dict = mod_domain.get_var_dict(isl.dim_type.set)

            if saxis in preexisting_precompute_inames:
                # add equality constraint between existing and new variable

                dt, dim_idx = var_dict[saxis]
                saxis_aff = isl.Aff.var_on_domain(mod_domain.space, dt, dim_idx)

                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                new_var_aff = isl.Aff.var_on_domain(mod_domain.space, dt, dim_idx)

                mod_domain = mod_domain.add_constraint(
                        isl.Constraint.equality_from_aff(new_var_aff - saxis_aff))

                # project out the new one
                mod_domain = mod_domain.project_out(dt, dim_idx, 1)

            else:
                # remove the prime from the new variable
                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                mod_domain = mod_domain.set_dim_name(dt, dim_idx, saxis)

        # {{{ check that we got the desired domain

        check_domain = check_domain.project_out_except(
                primed_non1_saxis_names, [isl.dim_type.set])

        mod_check_domain = mod_domain

        # re-add the prime from the new variable
        var_dict = mod_check_domain.get_var_dict(isl.dim_type.set)

        for saxis in non1_storage_axis_names:
            dt, dim_idx = var_dict[saxis]
            mod_check_domain = mod_check_domain.set_dim_name(dt, dim_idx, saxis+"'")

        mod_check_domain = mod_check_domain.project_out_except(
                primed_non1_saxis_names, [isl.dim_type.set])

        mod_check_domain, check_domain = isl.align_two(
                mod_check_domain, check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError("domain of preexisting inames does not match "
                    "domain needed for precompute")

        # }}}

        # {{{ check that we didn't shrink the original domain

        # project out the new names from the modified domain
        orig_domain_inames = list(domch.domain.get_var_dict(isl.dim_type.set))
        mod_check_domain = mod_domain.project_out_except(
                orig_domain_inames, [isl.dim_type.set])

        check_domain = domch.domain

        mod_check_domain, check_domain = isl.align_two(
                mod_check_domain, check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError("original domain got shrunk by applying the precompute")

        # }}}

        # }}}

        new_kernel_domains = domch.get_domains_with(mod_domain)

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        non1_storage_axis_names = []
        abm = NoOpArrayToBufferMap()

    kernel = kernel.copy(domains=new_kernel_domains)

    # {{{ set up compute insn

    if temporary_name is None:
        temporary_name = var_name_gen(based_on=c_subst_name)

    assignee = var(temporary_name)

    if non1_storage_axis_names:
        assignee = assignee.index(
                tuple(var(iname) for iname in non1_storage_axis_names))

    # {{{ process substitutions on compute instruction

    storage_axis_subst_dict = {}

    for arg_name, bi in zip(storage_axis_names, abm.storage_base_indices):
        if arg_name in non1_storage_axis_names:
            arg = var(arg_name)
        else:
            arg = 0

        storage_axis_subst_dict[
                prior_storage_axis_name_dict.get(arg_name, arg_name)] = arg+bi

    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())

    from loopy.context_matching import parse_stack_match
    expr_subst_map = RuleAwareSubstitutionMapper(
            rule_mapping_context,
            make_subst_func(storage_axis_subst_dict),
            within=parse_stack_match(None))

    compute_expression = expr_subst_map(subst.expression, kernel, None)

    # }}}

    from loopy.kernel.data import Assignment
    if compute_insn_id is None:
        compute_insn_id = kernel.make_unique_instruction_id(based_on=c_subst_name)

    compute_insn = Assignment(
            id=compute_insn_id,
            assignee=assignee,
            expression=compute_expression)

    # }}}

    # {{{ substitute rule into expressions in kernel (if within footprint)

    invr = RuleInvocationReplacer(rule_mapping_context,
            subst_name, subst_tag, within,
            access_descriptors, abm,
            storage_axis_names, storage_axis_sources,
            non1_storage_axis_names,
            temporary_name, compute_insn_id)

    kernel = invr.map_kernel(kernel)
    kernel = kernel.copy(
            instructions=[compute_insn] + kernel.instructions)
    kernel = rule_mapping_context.finish_kernel(kernel)

    # }}}

    # {{{ set up temp variable

    import loopy as lp
    if dtype is None:
        dtype = lp.auto
    else:
        dtype = np.dtype(dtype)

    import loopy as lp

    if temporary_is_local is None:
        temporary_is_local = lp.auto

    new_temp_shape = tuple(abm.non1_storage_shape)

    new_temporary_variables = kernel.temporary_variables.copy()
    if temporary_name not in new_temporary_variables:
        temp_var = lp.TemporaryVariable(
                name=temporary_name,
                dtype=dtype,
                base_indices=(0,)*len(new_temp_shape),
                shape=tuple(abm.non1_storage_shape),
                is_local=temporary_is_local)

    else:
        temp_var = new_temporary_variables[temporary_name]

        # {{{ check and adapt existing temporary

        if temp_var.dtype is lp.auto:
            pass
        elif temp_var.dtype is not lp.auto and dtype is lp.auto:
            dtype = temp_var.dtype
        elif temp_var.dtype is not lp.auto and dtype is not lp.auto:
            if temp_var.dtype != dtype:
                raise LoopyError("Existing and new dtype of temporary '%s' "
                        "do not match (existing: %s, new: %s)"
                        % (temporary_name, temp_var.dtype, dtype))

        temp_var = temp_var.copy(dtype=dtype)

        if len(temp_var.shape) != len(new_temp_shape):
            raise LoopyError("Existing and new temporary '%s' do not "
                    "have matching number of dimensions "
                    % (temporary_name,
                        len(temp_var.shape), len(new_temp_shape)))

        if temp_var.base_indices != (0,) * len(new_temp_shape):
            raise LoopyError("Existing and new temporary '%s' do not "
                    "have matching number of dimensions "
                    % (temporary_name,
                        len(temp_var.shape), len(new_temp_shape)))

        new_temp_shape = tuple(
                max(i, ex_i)
                for i, ex_i in zip(new_temp_shape, temp_var.shape))

        temp_var = temp_var.copy(shape=new_temp_shape)

        if temporary_is_local == temp_var.is_local:
            pass
        elif temporary_is_local is lp.auto:
            temporary_is_local = temp_var.is_local
        elif temp_var.is_local is lp.auto:
            pass
        else:
            raise LoopyError("Existing and new temporary '%s' do not "
                    "have matching values of 'is_local'"
                    % (temporary_name,
                        temp_var.is_local, temporary_is_local))

        temp_var = temp_var.copy(is_local=temporary_is_local)

        # }}}

    new_temporary_variables[temporary_name] = temp_var

    kernel = kernel.copy(
            temporary_variables=new_temporary_variables)

    # }}}

    from loopy import tag_inames
    kernel = tag_inames(kernel, new_iname_to_tag)

    from loopy.kernel.data import AutoFitLocalIndexTag
    has_automatic_axes = any(
            isinstance(tag, AutoFitLocalIndexTag)
            for tag in new_iname_to_tag.values())

    if has_automatic_axes:
        from loopy.kernel.tools import assign_automatic_axes
        kernel = assign_automatic_axes(kernel)

    return kernel
Esempio n. 22
0
def _parse_array_dim_tag(tag, default_target_axis, nesting_levels):
    if isinstance(tag, ArrayDimImplementationTag):
        return False, False, tag

    if not isinstance(tag, str):
        raise TypeError("arg dimension implementation tag must be "
                "string or tag object")

    tag = tag.strip()
    is_optional = False
    if tag.endswith("?"):
        tag = tag[:-1]
        is_optional = True

    orig_tag = tag

    if tag == "sep":
        return False, is_optional, SeparateArrayArrayDimTag()
    elif tag == "vec":
        return False, is_optional, VectorArrayDimTag()

    nesting_level_match = NESTING_LEVEL_RE.match(tag)

    if nesting_level_match is not None:
        nesting_level = int(nesting_level_match.group(1))
        tag = nesting_level_match.group(2)
        if tag is None:
            tag = ""
    else:
        nesting_level = None

    has_explicit_nesting_level = nesting_level is not None

    target_axis_match = TARGET_AXIS_RE.search(tag)

    if target_axis_match is not None:
        target_axis = int(target_axis_match.group(1))
        tag = tag[:target_axis_match.start()]
    else:
        target_axis = default_target_axis

    ta_nesting_levels = nesting_levels.get(target_axis, [])

    if tag.startswith("stride:"):
        fixed_stride_descr = tag[7:]
        if fixed_stride_descr.strip() == "auto":
            import loopy as lp
            return (
                    has_explicit_nesting_level, is_optional,
                    FixedStrideArrayDimTag(
                        lp.auto, target_axis,
                        layout_nesting_level=nesting_level))
        else:
            from loopy.symbolic import parse
            return (
                has_explicit_nesting_level, is_optional,
                FixedStrideArrayDimTag(
                    parse(fixed_stride_descr), target_axis,
                    layout_nesting_level=nesting_level))

    else:
        padded_stride_match = PADDED_STRIDE_TAG_RE.match(tag)
        if padded_stride_match is not None:
            tag = padded_stride_match.group(1)

            from loopy.symbolic import parse
            pad_to = parse(padded_stride_match.group(2))
        else:
            pad_to = None

        if tag in ["c", "C"]:
            if nesting_level is not None:
                raise LoopyError("may not specify 'C' array order with explicit "
                        "layout nesting level")

            if ta_nesting_levels:
                nesting_level = min(ta_nesting_levels)-1
            else:
                nesting_level = 0

        elif tag in ["f", "F"]:
            if nesting_level is not None:
                raise LoopyError("may not specify 'C' array order with explicit "
                        "layout nesting level")

            if ta_nesting_levels:
                nesting_level = max(ta_nesting_levels)+1
            else:
                nesting_level = 0

        elif tag == "":
            if nesting_level is None:
                raise LoopyError("invalid dim tag: '%s'" % orig_tag)

        else:
            raise LoopyError("invalid dim tag: '%s'" % orig_tag)

        return (
                has_explicit_nesting_level, is_optional,
                ComputedStrideArrayDimTag(
                    nesting_level, pad_to=pad_to, target_axis=target_axis))
Esempio n. 23
0
    def __init__(self, id, depends_on, depends_on_is_final,
            groups, conflicts_with_groups,
            no_sync_with,
            within_inames_is_final, within_inames,
            priority,
            boostable, boostable_into, predicates, tags,
            insn_deps=None, insn_deps_is_final=None,
            forced_iname_deps=None, forced_iname_deps_is_final=None):

        # {{{ backwards compatibility goop

        if depends_on is not None and insn_deps is not None:
            raise LoopyError("may not specify both insn_deps and depends_on")
        elif insn_deps is not None:
            warn("insn_deps is deprecated, use depends_on",
                    DeprecationWarning, stacklevel=2)

            depends_on = insn_deps
            depends_on_is_final = insn_deps_is_final

        if forced_iname_deps is not None and within_inames is not None:
            raise LoopyError("may not specify both forced_iname_deps "
                    "and within_inames")
        elif forced_iname_deps is not None:
            warn("forced_iname_deps is deprecated, use within_inames",
                    DeprecationWarning, stacklevel=2)

            within_inames = forced_iname_deps
            within_inames_is_final = forced_iname_deps_is_final

        if predicates is None:
            predicates = frozenset()

        new_predicates = set()
        for pred in predicates:
            if isinstance(pred, str):
                from pymbolic.primitives import LogicalNot
                from loopy.symbolic import parse
                if pred.startswith("!"):
                    warn("predicates starting with '!' are deprecated. "
                            "Simply use 'not' instead")
                    pred = LogicalNot(parse(pred[1:]))
                else:
                    pred = parse(pred)

            new_predicates.add(pred)

        predicates = frozenset(new_predicates)
        del new_predicates

        # }}}

        if depends_on is None:
            depends_on = frozenset()

        if groups is None:
            groups = frozenset()

        if conflicts_with_groups is None:
            conflicts_with_groups = frozenset()

        if no_sync_with is None:
            no_sync_with = frozenset()

        if within_inames is None:
            within_inames = frozenset()

        if within_inames_is_final is None:
            within_inames_is_final = False

        if isinstance(depends_on, str):
            depends_on = frozenset(
                    s.strip() for s in depends_on.split(",") if s.strip())

        if depends_on_is_final is None:
            depends_on_is_final = False

        if depends_on_is_final and not isinstance(depends_on, frozenset):
            raise LoopyError("Setting depends_on_is_final to True requires "
                    "actually specifying depends_on")

        if tags is None:
            tags = frozenset()

        if priority is None:
            priority = 0

        if not isinstance(tags, frozenset):
            # was previously allowed to be tuple
            tags = frozenset(tags)

        # Periodically reenable these and run the tests to ensure all
        # performance-relevant identifiers are interned.
        #
        # from loopy.tools import is_interned
        # assert is_interned(id)
        # assert all(is_interned(dep) for dep in depends_on)
        # assert all(is_interned(grp) for grp in groups)
        # assert all(is_interned(grp) for grp in conflicts_with_groups)
        # assert all(is_interned(iname) for iname in within_inames)
        # assert all(is_interned(pred) for pred in predicates)

        assert isinstance(within_inames, frozenset)
        assert isinstance(depends_on, frozenset) or depends_on is None
        assert isinstance(groups, frozenset)
        assert isinstance(conflicts_with_groups, frozenset)

        ImmutableRecord.__init__(self,
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                no_sync_with=no_sync_with,
                groups=groups, conflicts_with_groups=conflicts_with_groups,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                priority=priority,
                boostable=boostable,
                boostable_into=boostable_into,
                predicates=predicates,
                tags=tags)
Esempio n. 24
0
def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
        default_tag="l.auto", rule_name=None,
        temporary_name=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg dim_arg_names: List of names representing each fetch axis.
    :arg rule_name: base name of the generated temporary variable.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    This function combines :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    if isinstance(parsed_var_name, TaggedVariable):
        var_name = parsed_var_name.name
        tag = parsed_var_name.tag
    else:
        var_name = parsed_var_name.name
        tag = None

    # }}}

    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        par_name = var_name_gen(based_on=based_on)
        parameters.append(par_name)

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_is_local=temporary_is_local)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel
Esempio n. 25
0
    def __init__(self,
                 id,
                 depends_on,
                 depends_on_is_final,
                 groups,
                 conflicts_with_groups,
                 no_sync_with,
                 within_inames_is_final,
                 within_inames,
                 priority,
                 boostable,
                 boostable_into,
                 predicates,
                 tags,
                 insn_deps=None,
                 insn_deps_is_final=None,
                 forced_iname_deps=None,
                 forced_iname_deps_is_final=None):

        # {{{ backwards compatibility goop

        if depends_on is not None and insn_deps is not None:
            raise LoopyError("may not specify both insn_deps and depends_on")
        elif insn_deps is not None:
            warn("insn_deps is deprecated, use depends_on",
                 DeprecationWarning,
                 stacklevel=2)

            depends_on = insn_deps
            depends_on_is_final = insn_deps_is_final

        if forced_iname_deps is not None and within_inames is not None:
            raise LoopyError("may not specify both forced_iname_deps "
                             "and within_inames")
        elif forced_iname_deps is not None:
            warn("forced_iname_deps is deprecated, use within_inames",
                 DeprecationWarning,
                 stacklevel=2)

            within_inames = forced_iname_deps
            within_inames_is_final = forced_iname_deps_is_final

        if predicates is None:
            predicates = frozenset()

        new_predicates = set()
        for pred in predicates:
            if isinstance(pred, str):
                from pymbolic.primitives import LogicalNot
                from loopy.symbolic import parse
                if pred.startswith("!"):
                    warn("predicates starting with '!' are deprecated. "
                         "Simply use 'not' instead")
                    pred = LogicalNot(parse(pred[1:]))
                else:
                    pred = parse(pred)

            new_predicates.add(pred)

        predicates = frozenset(new_predicates)
        del new_predicates

        # }}}

        if depends_on is None:
            depends_on = frozenset()

        if groups is None:
            groups = frozenset()

        if conflicts_with_groups is None:
            conflicts_with_groups = frozenset()

        if no_sync_with is None:
            no_sync_with = frozenset()

        if within_inames is None:
            within_inames = frozenset()

        if within_inames_is_final is None:
            within_inames_is_final = False

        if isinstance(depends_on, str):
            depends_on = frozenset(s.strip() for s in depends_on.split(",")
                                   if s.strip())

        if depends_on_is_final is None:
            depends_on_is_final = False

        if depends_on_is_final and not isinstance(depends_on, frozenset):
            raise LoopyError("Setting depends_on_is_final to True requires "
                             "actually specifying depends_on")

        if tags is None:
            tags = frozenset()

        if priority is None:
            priority = 0

        if not isinstance(tags, frozenset):
            # was previously allowed to be tuple
            tags = frozenset(tags)

        # Periodically reenable these and run the tests to ensure all
        # performance-relevant identifiers are interned.
        #
        # from loopy.tools import is_interned
        # assert is_interned(id)
        # assert all(is_interned(dep) for dep in depends_on)
        # assert all(is_interned(grp) for grp in groups)
        # assert all(is_interned(grp) for grp in conflicts_with_groups)
        # assert all(is_interned(iname) for iname in within_inames)
        # assert all(is_interned(pred) for pred in predicates)

        assert isinstance(within_inames, frozenset)
        assert isinstance(depends_on, frozenset) or depends_on is None
        assert isinstance(groups, frozenset)
        assert isinstance(conflicts_with_groups, frozenset)

        ImmutableRecord.__init__(self,
                                 id=id,
                                 depends_on=depends_on,
                                 depends_on_is_final=depends_on_is_final,
                                 no_sync_with=no_sync_with,
                                 groups=groups,
                                 conflicts_with_groups=conflicts_with_groups,
                                 within_inames_is_final=within_inames_is_final,
                                 within_inames=within_inames,
                                 priority=priority,
                                 boostable=boostable,
                                 boostable_into=boostable_into,
                                 predicates=predicates,
                                 tags=tags)
Esempio n. 26
0
def precompute(
        kernel,
        subst_use,
        sweep_inames=[],
        within=None,
        storage_axes=None,
        temporary_name=None,
        precompute_inames=None,
        precompute_outer_inames=None,
        storage_axis_to_tag={},

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,
        dtype=None,
        fetch_bounding_box=False,
        temporary_address_space=None,
        compute_insn_id=None,
        **kwargs):
    """Precompute the expression described in the substitution rule determined by
    *subst_use* and store it in a temporary array. A precomputation needs two
    things to operate, a list of *sweep_inames* (order irrelevant) and an
    ordered list of *storage_axes* (whose order will describe the axis ordering
    of the temporary array).

    :arg subst_use: Describes what to prefetch.

        The following objects may be given for *subst_use*:

        * The name of the substitution rule.

        * The tagged name ("name$tag") of the substitution rule.

        * A list of invocations of the substitution rule.
          This list of invocations, when swept across *sweep_inames*, then serves
          to define the footprint of the precomputation.

          Invocations may be tagged ("name$tag") to filter out a subset of the
          usage sites of the substitution rule. (Namely those usage sites that
          use the same tagged name.)

          Invocations may be given as a string or as a
          :class:`pymbolic.primitives.Expression` object.

          If only one invocation is to be given, then the only entry of the list
          may be given directly.

    If the list of invocations generating the footprint is not given,
    all (tag-matching, if desired) usage sites of the substitution rule
    are used to determine the footprint.

    The following cases can arise for each sweep axis:

    * The axis is an iname that occurs within arguments specified at
      usage sites of the substitution rule. This case is assumed covered
      by the storage axes provided for the argument.

    * The axis is an iname that occurs within the *value* of the rule, but not
      within its arguments. A new, dedicated storage axis is allocated for
      such an axis.

    :arg sweep_inames: A :class:`list` of inames to be swept.
        May also equivalently be a comma-separated string.
    :arg within: a stack match as understood by
        :func:`loopy.match.parse_stack_match`.
    :arg storage_axes: A :class:`list` of inames and/or rule argument
        names/indices to be used as storage axes.
        May also equivalently be a comma-separated string.
    :arg temporary_name:
        The temporary variable name to use for storing the precomputed data.
        If it does not exist, it will be created. If it does exist, its properties
        (such as size, type) are checked (and updated, if possible) to match
        its use.
    :arg precompute_inames:
        A tuple of inames to be used to carry out the precomputation.
        If the specified inames do not already exist, they will be
        created. If they do already exist, their loop domain is verified
        against the one required for this precomputation. This tuple may
        be shorter than the (provided or automatically found) *storage_axes*
        tuple, in which case names will be automatically created.
        May also equivalently be a comma-separated string.

    :arg precompute_outer_inames: A :class:`frozenset` of inames within which
        the compute instruction is nested. If *None*, make an educated guess.
        May also be specified as a comma-separated string.

    :arg default_tag: The :ref:`iname tag <iname-tags>` to be applied to the
        inames created to perform the precomputation. The current default will
        make them local axes and automatically split them to fit the work
        group size, but this default will disappear in favor of simply leaving them
        untagged in 2019. For 2018, a warning will be issued if no *default_tag* is
        specified.

    :arg compute_insn_id: The ID of the instruction generated to perform the
        precomputation.

    If `storage_axes` is not specified, it defaults to the arrangement
    `<direct sweep axes><arguments>` with the direct sweep axes being the
    slower-varying indices.

    Trivial storage axes (i.e. axes of length 1 with respect to the sweep) are
    eliminated.
    """

    # {{{ unify temporary_address_space / temporary_scope

    temporary_scope = kwargs.pop("temporary_scope", None)

    from loopy.kernel.data import AddressSpace
    if temporary_scope is not None:
        from warnings import warn
        warn(
            "temporary_scope is deprecated. Use temporary_address_space instead",
            DeprecationWarning,
            stacklevel=2)

        if temporary_address_space is not None:
            raise LoopyError(
                "may not specify both temporary_address_space and "
                "temporary_scope")

        temporary_address_space = temporary_scope

    del temporary_scope

    # }}}

    if kwargs:
        raise TypeError("unrecognized keyword arguments: %s" %
                        ", ".join(kwargs.keys()))

    # {{{ check, standardize arguments

    if isinstance(sweep_inames, str):
        sweep_inames = [iname.strip() for iname in sweep_inames.split(",")]

    for iname in sweep_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname" % iname)

    sweep_inames = list(sweep_inames)
    sweep_inames_set = frozenset(sweep_inames)

    if isinstance(storage_axes, str):
        storage_axes = [ax.strip() for ax in storage_axes.split(",")]

    if isinstance(precompute_inames, str):
        precompute_inames = [
            iname.strip() for iname in precompute_inames.split(",")
        ]

    if isinstance(precompute_outer_inames, str):
        precompute_outer_inames = frozenset(
            iname.strip() for iname in precompute_outer_inames.split(","))

    if isinstance(subst_use, str):
        subst_use = [subst_use]

    footprint_generators = None

    subst_name = None
    subst_tag = None

    from pymbolic.primitives import Variable, Call
    from loopy.symbolic import parse, TaggedVariable

    for use in subst_use:
        if isinstance(use, str):
            use = parse(use)

        if isinstance(use, Call):
            if footprint_generators is None:
                footprint_generators = []

            footprint_generators.append(use)
            subst_name_as_expr = use.function
        else:
            subst_name_as_expr = use

        if isinstance(subst_name_as_expr, TaggedVariable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = subst_name_as_expr.tag
        elif isinstance(subst_name_as_expr, Variable):
            new_subst_name = subst_name_as_expr.name
            new_subst_tag = None
        else:
            raise ValueError("unexpected type of subst_name")

        if (subst_name, subst_tag) == (None, None):
            subst_name, subst_tag = new_subst_name, new_subst_tag
        else:
            if (subst_name, subst_tag) != (new_subst_name, new_subst_tag):
                raise ValueError("not all uses in subst_use agree "
                                 "on rule name and tag")

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    try:
        subst = kernel.substitutions[subst_name]
    except KeyError:
        raise LoopyError("substitution rule '%s' not found" % subst_name)

    c_subst_name = subst_name.replace(".", "_")

    # {{{ handle default_tag

    from loopy.transform.data import _not_provided \
            as transform_data_not_provided

    if default_tag is _not_provided or default_tag is transform_data_not_provided:
        # no need to warn for scalar precomputes
        if sweep_inames:
            from warnings import warn
            warn(
                "Not specifying default_tag is deprecated, and default_tag "
                "will become mandatory in 2019.x. "
                "Pass 'default_tag=\"l.auto\" to match the current default, "
                "or Pass 'default_tag=None to leave the loops untagged, which "
                "is the recommended behavior.",
                DeprecationWarning,
                stacklevel=(

                    # In this case, we came here through add_prefetch. Increase
                    # the stacklevel.
                    3 if default_tag is transform_data_not_provided else 2))

        default_tag = "l.auto"

    from loopy.kernel.data import parse_tag
    default_tag = parse_tag(default_tag)

    # }}}

    # }}}

    # {{{ process invocations in footprint generators, start access_descriptors

    if footprint_generators:
        from pymbolic.primitives import Variable, Call

        access_descriptors = []

        for fpg in footprint_generators:
            if isinstance(fpg, Variable):
                args = ()
            elif isinstance(fpg, Call):
                args = fpg.parameters
            else:
                raise ValueError("footprint generator must "
                                 "be substitution rule invocation")

            access_descriptors.append(
                RuleAccessDescriptor(identifier=access_descriptor_id(
                    args, None),
                                     args=args))

    # }}}

    # {{{ gather up invocations in kernel code, finish access_descriptors

    if not footprint_generators:
        rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
        invg = RuleInvocationGatherer(rule_mapping_context, kernel, subst_name,
                                      subst_tag, within)
        del rule_mapping_context

        import loopy as lp
        for insn in kernel.instructions:
            if isinstance(insn, lp.MultiAssignmentBase):
                for assignee in insn.assignees:
                    invg(assignee, kernel, insn)
                invg(insn.expression, kernel, insn)

        access_descriptors = invg.access_descriptors
        if not access_descriptors:
            raise RuntimeError("no invocations of '%s' found" % subst_name)

    # }}}

    # {{{ find inames used in arguments

    expanding_usage_arg_deps = set()

    for accdesc in access_descriptors:
        for arg in accdesc.args:
            expanding_usage_arg_deps.update(
                get_dependencies(arg) & kernel.all_inames())

    # }}}

    var_name_gen = kernel.get_var_name_generator()

    # {{{ use given / find new storage_axes

    # extra axes made necessary because they don't occur in the arguments
    extra_storage_axes = set(sweep_inames_set - expanding_usage_arg_deps)

    from loopy.symbolic import SubstitutionRuleExpander
    submap = SubstitutionRuleExpander(kernel.substitutions)

    value_inames = (get_dependencies(submap(subst.expression)) -
                    frozenset(subst.arguments)) & kernel.all_inames()
    if value_inames - expanding_usage_arg_deps < extra_storage_axes:
        raise RuntimeError("unreferenced sweep inames specified: " +
                           ", ".join(extra_storage_axes - value_inames -
                                     expanding_usage_arg_deps))

    new_iname_to_tag = {}

    if storage_axes is None:
        storage_axes = []

        # Add sweep_inames (in given--rather than arbitrary--order) to
        # storage_axes *if* they are part of extra_storage_axes.
        for iname in sweep_inames:
            if iname in extra_storage_axes:
                extra_storage_axes.remove(iname)
                storage_axes.append(iname)

        if extra_storage_axes:
            if (precompute_inames is not None
                    and len(storage_axes) < len(precompute_inames)):
                raise LoopyError(
                    "must specify a sufficient number of "
                    "storage_axes to uniquely determine the meaning "
                    "of the given precompute_inames. (%d storage_axes "
                    "needed)" % len(precompute_inames))
            storage_axes.extend(sorted(extra_storage_axes))

        storage_axes.extend(range(len(subst.arguments)))

    del extra_storage_axes

    prior_storage_axis_name_dict = {}

    storage_axis_names = []
    storage_axis_sources = []  # number for arg#, or iname

    # {{{ check for pre-existing precompute_inames

    if precompute_inames is not None:
        preexisting_precompute_inames = (set(precompute_inames)
                                         & kernel.all_inames())
    else:
        preexisting_precompute_inames = set()

    # }}}

    for i, saxis in enumerate(storage_axes):
        tag_lookup_saxis = saxis

        if saxis in subst.arguments:
            saxis = subst.arguments.index(saxis)

        storage_axis_sources.append(saxis)

        if isinstance(saxis, int):
            # argument index
            name = old_name = subst.arguments[saxis]
        else:
            old_name = saxis
            name = "%s_%s" % (c_subst_name, old_name)

        if (precompute_inames is not None and i < len(precompute_inames)
                and precompute_inames[i]):
            name = precompute_inames[i]
            tag_lookup_saxis = name
            if (name not in preexisting_precompute_inames
                    and var_name_gen.is_name_conflicting(name)):
                raise RuntimeError("new storage axis name '%s' "
                                   "conflicts with existing name" % name)
        else:
            name = var_name_gen(name)

        storage_axis_names.append(name)
        if name not in preexisting_precompute_inames:
            new_iname_to_tag[name] = storage_axis_to_tag.get(
                tag_lookup_saxis, default_tag)

        prior_storage_axis_name_dict[name] = old_name

    del storage_axis_to_tag
    del storage_axes
    del precompute_inames

    # }}}

    # {{{ fill out access_descriptors[...].storage_axis_exprs

    access_descriptors = [
        accdesc.copy(storage_axis_exprs=storage_axis_exprs(
            storage_axis_sources, accdesc.args))
        for accdesc in access_descriptors
    ]

    # }}}

    expanding_inames = sweep_inames_set | frozenset(expanding_usage_arg_deps)
    assert expanding_inames <= kernel.all_inames()

    if storage_axis_names:
        # {{{ find domain to be changed

        change_inames = expanding_inames | preexisting_precompute_inames

        from loopy.kernel.tools import DomainChanger
        domch = DomainChanger(kernel, change_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in sweep_inames_set:
                if kernel.get_home_domain_index(
                        iname) != domch.leaf_domain_index:
                    raise RuntimeError(
                        "sweep iname '%s' is not 'at home' in the "
                        "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, sweep_inames,
                               access_descriptors, len(storage_axis_names))

        non1_storage_axis_names = []
        for i, saxis in enumerate(storage_axis_names):
            if abm.non1_storage_axis_flags[i]:
                non1_storage_axis_names.append(saxis)
            else:
                del new_iname_to_tag[saxis]

                if saxis in preexisting_precompute_inames:
                    raise LoopyError(
                        "precompute axis %d (1-based) was "
                        "eliminated as "
                        "having length 1 but also mapped to existing "
                        "iname '%s'" % (i + 1, saxis))

        mod_domain = domch.domain

        # {{{ modify the domain, taking into account preexisting inames

        # inames may already exist in mod_domain, add them primed to start
        primed_non1_saxis_names = [
            iname + "'" for iname in non1_storage_axis_names
        ]

        mod_domain = abm.augment_domain_with_sweep(
            domch.domain,
            primed_non1_saxis_names,
            boxify_sweep=fetch_bounding_box)

        check_domain = mod_domain

        for i, saxis in enumerate(non1_storage_axis_names):
            var_dict = mod_domain.get_var_dict(isl.dim_type.set)

            if saxis in preexisting_precompute_inames:
                # add equality constraint between existing and new variable

                dt, dim_idx = var_dict[saxis]
                saxis_aff = isl.Aff.var_on_domain(mod_domain.space, dt,
                                                  dim_idx)

                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                new_var_aff = isl.Aff.var_on_domain(mod_domain.space, dt,
                                                    dim_idx)

                mod_domain = mod_domain.add_constraint(
                    isl.Constraint.equality_from_aff(new_var_aff - saxis_aff))

                # project out the new one
                mod_domain = mod_domain.project_out(dt, dim_idx, 1)

            else:
                # remove the prime from the new variable
                dt, dim_idx = var_dict[primed_non1_saxis_names[i]]
                mod_domain = mod_domain.set_dim_name(dt, dim_idx, saxis)

        def add_assumptions(d):
            assumption_non_param = isl.BasicSet.from_params(kernel.assumptions)
            assumptions, domain = isl.align_two(assumption_non_param, d)
            return assumptions & domain

        # {{{ check that we got the desired domain

        check_domain = add_assumptions(
            check_domain.project_out_except(primed_non1_saxis_names,
                                            [isl.dim_type.set]))

        mod_check_domain = add_assumptions(mod_domain)

        # re-add the prime from the new variable
        var_dict = mod_check_domain.get_var_dict(isl.dim_type.set)

        for saxis in non1_storage_axis_names:
            dt, dim_idx = var_dict[saxis]
            mod_check_domain = mod_check_domain.set_dim_name(
                dt, dim_idx, saxis + "'")

        mod_check_domain = mod_check_domain.project_out_except(
            primed_non1_saxis_names, [isl.dim_type.set])

        mod_check_domain, check_domain = isl.align_two(mod_check_domain,
                                                       check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError("domain of preexisting inames does not match "
                             "domain needed for precompute")

        # }}}

        # {{{ check that we didn't shrink the original domain

        # project out the new names from the modified domain
        orig_domain_inames = list(domch.domain.get_var_dict(isl.dim_type.set))
        mod_check_domain = add_assumptions(
            mod_domain.project_out_except(orig_domain_inames,
                                          [isl.dim_type.set]))

        check_domain = add_assumptions(domch.domain)

        mod_check_domain, check_domain = isl.align_two(mod_check_domain,
                                                       check_domain)

        # The modified domain can't get bigger by adding constraints
        assert mod_check_domain <= check_domain

        if not check_domain <= mod_check_domain:
            print(check_domain)
            print(mod_check_domain)
            raise LoopyError(
                "original domain got shrunk by applying the precompute")

        # }}}

        # }}}

        new_kernel_domains = domch.get_domains_with(mod_domain)

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        non1_storage_axis_names = []
        abm = NoOpArrayToBufferMap()

    kernel = kernel.copy(domains=new_kernel_domains)

    # {{{ set up compute insn

    if temporary_name is None:
        temporary_name = var_name_gen(based_on=c_subst_name)

    assignee = var(temporary_name)

    if non1_storage_axis_names:
        assignee = assignee[tuple(
            var(iname) for iname in non1_storage_axis_names)]

    # {{{ process substitutions on compute instruction

    storage_axis_subst_dict = {}

    for arg_name, bi in zip(storage_axis_names, abm.storage_base_indices):
        if arg_name in non1_storage_axis_names:
            arg = var(arg_name)
        else:
            arg = 0

        storage_axis_subst_dict[prior_storage_axis_name_dict.get(
            arg_name, arg_name)] = arg + bi

    rule_mapping_context = SubstitutionRuleMappingContext(
        kernel.substitutions, kernel.get_var_name_generator())

    from loopy.match import parse_stack_match
    expr_subst_map = RuleAwareSubstitutionMapper(
        rule_mapping_context,
        make_subst_func(storage_axis_subst_dict),
        within=parse_stack_match(None))

    compute_expression = expr_subst_map(subst.expression, kernel, None)

    # }}}

    from loopy.kernel.data import Assignment
    if compute_insn_id is None:
        compute_insn_id = kernel.make_unique_instruction_id(
            based_on=c_subst_name)

    compute_insn = Assignment(
        id=compute_insn_id,
        assignee=assignee,
        expression=compute_expression,
        # within_inames determined below
    )
    compute_dep_id = compute_insn_id
    added_compute_insns = [compute_insn]

    if temporary_address_space == AddressSpace.GLOBAL:
        barrier_insn_id = kernel.make_unique_instruction_id(
            based_on=c_subst_name + "_barrier")
        from loopy.kernel.instruction import BarrierInstruction
        barrier_insn = BarrierInstruction(id=barrier_insn_id,
                                          depends_on=frozenset(
                                              [compute_insn_id]),
                                          synchronization_kind="global",
                                          mem_kind="global")
        compute_dep_id = barrier_insn_id

        added_compute_insns.append(barrier_insn)

    # }}}

    # {{{ substitute rule into expressions in kernel (if within footprint)

    from loopy.symbolic import SubstitutionRuleExpander
    expander = SubstitutionRuleExpander(kernel.substitutions)

    invr = RuleInvocationReplacer(rule_mapping_context,
                                  subst_name,
                                  subst_tag,
                                  within,
                                  access_descriptors,
                                  abm,
                                  storage_axis_names,
                                  storage_axis_sources,
                                  non1_storage_axis_names,
                                  temporary_name,
                                  compute_insn_id,
                                  compute_dep_id,
                                  compute_read_variables=get_dependencies(
                                      expander(compute_expression)))

    kernel = invr.map_kernel(kernel)
    kernel = kernel.copy(instructions=added_compute_insns +
                         kernel.instructions)
    kernel = rule_mapping_context.finish_kernel(kernel)

    # }}}

    # {{{ add dependencies to compute insn

    kernel = kernel.copy(instructions=[
        insn.copy(depends_on=frozenset(invr.compute_insn_depends_on)) if insn.
        id == compute_insn_id else insn for insn in kernel.instructions
    ])

    # }}}

    # {{{ propagate storage iname subst to dependencies of compute instructions

    from loopy.kernel.tools import find_recursive_dependencies
    compute_deps = find_recursive_dependencies(kernel,
                                               frozenset([compute_insn_id]))

    # FIXME: Need to verify that there are no outside dependencies
    # on compute_deps

    prior_storage_axis_names = frozenset(storage_axis_subst_dict)

    new_insns = []
    for insn in kernel.instructions:
        if (insn.id in compute_deps
                and insn.within_inames & prior_storage_axis_names):
            insn = (insn.with_transformed_expressions(
                lambda expr: expr_subst_map(expr, kernel, insn)).copy(
                    within_inames=frozenset(
                        storage_axis_subst_dict.get(iname, var(iname)).name
                        for iname in insn.within_inames)))

            new_insns.append(insn)
        else:
            new_insns.append(insn)

    kernel = kernel.copy(instructions=new_insns)

    # }}}

    # {{{ determine inames for compute insn

    if precompute_outer_inames is None:
        from loopy.kernel.tools import guess_iname_deps_based_on_var_use
        precompute_outer_inames = (
            frozenset(non1_storage_axis_names)
            | frozenset((expanding_usage_arg_deps | value_inames) -
                        sweep_inames_set)
            | guess_iname_deps_based_on_var_use(kernel, compute_insn))
    else:
        if not isinstance(precompute_outer_inames, frozenset):
            raise TypeError("precompute_outer_inames must be a frozenset")

        precompute_outer_inames = precompute_outer_inames \
                | frozenset(non1_storage_axis_names)

    kernel = kernel.copy(instructions=[
        insn.copy(within_inames=precompute_outer_inames) if insn.id ==
        compute_insn_id else insn for insn in kernel.instructions
    ])

    # }}}

    # {{{ set up temp variable

    import loopy as lp
    if dtype is not None:
        dtype = np.dtype(dtype)

    if temporary_address_space is None:
        temporary_address_space = lp.auto

    new_temp_shape = tuple(abm.non1_storage_shape)

    new_temporary_variables = kernel.temporary_variables.copy()
    if temporary_name not in new_temporary_variables:
        temp_var = lp.TemporaryVariable(
            name=temporary_name,
            dtype=dtype,
            base_indices=(0, ) * len(new_temp_shape),
            shape=tuple(abm.non1_storage_shape),
            address_space=temporary_address_space,
            dim_names=tuple(non1_storage_axis_names))

    else:
        temp_var = new_temporary_variables[temporary_name]

        # {{{ check and adapt existing temporary

        if temp_var.dtype is lp.auto:
            pass
        elif temp_var.dtype is not lp.auto and dtype is lp.auto:
            dtype = temp_var.dtype
        elif temp_var.dtype is not lp.auto and dtype is not lp.auto:
            if temp_var.dtype != dtype:
                raise LoopyError("Existing and new dtype of temporary '%s' "
                                 "do not match (existing: %s, new: %s)" %
                                 (temporary_name, temp_var.dtype, dtype))

        temp_var = temp_var.copy(dtype=dtype)

        if len(temp_var.shape) != len(new_temp_shape):
            raise LoopyError(
                "Existing and new temporary '%s' do not "
                "have matching number of dimensions ('%d' vs. '%d') " %
                (temporary_name, len(temp_var.shape), len(new_temp_shape)))

        if temp_var.base_indices != (0, ) * len(new_temp_shape):
            raise LoopyError(
                "Existing and new temporary '%s' do not "
                "have matching number of dimensions ('%d' vs. '%d') " %
                (temporary_name, len(temp_var.shape), len(new_temp_shape)))

        new_temp_shape = tuple(
            max(i, ex_i) for i, ex_i in zip(new_temp_shape, temp_var.shape))

        temp_var = temp_var.copy(shape=new_temp_shape)

        if temporary_address_space == temp_var.address_space:
            pass
        elif temporary_address_space is lp.auto:
            temporary_address_space = temp_var.address_space
        elif temp_var.address_space is lp.auto:
            pass
        else:
            raise LoopyError("Existing and new temporary '%s' do not "
                             "have matching scopes (existing: %s, new: %s)" %
                             (temporary_name,
                              AddressSpace.stringify(temp_var.address_space),
                              AddressSpace.stringify(temporary_address_space)))

        temp_var = temp_var.copy(address_space=temporary_address_space)

        # }}}

    new_temporary_variables[temporary_name] = temp_var

    kernel = kernel.copy(temporary_variables=new_temporary_variables)

    # }}}

    from loopy import tag_inames
    kernel = tag_inames(kernel, new_iname_to_tag)

    from loopy.kernel.data import AutoFitLocalIndexTag, filter_iname_tags_by_type

    if filter_iname_tags_by_type(new_iname_to_tag.values(),
                                 AutoFitLocalIndexTag):
        from loopy.kernel.tools import assign_automatic_axes
        kernel = assign_automatic_axes(kernel)

    return kernel
Esempio n. 27
0
def add_prefetch(kernel,
                 var_name,
                 sweep_inames=[],
                 dim_arg_names=None,
                 default_tag="l.auto",
                 rule_name=None,
                 temporary_name=None,
                 temporary_scope=None,
                 temporary_is_local=None,
                 footprint_subscripts=None,
                 fetch_bounding_box=False,
                 fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg dim_arg_names: List of names representing each fetch axis.
    :arg rule_name: base name of the generated temporary variable.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    This function combines :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError(
                "if footprint_subscripts is specified, then var_name "
                "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError(
            "var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    if isinstance(parsed_var_name, TaggedVariable):
        var_name = parsed_var_name.name
        tag = parsed_var_name.tag
    else:
        var_name = parsed_var_name.name
        tag = None

    # }}}

    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        par_name = var_name_gen(based_on=based_on)
        parameters.append(par_name)

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
            tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel,
                            subst_use,
                            sweep_inames,
                            precompute_inames=dim_arg_names,
                            default_tag=default_tag,
                            dtype=arg.dtype,
                            fetch_bounding_box=fetch_bounding_box,
                            temporary_name=temporary_name,
                            temporary_scope=temporary_scope,
                            temporary_is_local=temporary_is_local,
                            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:" + rule_name)
    else:
        return new_kernel
Esempio n. 28
0
def _pymbolic_parse_if_necessary(x):
    if isinstance(x, str):
        from pymbolic import parse
        return parse(x)
    else:
        return x
Esempio n. 29
0
File: data.py Progetto: shwina/loopy
def _process_footprint_subscripts(kernel, rule_name, sweep_inames,
        footprint_subscripts, arg):
    """Track applied iname rewrites, deal with slice specifiers ':'."""

    name_gen = kernel.get_var_name_generator()

    from pymbolic.primitives import Variable

    if footprint_subscripts is None:
        return kernel, rule_name, sweep_inames, []

    if not isinstance(footprint_subscripts, (list, tuple)):
        footprint_subscripts = [footprint_subscripts]

    inames_to_be_removed = []

    new_footprint_subscripts = []
    for fsub in footprint_subscripts:
        if isinstance(fsub, str):
            from loopy.symbolic import parse
            fsub = parse(fsub)

        if not isinstance(fsub, tuple):
            fsub = (fsub,)

        if len(fsub) != arg.num_user_axes():
            raise ValueError("sweep index '%s' has the wrong number of dimensions"
                    % str(fsub))

        for subst_map in kernel.applied_iname_rewrites:
            from loopy.symbolic import SubstitutionMapper
            from pymbolic.mapper.substitutor import make_subst_func
            fsub = SubstitutionMapper(make_subst_func(subst_map))(fsub)

        from loopy.symbolic import get_dependencies
        fsub_dependencies = get_dependencies(fsub)

        new_fsub = []
        for axis_nr, fsub_axis in enumerate(fsub):
            from pymbolic.primitives import Slice
            if isinstance(fsub_axis, Slice):
                if fsub_axis.children != (None,):
                    raise NotImplementedError("add_prefetch only "
                            "supports full slices")

                axis_name = name_gen(
                        based_on="%s_fetch_axis_%d" % (arg.name, axis_nr))

                kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr],
                        frozenset(sweep_inames) | fsub_dependencies)
                sweep_inames = sweep_inames + [axis_name]

                inames_to_be_removed.append(axis_name)
                new_fsub.append(Variable(axis_name))

            else:
                new_fsub.append(fsub_axis)

        new_footprint_subscripts.append(tuple(new_fsub))
        del new_fsub

    footprint_subscripts = new_footprint_subscripts
    del new_footprint_subscripts

    subst_use = [Variable(rule_name)(*si) for si in footprint_subscripts]
    return kernel, subst_use, sweep_inames, inames_to_be_removed
Esempio n. 30
0
def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
        store_expression=None, within=None, default_tag="l.auto",
        temporary_scope=None, temporary_is_local=None,
        fetch_bounding_box=False):
    """Replace accesses to *var_name* with ones to a temporary, which is
    created and acts as a buffer. To perform this transformation, the access
    footprint to *var_name* is determined and a temporary of a suitable
    :class:`loopy.AddressSpace` and shape is created.

    By default, the value of the buffered cells in *var_name* are read prior to
    any (read/write) use, and the modified values are written out after use has
    concluded, but for special use cases (e.g. additive accumulation), the
    behavior can be modified using *init_expression* and *store_expression*.

    :arg buffer_inames: The inames across which the buffer should be usable--i.e.
        all possible values of these inames will be covered by the buffer footprint.
        A tuple of inames or a comma-separated string.
    :arg init_expression: Either *None* (indicating the prior value of the buffered
        array should be read) or an expression optionally involving the
        variable 'base' (which references the associated location in the array
        being buffered).
    :arg store_expression: Either *None*, *False*, or an expression involving
        variables 'base' and 'buffer' (without array indices).
        (*None* indicates that a default storage instruction should be used,
        *False* indicates that no storing of the temporary should occur
        at all.)
    :arg within: If not None, limit the action of the transformation to
        matching contexts.  See :func:`loopy.match.parse_stack_match`
        for syntax.
    :arg temporary_scope: If given, override the choice of
        :class:`AddressSpace` for the created temporary.
    :arg default_tag: The default :ref:`iname-tags` to be assigned to the
        inames used for fetching and storing
    :arg fetch_bounding_box: If the access footprint is non-convex
        (resulting in an error), setting this argument to *True* will force a
        rectangular (and hence convex) superset of the footprint to be
        fetched.
    """

    # {{{ unify temporary_scope / temporary_is_local

    from loopy.kernel.data import AddressSpace
    if temporary_is_local is not None:
        from warnings import warn
        warn("temporary_is_local is deprecated. Use temporary_scope instead",
                DeprecationWarning, stacklevel=2)

        if temporary_scope is not None:
            raise LoopyError("may not specify both temporary_is_local and "
                    "temporary_scope")

        if temporary_is_local:
            temporary_scope = AddressSpace.LOCAL
        else:
            temporary_scope = AddressSpace.PRIVATE

    del temporary_is_local

    # }}}

    # {{{ process arguments

    if isinstance(init_expression, str):
        from loopy.symbolic import parse
        init_expression = parse(init_expression)

    if isinstance(store_expression, str):
        from loopy.symbolic import parse
        store_expression = parse(store_expression)

    if isinstance(buffer_inames, str):
        buffer_inames = [s.strip()
                for s in buffer_inames.split(",") if s.strip()]

    for iname in buffer_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname"
                    % iname)

    buffer_inames = list(buffer_inames)
    buffer_inames_set = frozenset(buffer_inames)

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    if var_name in kernel.arg_dict:
        var_descr = kernel.arg_dict[var_name]
    elif var_name in kernel.temporary_variables:
        var_descr = kernel.temporary_variables[var_name]
    else:
        raise ValueError("variable '%s' not found" % var_name)

    from loopy.kernel.data import ArrayBase
    if isinstance(var_descr, ArrayBase):
        var_shape = var_descr.shape
    else:
        var_shape = ()

    if temporary_scope is None:
        import loopy as lp
        temporary_scope = lp.auto

    # }}}

    # {{{ caching

    from loopy import CACHING_ENABLED

    from loopy.preprocess import prepare_for_caching
    key_kernel = prepare_for_caching(kernel)
    cache_key = (key_kernel, var_name, tuple(buffer_inames),
            PymbolicExpressionHashWrapper(init_expression),
            PymbolicExpressionHashWrapper(store_expression), within,
            default_tag, temporary_scope, fetch_bounding_box)

    if CACHING_ENABLED:
        try:
            result = buffer_array_cache[cache_key]
            logger.info("%s: buffer_array cache hit" % kernel.name)
            return result
        except KeyError:
            pass

    # }}}

    var_name_gen = kernel.get_var_name_generator()
    within_inames = set()

    access_descriptors = []
    for insn in kernel.instructions:
        if not within(kernel, insn.id, ()):
            continue

        from pymbolic.primitives import Variable, Subscript
        from loopy.symbolic import LinearSubscript

        for assignee in insn.assignees:
            if isinstance(assignee, Variable):
                assignee_name = assignee.name
                index = ()

            elif isinstance(assignee, Subscript):
                assignee_name = assignee.aggregate.name
                index = assignee.index_tuple

            elif isinstance(assignee, LinearSubscript):
                if assignee.aggregate.name == var_name:
                    raise LoopyError("buffer_array may not be applied in the "
                            "presence of linear write indexing into '%s'" % var_name)

            else:
                raise LoopyError("invalid lvalue '%s'" % assignee)

            if assignee_name == var_name:
                within_inames.update(
                        (get_dependencies(index) & kernel.all_inames())
                        - buffer_inames_set)
                access_descriptors.append(
                        AccessDescriptor(
                            identifier=insn.id,
                            storage_axis_exprs=index))

    # {{{ find fetch/store inames

    init_inames = []
    store_inames = []
    new_iname_to_tag = {}

    for i in range(len(var_shape)):
        dim_name = str(i)
        if isinstance(var_descr, ArrayBase) and var_descr.dim_names is not None:
            dim_name = var_descr.dim_names[i]

        init_iname = var_name_gen(f"{var_name}_init_{dim_name}")
        store_iname = var_name_gen(f"{var_name}_store_{dim_name}")

        new_iname_to_tag[init_iname] = default_tag
        new_iname_to_tag[store_iname] = default_tag

        init_inames.append(init_iname)
        store_inames.append(store_iname)

    # }}}

    # {{{ modify loop domain

    non1_init_inames = []
    non1_store_inames = []

    if var_shape:
        # {{{ find domain to be changed

        from loopy.kernel.tools import DomainChanger
        domch = DomainChanger(kernel, buffer_inames_set | within_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in buffer_inames_set:
                if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
                    raise RuntimeError("buffer iname '%s' is not 'at home' in the "
                            "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, buffer_inames,
                access_descriptors, len(var_shape))

        for i in range(len(var_shape)):
            if abm.non1_storage_axis_flags[i]:
                non1_init_inames.append(init_inames[i])
                non1_store_inames.append(store_inames[i])
            else:
                del new_iname_to_tag[init_inames[i]]
                del new_iname_to_tag[store_inames[i]]

        new_domain = domch.domain
        new_domain = abm.augment_domain_with_sweep(
                    new_domain, non1_init_inames,
                    boxify_sweep=fetch_bounding_box)
        new_domain = abm.augment_domain_with_sweep(
                    new_domain, non1_store_inames,
                    boxify_sweep=fetch_bounding_box)
        new_kernel_domains = domch.get_domains_with(new_domain)
        del new_domain

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        abm = NoOpArrayToBufferMap()

    # }}}

    # {{{ set up temp variable

    import loopy as lp

    buf_var_name = var_name_gen(based_on=var_name+"_buf")

    new_temporary_variables = kernel.temporary_variables.copy()
    temp_var = lp.TemporaryVariable(
            name=buf_var_name,
            dtype=var_descr.dtype,
            base_indices=(0,)*len(abm.non1_storage_shape),
            shape=tuple(abm.non1_storage_shape),
            address_space=temporary_scope)

    new_temporary_variables[buf_var_name] = temp_var

    # }}}

    new_insns = []

    buf_var = var(buf_var_name)

    # {{{ generate init instruction

    buf_var_init = buf_var
    if non1_init_inames:
        buf_var_init = buf_var_init.index(
                tuple(var(iname) for iname in non1_init_inames))

    init_base = var(var_name)

    init_subscript = []
    init_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_init_inames[init_iname_idx])
                init_iname_idx += 1
            init_subscript.append(ax_subscript)

    if init_subscript:
        init_base = init_base.index(tuple(init_subscript))

    if init_expression is None:
        init_expression = init_base
    else:
        init_expression = init_expression
        init_expression = SubstitutionMapper(
                make_subst_func({
                    "base": init_base,
                    }))(init_expression)

    init_insn_id = kernel.make_unique_instruction_id(based_on="init_"+var_name)
    from loopy.kernel.data import Assignment
    init_instruction = Assignment(id=init_insn_id,
                assignee=buf_var_init,
                expression=init_expression,
                within_inames=(
                    frozenset(within_inames)
                    | frozenset(non1_init_inames)),
                depends_on=frozenset(),
                depends_on_is_final=True)

    # }}}

    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    aar = ArrayAccessReplacer(rule_mapping_context, var_name,
            within, abm, buf_var)
    kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel))

    did_write = False
    for insn_id in aar.modified_insn_ids:
        insn = kernel.id_to_insn[insn_id]
        if buf_var_name in insn.assignee_var_names():
            did_write = True

    # {{{ add init_insn_id to depends_on

    new_insns = []

    def none_to_empty_set(s):
        if s is None:
            return frozenset()
        else:
            return s

    for insn in kernel.instructions:
        if insn.id in aar.modified_insn_ids:
            new_insns.append(
                    insn.copy(
                        depends_on=(
                            none_to_empty_set(insn.depends_on)
                            | frozenset([init_insn_id]))))
        else:
            new_insns.append(insn)

    # }}}

    # {{{ generate store instruction

    buf_var_store = buf_var
    if non1_store_inames:
        buf_var_store = buf_var_store.index(
                tuple(var(iname) for iname in non1_store_inames))

    store_subscript = []
    store_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_store_inames[store_iname_idx])
                store_iname_idx += 1
            store_subscript.append(ax_subscript)

    store_target = var(var_name)
    if store_subscript:
        store_target = store_target.index(tuple(store_subscript))

    if store_expression is None:
        store_expression = buf_var_store
    else:
        store_expression = SubstitutionMapper(
                make_subst_func({
                    "base": store_target,
                    "buffer": buf_var_store,
                    }))(store_expression)

    if store_expression is not False:
        from loopy.kernel.data import Assignment
        store_instruction = Assignment(
                    id=kernel.make_unique_instruction_id(based_on="store_"+var_name),
                    depends_on=frozenset(aar.modified_insn_ids),
                    no_sync_with=frozenset([(init_insn_id, "any")]),
                    assignee=store_target,
                    expression=store_expression,
                    within_inames=(
                        frozenset(within_inames)
                        | frozenset(non1_store_inames)))
    else:
        did_write = False

    # }}}

    new_insns.append(init_instruction)
    if did_write:
        new_insns.append(store_instruction)
    else:
        for iname in store_inames:
            del new_iname_to_tag[iname]

    kernel = kernel.copy(
            domains=new_kernel_domains,
            instructions=new_insns,
            temporary_variables=new_temporary_variables)

    from loopy import tag_inames
    kernel = tag_inames(kernel, new_iname_to_tag)

    from loopy.kernel.tools import assign_automatic_axes
    kernel = assign_automatic_axes(kernel)

    if CACHING_ENABLED:
        from loopy.preprocess import prepare_for_caching
        buffer_array_cache.store_if_not_present(
                cache_key, prepare_for_caching(kernel))

    return kernel
Esempio n. 31
0
def buffer_array(kernel, var_name, buffer_inames, init_expression=None,
        store_expression=None, within=None, default_tag="l.auto",
        temporary_scope=None, temporary_is_local=None,
        fetch_bounding_box=False):
    """
    :arg init_expression: Either *None* (indicating the prior value of the buffered
        array should be read) or an expression optionally involving the
        variable 'base' (which references the associated location in the array
        being buffered).
    :arg store_expression: Either *None*, *False*, or an expression involving
        variables 'base' and 'buffer' (without array indices).
        (*None* indicates that a default storage instruction should be used,
        *False* indicates that no storing of the temporary should occur
        at all.)
    """

    # {{{ unify temporary_scope / temporary_is_local

    from loopy.kernel.data import temp_var_scope
    if temporary_is_local is not None:
        from warnings import warn
        warn("temporary_is_local is deprecated. Use temporary_scope instead",
                DeprecationWarning, stacklevel=2)

        if temporary_scope is not None:
            raise LoopyError("may not specify both temporary_is_local and "
                    "temporary_scope")

        if temporary_is_local:
            temporary_scope = temp_var_scope.LOCAL
        else:
            temporary_scope = temp_var_scope.PRIVATE

    del temporary_is_local

    # }}}

    # {{{ process arguments

    if isinstance(init_expression, str):
        from loopy.symbolic import parse
        init_expression = parse(init_expression)

    if isinstance(store_expression, str):
        from loopy.symbolic import parse
        store_expression = parse(store_expression)

    if isinstance(buffer_inames, str):
        buffer_inames = [s.strip()
                for s in buffer_inames.split(",") if s.strip()]

    for iname in buffer_inames:
        if iname not in kernel.all_inames():
            raise RuntimeError("sweep iname '%s' is not a known iname"
                    % iname)

    buffer_inames = list(buffer_inames)
    buffer_inames_set = frozenset(buffer_inames)

    from loopy.match import parse_stack_match
    within = parse_stack_match(within)

    if var_name in kernel.arg_dict:
        var_descr = kernel.arg_dict[var_name]
    elif var_name in kernel.temporary_variables:
        var_descr = kernel.temporary_variables[var_name]
    else:
        raise ValueError("variable '%s' not found" % var_name)

    from loopy.kernel.data import ArrayBase
    if isinstance(var_descr, ArrayBase):
        var_shape = var_descr.shape
    else:
        var_shape = ()

    if temporary_scope is None:
        import loopy as lp
        temporary_scope = lp.auto

    # }}}

    # {{{ caching

    from loopy import CACHING_ENABLED

    from loopy.preprocess import prepare_for_caching
    key_kernel = prepare_for_caching(kernel)
    cache_key = (key_kernel, var_name, tuple(buffer_inames),
            PymbolicExpressionHashWrapper(init_expression),
            PymbolicExpressionHashWrapper(store_expression), within,
            default_tag, temporary_scope, fetch_bounding_box)

    if CACHING_ENABLED:
        try:
            result = buffer_array_cache[cache_key]
            logger.info("%s: buffer_array cache hit" % kernel.name)
            return result
        except KeyError:
            pass

    # }}}

    var_name_gen = kernel.get_var_name_generator()
    within_inames = set()

    access_descriptors = []
    for insn in kernel.instructions:
        if not within(kernel, insn.id, ()):
            continue

        from pymbolic.primitives import Variable, Subscript
        from loopy.symbolic import LinearSubscript

        for assignee in insn.assignees:
            if isinstance(assignee, Variable):
                assignee_name = assignee.name
                index = ()

            elif isinstance(assignee, Subscript):
                assignee_name = assignee.aggregate.name
                index = assignee.index_tuple

            elif isinstance(assignee, LinearSubscript):
                if assignee.aggregate.name == var_name:
                    raise LoopyError("buffer_array may not be applied in the "
                            "presence of linear write indexing into '%s'" % var_name)

            else:
                raise LoopyError("invalid lvalue '%s'" % assignee)

            if assignee_name == var_name:
                within_inames.update(
                        (get_dependencies(index) & kernel.all_inames())
                        - buffer_inames_set)
                access_descriptors.append(
                        AccessDescriptor(
                            identifier=insn.id,
                            storage_axis_exprs=index))

    # {{{ find fetch/store inames

    init_inames = []
    store_inames = []
    new_iname_to_tag = {}

    for i in range(len(var_shape)):
        dim_name = str(i)
        if isinstance(var_descr, ArrayBase) and var_descr.dim_names is not None:
            dim_name = var_descr.dim_names[i]

        init_iname = var_name_gen("%s_init_%s" % (var_name, dim_name))
        store_iname = var_name_gen("%s_store_%s" % (var_name, dim_name))

        new_iname_to_tag[init_iname] = default_tag
        new_iname_to_tag[store_iname] = default_tag

        init_inames.append(init_iname)
        store_inames.append(store_iname)

    # }}}

    # {{{ modify loop domain

    non1_init_inames = []
    non1_store_inames = []

    if var_shape:
        # {{{ find domain to be changed

        from loopy.kernel.tools import DomainChanger
        domch = DomainChanger(kernel, buffer_inames_set | within_inames)

        if domch.leaf_domain_index is not None:
            # If the sweep inames are at home in parent domains, then we'll add
            # fetches with loops over copies of these parent inames that will end
            # up being scheduled *within* loops over these parents.

            for iname in buffer_inames_set:
                if kernel.get_home_domain_index(iname) != domch.leaf_domain_index:
                    raise RuntimeError("buffer iname '%s' is not 'at home' in the "
                            "sweep's leaf domain" % iname)

        # }}}

        abm = ArrayToBufferMap(kernel, domch.domain, buffer_inames,
                access_descriptors, len(var_shape))

        for i in range(len(var_shape)):
            if abm.non1_storage_axis_flags[i]:
                non1_init_inames.append(init_inames[i])
                non1_store_inames.append(store_inames[i])
            else:
                del new_iname_to_tag[init_inames[i]]
                del new_iname_to_tag[store_inames[i]]

        new_domain = domch.domain
        new_domain = abm.augment_domain_with_sweep(
                    new_domain, non1_init_inames,
                    boxify_sweep=fetch_bounding_box)
        new_domain = abm.augment_domain_with_sweep(
                    new_domain, non1_store_inames,
                    boxify_sweep=fetch_bounding_box)
        new_kernel_domains = domch.get_domains_with(new_domain)
        del new_domain

    else:
        # leave kernel domains unchanged
        new_kernel_domains = kernel.domains

        abm = NoOpArrayToBufferMap()

    # }}}

    # {{{ set up temp variable

    import loopy as lp

    buf_var_name = var_name_gen(based_on=var_name+"_buf")

    new_temporary_variables = kernel.temporary_variables.copy()
    temp_var = lp.TemporaryVariable(
            name=buf_var_name,
            dtype=var_descr.dtype,
            base_indices=(0,)*len(abm.non1_storage_shape),
            shape=tuple(abm.non1_storage_shape),
            scope=temporary_scope)

    new_temporary_variables[buf_var_name] = temp_var

    # }}}

    new_insns = []

    buf_var = var(buf_var_name)

    # {{{ generate init instruction

    buf_var_init = buf_var
    if non1_init_inames:
        buf_var_init = buf_var_init.index(
                tuple(var(iname) for iname in non1_init_inames))

    init_base = var(var_name)

    init_subscript = []
    init_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_init_inames[init_iname_idx])
                init_iname_idx += 1
            init_subscript.append(ax_subscript)

    if init_subscript:
        init_base = init_base.index(tuple(init_subscript))

    if init_expression is None:
        init_expression = init_base
    else:
        init_expression = init_expression
        init_expression = SubstitutionMapper(
                make_subst_func({
                    "base": init_base,
                    }))(init_expression)

    init_insn_id = kernel.make_unique_instruction_id(based_on="init_"+var_name)
    from loopy.kernel.data import Assignment
    init_instruction = Assignment(id=init_insn_id,
                assignee=buf_var_init,
                expression=init_expression,
                forced_iname_deps=(
                    frozenset(within_inames)
                    | frozenset(non1_init_inames)),
                depends_on=frozenset(),
                depends_on_is_final=True)

    # }}}

    rule_mapping_context = SubstitutionRuleMappingContext(
            kernel.substitutions, kernel.get_var_name_generator())
    aar = ArrayAccessReplacer(rule_mapping_context, var_name,
            within, abm, buf_var)
    kernel = rule_mapping_context.finish_kernel(aar.map_kernel(kernel))

    did_write = False
    for insn_id in aar.modified_insn_ids:
        insn = kernel.id_to_insn[insn_id]
        if buf_var_name in insn.assignee_var_names():
            did_write = True

    # {{{ add init_insn_id to depends_on

    new_insns = []

    def none_to_empty_set(s):
        if s is None:
            return frozenset()
        else:
            return s

    for insn in kernel.instructions:
        if insn.id in aar.modified_insn_ids:
            new_insns.append(
                    insn.copy(
                        depends_on=(
                            none_to_empty_set(insn.depends_on)
                            | frozenset([init_insn_id]))))
        else:
            new_insns.append(insn)

    # }}}

    # {{{ generate store instruction

    buf_var_store = buf_var
    if non1_store_inames:
        buf_var_store = buf_var_store.index(
                tuple(var(iname) for iname in non1_store_inames))

    store_subscript = []
    store_iname_idx = 0
    if var_shape:
        for i in range(len(var_shape)):
            ax_subscript = abm.storage_base_indices[i]
            if abm.non1_storage_axis_flags[i]:
                ax_subscript += var(non1_store_inames[store_iname_idx])
                store_iname_idx += 1
            store_subscript.append(ax_subscript)

    store_target = var(var_name)
    if store_subscript:
        store_target = store_target.index(tuple(store_subscript))

    if store_expression is None:
        store_expression = buf_var_store
    else:
        store_expression = SubstitutionMapper(
                make_subst_func({
                    "base": store_target,
                    "buffer": buf_var_store,
                    }))(store_expression)

    if store_expression is not False:
        from loopy.kernel.data import Assignment
        store_instruction = Assignment(
                    id=kernel.make_unique_instruction_id(based_on="store_"+var_name),
                    depends_on=frozenset(aar.modified_insn_ids),
                    no_sync_with=frozenset([init_insn_id]),
                    assignee=store_target,
                    expression=store_expression,
                    forced_iname_deps=(
                        frozenset(within_inames)
                        | frozenset(non1_store_inames)))
    else:
        did_write = False

    # }}}

    new_insns.append(init_instruction)
    if did_write:
        new_insns.append(store_instruction)
    else:
        for iname in store_inames:
            del new_iname_to_tag[iname]

    kernel = kernel.copy(
            domains=new_kernel_domains,
            instructions=new_insns,
            temporary_variables=new_temporary_variables)

    from loopy import tag_inames
    kernel = tag_inames(kernel, new_iname_to_tag)

    from loopy.kernel.tools import assign_automatic_axes
    kernel = assign_automatic_axes(kernel)

    if CACHING_ENABLED:
        from loopy.preprocess import prepare_for_caching
        buffer_array_cache[cache_key] = prepare_for_caching(kernel)

    return kernel
Esempio n. 32
0
    def __init__(self,
                 iname_exprs,
                 code,
                 read_variables=frozenset(),
                 assignees=tuple(),
                 id=None,
                 depends_on=None,
                 depends_on_is_final=None,
                 groups=None,
                 conflicts_with_groups=None,
                 no_sync_with=None,
                 within_inames_is_final=None,
                 within_inames=None,
                 priority=0,
                 boostable=None,
                 boostable_into=None,
                 predicates=frozenset(),
                 tags=None,
                 insn_deps=None,
                 insn_deps_is_final=None):
        """
        :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples,
            simple strings pepresenting inames are also allowed. A single
            string is also allowed, which should consists of comma-separated
            inames.
        :arg assignees: Like :attr:`assignees`, but may also be a
            semicolon-separated string of such expressions or a
            sequence of strings parseable into the desired format.
        """

        InstructionBase.__init__(self,
                                 id=id,
                                 depends_on=depends_on,
                                 depends_on_is_final=depends_on_is_final,
                                 groups=groups,
                                 conflicts_with_groups=conflicts_with_groups,
                                 no_sync_with=no_sync_with,
                                 within_inames_is_final=within_inames_is_final,
                                 within_inames=within_inames,
                                 boostable=boostable,
                                 boostable_into=boostable_into,
                                 priority=priority,
                                 predicates=predicates,
                                 tags=tags,
                                 insn_deps=insn_deps,
                                 insn_deps_is_final=insn_deps_is_final)

        # {{{ normalize iname_exprs

        if isinstance(iname_exprs, str):
            iname_exprs = [i.strip() for i in iname_exprs.split(",")]
            iname_exprs = [i for i in iname_exprs if i]

        from pymbolic import var
        new_iname_exprs = []
        for i in iname_exprs:
            if isinstance(i, str):
                new_iname_exprs.append((i, var(i)))
            else:
                new_iname_exprs.append(i)

        # }}}

        # {{{ normalize assignees

        if isinstance(assignees, str):
            assignees = [i.strip() for i in assignees.split(";")]
            assignees = [i for i in assignees if i]

        new_assignees = []
        from loopy.symbolic import parse
        for i in assignees:
            if isinstance(i, str):
                new_assignees.append(parse(i))
            else:
                new_assignees.append(i)
        # }}}

        self.iname_exprs = new_iname_exprs
        from loopy.tools import remove_common_indentation
        self.code = remove_common_indentation(code)
        self.read_variables = read_variables
        self.assignees = new_assignees
Esempio n. 33
0
    def __init__(self,
            assignees, expression,
            id=None,
            depends_on=None,
            depends_on_is_final=None,
            groups=None,
            conflicts_with_groups=None,
            no_sync_with=None,
            within_inames_is_final=None,
            within_inames=None,
            boostable=None, boostable_into=None, tags=None,
            temp_var_types=None,
            priority=0, predicates=frozenset(),
            insn_deps=None, insn_deps_is_final=None,
            forced_iname_deps=None,
            forced_iname_deps_is_final=None):

        super(CallInstruction, self).__init__(
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                groups=groups,
                conflicts_with_groups=conflicts_with_groups,
                no_sync_with=no_sync_with,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                boostable=boostable,
                boostable_into=boostable_into,
                priority=priority,
                predicates=predicates,
                tags=tags,
                insn_deps=insn_deps,
                insn_deps_is_final=insn_deps_is_final,
                forced_iname_deps=forced_iname_deps,
                forced_iname_deps_is_final=forced_iname_deps_is_final)

        from pymbolic.primitives import Call
        from loopy.symbolic import Reduction
        if not isinstance(expression, (Call, Reduction)) and expression is not None:
            raise LoopyError("'expression' argument to CallInstruction "
                    "must be a function call")

        from loopy.symbolic import parse
        if isinstance(assignees, str):
            assignees = parse(assignees)
        if not isinstance(assignees, tuple):
            raise LoopyError("'assignees' argument to CallInstruction "
                    "must be a tuple or a string parseable to a tuple"
                    "--got '%s'" % type(assignees).__name__)

        if isinstance(expression, str):
            expression = parse(expression)

        from pymbolic.primitives import Variable, Subscript
        from loopy.symbolic import LinearSubscript
        for assignee in assignees:
            if not isinstance(assignee, (Variable, Subscript, LinearSubscript)):
                raise LoopyError("invalid lvalue '%s'" % assignee)

        self.assignees = assignees
        self.expression = expression

        if temp_var_types is None:
            self.temp_var_types = (Optional(),) * len(self.assignees)
        else:
            self.temp_var_types = tuple(
                    _check_and_fix_temp_var_type(tvt, stacklevel=3)
                    for tvt in temp_var_types)
Esempio n. 34
0
    def __init__(self,
            iname_exprs, code,
            read_variables=frozenset(), assignees=tuple(),
            id=None, depends_on=None, depends_on_is_final=None,
            groups=None, conflicts_with_groups=None,
            no_sync_with=None,
            within_inames_is_final=None, within_inames=None,
            priority=0, boostable=None, boostable_into=None,
            predicates=frozenset(), tags=None,
            insn_deps=None, insn_deps_is_final=None):
        """
        :arg iname_exprs: Like :attr:`iname_exprs`, but instead of tuples,
            simple strings pepresenting inames are also allowed. A single
            string is also allowed, which should consists of comma-separated
            inames.
        :arg assignees: Like :attr:`assignees`, but may also be a
            semicolon-separated string of such expressions or a
            sequence of strings parseable into the desired format.
        """

        InstructionBase.__init__(self,
                id=id,
                depends_on=depends_on,
                depends_on_is_final=depends_on_is_final,
                groups=groups, conflicts_with_groups=conflicts_with_groups,
                no_sync_with=no_sync_with,
                within_inames_is_final=within_inames_is_final,
                within_inames=within_inames,
                boostable=boostable,
                boostable_into=boostable_into,
                priority=priority, predicates=predicates, tags=tags,
                insn_deps=insn_deps,
                insn_deps_is_final=insn_deps_is_final)

        # {{{ normalize iname_exprs

        if isinstance(iname_exprs, str):
            iname_exprs = [i.strip() for i in iname_exprs.split(",")]
            iname_exprs = [i for i in iname_exprs if i]

        from pymbolic import var
        new_iname_exprs = []
        for i in iname_exprs:
            if isinstance(i, str):
                new_iname_exprs.append((i, var(i)))
            else:
                new_iname_exprs.append(i)

        # }}}

        # {{{ normalize assignees

        if isinstance(assignees, str):
            assignees = [i.strip() for i in assignees.split(";")]
            assignees = [i for i in assignees if i]

        new_assignees = []
        from loopy.symbolic import parse
        for i in assignees:
            if isinstance(i, str):
                new_assignees.append(parse(i))
            else:
                new_assignees.append(i)
        # }}}

        self.iname_exprs = new_iname_exprs
        from loopy.tools import remove_common_indentation
        self.code = remove_common_indentation(code)
        self.read_variables = read_variables
        self.assignees = new_assignees
Esempio n. 35
0
def _process_footprint_subscripts(kernel, rule_name, sweep_inames,
        footprint_subscripts, arg):
    """Track applied iname rewrites, deal with slice specifiers ':'."""

    name_gen = kernel.get_var_name_generator()

    from pymbolic.primitives import Variable

    if footprint_subscripts is None:
        return kernel, rule_name, sweep_inames, []

    if not isinstance(footprint_subscripts, (list, tuple)):
        footprint_subscripts = [footprint_subscripts]

    inames_to_be_removed = []

    new_footprint_subscripts = []
    for fsub in footprint_subscripts:
        if isinstance(fsub, str):
            from loopy.symbolic import parse
            fsub = parse(fsub)

        if not isinstance(fsub, tuple):
            fsub = (fsub,)

        if len(fsub) != arg.num_user_axes():
            raise ValueError("sweep index '%s' has the wrong number of dimensions"
                    % str(fsub))

        for subst_map in kernel.applied_iname_rewrites:
            from loopy.symbolic import SubstitutionMapper
            from pymbolic.mapper.substitutor import make_subst_func
            fsub = SubstitutionMapper(make_subst_func(subst_map))(fsub)

        from loopy.symbolic import get_dependencies
        fsub_dependencies = get_dependencies(fsub)

        new_fsub = []
        for axis_nr, fsub_axis in enumerate(fsub):
            from pymbolic.primitives import Slice
            if isinstance(fsub_axis, Slice):
                if fsub_axis.children != (None,):
                    raise NotImplementedError("add_prefetch only "
                            "supports full slices")

                axis_name = name_gen(
                        based_on="%s_fetch_axis_%d" % (arg.name, axis_nr))

                kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr],
                        frozenset(sweep_inames) | fsub_dependencies)
                sweep_inames = sweep_inames + [axis_name]

                inames_to_be_removed.append(axis_name)
                new_fsub.append(Variable(axis_name))

            else:
                new_fsub.append(fsub_axis)

        new_footprint_subscripts.append(tuple(new_fsub))
        del new_fsub

    footprint_subscripts = new_footprint_subscripts
    del new_footprint_subscripts

    subst_use = [Variable(rule_name)(*si) for si in footprint_subscripts]
    return kernel, subst_use, sweep_inames, inames_to_be_removed