Ejemplo n.º 1
0
def add_default_dependencies(kernel):
    """Return a copy of *kernel* with automatically derived dependencies added.

    Instructions whose ``depends_on_is_final`` flag is set are passed through
    unchanged. For every other instruction, each kernel argument or temporary
    variable it reads is looked up in ``kernel.writer_map()``. If that
    variable has exactly one writer, the writer's id is added to the
    instruction's ``depends_on`` (an instruction is never made to depend on
    itself this way). Variables with more than one writer contribute nothing.

    A warning is issued through ``warn_with_kernel`` for every temporary that
    is read, has no writer, and has no initializer.

    :arg kernel: the kernel whose instructions are examined.
    :returns: ``kernel.copy(...)`` carrying the new instruction list.
    """
    logger.debug("%s: default deps", kernel.name)

    # Read sets are taken from the substitution-expanded kernel --
    # presumably so that reads made inside substitution rules are seen;
    # the instructions that get rewritten are those of the original kernel.
    from loopy.transform.subst import expand_subst
    expanded_kernel = expand_subst(kernel)

    writer_map = kernel.writer_map()

    arg_names = set(arg.name for arg in kernel.args)
    var_names = arg_names | set(six.iterkeys(kernel.temporary_variables))

    # instruction id -> names of arguments/temporaries read by it
    dep_map = dict(
            (insn.id, insn.read_dependency_names() & var_names)
            for insn in expanded_kernel.instructions)

    new_insns = []
    for insn in kernel.instructions:
        if not insn.depends_on_is_final:
            auto_deps = set()

            # {{{ add automatic dependencies

            for var in dep_map[insn.id]:
                var_writers = writer_map.get(var, set())

                if not var_writers and var not in arg_names:
                    # var is in var_names but is not an argument, so it
                    # must be a temporary.
                    tv = kernel.temporary_variables[var]
                    if tv.initializer is None:
                        warn_with_kernel(kernel, "read_no_write(%s)" % var,
                                "temporary variable '%s' is read, but never written."
                                % var)

                # Only an unambiguous (single) writer yields a dependency.
                if len(var_writers) == 1:
                    auto_deps.update(var_writers - set([insn.id]))

            # }}}

            depends_on = insn.depends_on
            if depends_on is None:
                depends_on = frozenset()

            insn = insn.copy(depends_on=frozenset(auto_deps) | depends_on)

        new_insns.append(insn)

    return kernel.copy(instructions=new_insns)
Ejemplo n.º 2
0
def add_default_dependencies(kernel):
    """Return a copy of *kernel* with automatically derived dependencies added.

    Instructions whose ``depends_on_is_final`` flag is set are passed through
    unchanged. For every other instruction, each kernel argument or temporary
    variable it reads is looked up in ``kernel.writer_map()``. If that
    variable has exactly one writer, the writer's id is added to the
    instruction's ``depends_on`` (an instruction is never made to depend on
    itself this way). Variables with more than one writer contribute nothing.

    A warning is issued through ``warn_with_kernel`` for every temporary that
    is read, has no writer, and has no initializer.

    :arg kernel: the kernel whose instructions are examined.
    :returns: ``kernel.copy(...)`` carrying the new instruction list.
    """
    logger.debug("%s: default deps", kernel.name)

    # Read sets are taken from the substitution-expanded kernel --
    # presumably so that reads made inside substitution rules are seen;
    # the instructions that get rewritten are those of the original kernel.
    from loopy.transform.subst import expand_subst
    expanded_kernel = expand_subst(kernel)

    writer_map = kernel.writer_map()

    arg_names = set(arg.name for arg in kernel.args)
    var_names = arg_names | set(six.iterkeys(kernel.temporary_variables))

    # instruction id -> names of arguments/temporaries read by it
    dep_map = dict(
            (insn.id, insn.read_dependency_names() & var_names)
            for insn in expanded_kernel.instructions)

    new_insns = []
    for insn in kernel.instructions:
        if not insn.depends_on_is_final:
            auto_deps = set()

            # {{{ add automatic dependencies

            for var in dep_map[insn.id]:
                var_writers = writer_map.get(var, set())

                if not var_writers and var not in arg_names:
                    # var is in var_names but is not an argument, so it
                    # must be a temporary.
                    tv = kernel.temporary_variables[var]
                    if tv.initializer is None:
                        warn_with_kernel(kernel, "read_no_write(%s)" % var,
                                "temporary variable '%s' is read, but never written."
                                % var)

                # Only an unambiguous (single) writer yields a dependency.
                if len(var_writers) == 1:
                    auto_deps.update(var_writers - set([insn.id]))

            # }}}

            depends_on = insn.depends_on
            if depends_on is None:
                depends_on = frozenset()

            insn = insn.copy(depends_on=frozenset(auto_deps) | depends_on)

        new_insns.append(insn)

    return kernel.copy(instructions=new_insns)
Ejemplo n.º 3
0
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Temporaries whose ``dtype`` is ``loopy.auto`` and arguments whose
    ``dtype`` is *None* are put on a work queue. Each queued item is handed
    to ``_infer_var_type``; on success the item is replaced by a copy
    carrying the inferred dtype, on failure it is re-queued. An item that
    fails a second time without any other item having succeeded in between
    ends the process.

    :arg kernel: the kernel to process. If it has substitution rules, type
        inference runs on an expanded copy, but the returned kernel is
        derived from the unexpanded input.
    :arg expect_completion: if true, failing to determine a type raises
        :class:`LoopyError`; otherwise inference stops and whatever has been
        inferred so far is returned.
    :returns: a copy of the input kernel with updated
        ``temporary_variables`` and ``args``.
    :raises LoopyError: if *expect_completion* is set and a type cannot be
        determined, or if a queued item is neither a temporary nor an
        argument.
    """

    logger.debug("%s: infer types" % kernel.name)

    def debug(s):
        logger.debug("%s: %s" % (kernel.name, s))

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ fill queue

    # queue contains temporary variables and arguments; a deque gives O(1)
    # removal from the front.
    from collections import deque
    queue = deque()

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        if tv.dtype is lp.auto:
            queue.append(tv)

    for arg in kernel.args:
        if arg.dtype is None:
            queue.append(arg)

    # }}}

    # The mapper looks types up in the *new* dictionaries, so results
    # stored during the loop below are visible to later inference attempts.
    from loopy.expression import TypeInferenceMapper
    type_inf_mapper = TypeInferenceMapper(kernel,
            _DictUnionView([
                new_temp_vars,
                new_arg_dict
                ]))

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    # names that failed since the last successful inference
    failed_names = set()
    while queue:
        item = queue.popleft()

        debug("inferring type for %s %s" % (type(item).__name__, item.name))

        result, symbols_with_unavailable_types = \
                _infer_var_type(kernel, item.name, type_inf_mapper, subst_expander)

        if result is not None:
            debug("     success: %s" % result)
            if isinstance(item, TemporaryVariable):
                new_temp_vars[item.name] = item.copy(dtype=result)
            elif isinstance(item, KernelArgument):
                new_arg_dict[item.name] = item.copy(dtype=result)
            else:
                raise LoopyError("unexpected item type in type inference")

            # we've made progress, reset failure markers
            failed_names = set()
            continue

        debug("     failure")

        if item.name in failed_names:
            # this item has failed before, give up.
            advice = ""
            if symbols_with_unavailable_types:
                advice += (
                        " (need type of '%s'--check for missing arguments)"
                        % ", ".join(symbols_with_unavailable_types))

            if expect_completion:
                raise LoopyError(
                        "could not determine type of '%s'%s"
                        % (item.name, advice))

            # We're done here.
            break

        # remember that this item failed
        failed_names.add(item.name)

        # NOTE(review): 'item' has already been removed from the queue here,
        # so this comparison can only succeed if its name also occurs
        # elsewhere in the queue -- confirm whether that is intended.
        queue_names = set(qi.name for qi in queue)

        if queue_names == failed_names:
            # We did what we could...
            debug("     no further progress possible, giving up")
            if expect_completion:
                raise LoopyError(
                        "could not determine type of '%s'" % item.name)
            break

        # can't infer type yet, put back into queue
        queue.append(item)

    # }}}

    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )
Ejemplo n.º 4
0
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Names of temporaries and arguments whose ``dtype`` is *None* are
    collected, a graph recording which of those names are read by the
    writers of each name is built, and its strongly connected components are
    processed one after another. Within a component, ``_infer_var_type`` is
    applied repeatedly until a full pass over the component changes no dtype.

    :arg kernel: the kernel to process. If it has substitution rules, type
        inference runs on an expanded copy, but the returned kernel is
        derived from the unexpanded input.
    :arg expect_completion: if true, failing to determine a type raises
        :class:`LoopyError`; otherwise processing of the current component
        stops and the remaining components are still attempted.
    :returns: a copy of the input kernel with updated
        ``temporary_variables`` and ``args``.
    :raises LoopyError: if *expect_completion* is set and a type cannot be
        determined, or if a looked-up item is neither a temporary nor an
        argument.
    """

    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
            count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # Set copy used for membership tests only; the list above is kept for
    # iteration so that the order in which graph nodes are created is
    # unchanged.
    names_needing_types = frozenset(names_for_type_inference)

    # written name -> names (among those needing inference) read by any
    # instruction that writes it
    dep_graph = dict(
            (written_var, set(
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
                if read_var in names_needing_types))
            for written_var in names_for_type_inference)

    from loopy.tools import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    # Lookups go through the *new* dictionaries, so dtypes stored during the
    # loop below are visible to later inference attempts.
    item_lookup = _DictUnionView([
            new_temp_vars,
            new_arg_dict
            ])
    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        # names that failed since the last successful inference
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                # Something changed during the last pass: go over the whole
                # component again.
                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (
                    _infer_var_type(
                            kernel, item.name, type_inf_mapper, subst_expander))

            failed = not result
            if not failed:
                new_dtype, = result
                if new_dtype.target is None:
                    new_dtype = new_dtype.with_target(kernel.target)

                debug("     success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug("     changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError("unexpected item type in type inference")
            else:
                debug("     failure")

            if failed:
                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unavailable_types:
                        advice += (
                                " (need type of '%s'--check for missing arguments)"
                                % ", ".join(symbols_with_unavailable_types))

                    if expect_completion:
                        raise LoopyError(
                                "could not determine type of '%s'%s"
                                % (item.name, advice))

                    else:
                        # We're done here. (Only leaves the current
                        # component; later components are still processed.)
                        break

                # remember that this item failed
                failed_names.add(item.name)

                # NOTE(review): 'name' has already been removed from the
                # queue here, so this comparison can only succeed if it also
                # occurs elsewhere in the queue -- confirm whether intended.
                if set(queue) == failed_names:
                    # We did what we could...
                    debug("     no further progress possible, giving up")
                    if expect_completion:
                        raise LoopyError(
                                "could not determine type of '%s'" % item.name)
                    break

                # can't infer type yet, put back into queue
                queue.append(name)
            else:
                # we've made progress, reset failure markers
                failed_names = set()

    # }}}

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(
            dur=end_time - start_time))

    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )
Ejemplo n.º 5
0
def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
    """Move factors common to all updates of *var_name* to its usage sites.

    The instructions of *kernel* are traversed three times:

    1. For every assignment to *var_name*, the right-hand side is split into
       sum terms (a term equal to the left-hand side is skipped) and the
       product factors shared by all those terms are collected. Results are
       grouped by an "index key": the tuple of left-hand-side subscript
       entries selected by *vary_by_axes*. Keys that unify with an existing
       key share one entry.
    2. The collected factors are removed from those assignments.
    3. In every other assignment, each access to *var_name* is multiplied by
       the factors recorded for the index key it unifies with.

    :arg kernel: a :class:`LoopKernel`. Substitution rules, if present, are
        expanded first.
    :arg var_name: name of a temporary variable or kernel argument.
    :arg vary_by_axes: axis indices and/or axis names, or a comma-separated
        string of them. Names are only resolved when the variable is an
        ``ArrayBase``.
    :returns: a copy of the (possibly expanded) kernel with rewritten
        instructions.
    :raises NameError: if *var_name* is neither a temporary nor an argument.
    :raises LoopyError: on an unknown axis name, an out-of-bounds axis index,
        a non-:class:`Assignment` instruction writing *var_name*, a
        right-hand side that depends on *var_name* other than through the
        plain left-hand-side term, or if no common factors are found.
    """
    assert isinstance(kernel, LoopKernel)
    # FIXME: Does not understand subst rules for now
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    if var_name in kernel.temporary_variables:
        var_descr = kernel.temporary_variables[var_name]
    elif var_name in kernel.arg_dict:
        var_descr = kernel.arg_dict[var_name]
    else:
        raise NameError("array '%s' was not found" % var_name)

    # {{{ check/normalize vary_by_axes

    if isinstance(vary_by_axes, str):
        vary_by_axes = vary_by_axes.split(",")

    from loopy.kernel.array import ArrayBase
    if isinstance(var_descr, ArrayBase):
        if var_descr.dim_names is not None:
            name_to_index = {
                name: idx
                for idx, name in enumerate(var_descr.dim_names)
            }
        else:
            name_to_index = {}

        def map_ax_name_to_index(ax):
            if isinstance(ax, str):
                try:
                    return name_to_index[ax]
                except KeyError:
                    raise LoopyError("axis name '%s' not understood " % ax)
            else:
                return ax

        vary_by_axes = [map_ax_name_to_index(ax) for ax in vary_by_axes]

        # Valid axis indices are 0 .. num_user_axes()-1, so an index equal
        # to the axis count is already out of bounds.
        if (vary_by_axes
                and (min(vary_by_axes) < 0
                     or max(vary_by_axes) >= var_descr.num_user_axes())):
            raise LoopyError("vary_by_axes refers to out-of-bounds axis index")

    # }}}

    from pymbolic.mapper.substitutor import make_subst_func
    from pymbolic.primitives import (Sum, Product, is_zero, flattened_sum,
                                     flattened_product, Subscript, Variable)
    from loopy.symbolic import (get_dependencies, SubstitutionMapper,
                                UnidirectionalUnifier)

    # {{{ common factor key list maintenance

    # list of (index_key, common factors found)
    common_factors = []

    def find_unifiable_cf_index(index_key):
        """Return ``(position, unification)`` of the first recorded key that
        unifies with *index_key*, or ``(None, None)`` if there is none."""
        for i, (key, _val) in enumerate(common_factors):
            unif = UnidirectionalUnifier(
                lhs_mapping_candidates=get_dependencies(key))

            unif_result = unif(key, index_key)

            if unif_result:
                assert len(unif_result) == 1
                return i, unif_result[0]

        return None, None

    def extract_index_key(access_expr):
        """Return the subscript entries of *access_expr* at *vary_by_axes*."""
        if isinstance(access_expr, Variable):
            return ()

        elif isinstance(access_expr, Subscript):
            index = access_expr.index_tuple
            return tuple(index[ax] for ax in vary_by_axes)
        else:
            raise ValueError("unexpected type of access_expr")

    def is_assignee(insn):
        return var_name in insn.assignee_var_names()

    def iterate_as(cls, expr):
        """Yield the children of *expr* if it is a *cls*, else *expr* itself."""
        if isinstance(expr, cls):
            yield from expr.children
        else:
            yield expr

    def checked_product_parts(term, insn):
        """Return the set of product factors of *term*, refusing factors
        that depend on *var_name*."""
        parts = list(iterate_as(Product, term))
        for part in parts:
            if var_name in get_dependencies(part):
                raise LoopyError("unexpected dependency on '%s' "
                                 "in RHS of instruction '%s'" %
                                 (var_name, insn.id))
        return set(parts)

    # }}}

    # {{{ find common factors

    from loopy.kernel.data import Assignment

    for insn in kernel.instructions:
        if not is_assignee(insn):
            continue

        if not isinstance(insn, Assignment):
            raise LoopyError("'%s' modified by non-single-assignment" %
                             var_name)

        lhs = insn.assignee
        rhs = insn.expression

        if is_zero(rhs):
            continue

        index_key = extract_index_key(lhs)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            # {{{ doesn't exist yet

            assert unif_result is None

            my_common_factors = None

            for term in iterate_as(Sum, rhs):
                if term == lhs:
                    continue

                product_parts = checked_product_parts(term, insn)

                if my_common_factors is None:
                    my_common_factors = product_parts
                else:
                    my_common_factors = my_common_factors & product_parts

            # None means every term was the LHS itself: nothing to record.
            if my_common_factors is not None:
                common_factors.append((index_key, my_common_factors))

            # }}}
        else:
            # {{{ match, filter existing common factors

            _, my_common_factors = common_factors[cf_index]

            # Maps factors recorded under the stored key to this
            # instruction's index key.
            unif_subst_map = SubstitutionMapper(
                make_subst_func(unif_result.lmap))

            for term in iterate_as(Sum, rhs):
                if term == lhs:
                    continue

                product_parts = checked_product_parts(term, insn)

                my_common_factors = {
                    cf
                    for cf in my_common_factors
                    if unif_subst_map(cf) in product_parts
                }

            common_factors[cf_index] = (index_key, my_common_factors)

            # }}}

    # }}}

    # Drop keys for which nothing common remained.
    common_factors = [(ik, cf) for ik, cf in common_factors if cf]

    if not common_factors:
        raise LoopyError("no common factors found")

    # {{{ remove common factors

    new_insns = []

    for insn in kernel.instructions:
        if not isinstance(insn, Assignment) or not is_assignee(insn):
            new_insns.append(insn)
            continue

        lhs = insn.assignee
        rhs = insn.expression

        if is_zero(rhs):
            new_insns.append(insn)
            continue

        index_key = extract_index_key(lhs)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            new_insns.append(insn)
            continue

        _, my_common_factors = common_factors[cf_index]

        unif_subst_map = SubstitutionMapper(make_subst_func(unif_result.lmap))

        mapped_my_common_factors = {
            unif_subst_map(cf)
            for cf in my_common_factors
        }

        new_sum_terms = []

        for term in iterate_as(Sum, rhs):
            if term == lhs:
                new_sum_terms.append(term)
                continue

            new_sum_terms.append(
                flattened_product([
                    part for part in iterate_as(Product, term)
                    if part not in mapped_my_common_factors
                ]))

        new_insns.append(insn.copy(expression=flattened_sum(new_sum_terms)))

    # }}}

    # {{{ substitute common factors into usage sites

    def find_substitution(expr):
        if isinstance(expr, Subscript):
            v = expr.aggregate.name
        elif isinstance(expr, Variable):
            v = expr.name
        else:
            return expr

        if v != var_name:
            return expr

        index_key = extract_index_key(expr)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            # No factors were recorded for an index key matching this
            # access, so there is nothing to multiply back in.
            return expr

        unif_subst_map = SubstitutionMapper(make_subst_func(unif_result.lmap))

        _, my_common_factors = common_factors[cf_index]

        if my_common_factors is not None:
            return flattened_product(
                [unif_subst_map(cf) for cf in my_common_factors] + [expr])
        else:
            return expr

    insns = new_insns
    new_insns = []

    subm = SubstitutionMapper(find_substitution)

    for insn in insns:
        # Writers of var_name were already handled above; only readers get
        # the factors multiplied back in.
        if not isinstance(insn, Assignment) or is_assignee(insn):
            new_insns.append(insn)
            continue

        new_insns.append(insn.with_transformed_expressions(subm))

    # }}}

    return kernel.copy(instructions=new_insns)
Ejemplo n.º 6
0
def preprocess_kernel(kernel, device=None):
    """Run the preprocessing passes on *kernel* and return the result.

    :arg kernel: a kernel whose ``state`` is ``kernel_state.INITIAL``.
    :arg device: deprecated; passing anything but *None* only triggers a
        :class:`DeprecationWarning`.
    :returns: the processed kernel with ``state`` set to
        ``kernel_state.PREPROCESSED``. When ``loopy.CACHING_ENABLED`` is
        true, results are looked up in and stored to ``preprocess_cache``.
    :raises LoopyError: if the kernel is not in the initial state, or if an
        iname of the kernel carries an ``AutoLocalIndexTagBase`` tag.
    """
    if device is not None:
        import warnings
        warnings.warn("passing 'device' to preprocess_kernel() is deprecated",
                DeprecationWarning, stacklevel=2)

    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.INITIAL:
        raise LoopyError(
                "cannot re-preprocess an already preprocessed kernel")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED
    if CACHING_ENABLED:
        # Remember the untouched input; it is the cache key later on.
        input_kernel = kernel

        try:
            cached_result = preprocess_cache[kernel]
        except KeyError:
            pass
        else:
            logger.info("%s: preprocess cache hit" % kernel.name)
            return cached_result

    # }}}

    logger.info("%s: preprocess start" % kernel.name)

    # {{{ check that there are no l.auto-tagged inames

    from loopy.kernel.data import AutoLocalIndexTagBase
    has_auto_local_axis = any(
            isinstance(tag, AutoLocalIndexTagBase)
            and iname in kernel.all_inames()
            for iname, tag in six.iteritems(kernel.iname_to_tag))
    if has_auto_local_axis:
        raise LoopyError(
                "kernel with automatically-assigned local axes passed "
                "to preprocessing")

    # }}}

    # Ordering restriction:
    # Type inference and reduction iname uniqueness don't handle
    # substitutions, so those are expanded before anything else runs.
    from loopy.transform.subst import expand_subst
    kernel = expand_subst(kernel)

    kernel = infer_unknown_types(kernel, expect_completion=False)
    check_reduction_iname_uniqueness(kernel)
    kernel = add_default_dependencies(kernel)

    # Ordering restrictions:
    #
    # - realize_reduction must come after type inference: it needs the
    #   types of the reduced expressions.
    #
    # - realize_reduction must come after the default dependencies: it
    #   manipulates depends_on, which could prevent defaults from being
    #   applied.
    kernel = realize_reduction(kernel, unknown_types_ok=False)

    # Ordering restriction:
    # Axes for ILP are added after reductions are realized because the
    # reduction accumulators need to be duplicated by this step.
    from loopy.transform.ilp import add_axes_to_temporaries_for_ilp_and_vec
    kernel = add_axes_to_temporaries_for_ilp_and_vec(kernel)

    kernel = find_temporary_scope(kernel)
    kernel = find_idempotence(kernel)
    kernel = limit_boostability(kernel)

    kernel = kernel.target.preprocess(kernel)

    logger.info("%s: preprocess done" % kernel.name)

    kernel = kernel.copy(state=kernel_state.PREPROCESSED)

    # {{{ prepare for caching

    # PicklableDtype instances, for example, need to know their target in
    # order to be pickled and unpickled. This is the first pass that uses
    # caching, so the target information has to be propagated here.

    if CACHING_ENABLED:
        input_kernel = prepare_for_caching(input_kernel)

    kernel = prepare_for_caching(kernel)

    # }}}

    if CACHING_ENABLED:
        preprocess_cache[input_kernel] = kernel

    return kernel
Ejemplo n.º 7
0
def collect_common_factors_on_increment(kernel, var_name, vary_by_axes=()):
    """Move factors common to all updates of *var_name* to its usage sites.

    The instructions of *kernel* are traversed three times:

    1. For every assignment to *var_name*, the right-hand side is split into
       sum terms (a term equal to the left-hand side is skipped) and the
       product factors shared by all those terms are collected. Results are
       grouped by an "index key": the tuple of left-hand-side subscript
       entries selected by *vary_by_axes*. Keys that unify with an existing
       key share one entry.
    2. The collected factors are removed from those assignments.
    3. In every other assignment, each access to *var_name* is multiplied by
       the factors recorded for the index key it unifies with.

    :arg kernel: the kernel to transform. Substitution rules, if present,
        are expanded first.
    :arg var_name: name of a temporary variable or kernel argument.
    :arg vary_by_axes: axis indices and/or axis names, or a comma-separated
        string of them. Names are only resolved when the variable is an
        ``ArrayBase``.
    :returns: a copy of the (possibly expanded) kernel with rewritten
        instructions.
    :raises NameError: if *var_name* is neither a temporary nor an argument.
    :raises LoopyError: on an unknown axis name, an out-of-bounds axis index,
        a non-``Assignment`` instruction writing *var_name*, or a right-hand
        side that depends on *var_name* other than through the plain
        left-hand-side term.
    """
    # FIXME: Does not understand subst rules for now
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    if var_name in kernel.temporary_variables:
        var_descr = kernel.temporary_variables[var_name]
    elif var_name in kernel.arg_dict:
        var_descr = kernel.arg_dict[var_name]
    else:
        raise NameError("array '%s' was not found" % var_name)

    # {{{ check/normalize vary_by_axes

    if isinstance(vary_by_axes, str):
        vary_by_axes = vary_by_axes.split(",")

    from loopy.kernel.array import ArrayBase
    if isinstance(var_descr, ArrayBase):
        if var_descr.dim_names is not None:
            name_to_index = dict(
                    (name, idx)
                    for idx, name in enumerate(var_descr.dim_names))
        else:
            name_to_index = {}

        def map_ax_name_to_index(ax):
            if isinstance(ax, str):
                try:
                    return name_to_index[ax]
                except KeyError:
                    raise LoopyError("axis name '%s' not understood " % ax)
            else:
                return ax

        vary_by_axes = [map_ax_name_to_index(ax) for ax in vary_by_axes]

        # Valid axis indices are 0 .. num_user_axes()-1, so an index equal
        # to the axis count is already out of bounds.
        if (
                vary_by_axes
                and
                (min(vary_by_axes) < 0
                or
                max(vary_by_axes) >= var_descr.num_user_axes())):
            raise LoopyError("vary_by_axes refers to out-of-bounds axis index")

    # }}}

    from pymbolic.mapper.substitutor import make_subst_func
    from pymbolic.primitives import (Sum, Product, is_zero,
            flattened_sum, flattened_product, Subscript, Variable)
    from loopy.symbolic import (get_dependencies, SubstitutionMapper,
            UnidirectionalUnifier)

    # {{{ common factor key list maintenance

    # list of (index_key, common factors found)
    common_factors = []

    def find_unifiable_cf_index(index_key):
        """Return ``(position, unification)`` of the first recorded key that
        unifies with *index_key*, or ``(None, None)`` if there is none."""
        for i, (key, _val) in enumerate(common_factors):
            unif = UnidirectionalUnifier(
                    lhs_mapping_candidates=get_dependencies(key))

            unif_result = unif(key, index_key)

            if unif_result:
                assert len(unif_result) == 1
                return i, unif_result[0]

        return None, None

    def extract_index_key(access_expr):
        """Return the subscript entries of *access_expr* at *vary_by_axes*."""
        if isinstance(access_expr, Variable):
            return ()

        elif isinstance(access_expr, Subscript):
            index = access_expr.index_tuple
            return tuple(index[ax] for ax in vary_by_axes)
        else:
            raise ValueError("unexpected type of access_expr")

    def is_assignee(insn):
        return any(
                lhs == var_name
                for lhs, sbscript in insn.assignees_and_indices())

    def iterate_as(cls, expr):
        """Yield the children of *expr* if it is a *cls*, else *expr* itself."""
        if isinstance(expr, cls):
            for ch in expr.children:
                yield ch
        else:
            yield expr

    def checked_product_parts(term, insn):
        """Return the set of product factors of *term*, refusing factors
        that depend on *var_name*."""
        parts = list(iterate_as(Product, term))
        for part in parts:
            if var_name in get_dependencies(part):
                raise LoopyError("unexpected dependency on '%s' "
                        "in RHS of instruction '%s'"
                        % (var_name, insn.id))
        return set(parts)

    # }}}

    # {{{ find common factors

    from loopy.kernel.data import Assignment

    for insn in kernel.instructions:
        if not is_assignee(insn):
            continue

        if not isinstance(insn, Assignment):
            raise LoopyError("'%s' modified by non-expression instruction"
                    % var_name)

        lhs = insn.assignee
        rhs = insn.expression

        if is_zero(rhs):
            continue

        index_key = extract_index_key(lhs)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            # {{{ doesn't exist yet

            assert unif_result is None

            my_common_factors = None

            for term in iterate_as(Sum, rhs):
                if term == lhs:
                    continue

                product_parts = checked_product_parts(term, insn)

                if my_common_factors is None:
                    my_common_factors = product_parts
                else:
                    my_common_factors = my_common_factors & product_parts

            # None means every term was the LHS itself: nothing to record.
            if my_common_factors is not None:
                common_factors.append((index_key, my_common_factors))

            # }}}
        else:
            # {{{ match, filter existing common factors

            _, my_common_factors = common_factors[cf_index]

            # Maps factors recorded under the stored key to this
            # instruction's index key.
            unif_subst_map = SubstitutionMapper(
                    make_subst_func(unif_result.lmap))

            for term in iterate_as(Sum, rhs):
                if term == lhs:
                    continue

                product_parts = checked_product_parts(term, insn)

                my_common_factors = set(
                        cf for cf in my_common_factors
                        if unif_subst_map(cf) in product_parts)

            common_factors[cf_index] = (index_key, my_common_factors)

            # }}}

    # }}}

    # {{{ remove common factors

    new_insns = []

    for insn in kernel.instructions:
        if not isinstance(insn, Assignment) or not is_assignee(insn):
            new_insns.append(insn)
            continue

        lhs = insn.assignee
        rhs = insn.expression

        if is_zero(rhs):
            new_insns.append(insn)
            continue

        index_key = extract_index_key(lhs)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            new_insns.append(insn)
            continue

        _, my_common_factors = common_factors[cf_index]

        unif_subst_map = SubstitutionMapper(
                make_subst_func(unif_result.lmap))

        mapped_my_common_factors = set(
                unif_subst_map(cf)
                for cf in my_common_factors)

        new_sum_terms = []

        for term in iterate_as(Sum, rhs):
            if term == lhs:
                new_sum_terms.append(term)
                continue

            new_sum_terms.append(
                    flattened_product([
                        part
                        for part in iterate_as(Product, term)
                        if part not in mapped_my_common_factors
                        ]))

        new_insns.append(
                insn.copy(expression=flattened_sum(new_sum_terms)))

    # }}}

    # {{{ substitute common factors into usage sites

    def find_substitution(expr):
        if isinstance(expr, Subscript):
            v = expr.aggregate.name
        elif isinstance(expr, Variable):
            v = expr.name
        else:
            return expr

        if v != var_name:
            return expr

        index_key = extract_index_key(expr)
        cf_index, unif_result = find_unifiable_cf_index(index_key)

        if cf_index is None:
            # No factors were recorded for an index key matching this
            # access, so there is nothing to multiply back in.
            return expr

        unif_subst_map = SubstitutionMapper(
                make_subst_func(unif_result.lmap))

        _, my_common_factors = common_factors[cf_index]

        if my_common_factors is not None:
            return flattened_product(
                    [unif_subst_map(cf) for cf in my_common_factors]
                    + [expr])
        else:
            return expr

    insns = new_insns
    new_insns = []

    subm = SubstitutionMapper(find_substitution)

    for insn in insns:
        # Writers of var_name were already handled above; only readers get
        # the factors multiplied back in.
        if not isinstance(insn, Assignment) or is_assignee(insn):
            new_insns.append(insn)
            continue

        new_insns.append(insn.with_transformed_expressions(subm))

    # }}}

    return kernel.copy(instructions=new_insns)
Ejemplo n.º 8
0
Archivo: data.py Proyecto: shwina/loopy
def add_prefetch(kernel, var_name, sweep_inames=(), dim_arg_names=None,

        # "None" is a valid value here, distinct from the default.
        default_tag=_not_provided,

        rule_name=None,
        temporary_name=None,
        temporary_scope=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False,
        fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    :arg var_name: A string, the name of the variable being prefetched.
        This may be a 'tagged variable name' (such as ``field$mytag``)
        to restrict the effect of the operation to only variable accesses
        with a matching tag.

        This may also be a subscripted version of the variable, in which
        case this access dictates the footprint that is prefetched,
        e.g. ``A[:,:]`` or ``field[i,j,:,:]``. In this case, accesses
        in the kernel are disregarded.

    :arg sweep_inames: A list of inames, or a comma-separated string of them.
        This routine 'sweeps' all accesses to *var_name* through all allowed
        values of the *sweep_inames* to generate a footprint. All values
        in this footprint are then stored in a temporary variable, and
        the original variable accesses replaced with accesses to this
        temporary. The passed sequence is copied, never modified.

    :arg dim_arg_names: List of names representing each fetch axis.
        These names show up as inames in the generated fetch code

    :arg default_tag: The :ref:`implementation tag <iname-tags>` to
        assign to the inames driving the prefetch code. Use *None* to
        leave them undefined (to assign them later by hand). The current
        default will make them local axes and automatically split them to
        fit the work group size, but this default will disappear in favor
        of simply leaving them untagged in 2019.x. For 2018.x, a warning
        will be issued if no *default_tag* is specified.

    :arg rule_name: Name of the substitution rule into which the accesses
        are extracted (via :func:`extract_subst`). If *None*, a unique name
        derived from *var_name* is generated.
    :arg temporary_name: The name of the temporary to be used. If *None*,
        a unique name derived from *var_name* is generated.
    :arg temporary_scope: The :class:`temp_var_scope` to use for the
        temporary.
    :arg temporary_is_local: Deprecated, use *temporary_scope* instead.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    :arg fetch_bounding_box: To fit within :mod:`loopy`'s execution model,
        the 'footprint' of the fetch currently has to be a convex set.
        Sometimes this is not the case, e.g. for a high-order stencil::

              o
              o
            ooooo
              o
              o

        The footprint of the stencil when 'swept' over a base domain
        would look like this, and because of the 'missing corners',
        this set is not convex::

              oooooooooo
              oooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
              oooooooooo
              oooooooooo

        Passing ``fetch_bounding_box=True`` gives :mod:`loopy` permission
        to instead fetch the 'bounding box' of the footprint, i.e.
        this set in the stencil example::

            OOooooooooooOO
            OOooooooooooOO
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            oooooooooooooo
            OOooooooooooOO
            OOooooooooooOO

        Note the added corners marked with "``O``". The resulting footprint is
        guaranteed to be convex.


    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    :returns: The kernel produced by :func:`precompute`, with the inames
        that were temporarily added for slice sweeps projected out of its
        domains and any remaining uses of the rule expanded.

    :raises TypeError: if *var_name* contains a subscript and
        *footprint_subscripts* is given as well.
    :raises ValueError: if *var_name* parses to something other than a
        variable or a subscript.

    This function internally uses :func:`extract_subst` and :func:`precompute`.
    """

    # {{{ fish indexing out of var_name and into footprint_subscripts

    from loopy.symbolic import parse
    parsed_var_name = parse(var_name)

    from pymbolic.primitives import Variable, Subscript
    if isinstance(parsed_var_name, Variable):
        # nothing to see
        pass
    elif isinstance(parsed_var_name, Subscript):
        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed_var_name.aggregate, Variable)
        footprint_subscripts = [parsed_var_name.index]
        parsed_var_name = parsed_var_name.aggregate
    else:
        raise ValueError("var_name must either be a variable name or a subscript")

    # }}}

    # {{{ fish out tag

    from loopy.symbolic import TaggedVariable
    var_name = parsed_var_name.name
    if isinstance(parsed_var_name, TaggedVariable):
        tag = parsed_var_name.tag
    else:
        tag = None

    # }}}

    # c_name is the basis for all generated identifiers; the tag (if any)
    # is appended so that differently tagged fetches get distinct names.
    c_name = var_name
    if tag is not None:
        c_name = c_name + "_" + tag

    var_name_gen = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = var_name_gen("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = var_name_gen("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for i in range(arg.num_user_axes()):
        # Precedence for the base name (lowest to highest): axis number,
        # the argument's own axis name, user-supplied dim_arg_names.
        based_on = "%s_dim_%d" % (c_name, i)
        if arg.dim_names is not None:
            based_on = "%s_dim_%s" % (c_name, arg.dim_names[i])
        if dim_arg_names is not None and i < len(dim_arg_names):
            based_on = dim_arg_names[i]

        par_name = var_name_gen(based_on=based_on)
        parameters.append(par_name)

    from pymbolic import var
    uni_template = parsed_var_name
    if len(parameters) > 1:
        uni_template = uni_template.index(
                tuple(var(par_name) for par_name in parameters))
    elif len(parameters) == 1:
        uni_template = uni_template.index(var(parameters[0]))

    # }}}

    from loopy.transform.subst import extract_subst
    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    if isinstance(sweep_inames, str):
        sweep_inames = [s.strip() for s in sweep_inames.split(",")]
    else:
        # copy, standardize to list
        sweep_inames = list(sweep_inames)

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    # Our _not_provided is actually a different object from the one in the
    # precompute module, but precompute actually uses that to adjust its
    # warning message.

    from loopy.transform.precompute import precompute
    new_kernel = precompute(kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag, dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_scope=temporary_scope, temporary_is_local=temporary_is_local,
            precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_domain_index = kernel.get_home_domain_index(iname)
        domain = new_domains[home_domain_index]

        dt, idx = domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_domain_index] = domain.project_out(dt, idx, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    if rule_name in new_kernel.substitutions:
        from loopy.transform.subst import expand_subst
        return expand_subst(new_kernel, "... > id:"+rule_name)
    else:
        return new_kernel
Ejemplo n.º 9
0
def find_all_insn_inames(kernel):
    """Determine, for every instruction of *kernel*, the inames it sits in.

    Substitution rules are expanded first. Each instruction starts out with
    its ``within_inames``. Unless ``within_inames_is_final`` is set on the
    instruction, that set is extended by every iname occurring among the
    names the instruction reads or writes and then grown to a fixed point
    using

    * the result of :func:`guess_iname_deps_based_on_var_use`, and
    * the parameters of the home domains of the inames found so far: a
      parameter that is itself an iname is added, and for a parameter that
      is a temporary variable the inames of all its writers are added.

    Every iname added during the fixed-point phase is reported through an
    ``inferred_iname`` warning.

    :returns: a :class:`dict` mapping instruction ids to :class:`frozenset`
        instances of iname names.
    """
    logger.debug("%s: find_all_insn_inames: start" % kernel.name)

    # NOTE: taken from the kernel as passed in, i.e. before substitution
    # rules are expanded below.
    writer_map = kernel.writer_map()

    insn_id_to_inames = {}

    from loopy.transform.subst import expand_subst
    kernel = expand_subst(kernel)

    # The kernel is not modified from here on, so look this up only once.
    all_inames = kernel.all_inames()

    for insn in kernel.instructions:
        read_deps = insn.read_dependency_names()
        write_deps = insn.write_dependency_names()
        deps = read_deps | write_deps

        if insn.within_inames_is_final:
            iname_deps = insn.within_inames
        else:
            iname_deps = (deps & all_inames) | insn.within_inames

        assert isinstance(read_deps, frozenset), type(insn)
        assert isinstance(write_deps, frozenset), type(insn)
        assert isinstance(iname_deps, frozenset), type(insn)

        logger.debug("%s: find_all_insn_inames: %s (init): %s - "
                     "read deps: %s - write deps: %s" % (
                         kernel.name,
                         insn.id,
                         ", ".join(sorted(iname_deps)),
                         ", ".join(sorted(read_deps)),
                         ", ".join(sorted(write_deps)),
                     ))

        insn_id_to_inames[insn.id] = iname_deps

    # fixed point iteration until all iname dep sets have converged

    # Why is fixed point iteration necessary here? Consider the following
    # scenario:
    #
    # z = expr(iname)
    # y = expr(z)
    # x = expr(y)
    #
    # x clearly has a dependency on iname, but this is not found until that
    # dependency has propagated all the way up. Doing this recursively is
    # not guaranteed to terminate because of circular dependencies.

    while True:
        did_something = False
        for insn in kernel.instructions:

            if insn.within_inames_is_final:
                continue

            # {{{ dependency-based propagation

            inames_old = insn_id_to_inames[insn.id]
            inames_new = inames_old | guess_iname_deps_based_on_var_use(
                kernel, insn, insn_id_to_inames)

            insn_id_to_inames[insn.id] = inames_new

            if inames_new != inames_old:
                did_something = True

                # Sorted so that the message does not depend on set
                # iteration order.
                warn_with_kernel(
                    kernel, "inferred_iname",
                    "The iname(s) '%s' on instruction '%s' "
                    "was/were automatically added. "
                    "This is deprecated. Please add the iname "
                    "to the instruction "
                    "explicitly, e.g. by adding 'for' loops" %
                    (", ".join(sorted(inames_new - inames_old)), insn.id))

            # }}}

            # {{{ domain-based propagation

            inames_old = insn_id_to_inames[insn.id]
            inames_new = set(inames_old)

            for iname in inames_old:
                home_domain = kernel.domains[kernel.get_home_domain_index(
                    iname)]

                for par in home_domain.get_var_names(dim_type.param):
                    # Add all inames occurring in parameters of domains that my
                    # current inames refer to.

                    if par in all_inames:
                        inames_new.add(intern(par))

                    # If something writes the bounds of a loop in which I'm
                    # sitting, I had better be in the inames that the writer is
                    # in.

                    if par in kernel.temporary_variables:
                        for writer_id in writer_map.get(par, []):
                            inames_new.update(insn_id_to_inames[writer_id])

            if inames_new != inames_old:
                did_something = True
                insn_id_to_inames[insn.id] = frozenset(inames_new)

                warn_with_kernel(
                    kernel, "inferred_iname",
                    "The iname(s) '%s' on instruction '%s' was "
                    "automatically added. "
                    "This is deprecated. Please add the iname "
                    "to the instruction "
                    "explicitly, e.g. by adding 'for' loops" %
                    (", ".join(sorted(inames_new - inames_old)), insn.id))

            # }}}

        if not did_something:
            break

    logger.debug("%s: find_all_insn_inames: done" % kernel.name)

    for v in six.itervalues(insn_id_to_inames):
        assert isinstance(v, frozenset)

    return insn_id_to_inames
Ejemplo n.º 10
0
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Temporaries whose dtype is ``loopy.auto`` and arguments whose dtype is
    *None* are collected, grouped into the strongly connected components of
    their read/write dependency graph, and each component is iterated until
    the inferred types stop changing.

    :arg expect_completion: If true, a :class:`LoopyError` is raised when
        the type of a name repeatedly cannot be determined. Otherwise
        inference for the affected component is abandoned silently.

    :returns: a copy of the (unexpanded) input kernel with the
        ``temporary_variables`` and ``args`` carrying the inferred types.
    """

    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        if tv.dtype is lp.auto:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
        count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # Set copy for O(1) membership tests; the list is kept for building the
    # graph so that its key order is unchanged.
    names_needing_types = frozenset(names_for_type_inference)

    dep_graph = dict(
        (written_var,
         set(read_var
             for insn_id in writer_map.get(written_var, [])
             for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
             if read_var in names_needing_types))
        for written_var in names_for_type_inference)

    from loopy.tools import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([new_temp_vars, new_arg_dict])
    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (_infer_var_type(
                kernel, item.name, type_inf_mapper, subst_expander))

            if result:
                new_dtype, = result
                debug("     success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug("     changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError(
                            "unexpected item type in type inference")

                # we've made progress, reset failure markers
                failed_names = set()
                continue

            debug("     failure")

            if item.name in failed_names:
                # this item has failed before, give up.
                advice = ""
                if symbols_with_unavailable_types:
                    advice += (
                        " (need type of '%s'--check for missing arguments)"
                        % ", ".join(symbols_with_unavailable_types))

                if expect_completion:
                    raise LoopyError("could not determine type of '%s'%s" %
                                     (item.name, advice))

                # We're done here.
                break

            # remember that this item failed
            failed_names.add(item.name)

            if set(queue) == failed_names:
                # We did what we could...
                # (Reported through the debug log rather than stdout.)
                debug("     giving up: queue=%s failed=%s current=%s",
                      queue, failed_names, item.name)
                assert not expect_completion
                break

            # can't infer type yet, put back into queue
            queue.append(name)

    # }}}

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(dur=end_time -
                                                                start_time))

    return unexpanded_kernel.copy(
        temporary_variables=new_temp_vars,
        args=[new_arg_dict[arg.name] for arg in kernel.args],
    )
Ejemplo n.º 11
0
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Temporaries whose dtype is ``loopy.auto`` and arguments whose dtype is
    *None* are placed in a FIFO queue. An item whose type cannot be inferred
    yet is put back at the end of the queue; processing stops once an item
    fails a second time without any other item having succeeded in between.

    :arg expect_completion: If true, a :class:`LoopyError` is raised when
        the type of a name cannot be determined. Otherwise inference stops
        silently and the remaining types are left as they were.

    :returns: a copy of the (unexpanded) input kernel with the
        ``temporary_variables`` and ``args`` carrying the inferred types.
    """

    logger.debug("%s: infer types" % kernel.name)

    def debug(s):
        logger.debug("%s: %s" % (kernel.name, s))

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ fill queue

    # queue contains temporary variables and arguments; a deque gives O(1)
    # removal from the front.
    from collections import deque
    queue = deque()

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        if tv.dtype is lp.auto:
            queue.append(tv)

    for arg in kernel.args:
        if arg.dtype is None:
            queue.append(arg)

    # }}}

    from loopy.expression import TypeInferenceMapper
    type_inf_mapper = TypeInferenceMapper(kernel,
            _DictUnionView([
                new_temp_vars,
                new_arg_dict
                ]))

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    failed_names = set()
    while queue:
        item = queue.popleft()

        debug("inferring type for %s %s" % (type(item).__name__, item.name))

        result, symbols_with_unavailable_types = \
                _infer_var_type(kernel, item.name, type_inf_mapper, subst_expander)

        if result is not None:
            debug("     success: %s" % result)
            if isinstance(item, TemporaryVariable):
                new_temp_vars[item.name] = item.copy(dtype=result)
            elif isinstance(item, KernelArgument):
                new_arg_dict[item.name] = item.copy(dtype=result)
            else:
                raise LoopyError("unexpected item type in type inference")

            # we've made progress, reset failure markers
            failed_names = set()
            continue

        debug("     failure")

        if item.name in failed_names:
            # this item has failed before, give up.
            advice = ""
            if symbols_with_unavailable_types:
                advice += (
                        " (need type of '%s'--check for missing arguments)"
                        % ", ".join(symbols_with_unavailable_types))

            if expect_completion:
                raise LoopyError(
                        "could not determine type of '%s'%s"
                        % (item.name, advice))

            # We're done here.
            break

        # remember that this item failed
        failed_names.add(item.name)

        queue_names = set(qi.name for qi in queue)

        if queue_names == failed_names:
            # We did what we could...
            # (Reported through the debug log rather than stdout.)
            debug("     giving up: queue=%s failed=%s current=%s"
                    % (queue_names, failed_names, item.name))
            assert not expect_completion
            break

        # can't infer type yet, put back into queue
        queue.append(item)

    # }}}

    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )
Ejemplo n.º 12
0
def find_all_insn_inames(kernel):
    """Determine, for every instruction of *kernel*, the inames it sits in.

    Substitution rules are expanded first. Each instruction starts out with
    its ``forced_iname_deps``. Unless ``forced_iname_deps_is_final`` is set
    on the instruction, that set is extended by every iname occurring among
    the names the instruction reads or writes and then grown to a fixed
    point using

    * for each written variable the instruction reads, the inames common to
      all writers of that variable (excluding each writer's assignee inames
      and this instruction's reduction inames), and
    * the parameters of the home domains of the inames found so far: a
      parameter that is itself an iname is added, and for a parameter that
      is a temporary variable the inames of all its writers are added.

    :returns: a :class:`dict` mapping instruction ids to :class:`frozenset`
        instances of iname names.
    """
    logger.debug("%s: find_all_insn_inames: start" % kernel.name)

    # NOTE: taken from the kernel as passed in, i.e. before substitution
    # rules are expanded below.
    writer_map = kernel.writer_map()

    insn_id_to_inames = {}
    insn_assignee_inames = {}

    all_read_deps = {}

    from loopy.transform.subst import expand_subst

    kernel = expand_subst(kernel)

    # The kernel is not modified from here on, so look this up only once.
    all_inames = kernel.all_inames()

    for insn in kernel.instructions:
        all_read_deps[insn.id] = read_deps = insn.read_dependency_names()
        write_deps = insn.write_dependency_names()
        deps = read_deps | write_deps

        if insn.forced_iname_deps_is_final:
            iname_deps = insn.forced_iname_deps
        else:
            iname_deps = (deps & all_inames) | insn.forced_iname_deps

        assert isinstance(read_deps, frozenset), type(insn)
        assert isinstance(write_deps, frozenset), type(insn)
        assert isinstance(iname_deps, frozenset), type(insn)

        logger.debug(
            "%s: find_all_insn_inames: %s (init): %s - "
            "read deps: %s - write deps: %s"
            % (
                kernel.name,
                insn.id,
                ", ".join(sorted(iname_deps)),
                ", ".join(sorted(read_deps)),
                ", ".join(sorted(write_deps)),
            )
        )

        insn_id_to_inames[insn.id] = iname_deps
        insn_assignee_inames[insn.id] = write_deps & all_inames

    written_vars = kernel.get_written_variables()

    # fixed point iteration until all iname dep sets have converged

    # Why is fixed point iteration necessary here? Consider the following
    # scenario:
    #
    # z = expr(iname)
    # y = expr(z)
    # x = expr(y)
    #
    # x clearly has a dependency on iname, but this is not found until that
    # dependency has propagated all the way up. Doing this recursively is
    # not guaranteed to terminate because of circular dependencies.

    while True:
        did_something = False
        for insn in kernel.instructions:

            if insn.forced_iname_deps_is_final:
                continue

            # {{{ dependency-based propagation

            # For all variables that insn depends on, find the intersection
            # of iname deps of all writers, and add those to insn's
            # dependencies.

            for tv_name in all_read_deps[insn.id] & written_vars:
                implicit_inames = None

                for writer_id in writer_map[tv_name]:
                    writer_implicit_inames = (
                        insn_id_to_inames[writer_id]
                        - insn_assignee_inames[writer_id])
                    if implicit_inames is None:
                        implicit_inames = writer_implicit_inames
                    else:
                        implicit_inames = implicit_inames & writer_implicit_inames

                inames_old = insn_id_to_inames[insn.id]
                inames_new = (inames_old | implicit_inames) - insn.reduction_inames()
                insn_id_to_inames[insn.id] = inames_new

                if inames_new != inames_old:
                    did_something = True
                    logger.debug(
                        "%s: find_all_insn_inames: %s -> %s (dep-based)"
                        % (kernel.name, insn.id, ", ".join(sorted(inames_new)))
                    )

            # }}}

            # {{{ domain-based propagation

            inames_old = insn_id_to_inames[insn.id]
            inames_new = set(inames_old)

            for iname in inames_old:
                home_domain = kernel.domains[kernel.get_home_domain_index(iname)]

                for par in home_domain.get_var_names(dim_type.param):
                    # Add all inames occurring in parameters of domains that my
                    # current inames refer to.

                    if par in all_inames:
                        inames_new.add(intern(par))

                    # If something writes the bounds of a loop in which I'm
                    # sitting, I had better be in the inames that the writer is
                    # in.

                    if par in kernel.temporary_variables:
                        for writer_id in writer_map.get(par, []):
                            inames_new.update(insn_id_to_inames[writer_id])

            if inames_new != inames_old:
                did_something = True
                insn_id_to_inames[insn.id] = frozenset(inames_new)
                logger.debug(
                    "%s: find_all_insn_inames: %s -> %s (domain-based)"
                    % (kernel.name, insn.id, ", ".join(sorted(inames_new)))
                )

            # }}}

        if not did_something:
            break

    logger.debug("%s: find_all_insn_inames: done" % kernel.name)

    for v in six.itervalues(insn_id_to_inames):
        assert isinstance(v, frozenset)

    return insn_id_to_inames
Ejemplo n.º 13
0
def preprocess_kernel(kernel, device=None):
    """Run the preprocessing passes on *kernel* and return the result.

    The kernel must be in state ``kernel_state.INITIAL``; the returned
    kernel is in state ``kernel_state.PREPROCESSED``. When caching is
    enabled, a previously computed result for the same input kernel is
    returned directly, and new results are stored in ``preprocess_cache``.

    :arg device: Deprecated. Passing anything other than *None* only
        triggers a :class:`DeprecationWarning`.

    :raises LoopyError: if *kernel* has already been preprocessed, or if
        any of its inames carries an automatically-assigned local axis tag.
    """
    if device is not None:
        import warnings
        warnings.warn(
                "passing 'device' to preprocess_kernel() is deprecated",
                DeprecationWarning, stacklevel=2)

    from loopy.kernel import kernel_state
    if kernel.state != kernel_state.INITIAL:
        raise LoopyError(
                "cannot re-preprocess an already preprocessed kernel")

    # {{{ cache retrieval

    from loopy import CACHING_ENABLED
    if CACHING_ENABLED:
        # Remember the untouched input; it is the cache key later on.
        input_kernel = kernel

        try:
            cached_result = preprocess_cache[kernel]
        except KeyError:
            pass
        else:
            logger.debug("%s: preprocess cache hit" % kernel.name)
            return cached_result

    # }}}

    logger.info("%s: preprocess start" % kernel.name)

    from loopy.check import check_identifiers_in_subst_rules
    check_identifiers_in_subst_rules(kernel)

    # {{{ check that there are no l.auto-tagged inames

    from loopy.kernel.data import AutoLocalIndexTagBase
    has_auto_local_iname = any(
            isinstance(iname_tag, AutoLocalIndexTagBase)
            and iname_name in kernel.all_inames()
            for iname_name, iname_tag in six.iteritems(kernel.iname_to_tag))
    if has_auto_local_iname:
        raise LoopyError(
                "kernel with automatically-assigned local axes "
                "passed to preprocessing")

    # }}}

    # Ordering restriction:
    # Type inference and reduction iname uniqueness don't handle substitutions.
    # Get them out of the way.

    from loopy.transform.subst import expand_subst
    kernel = expand_subst(kernel)

    kernel = infer_unknown_types(kernel, expect_completion=False)

    check_reduction_iname_uniqueness(kernel)

    kernel = add_default_dependencies(kernel)

    # Ordering restrictions:
    #
    # - realize_reduction must happen after type inference because it needs
    #   to be able to determine the types of the reduced expressions.
    #
    # - realize_reduction must happen after default dependencies are added
    #   because it manipulates the depends_on field, which could prevent
    #   defaults from being applied.

    kernel = realize_reduction(kernel, unknown_types_ok=False)

    # Ordering restriction:
    # add_axes_to_temporaries_for_ilp because reduction accumulators
    # need to be duplicated by this.

    from loopy.transform.ilp import add_axes_to_temporaries_for_ilp_and_vec
    kernel = add_axes_to_temporaries_for_ilp_and_vec(kernel)

    kernel = find_temporary_scope(kernel)

    # boostability should be removed in 2017.x.
    kernel = limit_boostability(find_idempotence(kernel))

    kernel = kernel.target.preprocess(kernel)

    logger.info("%s: preprocess done" % kernel.name)

    kernel = kernel.copy(state=kernel_state.PREPROCESSED)

    # {{{ prepare for caching

    # PicklableDtype instances for example need to know the target they're working
    # towards in order to pickle and unpickle them. This is the first pass that
    # uses caching, so we need to be ready to pickle. This means propagating
    # this target information.

    if CACHING_ENABLED:
        input_kernel = prepare_for_caching(input_kernel)

    kernel = prepare_for_caching(kernel)

    # }}}

    if CACHING_ENABLED:
        preprocess_cache[input_kernel] = kernel

    return kernel
Ejemplo n.º 14
0
def infer_unknown_types_for_a_single_kernel(kernel, clbl_inf_ctx):
    """Infer types on temporaries and arguments.

    Every temporary variable and argument whose ``dtype`` is *None* is
    collected. The names are grouped into the strongly connected components
    of their "is written from" dependency graph, and each component is
    iterated until the inferred types stop changing or inference fails
    repeatedly. A name whose type cannot be determined keeps its ``dtype``
    of *None*; the loop below only logs the failure.

    :arg kernel: the kernel to process. If it has substitution rules, they
        are expanded on a working copy that is used for inference only.
    :arg clbl_inf_ctx: the callables inference context handed to
        :class:`TypeInferenceMapper`; it is replaced by the context returned
        from each inference step.
    :returns: a tuple ``(type_specialized_kernel, clbl_inf_ctx)``. The
        kernel is built from the *unexpanded* input kernel with the updated
        temporaries and arguments, after
        :func:`change_names_of_pymbolic_calls` has been applied.
    """

    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    # Updated entries go into these copies; 'kernel' itself keeps the
    # dtypes it had on entry.
    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in kernel.temporary_variables.values():
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
        count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # Set for O(1) membership tests; the list above still fixes the
    # iteration order used to build the graph.
    names_with_unknown_types = frozenset(names_for_type_inference)

    dep_graph = {
        written_var: {
            read_var
            for insn_id in writer_map.get(written_var, [])
            for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
            if read_var in names_with_unknown_types
        }
        for written_var in names_for_type_inference
    }

    from pytools.graph import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([new_temp_vars, new_arg_dict])
    type_inf_mapper = TypeInferenceMapper(kernel, clbl_inf_ctx, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    old_calls_to_new_calls = {}

    for var_chain in sccs:
        changed_during_last_queue_run = False
        var_queue = var_chain[:]
        failed_names = set()

        while var_queue or changed_during_last_queue_run:
            if not var_queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                var_queue = var_chain[:]

            name = var_queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)
            try:
                (result, symbols_with_unknown_types,
                 new_old_calls_to_new_calls,
                 clbl_inf_ctx) = (_infer_var_type(kernel, item.name,
                                                  type_inf_mapper,
                                                  subst_expander))
            except DependencyTypeInferenceFailure:
                # Treated like "no result": clbl_inf_ctx keeps its previous
                # value and the name is handled by the failure branch below.
                result = ()
                symbols_with_unknown_types = ()
            type_inf_mapper = type_inf_mapper.copy(clbl_inf_ctx=clbl_inf_ctx)

            if result:
                new_dtype, = result

                debug("     success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug("     changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError(
                            "unexpected item type in type inference")
                old_calls_to_new_calls.update(new_old_calls_to_new_calls)

                # we've made progress, reset failure markers
                failed_names = set()

            else:
                debug("     failure")

                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unknown_types:
                        advice += (
                            " (need type of '%s'--check for missing arguments)"
                            % ", ".join(symbols_with_unknown_types))

                    debug("could not determine type of '%s'%s" %
                          (item.name, advice))
                    # We're done here
                    break

                # remember that this item failed
                failed_names.add(item.name)

                if set(var_queue) == failed_names:
                    # We did what we could...
                    # Report through the debug channel rather than printing
                    # to stdout from library code.
                    debug("giving up: queue=%s, failed=%s, last=%s",
                          var_queue, sorted(failed_names), item.name)
                    break

                # can't infer type yet, put back into var_queue
                var_queue.append(name)

    # }}}

    # {{{ check if insn missed during type inference

    def _instruction_missed_during_inference(insn):
        """Return *False* if any assignee of *insn* had ``dtype is None``
        in *kernel* (i.e. before the inference loop above ran), and *True*
        otherwise.

        NOTE(review): presumably an unknown-dtype assignee means the
        instruction was already visited while inferring that variable;
        confirm against ``_infer_var_type``.
        """
        for assignee in insn.assignees:
            if isinstance(assignee, Lookup):
                assignee = assignee.aggregate

            # Reduce every supported assignee form to the written name.
            if isinstance(assignee, Variable):
                assignee_name = assignee.name
            elif isinstance(assignee, (Subscript, LinearSubscript)):
                assignee_name = assignee.aggregate.name
            else:
                assert isinstance(assignee, SubArrayRef)
                assignee_name = assignee.subscript.aggregate.name

            if assignee_name in kernel.arg_dict:
                if kernel.arg_dict[assignee_name].dtype is None:
                    return False
            else:
                assert assignee_name in kernel.temporary_variables
                # Check the temporary's dtype, not the temporary itself
                # (which is never None).
                if kernel.temporary_variables[assignee_name].dtype is None:
                    return False

        return True

    # }}}

    for insn in kernel.instructions:
        if isinstance(insn, lp.MultiAssignmentBase):
            # just a dummy run over the expression, to pass over all the
            # functions
            if _instruction_missed_during_inference(insn):
                type_inf_mapper(insn.expression,
                                return_tuple=len(insn.assignees) != 1,
                                return_dtype_set=True)
        elif isinstance(insn, (_DataObliviousInstruction, lp.CInstruction)):
            pass
        else:
            raise NotImplementedError("Unknown instructions type %s." %
                                      (type(insn).__name__))

    clbl_inf_ctx = type_inf_mapper.clbl_inf_ctx
    old_calls_to_new_calls.update(type_inf_mapper.old_calls_to_new_calls)

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(dur=end_time -
                                                                start_time))

    # The result is based on the kernel as passed in, so substitution rules
    # are still present (unexpanded) in the returned kernel.
    pre_type_specialized_knl = unexpanded_kernel.copy(
        temporary_variables=new_temp_vars,
        args=[new_arg_dict[arg.name] for arg in kernel.args],
    )

    type_specialized_kernel = change_names_of_pymbolic_calls(
        pre_type_specialized_knl, old_calls_to_new_calls)

    return type_specialized_kernel, clbl_inf_ctx
Ejemplo n.º 15
0
def add_prefetch(kernel,
                 var_name,
                 sweep_inames=[],
                 dim_arg_names=None,
                 default_tag="l.auto",
                 rule_name=None,
                 temporary_name=None,
                 temporary_scope=None,
                 temporary_is_local=None,
                 footprint_subscripts=None,
                 fetch_bounding_box=False,
                 fetch_outer_inames=None):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    The accesses are first turned into a substitution rule via
    :func:`extract_subst`, which is then handed to :func:`precompute`.

    :arg var_name: name of a kernel argument, optionally with a subscript.
        A subscript is moved into *footprint_subscripts*.
    :arg sweep_inames: a comma-separated string or an iterable of inames.
    :arg dim_arg_names: List of names representing each fetch axis. Also
        passed to :func:`precompute` as *precompute_inames*.
    :arg rule_name: name of the substitution rule that is extracted. If
        *None*, one is generated based on ``"<name>_fetch_rule"``.
    :arg temporary_name: passed to :func:`precompute`. If *None*, a name is
        generated based on ``"<name>_fetch"``.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    :arg fetch_outer_inames: The inames within which the fetch
        instruction is nested. If *None*, make an educated guess.

    *default_tag*, *temporary_scope*, *temporary_is_local* and
    *fetch_bounding_box* are forwarded to :func:`precompute` unchanged.

    :raises TypeError: if *var_name* contains a subscript while
        *footprint_subscripts* is also given.
    :raises ValueError: if *var_name* parses to neither a variable nor a
        subscript.
    """

    from loopy.symbolic import parse, TaggedVariable
    from pymbolic.primitives import Variable, Subscript
    from pymbolic import var
    from loopy.transform.subst import extract_subst, expand_subst
    from loopy.transform.precompute import precompute

    # {{{ fish indexing out of var_name and into footprint_subscripts

    target = parse(var_name)

    if not isinstance(target, Variable):
        if not isinstance(target, Subscript):
            raise ValueError(
                "var_name must either be a variable name or a subscript")

        if footprint_subscripts is not None:
            raise TypeError(
                "if footprint_subscripts is specified, then var_name "
                "may not contain a subscript")

        assert isinstance(target.aggregate, Variable)
        footprint_subscripts = [target.index]
        target = target.aggregate

    # }}}

    # {{{ fish out tag

    var_name = target.name
    tag = target.tag if isinstance(target, TaggedVariable) else None

    # }}}

    # base for all generated names: variable name plus tag, if any
    c_name = var_name if tag is None else var_name + "_" + tag

    # The generator is stateful, so the order of the requests below matters.
    make_name = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = make_name("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = make_name("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for axis in range(arg.num_user_axes()):
        # later choices override earlier ones
        base = "%s_dim_%d" % (c_name, axis)
        if arg.dim_names is not None:
            base = "%s_dim_%s" % (c_name, arg.dim_names[axis])
        if dim_arg_names is not None and axis < len(dim_arg_names):
            base = dim_arg_names[axis]

        parameters.append(make_name(based_on=base))

    uni_template = target
    if parameters:
        if len(parameters) == 1:
            template_index = var(parameters[0])
        else:
            template_index = tuple(var(par) for par in parameters)
        uni_template = uni_template.index(template_index)

    # }}}

    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    # standardize to a fresh list
    sweep_inames = (
        [piece.strip() for piece in sweep_inames.split(",")]
        if isinstance(sweep_inames, str)
        else list(sweep_inames))

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    new_kernel = precompute(
        kernel, subst_use, sweep_inames,
        precompute_inames=dim_arg_names,
        default_tag=default_tag,
        dtype=arg.dtype,
        fetch_bounding_box=fetch_bounding_box,
        temporary_name=temporary_name,
        temporary_scope=temporary_scope,
        temporary_is_local=temporary_is_local,
        precompute_outer_inames=fetch_outer_inames)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        dom_idx = kernel.get_home_domain_index(iname)
        dom = new_domains[dom_idx]

        dt, pos = dom.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[dom_idx] = dom.project_out(dt, pos, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    if rule_name not in new_kernel.substitutions:
        return new_kernel

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    return expand_subst(new_kernel, "... > id:" + rule_name)
Ejemplo n.º 16
0
def find_all_insn_inames(kernel):
    """Determine the set of inames each instruction of *kernel* depends on.

    Starting from the inames that occur directly in each instruction's read
    and write dependencies (plus its ``forced_iname_deps``), further inames
    are added by fixed-point iteration: those guessed from variable use and
    those implied by parameters of the home domains of already-known inames.
    Instructions with ``forced_iname_deps_is_final`` set use their
    ``forced_iname_deps`` unchanged and are skipped during the iteration.

    An ``"inferred_iname"`` warning is issued through
    :func:`warn_with_kernel` each time inames are added automatically.

    :returns: a dict mapping instruction ids to :class:`frozenset` instances
        of iname names.
    """
    logger.debug("%s: find_all_insn_inames: start" % kernel.name)

    # Taken from the kernel as passed in, i.e. before substitution rules
    # are expanded below.
    writer_map = kernel.writer_map()

    insn_id_to_inames = {}
    # NOTE: the next three dicts are filled in below but not read again
    # within this function.
    insn_assignee_inames = {}

    all_read_deps = {}
    all_write_deps = {}

    from loopy.transform.subst import expand_subst
    kernel = expand_subst(kernel)

    for insn in kernel.instructions:
        all_read_deps[insn.id] = read_deps = insn.read_dependency_names()
        all_write_deps[insn.id] = write_deps = insn.write_dependency_names()
        deps = read_deps | write_deps

        if insn.forced_iname_deps_is_final:
            iname_deps = insn.forced_iname_deps
        else:
            # '&' binds tighter than '|': inames among the dependencies,
            # plus the forced ones.
            iname_deps = (
                    deps & kernel.all_inames()
                    | insn.forced_iname_deps)

        assert isinstance(read_deps, frozenset), type(insn)
        assert isinstance(write_deps, frozenset), type(insn)
        assert isinstance(iname_deps, frozenset), type(insn)

        logger.debug("%s: find_all_insn_inames: %s (init): %s - "
                "read deps: %s - write deps: %s" % (
                    kernel.name, insn.id, ", ".join(sorted(iname_deps)),
                    ", ".join(sorted(read_deps)), ", ".join(sorted(write_deps)),
                    ))

        insn_id_to_inames[insn.id] = iname_deps
        insn_assignee_inames[insn.id] = write_deps & kernel.all_inames()

    # fixed point iteration until all iname dep sets have converged

    # Why is fixed point iteration necessary here? Consider the following
    # scenario:
    #
    # z = expr(iname)
    # y = expr(z)
    # x = expr(y)
    #
    # x clearly has a dependency on iname, but this is not found until that
    # dependency has propagated all the way up. Doing this recursively is
    # not guaranteed to terminate because of circular dependencies.

    while True:
        did_something = False
        for insn in kernel.instructions:

            if insn.forced_iname_deps_is_final:
                continue

            # {{{ dependency-based propagation

            inames_old = insn_id_to_inames[insn.id]
            inames_new = inames_old | guess_iname_deps_based_on_var_use(
                    kernel, insn, insn_id_to_inames)

            insn_id_to_inames[insn.id] = inames_new

            if inames_new != inames_old:
                did_something = True

                warn_with_kernel(kernel, "inferred_iname",
                        "The iname(s) '%s' on instruction '%s' in kernel '%s' "
                        "was/were automatically added. "
                        "This is deprecated. Please add the iname "
                        "to the instruction "
                        "explicitly, e.g. by adding 'for' loops"
                        % (", ".join(inames_new-inames_old), insn.id, kernel.name))

            # }}}

            # {{{ domain-based propagation

            inames_old = insn_id_to_inames[insn.id]
            # mutable working copy; stored back as a frozenset only if it
            # ends up different from inames_old
            inames_new = set(insn_id_to_inames[insn.id])

            for iname in inames_old:
                home_domain = kernel.domains[kernel.get_home_domain_index(iname)]

                for par in home_domain.get_var_names(dim_type.param):
                    # Add all inames occurring in parameters of domains that my
                    # current inames refer to.

                    if par in kernel.all_inames():
                        inames_new.add(intern(par))

                    # If something writes the bounds of a loop in which I'm
                    # sitting, I had better be in the inames that the writer is
                    # in.

                    if par in kernel.temporary_variables:
                        for writer_id in writer_map.get(par, []):
                            inames_new.update(insn_id_to_inames[writer_id])

            if inames_new != inames_old:
                did_something = True
                insn_id_to_inames[insn.id] = frozenset(inames_new)

                # NOTE(review): the message below opens a quote before
                # '{inames=...}' that is never closed.
                warn_with_kernel(kernel, "inferred_iname",
                        "The iname(s) '%s' on instruction '%s' was "
                        "automatically added. "
                        "This is deprecated. Please add the iname "
                        "to the instruction "
                        "explicitly, e.g. by adding '{inames=...}"
                        % (", ".join(inames_new-inames_old), insn.id))

            # }}}

        # a full pass over all instructions changed nothing: converged
        if not did_something:
            break

    logger.debug("%s: find_all_insn_inames: done" % kernel.name)

    for v in six.itervalues(insn_id_to_inames):
        assert isinstance(v, frozenset)

    return insn_id_to_inames
Ejemplo n.º 17
0
def add_prefetch(kernel, var_name, sweep_inames=[], dim_arg_names=None,
        default_tag="l.auto", rule_name=None,
        temporary_name=None, temporary_is_local=None,
        footprint_subscripts=None,
        fetch_bounding_box=False):
    """Prefetch all accesses to the variable *var_name*, with all accesses
    being swept through *sweep_inames*.

    The accesses are first turned into a substitution rule via
    :func:`extract_subst`, which is then handed to :func:`precompute`.

    :arg var_name: name of a kernel argument, optionally with a subscript.
        A subscript is moved into *footprint_subscripts*.
    :arg sweep_inames: a comma-separated string or an iterable of inames.
    :arg dim_arg_names: List of names representing each fetch axis. Also
        passed to :func:`precompute` as *precompute_inames*.
    :arg rule_name: name of the substitution rule that is extracted. If
        *None*, one is generated based on ``"<name>_fetch_rule"``.
    :arg temporary_name: passed to :func:`precompute`. If *None*, a name is
        generated based on ``"<name>_fetch"``.
    :arg footprint_subscripts: A list of tuples indicating the index (i.e.
        subscript) tuples used to generate the footprint.

        If only one such set of indices is desired, this may also be specified
        directly by putting an index expression into *var_name*. Substitutions
        such as those occurring in dimension splits are recorded and also
        applied to these indices.

    *default_tag*, *temporary_is_local* and *fetch_bounding_box* are
    forwarded to :func:`precompute` unchanged.

    :raises TypeError: if *var_name* contains a subscript while
        *footprint_subscripts* is also given.
    :raises ValueError: if *var_name* parses to neither a variable nor a
        subscript.
    """

    from loopy.symbolic import parse, TaggedVariable
    from pymbolic.primitives import Variable, Subscript
    from pymbolic import var
    from loopy.transform.subst import extract_subst, expand_subst
    from loopy.transform.precompute import precompute

    # {{{ fish indexing out of var_name and into footprint_subscripts

    parsed = parse(var_name)

    if not isinstance(parsed, Variable):
        if not isinstance(parsed, Subscript):
            raise ValueError(
                    "var_name must either be a variable name or a subscript")

        if footprint_subscripts is not None:
            raise TypeError("if footprint_subscripts is specified, then var_name "
                    "may not contain a subscript")

        assert isinstance(parsed.aggregate, Variable)
        footprint_subscripts = [parsed.index]
        parsed = parsed.aggregate

    # }}}

    # {{{ fish out tag

    var_name = parsed.name
    tag = parsed.tag if isinstance(parsed, TaggedVariable) else None

    # }}}

    # base for all generated names: variable name plus tag, if any
    c_name = var_name if tag is None else var_name + "_" + tag

    # The generator is stateful, so the order of the requests below matters.
    unique_name = kernel.get_var_name_generator()

    if rule_name is None:
        rule_name = unique_name("%s_fetch_rule" % c_name)
    if temporary_name is None:
        temporary_name = unique_name("%s_fetch" % c_name)

    arg = kernel.arg_dict[var_name]

    # {{{ make parameter names and unification template

    parameters = []
    for axis_nr in range(arg.num_user_axes()):
        # later choices override earlier ones
        name_base = "%s_dim_%d" % (c_name, axis_nr)
        if arg.dim_names is not None:
            name_base = "%s_dim_%s" % (c_name, arg.dim_names[axis_nr])
        if dim_arg_names is not None and axis_nr < len(dim_arg_names):
            name_base = dim_arg_names[axis_nr]

        parameters.append(unique_name(based_on=name_base))

    uni_template = parsed
    if parameters:
        if len(parameters) == 1:
            subscript = var(parameters[0])
        else:
            subscript = tuple(var(par) for par in parameters)
        uni_template = uni_template.index(subscript)

    # }}}

    kernel = extract_subst(kernel, rule_name, uni_template, parameters)

    # standardize to a fresh list
    sweep_inames = (
            [part.strip() for part in sweep_inames.split(",")]
            if isinstance(sweep_inames, str)
            else list(sweep_inames))

    kernel, subst_use, sweep_inames, inames_to_be_removed = \
            _process_footprint_subscripts(
                    kernel,  rule_name, sweep_inames,
                    footprint_subscripts, arg)

    new_kernel = precompute(
            kernel, subst_use, sweep_inames,
            precompute_inames=dim_arg_names,
            default_tag=default_tag,
            dtype=arg.dtype,
            fetch_bounding_box=fetch_bounding_box,
            temporary_name=temporary_name,
            temporary_is_local=temporary_is_local)

    # {{{ remove inames that were temporarily added by slice sweeps

    new_domains = new_kernel.domains[:]

    for iname in inames_to_be_removed:
        home_idx = kernel.get_home_domain_index(iname)
        home_domain = new_domains[home_idx]

        dt, pos = home_domain.get_var_dict()[iname]
        assert dt == dim_type.set

        new_domains[home_idx] = home_domain.project_out(dt, pos, 1)

    new_kernel = new_kernel.copy(domains=new_domains)

    # }}}

    if rule_name not in new_kernel.substitutions:
        return new_kernel

    # If the rule survived past precompute() (i.e. some accesses fell outside
    # the footprint), get rid of it before moving on.
    return expand_subst(new_kernel, "... > id:"+rule_name)