예제 #1
0
def _get_topological_order(kernel):
    """
    Compute a topological ordering of the instructions of *kernel*.

    The dependency graph maps every instruction id to its ``depends_on``
    set and is handed to :func:`pytools.graph.compute_sccs`. Each strongly
    connected component is expected to hold exactly one instruction.

    :returns: a :class:`list` of insn ids, in the order in which
        :func:`pytools.graph.compute_sccs` yields their components.
    :raises loopy.diagnostic.DependencyCycleFound: if a component does not
        consist of exactly one instruction. The message lists the ids of
        the offending component, separated by ``", "``.
    """
    from pytools.graph import compute_sccs
    from loopy.diagnostic import DependencyCycleFound

    # insn id -> ids of the instructions it depends on
    depends_on_graph = {}
    for insn in kernel.instructions:
        depends_on_graph[insn.id] = insn.depends_on

    # compute_sccs does double duty here: the order of the returned
    # components is used as the sort order, and any component that is not a
    # singleton is reported as a dependency cycle.
    topo_order = []
    for component in compute_sccs(depends_on_graph):
        if len(component) == 1:
            topo_order.append(component[0])
        else:
            raise DependencyCycleFound(", ".join(component))

    return topo_order
예제 #2
0
def _find_boostable_insn_ids(kernel):
    """Return the set of ids of instructions considered "boostable".

    There used to exist a broken heuristic called "boostability" that allowed
    instructions to be pushed into hardware-parallel loops. This function is
    what remains of it, for now, to provide a thin veneer of compatibility.

    An instruction is kept in the result if

    * it is alone in its strongly connected component of the
      "reads a variable written by" graph,
    * it does not read a variable that it writes itself, and
    * none of its dependency names is assigned by an instruction that fails
      one of the two conditions above.

    :returns: a :class:`set` of insn ids.
    """
    logger.debug("%s: idempotence" % kernel.name)

    writers_of = kernel.writer_map()

    # Names of all arguments and temporaries of the kernel.
    known_vars = {arg.name for arg in kernel.args}
    known_vars.update(kernel.temporary_variables.keys())

    from collections import defaultdict
    writer_graph = defaultdict(set)

    # insn id -> ids of all instructions writing a variable that insn reads
    for insn in kernel.instructions:
        vars_read = insn.read_dependency_names() & known_vars
        feeders = set()
        for var in vars_read:
            feeders.update(writers_of.get(var, set()))
        writer_graph[insn.id] = feeders

    # The SCCs of writer_graph tell us whether an instruction takes part in
    # a dependency cycle.
    from pytools.graph import compute_sccs

    scc_of = {}
    for component in compute_sccs(writer_graph):
        for member in component:
            scc_of[member] = component

    tainted_vars = set()
    candidates = set()

    for insn in kernel.instructions:
        in_cycle = (
            len(scc_of[insn.id]) != 1
            or insn.id in writer_graph[insn.id])

        if in_cycle:
            # Everything such an instruction assigns counts as
            # non-idempotently updated.
            tainted_vars.update(insn.assignee_var_names())
        else:
            candidates.add(insn.id)

    # {{{ drop candidates that access non-idempotently updated vars

    return {
        insn_id
        for insn_id in candidates
        if not (tainted_vars & kernel.id_to_insn[insn_id].dependency_names())
    }

    # }}}
예제 #3
0
def test_compute_sccs():
    """Check :func:`pytools.graph.compute_sccs` on seeded random graphs.

    For graph sizes 10 through 19, 40 random graphs each are generated from
    a fixed-seed RNG and the components returned by ``compute_sccs`` are
    validated by a depth-first traversal.
    """
    from pytools.graph import compute_sccs
    import random

    rng = random.Random(0)

    def generate_random_graph(nnodes):
        # Edge probability 2/n: Generates decently interesting inputs.
        # The RNG is consulted once per (src, dst) pair, row by row.
        return {
            src: {
                dst
                for dst in range(nnodes)
                if rng.randint(0, nnodes - 1) <= 1
            }
            for src in range(nnodes)
        }

    def verify_sccs(graph, sccs):
        seen = set()

        def collect_unseen(node):
            # Post-order list of the nodes reachable from *node* that have
            # not been marked as seen yet; marks them along the way.
            if node in seen:
                return []

            seen.add(node)
            reached = []
            for successor in graph[node]:
                reached.extend(collect_unseen(successor))
            reached.append(node)
            return reached

        for component in sccs:
            members = set(component)
            # No member may have been reached from an earlier component.
            assert not members & seen
            # Starting from any member of the SCC must reach exactly the
            # members of the SCC (everything else reachable is already
            # marked as seen).
            for root in members:
                seen.difference_update(members)
                reached = collect_unseen(root)
                assert set(reached) == members, (set(reached), members)

    for nnodes in range(10, 20):
        for _ in range(40):
            graph = generate_random_graph(nnodes)
            verify_sccs(graph, compute_sccs(graph))
예제 #4
0
def infer_unknown_types_for_a_single_kernel(kernel, clbl_inf_ctx):
    """Infer types on temporaries and arguments.

    Every temporary variable and argument of *kernel* whose ``dtype`` is
    *None* is subject to inference. These names are grouped into the
    strongly connected components (SCCs) of their read/write dependency
    graph, and each SCC is iterated until its types stop changing or no
    further progress can be made. Names whose type cannot be determined are
    left unchanged; this is only reported through the debug log.

    :arg kernel: the kernel to process. Substitution rules, if present, are
        expanded on a working copy only; the returned kernel is derived from
        the unexpanded kernel.
    :arg clbl_inf_ctx: context object passed to
        :class:`TypeInferenceMapper` and updated from the results of
        ``_infer_var_type``.
    :returns: a tuple ``(type_specialized_kernel, clbl_inf_ctx)`` of the
        kernel with the inferred dtypes filled in (and calls renamed via
        ``change_names_of_pymbolic_calls``) and the context taken from the
        type inference mapper after the final pass.
    :raises LoopyError: if a name subject to inference is neither a
        :class:`TemporaryVariable` nor a :class:`KernelArgument`.
    :raises NotImplementedError: if an instruction of an unknown type is
        encountered.
    """

    logger.debug("%s: infer types", kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in kernel.temporary_variables.values():
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for %d names", len(names_for_type_inference))

    writer_map = kernel.writer_map()

    # The list keeps the processing order; the set makes the membership
    # test in the comprehension below constant-time.
    names_needing_types = frozenset(names_for_type_inference)

    # written variable -> variables of unknown type read by its writers
    dep_graph = {
        written_var: {
            read_var
            for insn_id in writer_map.get(written_var, [])
            for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
            if read_var in names_needing_types
        }
        for written_var in names_for_type_inference
    }

    from pytools.graph import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([new_temp_vars, new_arg_dict])
    type_inf_mapper = TypeInferenceMapper(kernel, clbl_inf_ctx, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    old_calls_to_new_calls = {}

    for var_chain in sccs:
        changed_during_last_queue_run = False
        var_queue = var_chain[:]
        failed_names = set()

        while var_queue or changed_during_last_queue_run:
            if not var_queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                var_queue = var_chain[:]

            name = var_queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)
            try:
                (result, symbols_with_unknown_types,
                 new_old_calls_to_new_calls,
                 clbl_inf_ctx) = (_infer_var_type(kernel, item.name,
                                                  type_inf_mapper,
                                                  subst_expander))
            except DependencyTypeInferenceFailure:
                # Treated like an ordinary failure; clbl_inf_ctx keeps its
                # previous value.
                result = ()
                symbols_with_unknown_types = ()
            type_inf_mapper = type_inf_mapper.copy(clbl_inf_ctx=clbl_inf_ctx)

            if result:
                new_dtype, = result

                debug("     success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug("     changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError(
                            "unexpected item type in type inference")
                old_calls_to_new_calls.update(new_old_calls_to_new_calls)

                # we've made progress, reset failure markers
                failed_names = set()

            else:
                debug("     failure")

                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unknown_types:
                        advice += (
                            " (need type of '%s'--check for missing arguments)"
                            % ", ".join(symbols_with_unknown_types))

                    debug("could not determine type of '%s'%s" %
                          (item.name, advice))
                    # We're done here
                    break

                # remember that this item failed
                failed_names.add(item.name)

                # NOTE(review): *name* was just popped from the queue and
                # added to failed_names, so this equality looks unreachable
                # for SCCs with unique names -- confirm.
                if set(var_queue) == failed_names:
                    # We did what we could... Report through the debug log
                    # rather than writing to stdout.
                    debug("     giving up: queue=%s, failed=%s, last=%s",
                          var_queue, failed_names, item.name)
                    break

                # can't infer type yet, put back into var_queue
                var_queue.append(name)

    # }}}

    # {{{ check if insn missed during type inference

    def _instruction_missed_during_inference(insn):
        # Returns False as soon as one assignee of *insn* names an argument
        # or temporary whose dtype is None in *kernel* (i.e. a name that was
        # subject to inference above), True otherwise.
        for assignee in insn.assignees:
            if isinstance(assignee, Lookup):
                assignee = assignee.aggregate

            if isinstance(assignee, Variable):
                assignee_name = assignee.name
            elif isinstance(assignee, (Subscript, LinearSubscript)):
                assignee_name = assignee.aggregate.name
            else:
                assert isinstance(assignee, SubArrayRef)
                assignee_name = assignee.subscript.aggregate.name

            if assignee_name in kernel.arg_dict:
                declaration = kernel.arg_dict[assignee_name]
            else:
                assert assignee_name in kernel.temporary_variables
                declaration = kernel.temporary_variables[assignee_name]

            # Always test the dtype attribute, never the declaration object
            # itself, regardless of the kind of assignee.
            if declaration.dtype is None:
                return False

        return True

    # }}}

    for insn in kernel.instructions:
        if isinstance(insn, lp.MultiAssignmentBase):
            # just a dummy run over the expression, to pass over all the
            # functions
            if _instruction_missed_during_inference(insn):
                type_inf_mapper(insn.expression,
                                return_tuple=len(insn.assignees) != 1,
                                return_dtype_set=True)
        elif isinstance(insn, (_DataObliviousInstruction, lp.CInstruction)):
            pass
        else:
            raise NotImplementedError("Unknown instructions type %s." %
                                      (type(insn).__name__))

    clbl_inf_ctx = type_inf_mapper.clbl_inf_ctx
    old_calls_to_new_calls.update(type_inf_mapper.old_calls_to_new_calls)

    end_time = time.time()
    logger.debug("type inference took %.2f seconds", end_time - start_time)

    pre_type_specialized_knl = unexpanded_kernel.copy(
        temporary_variables=new_temp_vars,
        args=[new_arg_dict[arg.name] for arg in kernel.args],
    )

    type_specialized_kernel = change_names_of_pymbolic_calls(
        pre_type_specialized_knl, old_calls_to_new_calls)

    return type_specialized_kernel, clbl_inf_ctx
예제 #5
0
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Every temporary variable and argument of *kernel* whose ``dtype`` is
    *None* is subject to inference. These names are grouped into the
    strongly connected components (SCCs) of their read/write dependency
    graph, and each SCC is iterated until its types stop changing or no
    further progress can be made.

    :arg kernel: the kernel to process. Substitution rules, if present, are
        expanded on a working copy only; the returned kernel is derived from
        the unexpanded kernel.
    :arg expect_completion: if true, failing to determine the type of a
        name raises :class:`LoopyError`. Otherwise, work on the affected
        SCC is abandoned and the dtypes in question are left as they are.
    :returns: a copy of the (unexpanded) kernel with the inferred dtypes
        filled in for its temporaries and arguments.
    :raises LoopyError: if *expect_completion* is true and a type could not
        be determined, or if a name subject to inference is neither a
        :class:`TemporaryVariable` nor a :class:`KernelArgument`.
    """

    logger.debug("%s: infer types", kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in kernel.temporary_variables.values():
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for %d names", len(names_for_type_inference))

    writer_map = kernel.writer_map()

    # The list keeps the processing order; the set makes the membership
    # test in the comprehension below constant-time.
    names_needing_types = frozenset(names_for_type_inference)

    # written variable -> variables of unknown type read by its writers
    dep_graph = {
            written_var: {
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in kernel.id_to_insn[insn_id].read_dependency_names()
                if read_var in names_needing_types}
            for written_var in names_for_type_inference}

    from pytools.graph import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([
            new_temp_vars,
            new_arg_dict
            ])
    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                changed_during_last_queue_run = False
                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break
                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (
                    _infer_var_type(
                            kernel, item.name, type_inf_mapper, subst_expander))

            if result:
                new_dtype, = result
                if new_dtype.target is None:
                    new_dtype = new_dtype.with_target(kernel.target)

                debug("     success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug("     changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError("unexpected item type in type inference")

                # we've made progress, reset failure markers
                failed_names = set()

            else:
                debug("     failure")

                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unavailable_types:
                        advice += (
                                " (need type of '%s'--check for missing arguments)"
                                % ", ".join(symbols_with_unavailable_types))

                    if expect_completion:
                        raise LoopyError(
                                "could not determine type of '%s'%s"
                                % (item.name, advice))

                    else:
                        # We're done here.
                        break

                # remember that this item failed
                failed_names.add(item.name)

                # NOTE(review): *name* was just popped from the queue and
                # added to failed_names, so this equality looks unreachable
                # for SCCs with unique names -- confirm.
                if set(queue) == failed_names:
                    # We did what we could... Report through the debug log
                    # rather than writing to stdout.
                    debug("     giving up: queue=%s, failed=%s, last=%s",
                            queue, failed_names, item.name)
                    if expect_completion:
                        # An explicit error instead of an assert, so that
                        # the check is not stripped under -O.
                        raise LoopyError(
                                "could not determine type of '%s'"
                                % item.name)
                    break

                # can't infer type yet, put back into queue
                queue.append(name)

    # }}}

    end_time = time.time()
    logger.debug("type inference took %.2f seconds", end_time - start_time)

    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )