def _check_variable_access_ordered_inner(kernel):
    """Check that conflicting accesses to written variables are ordered.

    For every written variable, each instruction writing the variable (or a
    member of its aliasing equivalence class) is paired with every other
    instruction reading or writing that class. A pair is acceptable if

    * ``declares_nosync_with`` reports that the pair opted out of ordering,
    * one instruction is a direct or indirect dependency of the other,
    * the relationship is direct (not via aliasing) and the conservative
      access-range overlap check reports no overlap, or
    * the relationship exists only via aliasing and one instruction's
      ``groups`` intersects the other's ``conflicts_with_groups``.

    :arg kernel: the kernel whose instructions are checked.
    :raises loopy.diagnostic.VariableAccessNotOrdered: for the first pair
        that satisfies none of the conditions above.
    """
    from loopy.diagnostic import VariableAccessNotOrdered
    from loopy.kernel.tools import find_aliasing_equivalence_classes
    from loopy.symbolic import AccessRangeOverlapChecker

    overlap_checker = AccessRangeOverlapChecker(kernel)
    aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel)

    # dep_reqs_to_vars: A mapping (writer_id, dep_req_id) -> set of variable
    # names, where the tuple denotes a pair of instruction IDs, and the
    # variable names are the ones that necessitate a dependency.
    #
    # Note: This can be worst-case O(n^2) in the number of instructions.
    dep_reqs_to_vars = {}

    wmap = kernel.writer_map()
    rmap = kernel.reader_map()

    # {{{ populate 'dep_reqs_to_vars'

    for var in kernel.get_written_variables():
        address_space = _get_address_space(kernel, var)
        eq_class = aliasing_equiv_classes[var]

        readers = set.union(
                *[rmap.get(eq_name, set()) for eq_name in eq_class])
        writers = set.union(
                *[wmap.get(eq_name, set()) for eq_name in eq_class])
        accessors = readers | writers

        for writer in writers:
            for req_dep in accessors:
                # An instruction needs no ordering with respect to itself,
                # and 'nosync' pairs have explicitly opted out.
                if req_dep == writer:
                    continue
                if declares_nosync_with(
                        kernel, address_space, writer, req_dep):
                    continue

                dep_reqs_to_vars.setdefault(
                        (writer, req_dep), set()).add(var)

    # }}}

    # depends_on: mapping from insn_ids to their dependencies
    depends_on = {insn.id: set() for insn in kernel.instructions}
    # rev_depends: mapping from insn_ids to their reverse deps.
    rev_depends = {insn.id: set() for insn in kernel.instructions}

    # {{{ populate rev_depends, depends_on

    for insn in kernel.instructions:
        depends_on[insn.id].update(insn.depends_on)
        for dep in insn.depends_on:
            rev_depends[dep].add(insn.id)

    # }}}

    # {{{ remove pairs from dep_reqs_to_vars for which dependencies exist

    topological_order = _get_topological_order(kernel)

    def discard_dep_reqs_in_order(dep_reqs_to_vars, edges, order):
        """
        Subtracts dependency requirements of insn_ids by all direct/indirect
        predecessors of a directed graph of insn_ids as nodes and *edges* as
        the connectivity.

        :arg order: An instance of :class:`list` of instruction ids in which
            the *edges* graph is to be traversed.
        """
        # predecessors: mapping from insn_id to its direct/indirect
        # predecessors
        predecessors = {}

        for insn_id in order:
            # insn_predecessors: insn_id's direct+indirect predecessors.
            # This set of predecessors is complete because we're
            # traversing in topological order: No predecessor
            # can occur after the instruction itself.
            insn_predecessors = predecessors.pop(insn_id, set())

            for pred in insn_predecessors:
                dep_reqs_to_vars.pop(
                        (insn_id, pred),
                        # don't fail if pair doesn't exist
                        None)

            for successor in edges[insn_id]:
                predecessors.setdefault(
                        successor, set()).update(
                                insn_predecessors | {insn_id})

    # forward dep. graph traversal in reverse topological sort order
    # (proceeds "end of program" -> "beginning of program")
    discard_dep_reqs_in_order(dep_reqs_to_vars, depends_on,
            topological_order[::-1])

    # reverse dep. graph traversal in topological sort order
    # (proceeds "beginning of program" -> "end of program")
    discard_dep_reqs_in_order(dep_reqs_to_vars, rev_depends,
            topological_order)

    # }}}

    # {{{ handle dependency requirements that weren't satisfied

    for (writer_id, other_id), variables in dep_reqs_to_vars.items():
        writer = kernel.id_to_insn[writer_id]
        other = kernel.id_to_insn[other_id]

        for var in variables:
            eq_class = aliasing_equiv_classes[var]
            unaliased_readers = rmap.get(var, set())
            unaliased_writers = wmap.get(var, set())

            # The relationship is direct only if the writer writes 'var'
            # itself AND the other instruction reads or writes 'var' itself.
            # Anything else exists only through the aliasing equivalence
            # class. (The second operand must test 'other_id'; testing
            # 'writer_id' twice would ignore how the other instruction
            # accesses the variable.)
            is_relationship_by_aliasing = not (
                    writer_id in unaliased_writers
                    and (other_id in unaliased_writers
                        or other_id in unaliased_readers))

            # Do not enforce ordering for disjoint access ranges
            if (not is_relationship_by_aliasing
                    and not overlap_checker
                    .do_access_ranges_overlap_conservative(
                        writer_id, "w", other_id, "any", var)):
                continue

            # Do not enforce ordering for aliasing-based relationships
            # in different groups.
            if (is_relationship_by_aliasing
                    and (bool(writer.groups & other.conflicts_with_groups)
                        or bool(
                            other.groups & writer.conflicts_with_groups))):
                continue

            msg = ("No dependency relationship found between "
                    "'{writer_id}' which writes {var} and "
                    "'{other_id}' which also accesses {var}. "
                    "Either add a (possibly indirect) dependency "
                    "between the two, or add them to each others' nosync "
                    "set to indicate that no ordering is intended, or "
                    "turn off this check by setting the "
                    "'enforce_variable_access_ordered' option "
                    "(more issues of this type may exist--only reporting "
                    "the first one)".format(
                        writer_id=writer_id,
                        other_id=other_id,
                        var=("the variable '%s'" % var
                            if len(eq_class) == 1
                            else ("the aliasing equivalence class '%s'"
                                % ", ".join(eq_class)))))

            raise VariableAccessNotOrdered(msg)

    # }}}
def _check_variable_access_ordered_inner(kernel):
    """Check that conflicting accesses to written variables are ordered.

    Considers every written variable that is a temporary or an argument of
    *kernel*. Each instruction writing the variable (or a member of its
    aliasing equivalence class) is paired with every other instruction
    reading or writing that class. A pair is acceptable if

    * ``declares_nosync_with`` reports that the pair opted out of ordering,
    * ``IndirectDependencyEdgeFinder`` finds a dependency in either
      direction,
    * the relationship is direct (not via aliasing) and the conservative
      access-range overlap check reports no overlap, or
    * the relationship exists only via aliasing and one instruction's
      ``groups`` intersects the other's ``conflicts_with_groups``.

    :arg kernel: the kernel whose instructions are checked.
    :raises ValueError: if a checked argument is neither an ``ArrayArg`` nor
        a ``ValueArg``, so that no address space can be determined.
    :raises loopy.diagnostic.VariableAccessNotOrdered: for the first pair
        that satisfies none of the conditions above.
    """
    logger.debug("%s: check_variable_access_ordered: start", kernel.name)

    from loopy.diagnostic import VariableAccessNotOrdered
    from loopy.kernel.data import ValueArg, AddressSpace, ArrayArg
    from loopy.kernel.tools import find_aliasing_equivalence_classes
    from loopy.symbolic import AccessRangeOverlapChecker

    checked_variables = kernel.get_written_variables() & (
            set(kernel.temporary_variables) | set(kernel.arg_dict))

    wmap = kernel.writer_map()
    rmap = kernel.reader_map()

    depfind = IndirectDependencyEdgeFinder(kernel)
    aliasing_equiv_classes = find_aliasing_equivalence_classes(kernel)

    # The checker depends only on the kernel, so build it once rather than
    # once per checked variable.
    overlap_checker = AccessRangeOverlapChecker(kernel)

    for name in checked_variables:
        # This is a tad redundant in that this could probably be restructured
        # to iterate only over equivalence classes and not individual
        # variables. But then the access-range overlap check below would have
        # to be smarter.
        eq_class = aliasing_equiv_classes[name]

        readers = set.union(
                *[rmap.get(eq_name, set()) for eq_name in eq_class])
        writers = set.union(
                *[wmap.get(eq_name, set()) for eq_name in eq_class])
        unaliased_readers = rmap.get(name, set())
        unaliased_writers = wmap.get(name, set())

        if not writers:
            continue

        if name in kernel.temporary_variables:
            address_space = kernel.temporary_variables[name].address_space
        else:
            arg = kernel.arg_dict[name]
            if isinstance(arg, ArrayArg):
                address_space = arg.address_space
            elif isinstance(arg, ValueArg):
                address_space = AddressSpace.PRIVATE
            else:
                # No need to consider ConstantArg and ImageArg (for now)
                # because those won't be written.
                raise ValueError(
                        "could not determine address_space of '%s'" % name)

        # Check even for PRIVATE address space, to ensure intentional
        # program order.

        accessors = readers | writers

        for writer_id in writers:
            writer = kernel.id_to_insn[writer_id]

            for other_id in accessors:
                if writer_id == other_id:
                    continue

                other = kernel.id_to_insn[other_id]

                has_dependency_relationship = (
                        declares_nosync_with(
                            kernel, address_space, other, writer)
                        or depfind(writer_id, other_id)
                        or depfind(other_id, writer_id))

                if has_dependency_relationship:
                    continue

                # The relationship is direct only if both instructions
                # access 'name' itself; otherwise it exists only through
                # the aliasing equivalence class.
                is_relationship_by_aliasing = not (
                        writer_id in unaliased_writers
                        and (other_id in unaliased_writers
                            or other_id in unaliased_readers))

                # Do not enforce ordering for disjoint access ranges
                if (not is_relationship_by_aliasing
                        and not overlap_checker
                        .do_access_ranges_overlap_conservative(
                            writer_id, "w", other_id, "any", name)):
                    continue

                # Do not enforce ordering for aliasing-based relationships
                # in different groups.
                if (is_relationship_by_aliasing and (
                        bool(writer.groups & other.conflicts_with_groups)
                        or
                        bool(other.groups & writer.conflicts_with_groups))):
                    continue

                msg = ("No dependency relationship found between "
                        "'{writer_id}' which writes {var} and "
                        "'{other_id}' which also accesses {var}. "
                        "Either add a (possibly indirect) dependency "
                        "between the two, or add them to each others' nosync "
                        "set to indicate that no ordering is intended, or "
                        "turn off this check by setting the "
                        "'enforce_variable_access_ordered' option "
                        "(more issues of this type may exist--only reporting "
                        "the first one)"
                        .format(
                            writer_id=writer_id,
                            other_id=other_id,
                            var=(
                                "the variable '%s'" % name
                                if len(eq_class) == 1
                                else (
                                    "the aliasing equivalence class '%s'"
                                    % ", ".join(eq_class))
                                )))

                raise VariableAccessNotOrdered(msg)

    logger.debug("%s: check_variable_access_ordered: done", kernel.name)