def find_idempotence(kernel):
    """Flag every instruction of *kernel* as boostable or not.

    An instruction is marked non-boostable when it sits in a dependency
    cycle of the "reads a variable written by" graph (this includes an
    instruction that reads a variable it writes itself), or when it
    depends on a variable assigned by such an instruction.

    :arg kernel: the kernel whose instructions are examined.
    :returns: a copy of *kernel* whose instructions carry the computed
        ``boostable`` flag.
    """
    logger.debug("%s: idempotence" % kernel.name)

    from collections import defaultdict
    from loopy.tools import compute_sccs

    writer_map = kernel.writer_map()

    # Only kernel arguments and temporaries are considered variables here.
    var_names = set(arg.name for arg in kernel.args)
    var_names.update(six.iterkeys(kernel.temporary_variables))

    reads_by_insn = {}
    for insn in kernel.instructions:
        reads_by_insn[insn.id] = insn.read_dependency_names() & var_names

    # {{{ build graph: instruction id -> ids of writers of what it reads

    dep_graph = defaultdict(set)
    for insn in kernel.instructions:
        writer_ids = set()
        for var in reads_by_insn[insn.id]:
            writer_ids.update(writer_map.get(var, ()))
        dep_graph[insn.id] = writer_ids

    # }}}

    # Map each instruction id to the SCC containing it, so that membership
    # in a dependency cycle can be read off the SCC size.
    scc_of = {}
    for scc in compute_sccs(dep_graph):
        for member in scc:
            scc_of[member] = scc

    # {{{ first pass: instructions inside a cycle are not boostable

    non_idempotently_updated_vars = set()
    first_pass_insns = []

    for insn in kernel.instructions:
        in_cycle = (
                len(scc_of[insn.id]) != 1
                or insn.id in dep_graph[insn.id])

        if in_cycle:
            non_idempotently_updated_vars.update(insn.assignee_var_names())

        first_pass_insns.append(insn.copy(boostable=not in_cycle))

    # }}}

    # {{{ remove boostability from insns that access non-idempotently updated vars

    final_insns = []
    for insn in first_pass_insns:
        if insn.boostable and bool(
                non_idempotently_updated_vars & insn.dependency_names()):
            insn = insn.copy(boostable=False)

        final_insns.append(insn)

    # }}}

    return kernel.copy(instructions=final_insns)
def test_compute_sccs():
    """Check ``loopy.tools.compute_sccs`` against seeded random digraphs.

    For every generated graph, the returned SCCs are verified to be
    pairwise disjoint, to be listed so that each SCC only reaches nodes of
    SCCs listed before it, to be strongly connected (every member reaches
    exactly the SCC once earlier SCCs are excluded), and to cover every
    node of the graph.
    """
    from loopy.tools import compute_sccs
    import random

    # Fixed seed: the generated graphs are the same on every run.
    rng = random.Random(0)

    def generate_random_graph(nnodes):
        graph = dict((i, set()) for i in range(nnodes))
        for i in range(nnodes):
            for j in range(nnodes):
                # Edge probability 2/n: Generates decently interesting inputs.
                if rng.randint(0, nnodes - 1) <= 1:
                    graph[i].add(j)
        return graph

    def verify_sccs(graph, sccs):
        visited = set()

        def visit(node):
            # Depth-first search returning the newly reached nodes in
            # post-order; nodes already in *visited* are not re-entered.
            if node in visited:
                return []

            visited.add(node)
            result = []
            for child in graph[node]:
                result.extend(visit(child))
            result.append(node)
            return result

        for scc in sccs:
            scc = set(scc)
            assert not scc & visited
            # Check that starting from each element of the SCC results
            # in the same set of reachable nodes.
            for scc_root in scc:
                visited.difference_update(scc)
                result = visit(scc_root)
                assert set(result) == scc, (set(result), scc)

        # At this point *visited* is the union of all reported SCCs. Without
        # this check, a result that silently omits nodes (even an empty
        # list) would pass.
        all_nodes = set(graph)
        assert visited == all_nodes, all_nodes - visited

    for nnodes in range(10, 20):
        for _ in range(40):
            graph = generate_random_graph(nnodes)
            verify_sccs(graph, compute_sccs(graph))
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Temporaries whose ``dtype`` is ``lp.auto`` and arguments whose ``dtype``
    is *None* are collected, grouped into strongly connected components of
    their "is written from" dependency graph, and each group is iterated
    until no type changes any more or no further progress is possible.

    :arg kernel: the kernel to process. If it has substitution rules,
        inference runs on a version with those rules expanded, but the
        returned kernel is derived from the kernel as passed in.
    :arg expect_completion: if true, raise :class:`LoopyError` when a name
        fails type inference again without any progress in between.
        If false, processing of the affected group simply stops.
    :returns: a copy of the passed-in kernel with ``temporary_variables``
        and ``args`` replaced by the versions carrying inferred types.
    :raises LoopyError: if *expect_completion* is true and a type could not
        be determined, or if a looked-up item is neither a
        ``TemporaryVariable`` nor a ``KernelArgument``.
    """
    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        if tv.dtype is lp.auto:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
        count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # The membership test below runs once per variable read by every writer
    # of every name; test against a set instead of scanning the list. The
    # list itself is still iterated so that key order is unchanged.
    names_needing_types = frozenset(names_for_type_inference)

    dep_graph = dict(
            (written_var, set(
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in (
                    kernel.id_to_insn[insn_id].read_dependency_names())
                if read_var in names_needing_types))
            for written_var in names_for_type_inference)

    from loopy.tools import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([new_temp_vars, new_arg_dict])
    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                # The queue ran dry but something changed: go around again.
                changed_during_last_queue_run = False

                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break

                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (_infer_var_type(
                kernel, item.name, type_inf_mapper, subst_expander))

            # An empty result means no type could be inferred (yet).
            failed = not result
            if not failed:
                new_dtype, = result
                debug(" success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug(" changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError(
                                "unexpected item type in type inference")
            else:
                debug(" failure")

            if failed:
                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unavailable_types:
                        advice += (
                                " (need type of '%s'--check for missing arguments)"
                                % ", ".join(symbols_with_unavailable_types))

                    if expect_completion:
                        raise LoopyError("could not determine type of '%s'%s"
                                % (item.name, advice))
                    else:
                        # We're done here.
                        break

                # remember that this item failed
                failed_names.add(item.name)

                # NOTE(review): `name` was popped from `queue` above and was
                # just added to `failed_names`, so this equality looks
                # unsatisfiable unless `queue` holds duplicates -- confirm.
                if set(queue) == failed_names:
                    # We did what we could...
                    # Diagnostics go through the debug helper, not stdout.
                    debug(" giving up: queue=%s failed_names=%s item=%s",
                            queue, failed_names, item.name)
                    assert not expect_completion
                    break

                # can't infer type yet, put back into queue
                queue.append(name)
            else:
                # we've made progress, reset failure markers
                failed_names = set()

    # }}}

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(
        dur=end_time - start_time))

    # Note: built from the kernel as passed in, i.e. before any substitution
    # rules were expanded.
    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )
def infer_unknown_types(kernel, expect_completion=False):
    """Infer types on temporaries and arguments.

    Temporaries and arguments whose ``dtype`` is *None* are collected,
    grouped into strongly connected components of their "is written from"
    dependency graph, and each group is iterated until no type changes any
    more or no further progress is possible. Inferred types that carry no
    target are bound to ``kernel.target``.

    :arg kernel: the kernel to process. No temporary or argument may have
        ``lp.auto`` as its ``dtype`` (checked by assertion). If the kernel
        has substitution rules, inference runs on a version with those
        rules expanded, but the returned kernel is derived from the kernel
        as passed in.
    :arg expect_completion: if true, raise :class:`LoopyError` when a name
        fails type inference again without any progress in between.
        If false, processing of the affected group simply stops.
    :returns: a copy of the passed-in kernel with ``temporary_variables``
        and ``args`` replaced by the versions carrying inferred types.
    :raises LoopyError: if *expect_completion* is true and a type could not
        be determined, or if a looked-up item is neither a
        ``TemporaryVariable`` nor a ``KernelArgument``.
    """
    logger.debug("%s: infer types" % kernel.name)

    from functools import partial
    debug = partial(_debug, kernel)

    import time
    start_time = time.time()

    unexpanded_kernel = kernel
    if kernel.substitutions:
        from loopy.transform.subst import expand_subst
        kernel = expand_subst(kernel)

    new_temp_vars = kernel.temporary_variables.copy()
    new_arg_dict = kernel.arg_dict.copy()

    # {{{ find names_with_unknown_types

    # contains both arguments and temporaries
    names_for_type_inference = []

    import loopy as lp
    for tv in six.itervalues(kernel.temporary_variables):
        assert tv.dtype is not lp.auto
        if tv.dtype is None:
            names_for_type_inference.append(tv.name)

    for arg in kernel.args:
        assert arg.dtype is not lp.auto
        if arg.dtype is None:
            names_for_type_inference.append(arg.name)

    # }}}

    logger.debug("finding types for {count:d} names".format(
        count=len(names_for_type_inference)))

    writer_map = kernel.writer_map()

    # The membership test below runs once per variable read by every writer
    # of every name; test against a set instead of scanning the list. The
    # list itself is still iterated so that key order is unchanged.
    names_needing_types = frozenset(names_for_type_inference)

    dep_graph = dict(
            (written_var, set(
                read_var
                for insn_id in writer_map.get(written_var, [])
                for read_var in (
                    kernel.id_to_insn[insn_id].read_dependency_names())
                if read_var in names_needing_types))
            for written_var in names_for_type_inference)

    from loopy.tools import compute_sccs

    # To speed up processing, we sort the variables by computing the SCCs of the
    # type dependency graph. Each SCC represents a set of variables whose types
    # mutually depend on themselves. The SCCs are returned and processed in
    # topological order.
    sccs = compute_sccs(dep_graph)

    item_lookup = _DictUnionView([
        new_temp_vars,
        new_arg_dict
        ])
    type_inf_mapper = TypeInferenceMapper(kernel, item_lookup)

    from loopy.symbolic import SubstitutionRuleExpander
    subst_expander = SubstitutionRuleExpander(kernel.substitutions)

    # {{{ work on type inference queue

    from loopy.kernel.data import TemporaryVariable, KernelArgument

    for var_chain in sccs:
        changed_during_last_queue_run = False
        queue = var_chain[:]
        failed_names = set()

        while queue or changed_during_last_queue_run:
            if not queue and changed_during_last_queue_run:
                # The queue ran dry but something changed: go around again.
                changed_during_last_queue_run = False

                # Optimization: If there's a single variable in the SCC without
                # a self-referential dependency, then the type is known after a
                # single iteration (we don't need to look at the expressions
                # again).
                if len(var_chain) == 1:
                    single_var, = var_chain
                    if single_var not in dep_graph[single_var]:
                        break

                queue = var_chain[:]

            name = queue.pop(0)
            item = item_lookup[name]

            debug("inferring type for %s %s", type(item).__name__, item.name)

            result, symbols_with_unavailable_types = (
                    _infer_var_type(
                        kernel, item.name, type_inf_mapper, subst_expander))

            # An empty result means no type could be inferred (yet).
            failed = not result
            if not failed:
                new_dtype, = result

                # Bind target-less types to this kernel's target.
                if new_dtype.target is None:
                    new_dtype = new_dtype.with_target(kernel.target)

                debug(" success: %s", new_dtype)
                if new_dtype != item.dtype:
                    debug(" changed from: %s", item.dtype)
                    changed_during_last_queue_run = True

                    if isinstance(item, TemporaryVariable):
                        new_temp_vars[name] = item.copy(dtype=new_dtype)
                    elif isinstance(item, KernelArgument):
                        new_arg_dict[name] = item.copy(dtype=new_dtype)
                    else:
                        raise LoopyError(
                                "unexpected item type in type inference")
            else:
                debug(" failure")

            if failed:
                if item.name in failed_names:
                    # this item has failed before, give up.
                    advice = ""
                    if symbols_with_unavailable_types:
                        advice += (
                                " (need type of '%s'--check for missing arguments)"
                                % ", ".join(symbols_with_unavailable_types))

                    if expect_completion:
                        raise LoopyError(
                                "could not determine type of '%s'%s"
                                % (item.name, advice))
                    else:
                        # We're done here.
                        break

                # remember that this item failed
                failed_names.add(item.name)

                # NOTE(review): `name` was popped from `queue` above and was
                # just added to `failed_names`, so this equality looks
                # unsatisfiable unless `queue` holds duplicates -- confirm.
                if set(queue) == failed_names:
                    # We did what we could...
                    # Diagnostics go through the debug helper, not stdout.
                    debug(" giving up: queue=%s failed_names=%s item=%s",
                            queue, failed_names, item.name)
                    assert not expect_completion
                    break

                # can't infer type yet, put back into queue
                queue.append(name)
            else:
                # we've made progress, reset failure markers
                failed_names = set()

    # }}}

    end_time = time.time()
    logger.debug("type inference took {dur:.2f} seconds".format(
        dur=end_time - start_time))

    # Note: built from the kernel as passed in, i.e. before any substitution
    # rules were expanded.
    return unexpanded_kernel.copy(
            temporary_variables=new_temp_vars,
            args=[new_arg_dict[arg.name] for arg in kernel.args],
            )