def topological_sort(exprs):
    """Topologically sort the temporaries in a list of equations."""
    mapper = {e.lhs: e for e in exprs}
    assert len(mapper) == len(exprs)  # Expect SSA

    # Build DAG and topologically-sort temporaries
    temporaries, tensors = split(exprs, lambda e: not e.lhs.is_Indexed)
    dag = DAG(nodes=temporaries)
    for e in temporaries:
        for r in retrieve_terminals(e.rhs):
            if r not in mapper:
                continue
            elif mapper[r] is e:
                # Avoid cyclic dependences, such as
                # Eq(f, f + 1)
                continue
            elif r.is_Indexed:
                # Only scalars enforce an ordering
                continue
            else:
                dag.add_edge(mapper[r], e, force_add=True)
    processed = dag.topological_sort()

    # Append tensor equations at the end in user-provided order
    processed.extend(tensors)

    return processed
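# A minimal, self-contained sketch of the ordering idea above, using plain
# tuples as stand-in "equations" (lhs, set-of-symbols-read) and Kahn's
# algorithm in place of Devito's DAG, split and retrieve_terminals utilities.
# All names below are illustrative, not part of the actual API.
from collections import defaultdict, deque

def toy_topological_sort(exprs):
    # exprs: list of (lhs, reads) pairs, assumed SSA
    mapper = {lhs: (lhs, reads) for lhs, reads in exprs}
    assert len(mapper) == len(exprs)  # Expect SSA

    graph = defaultdict(list)   # producer -> consumers
    indegree = {lhs: 0 for lhs, _ in exprs}
    for lhs, reads in exprs:
        for r in reads:
            if r in mapper and r != lhs:  # Skip unknowns and self-references
                graph[r].append(lhs)
                indegree[lhs] += 1

    queue = deque(lhs for lhs, d in indegree.items() if d == 0)
    processed = []
    while queue:
        lhs = queue.popleft()
        processed.append(mapper[lhs])
        for consumer in graph[lhs]:
            indegree[consumer] -= 1
            if indegree[consumer] == 0:
                queue.append(consumer)
    return processed

# `t1` reads `t0`, so it's scheduled after `t0` regardless of input order
print(toy_topological_sort([('t1', {'t0'}), ('t0', {'x'})]))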
def process(func, state):
    """
    Apply ``func`` to the IETs in ``state._efuncs``, and update ``state`` accordingly.
    """
    # Create a Call graph. `func` will be applied to each node in the Call graph.
    # `func` might change an `efunc` signature; the Call graph will be used to
    # propagate such change through the `efunc` callers
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(state._efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in state._efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)
    assert dag.size == len(state._efuncs)

    # Apply `func`
    for i in dag.topological_sort():
        state._efuncs[i], metadata = func(state._efuncs[i])

        # Track any new Dimensions introduced by `func`
        state._dimensions.extend(list(metadata.get('dimensions', [])))

        # Track any new #include required by `func`
        state._includes.extend(list(metadata.get('includes', [])))
        state._includes = filter_ordered(state._includes)

        # Track any new ElementalFunctions
        state._efuncs.update(OrderedDict([(i.name, i)
                                          for i in metadata.get('efuncs', [])]))

        # If there's a change to the `args` and the `iet` is an efunc, then
        # we must update the call sites as well, as the arguments dropped down
        # to the efunc have just increased
        args = as_tuple(metadata.get('args'))
        if args:
            # `extif` avoids redundant updates to the parameters list, due
            # to multiple children wanting to add the same input argument
            extif = lambda v: list(v) + [e for e in args if e not in v]
            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = state._efuncs[n]
                calls = [c for c in FindNodes(Call).visit(efunc) if c.name in stack]
                mapper = {c: c._rebuild(arguments=extif(c.arguments)) for c in calls}
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                state._efuncs[n] = efunc
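# The `extif` helper above is just order-preserving, duplicate-free list
# extension: new arguments are appended to a call's argument list only if not
# already present, so multiple children requesting the same argument don't
# bloat the signature. A standalone sketch with illustrative names:
def extif_sketch(v, new_args):
    return list(v) + [e for e in new_args if e not in v]

params = ['a', 'b']
print(extif_sketch(params, ['b', 'c']))  # ['a', 'b', 'c'] -- `b` not added twice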
def topological_sort(exprs):
    """Topologically sort a list of equations."""
    mapper = {e.lhs: e for e in exprs}
    assert len(mapper) == len(exprs)  # Expect SSA

    dag = DAG(nodes=exprs)
    for e in exprs:
        for r in retrieve_terminals(e.rhs):
            if r not in mapper:
                continue
            elif mapper[r] is e:
                # Avoid cyclic dependences, such as
                # Eq(f, f + 1)
                continue
            elif r.is_Indexed:
                # Only scalars enforce an ordering
                continue
            else:
                dag.add_edge(mapper[r], e, force_add=True)

    processed = dag.topological_sort()

    return processed
def _build_dag(self, cgroups, prefix):
    """
    A DAG capturing dependences between *all* ClusterGroups within an
    iteration space.

    Examples
    --------
    When do we need to sequentialize two ClusterGroups `cg0` and `cg1`?
    Essentially any time there's a dependence between them, apart from when
    it's a carried flow-dependence within the given iteration space.

    Let's consider two ClusterGroups `cg0` and `cg1` within the iteration
    space identified by the Dimension `i`.

    1) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j] ...
       Non-carried flow-dependence, so `cg1` must go after `cg0`

    2) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j+1] ...
       Anti-dependence in `j`, so `cg1` must go after `cg0`

    3) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i-1, j+1] ...
       Flow-dependence in `i`, so `cg1` can safely go before or after `cg0`
       (but clearly still within the `i` iteration space).
       Note: the `j+1` in `cg1` has no impact -- the dependence is in `i`.

    4) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j-1] ...
       Flow-dependence in `j`, so `cg1` must go after `cg0`. Unlike case 3),
       the flow-dependence is along an inner Dimension, so `cg0` and `cg1`
       need to be sequentialized.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        for cg1 in cgroups[n + 1:]:
            scope = Scope(exprs=cg0.exprs + cg1.exprs)

            # Handle anti-dependences
            local_deps = cg0.scope.d_anti + cg1.scope.d_anti
            if scope.d_anti - local_deps:
                dag.add_edge(cg0, cg1)
                break

            # Flow-dependences along one of the `prefix` Dimensions can
            # be ignored; all others require sequentialization
            local_deps = cg0.scope.d_flow + cg1.scope.d_flow
            if any(not i.cause or not (i.cause & prefix)
                   for i in scope.d_flow - local_deps):
                dag.add_edge(cg0, cg1)
                break

    return dag
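# A toy classifier for the docstring cases above: along a single Dimension,
# the sign of the distance between the write index of `cg0` and the read
# index of `cg1` separates flow- from anti-dependences. This is a heavily
# simplified sketch; Devito's Scope performs the real dependence analysis.
def classify(write_offset, read_offset):
    distance = write_offset - read_offset
    if distance == 0:
        return 'non-carried flow-dependence'
    elif distance > 0:
        return 'carried flow-dependence'   # read of an already-written entry
    else:
        return 'carried anti-dependence'   # read of a yet-to-be-written entry

print(classify(0, 0))    # case 1): write b[i, j], read b[i, j]
print(classify(0, 1))    # case 2): write b[i, j], read b[i, j+1]
print(classify(0, -1))   # case 4): write b[i, j], read b[i, j-1]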
def _create_call_graph(self):
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(self.efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in self.efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)

    # Sanity check
    assert dag.size == len(self.efuncs)

    return dag
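# Standalone sketch of the same breadth-first traversal, with a plain dict
# mapping caller -> callee names in place of FindNodes(Call).visit(...).
# Edges point callee -> caller, so a topological sort visits leaves first.
# Names are illustrative only.
from collections import deque

def build_call_graph(efuncs_callees, root='root'):
    # efuncs_callees: {efunc name: [names of the Calls it contains]}
    nodes, edges = {root}, set()
    queue = deque([root])
    while queue:
        caller = queue.popleft()
        for callee in efuncs_callees.get(caller, []):
            if callee in efuncs_callees:  # Exclude foreign calls
                if callee not in nodes:
                    nodes.add(callee)
                    queue.append(callee)
                edges.add((callee, caller))
    return nodes, edges

# `MPI_Send` is foreign (not a tracked efunc), hence excluded
print(build_call_graph({'root': ['f', 'MPI_Send'], 'f': ['g'], 'g': []}))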
def _build_dag(self, cgroups, prefix):
    """
    A DAG captures data dependences between ClusterGroups up to the iteration
    space depth dictated by ``prefix``.

    Examples
    --------
    Consider two ClusterGroups `cg0` and `cg1`, and ``prefix=[i]``.

    1) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j] ...
       Non-carried flow-dependence, so `cg1` must go after `cg0`.

    2) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j-1] ...
       Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

    3) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j+1] ...
       Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

    4) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i-1, j+1] ...
       Carried flow-dependence in `i`, so `cg1` can safely go before or
       after `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual
       dependence between `b[i, j]` and `b[i-1, j+1]` is along `i`.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        for cg1 in cgroups[n + 1:]:
            scope = Scope(exprs=cg0.exprs + cg1.exprs)

            # Handle anti-dependences
            deps = scope.d_anti - (cg0.scope.d_anti + cg1.scope.d_anti)
            if any(i.cause & prefix for i in deps):
                # Anti-dependences break the execution flow
                # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                for cg2 in cgroups[n:cgroups.index(cg1)]:
                    dag.add_edge(cg2, cg1)
                # ii) ClusterGroups after `cg1` cannot precede `cg1`
                for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                    dag.add_edge(cg1, cg2)
                break
            elif deps:
                dag.add_edge(cg0, cg1)

            # Flow-dependences along one of the `prefix` Dimensions can
            # be ignored; all others require sequentialization
            deps = scope.d_flow - (cg0.scope.d_flow + cg1.scope.d_flow)
            if any(not (i.cause and i.cause & prefix) for i in deps):
                dag.add_edge(cg0, cg1)
                continue

            # Handle increment-after-write dependences
            deps = scope.d_output - (cg0.scope.d_output + cg1.scope.d_output)
            if any(i.is_iaw for i in deps):
                dag.add_edge(cg0, cg1)
                continue

    return dag
def _build_dag(self, cgroups, prefix):
    """
    A DAG representing the data dependences across the ClusterGroups within
    a given scope.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        for cg1 in cgroups[n + 1:]:
            # A Scope to compute all cross-ClusterGroup dependences
            rule = lambda i: i.is_cross
            scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=rule)

            # Optimization: we exploit the following property:
            # no prefix => (edge <=> at least one (any) dependence)
            # to jump out of this potentially expensive loop as quickly as possible
            if not prefix and any(scope.d_all_gen()):
                dag.add_edge(cg0, cg1)

            # Anti-dependences along `prefix` break the execution flow
            # (intuitively, "the loop nests are to be kept separated")
            # * All ClusterGroups between `cg0` and `cg1` must precede `cg1`
            # * All ClusterGroups after `cg1` cannot precede `cg1`
            elif any(i.cause & prefix for i in scope.d_anti_gen()):
                for cg2 in cgroups[n:cgroups.index(cg1)]:
                    dag.add_edge(cg2, cg1)
                for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                    dag.add_edge(cg1, cg2)
                break

            # Any anti- and iaw-dependences impose that `cg1` follows `cg0`
            # while not being its immediate successor (unless it already is),
            # so that they are not fused together (which would break the dependence)
            # TODO: the "not being its immediate successor" part *seems* to be
            # a work around to the fact that any two Clusters characterized
            # by anti-dependence should have been given a different stamp,
            # and same for guarded Clusters, but that is not the case (yet)
            elif any(scope.d_anti_gen()) or\
                    any(i.is_iaw for i in scope.d_output_gen()):
                dag.add_edge(cg0, cg1)
                index = cgroups.index(cg1) - 1
                if index > n and self._key(cg0) == self._key(cg1):
                    dag.add_edge(cg0, cgroups[index])
                    dag.add_edge(cgroups[index], cg1)

            # Any flow-dependences along an inner Dimension (i.e., a Dimension
            # that doesn't appear in `prefix`) impose that `cg1` follows `cg0`
            elif any(not (i.cause and i.cause & prefix) for i in scope.d_flow_gen()):
                dag.add_edge(cg0, cg1)

            # Clearly, output dependences must be honored
            elif any(scope.d_output_gen()):
                dag.add_edge(cg0, cg1)

    return dag
def _prepare_arguments(self, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive
    default values for any remaining arguments.
    """
    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args = ReducerMap(args.reduce_all())
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = args.reduce_all()

    # All DiscreteFunctions should be defined on the same Grid
    grids = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
    grids.update({getattr(p, 'grid', None) for p in defaults})
    grids.discard(None)
    if len(grids) > 1 and configuration['mpi']:
        raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
        args.update(grid._arg_values(**kwargs))
    except KeyError:
        grid = None

    # Process Dimensions
    # A topological sorting is used so that derived Dimensions are processed after
    # their parents (note that a leaf Dimension can have an arbitrarily long list
    # of ancestors)
    dag = DAG(self.dimensions,
              [(i, i.parent) for i in self.dimensions if i.is_Derived])
    for d in reversed(dag.topological_sort()):
        args.update(d._arg_values(args, self._dspace[d], grid, **kwargs))

    # Process Objects (which may need some `args`)
    for o in self.objects:
        args.update(o._arg_values(args, grid=grid, **kwargs))

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])
    for d in self.dimensions:
        if d.is_Derived:
            d._arg_check(args, self._dspace[d])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_as_ctype(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_as_ctype`
            args.update(p._arg_as_ctype(args, alias=p))

    # Add in any backend-specific argument
    args.update(kwargs.pop('backend', {}))

    # Execute autotuning and adjust arguments accordingly
    args = self._autotune(args, kwargs.pop('autotune', configuration['autotuning']))

    # Check all user-provided keywords are known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    # Attach `grid` to the arguments map
    args = ArgumentsMap(grid, **args)

    return args
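# Sketch of the Dimension-ordering step above: DAG edges go from a derived
# Dimension to its parent, so the topological sort lists descendants first
# and the *reversed* order processes parents before their descendants. The
# helper below substitutes the reversed toposort with a tiny fixed-point
# loop; it assumes the parent relation is acyclic. Illustrative names only.
def process_order(dimensions, parent):
    # parent: {dim: its parent, or None for a root Dimension}
    emitted, order = set(), []
    while len(order) < len(dimensions):
        for d in dimensions:
            if d not in emitted and (parent[d] is None or parent[d] in emitted):
                emitted.add(d)
                order.append(d)
    return order

# `time` is the root; `t0` derives from `time`; `t00` derives from `t0`
print(process_order(['t00', 't0', 'time'],
                    {'t00': 't0', 't0': 'time', 'time': None}))
# ['time', 't0', 't00']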
def _prepare_arguments(self, autotune=None, **kwargs):
    """
    Process runtime arguments passed to ``.apply()`` and derive
    default values for any remaining arguments.
    """
    # Sanity check -- all user-provided keywords must be known to the Operator
    if not configuration['ignore-unknowns']:
        for k, v in kwargs.items():
            if k not in self._known_arguments:
                raise ValueError("Unrecognized argument %s=%s" % (k, v))

    overrides, defaults = split(self.input, lambda p: p.name in kwargs)

    # Process data-carrier overrides
    args = kwargs['args'] = ReducerMap()
    for p in overrides:
        args.update(p._arg_values(**kwargs))
        try:
            args.reduce_inplace()
        except ValueError:
            raise ValueError("Override `%s` is incompatible with overrides `%s`" %
                             (p, [i for i in overrides if i.name in args]))

    # Process data-carrier defaults
    for p in defaults:
        if p.name in args:
            # E.g., SubFunctions
            continue
        for k, v in p._arg_values(**kwargs).items():
            if k in args and args[k] != v:
                raise ValueError("Default `%s` is incompatible with other args as "
                                 "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                 "forgot to override `%s`?" %
                                 (p, k, v, k, args[k], p))
            args[k] = v
    args = kwargs['args'] = args.reduce_all()

    # DiscreteFunctions may be created from CartesianDiscretizations, which in
    # turn could be Grids or SubDomains. Both may provide arguments
    discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
    discretizations.update({getattr(p, 'grid', None) for p in defaults})
    discretizations.discard(None)
    for i in discretizations:
        args.update(i._arg_values(**kwargs))

    # There can only be one Grid from which DiscreteFunctions were created
    grids = {i for i in discretizations if isinstance(i, Grid)}
    if len(grids) > 1:
        # We loosely tolerate multiple Grids for backwards compatibility
        # with spatial subsampling, which should be revisited however.
        # With MPI it would definitely break!
        if configuration['mpi']:
            raise ValueError("Multiple Grids found")
    try:
        grid = grids.pop()
    except KeyError:
        grid = None

    # An ArgumentsMap carries additional metadata that may be used by
    # the subsequent phases of the arguments processing
    args = kwargs['args'] = ArgumentsMap(args, grid, self._allocator, self._platform)

    # Process Dimensions
    # A topological sorting is used so that derived Dimensions are processed after
    # their parents (note that a leaf Dimension can have an arbitrarily long list
    # of ancestors)
    dag = DAG(self.dimensions,
              [(i, i.parent) for i in self.dimensions if i.is_Derived])
    for d in reversed(dag.topological_sort()):
        args.update(d._arg_values(self._dspace[d], grid, **kwargs))

    # Process Objects
    for o in self.objects:
        args.update(o._arg_values(grid=grid, **kwargs))

    # In some "lower-level" Operators implementing a random piece of C, such as
    # one or more calls to third-party library functions, there could still be
    # at this point unprocessed arguments (e.g., scalars)
    kwargs.pop('args')
    args.update({k: v for k, v in kwargs.items() if k not in args})

    # Sanity check
    for p in self.parameters:
        p._arg_check(args, self._dspace[p])
    for d in self.dimensions:
        if d.is_Derived:
            d._arg_check(args, self._dspace[d])

    # Turn arguments into a format suitable for the generated code
    # E.g., instead of NumPy arrays for Functions, the generated code expects
    # pointers to ctypes.Struct
    for p in self.parameters:
        try:
            args.update(kwargs.get(p.name, p)._arg_finalize(args, alias=p))
        except AttributeError:
            # User-provided floats/ndarray obviously do not have `_arg_finalize`
            args.update(p._arg_finalize(args, alias=p))

    # Execute autotuning and adjust arguments accordingly
    args.update(self._autotune(args, autotune or configuration['autotuning']))

    return args
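# The ReducerMap used above accumulates possibly-duplicated (key, value)
# contributions from several data carriers and later "reduces" them, erroring
# out on conflicts. A minimal dict-based sketch of that contract -- this is
# an illustration of the behavior relied upon, not Devito's actual API:
class ToyReducerMap(dict):
    def update(self, values):
        for k, v in values.items():
            self.setdefault(k, []).append(v)

    def reduce_all(self):
        reduced = {}
        for k, vs in self.items():
            if len(set(vs)) > 1:
                raise ValueError("Conflicting values for `%s`: %s" % (k, vs))
            reduced[k] = vs[0]
        return reduced

m = ToyReducerMap()
m.update({'x_m': 0, 'x_M': 10})
m.update({'x_m': 0})             # Duplicate but consistent: fine
print(m.reduce_all())            # {'x_m': 0, 'x_M': 10}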
def _build_dag(self, cgroups, prefix):
    """
    A DAG captures data dependences between ClusterGroups up to the iteration
    space depth dictated by ``prefix``.

    Examples
    --------
    Consider two ClusterGroups `cg0` and `cg1`, and ``prefix=[i]``.

    1) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j] ...
       Non-carried flow-dependence, so `cg1` must go after `cg0`.

    2) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j-1] ...
       Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

    3) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j+1] ...
       Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

    4) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i-1, j+1] ...
       Carried flow-dependence in `i`, so `cg1` can safely go before or
       after `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual
       dependence between `b[i, j]` and `b[i-1, j+1]` is along `i`.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        for cg1 in cgroups[n + 1:]:
            # Only retain a dependence if it's cross-ClusterGroup
            rule = lambda i: i.is_cross
            scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=rule)

            # Optimization: we exploit the following property:
            # no prefix => (edge <=> at least one (any) dependence)
            # to jump out of this potentially expensive loop as quickly as possible
            if not prefix and any(scope.d_all_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Handle anti-dependences
            if any(i.cause & prefix for i in scope.d_anti_gen()):
                # Anti-dependences break the execution flow
                # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                for cg2 in cgroups[n:cgroups.index(cg1)]:
                    dag.add_edge(cg2, cg1)
                # ii) ClusterGroups after `cg1` cannot precede `cg1`
                for cg2 in cgroups[cgroups.index(cg1) + 1:]:
                    dag.add_edge(cg1, cg2)
                break
            elif any(scope.d_anti_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Flow-dependences along one of the `prefix` Dimensions can
            # be ignored; all others require sequentialization
            if any(not (i.cause and i.cause & prefix) for i in scope.d_flow_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Handle increment-after-write dependences
            if any(i.is_iaw for i in scope.d_output_gen()):
                dag.add_edge(cg0, cg1)
                continue

    return dag
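# Sketch of the edge pattern produced when an anti-dependence along `prefix`
# is found between cg0 and cg1: every ClusterGroup between them must precede
# cg1, and every ClusterGroup after cg1 must follow it, effectively pinning
# cg1's position in the schedule. Strings stand in for ClusterGroups.
def barrier_edges(cgroups, n, m):
    # n, m: indices of cg0 and cg1 respectively, with n < m
    cg1 = cgroups[m]
    edges = [(cg2, cg1) for cg2 in cgroups[n:m]]        # i) must precede cg1
    edges += [(cg1, cg2) for cg2 in cgroups[m + 1:]]    # ii) must follow cg1
    return edges

print(barrier_edges(['cg0', 'cgA', 'cg1', 'cgB'], 0, 2))
# [('cg0', 'cg1'), ('cgA', 'cg1'), ('cg1', 'cgB')]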
class State(object):

    def __init__(self, iet):
        self._efuncs = OrderedDict([('main', iet)])
        self._dimensions = []
        self._input = []
        self._includes = []
        self._call_graph = DAG(nodes=['main'])

    def _process(self, func):
        """Apply ``func`` to all tracked ``IETs``."""
        for i in self._call_graph.topological_sort():
            self._efuncs[i], metadata = func(self._efuncs[i])

            # Track any new Dimensions and includes introduced by `func`
            self._dimensions.extend(list(metadata.get('dimensions', [])))
            self._includes.extend(list(metadata.get('includes', [])))

            # If there's a change to the `input` and the `iet` is an efunc, then
            # we must update the call sites as well, as the arguments dropped down
            # to the efunc have just increased
            _input = as_tuple(metadata.get('input'))
            if _input:
                # `extif` avoids redundant updates to the parameters list, due
                # to multiple children wanting to add the same input argument
                extif = lambda v: list(v) + [e for e in _input if e not in v]
                stack = [i] + self._call_graph.all_downstreams(i)
                for n in stack:
                    efunc = self._efuncs[n]
                    calls = [c for c in FindNodes(Call).visit(efunc)
                             if c.name in stack]
                    mapper = {c: c._rebuild(arguments=extif(c.arguments))
                              for c in calls}
                    efunc = Transformer(mapper).visit(efunc)
                    if efunc.is_Callable:
                        efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                    self._efuncs[n] = efunc
                self._input.extend(list(_input))

            for k, v in metadata.get('efuncs', {}).items():
                # Update the efuncs
                if k.is_Callable:
                    self._efuncs[k.name] = k
                # Update the call graph
                self._call_graph.add_node(k.name, ignore_existing=True)
                for target in (v or [None]):
                    self._call_graph.add_edge(k.name, target or 'main',
                                              force_add=True)

    @property
    def root(self):
        return self._efuncs['main']

    @property
    def efuncs(self):
        return tuple(v for k, v in self._efuncs.items() if k != 'main')

    @property
    def dimensions(self):
        return self._dimensions

    @property
    def input(self):
        return self._input

    @property
    def includes(self):
        return self._includes
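# The contract `_process` expects of `func`, sketched with a no-op pass: take
# an IET, return the (possibly rebuilt) IET plus a metadata dict whose
# recognized keys are 'dimensions', 'includes', 'input' and 'efuncs'. The
# pass name is illustrative; real passes rebuild the IET before returning it.
def noop_pass(iet):
    metadata = {
        'dimensions': [],   # New Dimensions introduced by the pass
        'includes': [],     # New #include directives required
        'input': [],        # New input arguments dropped down to the efuncs
        'efuncs': {},       # New Callables, mapped to their call-site targets
    }
    return iet, metadata

# Usage, assuming `iet` is an actual Iteration/Expression tree:
# state = State(iet)
# state._process(noop_pass)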