def _build_dag(self, cgroups, prefix):
    """
    Build a DAG representing the data dependences across the ClusterGroups
    within a given scope.

    Parameters
    ----------
    cgroups : sequence
        The ClusterGroups, in their current order. They become the nodes of
        the DAG; each one must expose an ``exprs`` attribute.
    prefix : object or iterable of objects
        Normalized through ``as_tuple``; only the ``dim`` attribute of each
        item is retained. Presumably the iteration intervals shared by all
        ``cgroups`` -- confirm against the caller.

    Returns
    -------
    DAG
        A DAG in which an edge ``(a, b)`` means that `b` must follow `a`.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    # Only cross-ClusterGroup dependences are retained. The rule does not
    # depend on the loop variables, so it is created once
    def is_cross(dep):
        return dep.is_cross

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        # `n1` is the position of `cg1` within `cgroups`; tracking it here
        # avoids repeated linear `cgroups.index(cg1)` lookups
        for n1, cg1 in enumerate(cgroups[n + 1:], start=n + 1):
            scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=is_cross)

            # Optimization: we exploit the following property:
            # no prefix => (edge <=> at least one (any) dependence)
            # to jump out of this potentially expensive loop as quickly as possible
            if not prefix and any(scope.d_all_gen()):
                dag.add_edge(cg0, cg1)

            # Anti-dependences along `prefix` break the execution flow
            # (intuitively, "the loop nests are to be kept separated")
            # * All ClusterGroups between `cg0` and `cg1` must precede `cg1`
            # * All ClusterGroups after `cg1` cannot precede `cg1`
            elif any(i.cause & prefix for i in scope.d_anti_gen()):
                for cg2 in cgroups[n:n1]:
                    dag.add_edge(cg2, cg1)
                for cg2 in cgroups[n1 + 1:]:
                    dag.add_edge(cg1, cg2)
                break

            # Any anti- and iaw-dependences impose that `cg1` follows `cg0`
            # while not being its immediate successor (unless it already is),
            # to avoid they are fused together (thus breaking the dependence)
            # TODO: the "not being its immediate successor" part *seems* to be
            # a work around to the fact that any two Clusters characterized
            # by anti-dependence should have been given a different stamp,
            # and same for guarded Clusters, but that is not the case (yet)
            elif any(scope.d_anti_gen()) or \
                    any(i.is_iaw for i in scope.d_output_gen()):
                dag.add_edge(cg0, cg1)
                # Position of the ClusterGroup immediately preceding `cg1`
                index = n1 - 1
                if index > n and self._key(cg0) == self._key(cg1):
                    # Route the dependence through the predecessor of `cg1`
                    dag.add_edge(cg0, cgroups[index])
                    dag.add_edge(cgroups[index], cg1)

            # Any flow-dependences along an inner Dimension (i.e., a Dimension
            # that doesn't appear in `prefix`) impose that `cg1` follows `cg0`
            elif any(not (i.cause and i.cause & prefix) for i in scope.d_flow_gen()):
                dag.add_edge(cg0, cg1)

            # Clearly, output dependences must be honored
            elif any(scope.d_output_gen()):
                dag.add_edge(cg0, cg1)

    return dag
def _build_dag(self, cgroups, prefix):
    """
    A DAG captures data dependences between ClusterGroups up to the iteration
    space depth dictated by ``prefix``.

    Parameters
    ----------
    cgroups : sequence
        The ClusterGroups, in their current order. They become the nodes of
        the DAG; each one must expose an ``exprs`` attribute.
    prefix : object or iterable of objects
        Normalized through ``as_tuple``; only the ``dim`` attribute of each
        item is retained.

    Returns
    -------
    DAG
        A DAG in which an edge ``(a, b)`` means that `b` must follow `a`.

    Examples
    --------
    Consider two ClusterGroups `cg0` and `cg1`, and ``prefix=[i]``.

    1) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j] ...
       Non-carried flow-dependence, so `cg1` must go after `cg0`.

    2) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j-1] ...
       Carried flow-dependence in `j`, so `cg1` must go after `cg0`.

    3) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i, j+1] ...
       Carried anti-dependence in `j`, so `cg1` must go after `cg0`.

    4) cg0 := b[i, j] = ...
       cg1 := ... = ... b[i-1, j+1] ...
       Carried flow-dependence in `i`, so `cg1` can safely go before or after
       `cg0`. Note: the `j+1` in `cg1` has no impact -- the actual dependence
       between `b[i, j]` and `b[i-1, j+1]` is along `i`.
    """
    prefix = {i.dim for i in as_tuple(prefix)}

    # Only retain dep if cross-ClusterGroup. The rule does not depend on the
    # loop variables, so it is created once
    def is_cross(dep):
        return dep.is_cross

    dag = DAG(nodes=cgroups)
    for n, cg0 in enumerate(cgroups):
        # `n1` is the position of `cg1` within `cgroups`; tracking it here
        # avoids repeated linear `cgroups.index(cg1)` lookups
        for n1, cg1 in enumerate(cgroups[n + 1:], start=n + 1):
            scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=is_cross)

            # Optimization: we exploit the following property:
            # no prefix => (edge <=> at least one (any) dependence)
            # To jump out of this potentially expensive loop as quickly as possible
            if not prefix and any(scope.d_all_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Handle anti-dependences
            if any(i.cause & prefix for i in scope.d_anti_gen()):
                # Anti-dependences break the execution flow
                # i) ClusterGroups between `cg0` and `cg1` must precede `cg1`
                for cg2 in cgroups[n:n1]:
                    dag.add_edge(cg2, cg1)
                # ii) ClusterGroups after `cg1` cannot precede `cg1`
                for cg2 in cgroups[n1 + 1:]:
                    dag.add_edge(cg1, cg2)
                # Every later ClusterGroup is now ordered after `cg1`, so
                # there is nothing left to inspect for `cg0`
                break
            elif any(scope.d_anti_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Flow-dependences along one of the `prefix` Dimensions can
            # be ignored; all others require sequentialization
            if any(not (i.cause and i.cause & prefix) for i in scope.d_flow_gen()):
                dag.add_edge(cg0, cg1)
                continue

            # Handle increment-after-write dependences
            if any(i.is_iaw for i in scope.d_output_gen()):
                dag.add_edge(cg0, cg1)

    return dag