def apply(self, sdfg):
    """Remove the redundant (first) array by redirecting its writers.

    Assumes the pattern ``others -- e --> in_array -- ... --> out_array``:
    every edge that writes into ``in_array`` is retargeted to write
    directly into ``out_array``, then ``in_array`` is removed from the
    state graph (the data descriptor itself is left in the SDFG, see
    TODO below).
    """
    # Resolve a pattern-node name to the matched node in this state.
    def gnode(nname):
        return graph.nodes()[self.subgraph[nname]]

    graph = sdfg.nodes()[self.state_id]
    in_array = gnode(RedundantArray._in_array)
    out_array = gnode(RedundantArray._out_array)

    # NOTE(review): this mutates edges while iterating; assumes
    # graph.in_edges() returns a fresh list — confirm in the graph API.
    for e in graph.in_edges(in_array):
        # Modify all incoming edges to point to out_array: every memlet
        # along the path that referred to in_array's data is renamed.
        path = graph.memlet_path(e)
        for pe in path:
            if pe.data.data == in_array.data:
                pe.data.data = out_array.data

        # Redirect edge to out_array (same source, same memlet).
        graph.remove_edge(e)
        graph.add_edge(e.src, e.src_conn, out_array, e.dst_conn, e.data)

    # Finally, remove in_array node from the state graph.
    graph.remove_node(in_array)
    # TODO: Should the array be removed from the SDFG?
    # del sdfg.arrays[in_array]
    if Config.get_bool("debugprint"):
        RedundantArray._arrays_removed += 1
def apply(self, sdfg):
    """Remove the redundant (second) array by redirecting its readers.

    Assumes the pattern ``A -- e1 --> B -- e2 --> others``: every
    outgoing edge of ``B`` (out_array) is rewritten so that its memlet
    refers to ``A`` (in_array) with a correspondingly composed subset,
    then ``B`` is removed from the state graph.
    """
    # Resolve a pattern-node name to the matched node in this state.
    def gnode(nname):
        return graph.nodes()[self.subgraph[nname]]

    graph = sdfg.nodes()[self.state_id]
    in_array = gnode(RedundantSecondArray._in_array)
    out_array = gnode(RedundantSecondArray._out_array)

    # We assume the following pattern: A -- e1 --> B -- e2 --> others

    # 1. Get edge e1 and extract subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)

    # 2. Iterate over the e2 edges and traverse the memlet tree
    for e2 in graph.out_edges(out_array):
        path = graph.memlet_tree(e2)
        for e3 in path:
            # 2-a. Extract subsets for array B and others
            b3_subset, other_subset = _validate_subsets(
                e3, sdfg.arrays, src_name=out_array.data)
            # 2-b. Modify memlet to match array A. Example:
            # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
            # A -- (0, a+d)/None --> others
            e3.data.data = in_array.data
            # (c+d) - (c:c+b) = (d)
            b3_subset.offset(b1_subset, negative=True)
            # (0, a:b)(d) = (0, a+d) (or offset for indices)
            if isinstance(a_subset, subsets.Indices):
                # Indices cannot be composed; offset a copy instead.
                tmp = copy.deepcopy(a_subset)
                tmp.offset(b3_subset, negative=False)
                e3.data.subset = tmp
            else:
                e3.data.subset = a_subset.compose(b3_subset)
            e3.data.other_subset = other_subset
        # 2-c. Remove edge and add new one (same memlet, source is now A)
        graph.remove_edge(e2)
        graph.add_edge(in_array, e2.src_conn, e2.dst, e2.dst_conn, e2.data)

    # Finally, remove out_array node from the state graph.
    graph.remove_node(out_array)
    # TODO: Should the array be removed from the SDFG?
    # del sdfg.arrays[out_array]
    if Config.get_bool("debugprint"):
        RedundantSecondArray._arrays_removed += 1
def _make_view(self, sdfg: SDFG, graph: SDFGState,
               in_array: nodes.AccessNode, out_array: nodes.AccessNode,
               e1: graph.MultiConnectorEdge[mm.Memlet],
               b_subset: subsets.Subset, b_dims_to_pop: typing.List[int]):
    """Turn ``in_array``'s descriptor into a View of ``out_array``.

    Fallback used when the copy ``in_array -> out_array`` cannot simply
    be removed. If all immediate ancestors of ``in_array`` are already
    Views, ``in_array`` is removed instead of stacking another View.
    """
    in_desc = sdfg.arrays[in_array.data]
    out_desc = sdfg.arrays[out_array.data]
    # NOTE: We do not want to create another view, if the immediate
    # ancestors of in_array are views as well. We just remove it.
    in_ancestors_desc = [
        e.src.desc(sdfg) if isinstance(e.src, nodes.AccessNode) else None
        for e in graph.in_edges(in_array)
    ]
    if all([
            desc and isinstance(desc, data.View)
            for desc in in_ancestors_desc
    ]):
        # Reconnect each ancestor directly to out_array, carrying over
        # the WCR attributes of the removed copy edge e1.
        for e in graph.in_edges(in_array):
            a_subset, _ = _validate_subsets(e, sdfg.arrays)
            graph.add_edge(
                e.src, e.src_conn, out_array, None,
                mm.Memlet(out_array.data,
                          subset=b_subset,
                          other_subset=a_subset,
                          wcr=e1.data.wcr,
                          wcr_nonatomic=e1.data.wcr_nonatomic))
            graph.remove_edge(e)
        graph.remove_edge(e1)
        graph.remove_node(in_array)
        if in_array.data in sdfg.arrays:
            del sdfg.arrays[in_array.data]
        return
    # Otherwise, replace in_array's descriptor with a View into
    # out_array. If out_array has extra (popped) dimensions, drop the
    # corresponding strides so that rank matches in_desc.
    view_strides = in_desc.strides
    if (b_dims_to_pop and
            len(b_dims_to_pop) == len(out_desc.shape) - len(in_desc.shape)):
        view_strides = [
            s for i, s in enumerate(out_desc.strides)
            if i not in b_dims_to_pop
        ]
    # Keep in_desc's dtype/shape/offset; take storage/location/aliasing
    # from the viewed array (out_desc).
    sdfg.arrays[in_array.data] = data.View(
        in_desc.dtype, in_desc.shape, True, in_desc.allow_conflicts,
        out_desc.storage, out_desc.location, view_strides, in_desc.offset,
        out_desc.may_alias, dtypes.AllocationLifetime.Scope,
        in_desc.alignment, in_desc.debuginfo, in_desc.total_size)
def redirect_edge(graph,
                  edge,
                  new_src=None,
                  new_src_conn=None,
                  new_dst=None,
                  new_dst_conn=None,
                  new_data=None):
    """Replace ``edge`` with a new edge with redirected endpoint(s).

    :param graph: Graph containing ``edge``; must support ``add_edge``
                  and ``remove_edge``.
    :param edge: The edge to redirect.
    :param new_src: New source node (keeps ``edge.src`` if None).
    :param new_src_conn: Source connector for the new edge.
    :param new_dst: New destination node (keeps ``edge.dst`` if None).
    :param new_dst_conn: Destination connector for the new edge.
    :param new_data: Data (memlet) for the new edge; defaults to
                     ``edge.data``.
    :return: The newly added edge, or None if neither ``new_src`` nor
             ``new_dst`` was given (no redirection performed).
    """
    # Renamed from `data` to avoid shadowing the `data` module used
    # elsewhere in this file.
    edge_data = new_data if new_data else edge.data
    # FIX: `ret` was previously unbound when no endpoint was given
    # (the old `else: pass` fell through to `return ret` -> NameError).
    ret = None
    if new_src and new_dst:
        ret = graph.add_edge(new_src, new_src_conn, new_dst, new_dst_conn,
                             edge_data)
        graph.remove_edge(edge)
    elif new_src:
        ret = graph.add_edge(new_src, new_src_conn, edge.dst, edge.dst_conn,
                             edge_data)
        graph.remove_edge(edge)
    elif new_dst:
        ret = graph.add_edge(edge.src, edge.src_conn, new_dst, new_dst_conn,
                             edge_data)
        graph.remove_edge(edge)
    return ret
def apply(self, sdfg):
    """Remove the redundant (first) array, or turn it into a View.

    If ``in_array`` and ``out_array`` have different shapes, the copy
    cannot simply be elided; instead ``in_array``'s descriptor is
    replaced by a View of ``out_array`` (a reshape) and the graph is
    left untouched. Otherwise, all writers of ``in_array`` are
    redirected to write into ``out_array`` and ``in_array`` is removed
    from both the state graph and the SDFG's data descriptors.
    """
    # FIX: removed the unused inner helper `gnode` — this method obtains
    # the matched nodes via self.in_array/self.out_array instead.
    graph = sdfg.nodes()[self.state_id]
    in_array = self.in_array(sdfg)
    out_array = self.out_array(sdfg)
    in_desc = sdfg.arrays[in_array.data]
    out_desc = sdfg.arrays[out_array.data]

    # If arrays are not of the same shape, modify first array to reshape
    # instead of removing it
    if len(in_desc.shape) != len(out_desc.shape) or any(
            i != o for i, o in zip(in_desc.shape, out_desc.shape)):
        sdfg.arrays[in_array.data] = data.View(
            in_desc.dtype, in_desc.shape, True, in_desc.allow_conflicts,
            out_desc.storage, out_desc.location, in_desc.strides,
            in_desc.offset, out_desc.may_alias, out_desc.lifetime,
            in_desc.alignment, in_desc.debuginfo, in_desc.total_size)
        return

    for e in graph.in_edges(in_array):
        # Modify all incoming edges to point to out_array: rename every
        # memlet on the path that referred to in_array's data.
        path = graph.memlet_path(e)
        for pe in path:
            if pe.data.data == in_array.data:
                pe.data.data = out_array.data

        # Redirect edge to out_array (same source, same memlet).
        graph.remove_edge(e)
        graph.add_edge(e.src, e.src_conn, out_array, e.dst_conn, e.data)

    # Finally, remove in_array node and its (now unused) descriptor.
    graph.remove_node(in_array)
    if in_array.data in sdfg.arrays:
        del sdfg.arrays[in_array.data]
def expansion(node: 'Reduce', state: SDFGState, sdfg: SDFG):
    """ Create a map around the BlockReduce node
        with in and out transients in registers
        and an if tasklet that redirects the output
        of thread 0 to a shared memory transient
    """
    ### define some useful vars
    graph = state
    reduce_node = node
    in_edge = graph.in_edges(reduce_node)[0]
    out_edge = graph.out_edges(reduce_node)[0]

    axes = reduce_node.axes

    ### add a map that encloses the reduce node
    # One map parameter per reduced axis, spanning that axis' range.
    (new_entry, new_exit) = graph.add_map(
        name='inner_reduce_block',
        ndrange={
            'i' + str(i): f'{rng[0]}:{rng[1]+1}:{rng[2]}'
            for (i, rng) in enumerate(in_edge.data.subset) if i in axes
        },
        schedule=dtypes.ScheduleType.Default)

    # NOTE(review): `map` shadows the builtin and appears unused below.
    map = new_entry.map
    ExpandReduceCUDABlockAll.redirect_edge(graph, in_edge, new_dst=new_entry)
    ExpandReduceCUDABlockAll.redirect_edge(graph, out_edge, new_src=new_exit)

    # Input memlet inside the map: reduced axes collapse to the first
    # map parameter (volume 1 per iteration).
    subset_in = subsets.Range([
        in_edge.data.subset[i] if i not in axes else
        (new_entry.map.params[0], new_entry.map.params[0], 1)
        for i in range(len(in_edge.data.subset))
    ])
    memlet_in = dace.Memlet(data=in_edge.data.data,
                            volume=1,
                            subset=subset_in)
    memlet_out = dcpy(out_edge.data)
    graph.add_edge(u=new_entry,
                   u_connector=None,
                   v=reduce_node,
                   v_connector=None,
                   memlet=memlet_in)
    graph.add_edge(u=reduce_node,
                   u_connector=None,
                   v=new_exit,
                   v_connector=None,
                   memlet=memlet_out)

    ### add in and out local storage
    from dace.transformation.dataflow.local_storage import LocalStorage

    in_local_storage_subgraph = {
        LocalStorage._node_a: graph.nodes().index(new_entry),
        LocalStorage._node_b: graph.nodes().index(reduce_node)
    }
    out_local_storage_subgraph = {
        LocalStorage._node_a: graph.nodes().index(reduce_node),
        LocalStorage._node_b: graph.nodes().index(new_exit)
    }

    # In-transient between map entry and reduce node, in registers.
    local_storage = LocalStorage(sdfg.sdfg_id, sdfg.nodes().index(state),
                                 in_local_storage_subgraph, 0)
    local_storage.array = in_edge.data.data
    local_storage.apply(sdfg)
    in_transient = local_storage._data_node
    sdfg.data(in_transient.data).storage = dtypes.StorageType.Register

    # Out-transient between reduce node and map exit, in registers.
    local_storage = LocalStorage(sdfg.sdfg_id, sdfg.nodes().index(state),
                                 out_local_storage_subgraph, 0)
    local_storage.array = out_edge.data.data
    local_storage.apply(sdfg)
    out_transient = local_storage._data_node
    sdfg.data(out_transient.data).storage = dtypes.StorageType.Register

    # hack: swap edges as local_storage does not work correctly here
    # as subsets and data get assigned wrongly (should be swapped)
    # NOTE: If local_storage ever changes, this will not work any more
    e1 = graph.in_edges(out_transient)[0]
    e2 = graph.out_edges(out_transient)[0]
    e1.data.data = dcpy(e2.data.data)
    e1.data.subset = dcpy(e2.data.subset)

    ### add an if tasket and diverge
    # Generated tasklet only writes the result on the thread where all
    # map parameters are zero.
    code = 'if '
    for (i, param) in enumerate(new_entry.map.params):
        code += (param + '== 0')
        if i < len(axes) - 1:
            code += ' and '
    code += ':\n'
    code += '\tout=inp'

    tasklet_node = graph.add_tasklet(name='block_reduce_write',
                                     inputs=['inp'],
                                     outputs=['out'],
                                     code=code)

    edge_out_outtrans = graph.out_edges(out_transient)[0]
    edge_out_innerexit = graph.out_edges(new_exit)[0]
    # Route: out_transient -> tasklet -> new_exit.
    ExpandReduceCUDABlockAll.redirect_edge(graph,
                                           edge_out_outtrans,
                                           new_dst=tasklet_node,
                                           new_dst_conn='inp')
    e = graph.add_edge(u=tasklet_node,
                       u_connector='out',
                       v=new_exit,
                       v_connector=None,
                       memlet=dcpy(edge_out_innerexit.data))
    # set dynamic with volume 0 FORNOW
    e.data.volume = 0
    e.data.dynamic = True

    ### set reduce_node axes to all (needed)
    reduce_node.axes = None

    # fill scope connectors, done.
    sdfg.fill_scope_connectors()

    # finally, change the implementation to cuda (block)
    # itself and expand again.
    reduce_node.implementation = 'CUDA (block)'
    sub_expansion = ExpandReduceCUDABlock(0, 0, {}, 0)
    return sub_expansion.expansion(node=node, state=state, sdfg=sdfg)
def apply(self, sdfg):
    """Remove the redundant (second) array B in ``A -> B -> others``.

    Readers of ``B`` (out_array) are redirected to read from ``A``
    (in_array), with subsets recomposed (and rank-mismatched dimensions
    popped/pushed back). If the ``A -> B`` memlet does not cover ``B``,
    ``B`` becomes a View of ``A`` (or is removed if all of its
    successors are already Views).
    """
    # Resolve a pattern-node name to the matched node in this state.
    def gnode(nname):
        return graph.nodes()[self.subgraph[nname]]

    graph = sdfg.nodes()[self.state_id]
    in_array = gnode(RedundantSecondArray._in_array)
    out_array = gnode(RedundantSecondArray._out_array)
    in_desc = sdfg.arrays[in_array.data]
    out_desc = sdfg.arrays[out_array.data]

    # We assume the following pattern: A -- e1 --> B -- e2 --> others

    # 1. Get edge e1 and extract subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)

    # Find extraneous A or B subset dimensions (rank mismatch between
    # the two sides of e1).
    a_dims_to_pop = []
    b_dims_to_pop = []
    aset = a_subset
    popped = []
    if a_subset and b1_subset and a_subset.dims() != b1_subset.dims():
        a_size = a_subset.size_exact()
        b_size = b1_subset.size_exact()
        if a_subset.dims() > b1_subset.dims():
            a_dims_to_pop = find_dims_to_pop(a_size, b_size)
            aset, popped = pop_dims(a_subset, a_dims_to_pop)
        else:
            b_dims_to_pop = find_dims_to_pop(b_size, a_size)

    # If the src subset does not cover the removed array, create a view.
    if a_subset and any(m != a
                        for m, a in zip(a_subset.size(), out_desc.shape)):
        # NOTE: We do not want to create another view, if the immediate
        # successors of out_array are views as well. We just remove it.
        out_successors_desc = [
            e.dst.desc(sdfg) if isinstance(e.dst, nodes.AccessNode) else None
            for e in graph.out_edges(out_array)
        ]
        if all([
                desc and isinstance(desc, data.View)
                for desc in out_successors_desc
        ]):
            # Connect A directly to each successor, carrying over the
            # WCR attributes of the removed copy edge e1.
            for e in graph.out_edges(out_array):
                _, b_subset = _validate_subsets(e, sdfg.arrays)
                graph.add_edge(
                    in_array, None, e.dst, e.dst_conn,
                    mm.Memlet(in_array.data,
                              subset=a_subset,
                              other_subset=b_subset,
                              wcr=e1.data.wcr,
                              wcr_nonatomic=e1.data.wcr_nonatomic))
                graph.remove_edge(e)
            graph.remove_edge(e1)
            graph.remove_node(out_array)
            if out_array.data in sdfg.arrays:
                del sdfg.arrays[out_array.data]
            return
        # Replace B's descriptor with a View of A; if A has extra
        # (popped) dimensions, drop the corresponding strides.
        view_strides = out_desc.strides
        if (a_dims_to_pop and len(a_dims_to_pop)
                == len(in_desc.shape) - len(out_desc.shape)):
            view_strides = [
                s for i, s in enumerate(in_desc.strides)
                if i not in a_dims_to_pop
            ]
        sdfg.arrays[out_array.data] = data.View(
            out_desc.dtype, out_desc.shape, True, out_desc.allow_conflicts,
            in_desc.storage, in_desc.location, view_strides,
            out_desc.offset, in_desc.may_alias,
            dtypes.AllocationLifetime.Scope, out_desc.alignment,
            out_desc.debuginfo, out_desc.total_size)
        return

    # 2. Iterate over the e2 edges and traverse the memlet tree
    for e2 in graph.out_edges(out_array):
        path = graph.memlet_tree(e2)
        # Accumulate WCR attributes over the tree so the rewritten
        # edges keep any conflict resolution from e1 or the tree.
        wcr = e1.data.wcr
        wcr_nonatomic = e1.data.wcr_nonatomic
        for e3 in path:
            # 2-a. Extract subsets for array B and others
            b3_subset, other_subset = _validate_subsets(
                e3, sdfg.arrays, src_name=out_array.data)
            # 2-b. Modify memlet to match array A. Example:
            # A -- (0, a:b)/(c:c+b) --> B -- (c+d)/None --> others
            # A -- (0, a+d)/None --> others
            e3.data.data = in_array.data
            # (c+d) - (c:c+b) = (d)
            b3_subset.offset(b1_subset, negative=True)
            # (0, a:b)(d) = (0, a+d) (or offset for indices)
            if b3_subset and b_dims_to_pop:
                bset, _ = pop_dims(b3_subset, b_dims_to_pop)
            else:
                bset = b3_subset
            e3.data.subset = compose_and_push_back(aset, bset,
                                                   a_dims_to_pop, popped)
            # NOTE: This fixes the following case:
            # A ----> A[subset] ----> ... -----> Tasklet
            # Tasklet is not data, so it doesn't have an other subset.
            if isinstance(e3.dst, nodes.AccessNode):
                e3.data.other_subset = other_subset
            else:
                e3.data.other_subset = None
            wcr = wcr or e3.data.wcr
            wcr_nonatomic = wcr_nonatomic or e3.data.wcr_nonatomic
            e3.data.wcr = wcr
            e3.data.wcr_nonatomic = wcr_nonatomic
        # 2-c. Remove edge and add new one (source is now A)
        graph.remove_edge(e2)
        e2.data.wcr = wcr
        e2.data.wcr_nonatomic = wcr_nonatomic
        graph.add_edge(in_array, e2.src_conn, e2.dst, e2.dst_conn, e2.data)

    # Finally, remove out_array node and try to drop its descriptor.
    graph.remove_node(out_array)
    if out_array.data in sdfg.arrays:
        try:
            sdfg.remove_data(out_array.data)
        except ValueError:
            # Already in use (e.g., with Views)
            pass
def apply(self, sdfg):
    """Remove the redundant (first) array A in ``others -> A -> B``.

    Writers of ``A`` (in_array) are redirected to write into ``B``
    (out_array), with subsets recomposed. Falls back to ``_make_view``
    when a reduction feeds ``A``, when the memlet does not cover ``A``,
    or when the subsets cannot be composed.
    """
    graph = sdfg.nodes()[self.state_id]
    in_array = self.in_array(sdfg)
    out_array = self.out_array(sdfg)
    in_desc = sdfg.arrays[in_array.data]
    out_desc = sdfg.arrays[out_array.data]

    # 1. Get edge e1 and extract subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    a1_subset, b_subset = _validate_subsets(e1, sdfg.arrays)

    # View connected to a view: simple case — reconnect A's writers
    # directly to B and drop A.
    if (isinstance(in_desc, data.View)
            and isinstance(out_desc, data.View)):
        for e in graph.in_edges(in_array):
            new_memlet = copy.deepcopy(e.data)
            # FIX: was `e.dst_subset = b_subset`, which modified the old
            # edge (destroyed below with remove_node) instead of the
            # copied memlet that is actually attached to the new edge.
            new_memlet.dst_subset = b_subset
            graph.add_edge(e.src, e.src_conn, out_array, e.dst_conn,
                           new_memlet)
        graph.remove_node(in_array)
        if in_array.data in sdfg.arrays:
            del sdfg.arrays[in_array.data]
        return

    # Find extraneous A or B subset dimensions (rank mismatch on e1).
    a_dims_to_pop = []
    b_dims_to_pop = []
    bset = b_subset
    popped = []
    if a1_subset and b_subset and a1_subset.dims() != b_subset.dims():
        a_size = a1_subset.size_exact()
        b_size = b_subset.size_exact()
        if a1_subset.dims() > b_subset.dims():
            a_dims_to_pop = find_dims_to_pop(a_size, b_size)
        else:
            b_dims_to_pop = find_dims_to_pop(b_size, a_size)
            bset, popped = pop_dims(b_subset, b_dims_to_pop)

    from dace.libraries.standard import Reduce
    reduction = False
    for e in graph.in_edges(in_array):
        if isinstance(e.src, Reduce):
            reduction = True

    # If:
    # 1. A reduce node is involved;
    # 2. The memlet does not cover the removed array; or
    # 3. Dimensions are mismatching (all dimensions are popped);
    # create a view.
    if reduction or len(a_dims_to_pop) == len(in_desc.shape) or any(
            m != a for m, a in zip(a1_subset.size(), in_desc.shape)):
        self._make_view(sdfg, graph, in_array, out_array, e1, b_subset,
                        b_dims_to_pop)
        return

    # Validate that subsets are composable. If not, make a view.
    # (Dry run: same traversal as below without mutating the graph.)
    try:
        for e2 in graph.in_edges(in_array):
            path = graph.memlet_tree(e2)
            wcr = e1.data.wcr
            wcr_nonatomic = e1.data.wcr_nonatomic
            for e3 in path:
                # 2-a. Extract subsets for array B and others
                other_subset, a3_subset = _validate_subsets(
                    e3, sdfg.arrays, dst_name=in_array.data)
                # 2-b. Modify memlet to match array B.
                dname = out_array.data
                src_is_data = False
                a3_subset.offset(a1_subset, negative=True)
                if a3_subset and a_dims_to_pop:
                    aset, _ = pop_dims(a3_subset, a_dims_to_pop)
                else:
                    aset = a3_subset
                compose_and_push_back(bset, aset, b_dims_to_pop, popped)
    except (ValueError, NotImplementedError):
        self._make_view(sdfg, graph, in_array, out_array, e1, b_subset,
                        b_dims_to_pop)
        return

    # 2. Iterate over the e2 edges and traverse the memlet tree
    for e2 in graph.in_edges(in_array):
        path = graph.memlet_tree(e2)
        # Accumulate WCR attributes across the tree.
        wcr = e1.data.wcr
        wcr_nonatomic = e1.data.wcr_nonatomic
        for e3 in path:
            # 2-a. Extract subsets for array B and others
            other_subset, a3_subset = _validate_subsets(
                e3, sdfg.arrays, dst_name=in_array.data)
            # 2-b. Modify memlet to match array B.
            dname = out_array.data
            src_is_data = False
            a3_subset.offset(a1_subset, negative=True)
            if a3_subset and a_dims_to_pop:
                aset, _ = pop_dims(a3_subset, a_dims_to_pop)
            else:
                aset = a3_subset
            dst_subset = compose_and_push_back(bset, aset, b_dims_to_pop,
                                               popped)
            # NOTE: This fixes the following case:
            # Tasklet ----> A[subset] ----> ... -----> A
            # Tasklet is not data, so it doesn't have an other subset.
            if isinstance(e3.src, nodes.AccessNode):
                if e3.src.data == out_array.data:
                    dname = e3.src.data
                    src_is_data = True
                src_subset = other_subset
            else:
                src_subset = None
            subset = src_subset if src_is_data else dst_subset
            other_subset = dst_subset if src_is_data else src_subset
            e3.data.data = dname
            e3.data.subset = subset
            e3.data.other_subset = other_subset
            wcr = wcr or e3.data.wcr
            wcr_nonatomic = wcr_nonatomic or e3.data.wcr_nonatomic
            e3.data.wcr = wcr
            e3.data.wcr_nonatomic = wcr_nonatomic
        # 2-c. Remove edge and add new one (destination is now B)
        graph.remove_edge(e2)
        e2.data.wcr = wcr
        e2.data.wcr_nonatomic = wcr_nonatomic
        graph.add_edge(e2.src, e2.src_conn, out_array, e2.dst_conn, e2.data)

    # Finally, remove in_array node and try to drop its descriptor.
    graph.remove_node(in_array)
    try:
        if in_array.data in sdfg.arrays:
            sdfg.remove_data(in_array.data)
    except ValueError:
        # Already in use (e.g., with Views)
        pass
def apply(self, sdfg):
    """Remove the redundant (first) array A in ``others -> A -> B``.

    Writers of ``A`` (in_array) are redirected to write into ``B``
    (out_array). If a reduction feeds ``A`` or the memlet does not cover
    ``A``, ``A`` becomes a View of ``B`` (or is removed directly when
    all its ancestors are already Views).
    """
    graph = sdfg.nodes()[self.state_id]
    in_array = self.in_array(sdfg)
    out_array = self.out_array(sdfg)
    in_desc = sdfg.arrays[in_array.data]
    out_desc = sdfg.arrays[out_array.data]

    # 1. Get edge e1 and extract subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    a1_subset, b_subset = _validate_subsets(e1, sdfg.arrays)

    # Find extraneous A or B subset dimensions (rank mismatch on e1).
    a_dims_to_pop = []
    b_dims_to_pop = []
    bset = b_subset
    popped = []
    if a1_subset and b_subset and a1_subset.dims() != b_subset.dims():
        a_size = a1_subset.size_exact()
        b_size = b_subset.size_exact()
        if a1_subset.dims() > b_subset.dims():
            a_dims_to_pop = find_dims_to_pop(a_size, b_size)
        else:
            b_dims_to_pop = find_dims_to_pop(b_size, a_size)
            bset, popped = pop_dims(b_subset, b_dims_to_pop)

    from dace.libraries.standard import Reduce
    reduction = False
    for e in graph.in_edges(in_array):
        if isinstance(e.src, Reduce):
            reduction = True

    # If the memlet does not cover the removed array, create a view.
    if reduction or any(m != a
                        for m, a in zip(a1_subset.size(), in_desc.shape)):
        # NOTE: We do not want to create another view, if the immediate
        # ancestors of in_array are views as well. We just remove it.
        in_ancestors_desc = [
            e.src.desc(sdfg) if isinstance(e.src, nodes.AccessNode) else None
            for e in graph.in_edges(in_array)
        ]
        if all([
                desc and isinstance(desc, data.View)
                for desc in in_ancestors_desc
        ]):
            for e in graph.in_edges(in_array):
                a_subset, _ = _validate_subsets(e, sdfg.arrays)
                graph.add_edge(
                    e.src, e.src_conn, out_array, None,
                    mm.Memlet(out_array.data,
                              subset=b_subset,
                              other_subset=a_subset,
                              wcr=e1.data.wcr,
                              # FIX: was `e1.data.wcr.nonatomic` — an
                              # attribute access on the wcr value, which
                              # raises AttributeError. The identical
                              # code paths elsewhere in this file use
                              # `e1.data.wcr_nonatomic`.
                              wcr_nonatomic=e1.data.wcr_nonatomic))
                graph.remove_edge(e)
            graph.remove_edge(e1)
            graph.remove_node(in_array)
            if in_array.data in sdfg.arrays:
                del sdfg.arrays[in_array.data]
            return
        # Replace A's descriptor with a View of B; if B has extra
        # (popped) dimensions, drop the corresponding strides.
        view_strides = in_desc.strides
        if (b_dims_to_pop and len(b_dims_to_pop)
                == len(out_desc.shape) - len(in_desc.shape)):
            view_strides = [
                s for i, s in enumerate(out_desc.strides)
                if i not in b_dims_to_pop
            ]
        sdfg.arrays[in_array.data] = data.View(
            in_desc.dtype, in_desc.shape, True, in_desc.allow_conflicts,
            out_desc.storage, out_desc.location, view_strides,
            in_desc.offset, out_desc.may_alias,
            dtypes.AllocationLifetime.Scope, in_desc.alignment,
            in_desc.debuginfo, in_desc.total_size)
        return

    # 2. Iterate over the e2 edges and traverse the memlet tree
    for e2 in graph.in_edges(in_array):
        path = graph.memlet_tree(e2)
        # Accumulate WCR attributes across the tree.
        wcr = e1.data.wcr
        wcr_nonatomic = e1.data.wcr_nonatomic
        for e3 in path:
            # 2-a. Extract subsets for array B and others
            other_subset, a3_subset = _validate_subsets(
                e3, sdfg.arrays, dst_name=in_array.data)
            # 2-b. Modify memlet to match array B.
            dname = out_array.data
            src_is_data = False
            a3_subset.offset(a1_subset, negative=True)
            if a3_subset and a_dims_to_pop:
                aset, _ = pop_dims(a3_subset, a_dims_to_pop)
            else:
                aset = a3_subset
            dst_subset = compose_and_push_back(bset, aset, b_dims_to_pop,
                                               popped)
            # NOTE: This fixes the following case:
            # Tasklet ----> A[subset] ----> ... -----> A
            # Tasklet is not data, so it doesn't have an other subset.
            if isinstance(e3.src, nodes.AccessNode):
                if e3.src.data == out_array.data:
                    dname = e3.src.data
                    src_is_data = True
                src_subset = other_subset
            else:
                src_subset = None
            subset = src_subset if src_is_data else dst_subset
            other_subset = dst_subset if src_is_data else src_subset
            e3.data.data = dname
            e3.data.subset = subset
            e3.data.other_subset = other_subset
            wcr = wcr or e3.data.wcr
            wcr_nonatomic = wcr_nonatomic or e3.data.wcr_nonatomic
            e3.data.wcr = wcr
            e3.data.wcr_nonatomic = wcr_nonatomic
        # 2-c. Remove edge and add new one (destination is now B)
        graph.remove_edge(e2)
        e2.data.wcr = wcr
        e2.data.wcr_nonatomic = wcr_nonatomic
        graph.add_edge(e2.src, e2.src_conn, out_array, e2.dst_conn, e2.data)

    # Finally, remove in_array node and its descriptor.
    graph.remove_node(in_array)
    if in_array.data in sdfg.arrays:
        del sdfg.arrays[in_array.data]
def expand(self, sdfg: SDFG, graph: SDFGState, reduce_node):
    """ Splits the data dimension into an inner and outer dimension,
        where the inner dimension are the reduction axes and the
        outer axes the complement. Pushes the reduce inside a new
        map consisting of the complement axes.
    """
    # get out storage node, might be hidden behind view node
    out_data = graph.out_edges(reduce_node)[0].data
    out_storage_node = reduce_node
    while not isinstance(out_storage_node, nodes.AccessNode):
        out_storage_node = graph.out_edges(out_storage_node)[0].dst
    if isinstance(sdfg.data(out_storage_node.data), View):
        # Skip past the view to the viewed storage node.
        out_storage_node = graph.out_edges(out_storage_node)[0].dst
        while not isinstance(out_storage_node, nodes.AccessNode):
            out_storage_node = graph.out_edges(out_storage_node)[0].dst

    # get other useful quantities from the original reduce node
    wcr = reduce_node.wcr
    identity = reduce_node.identity
    implementation = reduce_node.implementation

    # remove the reduce identity, will get reassigned after expansion
    reduce_node.identity = None
    # expand the reduce node
    in_edge = graph.in_edges(reduce_node)[0]
    nsdfg = self._expand_reduce(sdfg, graph, reduce_node)
    # find the new nodes in the nested sdfg created
    nstate = nsdfg.sdfg.nodes()[0]
    for node, scope in nstate.scope_dict().items():
        if isinstance(node, nodes.MapEntry):
            if scope is None:
                outer_entry = node
            else:
                inner_entry = node
        if isinstance(node, nodes.Tasklet):
            tasklet_node = node

    inner_exit = nstate.exit_node(inner_entry)
    outer_exit = nstate.exit_node(outer_entry)

    # find earliest parent read-write occurrence of array onto which
    # the reduction is performed: BFS
    # NOTE(review): queue.pop() pops from the end, so this traversal is
    # actually depth-first; either order finds *an* occurrence.
    if self.create_out_transient:
        queue = [nsdfg]
        enqueued = set()
        array_closest_ancestor = None

        while len(queue) > 0:
            current = queue.pop()
            if isinstance(current, nodes.AccessNode):
                if current.data == out_storage_node.data:
                    # it suffices to find the first node
                    # no matter what access (ReadWrite or Read)
                    array_closest_ancestor = current
                    break
            for in_edge in graph.in_edges(current):
                if in_edge.src not in enqueued:
                    queue.append(in_edge.src)
                    enqueued.add(in_edge.src)

        # FIX: these debug prints (and the WCR handling below) read
        # `array_closest_ancestor`, which only exists when
        # create_out_transient is set — keep them inside this guard.
        if self.debug and array_closest_ancestor:
            print(
                f"ReduceExpansion::Closest ancestor={array_closest_ancestor}"
            )
        elif self.debug:
            print("ReduceExpansion::No closest ancestor found")

    if self.create_out_transient:
        # create an out transient between inner and outer map exit
        array_out = nstate.out_edges(outer_exit)[0].data.data

        from dace.transformation.dataflow.local_storage import LocalStorage, OutLocalStorage
        local_storage_subgraph = {
            LocalStorage.node_a:
            nsdfg.sdfg.nodes()[0].nodes().index(inner_exit),
            LocalStorage.node_b:
            nsdfg.sdfg.nodes()[0].nodes().index(outer_exit)
        }
        nsdfg_id = nsdfg.sdfg.sdfg_list.index(nsdfg.sdfg)
        nstate_id = 0
        local_storage = OutLocalStorage(nsdfg.sdfg, nsdfg_id, nstate_id,
                                        local_storage_subgraph, 0)
        local_storage.array = array_out
        local_storage.apply(nsdfg.sdfg.node(0), nsdfg.sdfg)
        out_transient_node_inner = local_storage._data_node

        # push to register
        nsdfg.sdfg.data(out_transient_node_inner.data
                        ).storage = dtypes.StorageType.Register

        # remove WCRs from all edges where possible if there is no
        # prior occurrence
        if array_closest_ancestor is None:
            nstate.out_edges(outer_exit)[0].data.wcr = None
            nstate.out_edges(out_transient_node_inner)[0].data.wcr = None
            nstate.out_edges(out_transient_node_inner)[0].data.volume = 1
        else:
            # remove WCR from outer exit
            nstate.out_edges(outer_exit)[0].data.wcr = None

    if self.create_in_transient:
        # create an in-transient between inner and outer map entry
        array_in = nstate.in_edges(outer_entry)[0].data.data

        from dace.transformation.dataflow.local_storage import LocalStorage, InLocalStorage
        local_storage_subgraph = {
            LocalStorage.node_a:
            nsdfg.sdfg.nodes()[0].nodes().index(outer_entry),
            LocalStorage.node_b:
            nsdfg.sdfg.nodes()[0].nodes().index(inner_entry)
        }
        nsdfg_id = nsdfg.sdfg.sdfg_list.index(nsdfg.sdfg)
        nstate_id = 0
        local_storage = InLocalStorage(nsdfg.sdfg, nsdfg_id, nstate_id,
                                       local_storage_subgraph, 0)
        local_storage.array = array_in
        local_storage.apply(nsdfg.sdfg.node(0), nsdfg.sdfg)
        in_transient_node_inner = local_storage._data_node

        # push to register
        nsdfg.sdfg.data(in_transient_node_inner.data
                        ).storage = dtypes.StorageType.Register

    # inline fuse back our nested SDFG
    from dace.transformation.interstate import InlineSDFG
    inline_sdfg = InlineSDFG(
        sdfg, sdfg.sdfg_id, sdfg.node_id(graph),
        {InlineSDFG.nested_sdfg: graph.node_id(nsdfg)}, 0)
    inline_sdfg.apply(graph, sdfg)

    new_schedule = dtypes.ScheduleType.Default
    new_implementation = self.reduce_implementation \
                         if self.reduce_implementation is not None \
                         else implementation
    new_axes = dcpy(reduce_node.axes)

    reduce_node_new = graph.add_reduce(wcr=wcr,
                                       axes=new_axes,
                                       schedule=new_schedule,
                                       identity=identity)
    reduce_node_new.implementation = new_implementation
    # replace inner map with new reduction node
    edge_tmp = graph.in_edges(inner_entry)[0]
    memlet_src_reduce = dcpy(edge_tmp.data)
    graph.add_edge(edge_tmp.src, edge_tmp.src_conn, reduce_node_new, None,
                   memlet_src_reduce)

    edge_tmp = graph.out_edges(inner_exit)[0]
    memlet_reduce_dst = Memlet(data=edge_tmp.data.data,
                               volume=1,
                               subset=edge_tmp.data.subset)
    graph.add_edge(reduce_node_new, None, edge_tmp.dst, edge_tmp.dst_conn,
                   memlet_reduce_dst)

    identity_tasklet = graph.out_edges(inner_entry)[0].dst
    graph.remove_node(inner_entry)
    graph.remove_node(inner_exit)
    graph.remove_node(identity_tasklet)

    # propagate scope for correct volumes
    scope_tree = ScopeTree(outer_entry, outer_exit)
    scope_tree.parent = ScopeTree(None, None)
    propagate_memlets_scope(sdfg, graph, scope_tree)
    sdfg.validate()

    # create variables for outside access
    self._reduce = reduce_node_new
    self._outer_entry = outer_entry

    if identity is None and self.create_out_transient:
        if self.debug:
            print(
                "ReduceExpansion::Trying to infer reduction WCR type due to out transient created"
            )
        # set the reduction identity accordingly so that the correct
        # blank result is written to the out_transient node
        # we use default values deducted from the reduction type
        reduction_type = detect_reduction_type(wcr)
        try:
            reduce_node_new.identity = self.reduction_type_identity[
                reduction_type]
        except KeyError:
            if reduction_type == dtypes.ReductionType.Min:
                reduce_node_new.identity = dtypes.max_value(
                    sdfg.arrays[out_storage_node.data].dtype)
            elif reduction_type == dtypes.ReductionType.Max:
                reduce_node_new.identity = dtypes.min_value(
                    sdfg.arrays[out_storage_node.data].dtype)
            else:
                # FIX: the f-prefix was only on the first fragment, so
                # {reduce_node_new} was never interpolated, and the
                # concatenated fragments lacked separating spaces.
                raise ValueError("Cannot infer reduction identity. "
                                 "Please specify the identity of node "
                                 f"{reduce_node_new}")

    return