def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    in_array = graph.nodes()[candidate[RedundantArray._in_array]]
    out_array = graph.nodes()[candidate[RedundantArray._out_array]]

    in_desc = in_array.desc(sdfg)
    out_desc = out_array.desc(sdfg)

    # Ensure out degree is one (only one target, which is out_array)
    if graph.out_degree(in_array) != 1:
        return False

    # Make sure that the candidate is a transient variable
    if not in_desc.transient:
        return False

    # Make sure that both arrays are using the same storage location
    # and are of the same type (e.g., Stream->Stream)
    if in_desc.storage != out_desc.storage:
        return False
    if type(in_desc) != type(out_desc):
        return False

    # Find occurrences in this and other states
    occurrences = []
    for state in sdfg.nodes():
        occurrences.extend([
            n for n in state.nodes()
            if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == in_desc
        ])
    for isedge in sdfg.edges():
        if in_array.data in isedge.data.free_symbols:
            occurrences.append(isedge)

    if len(occurrences) > 1:
        return False

    # Only apply if arrays are of same shape (no need to modify subset)
    if len(in_desc.shape) != len(out_desc.shape) or any(
            i != o for i, o in zip(in_desc.shape, out_desc.shape)):
        return False

    if strict:
        # In strict mode, make sure the memlet covers the removed array
        edge = graph.edges_between(in_array, out_array)[0]
        if any(m != a
               for m, a in zip(edge.data.subset.size(), in_desc.shape)):
            return False

    return True
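# Usage sketch (assumption; not part of the transformation above): these
# `can_be_applied` checks are consulted by DaCe's pattern matcher. A typical
# driver repeatedly applies the transformation wherever the pattern matches.
# `RedundantArray` is assumed to be importable from the dataflow package.
def _example_remove_redundant_arrays(sdfg):
    from dace.transformation.dataflow import RedundantArray

    # Apply wherever `can_be_applied` returns True, until a fixed point.
    applied = sdfg.apply_transformations_repeated(RedundantArray)
    return applied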
def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    nested_sdfg = graph.nodes()[candidate[CopyToDevice._nested_sdfg]]

    for edge in graph.all_edges(nested_sdfg):
        # Stream inputs/outputs not allowed
        path = graph.memlet_path(edge)
        if ((isinstance(path[0].src, nodes.AccessNode)
             and isinstance(sdfg.arrays[path[0].src.data], data.Stream)) or
            (isinstance(path[-1].dst, nodes.AccessNode)
             and isinstance(sdfg.arrays[path[-1].dst.data], data.Stream))):
            return False
        # WCR outputs with arrays are not allowed
        if (edge.data.wcr is not None
                and edge.data.subset.num_elements() != 1):
            return False

    return True
def match_to_str(graph, candidate):
    nested_sdfg = graph.nodes()[candidate[CopyToDevice._nested_sdfg]]
    return nested_sdfg.label
def expansion(node: 'Reduce', state: SDFGState, sdfg: SDFG):
    """ Create a map around the BlockReduce node
        with in and out transients in registers
        and an if tasklet that redirects the output
        of thread 0 to a shared memory transient.
    """

    ### define some useful vars
    graph = state
    reduce_node = node
    in_edge = graph.in_edges(reduce_node)[0]
    out_edge = graph.out_edges(reduce_node)[0]

    axes = reduce_node.axes

    ### add a map that encloses the reduce node
    (new_entry, new_exit) = graph.add_map(
        name='inner_reduce_block',
        ndrange={'i' + str(i): f'{rng[0]}:{rng[1]+1}:{rng[2]}'
                 for (i, rng) in enumerate(in_edge.data.subset)
                 if i in axes},
        schedule=dtypes.ScheduleType.Default)

    map = new_entry.map
    ExpandReduceCUDABlockAll.redirect_edge(graph, in_edge, new_dst=new_entry)
    ExpandReduceCUDABlockAll.redirect_edge(graph, out_edge, new_src=new_exit)

    subset_in = subsets.Range([
        in_edge.data.subset[i] if i not in axes else
        (new_entry.map.params[0], new_entry.map.params[0], 1)
        for i in range(len(in_edge.data.subset))
    ])
    memlet_in = dace.Memlet(data=in_edge.data.data,
                            volume=1,
                            subset=subset_in)
    memlet_out = dcpy(out_edge.data)
    graph.add_edge(u=new_entry,
                   u_connector=None,
                   v=reduce_node,
                   v_connector=None,
                   memlet=memlet_in)
    graph.add_edge(u=reduce_node,
                   u_connector=None,
                   v=new_exit,
                   v_connector=None,
                   memlet=memlet_out)

    ### add in and out local storage
    from dace.transformation.dataflow.local_storage import LocalStorage

    in_local_storage_subgraph = {
        LocalStorage._node_a: graph.nodes().index(new_entry),
        LocalStorage._node_b: graph.nodes().index(reduce_node)
    }
    out_local_storage_subgraph = {
        LocalStorage._node_a: graph.nodes().index(reduce_node),
        LocalStorage._node_b: graph.nodes().index(new_exit)
    }

    local_storage = LocalStorage(sdfg.sdfg_id,
                                 sdfg.nodes().index(state),
                                 in_local_storage_subgraph, 0)
    local_storage.array = in_edge.data.data
    local_storage.apply(sdfg)
    in_transient = local_storage._data_node
    sdfg.data(in_transient.data).storage = dtypes.StorageType.Register

    local_storage = LocalStorage(sdfg.sdfg_id,
                                 sdfg.nodes().index(state),
                                 out_local_storage_subgraph, 0)
    local_storage.array = out_edge.data.data
    local_storage.apply(sdfg)
    out_transient = local_storage._data_node
    sdfg.data(out_transient.data).storage = dtypes.StorageType.Register

    # hack: swap edges as local_storage does not work correctly here
    # as subsets and data get assigned wrongly (should be swapped)
    # NOTE: If local_storage ever changes, this will not work any more
    e1 = graph.in_edges(out_transient)[0]
    e2 = graph.out_edges(out_transient)[0]
    e1.data.data = dcpy(e2.data.data)
    e1.data.subset = dcpy(e2.data.subset)

    ### add an if tasklet and diverge
    code = 'if '
    for (i, param) in enumerate(new_entry.map.params):
        code += (param + ' == 0')
        if i < len(axes) - 1:
            code += ' and '
    code += ':\n'
    code += '\tout=inp'

    tasklet_node = graph.add_tasklet(name='block_reduce_write',
                                     inputs=['inp'],
                                     outputs=['out'],
                                     code=code)

    edge_out_outtrans = graph.out_edges(out_transient)[0]
    edge_out_innerexit = graph.out_edges(new_exit)[0]
    ExpandReduceCUDABlockAll.redirect_edge(graph,
                                           edge_out_outtrans,
                                           new_dst=tasklet_node,
                                           new_dst_conn='inp')
    e = graph.add_edge(u=tasklet_node,
                       u_connector='out',
                       v=new_exit,
                       v_connector=None,
                       memlet=dcpy(edge_out_innerexit.data))
    # set dynamic with volume 0 FORNOW
    e.data.volume = 0
    e.data.dynamic = True

    ### set reduce_node axes to all (needed)
    reduce_node.axes = None

    # fill scope connectors, done.
    sdfg.fill_scope_connectors()

    # finally, change the implementation to cuda (block)
    # itself and expand again.
    reduce_node.implementation = 'CUDA (block)'
    sub_expansion = ExpandReduceCUDABlock(0, 0, {}, 0)
    return sub_expansion.expansion(node=node, state=state, sdfg=sdfg)
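# Usage sketch (assumption): a Reduce library node is usually expanded by
# setting its `implementation` and calling `expand_library_nodes()`. The
# registered name 'CUDA (block allreduce)' below is an assumption, not taken
# from this file.
def _example_expand_block_reduce(sdfg, reduce_node):
    reduce_node.implementation = 'CUDA (block allreduce)'  # assumed name
    sdfg.expand_library_nodes()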
def gnode(nname):
    return graph.nodes()[self.subgraph[nname]]
def match_to_str(graph, candidate):
    out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]
    return "Remove " + str(out_array)
def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
    out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

    in_desc = in_array.desc(sdfg)
    out_desc = out_array.desc(sdfg)

    # Ensure in degree is one (only one source, which is in_array)
    if graph.in_degree(out_array) != 1:
        return False

    # Make sure that the candidate is a transient variable
    if not out_desc.transient:
        return False

    # Dimensionality must be the same in strict mode
    if strict and len(in_desc.shape) != len(out_desc.shape):
        return False

    # Make sure that both arrays are using the same storage location
    # and are of the same type (e.g., Stream->Stream)
    if in_desc.storage != out_desc.storage:
        return False
    if type(in_desc) != type(out_desc):
        return False

    # Find occurrences in this and other states
    occurrences = []
    for state in sdfg.nodes():
        occurrences.extend([
            n for n in state.nodes()
            if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc
        ])
    for isedge in sdfg.edges():
        if out_array.data in isedge.data.free_symbols:
            occurrences.append(isedge)

    if len(occurrences) > 1:
        return False

    # Check whether the data copied from the first datanode covers
    # the subsets of all the output edges of the second datanode.
    # We assume the following pattern: A -- e1 --> B -- e2 --> others

    # 1. Get edge e1 and extract/validate subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    try:
        _, b1_subset = _validate_subsets(e1, sdfg.arrays)
    except NotImplementedError:
        return False
    # 2. Iterate over the e2 edges
    for e2 in graph.out_edges(out_array):
        # 2-a. Extract/validate subsets for array B and others
        try:
            b2_subset, _ = _validate_subsets(e2, sdfg.arrays)
        except NotImplementedError:
            return False
        # 2-b. Check whether b1_subset covers b2_subset
        if not b1_subset.covers(b2_subset):
            return False
        # 2-c. Validate subsets in memlet tree
        # (should not be needed for valid SDFGs)
        path = graph.memlet_tree(e2)
        for e3 in path:
            if e3 is not e2:
                try:
                    _validate_subsets(e3,
                                      sdfg.arrays,
                                      src_name=out_array.data)
                except NotImplementedError:
                    return False

    return True
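# Illustration (assumption: self-contained sketch with made-up ranges) of the
# subset-coverage reasoning in step 2-b above: removing B is only safe when the
# data copied into B (b1_subset) covers every subset later read out of B.
def _example_subset_coverage():
    from dace import subsets

    b1 = subsets.Range.from_string('0:20, 0:20')  # copied A -> B
    b2 = subsets.Range.from_string('2:10, 0:20')  # read from B
    return b1.covers(b2)  # True: the read is covered by the copy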
def match_to_str(graph, candidate):
    in_array = graph.nodes()[candidate[RedundantArray._in_array]]
    return "Remove " + str(in_array)
def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    in_array = graph.nodes()[candidate[RedundantSecondArray._in_array]]
    out_array = graph.nodes()[candidate[RedundantSecondArray._out_array]]

    in_desc = in_array.desc(sdfg)
    out_desc = out_array.desc(sdfg)

    # Ensure in degree is one (only one source, which is in_array)
    if graph.in_degree(out_array) != 1:
        return False

    # Make sure that the candidate is a transient variable
    if not out_desc.transient:
        return False

    # 1. Get edge e1 and extract/validate subsets for arrays A and B
    e1 = graph.edges_between(in_array, out_array)[0]
    a_subset, b1_subset = _validate_subsets(e1, sdfg.arrays)

    if strict:
        # In strict mode, make sure the memlet covers the removed array
        if not b1_subset:
            return False
        subset = copy.deepcopy(b1_subset)
        subset.squeeze()
        shape = [sz for sz in out_desc.shape if sz != 1]
        if any(m != a for m, a in zip(subset.size(), shape)):
            return False

        # NOTE: Library node check
        # The transformation must not apply in strict mode if out_array is
        # not a view, is input to a library node, and an access or a view
        # of in_desc is also output to the same library node.
        # The reason is that the application of the transformation will lead
        # to in_desc being both input and output of the library node.
        # We do not know if this is safe.

        # First find the true in_desc (in case in_array is a view).
        true_in_desc = in_desc
        if isinstance(in_desc, data.View):
            e = sdutil.get_view_edge(graph, in_array)
            if not e:
                return False
            true_in_desc = sdfg.arrays[e.dst.data]

        if not isinstance(out_desc, data.View):
            edges_to_check = []
            for a in graph.out_edges(out_array):
                if isinstance(a.dst, nodes.LibraryNode):
                    edges_to_check.append(a)
                elif (isinstance(a.dst, nodes.AccessNode)
                      and isinstance(sdfg.arrays[a.dst.data], data.View)):
                    for b in graph.out_edges(a.dst):
                        edges_to_check.append(graph.memlet_path(b)[-1])

            for a in edges_to_check:
                if isinstance(a.dst, nodes.LibraryNode):
                    for b in graph.out_edges(a.dst):
                        if isinstance(b.dst, nodes.AccessNode):
                            desc = sdfg.arrays[b.dst.data]
                            if isinstance(desc, data.View):
                                e = sdutil.get_view_edge(graph, b.dst)
                                if not e:
                                    return False
                                desc = sdfg.arrays[e.dst.data]
                                if desc is true_in_desc:
                                    return False

        # In strict mode, check if the state has two or more access nodes
        # for in_array and at least one of them is a write access. There
        # might be a RW, WR, or WW dependency.
        accesses = [
            n for n in graph.nodes() if isinstance(n, nodes.AccessNode)
            and n.desc(sdfg) == in_desc and n is not in_array
        ]
        if len(accesses) > 0:
            if (graph.in_degree(in_array) > 0
                    or any(graph.in_degree(a) > 0 for a in accesses)):
                # We need to ensure that a data race will not happen if we
                # remove in_array.
                # First, we simplify the graph
                G = helpers.simplify_state(graph)
                # Loop over the accesses
                for a in accesses:
                    subsets_intersect = False
                    for e in graph.in_edges(a):
                        _, subset = _validate_subsets(e,
                                                      sdfg.arrays,
                                                      dst_name=a.data)
                        res = subsets.intersects(a_subset, subset)
                        if res == True or res is None:
                            subsets_intersect = True
                            break
                    if not subsets_intersect:
                        continue
                    try:
                        has_bward_path = nx.has_path(G, a, in_array)
                    except NodeNotFound:
                        has_bward_path = nx.has_path(graph.nx, a, in_array)
                    try:
                        has_fward_path = nx.has_path(G, in_array, a)
                    except NodeNotFound:
                        has_fward_path = nx.has_path(graph.nx, in_array, a)
                    # If there is no path between the access nodes
                    # (disconnected components), then it is definitely
                    # possible to have data races. Abort.
                    if not (has_bward_path or has_fward_path):
                        return False
                    # If there is a forward path then a must not be a direct
                    # successor of in_array.
                    if has_fward_path and a in G.successors(in_array):
                        for src, _ in G.in_edges(a):
                            if src is in_array:
                                continue
                            if (nx.has_path(G, in_array, src)
                                    and src != out_array):
                                continue
                            return False

    # Make sure that both arrays are using the same storage location
    # and are of the same type (e.g., Stream->Stream)
    if in_desc.storage != out_desc.storage:
        return False
    if in_desc.location != out_desc.location:
        return False
    if type(in_desc) != type(out_desc):
        if isinstance(in_desc, data.View):
            # Case View -> Access
            # If the View points to the Access (and has a different shape?)
            # then we should (probably) not remove the Access.
            e = sdutil.get_view_edge(graph, in_array)
            if e and e.dst is out_array and in_desc.shape != out_desc.shape:
                return False
            # Check that the View's immediate ancestors are Accesses.
            # Otherwise, the application of the transformation will result
            # in an ambiguous View.
            view_ancestors_desc = [
                e.src.desc(sdfg)
                if isinstance(e.src, nodes.AccessNode) else None
                for e in graph.in_edges(in_array)
            ]
            if any([
                    not desc or isinstance(desc, data.View)
                    for desc in view_ancestors_desc
            ]):
                return False
        elif isinstance(out_desc, data.View):
            # Case Access -> View
            # If the View points to the Access and has the same shape,
            # it can be removed
            e = sdutil.get_view_edge(graph, out_array)
            if e and e.src is in_array and in_desc.shape == out_desc.shape:
                return True
            return False
        else:
            # Something else, for example, Stream
            return False
    else:
        # Two views connected to each other
        if isinstance(in_desc, data.View):
            return False

    # Find occurrences in this and other states
    occurrences = []
    for state in sdfg.nodes():
        occurrences.extend([
            n for n in state.nodes()
            if isinstance(n, nodes.AccessNode) and n.desc(sdfg) == out_desc
        ])
    for isedge in sdfg.edges():
        if out_array.data in isedge.data.free_symbols:
            occurrences.append(isedge)

    if len(occurrences) > 1:
        return False

    # Check whether the data copied from the first datanode covers
    # the subsets of all the output edges of the second datanode.
    # We assume the following pattern: A -- e1 --> B -- e2 --> others

    # 2. Iterate over the e2 edges
    for e2 in graph.out_edges(out_array):
        # 2-a. Extract/validate subsets for array B and others
        try:
            b2_subset, _ = _validate_subsets(e2, sdfg.arrays)
        except NotImplementedError:
            return False
        # 2-b. Check whether b1_subset covers b2_subset
        if not b1_subset.covers(b2_subset):
            return False
        # 2-c. Validate subsets in memlet tree
        # (should not be needed for valid SDFGs)
        path = graph.memlet_tree(e2)
        for e3 in path:
            if e3 is not e2:
                try:
                    _validate_subsets(e3,
                                      sdfg.arrays,
                                      src_name=out_array.data)
                except NotImplementedError:
                    return False

    return True