def apply(self, graph: SDFGState, sdfg: SDFG):
    """
    Nest the matched subgraph into a nested SDFG and GPU-transform it.

    When ``expr_index`` is 0 the whole scope of the matched map entry is
    nested; otherwise only the matched reduce node is nested. The nested
    SDFG is then handed to ``GPUTransformSDFG`` and the outer SDFG is
    simplified afterwards.
    """
    # Select what to nest based on which pattern expression matched
    if self.expr_index == 0:
        to_nest = graph.scope_subgraph(self.map_entry)
    else:
        to_nest = SubgraphView(graph, [self.reduce])
    nested_node = helpers.nest_state_subgraph(sdfg, graph, to_nest, full_data=self.fullcopy)

    # Imported locally to avoid an import cycle
    from dace.transformation.interstate import GPUTransformSDFG

    # Forward this transformation's options to the SDFG-level transformation
    gpu_xform = GPUTransformSDFG(sdfg, 0, -1, {}, 0)
    gpu_xform.register_trans = self.register_trans
    gpu_xform.sequential_innermaps = self.sequential_innermaps
    gpu_xform.toplevel_trans = self.toplevel_trans

    inner_sdfg = nested_node.sdfg
    gpu_xform.apply(inner_sdfg, inner_sdfg)

    # Inline the nested SDFG back where possible
    sdfg.simplify()
def apply(self, sdfg):
    """
    Nest the matched subgraph into a nested SDFG and GPU-transform it.

    The state is looked up through ``self.state_id``. When ``expr_index`` is
    0 the scope of the matched map entry is nested; otherwise only the
    matched reduce node is. The nested SDFG is passed to
    ``GPUTransformSDFG`` and strict transformations are applied to the outer
    SDFG afterwards.
    """
    graph = sdfg.nodes()[self.state_id]
    state_nodes = graph.nodes

    # Select what to nest based on which pattern expression matched
    if self.expr_index == 0:
        entry = state_nodes()[self.subgraph[GPUTransformMap._map_entry]]
        to_nest = graph.scope_subgraph(entry)
    else:
        reduce_node = state_nodes()[self.subgraph[GPUTransformMap._reduce]]
        to_nest = SubgraphView(graph, [reduce_node])
    nested_node = helpers.nest_state_subgraph(sdfg, graph, to_nest, full_data=self.fullcopy)

    # Imported locally to avoid an import cycle
    from dace.transformation.interstate import GPUTransformSDFG

    # Forward this transformation's options to the SDFG-level transformation
    gpu_xform = GPUTransformSDFG(0, 0, {}, 0)
    gpu_xform.register_trans = self.register_trans
    gpu_xform.sequential_innermaps = self.sequential_innermaps
    gpu_xform.toplevel_trans = self.toplevel_trans
    gpu_xform.gpu_id = self.gpu_id

    gpu_xform.apply(nested_node.sdfg)

    # Inline the nested SDFG back where possible
    sdfg.apply_strict_transformations()
def test_index_propagation_in_tiled_sdfg():
    """Tile a map, nest the scope directly around the tasklet, then validate and compile."""
    sdfg, state, tasklet, outer_entry, _ = create_sdfg_4()
    tiling.MapTiling.apply_to(sdfg=sdfg, options={'tile_sizes': (2, )}, map_entry=outer_entry)

    # After tiling, the tasklet's direct neighbours are the inner map nodes
    inner_entry = state.in_edges(tasklet)[0].src
    inner_exit = state.out_edges(tasklet)[0].dst
    inner_scope = SubgraphView(state, [inner_entry, tasklet, inner_exit])
    nest_state_subgraph(sdfg, state, inner_scope)

    sdfg.validate()
    sdfg.compile()
def test_nested_sdfg(self):
    """Nest the contents of the 'outer' map and check that MapFission still applies and computes the expected result."""
    A, expected = config()
    B = np.random.rand(2)

    # Nest the subgraph within the outer map, then apply transformation
    graph = mapfission_sdfg()
    state = graph.nodes()[0]
    outer_entries = (n for n in state.nodes() if isinstance(n, nodes.MapEntry) and n.label == 'outer')
    topmap = next(outer_entries)
    inner_contents = state.scope_subgraph(topmap, include_entry=False, include_exit=False)
    nest_state_subgraph(graph, state, inner_contents)

    applied = graph.apply_transformations(MapFission)
    self.assertGreater(applied, 0)

    graph(A=A, B=B)
    self.assertTrue(np.allclose(B, expected))
def test_tiled_program(self):
    """Nest four different subgraphs of a freshly created tiled SDFG and validate each result."""
    # Tasklet only
    sdfg, state = create_tiled_sdfg()
    only_tasklet = next(node for node in state.nodes() if isinstance(node, Tasklet))
    nest_state_subgraph(sdfg, state, SubgraphView(state, [only_tasklet]))
    sdfg.validate()

    # Inner map scope
    sdfg, state = create_tiled_sdfg()
    inner_tasklet = next(node for node in state.nodes() if isinstance(node, Tasklet))
    inner_entry = state.entry_node(inner_tasklet)
    inner_scope = state.scope_subgraph(inner_entry)
    nest_state_subgraph(sdfg, state, inner_scope)
    sdfg.validate()

    # Outer map scope
    sdfg, state = create_tiled_sdfg()
    scope_children = state.scope_dict(True)
    outer_entry = next(node for node in scope_children[None] if isinstance(node, MapEntry))
    outer_scope = state.scope_subgraph(outer_entry)
    nest_state_subgraph(sdfg, state, outer_scope)
    sdfg.validate()

    # Entire state
    sdfg, state = create_tiled_sdfg()
    whole_state = SubgraphView(state, state.nodes())
    nest_state_subgraph(sdfg, state, whole_state)
    sdfg.validate()
def test_internal_outarray():
    """Nest a tasklet together with the access node it writes inside a map, then run and check the written element."""
    sdfg = dace.SDFG('internal_outarr')
    sdfg.add_array('A', [20], dace.float64)
    state = sdfg.add_state()

    # Build: map entry -> tasklet -> write(A) -> map exit
    entry, exit_node = state.add_map('_', {'i': '0:1'})
    tasklet = state.add_tasklet('doit', {}, {'a'}, 'a = 0')
    write_a = state.add_write('A')
    state.add_nedge(entry, tasklet, dace.Memlet())
    state.add_edge(tasklet, 'a', write_a, None, dace.Memlet('A[1]'))
    state.add_nedge(write_a, exit_node, dace.Memlet())

    # Nest only the tasklet and the access node it writes to
    nest_state_subgraph(sdfg, state, StateSubgraphView(state, [tasklet, write_a]))

    data = np.random.rand(20)
    sdfg(A=data)
    assert data[1] == 0
def test_nest_oneelementmap():
    """Nest every map scope, then every map's contents, and check the program still copies A into B."""
    A, B = np.random.rand(1), np.random.rand(1)
    sdfg: dace.SDFG = nest_subgraph.to_sdfg()
    state: dace.SDFGState

    # Nest outer region
    for node, state in sdfg.all_nodes_recursive():
        if not isinstance(node, dace.nodes.MapEntry):
            continue
        nest_state_subgraph(sdfg, state, state.scope_subgraph(node))

    # Nest inner scope
    for node, state in sdfg.all_nodes_recursive():
        if not isinstance(node, dace.nodes.MapEntry):
            continue
        contents = state.scope_subgraph(node, include_entry=False, include_exit=False)
        nest_state_subgraph(state.parent, state, contents)

    sdfg(A=A, B=B)
    assert np.allclose(A, B)
def test_badscope(self):
    """Nesting an incomplete scope raises ValueError; using a node from another graph raises KeyError."""
    # Tasklet with the map entry only
    with self.assertRaises(ValueError):
        sdfg, state, tasklet, entry, _ = create_sdfg()
        partial_scope = SubgraphView(state, [tasklet, entry])
        nest_state_subgraph(sdfg, state, partial_scope)

    # Tasklet with the map exit only
    with self.assertRaises(ValueError):
        sdfg, state, tasklet, _, exit_node = create_sdfg()
        partial_scope = SubgraphView(state, [tasklet, exit_node])
        nest_state_subgraph(sdfg, state, partial_scope)

    with self.assertRaises(KeyError):
        _, other_state, _, _, _ = create_sdfg()
        foreign_node = other_state.sink_nodes()[0]
        sdfg, state, tasklet, _, _ = create_sdfg()
        # Notice that foreign_node comes from another graph
        nest_state_subgraph(sdfg, state, SubgraphView(state, [tasklet, foreign_node]))
def test_badscope():
    """Nesting an incomplete scope raises ValueError; using a node from another graph raises NodeNotFoundError."""
    # Tasklet with the map entry only
    with pytest.raises(ValueError):
        sdfg, state, tasklet, entry, _ = create_sdfg()
        partial_scope = SubgraphView(state, [tasklet, entry])
        nest_state_subgraph(sdfg, state, partial_scope)

    # Tasklet with the map exit only
    with pytest.raises(ValueError):
        sdfg, state, tasklet, _, exit_node = create_sdfg()
        partial_scope = SubgraphView(state, [tasklet, exit_node])
        nest_state_subgraph(sdfg, state, partial_scope)

    with pytest.raises(NodeNotFoundError):
        _, other_state, _, _, _ = create_sdfg()
        foreign_node = other_state.sink_nodes()[0]
        sdfg, state, tasklet, _, _ = create_sdfg()
        # Notice that foreign_node comes from another graph
        nest_state_subgraph(sdfg, state, SubgraphView(state, [tasklet, foreign_node]))
def test_simple_program():
    """Nesting all tasklets at once must fail with ValueError; nesting them one at a time must validate."""

    @dace.program
    def multiply(a: dace.float32[N]):
        a *= 2
        a *= 3

    sdfg = multiply.to_sdfg(strict=True)

    # Pick the first state that contains at least one tasklet
    state = next((candidate for candidate in sdfg.nodes()
                  if any(isinstance(node, Tasklet) for node in candidate.nodes())), None)
    if state is None:
        raise KeyError('State with tasklet not found')

    tasklet_nodes = [node for node in state.nodes() if isinstance(node, Tasklet)]

    # All tasklets together are rejected
    with pytest.raises(ValueError):
        nest_state_subgraph(sdfg, state, SubgraphView(state, tasklet_nodes))

    # Each of the first two tasklets on its own can be nested
    first, second = tasklet_nodes[0], tasklet_nodes[1]
    nest_state_subgraph(sdfg, state, SubgraphView(state, [first]))
    sdfg.validate()
    nest_state_subgraph(sdfg, state, SubgraphView(state, [second]))
    sdfg.validate()
def apply(self, _, sdfg: sd.SDFG):
    """
    Move the matched for-loop from around a map into the body of that map.

    The contents of the map found in the loop body state are nested into a
    nested SDFG, an equivalent loop is created inside that nested SDFG, and
    the outer loop guard is removed from ``sdfg``. Symbol mappings, data
    connectors and memlets are then fixed up, and ``RefineNestedAccess`` is
    applied to the new nested SDFG node.

    :param _: Unused.
    :param sdfg: The SDFG that contains the loop guard and body states.
    """
    # Obtain loop information
    guard: sd.SDFGState = self.loop_guard
    body: sd.SDFGState = self.loop_begin

    # Obtain iteration variable, range, and stride
    itervar, (start, end, step), _ = find_for_loop(sdfg, guard, body)

    forward_loop = step > 0

    # Locate the map entry/exit in the body; if several exist, the last one
    # visited wins
    for node in body.nodes():
        if isinstance(node, nodes.MapEntry):
            map_entry = node
        if isinstance(node, nodes.MapExit):
            map_exit = node

    # nest map's content in sdfg
    map_subgraph = body.scope_subgraph(map_entry, include_entry=False, include_exit=False)
    nsdfg = helpers.nest_state_subgraph(sdfg, body, map_subgraph, full_data=True)

    # replicate loop in nested sdfg; the condition and increment depend on the
    # loop direction
    new_before, new_guard, new_after = nsdfg.sdfg.add_loop(
        before_state=None,
        loop_state=nsdfg.sdfg.nodes()[0],
        loop_end_state=None,
        after_state=None,
        loop_var=itervar,
        initialize_expr=f'{start}',
        condition_expr=f'{itervar} <= {end}' if forward_loop else f'{itervar} >= {end}',
        increment_expr=f'{itervar} + {step}' if forward_loop else f'{itervar} - {abs(step)}')

    # remove outer loop
    before_guard_edge = nsdfg.sdfg.edges_between(new_before, new_guard)[0]
    for e in nsdfg.sdfg.out_edges(new_guard):
        if e.dst is new_after:
            guard_after_edge = e
        else:
            guard_body_edge = e

    # Carry assignments of the outer guard -> body edge over to the new inner
    # guard -> body edge, then detach the body from the outer loop
    for body_inedge in sdfg.in_edges(body):
        if body_inedge.src is guard:
            guard_body_edge.data.assignments.update(body_inedge.data.assignments)
        sdfg.remove_edge(body_inedge)
    for body_outedge in sdfg.out_edges(body):
        sdfg.remove_edge(body_outedge)
    # Edges that entered the guard now enter the body directly; their
    # assignments move to the inner before -> guard edge
    for guard_inedge in sdfg.in_edges(guard):
        before_guard_edge.data.assignments.update(guard_inedge.data.assignments)
        guard_inedge.data.assignments = {}
        sdfg.add_edge(guard_inedge.src, body, guard_inedge.data)
        sdfg.remove_edge(guard_inedge)
    # Edges that left the guard now leave the body unconditionally
    for guard_outedge in sdfg.out_edges(guard):
        if guard_outedge.dst is body:
            guard_body_edge.data.assignments.update(guard_outedge.data.assignments)
        else:
            guard_after_edge.data.assignments.update(guard_outedge.data.assignments)
        guard_outedge.data.condition = CodeBlock("1")
        sdfg.add_edge(body, guard_outedge.dst, guard_outedge.data)
        sdfg.remove_edge(guard_outedge)
    sdfg.remove_node(guard)
    # The iteration variable is now defined by the loop inside the nested SDFG
    if itervar in nsdfg.symbol_mapping:
        del nsdfg.symbol_mapping[itervar]
    if itervar in sdfg.symbols:
        del sdfg.symbols[itervar]

    # Add missing data/symbols
    for s in nsdfg.sdfg.free_symbols:
        if s in nsdfg.symbol_mapping:
            continue
        if s in sdfg.symbols:
            nsdfg.symbol_mapping[s] = s
        elif s in sdfg.arrays:
            # The free symbol is a data container: pass it in through a new
            # input connector routed through the map entry
            desc = sdfg.arrays[s]
            access = body.add_access(s)
            conn = nsdfg.sdfg.add_datadesc(s, copy.deepcopy(desc))
            nsdfg.sdfg.arrays[s].transient = False
            nsdfg.add_in_connector(conn)
            body.add_memlet_path(access, map_entry, nsdfg, memlet=Memlet.from_array(s, desc), dst_conn=conn)
        else:
            raise NotImplementedError(f"Free symbol {s} is neither a symbol nor data.")
    # Drop mappings for symbols that are no longer free in the nested SDFG
    to_delete = set()
    for s in nsdfg.symbol_mapping:
        if s not in nsdfg.sdfg.free_symbols:
            to_delete.add(s)
    for s in to_delete:
        del nsdfg.symbol_mapping[s]

    # propagate scope for correct volumes
    scope_tree = ScopeTree(map_entry, map_exit)
    scope_tree.parent = ScopeTree(None, None)
    # The first execution helps remove appearances of symbols
    # that are now defined only in the nested SDFG in memlets.
    propagation.propagate_memlets_scope(sdfg, body, scope_tree)

    for s in to_delete:
        if helpers.is_symbol_unused(sdfg, s):
            sdfg.remove_symbol(s)

    # Imported locally rather than at module level
    from dace.transformation.interstate import RefineNestedAccess
    transformation = RefineNestedAccess()
    transformation.setup_match(sdfg, 0, sdfg.node_id(body), {RefineNestedAccess.nsdfg: body.node_id(nsdfg)}, 0)
    transformation.apply(body, sdfg)

    # Second propagation for refined accesses.
    propagation.propagate_memlets_scope(sdfg, body, scope_tree)
def apply(self, sdfg) -> Tuple[nodes.NestedSDFG, SDFGState]:
    """
    Applies the transformation and returns a tuple with the new nested
    SDFG node and the main state in the for-loop.

    Only the first map parameter and the first range of the matched map are
    used to build the loop.

    :param sdfg: The SDFG that contains the matched map.
    :return: A 2-tuple of (nested SDFG node, loop body state inside the
             nested SDFG).
    """

    # Retrieve map entry and exit nodes.
    graph = sdfg.nodes()[self.state_id]
    map_entry = graph.nodes()[self.subgraph[MapToForLoop._map_entry]]
    map_exit = graph.exit_node(map_entry)

    loop_idx = map_entry.map.params[0]
    loop_from, loop_to, loop_step = map_entry.map.range[0]

    # Turn the map scope into a nested SDFG
    node = nest_state_subgraph(sdfg, graph, graph.scope_subgraph(map_entry))

    nsdfg: SDFG = node.sdfg
    nstate: SDFGState = nsdfg.nodes()[0]

    # If map range is dynamic, replace loop expressions with memlets
    param_to_edge = {}
    for edge in nstate.in_edges(map_entry):
        # Input connectors not starting with 'IN_' are treated as dynamic
        # range inputs and replaced by placeholder names
        if edge.dst_conn and not edge.dst_conn.startswith('IN_'):
            param = '__DACE_P%d' % len(param_to_edge)
            repldict = {symbolic.pystr_to_symbolic(edge.dst_conn): param}
            param_to_edge[param] = edge
            loop_from = loop_from.subs(repldict)
            loop_to = loop_to.subs(repldict)
            loop_step = loop_step.subs(repldict)

    # Avoiding import loop
    from dace.codegen.targets.cpp import cpp_array_expr

    def replace_param(param):
        # Convert the expression to a string and substitute every placeholder
        # with the array expression of the memlet that feeds it
        param = symbolic.symstr(param)
        for p, pval in param_to_edge.items():
            # TODO: Correct w.r.t. connector type
            param = param.replace(p, cpp_array_expr(nsdfg, pval.data))
        return param

    # End of dynamic input range

    # Create a loop inside the nested SDFG; the condition is
    # `loop_idx < loop_to + 1`
    nsdfg.add_loop(None, nstate, None, loop_idx, replace_param(loop_from),
                   '%s < %s' % (loop_idx, replace_param(loop_to + 1)),
                   '%s + %s' % (loop_idx, replace_param(loop_step)))

    # Skip map in input edges
    for edge in nstate.out_edges(map_entry):
        src_node = nstate.memlet_path(edge)[0].src
        nstate.add_edge(src_node, None, edge.dst, edge.dst_conn, edge.data)
        nstate.remove_edge(edge)

    # Skip map in output edges
    for edge in nstate.in_edges(map_exit):
        dst_node = nstate.memlet_path(edge)[-1].dst
        nstate.add_edge(edge.src, edge.src_conn, dst_node, None, edge.data)
        nstate.remove_edge(edge)

    # Remove nodes from dynamic map range
    nstate.remove_nodes_from(
        [e.src for e in dace.sdfg.dynamic_map_inputs(nstate, map_entry)])
    # Remove scope nodes
    nstate.remove_nodes_from([map_entry, map_exit])

    return node, nstate
def apply(self, sdfg: sd.SDFG):
    """
    Convert the matched for-loop into a map over the loop variable.

    If the loop body consists of more than one state, those states are first
    moved into a new nested SDFG placed in a single new body state. The body
    state's dataflow is then wrapped in a map whose range is the loop range,
    and the loop's guard edges (exit condition, back-edge, iteration-variable
    assignment) are removed or neutralized.

    :param sdfg: The SDFG that contains the matched loop.
    """
    # Obtain loop information
    guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
    body: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
    after: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._exit_state])

    # Obtain iteration variable, range, and stride
    itervar, (start, end, step), (_, body_end) = find_for_loop(
        sdfg, guard, body, itervar=self.itervar)

    # Find all loop-body states by walking forward from the first body state
    # and stopping at the last one
    states = set([body_end])
    to_visit = [body]
    while to_visit:
        state = to_visit.pop(0)
        if state is body_end:
            continue
        for _, dst, _ in sdfg.out_edges(state):
            if dst not in states:
                to_visit.append(dst)
        states.add(state)

    # Nest loop-body states
    if len(states) > 1:

        # Find read/write sets
        read_set, write_set = set(), set()
        for state in states:
            rset, wset = state.read_and_write_sets()
            read_set |= rset
            write_set |= wset
        # Add data from edges
        for src in states:
            for dst in states:
                for edge in sdfg.edges_between(src, dst):
                    for s in edge.data.free_symbols:
                        if s in sdfg.arrays:
                            read_set.add(s)

        # Find NestedSDFG's unique data: transients that have no access node
        # in any state outside the loop body
        rw_set = read_set | write_set
        unique_set = set()
        for name in rw_set:
            if not sdfg.arrays[name].transient:
                continue
            found = False
            for state in sdfg.states():
                if state in states:
                    continue
                for node in state.nodes():
                    if (isinstance(node, nodes.AccessNode)
                            and node.data == name):
                        found = True
                        break
            if not found:
                unique_set.add(name)

        # Find NestedSDFG's connectors
        read_set = {n for n in read_set if n not in unique_set or not sdfg.arrays[n].transient}
        write_set = {n for n in write_set if n not in unique_set or not sdfg.arrays[n].transient}

        # Create NestedSDFG and add all loop-body states and edges
        # Also, find defined symbols in NestedSDFG
        fsymbols = set(sdfg.free_symbols)
        new_body = sdfg.add_state('single_state_body')
        nsdfg = SDFG("loop_body", constants=sdfg.constants, parent=new_body)
        nsdfg.add_node(body, is_start_state=True)
        body.parent = nsdfg
        exit_state = nsdfg.add_state('exit')
        nsymbols = dict()
        for state in states:
            if state is body:
                continue
            nsdfg.add_node(state)
            state.parent = nsdfg
        for state in states:
            if state is body:
                continue
            for src, dst, data in sdfg.in_edges(state):
                # Remember the types of symbols assigned on moved edges
                nsymbols.update({s: sdfg.symbols[s] for s in data.assignments.keys() if s in sdfg.symbols})
                nsdfg.add_edge(src, dst, data)
        nsdfg.add_edge(body_end, exit_state, InterstateEdge())

        # Move guard -> body edge to guard -> new_body
        for src, dst, data, in sdfg.edges_between(guard, body):
            sdfg.add_edge(src, new_body, data)
        # Move body_end -> guard edge to new_body -> guard
        for src, dst, data in sdfg.edges_between(body_end, guard):
            sdfg.add_edge(new_body, dst, data)

        # Delete loop-body states and edges from parent SDFG
        for state in states:
            for e in sdfg.all_edges(state):
                sdfg.remove_edge(e)
            sdfg.remove_node(state)

        # Add NestedSDFG arrays; connector data is copied as non-transient,
        # unique data is moved out of the parent SDFG
        for name in read_set | write_set:
            nsdfg.arrays[name] = copy.deepcopy(sdfg.arrays[name])
            nsdfg.arrays[name].transient = False
        for name in unique_set:
            nsdfg.arrays[name] = sdfg.arrays[name]
            del sdfg.arrays[name]

        # Add NestedSDFG node
        cnode = new_body.add_nested_sdfg(nsdfg, None, read_set, write_set)
        if sdfg.parent:
            # Forward symbol mappings of the enclosing nested SDFG node
            for s, m in sdfg.parent_nsdfg_node.symbol_mapping.items():
                if s not in cnode.symbol_mapping:
                    cnode.symbol_mapping[s] = m
                    nsdfg.add_symbol(s, sdfg.symbols[s])
        for name in read_set:
            r = new_body.add_read(name)
            new_body.add_edge(
                r, None, cnode, name,
                memlet.Memlet.from_array(name, sdfg.arrays[name]))
        for name in write_set:
            w = new_body.add_write(name)
            new_body.add_edge(
                cnode, name, w, None,
                memlet.Memlet.from_array(name, sdfg.arrays[name]))

        # Fix SDFG symbols
        for sym in sdfg.free_symbols - fsymbols:
            del sdfg.symbols[sym]
        for sym, dtype in nsymbols.items():
            nsdfg.symbols[sym] = dtype

        # Change body state reference
        body = new_body

    if (step < 0) == True:
        # If step is negative, we have to flip start and end to produce a
        # correct map with a positive increment
        start, end, step = end, start, -step

    # If necessary, make a nested SDFG with assignments
    isedge = sdfg.edges_between(guard, body)[0]
    symbols_to_remove = set()
    if len(isedge.data.assignments) > 0:
        nsdfg = helpers.nest_state_subgraph(
            sdfg, body, gr.SubgraphView(body, body.nodes()))
        # Make everything the edge's expressions reference available inside
        # the nested SDFG
        for sym in isedge.data.free_symbols:
            if sym in nsdfg.symbol_mapping or sym in nsdfg.in_connectors:
                continue
            if sym in sdfg.symbols:
                nsdfg.symbol_mapping[sym] = symbolic.pystr_to_symbolic(sym)
                nsdfg.sdfg.add_symbol(sym, sdfg.symbols[sym])
            elif sym in sdfg.arrays:
                if sym in nsdfg.sdfg.arrays:
                    raise NotImplementedError
                rnode = body.add_read(sym)
                nsdfg.add_in_connector(sym)
                desc = copy.deepcopy(sdfg.arrays[sym])
                desc.transient = False
                nsdfg.sdfg.add_datadesc(sym, desc)
                body.add_edge(rnode, None, nsdfg, sym, memlet.Memlet(sym))

        # Move the assignments onto a new edge in front of the nested state
        nstate = nsdfg.sdfg.node(0)
        init_state = nsdfg.sdfg.add_state_before(nstate)
        nisedge = nsdfg.sdfg.edges_between(init_state, nstate)[0]
        nisedge.data.assignments = isedge.data.assignments
        symbols_to_remove = set(nisedge.data.assignments.keys())
        for k in nisedge.data.assignments.keys():
            if k in nsdfg.symbol_mapping:
                del nsdfg.symbol_mapping[k]
        isedge.data.assignments = {}

    # Collect sources/sinks before the map nodes are added to the state
    source_nodes = body.source_nodes()
    sink_nodes = body.sink_nodes()

    map = nodes.Map(body.label + "_map", [itervar], [(start, end, step)])
    entry = nodes.MapEntry(map)
    exit = nodes.MapExit(map)
    body.add_node(entry)
    body.add_node(exit)

    # If the map uses symbols from data containers, instantiate reads
    containers_to_read = entry.free_symbols & sdfg.arrays.keys()
    for rd in containers_to_read:
        # We are guaranteed that this is always a scalar, because
        # can_be_applied makes sure there are no sympy functions in each of
        # the loop expressions
        access_node = body.add_read(rd)
        body.add_memlet_path(access_node,
                             entry,
                             dst_conn=rd,
                             memlet=memlet.Memlet(rd))

    # Reroute all memlets through the entry and exit nodes
    for n in source_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.out_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(entry,
                                   e.dst,
                                   n,
                                   e.data,
                                   internal_connector=e.dst_conn)
        else:
            body.add_nedge(entry, n, memlet.Memlet())
    for n in sink_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.in_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(exit,
                                   e.src,
                                   n,
                                   e.data,
                                   internal_connector=e.src_conn)
        else:
            body.add_nedge(n, exit, memlet.Memlet())

    # Get rid of the loop exit condition edge
    after_edge = sdfg.edges_between(guard, after)[0]
    sdfg.remove_edge(after_edge)

    # Remove the assignment on the edge to the guard
    for e in sdfg.in_edges(guard):
        if itervar in e.data.assignments:
            del e.data.assignments[itervar]

    # Remove the condition on the entry edge
    condition_edge = sdfg.edges_between(guard, body)[0]
    condition_edge.data.condition = CodeBlock("1")

    # Get rid of backedge to guard
    sdfg.remove_edge(sdfg.edges_between(body, guard)[0])

    # Route body directly to after state, maintaining any other assignments
    # it might have had
    sdfg.add_edge(
        body, after,
        sd.InterstateEdge(assignments=after_edge.data.assignments))

    # If this had made the iteration variable a free symbol, we can remove
    # it from the SDFG symbols
    if itervar in sdfg.free_symbols:
        sdfg.remove_symbol(itervar)
    for sym in symbols_to_remove:
        if helpers.is_symbol_unused(sdfg, sym):
            sdfg.remove_symbol(sym)
def apply(self, sdfg: SDFG):
    """
    Create local accumulation transients between the two matched map exits
    and initialize them to their identity values.

    ``OutLocalStorage`` is applied once per entry of
    ``self.array_identity_dict`` (extended by ``self.array`` /
    ``self.identity`` when given, or by the first write-conflict-resolution
    memlet between the map exits when nothing was specified). The inner map
    scope is then nested, and an initialization state that writes each
    identity value is added before the nested state.

    :param sdfg: The SDFG that contains the matched map exits.
    :return: The nested SDFG node, or None if an identity value was None
             (a warning is issued in that case).
    """
    graph = sdfg.node(self.state_id)
    map_exit = graph.node(self.subgraph[AccumulateTransient.map_exit])
    outer_map_exit = graph.node(
        self.subgraph[AccumulateTransient.outer_map_exit])

    # Avoid import loop
    from dace.transformation.dataflow.local_storage import OutLocalStorage

    # Note: this is the property's own dictionary, so the additions below
    # modify it in place
    array_identity_dict = self.array_identity_dict

    # Choose array
    array = self.array
    if array is not None and len(array) != 0:
        array_identity_dict[array] = self.identity
    elif ((array is None or len(array) == 0)
          and len(array_identity_dict) == 0):
        # Nothing specified: pick the first WCR memlet between the map exits
        array = next(e.data.data
                     for e in graph.edges_between(map_exit, outer_map_exit)
                     if e.data.wcr is not None)
        array_identity_dict[array] = self.identity

    # Maps each newly created transient's name to its identity value
    transients: Dict[str, Any] = {}
    for array, identity in array_identity_dict.items():
        data_node: nodes.AccessNode = OutLocalStorage.apply_to(
            sdfg,
            dict(array=array, prefix=self.prefix),
            verify=False,
            save=False,
            node_a=map_exit,
            node_b=outer_map_exit)
        transients[data_node.data] = identity

        # NOTE(review): the original indentation of this block was lost; the
        # check is assumed to sit inside the loop -- confirm.
        if identity is None:
            warnings.warn(
                'AccumulateTransient did not properly initialize '
                'newly-created transient!')
            return

    sdfg_state: SDFGState = sdfg.node(self.state_id)
    map_entry = sdfg_state.entry_node(map_exit)

    # Nest the inner map scope (entry, exit and everything between them)
    nested_sdfg: nodes.NestedSDFG = nest_state_subgraph(
        sdfg=sdfg,
        state=sdfg_state,
        subgraph=SubgraphView(
            sdfg_state, {map_entry, map_exit}
            | sdfg_state.all_nodes_between(map_entry, map_exit)))

    nested_sdfg_state: SDFGState = nested_sdfg.sdfg.nodes()[0]

    init_state = nested_sdfg.sdfg.add_state_before(nested_sdfg_state)

    # One mapped tasklet per transient, covering its whole shape
    for data_name, identity in transients.items():
        temp_array: Array = sdfg.arrays[data_name]
        init_state.add_mapped_tasklet(
            name='acctrans_init',
            map_ranges={
                '_o%d' % i: '0:%s' % symbolic.symstr(d)
                for i, d in enumerate(temp_array.shape)
            },
            inputs={},
            code='out = %s' % identity,
            outputs={
                'out':
                dace.Memlet.simple(data=data_name,
                                   subset_str=','.join([
                                       '_o%d' % i
                                       for i, _ in enumerate(temp_array.shape)
                                   ]))
            },
            external_edges=True)

    # TODO: use trivial map elimination here when it will be merged to remove map if it has trivial ranges

    return nested_sdfg
def apply(self, sdfg: sd.SDFG):
    """
    Convert the matched single-state for-loop into a map over the loop
    variable.

    The body state's dataflow is wrapped in a map whose range is the loop
    range, and the loop's guard edges (exit condition, back-edge,
    iteration-variable assignment) are removed or neutralized. If the
    guard -> body edge carries assignments, the body is first nested and the
    assignments are moved into the nested SDFG.

    :param sdfg: The SDFG that contains the matched loop.
    """
    # Obtain loop information
    guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
    body: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
    after: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._exit_state])

    # Obtain iteration variable, range, and stride
    itervar, (start, end, step), _ = find_for_loop(sdfg, guard, body)

    if (step < 0) == True:
        # If step is negative, we have to flip start and end to produce a
        # correct map with a positive increment
        start, end, step = end, start, -step

    # If necessary, make a nested SDFG with assignments
    isedge = sdfg.edges_between(guard, body)[0]
    symbols_to_remove = set()
    if len(isedge.data.assignments) > 0:
        nsdfg = helpers.nest_state_subgraph(
            sdfg, body, gr.SubgraphView(body, body.nodes()))
        # Make everything the edge's expressions reference available inside
        # the nested SDFG
        for sym in isedge.data.free_symbols:
            if sym in nsdfg.symbol_mapping or sym in nsdfg.in_connectors:
                continue
            if sym in sdfg.symbols:
                nsdfg.symbol_mapping[sym] = symbolic.pystr_to_symbolic(sym)
                nsdfg.sdfg.add_symbol(sym, sdfg.symbols[sym])
            elif sym in sdfg.arrays:
                if sym in nsdfg.sdfg.arrays:
                    raise NotImplementedError
                rnode = body.add_read(sym)
                nsdfg.add_in_connector(sym)
                desc = copy.deepcopy(sdfg.arrays[sym])
                desc.transient = False
                nsdfg.sdfg.add_datadesc(sym, desc)
                body.add_edge(rnode, None, nsdfg, sym, memlet.Memlet(sym))

        # Move the assignments onto a new edge in front of the nested state
        nstate = nsdfg.sdfg.node(0)
        init_state = nsdfg.sdfg.add_state_before(nstate)
        nisedge = nsdfg.sdfg.edges_between(init_state, nstate)[0]
        nisedge.data.assignments = isedge.data.assignments
        symbols_to_remove = set(nisedge.data.assignments.keys())
        for k in nisedge.data.assignments.keys():
            if k in nsdfg.symbol_mapping:
                del nsdfg.symbol_mapping[k]
        isedge.data.assignments = {}

    # Collect sources/sinks before the map nodes are added to the state
    source_nodes = body.source_nodes()
    sink_nodes = body.sink_nodes()

    map = nodes.Map(body.label + "_map", [itervar], [(start, end, step)])
    entry = nodes.MapEntry(map)
    exit = nodes.MapExit(map)
    body.add_node(entry)
    body.add_node(exit)

    # If the map uses symbols from data containers, instantiate reads
    containers_to_read = entry.free_symbols & sdfg.arrays.keys()
    for rd in containers_to_read:
        # We are guaranteed that this is always a scalar, because
        # can_be_applied makes sure there are no sympy functions in each of
        # the loop expressions
        access_node = body.add_read(rd)
        body.add_memlet_path(access_node,
                             entry,
                             dst_conn=rd,
                             memlet=memlet.Memlet(rd))

    # Reroute all memlets through the entry and exit nodes
    for n in source_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.out_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(entry,
                                   e.dst,
                                   n,
                                   e.data,
                                   internal_connector=e.dst_conn)
        else:
            body.add_nedge(entry, n, memlet.Memlet())
    for n in sink_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.in_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(exit,
                                   e.src,
                                   n,
                                   e.data,
                                   internal_connector=e.src_conn)
        else:
            body.add_nedge(n, exit, memlet.Memlet())

    # Get rid of the loop exit condition edge
    after_edge = sdfg.edges_between(guard, after)[0]
    sdfg.remove_edge(after_edge)

    # Remove the assignment on the edge to the guard
    for e in sdfg.in_edges(guard):
        if itervar in e.data.assignments:
            del e.data.assignments[itervar]

    # Remove the condition on the entry edge
    condition_edge = sdfg.edges_between(guard, body)[0]
    condition_edge.data.condition = CodeBlock("1")

    # Get rid of backedge to guard
    sdfg.remove_edge(sdfg.edges_between(body, guard)[0])

    # Route body directly to after state, maintaining any other assignments
    # it might have had
    sdfg.add_edge(
        body, after,
        sd.InterstateEdge(assignments=after_edge.data.assignments))

    # If this had made the iteration variable a free symbol, we can remove
    # it from the SDFG symbols
    if itervar in sdfg.free_symbols:
        sdfg.remove_symbol(itervar)
    for sym in symbols_to_remove:
        if helpers.is_symbol_unused(sdfg, sym):
            sdfg.remove_symbol(sym)
def apply(self, graph: SDFGState, sdfg: SDFG):
    """
    Create a local accumulation transient between the two matched map exits
    and initialize it to ``self.identity``.

    If no array name is configured, the first write-conflict-resolution
    memlet between the map exits selects it. When ``self.identity`` is None
    a warning is issued and the transient is left uninitialized.
    """
    inner_exit = self.map_exit
    outer_exit = self.outer_map_exit

    # Choose array
    array = self.array
    if array is None or len(array) == 0:
        wcr_targets = (edge.data.data for edge in graph.edges_between(inner_exit, outer_exit)
                       if edge.data.wcr is not None)
        array = next(wcr_targets)

    # Avoid import loop
    from dace.transformation.dataflow.local_storage import OutLocalStorage

    data_node: nodes.AccessNode = OutLocalStorage.apply_to(sdfg,
                                                           dict(array=array),
                                                           verify=False,
                                                           save=False,
                                                           node_a=inner_exit,
                                                           node_b=outer_exit)

    if self.identity is None:
        warnings.warn('AccumulateTransient did not properly initialize newly-created transient!')
        return

    sdfg_state: SDFGState = sdfg.node(self.state_id)
    inner_entry = sdfg_state.entry_node(inner_exit)

    # Nest the inner map scope (entry, exit and everything between them)
    scope_nodes = {inner_entry, inner_exit} | sdfg_state.all_nodes_between(inner_entry, inner_exit)
    nested_sdfg: NestedSDFG = nest_state_subgraph(sdfg=sdfg,
                                                  state=sdfg_state,
                                                  subgraph=SubgraphView(sdfg_state, scope_nodes))

    first_state: SDFGState = nested_sdfg.sdfg.nodes()[0]
    init_state = nested_sdfg.sdfg.add_state_before(first_state)

    temp_array: Array = sdfg.arrays[data_node.data]

    # One map parameter per dimension, covering the transient's whole shape
    ranges = {}
    for dim, extent in enumerate(temp_array.shape):
        ranges['_o%d' % dim] = '0:%s' % symstr(extent)
    indices = ','.join(['_o%d' % dim for dim, _ in enumerate(temp_array.shape)])
    out_memlet = dace.Memlet.simple(data=data_node.data, subset_str=indices)

    init_state.add_mapped_tasklet(name='acctrans_init',
                                  map_ranges=ranges,
                                  inputs={},
                                  code='out = %s' % self.identity,
                                  outputs={'out': out_memlet},
                                  external_edges=True)
def apply(self, graph: SDFGState, sdfg: SDFG) -> nodes.MapEntry:
    """
    Insert a warp-sized thread-block map inside the matched map and adapt
    the maps directly inside it.

    A new ``warp_tile`` map over ``__tid`` in ``0:warp_size`` is threaded
    between the matched map and its contents. Each immediate internal map
    whose last dimension is not smaller than the warp size is strided by
    the warp size and offset by ``__tid``. If ``replicate_maps`` is set, a
    map with one transient output that has several consumers is replicated
    per consumer. Single-element write-conflict outputs are rewritten to use
    a ``dace::warpReduce`` tasklet. Finally, the contents of the new map are
    nested into a nested SDFG.

    :param graph: The state that contains the matched map.
    :param sdfg: The SDFG that contains ``graph``.
    :return: The entry node of the newly created warp map.
    :raises NotImplementedError: If a write-conflict resolution is detected
                                 as a custom reduction.
    """
    me = self.mapentry

    # Add new map within map
    mx = graph.exit_node(me)
    new_me, new_mx = graph.add_map('warp_tile', dict(__tid=f'0:{self.warp_size}'),
                                   dtypes.ScheduleType.GPU_ThreadBlock)
    __tid = symbolic.pystr_to_symbolic('__tid')
    for e in graph.out_edges(me):
        xfh.reconnect_edge_through_map(graph, e, new_me, True)
    for e in graph.in_edges(mx):
        xfh.reconnect_edge_through_map(graph, e, new_mx, False)

    # Stride and offset all internal maps
    maps_to_stride = xfh.get_internal_scopes(graph, new_me, immediate=True)
    for nstate, nmap in maps_to_stride:
        nsdfg = nstate.parent
        nsdfg_node = nsdfg.parent_nsdfg_node

        # Map cannot be partitioned across a warp
        if (nmap.range.size()[-1] < self.warp_size) == True:
            continue

        # Maps that live in a nested SDFG need `__tid` passed in as a symbol
        if nsdfg is not sdfg and nsdfg_node is not None:
            nsdfg_node.symbol_mapping['__tid'] = __tid
            if '__tid' not in nsdfg.symbols:
                nsdfg.add_symbol('__tid', dtypes.int32)
        nmap.range[-1] = (nmap.range[-1][0], nmap.range[-1][1] - __tid, nmap.range[-1][2] * self.warp_size)
        subgraph = nstate.scope_subgraph(nmap)
        subgraph.replace(nmap.params[-1], f'{nmap.params[-1]} + __tid')
        inner_map_exit = nstate.exit_node(nmap)
        # If requested, replicate maps with multiple dependent maps
        if self.replicate_maps:
            destinations = [nstate.memlet_path(edge)[-1].dst for edge in nstate.out_edges(inner_map_exit)]

            for dst in destinations:
                # Transformation will not replicate map with more than one
                # output
                if len(destinations) != 1:
                    break
                if not isinstance(dst, nodes.AccessNode):
                    continue  # Not leading to access node
                if not xfh.contained_in(nstate, dst, new_me):
                    continue  # Memlet path goes out of map
                if not nsdfg.arrays[dst.data].transient:
                    continue  # Cannot modify non-transients
                for edge in nstate.out_edges(dst)[1:]:
                    rep_subgraph = xfh.replicate_scope(nsdfg, nstate, subgraph)
                    rep_edge = nstate.out_edges(rep_subgraph.sink_nodes()[0])[0]
                    # Add copy of data. `dst` belongs to `nstate`, so its
                    # descriptor must be looked up in the SDFG that owns that
                    # state (`nsdfg`), which may differ from the top-level
                    # `sdfg` when the map lives in a nested SDFG.
                    newdesc = copy.deepcopy(nsdfg.arrays[dst.data])
                    newname = nsdfg.add_datadesc(dst.data, newdesc, find_new_name=True)
                    newaccess = nstate.add_access(newname)
                    # Redirect edges
                    xfh.redirect_edge(nstate, rep_edge, new_dst=newaccess, new_data=newname)
                    xfh.redirect_edge(nstate, edge, new_src=newaccess, new_data=newname)

        # If has WCR, add warp-collaborative reduction on outputs
        for out_edge in nstate.out_edges(inner_map_exit):
            dst = nstate.memlet_path(out_edge)[-1].dst
            if not xfh.contained_in(nstate, dst, new_me):
                # Skip edges going out of map
                continue
            if dst.desc(nsdfg).storage == dtypes.StorageType.GPU_Global:
                # Skip destinations stored in GPU global memory
                # NOTE(review): the previous comment here said "Skip shared
                # memory" -- confirm which storage type is intended.
                continue
            if out_edge.data.wcr is not None:
                ctype = nsdfg.arrays[out_edge.data.data].dtype.ctype
                redtype = detect_reduction_type(out_edge.data.wcr)
                if redtype == dtypes.ReductionType.Custom:
                    raise NotImplementedError
                # Use the part of the enum's string form after the first dot
                credtype = ('dace::ReductionType::' + str(redtype)[str(redtype).find('.') + 1:])

                # One element: tasklet
                if out_edge.data.subset.num_elements() == 1:
                    # Add local access between thread-local and warp reduction
                    name = nsdfg._find_new_name(out_edge.data.data)
                    nsdfg.add_scalar(name, nsdfg.arrays[out_edge.data.data].dtype, transient=True)

                    # Initialize thread-local to global value
                    read = nstate.add_read(out_edge.data.data)
                    write = nstate.add_write(name)
                    edge = nstate.add_nedge(read, write, copy.deepcopy(out_edge.data))
                    edge.data.wcr = None
                    xfh.state_fission(nsdfg, SubgraphView(nstate, [read, write]))

                    # Redirect the map output into the new thread-local scalar
                    newnode = nstate.add_access(name)
                    nstate.remove_edge(out_edge)
                    edge = nstate.add_edge(out_edge.src, out_edge.src_conn, newnode, None,
                                           copy.deepcopy(out_edge.data))
                    for e in nstate.memlet_path(edge):
                        e.data.data = name
                        e.data.subset = subsets.Range([(0, 0, 1)])

                    # Reduce across the warp and write to the original target
                    wrt = nstate.add_tasklet('warpreduce', {'__a'}, {'__out'},
                                             f'__out = dace::warpReduce<{credtype}, {ctype}>::reduce(__a);',
                                             dtypes.Language.CPP)
                    nstate.add_edge(newnode, None, wrt, '__a', Memlet(name))
                    out_edge.data.wcr = None
                    nstate.add_edge(wrt, '__out', out_edge.dst, None, out_edge.data)
                else:  # More than one element: mapped tasklet
                    # Could be a parallel summation
                    # TODO(later): Check if reduction
                    continue
        # End of WCR to warp reduction

    # Make nested SDFG out of new scope
    xfh.nest_state_subgraph(sdfg, graph, graph.scope_subgraph(new_me, False, False))

    return new_me
def test_simple_sdfg_program(self):
    """Nesting every node of the state must leave a valid SDFG."""
    sdfg, state, _, _, _ = create_sdfg()
    everything = SubgraphView(state, state.nodes())
    nest_state_subgraph(sdfg, state, everything)
    sdfg.validate()
def test_simple_sdfg_map(self):
    """Nesting the map entry, tasklet and map exit together must leave a valid SDFG."""
    sdfg, state, tasklet, entry, exit_node = create_sdfg()
    map_scope = SubgraphView(state, [entry, tasklet, exit_node])
    nest_state_subgraph(sdfg, state, map_scope)
    sdfg.validate()