def can_be_applied(self, graph, candidate, expr_index, sdfg, strict=False):
    """
    Decide whether this transformation may be applied to the loop matched
    by ``candidate``.

    The checks are performed in order, and the first failing one returns
    ``False``:

    1. ``DetectLoop.can_be_applied`` must accept the match.
    2. The guard state must be empty (no dataflow nodes).
    3. ``find_for_loop`` must recognize the loop and yield its iteration
       variable and ``(start, end, step)`` range.
    4. None of ``start``/``end``/``step`` may contain SymPy functions.
    5. Every relevant write in the loop body must pass ``_check_range``
       (unless it uses write-conflict resolution), and every read of a
       container that is also written must be shown not to overlap with
       those writes.
    6. The iteration variable must not be used by any state after the loop
       before it is reassigned.

    :param graph: Graph in which the loop was matched (passed to
                  ``DetectLoop`` and ``find_for_loop``).
    :param candidate: Pattern match; indexed with
                      ``DetectLoop._loop_guard`` and
                      ``DetectLoop._loop_begin``.
    :param expr_index: Forwarded unchanged to ``DetectLoop.can_be_applied``.
    :param sdfg: The SDFG whose states, edges and arrays are inspected.
    :param strict: Forwarded unchanged to ``DetectLoop.can_be_applied``.
    :return: True if all checks pass, False otherwise.
    """
    # Is this even a loop
    if not DetectLoop.can_be_applied(graph, candidate, expr_index, sdfg,
                                     strict):
        return False

    guard = graph.node(candidate[DetectLoop._loop_guard])
    begin = graph.node(candidate[DetectLoop._loop_begin])

    # Guard state should not contain any dataflow
    if len(guard.nodes()) != 0:
        return False

    # If loop cannot be detected, fail
    found = find_for_loop(graph, guard, begin, itervar=self.itervar)
    if not found:
        return False

    itervar, (start, end, step), (_, body_end) = found

    # We cannot handle symbols read from data containers unless they are
    # scalar
    for expr in (start, end, step):
        if symbolic.contains_sympy_functions(expr):
            return False

    # Find all loop-body states: traverse outgoing edges starting at the
    # first body state, never stepping into the guard.
    states = set()
    to_visit = [begin]
    while to_visit:
        state = to_visit.pop(0)
        for _, dst, _ in sdfg.out_edges(state):
            if dst not in states and dst is not guard:
                to_visit.append(dst)
        states.add(state)

    # The last body state reported by find_for_loop must have been reached
    assert (body_end in states)

    # Union of the write sets of all loop-body states
    write_set = set()
    for state in states:
        _, wset = state.read_and_write_sets()
        write_set |= wset

    # Get access nodes from other states to isolate local loop variables
    # (only transient containers accessed outside the loop body are added)
    other_access_nodes = set()
    for state in sdfg.nodes():
        if state in states:
            continue
        other_access_nodes |= set(n.data for n in state.data_nodes()
                                  if sdfg.arrays[n.data].transient)
    # Add non-transient nodes from loop state
    for state in states:
        other_access_nodes |= set(n.data for n in state.data_nodes()
                                  if not sdfg.arrays[n.data].transient)

    # Container name -> memlets that write to it inside the loop body
    write_memlets = defaultdict(list)

    # Wildcards used for matching; both exclude the iteration symbol
    itersym = symbolic.pystr_to_symbolic(itervar)
    a = sp.Wild('a', exclude=[itersym])
    b = sp.Wild('b', exclude=[itersym])

    for state in states:
        for dn in state.data_nodes():
            # Containers not in other_access_nodes are skipped entirely
            if dn.data not in other_access_nodes:
                continue
            # Take all writes that are not conflicted into consideration
            if dn.data in write_set:
                for e in state.in_edges(dn):
                    if e.data.dynamic and e.data.wcr is None:
                        # If pointers are involved, give up
                        return False
                    # To be sure that the value is only written at unique
                    # indices per loop iteration, we want to match symbols
                    # of the form "a*i+b" where a >= 1, and i is the iteration
                    # variable. The iteration variable must be used.
                    if e.data.wcr is None:
                        dst_subset = e.data.get_dst_subset(e, state)
                        if not (dst_subset and _check_range(
                                dst_subset, a, itersym, b, step)):
                            return False
                    # End of check

                    write_memlets[dn.data].append(e.data)

    # After looping over relevant writes, consider reads that may overlap
    for state in states:
        for dn in state.data_nodes():
            if dn.data not in other_access_nodes:
                continue
            data = dn.data
            if data in write_memlets:
                # Import as necessary
                from dace.sdfg.propagation import propagate_subset

                for e in state.out_edges(dn):
                    # If the same container is both read and written, only
                    # match if it is read and written at locations that will
                    # not create data races
                    if (e.data.dynamic
                            and e.data.src_subset.num_elements() != 1):
                        # If pointers are involved, give up
                        return False
                    src_subset = e.data.get_src_subset(e, state)
                    if not _check_range(src_subset, a, itersym, b, step):
                        return False

                    # Read subset propagated over the whole loop range
                    pread = propagate_subset([e.data], sdfg.arrays[data],
                                             [itervar],
                                             subsets.Range([(start, end,
                                                             step)]))
                    # NOTE(review): the loop variable below shadows the
                    # `candidate` parameter; the parameter is not used again
                    # after this point.
                    for candidate in write_memlets[data]:
                        # Simple case: read and write are in the same subset
                        read = src_subset
                        write = candidate.dst_subset
                        if read == write:
                            continue
                        # Restrict the comparison to the indices returned by
                        # _dependent_indices for the iteration variable; if
                        # there are none, compare all dimensions.
                        ridx = _dependent_indices(itervar, read)
                        widx = _dependent_indices(itervar, write)
                        indices = set(ridx) | set(widx)
                        if not indices:
                            indices = set(range(len(read)))
                        read = _sanitize_by_index(indices, read)
                        write = _sanitize_by_index(indices, write)
                        if read == write:
                            continue
                        # Propagated read does not overlap with propagated write
                        pwrite = propagate_subset([candidate],
                                                  sdfg.arrays[data], [itervar],
                                                  subsets.Range([
                                                      (start, end, step)
                                                  ]),
                                                  use_dst=True)
                        t_pread = _sanitize_by_index(
                            indices, pread.src_subset)
                        pwrite = _sanitize_by_index(
                            indices, pwrite.dst_subset)
                        # Only an explicit False (not None) counts as
                        # "does not intersect"
                        if subsets.intersects(t_pread, pwrite) is False:
                            continue
                        return False

    # Check that the iteration variable is not used on other edges or states
    # before it is reassigned
    prior_states = True
    for state in cfg.stateorder_topological_sort(sdfg):
        # Skip all states up to guard
        if prior_states:
            if state is begin:
                prior_states = False
            continue
        # We do not need to check the loop-body states
        if state in states:
            continue
        if itervar in state.free_symbols:
            return False
        # Don't continue in this direction, as the variable has
        # now been reassigned
        # TODO: Handle case of subset of out_edges
        if all(itervar in e.data.assignments
               for e in sdfg.out_edges(state)):
            break

    return True
def from_string(s):
    """
    Parse ``s`` into a symbolic expression.

    :param s: The string to parse.
    :return: The result of ``pystr_to_symbolic`` called with
             ``simplify=False``.
    """
    parsed = pystr_to_symbolic(s, simplify=False)
    return parsed
def from_json(json_obj, context=None):
    """
    Rebuild a ``vector`` from its JSON representation.

    :param json_obj: Mapping with the keys ``'dtype'`` and ``'elements'``.
    :param context: Forwarded unchanged to ``json_to_typeclass``.
    :return: ``vector`` constructed from the decoded ``'dtype'`` entry and
             the symbolic form of the ``'elements'`` entry.
    """
    # Imported locally, as in the original implementation
    from dace.symbolic import pystr_to_symbolic

    base_type = json_to_typeclass(json_obj['dtype'], context)
    elements = pystr_to_symbolic(json_obj['elements'])
    return vector(base_type, elements)
def astrange_to_symrange(astrange, arrays, arrname=None):
    """ Converts an AST range (array, [(start, end, skip)]) to a symbolic math
        range, using the obtained array sizes and resolved symbols.

        :param astrange: List with one entry per dimension. An entry is
                         either a ``(begin, end, skip)`` tuple of AST nodes
                         (each may be None) or a single AST node denoting
                         one index. May be None when ``arrname`` is given,
                         meaning "the entire array". The list is not
                         modified.
        :param arrays: Mapping from array name to data descriptor.
        :param arrname: Optional name of the array being indexed. When
                        given, missing trailing dimensions are filled with
                        full slices, missing ends default to the array
                        shape, and provably negative indices wrap around
                        the corresponding dimension.
        :return: List of ``(begin, end, skip)`` symbolic tuples with an
                 inclusive ``end``, or None if the array descriptor has no
                 shape.
        :raises ValueError: If the range has more dimensions than the array.
        :raises SyntaxError: If a range has no end and no array is given.
    """
    # Stays None when no array is given; negative indices are then left
    # untouched because there is no dimension to wrap around.
    arrdesc = None

    if arrname is not None:
        arrdesc = arrays[arrname]

        # If the array is a scalar, return None
        if arrdesc.shape is None:
            return None

        # If range is the entire array, use the array descriptor to obtain the
        # entire range
        if astrange is None:
            return [
                (symbolic.pystr_to_symbolic(0),
                 symbolic.pystr_to_symbolic(
                     symbolic.symbol_name_or_value(s)) - 1,
                 symbolic.pystr_to_symbolic(1)) for s in arrdesc.shape
            ]

        missing_slices = len(arrdesc.shape) - len(astrange)
        if missing_slices < 0:
            raise ValueError(
                'Mismatching shape {} - range {} dimensions'.format(
                    arrdesc.shape, astrange))
        # Pad a copy with full slices so the caller's list is not mutated
        astrange = list(astrange) + [(None, None, None)] * missing_slices

    def _wrap_negative(value, dim):
        # `(value < 0) == True` only holds when the comparison is decided;
        # an undecided symbolic relational compares unequal to True.
        if arrdesc is not None and (value < 0) == True:
            value += arrdesc.shape[dim]
        return value

    result = [None] * len(astrange)
    for i, r in enumerate(astrange):
        if isinstance(r, tuple):
            begin, end, skip = r
            # Default values
            if begin is None:
                begin = symbolic.pystr_to_symbolic(0)
            else:
                begin = symbolic.pystr_to_symbolic(unparse(begin))
                begin = _wrap_negative(begin, i)
            if end is None and arrname is None:
                raise SyntaxError('Cannot define range without end')
            elif end is not None:
                # Convert the exclusive end to an inclusive one
                end = symbolic.pystr_to_symbolic(unparse(end)) - 1
                end = _wrap_negative(end, i)
            else:
                end = symbolic.pystr_to_symbolic(
                    symbolic.symbol_name_or_value(arrdesc.shape[i])) - 1
            if skip is None:
                skip = symbolic.pystr_to_symbolic(1)
            else:
                skip = symbolic.pystr_to_symbolic(unparse(skip))
        else:
            # In the case where a single element is given
            begin = symbolic.pystr_to_symbolic(unparse(r))
            begin = _wrap_negative(begin, i)
            end = begin
            skip = symbolic.pystr_to_symbolic(1)

        result[i] = (begin, end, skip)

    return result
def propagate(self, array, dim_exprs, node_range):
    """
    Propagate one dimension expression of a memlet through the map
    parameter ``self.param``.

    :param array: Not used by this implementation.
    :param dim_exprs: Either a list (only its first element is used), a
                      tuple ``(begin, end, skip)`` or
                      ``(begin, end, skip, tile)``, or a single symbolic
                      expression denoting one index.
    :param node_range: Range of the map; ``node_range[self.paramind]`` must
                       unpack to ``(begin, end, step)``.
    :return: Tuple ``(begin, end, skip, tile)`` of simplified expressions.
    :raises NotImplementedError: If a tuple ``dim_exprs`` has neither three
                                 nor four elements.
    """
    # Compute last index in map according to range definition
    node_rb, node_re, node_rs = node_range[self.paramind]  # node_rs = 1
    node_rlen = node_re - node_rb + 1

    if isinstance(dim_exprs, list):
        dim_exprs = dim_exprs[0]

    if isinstance(dim_exprs, tuple):
        if len(dim_exprs) == 3:
            rb, re, rs = dim_exprs
            rt = '1'
        elif len(dim_exprs) == 4:
            rb, re, rs, rt = dim_exprs
        else:
            raise NotImplementedError

        rb = symbolic.pystr_to_symbolic(rb).expand()
        re = symbolic.pystr_to_symbolic(re).expand()
        rs = symbolic.pystr_to_symbolic(rs).expand()
        rt = symbolic.pystr_to_symbolic(rt).expand()
    else:
        # Single index: begin and end coincide
        rb, re = (dim_exprs.expand(), dim_exprs.expand())
        rs = 1
        rt = 1

    # The begin is evaluated at the first map iteration and the end at the
    # last one
    result_begin = rb.subs(self.param, node_rb).expand()
    result_end = re.subs(self.param, node_re).expand()

    # Experimental
    # This should be using sympy.floor
    memlet_start_pts = ((re - rt + 1 - rb) / rs) + 1
    memlet_rlen = memlet_start_pts.expand() * rt
    interval_len = (result_end - result_begin + 1) * self.veclen
    num_elements = node_rlen * memlet_rlen

    if (interval_len == num_elements
            or interval_len.expand() == num_elements):
        # Continuous access
        result_skip = 1
        result_tile = 1
    else:
        if rt == 1:
            result_skip = (result_end - result_begin - re +
                           rb) / (node_re - node_rb)
            try:
                if result_skip < 1:
                    result_skip = 1
            except TypeError:
                # The comparison cannot be decided for a symbolic
                # expression; keep the computed skip. Only TypeError is
                # caught so that interrupts and unrelated errors are not
                # silently swallowed.
                pass
            result_tile = result_end - result_begin + 1 - (
                node_rlen - 1) * result_skip
        else:
            candidate_skip = rs
            candidate_tile = rt * node_rlen
            candidate_lstart_pt = (result_end - result_begin + 1 -
                                   candidate_tile)
            if (candidate_lstart_pt /
                (num_elements / candidate_tile - 1)
                ).simplify() == candidate_skip:
                result_skip = rs
                result_tile = rt * node_rlen
            else:
                result_skip = rs / node_rlen
                result_tile = rt

        # A skip equal to the tile size (or a unit skip) is collapsed to
        # the same (1, 1) form used for continuous access above
        if result_skip == result_tile or result_skip == 1:
            result_skip = 1
            result_tile = 1

    result_begin = sympy.simplify(result_begin)
    result_end = sympy.simplify(result_end)
    result_skip = sympy.simplify(result_skip)
    result_tile = sympy.simplify(result_tile)

    return (result_begin, result_end, result_skip, result_tile)
def free_symbols(self) -> Set[str]:
    """
    Return the free symbols of the parent class extended by the names of
    all symbols appearing in the values of ``self.symbol_mapping``.
    """
    symbols = super().free_symbols
    for mapped_expr in self.symbol_mapping.values():
        mapped_syms = pystr_to_symbolic(mapped_expr).free_symbols
        symbols.update(str(sym) for sym in mapped_syms)
    return symbols
def apply(self, sdfg):
    """
    Tile the matched map by applying ``StripMining`` to each of its
    dimensions in turn.

    Tile sizes come from ``self.tile_sizes``; strides come from
    ``self.strides`` when it has the same length as ``self.tile_sizes`` and
    from ``self.tile_sizes`` otherwise. Dimensions beyond the end of these
    lists reuse the last entry. After each strip-mining step except the
    first, ``MapCollapse`` is applied to the previously created outer map
    entry and the newly created one.

    :param sdfg: The SDFG containing the state ``self.state_id``.
    :return: The source node of the first incoming edge of the original map
             entry after the last strip-mining step, or None if every
             dimension was skipped.
    """
    from dace.transformation.dataflow.map_collapse import MapCollapse
    from dace.transformation.dataflow.strip_mining import StripMining

    graph = sdfg.nodes()[self.state_id]

    if self.strides is not None and len(self.strides) == len(
            self.tile_sizes):
        tile_strides = self.strides
    else:
        tile_strides = self.tile_sizes

    # Retrieve map entry and exit nodes.
    map_entry = graph.nodes()[self.subgraph[MapTiling.map_entry]]

    stripmine_subgraph = {
        StripMining._map_entry: self.subgraph[MapTiling.map_entry]
    }
    sdfg_id = sdfg.sdfg_id
    original_schedule = map_entry.schedule

    last_map_entry = None
    removed_maps = 0

    for param_idx in range(len(map_entry.map.params)):
        # Dimensions past the end of the size list reuse its last entry
        size_idx = -1 if param_idx >= len(self.tile_sizes) else param_idx
        tile_size = symbolic.pystr_to_symbolic(self.tile_sizes[size_idx])
        tile_stride = symbolic.pystr_to_symbolic(tile_strides[size_idx])

        # handle offsets
        if not self.tile_offset:
            offset = 0
        elif param_idx >= len(self.tile_offset):
            offset = self.tile_offset[-1]
        else:
            offset = self.tile_offset[param_idx]

        # Account for dimensions already taken out of the inner map
        dim_idx = param_idx - removed_maps

        # If tile size is trivial, skip strip-mining map dimension
        if tile_size == map_entry.map.range.size()[dim_idx]:
            continue

        stripmine = StripMining(sdfg_id, self.state_id, stripmine_subgraph,
                                self.expr_index)

        # Special case: Tile size of 1 should be omitted from inner map
        omit_from_inner = (tile_size == 1 and tile_stride == 1
                           and self.tile_trivial == False)

        stripmine.dim_idx = dim_idx
        stripmine.new_dim_prefix = '' if omit_from_inner else self.prefix
        stripmine.tile_size = str(tile_size)
        stripmine.tile_stride = str(tile_stride)
        stripmine.divides_evenly = (True if omit_from_inner else
                                    self.divides_evenly)
        stripmine.tile_offset = str(offset)
        stripmine.apply(sdfg)
        if omit_from_inner:
            removed_maps += 1

        # apply to the new map the schedule of the original one
        map_entry.schedule = original_schedule

        if last_map_entry:
            new_map_entry = graph.in_edges(map_entry)[0].src
            mapcollapse_subgraph = {
                MapCollapse._outer_map_entry: graph.node_id(last_map_entry),
                MapCollapse._inner_map_entry: graph.node_id(new_map_entry)
            }
            MapCollapse(sdfg_id, self.state_id, mapcollapse_subgraph,
                        0).apply(sdfg)
        last_map_entry = graph.in_edges(map_entry)[0].src

    return last_map_entry
def can_be_applied(graph, candidate, expr_index, sdfg, strict=False):
    """
    Check whether the two matched maps can be fused.

    Fusion is rejected when:

    * an edge into the first map's exit has a write-conflict resolution on
      data that is also read by the second map;
    * the first map's exit leads to anything other than access nodes, or to
      an access node whose data appears more than once in the state;
    * ``MapFusion.find_permutation`` finds no permutation between the maps;
    * an intermediate access node feeding the second map has more than one
      outgoing edge;
    * an input of the second map that is not intermediate data originates
      from a node reachable from an intermediate node;
    * an intermediate input of the second map is not covered by any output
      memlet of the first map.

    :param graph: State in which the pattern was matched.
    :param candidate: Pattern match, indexed with
                      ``MapFusion.first_map_exit`` and
                      ``MapFusion.second_map_entry``.
    :param expr_index: Unused.
    :param sdfg: Unused.
    :param strict: Unused.
    :return: True if all checks pass, False otherwise.
    """
    first_map_exit = graph.nodes()[candidate[MapFusion.first_map_exit]]
    first_map_entry = graph.entry_node(first_map_exit)
    second_map_entry = graph.nodes()[candidate[MapFusion.second_map_entry]]

    # A WCR output of the first map must not be consumed by the second map
    for exit_in_edge in graph.in_edges(first_map_exit):
        if exit_in_edge.data.wcr is None:
            continue
        for entry_out_edge in graph.out_edges(second_map_entry):
            if entry_out_edge.data.data == exit_in_edge.data.data:
                return False

    # Check whether there is a pattern map -> access -> map.
    intermediate_nodes = set()
    intermediate_data = set()
    for _, _, dst, _, _ in graph.out_edges(first_map_exit):
        if not isinstance(dst, nodes.AccessNode):
            return False
        intermediate_nodes.add(dst)
        intermediate_data.add(dst.data)

        # The array must not be used anywhere else in this state
        occurrences = sum(
            1 for n in graph.nodes()
            if isinstance(n, nodes.AccessNode) and n.data == dst.data)
        if occurrences > 1:
            return False

    # Check map ranges
    perm = MapFusion.find_permutation(first_map_entry.map,
                                      second_map_entry.map)
    if perm is None:
        return False

    # Check if any intermediate transient is also going to another location
    second_inodes = set(e.src for e in graph.in_edges(second_map_entry)
                        if isinstance(e.src, nodes.AccessNode))
    transients_to_remove = intermediate_nodes & second_inodes
    for transient in transients_to_remove:
        if graph.out_degree(transient) > 1:
            return False

    # Map each parameter of the first map to the corresponding parameter
    # of the second map
    params_dict = {
        param: second_map_entry.map.params[perm[index]]
        for index, param in enumerate(first_map_entry.map.params)
    }

    out_memlets = [e.data for e in graph.in_edges(first_map_exit)]

    # Check that input set of second map is provided by the output set
    # of the first map, or other unrelated maps
    for second_edge in graph.out_edges(second_map_entry):
        # Memlets that do not come from one of the intermediate arrays
        if second_edge.data.data not in intermediate_data:
            # However, if intermediate data eventually leads to the source
            # of this memlet, fail.
            # NOTE: Assumes graph has networkx version
            if any(
                    graph.memlet_path(second_edge)[0].src in nx.descendants(
                        graph._nx, inode) for inode in intermediate_nodes):
                return False
            continue

        # Compute second subset with respect to first subset's symbols
        sbs_permuted = dcpy(second_edge.data.subset)
        sbs_permuted.replace({
            symbolic.pystr_to_symbolic(k): symbolic.pystr_to_symbolic(v)
            for k, v in params_dict.items()
        })

        # The input is provided if some output memlet of the first map on
        # the same data covers the permuted subset
        provided = any(
            first_memlet.data == second_edge.data.data
            and first_memlet.subset.covers(sbs_permuted)
            for first_memlet in out_memlets)

        # If none of the output memlets of the first map provide the info,
        # fail.
        if not provided:
            return False

    # Success
    return True
def apply(self, sdfg: sd.SDFG):
    """
    Apply map fission to the matched map.

    Two patterns are handled, selected by ``self.expr_index``:

    * ``0``: the map contains a subgraph; the scope contents (without the
      entry and exit nodes) are processed in the map's own state.
    * otherwise: the map contains a nested SDFG; every state of the nested
      SDFG is processed.

    For every processed state, each component returned by
    ``MapFission._components`` is wrapped in a new map carrying a copy of
    the outer map's range, code->code edges between components are routed
    through new transients sized by the map range, and border arrays get
    the map dimensions prepended. Finally the original map entry and exit
    are removed.

    :param sdfg: The SDFG containing the state ``self.state_id``.
    """
    graph: sd.SDFGState = sdfg.nodes()[self.state_id]
    map_entry = graph.node(self.subgraph[MapFission._map_entry])
    map_exit = graph.exit_node(map_entry)
    nsdfg_node: Optional[nodes.NestedSDFG] = None

    # Obtain subgraph to perform fission to
    if self.expr_index == 0:  # Map with subgraph
        subgraphs = [(graph,
                      graph.scope_subgraph(map_entry,
                                           include_entry=False,
                                           include_exit=False))]
        parent = sdfg
    else:  # Map with nested SDFG
        nsdfg_node = graph.node(self.subgraph[MapFission._nested_sdfg])
        subgraphs = [(state, state) for state in nsdfg_node.sdfg.nodes()]
        parent = nsdfg_node.sdfg
    # Arrays whose descriptors already had the map dimensions prepended;
    # shared across all processed states
    modified_arrays = set()

    # Get map information
    outer_map: nodes.Map = map_entry.map
    mapsize = outer_map.range.size()

    # Add new symbols from outer map to nested SDFG
    if self.expr_index == 1:
        map_syms = outer_map.range.free_symbols
        for edge in graph.out_edges(map_entry):
            if edge.data.data:
                map_syms.update(edge.data.subset.free_symbols)
        for edge in graph.in_edges(map_exit):
            if edge.data.data:
                map_syms.update(edge.data.subset.free_symbols)
        for sym in map_syms:
            symname = str(sym)
            # Map parameters are handled separately below
            if symname in outer_map.params:
                continue
            if symname not in nsdfg_node.symbol_mapping.keys():
                nsdfg_node.symbol_mapping[symname] = sym
                nsdfg_node.sdfg.symbols[symname] = graph.symbols_defined_at(
                    nsdfg_node)[symname]

        # Remove map symbols from nested mapping
        for name in outer_map.params:
            if str(name) in nsdfg_node.symbol_mapping:
                del nsdfg_node.symbol_mapping[str(name)]
            if str(name) in nsdfg_node.sdfg.symbols:
                del nsdfg_node.sdfg.symbols[str(name)]

    for state, subgraph in subgraphs:
        # Each component is an (input node, output node) pair
        components = MapFission._components(subgraph)
        sources = subgraph.source_nodes()
        sinks = subgraph.sink_nodes()

        # Collect external edges
        if self.expr_index == 0:
            external_edges_entry = list(state.out_edges(map_entry))
            external_edges_exit = list(state.in_edges(map_exit))
        else:
            # In a nested SDFG, edges touching non-transient access nodes
            # are the external ones
            external_edges_entry = [
                e for e in subgraph.edges()
                if (isinstance(e.src, nodes.AccessNode)
                    and not nsdfg_node.sdfg.arrays[e.src.data].transient)
            ]
            external_edges_exit = [
                e for e in subgraph.edges()
                if (isinstance(e.dst, nodes.AccessNode)
                    and not nsdfg_node.sdfg.arrays[e.dst.data].transient)
            ]

        # Map external edges to outer memlets
        edge_to_outer = {}
        for edge in external_edges_entry:
            if self.expr_index == 0:
                # Subgraphs use the corresponding outer map edges
                path = state.memlet_path(edge)
                eindex = path.index(edge)
                edge_to_outer[edge] = path[eindex - 1]
            else:
                # Nested SDFGs use the internal map edges of the node
                outer_edge = next(e for e in graph.in_edges(nsdfg_node)
                                  if e.dst_conn == edge.src.data)
                edge_to_outer[edge] = outer_edge

        for edge in external_edges_exit:
            if self.expr_index == 0:
                path = state.memlet_path(edge)
                eindex = path.index(edge)
                edge_to_outer[edge] = path[eindex + 1]
            else:
                # Nested SDFGs use the internal map edges of the node
                outer_edge = next(e for e in graph.out_edges(nsdfg_node)
                                  if e.src_conn == edge.dst.data)
                edge_to_outer[edge] = outer_edge

        # Collect all border arrays and code->code edges
        arrays = MapFission._border_arrays(
            nsdfg_node.sdfg if self.expr_index == 1 else sdfg, state,
            subgraph)
        # Data name -> edges that lead from a component output directly
        # into a code node
        scalars = defaultdict(list)
        for _, component_out in components:
            for e in subgraph.out_edges(component_out):
                if isinstance(e.dst, nodes.CodeNode):
                    scalars[e.data.data].append(e)

        # Create new arrays for scalars: the old descriptor is removed and
        # replaced by a transient with the shape of the map range
        for scalar, edges in scalars.items():
            desc = parent.arrays[scalar]
            del parent.arrays[scalar]
            name, newdesc = parent.add_transient(
                scalar,
                mapsize,
                desc.dtype,
                desc.storage,
                lifetime=desc.lifetime,
                debuginfo=desc.debuginfo,
                allow_conflicts=desc.allow_conflicts,
                find_new_name=True)

            # Add extra nodes in component boundaries
            for edge in edges:
                anode = state.add_access(name)
                sbs = subsets.Range.from_string(','.join(outer_map.params))
                # Offset memlet by map range begin (to fit the transient)
                sbs.offset([r[0] for r in outer_map.range], True)
                state.add_edge(
                    edge.src, edge.src_conn, anode, None,
                    mm.Memlet.simple(
                        name,
                        sbs,
                        num_accesses=outer_map.range.num_elements()))
                state.add_edge(
                    anode, None, edge.dst, edge.dst_conn,
                    mm.Memlet.simple(
                        name,
                        sbs,
                        num_accesses=outer_map.range.num_elements()))
                state.remove_edge(edge)

        # Add extra maps around components
        new_map_entries = []
        for component_in, component_out in components:
            # The map is created with a placeholder range; the real range
            # is copied from the outer map below
            me, mx = state.add_map(outer_map.label + '_fission',
                                   [(p, '0:1') for p in outer_map.params],
                                   outer_map.schedule,
                                   unroll=outer_map.unroll,
                                   debuginfo=outer_map.debuginfo)

            # Add dynamic input connectors
            for conn in map_entry.in_connectors:
                if not conn.startswith('IN_'):
                    me.add_in_connector(conn)

            me.map.range = dcpy(outer_map.range)
            new_map_entries.append(me)

            # Reconnect edges through new map
            for e in state.in_edges(component_in):
                state.add_edge(me, None, e.dst, e.dst_conn, dcpy(e.data))
                # Reconnect inner edges at source directly to external nodes
                if self.expr_index == 0 and e in external_edges_entry:
                    state.add_edge(edge_to_outer[e].src,
                                   edge_to_outer[e].src_conn, me, None,
                                   dcpy(edge_to_outer[e].data))
                else:
                    state.add_edge(e.src, e.src_conn, me, None, dcpy(e.data))
                state.remove_edge(e)
            # Empty memlet edge in nested SDFGs
            if state.in_degree(component_in) == 0:
                state.add_edge(me, None, component_in, None, mm.Memlet())

            for e in state.out_edges(component_out):
                state.add_edge(e.src, e.src_conn, mx, None, dcpy(e.data))
                # Reconnect inner edges at sink directly to external nodes
                if self.expr_index == 0 and e in external_edges_exit:
                    state.add_edge(mx, None, edge_to_outer[e].dst,
                                   edge_to_outer[e].dst_conn,
                                   dcpy(edge_to_outer[e].data))
                else:
                    state.add_edge(mx, None, e.dst, e.dst_conn, dcpy(e.data))
                state.remove_edge(e)
            # Empty memlet edge in nested SDFGs
            if state.out_degree(component_out) == 0:
                state.add_edge(component_out, None, mx, None, mm.Memlet())
        # Connect other sources/sinks not in components (access nodes)
        # directly to external nodes
        if self.expr_index == 0:
            for node in sources:
                if isinstance(node, nodes.AccessNode):
                    for edge in state.in_edges(node):
                        outer_edge = edge_to_outer[edge]
                        memlet = dcpy(edge.data)
                        # Prepend the full map range to the memlet subset
                        memlet.subset = subsets.Range(
                            outer_map.range.ranges + memlet.subset.ranges)
                        state.add_edge(outer_edge.src, outer_edge.src_conn,
                                       edge.dst, edge.dst_conn, memlet)

            for node in sinks:
                if isinstance(node, nodes.AccessNode):
                    for edge in state.out_edges(node):
                        outer_edge = edge_to_outer[edge]
                        state.add_edge(edge.src, edge.src_conn,
                                       outer_edge.dst, outer_edge.dst_conn,
                                       dcpy(outer_edge.data))

        # Augment arrays by prepending map dimensions
        for array in arrays:
            if array in modified_arrays:
                continue
            desc = parent.arrays[array]
            # Each new leading dimension gets the current total size as its
            # stride; the total size then grows by that dimension's extent
            for sz in reversed(mapsize):
                desc.strides = [desc.total_size] + list(desc.strides)
                desc.total_size = desc.total_size * sz

            desc.shape = mapsize + list(desc.shape)
            desc.offset = [0] * len(mapsize) + list(desc.offset)
            modified_arrays.add(array)

        # Fill scope connectors so that memlets can be tracked below
        state.fill_scope_connectors()

        # Correct connectors and memlets in nested SDFGs to account for
        # missing outside map
        if self.expr_index == 1:
            to_correct = ([(e, e.src) for e in external_edges_entry] +
                          [(e, e.dst) for e in external_edges_exit])
            corrected_nodes = set()
            for edge, node in to_correct:
                if isinstance(node, nodes.AccessNode):
                    # Each access node is corrected only once
                    if node in corrected_nodes:
                        continue
                    corrected_nodes.add(node)

                    outer_edge = edge_to_outer[edge]
                    desc = parent.arrays[node.data]

                    # Modify shape of internal array to match outer one
                    outer_desc = sdfg.arrays[outer_edge.data.data]
                    if not isinstance(desc, dt.Scalar):
                        desc.shape = outer_desc.shape
                    if isinstance(desc, dt.Array):
                        desc.strides = outer_desc.strides
                        desc.total_size = outer_desc.total_size

                    # Inside the nested SDFG, offset all memlets to include
                    # the offsets from within the map.
                    # NOTE: Relies on propagation to fix outer memlets
                    for internal_edge in state.all_edges(node):
                        for e in state.memlet_tree(internal_edge):
                            e.data.subset.offset(desc.offset, False)
                            e.data.subset = helpers.unsqueeze_memlet(
                                e.data, outer_edge.data).subset

                    # Only after offsetting memlets we can modify the
                    # overall offset
                    if isinstance(desc, dt.Array):
                        desc.offset = outer_desc.offset

        # Fill in memlet trees for border transients
        # NOTE: Memlet propagation should run to correct the outer edges
        for node in subgraph.nodes():
            if isinstance(node, nodes.AccessNode) and node.data in arrays:
                for edge in state.all_edges(node):
                    for e in state.memlet_tree(edge):
                        # Prepend map dimensions to memlet: one single-index
                        # range per map parameter, shifted by the begin of
                        # that parameter's range
                        e.data.subset = subsets.Range(
                            [(pystr_to_symbolic(d) - r[0],
                              pystr_to_symbolic(d) - r[0], 1) for d, r in zip(
                                  outer_map.params, outer_map.range)] +
                            e.data.subset.ranges)

    # If nested SDFG, reconnect nodes around map and modify memlets
    if self.expr_index == 1:
        for edge in graph.in_edges(map_entry):
            # Only edges arriving at an 'IN_' connector are rerouted
            if not edge.dst_conn or not edge.dst_conn.startswith('IN_'):
                continue

            # Modify edge coming into nested SDFG to include entire array
            desc = sdfg.arrays[edge.data.data]
            edge.data.subset = subsets.Range.from_array(desc)
            edge.data.num_accesses = edge.data.subset.num_elements()

            # Find matching edge inside map (same connector name after
            # stripping the 'OUT_' / 'IN_' prefixes)
            inner_edge = next(
                e for e in graph.out_edges(map_entry)
                if e.src_conn and e.src_conn[4:] == edge.dst_conn[3:])
            graph.add_edge(edge.src, edge.src_conn, nsdfg_node,
                           inner_edge.dst_conn, dcpy(edge.data))

        for edge in graph.out_edges(map_exit):
            # Modify edge coming out of nested SDFG to include entire array
            desc = sdfg.arrays[edge.data.data]
            edge.data.subset = subsets.Range.from_array(desc)

            # Find matching edge inside map
            inner_edge = next(e for e in graph.in_edges(map_exit)
                              if e.dst_conn[3:] == edge.src_conn[4:])
            graph.add_edge(nsdfg_node, inner_edge.src_conn, edge.dst,
                           edge.dst_conn, dcpy(edge.data))

    # Remove outer map
    graph.remove_nodes_from([map_entry, map_exit])
def apply(self, sdfg: SDFG) -> nodes.MapEntry:
    """
    Insert a ``warp_tile`` map with parameter ``__tid`` inside the matched
    map and stride the immediately nested maps by ``self.warp_size``.

    For each internal map whose last dimension is not provably smaller than
    the warp size, the step of the last dimension is multiplied by the warp
    size and its parameter is offset by ``__tid``. If
    ``self.replicate_maps`` is set, maps with a single transient output
    that has several consumers are replicated once per extra consumer.
    Outputs with a write-conflict resolution get a ``warpreduce`` tasklet
    (single-element outputs only). Finally, the contents of the new scope
    are nested into an SDFG.

    :param sdfg: The SDFG containing the state ``self.state_id``.
    :return: The entry node of the newly created ``warp_tile`` map.
    :raises NotImplementedError: For custom reduction types and for WCR
                                 outputs with more than one element.
    """
    me: nodes.MapEntry = self.mapentry(sdfg)
    graph = sdfg.node(self.state_id)

    # Add new map within map.
    # The range is given as a dict literal rather than ``dict(__tid=...)``:
    # inside a class body, a keyword argument starting with two underscores
    # is subject to private name mangling, which would rename the map
    # parameter to something other than '__tid'.
    mx = graph.exit_node(me)
    new_me, new_mx = graph.add_map('warp_tile',
                                   {'__tid': f'0:{self.warp_size}'},
                                   dtypes.ScheduleType.GPU_ThreadBlock)
    tid_symbol = symbolic.pystr_to_symbolic('__tid')
    for e in graph.out_edges(me):
        xfh.reconnect_edge_through_map(graph, e, new_me, True)
    for e in graph.in_edges(mx):
        xfh.reconnect_edge_through_map(graph, e, new_mx, False)

    # Stride and offset all internal maps
    maps_to_stride = xfh.get_internal_scopes(graph, new_me, immediate=True)
    for nstate, nmap in maps_to_stride:
        nsdfg = nstate.parent
        nsdfg_node = nsdfg.parent_nsdfg_node

        # Map cannot be partitioned across a warp
        # (`== True` only holds when the symbolic comparison is decided)
        if (nmap.range.size()[-1] < self.warp_size) == True:
            continue

        # Make the thread index visible inside nested SDFGs
        if nsdfg is not sdfg and nsdfg_node is not None:
            nsdfg_node.symbol_mapping['__tid'] = tid_symbol
            if '__tid' not in nsdfg.symbols:
                nsdfg.add_symbol('__tid', dtypes.int32)
        nmap.range[-1] = (nmap.range[-1][0], nmap.range[-1][1],
                          nmap.range[-1][2] * self.warp_size)
        subgraph = nstate.scope_subgraph(nmap)
        subgraph.replace(nmap.params[-1], f'{nmap.params[-1]} + __tid')
        inner_map_exit = nstate.exit_node(nmap)
        # If requested, replicate maps with multiple dependent maps
        if self.replicate_maps:
            destinations = [
                nstate.memlet_path(edge)[-1].dst
                for edge in nstate.out_edges(inner_map_exit)
            ]

            for dst in destinations:
                # Transformation will not replicate map with more than one
                # output
                if len(destinations) != 1:
                    break
                if not isinstance(dst, nodes.AccessNode):
                    continue  # Not leading to access node
                if not xfh.contained_in(nstate, dst, new_me):
                    continue  # Memlet path goes out of map
                if not nsdfg.arrays[dst.data].transient:
                    continue  # Cannot modify non-transients
                for edge in nstate.out_edges(dst)[1:]:
                    rep_subgraph = xfh.replicate_scope(
                        nsdfg, nstate, subgraph)
                    rep_edge = nstate.out_edges(
                        rep_subgraph.sink_nodes()[0])[0]
                    # Add copy of data. The descriptor is taken from the
                    # SDFG that owns `nstate`; this is the same container
                    # whose `transient` flag was checked above and the one
                    # the copy is registered in.
                    newdesc = copy.deepcopy(nsdfg.arrays[dst.data])
                    newname = nsdfg.add_datadesc(dst.data,
                                                 newdesc,
                                                 find_new_name=True)
                    newaccess = nstate.add_access(newname)
                    # Redirect edges
                    xfh.redirect_edge(nstate,
                                      rep_edge,
                                      new_dst=newaccess,
                                      new_data=newname)
                    xfh.redirect_edge(nstate,
                                      edge,
                                      new_src=newaccess,
                                      new_data=newname)

        # If has WCR, add warp-collaborative reduction on outputs
        for out_edge in nstate.out_edges(inner_map_exit):
            if out_edge.data.wcr is not None:
                ctype = nsdfg.arrays[out_edge.data.data].dtype.ctype
                redtype = detect_reduction_type(out_edge.data.wcr)
                if redtype == dtypes.ReductionType.Custom:
                    raise NotImplementedError
                # Use the part of the enum's string form after the dot
                credtype = ('dace::ReductionType::' +
                            str(redtype)[str(redtype).find('.') + 1:])

                # Add local access between thread-local and warp reduction
                newnode = nstate.add_access(out_edge.data.data)
                nstate.remove_edge(out_edge)
                nstate.add_edge(out_edge.src, out_edge.src_conn, newnode,
                                None, copy.deepcopy(out_edge.data))

                if out_edge.data.subset.num_elements(
                ) == 1:  # One element: tasklet
                    wrt = nstate.add_tasklet(
                        'warpreduce', {'__a'}, {'__out'},
                        f'__out = dace::warpReduce<{credtype}, {ctype}>::reduce(__a);',
                        dtypes.Language.CPP)
                    nstate.add_edge(newnode, None, wrt, '__a',
                                    Memlet(out_edge.data.data))
                    # The conflict resolution is cleared before the memlet
                    # is reused on the tasklet's output edge
                    out_edge.data.wcr = None
                    nstate.add_edge(wrt, '__out', out_edge.dst, None,
                                    out_edge.data)
                else:  # More than one element: mapped tasklet
                    raise NotImplementedError
        # End of WCR to warp reduction

    # Make nested SDFG out of new scope
    xfh.nest_state_subgraph(sdfg, graph,
                            graph.scope_subgraph(new_me, False, False))

    return new_me
def __stripmine(self, sdfg, graph, candidate):
    """
    Strip-mine dimension ``self.dim_idx`` of the matched map.

    A new outer map with a single parameter (``<prefix>_<param>``) is
    created to iterate over tiles, and the selected dimension of the
    original map is restricted to one tile. All edges that entered/left the
    original map are redirected to the new outer map, and new edges are
    created between the outer and the original map nodes.

    :param sdfg: The SDFG; used to look up data descriptors.
    :param graph: The state containing the map.
    :param candidate: Pattern match, indexed with
                      ``StripMining._map_entry``.
    :return: Tuple ``(target_dim, new_dim, new_map)``: the strip-mined
             parameter name, the new outer parameter name and the new map.
    """
    # Retrieve map entry and exit nodes.
    map_entry = graph.nodes()[candidate[StripMining._map_entry]]
    map_exit = graph.exit_nodes(map_entry)[0]

    # Retrieve transformation properties.
    dim_idx = self.dim_idx
    new_dim_prefix = self.new_dim_prefix
    tile_size = self.tile_size
    divides_evenly = self.divides_evenly
    strided = self.strided

    # An unset or empty stride defaults to the tile size
    tile_stride = self.tile_stride
    if tile_stride is None or len(tile_stride) == 0:
        tile_stride = tile_size

    # Retrieve parameter and range of dimension to be strip-mined.
    target_dim = map_entry.map.params[dim_idx]
    td_from, td_to, td_step = map_entry.map.range[dim_idx]

    # Create new map. Replace by cloning???
    new_dim = new_dim_prefix + '_' + target_dim
    nd_from = 0
    nd_to = symbolic.pystr_to_symbolic(
        'int_ceil(%s + 1 - %s, %s) - 1' %
        (symbolic.symstr(td_to), symbolic.symstr(td_from), tile_stride))
    nd_step = 1
    new_dim_range = (nd_from, nd_to, nd_step)
    new_map = nodes.Map(new_dim + '_' + map_entry.map.label, [new_dim],
                        subsets.Range([new_dim_range]))
    new_map_entry = nodes.MapEntry(new_map)
    new_map_exit = nodes.MapExit(new_map)

    # Change the range of the selected dimension to iterate over a single
    # tile
    if strided:
        td_from_new = symbolic.pystr_to_symbolic(new_dim)
        td_to_new_approx = td_to
        td_step = symbolic.pystr_to_symbolic(tile_size)
    else:
        td_from_new = symbolic.pystr_to_symbolic(
            '%s + %s * %s' %
            (symbolic.symstr(td_from), str(new_dim), tile_stride))
        # Exact end is clamped to the original range end
        td_to_new_exact = symbolic.pystr_to_symbolic(
            'min(%s + 1, %s + %s * %s + %s) - 1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from), tile_stride,
             str(new_dim), tile_size))
        td_to_new_approx = symbolic.pystr_to_symbolic(
            '%s + %s * %s + %s - 1' %
            (symbolic.symstr(td_from), tile_stride, str(new_dim), tile_size))
    if divides_evenly or strided:
        td_to_new = td_to_new_approx
    else:
        td_to_new = dace.symbolic.SymExpr(td_to_new_exact, td_to_new_approx)
    map_entry.map.range[dim_idx] = (td_from_new, td_to_new, td_step)

    # Make internal map's schedule to "not parallel"
    new_map.schedule = map_entry.map.schedule
    map_entry.map.schedule = dtypes.ScheduleType.Sequential

    # Redirect edges
    new_map_entry.in_connectors = dcpy(map_entry.in_connectors)
    nxutil.change_edge_dest(graph, map_entry, new_map_entry)
    new_map_exit.out_connectors = dcpy(map_exit.out_connectors)
    nxutil.change_edge_src(graph, map_exit, new_map_exit)

    # Create new entry edges
    # Key: (data name, inner 'IN_' connector, outer 'OUT_' connector)
    new_in_edges = dict()
    entry_in_conn = set()
    entry_out_conn = set()
    for _src, src_conn, _dst, _, memlet in graph.out_edges(map_entry):
        if (src_conn[:4] == 'OUT_' and not isinstance(
                sdfg.arrays[memlet.data], dace.data.Scalar)):
            # Image of the memlet subset over the (new) inner map range
            new_subset = calc_set_image(
                map_entry.map.params,
                map_entry.map.range,
                memlet.subset,
            )
            conn = src_conn[4:]
            key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
            if key in new_in_edges.keys():
                # Several inner edges share the connector: unite subsets
                old_subset = new_in_edges[key].subset
                new_in_edges[key].subset = calc_set_union(
                    old_subset, new_subset)
            else:
                entry_in_conn.add('IN_' + conn)
                entry_out_conn.add('OUT_' + conn)
                new_memlet = dcpy(memlet)
                new_memlet.subset = new_subset
                new_memlet.num_accesses = new_memlet.num_elements()
                new_in_edges[key] = new_memlet
        else:
            # Scalars and non-'OUT_' connectors are forwarded unchanged
            if src_conn[:4] == 'OUT_':
                conn = src_conn[4:]
                in_conn = 'IN_' + conn
                out_conn = 'OUT_' + conn
            else:
                in_conn = src_conn
                out_conn = src_conn
            entry_in_conn.add(in_conn)
            entry_out_conn.add(out_conn)
            new_in_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
    new_map_entry.out_connectors = entry_out_conn
    map_entry.in_connectors = entry_in_conn
    for (_, in_conn, out_conn), memlet in new_in_edges.items():
        graph.add_edge(new_map_entry, out_conn, map_entry, in_conn, memlet)

    # Create new exit edges
    new_out_edges = dict()
    exit_in_conn = set()
    exit_out_conn = set()
    for _src, _, _dst, dst_conn, memlet in graph.in_edges(map_exit):
        if (dst_conn[:3] == 'IN_' and not isinstance(
                sdfg.arrays[memlet.data], dace.data.Scalar)):
            new_subset = calc_set_image(
                map_entry.map.params,
                map_entry.map.range,
                memlet.subset,
            )
            conn = dst_conn[3:]
            key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
            if key in new_out_edges.keys():
                old_subset = new_out_edges[key].subset
                new_out_edges[key].subset = calc_set_union(
                    old_subset, new_subset)
            else:
                exit_in_conn.add('IN_' + conn)
                exit_out_conn.add('OUT_' + conn)
                new_memlet = dcpy(memlet)
                new_memlet.subset = new_subset
                new_memlet.num_accesses = new_memlet.num_elements()
                new_out_edges[key] = new_memlet
        else:
            if dst_conn[:3] == 'IN_':
                conn = dst_conn[3:]
                in_conn = 'IN_' + conn
                out_conn = 'OUT_' + conn
            else:
                # Use this edge's own connector (not the stale `src_conn`
                # left over from the entry loop above)
                in_conn = dst_conn
                out_conn = dst_conn
            exit_in_conn.add(in_conn)
            exit_out_conn.add(out_conn)
            # Record as an exit edge so that it is created below; storing
            # it in `new_in_edges` would silently drop it, since the entry
            # edges have already been added.
            new_out_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
    new_map_exit.in_connectors = exit_in_conn
    map_exit.out_connectors = exit_out_conn
    for (_, in_conn, out_conn), memlet in new_out_edges.items():
        graph.add_edge(map_exit, out_conn, new_map_exit, in_conn, memlet)

    # Return strip-mined dimension.
    return target_dim, new_dim, new_map
def astrange_to_symrange(astrange, arrays, arrname=None):
    """ Translates an AST range into a list of symbolic
        ``(begin, end, skip)`` triplets whose ``end`` is inclusive.

        :param astrange: Sequence with one entry per dimension. A tuple entry
                         is read as ``(begin, end, skip)`` AST nodes (any of
                         which may be None); any other entry is treated as a
                         single-element index. May be None to request the
                         whole array (only if ``arrname`` is given).
        :param arrays: Mapping from array name to data descriptor.
        :param arrname: Optional name of the array the range refers to, used
                        to fill in missing range ends.
        :return: A list of symbolic triplets, or None if the named array's
                 descriptor has ``shape is None``.
        :raise SyntaxError: A range has no end and no array name was given.
    """

    def _sym(expr):
        # Resolved lazily so that paths which need no conversion never
        # touch the symbolic module
        return symbolic.pystr_to_symbolic(expr)

    if arrname is not None:
        arrdesc = arrays[arrname]

        # Scalars have no range
        if arrdesc.shape is None:
            return None

        # No explicit range: cover every dimension of the array entirely
        if astrange is None:
            return [(_sym(0), _sym(types.symbol_name_or_value(dim)) - 1,
                     _sym(1)) for dim in arrdesc.shape]

    result = [None] * len(astrange)
    for i, r in enumerate(astrange):
        if not isinstance(r, tuple):
            # A single element was given: degenerate range of size one
            begin = _sym(unparse(r))
            result[i] = (begin, begin, _sym(1))
            continue

        begin, end, skip = r

        # Missing begin defaults to zero
        begin = _sym(0) if begin is None else _sym(unparse(begin))

        # The AST end is exclusive; the symbolic end is inclusive
        if end is not None:
            end = _sym(unparse(end)) - 1
        elif arrname is None:
            raise SyntaxError('Cannot define range without end')
        else:
            end = _sym(types.symbol_name_or_value(arrdesc.shape[i])) - 1

        # Missing skip defaults to one
        skip = _sym(1) if skip is None else _sym(unparse(skip))

        result[i] = (begin, end, skip)

    return result
def _reduce(sdfg: SDFG,
            state: SDFGState,
            redfunction: Callable[[Any, Any], Any],
            in_array: str,
            out_array=None,
            axis=None,
            identity=None):
    """ Adds a reduction (read node -> reduce node -> write node) to a state.

        :param sdfg: The SDFG holding the data descriptors.
        :param state: The state to which the nodes and edges are added.
        :param redfunction: Binary reduction function given to the reduce
                            node.
        :param in_array: Input expression; parsed as Python to obtain the
                         input memlet subset.
        :param out_array: Optional output expression. If None, a temporary
                          transient is created to hold the result.
        :param axis: Optional axis or tuple/list of axes to reduce over.
        :param identity: Identity value passed to the reduce node.
        :return: The name of the new transient if ``out_array`` is None,
                 otherwise an empty list.
    """
    inarr = in_array

    # Normalize the axes to a tuple of symbolic expressions
    if axis is not None:
        if not isinstance(axis, (tuple, list)):
            axis = (axis, )
        axis = tuple(pystr_to_symbolic(a) for a in axis)

    # The input memlet is built the same way regardless of the output
    input_subset = parse_memlet_subset(sdfg.arrays[inarr],
                                       ast.parse(in_array).body[0].value, {})
    input_memlet = Memlet(inarr, input_subset.num_elements(), input_subset, 1)

    if out_array is not None:
        # Output given explicitly: derive its memlet from the expression
        outarr = out_array
        output_subset = parse_memlet_subset(
            sdfg.arrays[outarr],
            ast.parse(out_array).body[0].value, {})
        output_memlet = Memlet(outarr, output_subset.num_elements(),
                               output_subset, 1)
    else:
        # No output given: create a transient shaped like the input subset
        # with the reduced axes removed (or a single element if reducing
        # over all axes)
        if axis is None:
            output_shape = [1]
        else:
            output_subset = copy.deepcopy(input_subset)
            output_subset.pop(axis)
            output_shape = output_subset.size()
        outarr, arr = sdfg.add_temp_transient(output_shape,
                                              sdfg.arrays[inarr].dtype,
                                              sdfg.arrays[inarr].storage)
        output_memlet = Memlet.from_array(outarr, arr)

    # Create reduce subgraph
    inpnode = state.add_read(inarr)
    rednode = state.add_reduce(redfunction, axis, identity)
    outnode = state.add_write(outarr)
    state.add_nedge(inpnode, rednode, input_memlet)
    state.add_nedge(rednode, outnode, output_memlet)

    return outarr if out_array is None else []
def _create_ceil_range(self, sdfg: SDFG, graph: SDFGState,
                       map_entry: nodes.MapEntry):
    """ Builds the new (tile-index) map and the replacement range of the
        strip-mined dimension, using a ceiling division for the tile count.

        Reads the transformation properties ``dim_idx``, ``new_dim_prefix``,
        ``tile_size``, ``divides_evenly``, ``strided``, ``tile_offset`` and
        ``tile_stride`` from ``self``. A ``tile_stride`` of 0 means "same as
        the tile size".

        :param sdfg: The SDFG containing the map.
        :param graph: The state containing the map.
        :param map_entry: Entry node of the map to strip-mine.
        :return: A 3-tuple ``(new_dim, new_map, (from, to, step))`` with the
                 name of the new dimension, the new map object iterating
                 over it, and the new range for the strip-mined dimension.
    """
    map_exit = graph.exit_node(map_entry)

    # Retrieve transformation properties.
    dim_idx = self.dim_idx
    new_dim_prefix = self.new_dim_prefix
    tile_size = self.tile_size
    divides_evenly = self.divides_evenly
    strided = self.strided
    offset = self.tile_offset
    tile_stride = self.tile_stride
    if tile_stride == 0:
        tile_stride = tile_size

    # Retrieve parameter and range of dimension to be strip-mined.
    target_dim = map_entry.map.params[dim_idx]
    td_from, td_to, td_step = map_entry.map.range[dim_idx]

    # Create new map. Replace by cloning map object?
    new_dim = self._find_new_dim(sdfg, graph, map_entry, new_dim_prefix,
                                 target_dim)
    nd_from = 0
    if tile_stride == 1:
        nd_to = td_to - td_from
    else:
        # Last tile index: ceil(number of elements / stride) - 1
        nd_to = symbolic.pystr_to_symbolic(
            'int_ceil(%s + 1 - %s, %s) - 1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from),
             symbolic.symstr(tile_stride)))
    nd_step = 1
    new_dim_range = (nd_from, nd_to, nd_step)
    new_map = nodes.Map(new_dim + '_' + map_entry.map.label, [new_dim],
                        subsets.Range([new_dim_range]))

    # Change the range of the selected dimension to iterate over a single
    # tile
    if strided:
        td_from_new = symbolic.pystr_to_symbolic(new_dim)
        td_to_new_approx = td_to
        td_step = tile_size
    elif offset == 0:
        td_from_new = symbolic.pystr_to_symbolic(
            '%s + %s * %s' %
            (symbolic.symstr(td_from), symbolic.symstr(new_dim),
             symbolic.symstr(tile_stride)))
        # Exact end clamps the tile to the end of the original range
        td_to_new_exact = symbolic.pystr_to_symbolic(
            'min(%s + 1, %s + %s * %s + %s) - 1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from),
             symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
             symbolic.symstr(tile_size)))
        td_to_new_approx = symbolic.pystr_to_symbolic(
            '%s + %s * %s + %s - 1' %
            (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
             symbolic.symstr(new_dim), symbolic.symstr(tile_size)))
    else:
        # include offset
        # NOTE: The third argument previously used the non-existent
        # ``symbolic.symstrtr``, which made this branch always fail.
        td_from_new_exact = symbolic.pystr_to_symbolic(
            'max(%s,%s + %s * %s - %s)' %
            (symbolic.symstr(td_from), symbolic.symstr(td_from),
             symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
             symbolic.symstr(offset)))
        td_from_new_approx = symbolic.pystr_to_symbolic(
            '%s + %s * %s - %s ' %
            (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
             symbolic.symstr(new_dim), symbolic.symstr(offset)))
        td_from_new = dace.symbolic.SymExpr(td_from_new_exact,
                                            td_from_new_approx)

        td_to_new_exact = symbolic.pystr_to_symbolic(
            'min(%s + 1, %s + %s * %s + %s - %s) -1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from),
             symbolic.symstr(tile_stride), symbolic.symstr(new_dim),
             symbolic.symstr(tile_size), symbolic.symstr(offset)))
        td_to_new_approx = symbolic.pystr_to_symbolic(
            '%s + %s * %s + %s - %s - 1' %
            (symbolic.symstr(td_from), symbolic.symstr(tile_stride),
             symbolic.symstr(new_dim), symbolic.symstr(tile_size),
             symbolic.symstr(offset)))

    # The exact (clamped) end is only needed if tiles may be cut off by the
    # end of the range; the strided branch defines no exact end at all
    if divides_evenly or strided:
        td_to_new = td_to_new_approx
    else:
        td_to_new = dace.symbolic.SymExpr(td_to_new_exact, td_to_new_approx)

    return new_dim, new_map, (td_from_new, td_to_new, td_step)
def _parse_dim_atom(das, atom):
    """ Converts a dimension atom to a symbolic expression.

        If ``pyexpr_to_symbolic`` yields a data descriptor rather than an
        expression, the atom is unparsed to source text and that text is
        converted to a symbolic expression instead.
    """
    parsed = pyexpr_to_symbolic(das, atom)
    if not isinstance(parsed, data.Data):
        return parsed
    return pystr_to_symbolic(astutils.unparse(atom))
def nest_state_subgraph(sdfg: SDFG,
                        state: SDFGState,
                        subgraph: SubgraphView,
                        name: Optional[str] = None,
                        full_data: bool = False) -> nodes.NestedSDFG:
    """ Turns a state subgraph into a nested SDFG. Operates in-place.

        The subgraph's nodes and edges are moved into a single state of a
        newly created SDFG, a nested SDFG node is added to ``state`` in
        their place, and the edges that crossed the subgraph boundary are
        reconnected to that node.

        :param sdfg: The SDFG containing the state subgraph.
        :param state: The state containing the subgraph.
        :param subgraph: Subgraph to nest.
        :param name: An optional name for the nested SDFG. Defaults to
                     ``'nested_' + state.label``.
        :param full_data: If True, nests entire input/output data.
        :return: The nested SDFG node.
        :raise KeyError: Some or all nodes in the subgraph are not located in
                         this state, or the state does not belong to the
                         given SDFG.
        :raise ValueError: The subgraph is contained in more than one scope,
                           or contains only part of a scope.
    """
    if state.parent != sdfg:
        raise KeyError('State does not belong to given SDFG')
    if subgraph is not state and subgraph.graph is not state:
        raise KeyError('Subgraph does not belong to given state')

    # Find the top-level scope
    scope_tree = state.scope_tree()
    scope_dict = state.scope_dict()
    scope_dict_children = state.scope_children()
    top_scopenode = -1  # Initialized to -1 since "None" already means top-level

    for node in subgraph.nodes():
        if node not in scope_dict:
            raise KeyError('Node not found in state')

        # If scope entry/exit, ensure entire scope is in subgraph
        if isinstance(node, nodes.EntryNode):
            scope_nodes = scope_dict_children[node]
            if any(n not in subgraph.nodes() for n in scope_nodes):
                raise ValueError('Subgraph contains partial scopes (entry)')
        elif isinstance(node, nodes.ExitNode):
            entry = state.entry_node(node)
            scope_nodes = scope_dict_children[entry] + [entry]
            if any(n not in subgraph.nodes() for n in scope_nodes):
                raise ValueError('Subgraph contains partial scopes (exit)')

        # A node whose enclosing scope node lies outside the subgraph
        # determines the scope the subgraph lives in; all such nodes must
        # agree on it
        scope_node = scope_dict[node]
        if scope_node not in subgraph.nodes():
            if top_scopenode != -1 and top_scopenode != scope_node:
                raise ValueError(
                    'Subgraph is contained in more than one scope')
            top_scopenode = scope_node

    scope = scope_tree[top_scopenode]
    ###

    # Consolidate edges in top scope
    utils.consolidate_edges(sdfg, scope)
    snodes = subgraph.nodes()

    # Collect inputs and outputs of the nested SDFG: edges with exactly one
    # endpoint inside the subgraph
    inputs: List[MultiConnectorEdge] = []
    outputs: List[MultiConnectorEdge] = []
    for node in snodes:
        for edge in state.in_edges(node):
            if edge.src not in snodes:
                inputs.append(edge)
        for edge in state.out_edges(node):
            if edge.dst not in snodes:
                outputs.append(edge)

    # Collect transients not used outside of subgraph (will be removed of
    # top-level graph)
    data_in_subgraph = set(n.data for n in subgraph.nodes()
                           if isinstance(n, nodes.AccessNode))
    # Find other occurrences in SDFG
    other_nodes = set(
        n.data for s in sdfg.nodes() for n in s.nodes()
        if isinstance(n, nodes.AccessNode) and n not in subgraph.nodes())
    subgraph_transients = set()
    for data in data_in_subgraph:
        datadesc = sdfg.arrays[data]
        if datadesc.transient and data not in other_nodes:
            subgraph_transients.add(data)

    # All transients of edges between code nodes are also added to nested graph
    for edge in subgraph.edges():
        if (isinstance(edge.src, nodes.CodeNode)
                and isinstance(edge.dst, nodes.CodeNode)):
            subgraph_transients.add(edge.data.data)

    # Collect data used in access nodes within subgraph (will be referenced in
    # full upon nesting)
    input_arrays = set()
    output_arrays = {}  # Maps data name to the WCR of its first incoming edge
    for node in subgraph.nodes():
        if (isinstance(node, nodes.AccessNode)
                and node.data not in subgraph_transients):
            if node.has_reads(state):
                input_arrays.add(node.data)
            if node.has_writes(state):
                output_arrays[node.data] = state.in_edges(node)[0].data.wcr

    # Create the nested SDFG
    nsdfg = SDFG(name or 'nested_' + state.label)

    # Transients are added to the nested graph as-is
    # NOTE: From here on, ``name`` is reused as a loop variable and no
    # longer refers to the function argument
    for name in subgraph_transients:
        nsdfg.add_datadesc(name, sdfg.arrays[name])

    # Input/output data that are not source/sink nodes are added to the graph
    # as non-transients
    for name in (input_arrays | output_arrays.keys()):
        datadesc = copy.deepcopy(sdfg.arrays[name])
        datadesc.transient = False
        nsdfg.add_datadesc(name, datadesc)

    # Connected source/sink nodes outside subgraph become global data
    # descriptors in nested SDFG
    input_names = {}  # Boundary input edge -> data name in nested SDFG
    output_names = {}  # Boundary output edge -> data name in nested SDFG
    # Outer data name -> (name in nested SDFG, subset accessed so far)
    global_subsets: Dict[str, Tuple[str, Subset]] = {}
    for edge in inputs:
        if edge.data.data is None:  # Skip edges with an empty memlet
            continue
        name = edge.data.data
        if name not in global_subsets:
            # First occurrence: register a non-transient copy of the
            # descriptor, shaped like the accessed subset unless the full
            # data was requested
            datadesc = copy.deepcopy(sdfg.arrays[edge.data.data])
            datadesc.transient = False
            if not full_data:
                datadesc.shape = edge.data.subset.size()
            new_name = nsdfg.add_datadesc(name, datadesc, find_new_name=True)
            global_subsets[name] = (new_name, edge.data.subset)
        else:
            # Later occurrence: grow the recorded subset to the union,
            # falling back to the whole array if the union fails
            new_name, subset = global_subsets[name]
            if not full_data:
                new_subset = union(subset, edge.data.subset)
                if new_subset is None:
                    new_subset = Range.from_array(sdfg.arrays[name])
                global_subsets[name] = (new_name, new_subset)
                nsdfg.arrays[new_name].shape = new_subset.size()
        input_names[edge] = new_name
    for edge in outputs:
        if edge.data.data is None:  # Skip edges with an empty memlet
            continue
        name = edge.data.data
        if name not in global_subsets:
            datadesc = copy.deepcopy(sdfg.arrays[edge.data.data])
            datadesc.transient = False
            if not full_data:
                datadesc.shape = edge.data.subset.size()
            new_name = nsdfg.add_datadesc(name, datadesc, find_new_name=True)
            global_subsets[name] = (new_name, edge.data.subset)
        else:
            new_name, subset = global_subsets[name]
            if not full_data:
                new_subset = union(subset, edge.data.subset)
                if new_subset is None:
                    new_subset = Range.from_array(sdfg.arrays[name])
                global_subsets[name] = (new_name, new_subset)
                nsdfg.arrays[new_name].shape = new_subset.size()
        output_names[edge] = new_name
    ###################

    # Add scope symbols to the nested SDFG
    defined_vars = set(
        symbolic.pystr_to_symbolic(s)
        for s in (state.symbols_defined_at(top_scopenode).keys()
                  | sdfg.symbols))
    for v in defined_vars:
        if v in sdfg.symbols:
            sym = sdfg.symbols[v]
            nsdfg.add_symbol(v, sym.dtype)

    # Add constants to nested SDFG
    for cstname, cstval in sdfg.constants.items():
        nsdfg.add_constant(cstname, cstval)

    # Create nested state
    nstate = nsdfg.add_state()

    # Add subgraph nodes and edges to nested state
    nstate.add_nodes_from(subgraph.nodes())
    for e in subgraph.edges():
        nstate.add_edge(e.src, e.src_conn, e.dst, e.dst_conn,
                        copy.deepcopy(e.data))

    # Modify nested SDFG parents in subgraph
    for node in subgraph.nodes():
        if isinstance(node, nodes.NestedSDFG):
            node.sdfg.parent = nstate
            node.sdfg.parent_sdfg = nsdfg
            node.sdfg.parent_nsdfg_node = node

    # Add access nodes and edges as necessary
    # Pairs of (original boundary edge, corresponding edge in nested state)
    edges_to_offset = []
    for edge, name in input_names.items():
        node = nstate.add_read(name)
        new_edge = copy.deepcopy(edge.data)
        new_edge.data = name
        edges_to_offset.append((edge,
                                nstate.add_edge(node, None, edge.dst,
                                                edge.dst_conn, new_edge)))
    for edge, name in output_names.items():
        node = nstate.add_write(name)
        new_edge = copy.deepcopy(edge.data)
        new_edge.data = name
        edges_to_offset.append((edge,
                                nstate.add_edge(edge.src, edge.src_conn, node,
                                                None, new_edge)))

    # Offset memlet paths inside nested SDFG according to subsets
    for original_edge, new_edge in edges_to_offset:
        for edge in nstate.memlet_tree(new_edge):
            edge.data.data = new_edge.data.data
            if not full_data:
                edge.data.subset.offset(
                    global_subsets[original_edge.data.data][1], True)

    # Add nested SDFG node to the input state
    nested_sdfg = state.add_nested_sdfg(
        nsdfg, None,
        set(input_names.values()) | input_arrays,
        set(output_names.values()) | output_arrays.keys())

    # Reconnect memlets to nested SDFG (one edge per connector name)
    reconnected_in = set()
    reconnected_out = set()
    empty_input = None
    empty_output = None
    for edge in inputs:
        if edge.data.data is None:
            # Remember an empty-memlet edge in case nothing else connects
            empty_input = edge
            continue

        name = input_names[edge]
        if name in reconnected_in:
            continue
        if full_data:
            data = Memlet.from_array(edge.data.data,
                                     sdfg.arrays[edge.data.data])
        else:
            data = copy.deepcopy(edge.data)
            data.subset = global_subsets[edge.data.data][1]
        state.add_edge(edge.src, edge.src_conn, nested_sdfg, name, data)
        reconnected_in.add(name)

    for edge in outputs:
        if edge.data.data is None:
            empty_output = edge
            continue

        name = output_names[edge]
        if name in reconnected_out:
            continue
        if full_data:
            data = Memlet.from_array(edge.data.data,
                                     sdfg.arrays[edge.data.data])
        else:
            data = copy.deepcopy(edge.data)
            data.subset = global_subsets[edge.data.data][1]
        data.wcr = edge.data.wcr
        state.add_edge(nested_sdfg, name, edge.dst, edge.dst_conn, data)
        reconnected_out.add(name)

    # Connect access nodes to internal input/output data as necessary
    entry = scope.entry
    exit = scope.exit
    for name in input_arrays:
        node = state.add_read(name)
        if entry is not None:
            # Empty memlet from the scope entry to the new access node
            state.add_nedge(entry, node, Memlet())
        state.add_edge(node, None, nested_sdfg, name,
                       Memlet.from_array(name, sdfg.arrays[name]))
    for name, wcr in output_arrays.items():
        node = state.add_write(name)
        if exit is not None:
            # Empty memlet from the new access node to the scope exit
            state.add_nedge(node, exit, Memlet())
        state.add_edge(nested_sdfg, name, node, None,
                       Memlet(data=name, wcr=wcr))

    # Graph was not reconnected, but needs to be
    if state.in_degree(nested_sdfg) == 0 and empty_input is not None:
        state.add_edge(empty_input.src, empty_input.src_conn, nested_sdfg,
                       None, empty_input.data)
    if state.out_degree(nested_sdfg) == 0 and empty_output is not None:
        state.add_edge(nested_sdfg, None, empty_output.dst,
                       empty_output.dst_conn, empty_output.data)

    # Remove subgraph nodes from graph
    state.remove_nodes_from(subgraph.nodes())

    # Remove subgraph transients from top-level graph
    for transient in subgraph_transients:
        del sdfg.arrays[transient]

    # Remove newly isolated nodes due to memlet consolidation
    for edge in inputs:
        if state.in_degree(edge.src) + state.out_degree(edge.src) == 0:
            state.remove_node(edge.src)
    for edge in outputs:
        if state.in_degree(edge.dst) + state.out_degree(edge.dst) == 0:
            state.remove_node(edge.dst)

    return nested_sdfg
def free_symbols(self) -> Set[str]:
    """ Returns the names of all free symbols that appear in the values of
        ``self.location``. """
    names = set()
    for value in self.location.values():
        # Each value is converted to a symbolic expression first
        expr = pystr_to_symbolic(value)
        names.update(str(sym) for sym in expr.free_symbols)
    return names
def find_for_loop(
    sdfg: sd.SDFG,
    guard: sd.SDFGState,
    entry: sd.SDFGState,
    itervar: Optional[str] = None
) -> Optional[Tuple[AnyStr, Tuple[symbolic.SymbolicType, symbolic.SymbolicType,
                                  symbolic.SymbolicType], Tuple[
                                      List[sd.SDFGState], sd.SDFGState]]]:
    """
    Finds loop range from state machine.

    :param sdfg: The SDFG (state machine) containing the loop.
    :param guard: State from which the outgoing edges detect whether to exit
                  the loop or not.
    :param entry: First state in the loop "body".
    :param itervar: Name of the iteration variable. If None, the first
                    assignment on the guard's first incoming edge is used.
    :return: (iteration variable, (start, end, stride),
              (start_states[], last_loop_state)), or None if proper
              for-loop was not detected. ``end`` is inclusive.
    """
    # Extract state transition edge information. A guard without incoming
    # edges, or without an edge into the body, cannot be a for-loop.
    guard_inedges = sdfg.in_edges(guard)
    condition_edges = sdfg.edges_between(guard, entry)
    if not guard_inedges or not condition_edges:
        return None
    condition_edge = condition_edges[0]

    if itervar is None:
        candidates = list(guard_inedges[0].data.assignments.keys())
        if not candidates:
            # Nothing is assigned on the way into the guard
            return None
        itervar = candidates[0]
    condition = condition_edge.data.condition_sympy()

    # Find the stride edge. All in-edges to the guard except for the stride edge
    # should have exactly the same assignment, since a valid for loop can only
    # have one assignment.
    init_edges = []
    init_assignment = None
    step_edge = None
    itersym = symbolic.symbol(itervar)
    for iedge in guard_inedges:
        if itervar not in iedge.data.assignments:
            # Every edge into the guard must either initialize or advance
            # the iteration variable
            return None
        assignment = iedge.data.assignments[itervar]
        if itersym in symbolic.pystr_to_symbolic(assignment).free_symbols:
            if step_edge is not None:
                # More than one edge with the iteration variable as a free
                # symbol, which is not legal. Invalid for loop.
                return None
            step_edge = iedge
        elif init_assignment is None:
            init_assignment = assignment
            init_edges.append(iedge)
        elif init_assignment != assignment:
            # More than one init assignment variations mean that this for
            # loop is not valid.
            return None
        else:
            init_edges.append(iedge)
    if step_edge is None or len(init_edges) == 0 or init_assignment is None:
        # Less than two assignment variations, can't be a valid for loop.
        return None

    # Get the init expression and the stride.
    start = symbolic.pystr_to_symbolic(init_assignment)
    stride = (symbolic.pystr_to_symbolic(step_edge.data.assignments[itervar]) -
              itersym)

    # Get a list of the last states before the loop and a reference to the last
    # loop state.
    start_states = []
    for init_edge in init_edges:
        start_state = init_edge.src
        if start_state not in start_states:
            start_states.append(start_state)
    last_loop_state = step_edge.src

    # Find condition by matching expressions. Strict comparisons are
    # adjusted by one so that the returned end is inclusive.
    end: Optional[symbolic.SymbolicType] = None
    a = sp.Wild('a')
    match = condition.match(itersym < a)
    if match:
        end = match[a] - 1
    if end is None:
        match = condition.match(itersym <= a)
        if match:
            end = match[a]
    if end is None:
        match = condition.match(itersym > a)
        if match:
            end = match[a] + 1
    if end is None:
        match = condition.match(itersym >= a)
        if match:
            end = match[a]

    if end is None:  # No match found
        return None

    return itervar, (start, end, stride), (start_states, last_loop_state)
def ndcopy_to_strided_copy(
    copy_shape,
    src_shape,
    src_strides,
    dst_shape,
    dst_strides,
    subset,
    src_subset,
    dst_subset,
):
    """ Checks whether an N-dimensional copy can be expressed as a single
        contiguous 1D copy or as a 1D strided copy, and if so returns the
        dimensions and strides that emulate it.

        :return: a 3-tuple: copy_shape, src_strides, dst_strides; or None if
                 the copy is tiled or matches neither form.
    """
    # Tiled copies cannot be degenerated
    for tile_size in subset.tile_sizes:
        if tile_size != 1:
            return None

    # For a contiguous copy, the distance between the first and the last
    # accessed element equals the number of copied elements
    first_src_index = src_subset.at([0] * src_subset.dims(), src_strides)
    first_dst_index = dst_subset.at([0] * dst_subset.dims(), dst_strides)
    last_src_index = src_subset.at([d - 1 for d in src_subset.size()],
                                   src_strides)
    last_dst_index = dst_subset.at([d - 1 for d in dst_subset.size()],
                                   dst_strides)

    # Express all three lengths symbolically and simplify before comparing
    copy_length = symbolic.pystr_to_symbolic(
        functools.reduce(lambda x, y: x * y, copy_shape)).simplify()
    src_copylen = symbolic.pystr_to_symbolic(last_src_index -
                                             first_src_index + 1).simplify()
    dst_copylen = symbolic.pystr_to_symbolic(last_dst_index -
                                             first_dst_index + 1).simplify()

    # 1D copy: equal strides, and either both spans equal the copy length
    # (general case) or both arrays have exactly the shape of the copy
    # (fallback for when symbolic simplification yields different forms)
    same_strides = tuple(src_strides) == tuple(dst_strides)
    if same_strides and (
        (src_copylen == copy_length and dst_copylen == copy_length) or
        (tuple(src_shape) == tuple(copy_shape)
         and tuple(dst_shape) == tuple(copy_shape))):
        # Emit 1D copy of the whole array
        return [functools.reduce(lambda x, y: x * y, copy_shape)], [1], [1]

    # Anything other than exactly one non-unit dimension is not handled
    if sum(0 if c == 1 else 1 for c in copy_shape) != 1:
        return None

    # 1D strided copy: locate the copied dimension in the copy shape
    copydim = next(i for i, c in enumerate(copy_shape) if c != 1)

    def _matching_dim(subset_shape, array_shape):
        # Finds the dimension of one side that corresponds to ``copydim``
        if copy_shape == subset_shape:
            return copydim
        try:
            return next(i for i, c in enumerate(subset_shape) if c != 1)
        except StopIteration:
            # NOTE: This is the old stride computation code for FPGA
            # compatibility
            if len(copy_shape) == len(array_shape):
                return copydim
            return next(i for i, c in enumerate(array_shape) if c != 1)

    # In source strides, then in destination strides
    srcdim = _matching_dim(src_subset.size_exact(), src_shape)
    dstdim = _matching_dim(dst_subset.size_exact(), dst_shape)

    # Return new copy
    return [copy_shape[copydim]], [src_strides[srcdim]], [dst_strides[dstdim]]
def _approx(val):
    """ Returns the approximate part of a ``SymExpr``, a sympy expression
        unchanged, or the symbolic conversion of any other value. """
    if isinstance(val, symbolic.SymExpr):
        result = val.approx
    elif not isinstance(val, sp.Basic):
        # Neither SymExpr nor sympy object: convert to symbolic
        result = symbolic.pystr_to_symbolic(val)
    else:
        result = val
    return result
def apply(self, sdfg) -> Tuple[nodes.NestedSDFG, SDFGState]:
    """ Converts the matched map into a for-loop inside a nested SDFG.

        The loop's before/guard/after states are stored on ``self`` as
        ``before_state``, ``guard`` and ``after_state``, and the nested SDFG
        as ``nsdfg``.

        :return: A tuple with the new nested SDFG node and the main state in
                 the for-loop.
    """
    # Locate the map scope; only the first map dimension is used
    state = sdfg.nodes()[self.state_id]
    map_entry = state.nodes()[self.subgraph[MapToForLoop._map_entry]]
    map_exit = state.exit_node(map_entry)
    loop_idx = map_entry.map.params[0]
    loop_from, loop_to, loop_step = map_entry.map.range[0]

    # Turn the map scope into a nested SDFG
    nsdfg_node = nest_state_subgraph(sdfg, state,
                                     state.scope_subgraph(map_entry))
    nsdfg: SDFG = nsdfg_node.sdfg
    nstate: SDFGState = nsdfg.nodes()[0]

    # If map range is dynamic, replace loop expressions with memlets:
    # every non-"IN_" connector on the map entry gets a placeholder name
    dynamic_inputs = {}
    for edge in nstate.in_edges(map_entry):
        if not edge.dst_conn or edge.dst_conn.startswith('IN_'):
            continue
        placeholder = '__DACE_P%d' % len(dynamic_inputs)
        substitution = {symbolic.pystr_to_symbolic(edge.dst_conn): placeholder}
        dynamic_inputs[placeholder] = edge
        loop_from = loop_from.subs(substitution)
        loop_to = loop_to.subs(substitution)
        loop_step = loop_step.subs(substitution)

    # Avoiding import loop
    from dace.codegen.targets.cpp import cpp_array_expr

    def _as_code(expr):
        # Stringify and swap each placeholder for an array expression
        code = symbolic.symstr(expr)
        for placeholder, in_edge in dynamic_inputs.items():
            # TODO: Correct w.r.t. connector type
            code = code.replace(placeholder,
                                cpp_array_expr(nsdfg, in_edge.data))
        return code

    # End of dynamic input range

    # Create a loop inside the nested SDFG
    init_expr = _as_code(loop_from)
    cond_expr = '%s < %s' % (loop_idx, _as_code(loop_to + 1))
    incr_expr = '%s + %s' % (loop_idx, _as_code(loop_step))
    loop_result = nsdfg.add_loop(None, nstate, None, loop_idx, init_expr,
                                 cond_expr, incr_expr)
    # store as object fields for external access
    self.before_state, self.guard, self.after_state = loop_result

    # Skip map in input edges
    for inner_edge in nstate.out_edges(map_entry):
        origin = nstate.memlet_path(inner_edge)[0].src
        nstate.add_edge(origin, None, inner_edge.dst, inner_edge.dst_conn,
                        inner_edge.data)
        nstate.remove_edge(inner_edge)

    # Skip map in output edges
    for inner_edge in nstate.in_edges(map_exit):
        target = nstate.memlet_path(inner_edge)[-1].dst
        nstate.add_edge(inner_edge.src, inner_edge.src_conn, target, None,
                        inner_edge.data)
        nstate.remove_edge(inner_edge)

    # Remove nodes from dynamic map range
    dynamic_sources = [
        e.src for e in dace.sdfg.dynamic_map_inputs(nstate, map_entry)
    ]
    nstate.remove_nodes_from(dynamic_sources)
    # Remove scope nodes
    nstate.remove_nodes_from([map_entry, map_exit])

    # create object field for external nsdfg access
    self.nsdfg = nsdfg

    return nsdfg_node, nstate
def _tuple_to_symexpr(val):
    """ Builds a ``SymExpr`` from the first two elements of a tuple; any
        other value is converted with ``pystr_to_symbolic``. """
    if not isinstance(val, tuple):
        return symbolic.pystr_to_symbolic(val)
    return symbolic.SymExpr(val[0], val[1])
def apply(self, sdfg: sd.SDFG):
    """ Applies the double-buffering transformation to the matched map.

        The transients written directly by the map entry get an extra
        leading dimension of size 2, the map is converted to a for-loop
        (via ``MapToForLoop``) with one fewer iteration, and the initial,
        main and final states of the resulting nested SDFG are rewritten so
        that each iteration works on one buffer while the next one is read.

        :param sdfg: The SDFG to transform.
        :return: The nested SDFG node created by ``MapToForLoop``.
    """
    graph: sd.SDFGState = sdfg.nodes()[self.state_id]
    map_entry = graph.node(self.subgraph[DoubleBuffering._map_entry])

    map_param = map_entry.map.params[0]  # Assuming one dimensional

    ##############################
    # Change condition of loop to one fewer iteration (so that the
    # final one reads from the last buffer)
    map_rstart, map_rend, map_rstride = map_entry.map.range[0]
    map_rend = symbolic.pystr_to_symbolic('(%s) - (%s)' %
                                          (map_rend, map_rstride))
    map_entry.map.range = subsets.Range([(map_rstart, map_rend, map_rstride)])

    ##############################
    # Gather transients to modify: data of access nodes that the map entry
    # feeds directly
    transients_to_modify = set(edge.dst.data
                               for edge in graph.out_edges(map_entry)
                               if isinstance(edge.dst, nodes.AccessNode))

    # Add dimension to transients and modify memlets
    for transient in transients_to_modify:
        desc: data.Array = sdfg.arrays[transient]
        # Using non-python syntax to ensure properties change
        desc.strides = [desc.total_size] + list(desc.strides)
        desc.shape = [2] + list(desc.shape)
        desc.offset = [0] + list(desc.offset)
        desc.total_size = desc.total_size * 2

    ##############################
    # Modify memlets to use map parameter as buffer index
    modified_subsets = []  # Store modified memlets for final state
    for edge in graph.scope_subgraph(map_entry).edges():
        if edge.data.data in transients_to_modify:
            edge.data.subset = self._modify_memlet(sdfg, edge.data.subset,
                                                   edge.data.data)
            modified_subsets.append(edge.data.subset)
        else:  # Could be other_subset
            path = graph.memlet_path(edge)
            src_node = path[0].src
            dst_node = path[-1].dst

            # other_subset could be None. In that case, recreate from array
            dataname = None
            if (isinstance(src_node, nodes.AccessNode)
                    and src_node.data in transients_to_modify):
                dataname = src_node.data
            elif (isinstance(dst_node, nodes.AccessNode)
                  and dst_node.data in transients_to_modify):
                dataname = dst_node.data
            if dataname is not None:
                subset = (edge.data.other_subset or
                          subsets.Range.from_array(sdfg.arrays[dataname]))
                edge.data.other_subset = self._modify_memlet(
                    sdfg, subset, dataname)
                modified_subsets.append(edge.data.other_subset)

    ##############################
    # Turn map into for loop
    map_to_for = MapToForLoop(self.sdfg_id, self.state_id, {
        MapToForLoop._map_entry:
        self.subgraph[DoubleBuffering._map_entry]
    }, self.expr_index)
    nsdfg_node, nstate = map_to_for.apply(sdfg)

    ##############################
    # Gather node copies and remove memlets
    edges_to_replace = []
    for node in nstate.source_nodes():
        for edge in nstate.out_edges(node):
            if (isinstance(edge.dst, nodes.AccessNode)
                    and edge.dst.data in transients_to_modify):
                edges_to_replace.append(edge)
                nstate.remove_edge(edge)
        # Drop source nodes that no longer feed anything
        if nstate.out_degree(node) == 0:
            nstate.remove_node(node)

    ##############################
    # Add initial reads to initial nested state
    initial_state: sd.SDFGState = nsdfg_node.sdfg.start_state
    initial_state.set_label('%s_init' % map_entry.map.label)
    for edge in edges_to_replace:
        initial_state.add_node(edge.src)
        rnode = edge.src
        wnode = initial_state.add_write(edge.dst.data)
        initial_state.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                               copy.deepcopy(edge.data))

    # All instances of the map parameter in this state become the loop start
    sd.replace(initial_state, map_param, map_rstart)
    # Initial writes go to the appropriate buffer
    init_expr = symbolic.pystr_to_symbolic('(%s / %s) %% 2' %
                                           (map_rstart, map_rstride))
    sd.replace(initial_state, '__dace_db_param', init_expr)

    ##############################
    # Modify main state's memlets

    # Divide by loop stride
    # (at this point the removed copy edges are not in the state, so this
    # replacement only affects the remaining memlets)
    new_expr = symbolic.pystr_to_symbolic('(%s / %s) %% 2' %
                                          (map_param, map_rstride))
    sd.replace(nstate, '__dace_db_param', new_expr)

    ##############################
    # Add the main state's contents to the last state, modifying
    # memlets appropriately.
    final_state: sd.SDFGState = nsdfg_node.sdfg.sink_nodes()[0]
    final_state.set_label('%s_final_computation' % map_entry.map.label)
    dup_nstate = copy.deepcopy(nstate)
    final_state.add_nodes_from(dup_nstate.nodes())
    for e in dup_nstate.edges():
        final_state.add_edge(e.src, e.src_conn, e.dst, e.dst_conn, e.data)

    # If there is a WCR output with transient, only output in last state
    nstate: sd.SDFGState
    for node in nstate.sink_nodes():
        for e in list(nstate.in_edges(node)):
            if e.data.wcr is not None:
                path = nstate.memlet_path(e)
                if isinstance(path[0].src, nodes.AccessNode):
                    nstate.remove_memlet_path(e)

    ##############################
    # Add reads into next buffers to main state
    for edge in edges_to_replace:
        rnode = copy.deepcopy(edge.src)
        nstate.add_node(rnode)
        wnode = nstate.add_write(edge.dst.data)
        new_memlet = copy.deepcopy(edge.data)
        # Shift the subset that refers to the non-buffered side by one
        # loop stride, so that the next iteration's data is read
        if new_memlet.data in transients_to_modify:
            new_memlet.other_subset = self._replace_in_subset(
                new_memlet.other_subset, map_param,
                '(%s + %s)' % (map_param, map_rstride))
        else:
            new_memlet.subset = self._replace_in_subset(
                new_memlet.subset, map_param,
                '(%s + %s)' % (map_param, map_rstride))

        nstate.add_edge(rnode, edge.src_conn, wnode, edge.dst_conn,
                        new_memlet)

    nstate.set_label('%s_double_buffered' % map_entry.map.label)
    # Divide by loop stride
    # (the newly added reads target the other buffer, hence the "+ 1")
    new_expr = symbolic.pystr_to_symbolic('((%s / %s) + 1) %% 2' %
                                          (map_param, map_rstride))
    sd.replace(nstate, '__dace_db_param', new_expr)

    # Remove symbol once done
    del nsdfg_node.sdfg.symbols['__dace_db_param']
    del nsdfg_node.symbol_mapping['__dace_db_param']

    return nsdfg_node
def from_string(string):
    """ Parses a string of comma-separated dimensions, each of the form
        ``index`` or ``begin:end[:step[:tilesize]]``, into a ``Range``.

        The given ``end`` is exclusive; one is subtracted from it when
        stored. A token of the form ``exact|approx`` is parsed into a
        ``SymExpr``.

        :raise SyntaxError: A dimension has more than four tokens, a token
                            has more than one ``|``, or an expression cannot
                            be sympified.
    """
    # Commas separate range dimensions but may also occur inside function
    # calls, so a plain split on commas is not enough.
    # Example (with 2 dimensions):
    # tile_i * ts_i : min(int_ceil(M, rs_i), tile_i * ts_i + ts_i),
    # regtile_j * rs_j : min(K, regtile_j * rs_j + rs_j)
    #
    # The string is first split on colons:
    # [
    #    'tile_i * ts_i ',
    #    'min(int_ceil(M, rs_i), tile_i * ts_i + ts_i), regtile_j * rs_j ',
    #    'min(K, regtile_j * rs_j + rs_j)'
    # ]
    # and every piece is then cut at each comma found outside parentheses,
    # which closes the current dimension and opens the next one.
    all_dims = []  # One token list per range dimension
    current_dim = []  # Tokens of the dimension being assembled
    for piece in string.split(':'):
        depth = 0  # Number of open parentheses (reset for every piece)
        seg_start = 0  # Where the not-yet-emitted part of the piece begins
        for pos, char in enumerate(piece):
            if char == ',' and depth == 0:
                current_dim.append(piece[seg_start:pos])
                all_dims.append(current_dim)
                current_dim = []
                seg_start = pos + 1
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
        # Whatever remains of the piece belongs to the current dimension
        current_dim.append(piece[seg_start:])
    all_dims.append(current_dim)

    # Generate ranges
    ranges = []
    for dim_tokens in all_dims:
        # A single token is an index: treat as range of size 1
        if len(dim_tokens) < 2:
            ranges.append((symbolic.pystr_to_symbolic(dim_tokens[0]),
                           symbolic.pystr_to_symbolic(dim_tokens[0]), 1))
            continue

        # More than begin, end, step and tile size is invalid
        if len(dim_tokens) > 4:
            raise SyntaxError("Invalid range: {}".format(all_dims))

        # Support for SymExpr: "exact|approx" becomes a 2-tuple
        parsed = []
        for raw in dim_tokens:
            parts = raw.split('|')
            if len(parts) == 1:
                parsed.append(parts[0])
            elif len(parts) == 2:
                parsed.append((parts[0], parts[1]))
            else:
                raise SyntaxError("Invalid range: {}".format(all_dims))

        # Parse tokens
        try:
            head = parsed[0]
            begin = (symbolic.SymExpr(head[0], head[1]) if isinstance(
                head, tuple) else symbolic.pystr_to_symbolic(head))

            tail = parsed[1]
            end = (symbolic.SymExpr(tail[0], tail[1]) if isinstance(
                tail, tuple) else symbolic.pystr_to_symbolic(tail)) - 1

            step = 1
            if len(parsed) >= 3:
                stride = parsed[2]
                step = (symbolic.SymExpr(stride[0], stride[1]) if isinstance(
                    stride, tuple) else symbolic.SymExpr(stride))

            tsize = 1
            if len(parsed) >= 4:
                tile = parsed[3]
                # Only the exact part of a tile-size SymExpr token is kept
                tsize = tile[0] if isinstance(tile, tuple) else tile
        except sympy.SympifyError:
            raise SyntaxError("Invalid range: {}".format(string))

        # Append range
        ranges.append((begin, end, step, tsize))

    return Range(ranges)
def propagate_memlet(dfg_state,
                     memlet: Memlet,
                     scope_node: nodes.EntryNode,
                     union_inner_edges: bool,
                     arr=None):
    """ Tries to propagate a memlet through a scope (computes the image of
        the memlet function applied on an integer set of, e.g., a map range)
        and returns a new memlet object.

        The input memlet is never modified; all results are written to the
        returned copy.

        @param dfg_state: An SDFGState object representing the graph.
        @param memlet: The memlet adjacent to the scope node from the inside.
        @param scope_node: A scope entry or exit node.
        @param union_inner_edges: True if the propagation should take other
                                  neighboring internal memlets within the same
                                  scope into account.
        @param arr: Optional data descriptor to propagate over. If None, the
                    descriptor is looked up in the SDFG using `memlet.data`.
        @return: A new memlet with the propagated subset and number of
                 accesses (-1 if the number of accesses is dynamic), or an
                 EmptyMemlet if the given memlet is empty.
        @raise TypeError: If `scope_node` is neither an entry nor an exit
                          node.
        @raise KeyError: If `arr` is None and `memlet.data` is not a data
                         descriptor of the SDFG.
        @raise NotImplementedError: If the scope is neither a map nor a
                                    consume scope.
    """
    if isinstance(scope_node, nodes.EntryNode):
        entry_node = scope_node
        neighboring_edges = dfg_state.out_edges(scope_node)
    elif isinstance(scope_node, nodes.ExitNode):
        entry_node = dfg_state.scope_dict()[scope_node]
        neighboring_edges = dfg_state.in_edges(scope_node)
    else:
        raise TypeError('Trying to propagate through a non-scope node')
    if isinstance(memlet, EmptyMemlet):
        return EmptyMemlet()

    sdfg = dfg_state.parent
    defined_vars = [
        symbolic.pystr_to_symbolic(s)
        for s in (sdfg.symbols_defined_at(scope_node, dfg_state).keys())
    ]

    # Find other adjacent edges within the connected to the scope node
    # and union their subsets
    if union_inner_edges:
        aggdata = [
            e.data for e in neighboring_edges
            if e.data.data == memlet.data and e.data != memlet
        ]
    else:
        aggdata = []

    aggdata.append(memlet)

    if arr is None:
        if memlet.data not in sdfg.arrays:
            raise KeyError('Data descriptor (Array, Stream) "%s" not defined '
                           'in SDFG.' % memlet.data)
        arr = sdfg.arrays[memlet.data]

    # Propagate subset
    if isinstance(entry_node, nodes.MapEntry):
        mapnode = entry_node.map
        variable_context = [
            defined_vars,
            [symbolic.pystr_to_symbolic(p) for p in mapnode.params]
        ]

        new_subset = None
        for md in aggdata:
            tmp_subset = None
            for pattern in MemletPattern.patterns():
                if pattern.match([md.subset], variable_context, mapnode.range,
                                 [md]):
                    tmp_subset = pattern.propagate(arr, [md.subset],
                                                   mapnode.range)
                    break
            else:
                # No patterns found. Emit a warning and propagate the entire
                # array
                warnings.warn('Cannot find appropriate memlet pattern to '
                              'propagate %s through %s' %
                              (str(md.subset), str(mapnode.range)))
                tmp_subset = subsets.Range.from_array(arr)

            # Union edges as necessary
            if new_subset is None:
                new_subset = tmp_subset
                continue

            old_subset = new_subset
            new_subset = subsets.union(old_subset, tmp_subset)
            if new_subset is None:
                warnings.warn('Subset union failed between %s and %s ' %
                              (old_subset, tmp_subset))
                # A failed union must not be silently replaced by the subset
                # of the next memlet (that would drop everything accumulated
                # so far). Over-approximate with the entire array and stop.
                new_subset = subsets.Range.from_array(arr)
                break

        # No subset could be computed at all
        if new_subset is None:
            new_subset = subsets.Range.from_array(arr)

        assert new_subset is not None

    elif isinstance(entry_node, nodes.ConsumeEntry):
        # Nothing to analyze/propagate in consume
        new_subset = subsets.Range.from_array(arr)
    else:
        raise NotImplementedError('Unimplemented primitive: %s' %
                                  type(scope_node))
    ### End of subset propagation

    new_memlet = copy.copy(memlet)
    new_memlet.subset = new_subset
    new_memlet.other_subset = None

    # Number of accesses in the propagated memlet is the sum of the internal
    # number of accesses times the size of the map range set
    new_memlet.num_accesses = (
        sum(m.num_accesses for m in aggdata) *
        functools.reduce(lambda a, b: a * b, scope_node.map.range.size(), 1))

    # Mark the *propagated* memlet (not the input memlet) as dynamic if any
    # contributing memlet is dynamic, or if the resulting expression depends
    # on symbols that are not defined at the scope node
    if any(m.num_accesses == -1 for m in aggdata):
        new_memlet.num_accesses = -1
    elif symbolic.issymbolic(new_memlet.num_accesses) and any(
            s not in defined_vars
            for s in new_memlet.num_accesses.free_symbols):
        new_memlet.num_accesses = -1

    return new_memlet
def from_string(s):
    """ Creates an Indices object from a string.

        Every maximal run of characters other than ',', ';' and ':' is
        treated as one index expression and converted with
        `symbolic.pystr_to_symbolic`.

        @param s: The string to parse.
        @return: An Indices object built from the converted expressions, in
                 the order in which they appear in the string.
    """
    index_tokens = re.findall("[^,;:]+", s)
    parsed = [symbolic.pystr_to_symbolic(tok) for tok in index_tokens]
    return Indices(parsed)
def to_string(obj):
    """ Converts an object to a string by passing its string form through
        `pystr_to_symbolic` (without simplification) and converting the
        result back to a string.

        @param obj: The object to convert.
        @return: The resulting string.
    """
    # Go through sympy once to reorder factors
    as_text = str(obj)
    reparsed = pystr_to_symbolic(as_text, simplify=False)
    return str(reparsed)
def apply(self, sdfg: SDFG):
    """ Applies the transformation on the matched map entry and tasklet.

        Rewrites the range of the last map dimension for a vector length of
        `self.vector_len`, optionally replicates the map scope into
        non-vectorized preamble/postamble maps, changes the tasklet
        connectors that depend on the last map parameter to vector types and
        adjusts the subsets of the adjacent memlets. If
        `self.propagate_parent` is set, the data descriptors accessed by the
        tasklet (and the matching descriptors in enclosing SDFGs) are
        changed to vector types as well.

        @param sdfg: The SDFG that contains the matched subgraph.
    """
    graph = sdfg.nodes()[self.state_id]
    map_entry = graph.nodes()[self.subgraph[Vectorization._map_entry]]
    tasklet = graph.nodes()[self.subgraph[Vectorization._tasklet]]
    # Only the last map parameter/dimension is vectorized
    param = symbolic.pystr_to_symbolic(map_entry.map.params[-1])

    # Create new vector size.
    vector_size = self.vector_len
    dim_from, dim_to, dim_skip = map_entry.map.range[-1]

    # Determine whether to create preamble or postamble maps
    # (an explicitly set property takes precedence over the divisibility
    # checks; the `== True` / `== False` comparisons only trigger if the
    # possibly symbolic expression evaluates to a definite boolean)
    if self.preamble is not None:
        create_preamble = self.preamble
    else:
        create_preamble = not ((dim_from % vector_size == 0) == True
                               or dim_from == 0)
    if self.postamble is not None:
        create_postamble = self.postamble
    else:
        if isinstance(dim_to, symbolic.SymExpr):
            create_postamble = (((dim_to.approx + 1) %
                                 vector_size == 0) == False)
        else:
            create_postamble = (((dim_to + 1) % vector_size == 0) == False)

    # Determine new range for vectorized map
    if self.strided_map:
        # Keep the original indices and advance by the vector size
        new_range = [dim_from, dim_to - vector_size + 1, vector_size]
    else:
        # Divide the begin/end of the range by the vector size
        new_range = [
            dim_from // vector_size, ((dim_to + 1) // vector_size) - 1,
            dim_skip
        ]

    # Create preamble non-vectorized map (replacing the original map)
    if create_preamble:
        old_scope = graph.scope_subgraph(map_entry, True, True)
        new_scope: ScopeSubgraphView = replicate_scope(
            sdfg, graph, old_scope)
        # First index after `dim_from` that is a multiple of the vector size
        new_begin = dim_from + (vector_size - (dim_from % vector_size))
        map_entry.map.range[-1] = (dim_from, new_begin - 1, dim_skip)
        # Replace map_entry with the replicated scope (so that the preamble
        # will usually come first in topological sort)
        map_entry = new_scope.entry
        tasklet = new_scope.nodes()[old_scope.nodes().index(tasklet)]
        new_range[0] = new_begin

    # Create postamble non-vectorized map
    if create_postamble:
        new_scope: ScopeSubgraphView = replicate_scope(
            sdfg, graph, graph.scope_subgraph(map_entry, True, True))
        dim_to_ex = dim_to + 1
        new_scope.entry.map.range[-1] = (dim_to_ex -
                                         (dim_to_ex % vector_size), dim_to,
                                         dim_skip)

    # Change the step of the inner-most dimension.
    map_entry.map.range[-1] = tuple(new_range)

    # Vectorize connectors adjacent to the tasklet.
    for edge in graph.all_edges(tasklet):
        # Pick the connector dictionary/name on the tasklet side of the edge
        connectors = (tasklet.in_connectors
                      if edge.dst == tasklet else tasklet.out_connectors)
        conn = edge.dst_conn if edge.dst == tasklet else edge.src_conn

        if edge.data.data is None:  # Empty memlets
            continue
        desc = sdfg.arrays[edge.data.data]
        # Index of the dimension with stride 1
        contigidx = desc.strides.index(1)

        newlist = []

        # Collect the free symbols used in the stride-1 dimension of the
        # subset, and copy the subset into a list of (begin, end, step)
        lastindex = edge.data.subset[contigidx]
        if isinstance(lastindex, tuple):
            newlist = [(rb, re, rs) for rb, re, rs in edge.data.subset]
            symbols = set()
            for indd in lastindex:
                symbols.update(
                    symbolic.pystr_to_symbolic(indd).free_symbols)
        else:
            newlist = [(rb, rb, 1) for rb in edge.data.subset]
            symbols = symbolic.pystr_to_symbolic(lastindex).free_symbols

        # Fall back to the data descriptor's type if the connector is untyped
        oldtype = connectors[conn]
        if oldtype is None or oldtype.type is None:
            oldtype = desc.dtype

        # Vector to scalar WCR edge: change connector and continue
        if (edge.data.subset.num_elements() == 1
                and edge.data.wcr is not None):
            connectors[conn] = dtypes.vector(oldtype, vector_size)
            continue

        # Skip memlets whose stride-1 dimension does not use the vectorized
        # map parameter
        if str(param) not in map(str, symbols):
            continue

        # Vectorize connector, if not already vectorized
        if isinstance(oldtype, dtypes.vector):
            continue

        connectors[conn] = dtypes.vector(oldtype, vector_size)

        # Modify memlet subset to match vector length
        if self.strided_map:
            rb = newlist[contigidx][0]
            if self.propagate_parent:
                newlist[contigidx] = (rb / self.vector_len,
                                      rb / self.vector_len, 1)
            else:
                newlist[contigidx] = (rb, rb + self.vector_len - 1, 1)
        else:
            rb = newlist[contigidx][0]
            if self.propagate_parent:
                newlist[contigidx] = (rb, rb, 1)
            else:
                newlist[contigidx] = (self.vector_len * rb,
                                      self.vector_len * rb +
                                      self.vector_len - 1, 1)
        edge.data.subset = subsets.Range(newlist)
        edge.data.volume = vector_size

    # Vector length propagation using data descriptors, recursive traversal
    # outwards
    if self.propagate_parent:
        for edge in graph.all_edges(tasklet):
            cursdfg = sdfg
            curedge = edge
            while cursdfg is not None:
                # NOTE(review): unlike the connector loop above, this loop
                # does not skip memlets without data (`data is None`) --
                # confirm that such edges cannot occur here
                arrname = curedge.data.data
                dtype = cursdfg.arrays[arrname].dtype

                # Change type and shape to vector
                if not isinstance(dtype, dtypes.vector):
                    cursdfg.arrays[arrname].dtype = dtypes.vector(
                        dtype, vector_size)
                    new_shape = list(cursdfg.arrays[arrname].shape)
                    contigidx = cursdfg.arrays[arrname].strides.index(1)
                    new_shape[contigidx] /= vector_size
                    # Convert the divided extent back to an integer where
                    # possible; values for which int() raises a TypeError
                    # are kept as they are
                    try:
                        new_shape[contigidx] = int(new_shape[contigidx])
                    except TypeError:
                        pass
                    cursdfg.arrays[arrname].shape = new_shape

                propagation.propagate_memlets_sdfg(cursdfg)

                # Find matching edge in parent
                nsdfg = cursdfg.parent_nsdfg_node
                if nsdfg is None:
                    break
                tstate = cursdfg.parent
                # The connector on the nested SDFG node has the same name as
                # the data descriptor inside the nested SDFG
                curedge = ([
                    e
                    for e in tstate.in_edges(nsdfg) if e.dst_conn == arrname
                ] + [
                    e for e in tstate.out_edges(nsdfg)
                    if e.src_conn == arrname
                ])[0]
                cursdfg = cursdfg.parent_sdfg
def _stripmine(self, sdfg, graph, candidate):
    """ Strip-mines one dimension of the matched map by wrapping the map in
        a new outer map that iterates over tiles, and restricting the
        selected dimension of the original map to a single tile.

        @param sdfg: The SDFG that contains the map.
        @param graph: The state in which the map resides.
        @param candidate: The matched subgraph; `StripMining._map_entry` is
                          used to look up the map entry node in `graph`.
        @return: A 3-tuple of (name of the strip-mined dimension, name of
                 the new outer dimension, the new outer Map object).
        @raise ValueError: If removing the strip-mined dimension (range of
                           size 1 and no new dimension name) would leave
                           the original map without parameters.
    """

    # Retrieve map entry and exit nodes.
    map_entry = graph.nodes()[candidate[StripMining._map_entry]]
    map_exit = graph.exit_node(map_entry)

    # Retrieve transformation properties.
    dim_idx = self.dim_idx
    new_dim_prefix = self.new_dim_prefix
    tile_size = self.tile_size
    divides_evenly = self.divides_evenly
    strided = self.strided

    # The tile stride defaults to the tile size
    tile_stride = self.tile_stride
    if tile_stride is None or len(tile_stride) == 0:
        tile_stride = tile_size

    # Retrieve parameter and range of dimension to be strip-mined.
    target_dim = map_entry.map.params[dim_idx]
    td_from, td_to, td_step = map_entry.map.range[dim_idx]

    # Create new map. Replace by cloning map object?
    new_dim = self._find_new_dim(sdfg, graph, map_entry, new_dim_prefix,
                                 target_dim)
    nd_from = 0
    if symbolic.pystr_to_symbolic(tile_stride) == 1:
        nd_to = td_to
    else:
        nd_to = symbolic.pystr_to_symbolic(
            'int_ceil(%s + 1 - %s, %s) - 1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from), tile_stride))
    nd_step = 1
    new_dim_range = (nd_from, nd_to, nd_step)
    new_map = nodes.Map(new_dim + '_' + map_entry.map.label, [new_dim],
                        subsets.Range([new_dim_range]))
    new_map_entry = nodes.MapEntry(new_map)
    new_map_exit = nodes.MapExit(new_map)

    # Change the range of the selected dimension to iterate over a single
    # tile
    if strided:
        td_from_new = symbolic.pystr_to_symbolic(new_dim)
        td_to_new_approx = td_to
        td_step = symbolic.pystr_to_symbolic(tile_size)
    else:
        td_from_new = symbolic.pystr_to_symbolic(
            '%s + %s * %s' %
            (symbolic.symstr(td_from), str(new_dim), tile_stride))
        td_to_new_exact = symbolic.pystr_to_symbolic(
            'min(%s + 1, %s + %s * %s + %s) - 1' %
            (symbolic.symstr(td_to), symbolic.symstr(td_from), tile_stride,
             str(new_dim), tile_size))
        td_to_new_approx = symbolic.pystr_to_symbolic(
            '%s + %s * %s + %s - 1' %
            (symbolic.symstr(td_from), tile_stride, str(new_dim),
             tile_size))
    if divides_evenly or strided:
        td_to_new = td_to_new_approx
    else:
        # Exact end of the tile (clamped to the original range), with the
        # unclamped expression as its approximation
        td_to_new = dace.symbolic.SymExpr(td_to_new_exact,
                                          td_to_new_approx)

    # Special case: If range is 1 and no prefix was specified, skip range
    if td_from_new == td_to_new_approx and target_dim == new_dim:
        map_entry.map.range = subsets.Range(
            [r for i, r in enumerate(map_entry.map.range) if i != dim_idx])
        map_entry.map.params = [
            p for i, p in enumerate(map_entry.map.params) if i != dim_idx
        ]
        if len(map_entry.map.params) == 0:
            raise ValueError('Strip-mining all dimensions of the map with '
                             'empty tiles is disallowed')
    else:
        map_entry.map.range[dim_idx] = (td_from_new, td_to_new, td_step)

    # Make internal map's schedule to "not parallel"
    new_map.schedule = map_entry.map.schedule
    map_entry.map.schedule = dtypes.ScheduleType.Sequential

    # Redirect edges
    new_map_entry.in_connectors = dcpy(map_entry.in_connectors)
    sdutil.change_edge_dest(graph, map_entry, new_map_entry)
    new_map_exit.out_connectors = dcpy(map_exit.out_connectors)
    sdutil.change_edge_src(graph, map_exit, new_map_exit)

    # Create new entry edges
    new_in_edges = dict()
    entry_in_conn = {}
    entry_out_conn = {}
    for _src, src_conn, _dst, _, memlet in graph.out_edges(map_entry):
        if (src_conn is not None and src_conn[:4] == 'OUT_'
                and not isinstance(sdfg.arrays[memlet.data],
                                   dace.data.Scalar)):
            # Non-scalar data: the memlet between the two maps covers the
            # image of the inner memlet over the inner map range
            new_subset = calc_set_image(
                map_entry.map.params,
                map_entry.map.range,
                memlet.subset,
            )
            conn = src_conn[4:]
            key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
            if key in new_in_edges:
                # Several inner edges share the connector: union the subsets
                old_subset = new_in_edges[key].subset
                new_in_edges[key].subset = calc_set_union(
                    old_subset, new_subset)
            else:
                entry_in_conn['IN_' + conn] = None
                entry_out_conn['OUT_' + conn] = None
                new_memlet = dcpy(memlet)
                new_memlet.subset = new_subset
                if memlet.dynamic:
                    new_memlet.num_accesses = memlet.num_accesses
                else:
                    new_memlet.num_accesses = new_memlet.num_elements()
                new_in_edges[key] = new_memlet
        else:
            # Scalars and non-'OUT_' connectors are passed through unchanged
            if src_conn is not None and src_conn[:4] == 'OUT_':
                conn = src_conn[4:]
                in_conn = 'IN_' + conn
                out_conn = 'OUT_' + conn
            else:
                in_conn = src_conn
                out_conn = src_conn
            if in_conn:
                entry_in_conn[in_conn] = None
            if out_conn:
                entry_out_conn[out_conn] = None
            new_in_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
    new_map_entry.out_connectors = entry_out_conn
    map_entry.in_connectors = entry_in_conn
    for (_, in_conn, out_conn), memlet in new_in_edges.items():
        graph.add_edge(new_map_entry, out_conn, map_entry, in_conn, memlet)

    # Create new exit edges
    new_out_edges = dict()
    exit_in_conn = {}
    exit_out_conn = {}
    for _src, _, _dst, dst_conn, memlet in graph.in_edges(map_exit):
        if (dst_conn is not None and dst_conn[:3] == 'IN_'
                and not isinstance(sdfg.arrays[memlet.data],
                                   dace.data.Scalar)):
            new_subset = calc_set_image(
                map_entry.map.params,
                map_entry.map.range,
                memlet.subset,
            )
            conn = dst_conn[3:]
            key = (memlet.data, 'IN_' + conn, 'OUT_' + conn)
            if key in new_out_edges:
                old_subset = new_out_edges[key].subset
                new_out_edges[key].subset = calc_set_union(
                    old_subset, new_subset)
            else:
                exit_in_conn['IN_' + conn] = None
                exit_out_conn['OUT_' + conn] = None
                new_memlet = dcpy(memlet)
                new_memlet.subset = new_subset
                if memlet.dynamic:
                    new_memlet.num_accesses = memlet.num_accesses
                else:
                    new_memlet.num_accesses = new_memlet.num_elements()
                new_out_edges[key] = new_memlet
        else:
            if dst_conn is not None and dst_conn[:3] == 'IN_':
                conn = dst_conn[3:]
                in_conn = 'IN_' + conn
                out_conn = 'OUT_' + conn
            else:
                # Use the connector of *this* edge (the exit node's input
                # connector), not a leftover variable of the entry loop
                in_conn = dst_conn
                out_conn = dst_conn
            if in_conn:
                exit_in_conn[in_conn] = None
            if out_conn:
                exit_out_conn[out_conn] = None
            # Must be registered as an exit edge; the entry edges have
            # already been added to the graph at this point
            new_out_edges[(memlet.data, in_conn, out_conn)] = dcpy(memlet)
    new_map_exit.in_connectors = exit_in_conn
    map_exit.out_connectors = exit_out_conn
    for (_, in_conn, out_conn), memlet in new_out_edges.items():
        graph.add_edge(map_exit, out_conn, new_map_exit, in_conn, memlet)

    # Return strip-mined dimension.
    return target_dim, new_dim, new_map
def apply(self, sdfg: sd.SDFG):
    """ Converts the matched for-loop into a map.

        If the loop body consists of more than one state, the body states
        are first moved into a nested SDFG inside a new single state. The
        contents of the (single) body state are then surrounded by a map
        over the loop range, and the loop's exit-condition edge, back-edge
        and iteration-variable assignment are removed from the state
        machine.

        @param sdfg: The SDFG that contains the matched loop.
    """
    # Obtain loop information
    guard: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_guard])
    body: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._loop_begin])
    after: sd.SDFGState = sdfg.node(self.subgraph[DetectLoop._exit_state])

    # Obtain iteration variable, range, and stride
    itervar, (start, end, step), (_, body_end) = find_for_loop(
        sdfg, guard, body, itervar=self.itervar)

    # Find all loop-body states
    # (traverse from the first body state without passing the guard)
    states = set()
    to_visit = [body]
    while to_visit:
        state = to_visit.pop(0)
        for _, dst, _ in sdfg.out_edges(state):
            if dst not in states and dst is not guard:
                to_visit.append(dst)
        states.add(state)

    # Nest loop-body states
    if len(states) > 1:

        # Find read/write sets
        read_set, write_set = set(), set()
        for state in states:
            rset, wset = state.read_and_write_sets()
            read_set |= rset
            write_set |= wset
            # Add to write set also scalars between tasklets
            for src_node in state.nodes():
                if not isinstance(src_node, nodes.Tasklet):
                    continue
                for dst_node in state.nodes():
                    if src_node is dst_node:
                        continue
                    if not isinstance(dst_node, nodes.Tasklet):
                        continue
                    for e in state.edges_between(src_node, dst_node):
                        if e.data.data and e.data.data in sdfg.arrays:
                            write_set.add(e.data.data)
        # Add data from edges
        # (data containers used as symbols on inter-state edges in the body)
        for src in states:
            for dst in states:
                for edge in sdfg.edges_between(src, dst):
                    for s in edge.data.free_symbols:
                        if s in sdfg.arrays:
                            read_set.add(s)

        # Find NestedSDFG's unique data
        # (transients that have no access node in any state outside the
        # loop body)
        rw_set = read_set | write_set
        unique_set = set()
        for name in rw_set:
            if not sdfg.arrays[name].transient:
                continue
            found = False
            for state in sdfg.states():
                if state in states:
                    continue
                for node in state.nodes():
                    if (isinstance(node, nodes.AccessNode)
                            and node.data == name):
                        found = True
                        break
            if not found:
                unique_set.add(name)

        # Find NestedSDFG's connectors
        # (everything read/written except the unique transients)
        read_set = {
            n
            for n in read_set
            if n not in unique_set or not sdfg.arrays[n].transient
        }
        write_set = {
            n
            for n in write_set
            if n not in unique_set or not sdfg.arrays[n].transient
        }

        # Create NestedSDFG and add all loop-body states and edges
        # Also, find defined symbols in NestedSDFG
        fsymbols = set(sdfg.free_symbols)
        new_body = sdfg.add_state('single_state_body')
        nsdfg = SDFG("loop_body", constants=sdfg.constants, parent=new_body)
        nsdfg.add_node(body, is_start_state=True)
        body.parent = nsdfg
        exit_state = nsdfg.add_state('exit')
        nsymbols = dict()
        for state in states:
            if state is body:
                continue
            nsdfg.add_node(state)
            state.parent = nsdfg
        for state in states:
            if state is body:
                continue
            for src, dst, data in sdfg.in_edges(state):
                # Symbols assigned on the moved edges are registered in the
                # nested SDFG further below
                nsymbols.update({
                    s: sdfg.symbols[s]
                    for s in data.assignments.keys() if s in sdfg.symbols
                })
                nsdfg.add_edge(src, dst, data)
        nsdfg.add_edge(body_end, exit_state, InterstateEdge())

        # Move guard -> body edge to guard -> new_body
        for src, dst, data, in sdfg.edges_between(guard, body):
            sdfg.add_edge(src, new_body, data)
        # Move body_end -> guard edge to new_body -> guard
        for src, dst, data in sdfg.edges_between(body_end, guard):
            sdfg.add_edge(new_body, dst, data)

        # Delete loop-body states and edges from parent SDFG
        for state in states:
            for e in sdfg.all_edges(state):
                sdfg.remove_edge(e)
            sdfg.remove_node(state)

        # Add NestedSDFG arrays
        # (connector data is copied as non-transient; unique transients are
        # moved from the parent SDFG into the nested SDFG)
        for name in read_set | write_set:
            nsdfg.arrays[name] = copy.deepcopy(sdfg.arrays[name])
            nsdfg.arrays[name].transient = False
        for name in unique_set:
            nsdfg.arrays[name] = sdfg.arrays[name]
            del sdfg.arrays[name]

        # Add NestedSDFG node
        cnode = new_body.add_nested_sdfg(nsdfg, None, read_set, write_set)
        if sdfg.parent:
            # Forward the symbol mapping of the enclosing nested SDFG node
            for s, m in sdfg.parent_nsdfg_node.symbol_mapping.items():
                if s not in cnode.symbol_mapping:
                    cnode.symbol_mapping[s] = m
                    nsdfg.add_symbol(s, sdfg.symbols[s])
        for name in read_set:
            r = new_body.add_read(name)
            new_body.add_edge(
                r, None, cnode, name,
                memlet.Memlet.from_array(name, sdfg.arrays[name]))
        for name in write_set:
            w = new_body.add_write(name)
            new_body.add_edge(
                cnode, name, w, None,
                memlet.Memlet.from_array(name, sdfg.arrays[name]))

        # Fix SDFG symbols
        # (drop symbols that became free only due to the nesting, and
        # register the collected symbols in the nested SDFG)
        for sym in sdfg.free_symbols - fsymbols:
            del sdfg.symbols[sym]
        for sym, dtype in nsymbols.items():
            nsdfg.symbols[sym] = dtype

        # Change body state reference
        body = new_body

    if (step < 0) == True:
        # If step is negative, we have to flip start and end to produce a
        # correct map with a positive increment
        start, end, step = end, start, -step

    # If necessary, make a nested SDFG with assignments
    isedge = sdfg.edges_between(guard, body)[0]
    symbols_to_remove = set()
    if len(isedge.data.assignments) > 0:
        nsdfg = helpers.nest_state_subgraph(
            sdfg, body, gr.SubgraphView(body, body.nodes()))
        # Make everything used on the guard -> body edge available inside
        # the nested SDFG (as a mapped symbol or as an input connector)
        for sym in isedge.data.free_symbols:
            if sym in nsdfg.symbol_mapping or sym in nsdfg.in_connectors:
                continue
            if sym in sdfg.symbols:
                nsdfg.symbol_mapping[sym] = symbolic.pystr_to_symbolic(sym)
                nsdfg.sdfg.add_symbol(sym, sdfg.symbols[sym])
            elif sym in sdfg.arrays:
                if sym in nsdfg.sdfg.arrays:
                    raise NotImplementedError
                rnode = body.add_read(sym)
                nsdfg.add_in_connector(sym)
                desc = copy.deepcopy(sdfg.arrays[sym])
                desc.transient = False
                nsdfg.sdfg.add_datadesc(sym, desc)
                body.add_edge(rnode, None, nsdfg, sym, memlet.Memlet(sym))

        # Move the assignments onto a new inter-state edge at the beginning
        # of the nested SDFG
        nstate = nsdfg.sdfg.node(0)
        init_state = nsdfg.sdfg.add_state_before(nstate)
        nisedge = nsdfg.sdfg.edges_between(init_state, nstate)[0]
        nisedge.data.assignments = isedge.data.assignments
        symbols_to_remove = set(nisedge.data.assignments.keys())
        for k in nisedge.data.assignments.keys():
            if k in nsdfg.symbol_mapping:
                del nsdfg.symbol_mapping[k]
        isedge.data.assignments = {}

    # Collect source and sink nodes before the map nodes are added
    source_nodes = body.source_nodes()
    sink_nodes = body.sink_nodes()

    map = nodes.Map(body.label + "_map", [itervar], [(start, end, step)])
    entry = nodes.MapEntry(map)
    exit = nodes.MapExit(map)
    body.add_node(entry)
    body.add_node(exit)

    # If the map uses symbols from data containers, instantiate reads
    containers_to_read = entry.free_symbols & sdfg.arrays.keys()
    for rd in containers_to_read:
        # We are guaranteed that this is always a scalar, because
        # can_be_applied makes sure there are no sympy functions in each of
        # the loop expressions
        access_node = body.add_read(rd)
        body.add_memlet_path(access_node,
                             entry,
                             dst_conn=rd,
                             memlet=memlet.Memlet(rd))

    # Reroute all memlets through the entry and exit nodes
    for n in source_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.out_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(entry,
                                   e.dst,
                                   n,
                                   e.data,
                                   internal_connector=e.dst_conn)
        else:
            # Non-access source nodes are connected with an empty memlet
            body.add_nedge(entry, n, memlet.Memlet())
    for n in sink_nodes:
        if isinstance(n, nodes.AccessNode):
            for e in body.in_edges(n):
                body.remove_edge(e)
                body.add_edge_pair(exit,
                                   e.src,
                                   n,
                                   e.data,
                                   internal_connector=e.src_conn)
        else:
            # Non-access sink nodes are connected with an empty memlet
            body.add_nedge(n, exit, memlet.Memlet())

    # Get rid of the loop exit condition edge
    after_edge = sdfg.edges_between(guard, after)[0]
    sdfg.remove_edge(after_edge)

    # Remove the assignment on the edge to the guard
    for e in sdfg.in_edges(guard):
        if itervar in e.data.assignments:
            del e.data.assignments[itervar]

    # Remove the condition on the entry edge
    condition_edge = sdfg.edges_between(guard, body)[0]
    condition_edge.data.condition = CodeBlock("1")

    # Get rid of backedge to guard
    sdfg.remove_edge(sdfg.edges_between(body, guard)[0])

    # Route body directly to after state, maintaining any other assignments
    # it might have had
    sdfg.add_edge(
        body, after,
        sd.InterstateEdge(assignments=after_edge.data.assignments))

    # If this had made the iteration variable a free symbol, we can remove
    # it from the SDFG symbols
    if itervar in sdfg.free_symbols:
        sdfg.remove_symbol(itervar)
    for sym in symbols_to_remove:
        if helpers.is_symbol_unused(sdfg, sym):
            sdfg.remove_symbol(sym)