def calc_set_image_range(map_idx, map_set, array_range):
    """Substitute map parameters in an array range with the map's own bounds.

    Every entry of ``array_range`` is copied into a list. For each
    (parameter name, map range) pair, the parameter symbol is replaced in
    component ``k`` of the entry by component ``k`` of the map range, for
    ``k`` in 0..2 (presumably begin, end and step -- confirm against the
    ``subsets.Range`` layout). Each component that is processed is stored
    back as a ``SymExpr`` holding an exact and an approximated expression.

    :param map_idx: Map parameter names, parsed with
                    ``symbolic.pystr_to_symbolic``.
    :param map_set: Map ranges, paired positionally with ``map_idx``; each
                    one is indexed at positions 0, 1 and 2.
    :param array_range: Iterable of range entries to rewrite.
    :return: A ``subsets.Range`` built from the rewritten entries.
    """
    result = []
    for entry in array_range:
        bounds = list(entry)
        for param, param_range in zip(map_idx, map_set):
            sym = symbolic.pystr_to_symbolic(param)
            for pos in range(3):
                # Split the replacement value into its exact and
                # over-approximated forms.
                if isinstance(param_range[pos], SymExpr):
                    exact_val = param_range[pos].expr
                    approx_val = param_range[pos].approx
                else:
                    exact_val = param_range[pos]
                    approx_val = overapproximate(param_range[pos])

                current = bounds[pos]
                if isinstance(current, SymExpr):
                    replaced = SymExpr(
                        current.expr.subs([(sym, exact_val)]),
                        current.approx.subs([(sym, approx_val)]))
                elif issymbolic(current):
                    replaced = SymExpr(current.subs([(sym, exact_val)]),
                                       current.subs([(sym, approx_val)]))
                else:
                    # Nothing to substitute in a non-symbolic value.
                    replaced = SymExpr(current, current)
                bounds[pos] = replaced
        result.append(bounds)
    return subsets.Range(result)
def apply(self, sdfg: SDFG):
    """Insert a transient stream between the inner and the outer map exit.

    The edge from the matched inner map exit to the outer map exit whose
    data is a ``data.Stream`` is replaced by a path through a newly added
    transient stream. When ``self.with_buffer`` is set, a transient array
    is additionally placed between the new stream and the outer map exit.
    The tasklet's output memlet is redirected to the new stream.

    :param sdfg: The SDFG that contains the matched state.
    """
    state = sdfg.nodes()[self.state_id]
    tasklet = state.nodes()[self.subgraph[StreamTransient.tasklet]]
    inner_exit = state.nodes()[self.subgraph[StreamTransient.map_exit]]
    outer_exit = state.nodes()[self.subgraph[
        StreamTransient.outer_map_exit]]

    # Find the stream-carrying edge between the two map exits. If nothing
    # matches, ``memlet`` is left at the last edge that was inspected.
    # TODO: What if there's more than one?
    stream_edge = None
    memlet = None
    for candidate in state.out_edges(inner_exit):
        memlet = candidate.data
        leads_outward = candidate.dst == outer_exit
        if leads_outward and isinstance(sdfg.arrays[memlet.data],
                                        data.Stream):
            stream_edge = candidate
            break

    # Find the tasklet output memlet that refers to the same container.
    tasklet_memlet = None
    for out_edge in state.out_edges(tasklet):
        tasklet_memlet = out_edge.data
        if tasklet_memlet.data == memlet.data:
            break

    extent = [
        symbolic.overapproximate(dim)
        for dim in inner_exit.map.range.bounding_box_size()
    ]
    dataname = memlet.data
    source_desc = sdfg.arrays[dataname]

    # Create the new node: Temporary stream and an access node
    stream_name, _ = sdfg.add_stream('trans_' + dataname,
                                     source_desc.dtype,
                                     extent[0],
                                     storage=source_desc.storage,
                                     transient=True,
                                     find_new_name=True)
    stream_node = state.add_access(stream_name)

    to_stream_mm = copy.deepcopy(memlet)
    to_stream_mm.data = stream_node.data
    tasklet_memlet.data = stream_node.data

    # By default the stream itself feeds the outer map exit.
    feeder = stream_node
    if self.with_buffer:
        buffer_name, _ = sdfg.add_transient('strans_' + dataname,
                                            [extent[0]],
                                            sdfg.arrays[memlet.data].dtype,
                                            find_new_name=True)
        feeder = state.add_access(buffer_name)
        to_array_mm = copy.deepcopy(memlet)
        to_array_mm.data = feeder.data
        state.add_edge(stream_node, None, feeder, None, to_array_mm)

    # Reconnect, assuming one edge to the stream
    state.remove_edge(stream_edge)
    state.add_edge(inner_exit, stream_edge.src_conn, stream_node, None,
                   to_stream_mm)
    state.add_edge(feeder, None, outer_exit, stream_edge.dst_conn, memlet)
def apply(self, sdfg):
    """Insert a transient ``tile_<name>`` stream between two map exits.

    The edge from the matched inner map exit to the outer map exit whose
    data is a ``data.Stream`` is replaced by two edges that pass through
    a new transient stream access node. The tasklet's output memlet is
    redirected to that stream.

    :param sdfg: The SDFG that contains the matched state.
    """
    state = sdfg.nodes()[self.state_id]
    tasklet = state.nodes()[self.subgraph[StreamTransient._tasklet]]
    inner_exit = state.nodes()[self.subgraph[StreamTransient._map_exit]]
    outer_exit = state.nodes()[self.subgraph[
        StreamTransient._outer_map_exit]]

    # Find the stream-carrying edge between the two map exits. If nothing
    # matches, ``memlet`` is left at the last edge that was inspected.
    # TODO: What if there's more than one?
    stream_edge = None
    memlet = None
    for candidate in state.out_edges(inner_exit):
        memlet = candidate.data
        leads_outward = candidate.dst == outer_exit
        if leads_outward and isinstance(sdfg.arrays[memlet.data],
                                        data.Stream):
            stream_edge = candidate
            break

    # Find the tasklet output memlet that refers to the same container.
    tasklet_memlet = None
    for out_edge in state.out_edges(tasklet):
        tasklet_memlet = out_edge.data
        if tasklet_memlet.data == memlet.data:
            break

    extent = [
        symbolic.overapproximate(dim)
        for dim in inner_exit.map.range.bounding_box_size()
    ]
    dataname = memlet.data

    # Create the new node: Temporary stream and an access node
    sdfg.add_stream(
        'tile_' + dataname,
        sdfg.arrays[memlet.data].dtype,
        1,
        extent[0],
        [1],
        transient=True,
    )
    stream_node = nodes.AccessNode('tile_' + dataname)

    to_stream_mm = copy.deepcopy(memlet)
    to_stream_mm.data = stream_node.data
    tasklet_memlet.data = stream_node.data

    # Reconnect, assuming one edge to the stream
    state.remove_edge(stream_edge)
    state.add_edge(inner_exit, None, stream_node, None, to_stream_mm)
    state.add_edge(stream_node, None, outer_exit, None, memlet)
def calc_set_image_range(map_idx, map_set, array_range):
    """Substitute map parameters in an array range with the map's own bounds.

    Every entry of ``array_range`` is copied into a list. For each
    (parameter name, map range) pair, the parameter symbol is replaced in
    component ``k`` of the entry by component ``k`` of the map range, for
    ``k`` in 0..2 (presumably begin, end and step -- confirm against the
    ``subsets.Range`` layout). Afterwards, components 0 and 1 are swapped
    when component 0 is provably greater than component 1 while
    component 2 is provably positive.

    :param map_idx: Map parameter names, parsed with
                    ``symbolic.pystr_to_symbolic``.
    :param map_set: Map ranges, paired positionally with ``map_idx``; each
                    one is indexed at positions 0, 1 and 2.
    :param array_range: Iterable of range entries to rewrite.
    :return: A ``subsets.Range`` built from the rewritten entries.
    """
    result = []
    for entry in array_range:
        bounds = list(entry)
        for param, param_range in zip(map_idx, map_set):
            sym = symbolic.pystr_to_symbolic(param)
            for pos in range(3):
                # Split the replacement value into its exact and
                # over-approximated forms.
                if isinstance(param_range[pos], SymExpr):
                    exact_val = param_range[pos].expr
                    approx_val = param_range[pos].approx
                else:
                    exact_val = param_range[pos]
                    approx_val = overapproximate(param_range[pos])

                current = bounds[pos]
                if isinstance(current, SymExpr):
                    replaced = SymExpr(
                        current.expr.subs([(sym, exact_val)]),
                        current.approx.subs([(sym, approx_val)]))
                elif issymbolic(current):
                    replaced = SymExpr(current.subs([(sym, exact_val)]),
                                       current.subs([(sym, approx_val)]))
                else:
                    # Nothing to substitute in a non-symbolic value.
                    replaced = SymExpr(current, current)
                bounds[pos] = replaced

        # Compare on the approximated form of each of the three components.
        start, stop, step = [
            bounds[k].approx if isinstance(bounds[k], SymExpr) else bounds[k]
            for k in range(3)
        ]

        # The explicit ``== True`` is kept from the original: the
        # comparisons may yield symbolic relationals rather than plain
        # booleans, and only a definite "true" should trigger the swap.
        runs_backwards = (start > stop) == True
        step_is_positive = (step > 0) == True
        if runs_backwards and step_is_positive:
            bounds[0], bounds[1] = bounds[1], bounds[0]

        result.append(bounds)
    return subsets.Range(result)
def apply(self, sdfg):
    """Stage the nested SDFG's inputs and outputs through device transients.

    For every edge entering or leaving the matched nested SDFG node whose
    memlet names a data container, a transient with storage
    ``self.storage`` is registered on ``sdfg`` (``device_<name>_in`` for
    inputs, ``device_<name>_out`` for outputs) and an access node for it
    is spliced into the edge. The memlet on the side that touches the
    transient has each subset dimension shifted by that dimension's first
    component. Finally, ``change_storage`` is called on the nested SDFG
    with ``self.storage``.

    :param sdfg: The SDFG that contains the matched state.
    :raises NotImplementedError: If an edge refers to a container that is
                                 neither a ``data.Array`` nor a
                                 ``data.Scalar``.
    """
    state = sdfg.nodes()[self.state_id]
    nested_sdfg = state.nodes()[self.subgraph[CopyToDevice._nested_sdfg]]
    storage = self.storage
    # NOTE(review): this set stores the names returned by the add_* calls.
    # With ``find_new_name=True`` a returned name may differ from the
    # requested one, in which case the membership test below will not
    # recognise it on a later edge -- confirm whether reuse is intended.
    created_arrays = set()

    for edge in state.in_edges(nested_sdfg):
        src, src_conn, dst, dst_conn, memlet = edge
        dataname = memlet.data
        if dataname is None:
            continue
        memdata = sdfg.arrays[dataname]

        name = 'device_' + dataname + '_in'
        if name not in created_arrays:
            if isinstance(memdata, data.Array):
                name, _ = sdfg.add_array(
                    name,
                    shape=[
                        symbolic.overapproximate(r)
                        for r in memlet.bounding_box_size()
                    ],
                    dtype=memdata.dtype,
                    transient=True,
                    storage=storage,
                    find_new_name=True)
            elif isinstance(memdata, data.Scalar):
                name, _ = sdfg.add_scalar(name,
                                          dtype=memdata.dtype,
                                          transient=True,
                                          storage=storage,
                                          find_new_name=True)
            else:
                raise NotImplementedError
            created_arrays.add(name)

        data_node = nodes.AccessNode(name)

        to_data_mm = dcpy(memlet)
        from_data_mm = dcpy(memlet)
        from_data_mm.data = name

        # Shift the memlet that reads from the transient so that every
        # dimension is expressed relative to its own first component.
        for ind, r in enumerate(memlet.subset):
            if isinstance(memlet.subset[ind], tuple):
                begin = memlet.subset[ind][0] - r[0]
                end = memlet.subset[ind][1] - r[0]
                step = memlet.subset[ind][2]
                from_data_mm.subset[ind] = (begin, end, step)
            else:
                from_data_mm.subset[ind] -= r[0]

        state.remove_edge(edge)
        state.add_edge(src, src_conn, data_node, None, to_data_mm)
        state.add_edge(data_node, None, dst, dst_conn, from_data_mm)

    for edge in state.out_edges(nested_sdfg):
        src, src_conn, dst, dst_conn, memlet = edge
        dataname = memlet.data
        if dataname is None:
            continue
        memdata = sdfg.arrays[dataname]

        name = 'device_' + dataname + '_out'
        if name not in created_arrays:
            if isinstance(memdata, data.Array):
                name, _ = sdfg.add_array(
                    name,
                    shape=[
                        symbolic.overapproximate(r)
                        for r in memlet.bounding_box_size()
                    ],
                    dtype=memdata.dtype,
                    transient=True,
                    storage=storage,
                    find_new_name=True)
            elif isinstance(memdata, data.Scalar):
                # ``find_new_name=True`` was missing only here; it is now
                # passed like in the three sibling calls so that a name
                # clash is resolved the same way on every path.
                name, _ = sdfg.add_scalar(name,
                                          dtype=memdata.dtype,
                                          transient=True,
                                          storage=storage,
                                          find_new_name=True)
            else:
                raise NotImplementedError
            created_arrays.add(name)

        data_node = nodes.AccessNode(name)

        to_data_mm = dcpy(memlet)
        from_data_mm = dcpy(memlet)
        to_data_mm.data = name

        # Shift the memlet that writes into the transient so that every
        # dimension is expressed relative to its own first component.
        for ind, r in enumerate(memlet.subset):
            if isinstance(memlet.subset[ind], tuple):
                begin = memlet.subset[ind][0] - r[0]
                end = memlet.subset[ind][1] - r[0]
                step = memlet.subset[ind][2]
                to_data_mm.subset[ind] = (begin, end, step)
            else:
                to_data_mm.subset[ind] -= r[0]

        state.remove_edge(edge)
        state.add_edge(src, src_conn, data_node, None, to_data_mm)
        state.add_edge(data_node, None, dst, dst_conn, from_data_mm)

    # Change storage for all data inside nested SDFG to device.
    change_storage(nested_sdfg.sdfg, storage)
def apply(self, sdfg):
    """Place a transient array between the outer and the inner map entry.

    The edge between the two matched map entries that carries
    ``self.array`` is selected; if ``self.array`` is ``None`` the data of
    the first edge between them is used, and if no edge carries the
    requested array the first edge coming from the outer entry is used
    after printing a warning. A transient ``trans_<name>`` array is
    registered, an access node for it is spliced into the edge, and all
    memlets of that array found by a forward BFS from the inner map entry
    are offset and renamed to the transient.

    :param sdfg: The SDFG that contains the matched state.
    :raises KeyError: If no edge from the outer to the inner map entry
                      exists.
    """
    state = sdfg.nodes()[self.state_id]
    outer_map_entry = state.nodes()[self.subgraph[
        InLocalStorage._outer_map_entry]]
    inner_map_entry = state.nodes()[self.subgraph[
        InLocalStorage._inner_map_entry]]

    array = self.array
    if array is None:
        first_edge = state.edges_between(outer_map_entry, inner_map_entry)[0]
        array = first_edge.data.data

    # First pass: look for the edge that carries the requested array.
    original_edge = None
    invariant_memlet = None
    for candidate in state.in_edges(inner_map_entry):
        if candidate.src != outer_map_entry:
            continue
        if array == candidate.data.data:
            original_edge = candidate
            invariant_memlet = candidate.data
            break

    # Second pass: fall back to the first edge coming from the outer entry.
    if invariant_memlet is None:
        for candidate in state.in_edges(inner_map_entry):
            if candidate.src != outer_map_entry:
                continue
            original_edge = candidate
            invariant_memlet = candidate.data
            print('WARNING: Array %s not found! Using array %s instead.' %
                  (array, invariant_memlet.data))
            array = invariant_memlet.data
            break

    if invariant_memlet is None:
        raise KeyError('Array %s not found!' % array)

    local_shape = [
        symbolic.overapproximate(r)
        for r in invariant_memlet.bounding_box_size()
    ]
    sdfg.add_array('trans_' + invariant_memlet.data,
                   local_shape,
                   sdfg.arrays[invariant_memlet.data].dtype,
                   transient=True)
    data_node = nodes.AccessNode('trans_' + invariant_memlet.data)

    to_data_mm = copy.deepcopy(invariant_memlet)
    from_data_mm = copy.deepcopy(invariant_memlet)
    from_data_mm.data = data_node.data

    # Shift the memlet that reads from the transient and remember the
    # per-dimension offsets for the memlets further inside.
    offset = []
    for ind, r in enumerate(invariant_memlet.subset):
        offset.append(r[0])
        if isinstance(invariant_memlet.subset[ind], tuple):
            shifted = (invariant_memlet.subset[ind][0] - r[0],
                       invariant_memlet.subset[ind][1] - r[0],
                       invariant_memlet.subset[ind][2])
            from_data_mm.subset[ind] = shifted
        else:
            from_data_mm.subset[ind] -= r[0]
    to_data_mm.other_subset = copy.deepcopy(from_data_mm.subset)

    # Reconnect, assuming one edge to the stream
    state.remove_edge(original_edge)
    state.add_edge(outer_map_entry, original_edge.src_conn, data_node, None,
                   to_data_mm)
    state.add_edge(data_node, None, inner_map_entry, original_edge.dst_conn,
                   from_data_mm)

    # Apply the same offsets to every memlet of this array found by the BFS.
    for _, _, _, _, memlet in state.bfs_edges(inner_map_entry,
                                              reverse=False):
        if memlet.data != array:
            continue
        for ind, r in enumerate(memlet.subset):
            if isinstance(memlet.subset[ind], tuple):
                memlet.subset[ind] = (r[0] - offset[ind],
                                      r[1] - offset[ind], r[2])
            else:
                memlet.subset[ind] -= offset[ind]
        memlet.data = 'trans_' + invariant_memlet.data
def apply(self, sdfg):
    """Place a transient array between the inner and the outer map exit.

    The first edge from the matched inner map exit into the outer map
    exit is replaced by a path through a new transient array named
    ``<state label>_trans_<name>``, whose access node has ``setzero``
    enabled. Memlets of the same array found by a reverse BFS from the
    inner map exit are offset and renamed to the transient; the traversal
    stops at the first edge whose destination is a ``nodes.CodeNode``.

    :param sdfg: The SDFG that contains the matched state.
    """
    state = sdfg.nodes()[self.state_id]
    inner_map_exit = state.nodes()[self.subgraph[
        OutLocalStorage._inner_map_exit]]
    outer_map_exit = state.nodes()[self.subgraph[
        OutLocalStorage._outer_map_exit]]

    # Take the first edge that connects the two map exits.
    original_edge = None
    invariant_memlet = None
    array = None
    for candidate in state.in_edges(outer_map_exit):
        if candidate.src != inner_map_exit:
            continue
        original_edge = candidate
        invariant_memlet = candidate.data
        array = invariant_memlet.data
        break

    local_shape = [
        symbolic.overapproximate(r)
        for r in invariant_memlet.bounding_box_size()
    ]
    sdfg.add_array(state.label + '_trans_' + invariant_memlet.data,
                   local_shape,
                   sdfg.arrays[invariant_memlet.data].dtype,
                   transient=True)
    data_node = nodes.AccessNode(state.label + '_trans_' +
                                 invariant_memlet.data)
    data_node.setzero = True

    from_data_mm = copy.deepcopy(invariant_memlet)
    to_data_mm = copy.deepcopy(invariant_memlet)
    to_data_mm.data = data_node.data

    # Shift the memlet that writes into the transient and remember the
    # per-dimension offsets for the memlets further inside.
    offset = []
    for ind, r in enumerate(invariant_memlet.subset):
        offset.append(r[0])
        if isinstance(invariant_memlet.subset[ind], tuple):
            shifted = (invariant_memlet.subset[ind][0] - r[0],
                       invariant_memlet.subset[ind][1] - r[0],
                       invariant_memlet.subset[ind][2])
            to_data_mm.subset[ind] = shifted
        else:
            to_data_mm.subset[ind] -= r[0]

    # Reconnect, assuming one edge to the stream
    state.remove_edge(original_edge)
    state.add_edge(inner_map_exit, original_edge.src_conn, data_node, None,
                   to_data_mm)
    state.add_edge(data_node, None, outer_map_exit, original_edge.dst_conn,
                   from_data_mm)

    # Apply the same offsets to the memlets found by the reverse BFS.
    for _, _, child, _, memlet in state.bfs_edges(inner_map_exit,
                                                  reverse=True):
        if isinstance(child, nodes.CodeNode):
            break
        if memlet.data != array:
            continue
        for ind, r in enumerate(memlet.subset):
            if isinstance(memlet.subset[ind], tuple):
                memlet.subset[ind] = (r[0] - offset[ind],
                                      r[1] - offset[ind], r[2])
            else:
                memlet.subset[ind] -= offset[ind]
        memlet.data = state.label + '_trans_' + invariant_memlet.data
def apply(self, sdfg):
    """Schedule the matched node on the GPU and clone its non-GPU arrays.

    Steps, as performed below:

    1. Pick the matched node (a map entry for ``expr_index == 0``,
       otherwise the reduce node) and set its schedule to
       ``ScheduleType.GPU_Device``.
    2. If ``self.nested_seq`` is set (map case only), change Default
       storage/schedules inside the scope to Register/Sequential.
    3. Collect the arrays reached through the node's input and output
       edges whose storage is not one of the GPU storage types.
    4. Register a transient ``gpu_<name>...`` container with
       ``StorageType.GPU_Global`` for each of them.
    5. Splice access nodes for the clones into the input/output edges and
       rewrite the memlets inside the scope to refer to the clones.
    6. In the map case, rename any remaining memlets in the scope that
       still refer to a cloned array.

    :param sdfg: The SDFG that contains the matched state.
    """
    graph = sdfg.nodes()[self.state_id]
    if self.expr_index == 0:
        cnode = graph.nodes()[self.subgraph[
            GPUTransformLocalStorage._map_entry]]
        node_schedprop = cnode.map
        exit_nodes = graph.exit_nodes(cnode)
    else:
        cnode = graph.nodes()[self.subgraph[
            GPUTransformLocalStorage._reduce]]
        node_schedprop = cnode
        exit_nodes = [cnode]

    # Change schedule
    node_schedprop._schedule = dtypes.ScheduleType.GPU_Device
    if Config.get_bool("debugprint"):
        GPUTransformLocalStorage._maps_transformed += 1
    # If nested graph is designated as sequential, transform schedules and
    # storage from Default to Sequential/Register
    if self.nested_seq and self.expr_index == 0:
        for node in graph.scope_subgraph(cnode).nodes():
            if isinstance(node, nodes.AccessNode):
                arr = node.desc(sdfg)
                if arr.storage == dtypes.StorageType.Default:
                    arr.storage = dtypes.StorageType.Register
            elif isinstance(node, nodes.MapEntry):
                if node.map.schedule == dtypes.ScheduleType.Default:
                    node.map.schedule = dtypes.ScheduleType.Sequential

    gpu_storage_types = [
        dtypes.StorageType.GPU_Global,
        dtypes.StorageType.GPU_Shared,
        dtypes.StorageType.GPU_Stack,
    ]

    #######################################################
    # Add GPU copies of CPU arrays (i.e., not already on GPU)

    # First, understand which arrays to clone
    all_out_edges = []
    for enode in exit_nodes:
        all_out_edges.extend(list(graph.out_edges(enode)))
    in_arrays_to_clone = set()
    out_arrays_to_clone = set()
    for e in graph.in_edges(cnode):
        data_node = sd.find_input_arraynode(graph, e)
        if data_node.desc(sdfg).storage not in gpu_storage_types:
            in_arrays_to_clone.add((data_node, e.data))
    for e in all_out_edges:
        data_node = sd.find_output_arraynode(graph, e)
        if data_node.desc(sdfg).storage not in gpu_storage_types:
            out_arrays_to_clone.add((data_node, e.data))
    if Config.get_bool("debugprint"):
        GPUTransformLocalStorage._arrays_removed += len(
            in_arrays_to_clone) + len(out_arrays_to_clone)

    # Second, create a GPU clone of each array
    # TODO: Overapproximate union of memlets
    cloned_arrays = {}  # original array name -> name of its GPU clone
    in_cloned_arraynodes = {}  # original array name -> clone access node
    out_cloned_arraynodes = {}  # original array name -> clone access node
    for array_node, memlet in in_arrays_to_clone:
        array = array_node.desc(sdfg)
        cloned_name = "gpu_" + array_node.data
        # For every dimension of size 1, append a sanitized form of that
        # dimension's first subset component to the clone's name.
        for i, r in enumerate(memlet.bounding_box_size()):
            size = symbolic.overapproximate(r)
            try:
                if int(size) == 1:
                    suffix = []
                    for c in str(memlet.subset[i][0]):
                        if c.isalpha() or c.isdigit() or c == "_":
                            suffix.append(c)
                        elif c == "+":
                            suffix.append("p")
                        elif c == "-":
                            suffix.append("m")
                        elif c == "*":
                            suffix.append("t")
                        elif c == "/":
                            suffix.append("d")
                    cloned_name += "_" + "".join(suffix)
            # NOTE(review): bare except -- presumably meant to skip sizes
            # that cannot be converted with int(); it also hides any
            # other error raised in the body above.
            except:
                continue
        # NOTE(review): ``cloned_array`` is assigned in the two branches
        # below but is not read anywhere in this method.
        if cloned_name in sdfg.arrays.keys():
            cloned_array = sdfg.arrays[cloned_name]
        elif array_node.data in cloned_arrays:
            cloned_array = cloned_arrays[array_node.data]
        else:
            # Sizes that convert to int are stored as ints; all others are
            # kept as returned by symbolic.overapproximate.
            full_shape = []
            for r in memlet.bounding_box_size():
                size = symbolic.overapproximate(r)
                try:
                    full_shape.append(int(size))
                except:
                    full_shape.append(size)
            # Dimensions whose size is not the integer 1 are kept.
            actual_dims = [
                idx for idx, r in enumerate(full_shape)
                if not (isinstance(r, int) and r == 1)
            ]
            if len(actual_dims) == 0:  # abort
                actual_dims = [len(full_shape) - 1]
            if isinstance(array, data.Scalar):
                sdfg.add_array(name=cloned_name,
                               shape=[1],
                               dtype=array.dtype,
                               transient=True,
                               storage=dtypes.StorageType.GPU_Global)
            elif isinstance(array, data.Stream):
                sdfg.add_stream(
                    name=cloned_name,
                    dtype=array.dtype,
                    shape=[full_shape[d] for d in actual_dims],
                    veclen=array.veclen,
                    buffer_size=array.buffer_size,
                    storage=dtypes.StorageType.GPU_Global,
                    transient=True,
                    offset=[array.offset[d] for d in actual_dims])
            else:
                sdfg.add_array(
                    name=cloned_name,
                    shape=[full_shape[d] for d in actual_dims],
                    dtype=array.dtype,
                    materialize_func=array.materialize_func,
                    transient=True,
                    storage=dtypes.StorageType.GPU_Global,
                    allow_conflicts=array.allow_conflicts,
                    strides=[array.strides[d] for d in actual_dims],
                    offset=[array.offset[d] for d in actual_dims],
                )
            cloned_arrays[array_node.data] = cloned_name
        cloned_node = type(array_node)(cloned_name)

        in_cloned_arraynodes[array_node.data] = cloned_node
    for array_node, memlet in out_arrays_to_clone:
        array = array_node.desc(sdfg)
        cloned_name = "gpu_" + array_node.data
        # Same naming scheme as for the input arrays above.
        for i, r in enumerate(memlet.bounding_box_size()):
            size = symbolic.overapproximate(r)
            try:
                if int(size) == 1:
                    suffix = []
                    for c in str(memlet.subset[i][0]):
                        if c.isalpha() or c.isdigit() or c == "_":
                            suffix.append(c)
                        elif c == "+":
                            suffix.append("p")
                        elif c == "-":
                            suffix.append("m")
                        elif c == "*":
                            suffix.append("t")
                        elif c == "/":
                            suffix.append("d")
                    cloned_name += "_" + "".join(suffix)
            # NOTE(review): bare except, see the input loop above.
            except:
                continue
        if cloned_name in sdfg.arrays.keys():
            cloned_array = sdfg.arrays[cloned_name]
        elif array_node.data in cloned_arrays:
            cloned_array = cloned_arrays[array_node.data]
        else:
            full_shape = []
            for r in memlet.bounding_box_size():
                size = symbolic.overapproximate(r)
                try:
                    full_shape.append(int(size))
                except:
                    full_shape.append(size)
            actual_dims = [
                idx for idx, r in enumerate(full_shape)
                if not (isinstance(r, int) and r == 1)
            ]
            if len(actual_dims) == 0:  # abort
                actual_dims = [len(full_shape) - 1]
            if isinstance(array, data.Scalar):
                sdfg.add_array(name=cloned_name,
                               shape=[1],
                               dtype=array.dtype,
                               transient=True,
                               storage=dtypes.StorageType.GPU_Global)
            elif isinstance(array, data.Stream):
                sdfg.add_stream(
                    name=cloned_name,
                    dtype=array.dtype,
                    shape=[full_shape[d] for d in actual_dims],
                    veclen=array.veclen,
                    buffer_size=array.buffer_size,
                    storage=dtypes.StorageType.GPU_Global,
                    transient=True,
                    offset=[array.offset[d] for d in actual_dims])
            else:
                sdfg.add_array(
                    name=cloned_name,
                    shape=[full_shape[d] for d in actual_dims],
                    dtype=array.dtype,
                    materialize_func=array.materialize_func,
                    transient=True,
                    storage=dtypes.StorageType.GPU_Global,
                    allow_conflicts=array.allow_conflicts,
                    strides=[array.strides[d] for d in actual_dims],
                    offset=[array.offset[d] for d in actual_dims],
                )
            cloned_arrays[array_node.data] = cloned_name
        cloned_node = type(array_node)(cloned_name)
        cloned_node.setzero = True

        out_cloned_arraynodes[array_node.data] = cloned_node

    # Third, connect the cloned arrays to the originals
    for array_name, node in in_cloned_arraynodes.items():
        graph.add_node(node)
        is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
        for edge in graph.in_edges(cnode):
            if edge.data.data == array_name:
                newmemlet = copy.deepcopy(edge.data)
                newmemlet.data = node.data

                if is_scalar:
                    newmemlet.subset = sbs.Indices([0])
                else:
                    # Shift every dimension by its first component.
                    # Dimensions whose shifted begin equals the shifted
                    # end are recorded in lost_dims and left out of the
                    # new subset.
                    offset = []
                    lost_dims = []
                    lost_ranges = []
                    newsubset = [None] * len(edge.data.subset)
                    for ind, r in enumerate(edge.data.subset):
                        offset.append(r[0])
                        if isinstance(edge.data.subset[ind], tuple):
                            begin = edge.data.subset[ind][0] - r[0]
                            end = edge.data.subset[ind][1] - r[0]
                            step = edge.data.subset[ind][2]
                            if begin == end:
                                lost_dims.append(ind)
                                lost_ranges.append((begin, end, step))
                            else:
                                newsubset[ind] = (begin, end, step)
                        else:
                            # NOTE(review): newsubset[ind] is still None
                            # at this point, so this subtraction looks
                            # like it would raise a TypeError -- confirm
                            # whether this branch is reachable.
                            newsubset[ind] -= r[0]
                    if len(lost_dims) == len(edge.data.subset):
                        # Every dimension was dropped: keep the last one.
                        lost_dims.pop()
                        newmemlet.subset = type(
                            edge.data.subset)([lost_ranges[-1]])
                    else:
                        newmemlet.subset = type(edge.data.subset)(
                            [r for r in newsubset if r is not None])

                graph.add_edge(node, None, edge.dst, edge.dst_conn,
                               newmemlet)

                # Rewrite the memlets of this array found by a forward
                # BFS from the edge's destination.
                for e in graph.bfs_edges(edge.dst, reverse=False):
                    parent, _, _child, _, memlet = e
                    if parent != edge.dst and not in_scope(
                            graph, parent, edge.dst):
                        break
                    if memlet.data != edge.data.data:
                        continue
                    path = graph.memlet_path(e)
                    if not isinstance(path[-1].dst, nodes.CodeNode):
                        if in_path(path, e, nodes.ExitNode, forward=True):
                            if isinstance(parent, nodes.CodeNode):
                                # Output edge
                                break
                            else:
                                continue
                    if is_scalar:
                        memlet.subset = sbs.Indices([0])
                    else:
                        newsubset = [None] * len(memlet.subset)
                        for ind, r in enumerate(memlet.subset):
                            if ind in lost_dims:
                                continue
                            if isinstance(memlet.subset[ind], tuple):
                                begin = r[0] - offset[ind]
                                end = r[1] - offset[ind]
                                step = r[2]
                                newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] = (
                                    r - offset[ind],
                                    r - offset[ind],
                                    1,
                                )
                        memlet.subset = type(edge.data.subset)(
                            [r for r in newsubset if r is not None])
                    memlet.data = node.data

                if self.fullcopy:
                    edge.data.subset = sbs.Range.from_array(
                        node.desc(sdfg))
                edge.data.other_subset = newmemlet.subset
                # Re-route the original edge so that it ends at the clone.
                graph.add_edge(edge.src, edge.src_conn, node, None,
                               edge.data)
                graph.remove_edge(edge)

    for array_name, node in out_cloned_arraynodes.items():
        graph.add_node(node)
        is_scalar = isinstance(sdfg.arrays[array_name], data.Scalar)
        for edge in all_out_edges:
            if edge.data.data == array_name:
                newmemlet = copy.deepcopy(edge.data)
                newmemlet.data = node.data

                if is_scalar:
                    newmemlet.subset = sbs.Indices([0])
                else:
                    # Same shifting / dimension dropping as on the input
                    # side.
                    offset = []
                    lost_dims = []
                    lost_ranges = []
                    newsubset = [None] * len(edge.data.subset)
                    for ind, r in enumerate(edge.data.subset):
                        offset.append(r[0])
                        if isinstance(edge.data.subset[ind], tuple):
                            begin = edge.data.subset[ind][0] - r[0]
                            end = edge.data.subset[ind][1] - r[0]
                            step = edge.data.subset[ind][2]
                            if begin == end:
                                lost_dims.append(ind)
                                lost_ranges.append((begin, end, step))
                            else:
                                newsubset[ind] = (begin, end, step)
                        else:
                            # NOTE(review): see the matching note on the
                            # input side; newsubset[ind] is None here.
                            newsubset[ind] -= r[0]
                    if len(lost_dims) == len(edge.data.subset):
                        lost_dims.pop()
                        newmemlet.subset = type(
                            edge.data.subset)([lost_ranges[-1]])
                    else:
                        newmemlet.subset = type(edge.data.subset)(
                            [r for r in newsubset if r is not None])

                graph.add_edge(edge.src, edge.src_conn, node, None,
                               newmemlet)

                # Rewrite the memlets of this array found by a reverse
                # BFS from the edge's source, stopping at the scope
                # parent of that source.
                end_node = graph.scope_dict()[edge.src]
                for e in graph.bfs_edges(edge.src, reverse=True):
                    parent, _, _child, _, memlet = e
                    if parent == end_node:
                        break
                    if memlet.data != edge.data.data:
                        continue
                    path = graph.memlet_path(e)
                    if not isinstance(path[0].dst, nodes.CodeNode):
                        if in_path(path, e, nodes.EntryNode, forward=False):
                            if isinstance(parent, nodes.CodeNode):
                                # Output edge
                                break
                            else:
                                continue
                    if is_scalar:
                        memlet.subset = sbs.Indices([0])
                    else:
                        newsubset = [None] * len(memlet.subset)
                        for ind, r in enumerate(memlet.subset):
                            if ind in lost_dims:
                                continue
                            if isinstance(memlet.subset[ind], tuple):
                                begin = r[0] - offset[ind]
                                end = r[1] - offset[ind]
                                step = r[2]
                                newsubset[ind] = (begin, end, step)
                            else:
                                newsubset[ind] = (
                                    r - offset[ind],
                                    r - offset[ind],
                                    1,
                                )
                        memlet.subset = type(edge.data.subset)(
                            [r for r in newsubset if r is not None])
                    memlet.data = node.data

                # The write-conflict resolution is cleared on the edge
                # that leaves the clone.
                edge.data.wcr = None
                if self.fullcopy:
                    edge.data.subset = sbs.Range.from_array(
                        node.desc(sdfg))
                edge.data.other_subset = newmemlet.subset
                # Re-route the original edge so that it starts at the
                # clone.
                graph.add_edge(node, None, edge.dst, edge.dst_conn,
                               edge.data)
                graph.remove_edge(edge)

    # Fourth, replace memlet arrays as necessary
    if self.expr_index == 0:
        scope_subgraph = graph.scope_subgraph(cnode)
        for edge in scope_subgraph.edges():
            if edge.data.data is not None and edge.data.data in cloned_arrays:
                edge.data.data = cloned_arrays[edge.data.data]
def fuse_nodes(self,
               sdfg,
               graph,
               edge,
               new_dst,
               new_dst_conn,
               other_edges=None):
    """ Fuses two nodes via memlets and possibly transient arrays.

    When the subset on ``edge`` has exactly one element, a transient
    register scalar is created; an access node is only inserted if the
    source connector feeds more than one edge. Otherwise a transient
    array sized by the subset is created, an access node for it is always
    inserted, and the memlet trees around that node are renamed and
    offset by the original subset.

    :param sdfg: SDFG on which the transient container is registered.
    :param graph: State whose nodes and edges are rewired.
    :param edge: Edge leaving the first node that is to be redirected.
    :param new_dst: Node that receives the redirected data.
    :param new_dst_conn: Connector on ``new_dst`` to connect to.
    :param other_edges: Optional additional edges whose destinations are
                        connected to the same source as ``new_dst``.
    """
    extra_edges = other_edges or []
    # Node at the end of the memlet path; its descriptor supplies the dtype.
    access_node = graph.memlet_path(edge)[-1].dst

    local_name = "__s%d_n%d%s_n%d%s" % (
        self.state_id,
        graph.node_id(edge.src),
        edge.src_conn,
        graph.node_id(edge.dst),
        edge.dst_conn,
    )

    # Add intermediate memory between subgraphs. If a scalar,
    # uses direct connection. If an array, adds a transient node
    single_element = edge.data.subset.num_elements() == 1
    if single_element:
        local_name, _ = sdfg.add_scalar(
            local_name,
            dtype=access_node.desc(graph).dtype,
            transient=True,
            storage=dtypes.StorageType.Register,
            find_new_name=True,
        )
        edge.data.data = local_name
        edge.data.subset = "0"

        # If source of edge leads to multiple destinations,
        # redirect all through an access node
        siblings = list(
            graph.out_edges_by_connector(edge.src, edge.src_conn))
        if len(siblings) > 1:
            local_node = graph.add_access(local_name)
            src_connector = None

            # Add edge that leads to transient node
            graph.add_edge(edge.src, edge.src_conn, local_node, None,
                           dcpy(edge.data))

            for sibling in siblings:
                if sibling is edge:
                    continue
                graph.remove_edge(sibling)
                graph.add_edge(local_node, src_connector, sibling.dst,
                               sibling.dst_conn, sibling.data)
        else:
            local_node = edge.src
            src_connector = edge.src_conn
    else:
        local_name, _ = sdfg.add_transient(
            local_name,
            symbolic.overapproximate(edge.data.subset.size()),
            dtype=access_node.desc(graph).dtype,
            find_new_name=True)
        # Keep the original subset for offsetting the memlet trees later.
        old_edge = dcpy(edge)
        local_node = graph.add_access(local_name)
        src_connector = None
        edge.data.data = local_name
        edge.data.subset = ",".join(
            ["0:" + str(s) for s in edge.data.subset.size()])

        # Add edge that leads to transient node
        graph.add_edge(
            edge.src,
            edge.src_conn,
            local_node,
            None,
            dcpy(edge.data),
        )

    # Add edge that leads to the second node
    graph.add_edge(local_node, src_connector, new_dst, new_dst_conn,
                   dcpy(edge.data))

    for extra in extra_edges:
        graph.add_edge(local_node, src_connector, extra.dst, extra.dst_conn,
                       dcpy(edge.data))

    if not single_element:
        # Modify data and memlets on all surrounding edges to match array
        for neighbor in graph.all_edges(local_node):
            for tree_edge in graph.memlet_tree(neighbor):
                tree_edge.data.data = local_name
                tree_edge.data.subset.offset(old_edge.data.subset,
                                             negative=True)
def apply(self, sdfg):
    """Insert a transient array on an edge between ``node_a`` and ``node_b``.

    The edge carrying ``self.array`` is selected. If ``self.array`` is
    unset or empty, the data of the first edge with data and no
    write-conflict resolution is used; if no edge carries the requested
    array, the first edge is used and a warning is issued. A transient
    ``trans_<name>`` array is registered (with a unique name), an access
    node for it replaces the edge, and every memlet in the memlet tree of
    the new inner edge is offset and renamed to the transient.

    The transient's name and access node are also stored on
    ``self._local_name`` and ``self._data_node``.

    :param sdfg: The SDFG that contains the matched state.
    :return: The access node of the new transient array.
    :raises NameError: If there is no edge between the two nodes.
    """
    state = sdfg.nodes()[self.state_id]
    node_a = self.node_a(sdfg)
    node_b = self.node_b(sdfg)

    # Determine direction of new memlet
    scope_dict = state.scope_dict()
    propagate_forward = sd.scope_contains_scope(scope_dict, node_a, node_b)

    array = self.array
    if array is None or len(array) == 0:
        array = next(e.data.data
                     for e in state.edges_between(node_a, node_b)
                     if e.data.data is not None and e.data.wcr is None)

    # First pass: look for the edge that carries the requested array.
    original_edge = None
    invariant_memlet = None
    for candidate in state.edges_between(node_a, node_b):
        if array == candidate.data.data:
            original_edge = candidate
            invariant_memlet = candidate.data
            break

    # Second pass: fall back to the first edge between the two nodes.
    if invariant_memlet is None:
        for candidate in state.edges_between(node_a, node_b):
            original_edge = candidate
            invariant_memlet = candidate.data
            warnings.warn('Array %s not found! Using array %s instead.' %
                          (array, invariant_memlet.data))
            array = invariant_memlet.data
            break

    if invariant_memlet is None:
        raise NameError('Array %s not found!' % array)

    # Add transient array
    local_shape = [
        symbolic.overapproximate(r)
        for r in invariant_memlet.bounding_box_size()
    ]
    new_data, _ = sdfg.add_array('trans_' + invariant_memlet.data,
                                 local_shape,
                                 sdfg.arrays[invariant_memlet.data].dtype,
                                 transient=True,
                                 find_new_name=True)
    data_node = nodes.AccessNode(new_data)

    # Store as fields so that other transformations can use them
    self._local_name = new_data
    self._data_node = data_node

    to_data_mm = copy.deepcopy(invariant_memlet)
    from_data_mm = copy.deepcopy(invariant_memlet)
    offset = subsets.Indices([r[0] for r in invariant_memlet.subset])

    # Reconnect, assuming one edge to the access node
    state.remove_edge(original_edge)
    incoming = state.add_edge(node_a, original_edge.src_conn, data_node,
                              None, to_data_mm)
    outgoing = state.add_edge(data_node, None, node_b,
                              original_edge.dst_conn, from_data_mm)
    # The edge on the inner side is the root of the tree to rewrite.
    new_edge = outgoing if propagate_forward else incoming

    # Offset all edges in the memlet tree (including the new edge)
    for tree_edge in state.memlet_tree(new_edge):
        tree_edge.data.subset.offset(offset, True)
        tree_edge.data.data = new_data

    return data_node
def apply(self, sdfg):
    """Replace the matched map scope by a nested SDFG that runs a for-loop.

    A nested SDFG with the states ``begin``, ``guard``, ``body`` and
    ``end`` is built. Its inter-state edges initialise the first map
    parameter, iterate while it is ``<=`` the end of the first map range
    and advance it by the range's step. The contents of the map scope are
    added to ``body``; ``_in_<name>`` / ``_out_<name>`` containers and
    access nodes are created for the data entering and leaving the map.
    The nested SDFG node is connected in place of the map, and finally
    the nodes of the original scope are removed from the state.

    :param sdfg: The SDFG that contains the matched state.
    :raises NotImplementedError: If data entering or leaving the map is
                                 neither a ``data.Array`` nor a
                                 ``data.Scalar``.
    """
    # Retrieve map entry and exit nodes.
    state = sdfg.nodes()[self.state_id]
    map_entry = state.nodes()[self.subgraph[MapToForLoop._map_entry]]
    map_exits = state.exit_nodes(map_entry)
    loop_idx = map_entry.map.params[0]
    loop_from, loop_to, loop_step = map_entry.map.range[0]

    nested_sdfg = dace.SDFG(state.label + '_' + map_entry.map.label)

    # Construct nested SDFG
    begin = nested_sdfg.add_state('begin')
    guard = nested_sdfg.add_state('guard')
    body = nested_sdfg.add_state('body')
    end = nested_sdfg.add_state('end')

    init_edge = edges.InterstateEdge(
        assignments={str(loop_idx): str(loop_from)})
    nested_sdfg.add_edge(begin, guard, init_edge)

    enter_edge = edges.InterstateEdge(condition=str(loop_idx) + ' <= ' +
                                      str(loop_to))
    nested_sdfg.add_edge(guard, body, enter_edge)

    leave_edge = edges.InterstateEdge(condition=str(loop_idx) + ' > ' +
                                      str(loop_to))
    nested_sdfg.add_edge(guard, end, leave_edge)

    advance_edge = edges.InterstateEdge(
        assignments={
            str(loop_idx): str(loop_idx) + ' + ' + str(loop_step)
        })
    nested_sdfg.add_edge(body, guard, advance_edge)

    # Add map contents
    map_subgraph = state.scope_subgraph(map_entry)
    for scope_node in map_subgraph.nodes():
        if scope_node is not map_entry and scope_node not in map_exits:
            body.add_node(scope_node)
    for src, src_conn, dst, dst_conn, memlet in map_subgraph.edges():
        if src is not map_entry and dst not in map_exits:
            body.add_edge(src, src_conn, dst, dst_conn, memlet)

    # Reconnect inputs
    nested_in_data_nodes = {}
    nested_in_connectors = {}
    nested_in_memlets = {}
    for i, in_edge in enumerate(state.in_edges(map_entry)):
        src, src_conn, dst, dst_conn, memlet = in_edge
        data_label = '_in_' + memlet.data
        memdata = sdfg.arrays[memlet.data]
        if isinstance(memdata, data.Array):
            sdfg.add_array(data_label, memdata.dtype, [
                symbolic.overapproximate(r)
                for r in memlet.bounding_box_size()
            ])
        elif isinstance(memdata, data.Scalar):
            sdfg.add_scalar(data_label, memdata.dtype)
        else:
            raise NotImplementedError()
        data_node = nodes.AccessNode(data_label)
        body.add_node(data_node)
        nested_in_data_nodes[i] = data_node
        nested_in_connectors[i] = data_label
        nested_in_memlets[i] = memlet
        # Point the body's memlets at the new input container.
        for _, _, _, _, old_memlet in body.edges():
            if old_memlet.data == memlet.data:
                old_memlet.data = data_label
        #body.add_edge(data_node, None, dst, dst_conn, memlet)

    # Reconnect outputs
    # The index restarts for every map exit, so entries of later exits
    # replace those of earlier ones stored under the same index.
    nested_out_data_nodes = {}
    nested_out_connectors = {}
    nested_out_memlets = {}
    for map_exit in map_exits:
        for i, out_edge in enumerate(state.out_edges(map_exit)):
            src, src_conn, dst, dst_conn, memlet = out_edge
            data_label = '_out_' + memlet.data
            memdata = sdfg.arrays[memlet.data]
            if isinstance(memdata, data.Array):
                sdfg.add_array(data_label, memdata.dtype, [
                    symbolic.overapproximate(r)
                    for r in memlet.bounding_box_size()
                ])
            elif isinstance(memdata, data.Scalar):
                sdfg.add_scalar(data_label, memdata.dtype)
            else:
                raise NotImplementedError()
            data_node = nodes.AccessNode(data_label)
            body.add_node(data_node)
            nested_out_data_nodes[i] = data_node
            nested_out_connectors[i] = data_label
            nested_out_memlets[i] = memlet
            # Point the body's memlets at the new output container.
            for _, _, _, _, old_memlet in body.edges():
                if old_memlet.data == memlet.data:
                    old_memlet.data = data_label
            #body.add_edge(src, src_conn, data_node, None, memlet)

    # Add nested SDFG and reconnect it
    nested_node = state.add_nested_sdfg(nested_sdfg, sdfg,
                                        set(nested_in_connectors.values()),
                                        set(nested_out_connectors.values()))
    for i, in_edge in enumerate(state.in_edges(map_entry)):
        src, src_conn, dst, dst_conn, memlet = in_edge
        state.add_edge(src, src_conn, nested_node, nested_in_connectors[i],
                       nested_in_memlets[i])
    for map_exit in map_exits:
        for i, out_edge in enumerate(state.out_edges(map_exit)):
            src, src_conn, dst, dst_conn, memlet = out_edge
            state.add_edge(nested_node, nested_out_connectors[i], dst,
                           dst_conn, nested_out_memlets[i])

    # Feed the scope's first-level consumers from the new input nodes. The
    # index is parsed from the connector name after its first 4 characters.
    for src, src_conn, dst, dst_conn, memlet in state.out_edges(map_entry):
        i = int(src_conn[4:]) - 1
        new_memlet = dcpy(memlet)
        new_memlet.data = nested_in_data_nodes[i].data
        body.add_edge(nested_in_data_nodes[i], None, dst, dst_conn,
                      new_memlet)

    # Collect the scope's results in the new output nodes. The index is
    # parsed from the connector name after its first 3 characters.
    for map_exit in map_exits:
        for src, src_conn, dst, dst_conn, memlet in state.in_edges(
                map_exit):
            i = int(dst_conn[3:]) - 1
            new_memlet = dcpy(memlet)
            new_memlet.data = nested_out_data_nodes[i].data
            body.add_edge(src, src_conn, nested_out_data_nodes[i], None,
                          new_memlet)

    for scope_node in map_subgraph:
        state.remove_node(scope_node)