def test_notbmap():
    """GPU device map with no thread-block map inside: the scalar transient
    written within the kernel is expected to default to Register storage."""
    sdfg = dace.SDFG('default_storage_test_1')
    sdfg.add_array('A', [20], dace.float64, dace.StorageType.GPU_Global)
    sdfg.add_transient('tmp', [1], dace.float64)
    state = sdfg.add_state()

    # Graph nodes: A -> [kernel map] -> tmp -> tasklet -> [map exit] -> A
    read_a = state.add_read('A')
    write_a = state.add_write('A')
    kernel_entry, kernel_exit = state.add_map('kernel', dict(i='0:20'),
                                              dace.ScheduleType.GPU_Device)
    tmp_node = state.add_access('tmp')
    tasklet = state.add_tasklet('add', {'a'}, {'b'}, 'b = a + 1')

    state.add_memlet_path(read_a, kernel_entry, tmp_node,
                          memlet=dace.Memlet.simple('A', 'i'))
    state.add_memlet_path(tmp_node, tasklet, dst_conn='a',
                          memlet=dace.Memlet.simple('tmp', '0'))
    state.add_memlet_path(tasklet, kernel_exit, write_a, src_conn='b',
                          memlet=dace.Memlet.simple('A', 'i'))

    set_default_schedule_and_storage_types(sdfg, None)
    assert sdfg.arrays['tmp'].storage == dace.StorageType.Register
def tbmap_sequential_test():
    """GPU device map containing a sequential map and an inner thread-block
    map: the transient written between the sequential and thread-block scopes
    is expected to default to GPU_Shared storage."""
    sdfg = dace.SDFG('default_storage_test_2')
    sdfg.add_array('A', [20, 32], dace.float64, dace.StorageType.GPU_Global)
    sdfg.add_transient('tmp', [1], dace.float64)
    state = sdfg.add_state()

    # Nodes: A -> [kernel] -> [seq] -> tmp -> [block] -> tasklet -> ... -> A
    read_a = state.add_read('A')
    write_a = state.add_write('A')
    outer_entry, outer_exit = state.add_map('kernel', dict(i='0:20'),
                                            dace.ScheduleType.GPU_Device)
    seq_entry, seq_exit = state.add_map('seq', dict(j='0:1'),
                                        dace.ScheduleType.Sequential)
    block_entry, block_exit = state.add_map(
        'block', dict(ti='0:32'), dace.ScheduleType.GPU_ThreadBlock)
    tmp_node = state.add_access('tmp')
    tasklet = state.add_tasklet('add', {'a'}, {'b'}, 'b = a + 1')

    state.add_memlet_path(read_a, outer_entry, seq_entry, tmp_node,
                          memlet=dace.Memlet.simple('A', 'i+j, 0:32'))
    state.add_memlet_path(tmp_node, block_entry, tasklet, dst_conn='a',
                          memlet=dace.Memlet.simple('tmp', '0, ti'))
    state.add_memlet_path(tasklet, block_exit, seq_exit, outer_exit, write_a,
                          src_conn='b',
                          memlet=dace.Memlet.simple('A', 'i+j, ti'))

    set_default_schedule_and_storage_types(sdfg, None)
    assert sdfg.arrays['tmp'].storage == dace.StorageType.GPU_Shared
def _test_determine_alloc(lifetime: dace.AllocationLifetime,
                          unused: bool = False) -> tuple:
    """ Creates an SDFG playground for determining allocation.

        Builds a top-level SDFG with a map containing a nested SDFG, whose
        transients (`tmp`, `tmp2`) carry the given allocation lifetime.

        :param lifetime: Allocation lifetime assigned to all transients.
        :param unused: Not referenced by this function; kept for
                       backward compatibility with existing callers.
        :return: A 2-tuple ``(sdfg, scopes)`` where ``scopes`` is the tuple
                 ``(sdfg, state, map entry, nested SDFG, nested state,
                 inner map entry)``.
    """
    # NOTE: The original annotation claimed `-> dace.SDFG`, but the function
    # has always returned a tuple; the annotation is corrected here.
    sdfg = dace.SDFG('lifetimetest')
    sdfg.add_array('A', [N], dace.float64)
    sdfg.add_array('B', [N], dace.float64)
    sdfg.add_transient('unused', [N], dace.float64, lifetime=lifetime)
    state = sdfg.add_state()
    me, mx = state.add_map('m', dict(i='0:N'))

    #########################################################################
    # Nested SDFG with a GPU map and two transients of differing storage
    nsdfg = dace.SDFG('nested')
    nsdfg.add_array('A', [N], dace.float64)
    nsdfg.add_array('B', [N], dace.float64)
    nsdfg.add_transient('tmp', [N],
                        dace.float64,
                        dace.StorageType.GPU_Global,
                        lifetime=lifetime)
    nsdfg.add_transient('tmp2', [1],
                        dace.float64,
                        dace.StorageType.Register,
                        lifetime=lifetime)
    nstate = nsdfg.add_state()
    ime, imx = nstate.add_map('m2',
                              dict(i='0:20'),
                              schedule=dace.ScheduleType.GPU_Device)
    t1 = nstate.add_access('tmp')
    t2 = nstate.add_access('tmp2')
    nstate.add_nedge(t1, t2, dace.Memlet('tmp[0]'))
    nstate.add_memlet_path(nstate.add_read('A'), ime, t1,
                           memlet=dace.Memlet('A[i]'))
    nstate.add_memlet_path(t2, imx, nstate.add_write('B'),
                           memlet=dace.Memlet('B[0]',
                                              wcr='lambda a,b: a+b'))
    #########################################################################

    # Embed the nested SDFG inside the outer map
    nsdfg_node = state.add_nested_sdfg(nsdfg, None, {'A'}, {'B'})
    state.add_memlet_path(state.add_read('A'), me, nsdfg_node,
                          dst_conn='A', memlet=dace.Memlet('A[0:N]'))
    state.add_memlet_path(nsdfg_node, mx, state.add_write('B'),
                          src_conn='B', memlet=dace.Memlet('B[0:N]'))

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    return sdfg, (sdfg, state, me, nsdfg, nstate, ime)
def test_schedule_inference_simple():
    """Schedule inference through a nested program call: after inference and
    state fusion, the resulting map is expected to be CPU_Multicore."""

    @dace.program
    def nested_call(A: dace.float64[3, 3]):
        return A + 1

    @dace.program
    def simple_schedule_inference(A: dace.float64[3, 3]):
        return nested_call(A)

    sdfg: dace.SDFG = simple_schedule_inference.to_sdfg(strict=False)
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)
    sdfg.apply_transformations_repeated(StateFusion)

    # First map entry found anywhere in the (possibly nested) SDFG
    entry = next(n for n, _ in sdfg.all_nodes_recursive()
                 if isinstance(n, dace.nodes.MapEntry))
    assert entry.schedule is dace.ScheduleType.CPU_Multicore
def apply(self, sdfg: SDFG):
    """Inline a single-state nested SDFG into its parent state.

    Moves all nodes of the nested SDFG's only state into the parent state,
    renaming transients, reconnecting memlet paths through the original
    input/output connectors, and finally removing the nested SDFG node.
    """
    state: SDFGState = sdfg.nodes()[self.state_id]
    nsdfg_node = state.nodes()[self.subgraph[InlineSDFG._nested_sdfg]]
    nsdfg: SDFG = nsdfg_node.sdfg
    # Single-state inlining: the nested SDFG is assumed to have exactly
    # one state (guaranteed by the transformation's match conditions).
    nstate: SDFGState = nsdfg.nodes()[0]

    # Propagate an explicitly-set schedule into the nested SDFG's defaults
    if nsdfg_node.schedule is not dtypes.ScheduleType.Default:
        infer_types.set_default_schedule_and_storage_types(
            nsdfg, nsdfg_node.schedule)

    nsdfg_scope_entry = state.entry_node(nsdfg_node)
    nsdfg_scope_exit = (state.exit_node(nsdfg_scope_entry)
                        if nsdfg_scope_entry is not None else None)

    #######################################################
    # Collect and update top-level SDFG metadata

    # Global/init/exit code
    for loc, code in nsdfg.global_code.items():
        sdfg.append_global_code(code.code, loc)
    for loc, code in nsdfg.init_code.items():
        sdfg.append_init_code(code.code, loc)
    for loc, code in nsdfg.exit_code.items():
        sdfg.append_exit_code(code.code, loc)

    # Constants: copy into parent; warn (but keep the outer value) on clash
    for cstname, cstval in nsdfg.constants.items():
        if cstname in sdfg.constants:
            if cstval != sdfg.constants[cstname]:
                warnings.warn('Constant value mismatch for "%s" while '
                              'inlining SDFG. Inner = %s != %s = outer' %
                              (cstname, cstval, sdfg.constants[cstname]))
        else:
            sdfg.add_constant(cstname, cstval)

    # Find original source/destination edges (there is only one edge per
    # connector, according to match)
    inputs: Dict[str, MultiConnectorEdge] = {}
    outputs: Dict[str, MultiConnectorEdge] = {}
    input_set: Dict[str, str] = {}
    output_set: Dict[str, str] = {}
    for e in state.in_edges(nsdfg_node):
        inputs[e.dst_conn] = e
        input_set[e.data.data] = e.dst_conn
    for e in state.out_edges(nsdfg_node):
        outputs[e.src_conn] = e
        output_set[e.data.data] = e.src_conn

    # Access nodes that need to be reshaped: inner arrays with more
    # dimensions than the outer memlet subset, or incompatible strides
    reshapes: Set[str] = set()
    for aname, array in nsdfg.arrays.items():
        if array.transient:
            continue
        edge = None
        if aname in inputs:
            edge = inputs[aname]
            if len(array.shape) > len(edge.data.subset):
                reshapes.add(aname)
                continue
        if aname in outputs:
            edge = outputs[aname]
            if len(array.shape) > len(edge.data.subset):
                reshapes.add(aname)
                continue
        if edge is not None and not InlineSDFG._check_strides(
                array.strides, sdfg.arrays[edge.data.data].strides,
                edge.data, nsdfg_node):
            reshapes.add(aname)

    # Replace symbols using invocation symbol mapping
    # Two-step replacement (N -> __dacesym_N --> map[N]) to avoid clashes
    for symname, symvalue in nsdfg_node.symbol_mapping.items():
        if str(symname) != str(symvalue):
            nsdfg.replace(symname, '__dacesym_' + symname)
    for symname, symvalue in nsdfg_node.symbol_mapping.items():
        if str(symname) != str(symvalue):
            nsdfg.replace('__dacesym_' + symname, symvalue)

    # All transients become transients of the parent (if data already
    # exists, find new name)
    # Mapping from nested transient name to top-level name
    transients: Dict[str, str] = {}
    for node in nstate.nodes():
        if isinstance(node, nodes.AccessNode):
            datadesc = nsdfg.arrays[node.data]
            if node.data not in transients and datadesc.transient:
                name = sdfg.add_datadesc('%s_%s' % (nsdfg.label, node.data),
                                         datadesc,
                                         find_new_name=True)
                transients[node.data] = name

    # All transients of edges between code nodes are also added to parent
    for edge in nstate.edges():
        if (isinstance(edge.src, nodes.CodeNode)
                and isinstance(edge.dst, nodes.CodeNode)):
            if edge.data.data is not None:
                datadesc = nsdfg.arrays[edge.data.data]
                if edge.data.data not in transients and datadesc.transient:
                    name = sdfg.add_datadesc(
                        '%s_%s' % (nsdfg.label, edge.data.data),
                        datadesc,
                        find_new_name=True)
                    transients[edge.data.data] = name

    # Collect nodes to add to top-level graph. Source/sink access nodes
    # for non-transient, non-reshaped data are *not* copied; they are
    # replaced by reconnections to the original outer edges.
    new_incoming_edges: Dict[nodes.Node, MultiConnectorEdge] = {}
    new_outgoing_edges: Dict[nodes.Node, MultiConnectorEdge] = {}
    source_accesses = set()
    sink_accesses = set()
    for node in nstate.source_nodes():
        if (isinstance(node, nodes.AccessNode)
                and node.data not in transients
                and node.data not in reshapes):
            new_incoming_edges[node] = inputs[node.data]
            source_accesses.add(node)
    for node in nstate.sink_nodes():
        if (isinstance(node, nodes.AccessNode)
                and node.data not in transients
                and node.data not in reshapes):
            new_outgoing_edges[node] = outputs[node.data]
            sink_accesses.add(node)

    #######################################################
    # Replace data on inlined SDFG nodes/edges

    # Replace data names with their top-level counterparts
    repldict = {}
    repldict.update(transients)
    repldict.update({
        k: v.data.data
        for k, v in itertools.chain(inputs.items(), outputs.items())
    })

    # Add views whenever reshapes are necessary
    for dname in reshapes:
        desc = nsdfg.arrays[dname]
        # To avoid potential confusion, rename protected __return keyword
        if dname.startswith('__return'):
            newname = f'{nsdfg.name}_ret{dname[8:]}'
        else:
            newname = dname
        newname, _ = sdfg.add_view(newname,
                                   desc.shape,
                                   desc.dtype,
                                   storage=desc.storage,
                                   strides=desc.strides,
                                   offset=desc.offset,
                                   debuginfo=desc.debuginfo,
                                   allow_conflicts=desc.allow_conflicts,
                                   total_size=desc.total_size,
                                   alignment=desc.alignment,
                                   may_alias=desc.may_alias,
                                   find_new_name=True)
        repldict[dname] = newname

    # Rename data on access nodes and memlets in-place
    for node in nstate.nodes():
        if isinstance(node, nodes.AccessNode) and node.data in repldict:
            node.data = repldict[node.data]
    for edge in nstate.edges():
        if edge.data.data in repldict:
            edge.data.data = repldict[edge.data.data]

    # Add extra access nodes for out/in view nodes
    for node in nstate.nodes():
        if isinstance(node, nodes.AccessNode) and node.data in reshapes:
            if nstate.in_degree(node) > 0 and nstate.out_degree(node) > 0:
                # Such a node has to be in the output set
                edge = outputs[node.data]

                # Redirect outgoing edges through access node
                out_edges = list(nstate.out_edges(node))
                anode = nstate.add_access(edge.data.data)
                vnode = nstate.add_access(node.data)
                nstate.add_nedge(node, anode, edge.data)
                nstate.add_nedge(anode, vnode, edge.data)
                for e in out_edges:
                    nstate.remove_edge(e)
                    nstate.add_edge(vnode, e.src_conn, e.dst, e.dst_conn,
                                    e.data)

    #######################################################
    # Add nested SDFG into top-level SDFG

    # Add nested nodes into original state (skipping the source/sink
    # access nodes that will be replaced by outer connections)
    subgraph = SubgraphView(nstate, [
        n for n in nstate.nodes()
        if n not in (source_accesses | sink_accesses)
    ])
    state.add_nodes_from(subgraph.nodes())
    for edge in subgraph.edges():
        state.add_edge(edge.src, edge.src_conn, edge.dst, edge.dst_conn,
                       edge.data)

    #######################################################
    # Reconnect inlined SDFG

    # If a source/sink node is one of the inputs/outputs, reconnect it,
    # replacing memlets in outgoing/incoming paths
    modified_edges = set()
    modified_edges |= self._modify_memlet_path(new_incoming_edges, nstate,
                                               state, True)
    modified_edges |= self._modify_memlet_path(new_outgoing_edges, nstate,
                                               state, False)

    # Reshape: add connections to viewed data
    self._modify_reshape_data(reshapes, repldict, inputs, nstate, state,
                              True)
    self._modify_reshape_data(reshapes, repldict, outputs, nstate, state,
                              False)

    # Modify all other internal edges pertaining to input/output nodes,
    # "unsqueezing" inner memlets to the outer data dimensionality
    for node in subgraph.nodes():
        if isinstance(node, nodes.AccessNode):
            if node.data in input_set or node.data in output_set:
                if node.data in input_set:
                    outer_edge = inputs[input_set[node.data]]
                else:
                    outer_edge = outputs[output_set[node.data]]

                for edge in state.all_edges(node):
                    if (edge not in modified_edges
                            and edge.data.data == node.data):
                        for e in state.memlet_tree(edge):
                            if e.data.data == node.data:
                                # NOTE(review): writes the private memlet
                                # field directly, bypassing any setter
                                e._data = helpers.unsqueeze_memlet(
                                    e.data, outer_edge.data)

    # If source/sink node is not connected to a source/destination access
    # node, and the nested SDFG is in a scope, connect to scope with empty
    # memlets
    if nsdfg_scope_entry is not None:
        for node in subgraph.nodes():
            if state.in_degree(node) == 0:
                state.add_edge(nsdfg_scope_entry, None, node, None,
                               Memlet())
            if state.out_degree(node) == 0:
                state.add_edge(node, None, nsdfg_scope_exit, None, Memlet())

    # Replace nested SDFG parents with new SDFG
    for node in nstate.nodes():
        if isinstance(node, nodes.NestedSDFG):
            node.sdfg.parent = state
            node.sdfg.parent_sdfg = sdfg
            node.sdfg.parent_nsdfg_node = node

    # Remove all unused external inputs/output memlet paths, as well as
    # resulting isolated nodes
    # NOTE(review): inputs.keys() holds connector names (str) while
    # source_accesses holds access nodes, so the set difference is
    # effectively all keys — confirm this is intended.
    removed_in_edges = self._remove_edge_path(state,
                                              inputs,
                                              set(inputs.keys()) -
                                              source_accesses,
                                              reverse=True)
    removed_out_edges = self._remove_edge_path(state,
                                               outputs,
                                               set(outputs.keys()) -
                                               sink_accesses,
                                               reverse=False)

    # Re-add in/out edges to first/last nodes in subgraph
    order = [
        x for x in nx.topological_sort(nstate._nx)
        if isinstance(x, nodes.AccessNode)
    ]
    for edge in removed_in_edges:
        # Find first access node that refers to this edge
        node = next(n for n in order if n.data == edge.data.data)
        state.add_edge(edge.src, edge.src_conn, node, edge.dst_conn,
                       edge.data)
    for edge in removed_out_edges:
        # Find last access node that refers to this edge
        node = next(
            n for n in reversed(order) if n.data == edge.data.data)
        state.add_edge(node, edge.src_conn, edge.dst, edge.dst_conn,
                       edge.data)

    #######################################################
    # Remove nested SDFG node
    state.remove_node(nsdfg_node)
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.

        Validates the SDFG, optionally round-trips it through serialization
        (testing mode), runs type/schedule inference, expands library nodes,
        and invokes the frame code generator and all registered targets.

        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Save, reload, and re-save; both files must match byte-for-byte
            sdfg.save(f'{tmp_dir}/test.sdfg')
            sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
            sdfg2.save(f'{tmp_dir}/test2.sdfg')
            print('Testing SDFG serialization...')
            if not filecmp.cmp(f'{tmp_dir}/test.sdfg',
                               f'{tmp_dir}/test2.sdfg'):
                # Keep the mismatching files around for inspection
                shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg")
                shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg")
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Run with the deserialized version
        # NOTE: This means that all subsequent modifications to `sdfg`
        # are not reflected outside of this function (e.g., library
        # node expansion).
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_types.infer_connector_types(sdfg)

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    # Recursively expand library nodes that have not yet been expanded
    sdfg.expand_library_nodes()

    # After expansion, run another pass of connector/type inference
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    frame = framecode.DaCeCodeGenerator()

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in target.TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in target.TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Instantiate all instrumentation providers in SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    # Register a provider class for every instrumented node/consume/map
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                provider_mapping[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                provider_mapping[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                provider_mapping[node.map.instrument]
    if sdfg.instrument != dtypes.InstrumentationType.No_Instrumentation:
        frame._dispatcher.instrumentation[sdfg.instrument] = \
            provider_mapping[sdfg.instrument]
    # Instantiate the collected provider classes (None stays None)
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments,
                   sdfg=sdfg)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       target_type='../../include',
                       linkable=False)
    target_objects.append(dummy)

    for env in dace.library.get_environments_and_dependencies(
            used_environments):
        if hasattr(env, "codeobjects"):
            target_objects.extend(env.codeobjects)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg),
                       'cpp',
                       cpu.CPUCodeGen,
                       'SampleMain',
                       target_type='../../sample',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects
def apply(self, outer_state: SDFGState, sdfg: SDFG):
    """Inline a multi-state nested SDFG into the parent SDFG's state machine.

    Moves all states of the nested SDFG into the parent SDFG, renaming
    transients and clashing state labels/assignment symbols, rewires the
    parent's interstate edges around the removed `outer_state`, and returns
    the list of newly-added states.
    """
    nsdfg_node = self.nested_sdfg
    nsdfg: SDFG = nsdfg_node.sdfg

    # Propagate an explicitly-set schedule into the nested SDFG's defaults
    if nsdfg_node.schedule is not dtypes.ScheduleType.Default:
        infer_types.set_default_schedule_and_storage_types(
            nsdfg, nsdfg_node.schedule)

    #######################################################
    # Collect and update top-level SDFG metadata

    # Global/init/exit code
    for loc, code in nsdfg.global_code.items():
        sdfg.append_global_code(code.code, loc)
    for loc, code in nsdfg.init_code.items():
        sdfg.append_init_code(code.code, loc)
    for loc, code in nsdfg.exit_code.items():
        sdfg.append_exit_code(code.code, loc)

    # Environments: push the nested SDFG node's environments onto every
    # code node it contains
    for nstate in nsdfg.nodes():
        for node in nstate.nodes():
            if isinstance(node, nodes.CodeNode):
                node.environments |= nsdfg_node.environments

    # Constants: copy into parent; warn (but keep the outer value) on clash
    for cstname, cstval in nsdfg.constants.items():
        if cstname in sdfg.constants:
            if cstval != sdfg.constants[cstname]:
                warnings.warn('Constant value mismatch for "%s" while '
                              'inlining SDFG. Inner = %s != %s = outer' %
                              (cstname, cstval, sdfg.constants[cstname]))
        else:
            sdfg.add_constant(cstname, cstval)

    # Symbols defined in the parent, including those introduced on its
    # interstate edges
    outer_symbols = {str(k): v for k, v in sdfg.symbols.items()}
    for ise in sdfg.edges():
        outer_symbols.update(ise.data.new_symbols(sdfg, outer_symbols))

    # Find original source/destination edges (there is only one edge per
    # connector, according to match)
    inputs: Dict[str, MultiConnectorEdge] = {}
    outputs: Dict[str, MultiConnectorEdge] = {}
    # NOTE(review): input_set/output_set are populated but not read in the
    # active code below (their consumers are in the disabled region) —
    # confirm whether they can be removed.
    input_set: Dict[str, str] = {}
    output_set: Dict[str, str] = {}
    for e in outer_state.in_edges(nsdfg_node):
        inputs[e.dst_conn] = e
        input_set[e.data.data] = e.dst_conn
    for e in outer_state.out_edges(nsdfg_node):
        outputs[e.src_conn] = e
        output_set[e.data.data] = e.src_conn

    # Replace symbols using invocation symbol mapping
    # Two-step replacement (N -> __dacesym_N --> map[N]) to avoid clashes
    symbolic.safe_replace(nsdfg_node.symbol_mapping, nsdfg.replace_dict)

    # NOTE: Reshape detection ("access nodes that need to be reshaped",
    # view creation, and per-edge memlet unsqueezing) from the single-state
    # inliner is currently disabled for multi-state inlining; see
    # InlineSDFG.apply for the original logic.

    # Mapping from nested transient name to top-level name
    transients: Dict[str, str] = {}

    # All transients become transients of the parent (if data already
    # exists, find new name)
    for nstate in nsdfg.nodes():
        for node in nstate.nodes():
            if isinstance(node, nodes.AccessNode):
                datadesc = nsdfg.arrays[node.data]
                if node.data not in transients and datadesc.transient:
                    new_name = node.data
                    if (new_name in sdfg.arrays
                            or new_name in outer_symbols
                            or new_name in sdfg.constants):
                        # Avoid clashing with outer arrays/symbols/constants
                        new_name = f'{nsdfg.label}_{node.data}'

                    name = sdfg.add_datadesc(new_name,
                                             datadesc,
                                             find_new_name=True)
                    transients[node.data] = name

        # All transients of edges between code nodes are also added to parent
        for edge in nstate.edges():
            if (isinstance(edge.src, nodes.CodeNode)
                    and isinstance(edge.dst, nodes.CodeNode)):
                if edge.data.data is not None:
                    datadesc = nsdfg.arrays[edge.data.data]
                    if (edge.data.data not in transients
                            and datadesc.transient):
                        new_name = edge.data.data
                        if (new_name in sdfg.arrays
                                or new_name in outer_symbols
                                or new_name in sdfg.constants):
                            new_name = f'{nsdfg.label}_{edge.data.data}'

                        name = sdfg.add_datadesc(new_name,
                                                 datadesc,
                                                 find_new_name=True)
                        transients[edge.data.data] = name

    #######################################################
    # Replace data on inlined SDFG nodes/edges

    # Replace data names with their top-level counterparts
    repldict = {}
    repldict.update(transients)
    repldict.update({
        k: v.data.data
        for k, v in itertools.chain(inputs.items(), outputs.items())
    })

    symbolic.safe_replace(repldict,
                          lambda m: replace_datadesc_names(nsdfg, m),
                          value_as_string=True)

    # NOTE: View creation for reshaped data and the extra access nodes for
    # in/out view nodes are disabled here (see InlineSDFG.apply).

    # Make unique names for states
    statenames = set(s.label for s in sdfg.nodes())
    for nstate in nsdfg.nodes():
        if nstate.label in statenames:
            newname = data.find_new_name(nstate.label, statenames)
            statenames.add(newname)
            nstate.set_label(newname)

    #######################################################
    # Collect and modify interstate edges as necessary

    outer_assignments = set()
    for e in sdfg.edges():
        outer_assignments |= e.data.assignments.keys()

    inner_assignments = set()
    for e in nsdfg.edges():
        inner_assignments |= e.data.assignments.keys()

    # Rename inner interstate assignments that clash with outer ones
    assignments_to_replace = inner_assignments & outer_assignments
    sym_replacements: Dict[str, str] = {}
    allnames = set(outer_symbols.keys()) | set(sdfg.arrays.keys())
    for assign in assignments_to_replace:
        newname = data.find_new_name(assign, allnames)
        allnames.add(newname)
        sym_replacements[assign] = newname
    nsdfg.replace_dict(sym_replacements)

    #######################################################
    # Add nested SDFG states into top-level SDFG

    outer_start_state = sdfg.start_state

    sdfg.add_nodes_from(nsdfg.nodes())
    for ise in nsdfg.edges():
        sdfg.add_edge(ise.src, ise.dst, ise.data)

    #######################################################
    # Reconnect inlined SDFG

    source = nsdfg.start_state
    sinks = nsdfg.sink_nodes()

    # Reconnect state machine: predecessors of the outer state lead into
    # the nested start state; every nested sink leads to its successors
    for e in sdfg.in_edges(outer_state):
        sdfg.add_edge(e.src, source, e.data)
    for e in sdfg.out_edges(outer_state):
        for sink in sinks:
            sdfg.add_edge(sink, e.dst, e.data)

    # Modify start state as necessary
    if outer_start_state is outer_state:
        sdfg.start_state = sdfg.node_id(source)

    # TODO: Modify memlets by offsetting
    # NOTE: The memlet-path reconnection, reshape handling, and internal
    # edge unsqueezing performed by the single-state inliner are currently
    # disabled here; see InlineSDFG.apply for the corresponding logic.

    # Replace nested SDFG parents with new SDFG
    for nstate in nsdfg.nodes():
        nstate.parent = sdfg
        for node in nstate.nodes():
            if isinstance(node, nodes.NestedSDFG):
                node.sdfg.parent_sdfg = sdfg
                node.sdfg.parent_nsdfg_node = node

    #######################################################
    # Remove nested SDFG and state
    sdfg.remove_node(outer_state)

    return nsdfg.nodes()
def generate_code(
    self,
    sdfg: SDFG,
    schedule: Optional[dtypes.ScheduleType],
    sdfg_id: str = ""
) -> Tuple[str, str, Set[TargetCodeGenerator], Set[str]]:
    """ Generate frame code for a given SDFG, calling registered targets'
        code generation callbacks for them to generate their own code.

        :param sdfg: The SDFG to generate code for.
        :param schedule: The schedule the SDFG is currently located, or
                         None if the SDFG is top-level.
        :param sdfg_id: An optional string id given to the SDFG label.
        :return: A tuple of the generated global frame code, local frame
                 code, and a set of targets that have been used in the
                 generation of this SDFG.
    """
    # Derive a unique label suffix for non-top-level SDFGs
    if len(sdfg_id) == 0 and sdfg.sdfg_id != 0:
        sdfg_id = '_%d' % sdfg.sdfg_id

    # NOTE(review): sdfg_label is computed but not read anywhere in this
    # method — possibly dead code or used via side effects elsewhere; confirm.
    sdfg_label = sdfg.name + sdfg_id

    global_stream = CodeIOStream()
    callsite_stream = CodeIOStream()

    # Set default storage/schedule types in SDFG
    set_default_schedule_and_storage_types(sdfg, schedule)

    is_top_level = sdfg.parent is None

    # Generate code
    ###########################

    # Invoke all instrumentation providers
    for instr in self._dispatcher.instrumentation.values():
        if instr is not None:
            instr.on_sdfg_begin(sdfg, callsite_stream, global_stream)

    # Allocate outer-level transients (shared across states); each data
    # descriptor is allocated exactly once, at its first appearance
    shared_transients = sdfg.shared_transients()
    allocated = set()
    for state in sdfg.nodes():
        for node in state.data_nodes():
            if (node.data in shared_transients
                    and node.data not in allocated):
                self._dispatcher.dispatch_allocate(sdfg, state, None, node,
                                                   global_stream,
                                                   callsite_stream)
                allocated.add(node.data)

    # Allocate inter-state variables: collect symbols newly defined by
    # inter-state edge assignments, then emit a declaration for each
    global_symbols = copy.deepcopy(sdfg.symbols)
    interstate_symbols = {}
    for e in sdfg.edges():
        symbols = e.data.new_symbols(global_symbols)
        interstate_symbols.update(symbols)
        global_symbols.update(symbols)
    for isvarName, isvarType in interstate_symbols.items():
        # Skip symbols that have been declared as outer-level transients
        if isvarName in allocated:
            continue
        isvar = data.Scalar(isvarType)
        callsite_stream.write(
            '%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg)
    callsite_stream.write('\n', sdfg)

    states_topological = list(sdfg.topological_sort(sdfg.start_state))

    # Maps each inter-state edge to the control-flow constructs annotated
    # on it. {edge: [dace.edges.ControlFlow]}
    control_flow = {e: [] for e in sdfg.edges()}

    if dace.config.Config.get_bool('optimizer', 'detect_control_flow'):

        ####################################################################
        # Loop detection procedure: find cycles in the state machine and
        # annotate those that match a structured for/while-loop shape

        all_cycles = list(sdfg.find_cycles())  # Returns a list of lists
        # Order according to topological sort
        all_cycles = [
            sorted(c, key=lambda x: states_topological.index(x))
            for c in all_cycles
        ]
        # Group in terms of starting node
        starting_nodes = [c[0] for c in all_cycles]
        # Order cycles according to starting node in topological sort
        starting_nodes = sorted(starting_nodes,
                                key=lambda x: states_topological.index(x))
        cycles_by_node = [[c for c in all_cycles if c[0] == n]
                          for n in starting_nodes]
        for cycles in cycles_by_node:

            # Use arbitrary cycle to find the first and last nodes
            first_node = cycles[0][0]
            last_node = cycles[0][-1]

            if not first_node.is_empty():
                # The entry node should not contain any computations
                continue

            if not all([c[-1] == last_node for c in cycles]):
                # There are multiple back edges: not a for or while loop
                continue

            previous_edge = [
                e for e in sdfg.in_edges(first_node) if e.src != last_node
            ]
            if len(previous_edge) != 1:
                # No single starting point: not a for or while
                continue
            previous_edge = previous_edge[0]

            back_edge = sdfg.edges_between(last_node, first_node)
            if len(back_edge) != 1:
                raise RuntimeError("Expected exactly one edge in cycle")
            back_edge = back_edge[0]

            # Build a set of all nodes in all cycles associated with this
            # set of start and end node
            internal_nodes = functools.reduce(
                lambda a, b: a | b, [set(c) for c in cycles]) - {first_node}

            exit_edge = [
                e for e in sdfg.out_edges(first_node)
                if e.dst not in internal_nodes | {first_node}
            ]
            if len(exit_edge) != 1:
                # No single stopping condition: not a for or while
                # (we don't support continue or break)
                continue
            exit_edge = exit_edge[0]

            entry_edge = [
                e for e in sdfg.out_edges(first_node) if e != exit_edge
            ]
            if len(entry_edge) != 1:
                # No single starting condition: not a for or while
                continue
            entry_edge = entry_edge[0]

            # Make sure this is not already annotated to be another construct
            if (len(control_flow[entry_edge]) != 0
                    or len(control_flow[back_edge]) != 0):
                continue

            # Nested loops case I - previous edge of internal loop is a
            # loop-entry of an external loop (first state in a loop is
            # another loop)
            if (len(control_flow[previous_edge]) == 1 and isinstance(
                    control_flow[previous_edge][0], cflow.LoopEntry)):
                # Nested loop, mark parent scope
                loop_parent = control_flow[previous_edge][0].scope
            # Nested loops case II - exit edge of internal loop is a
            # back-edge of an external loop (last state in a loop is another
            # loop)
            elif (len(control_flow[exit_edge]) == 1 and isinstance(
                    control_flow[exit_edge][0], cflow.LoopBack)):
                # Nested loop, mark parent scope
                loop_parent = control_flow[exit_edge][0].scope
            elif (len(control_flow[exit_edge]) == 0
                  or len(control_flow[previous_edge]) == 0):
                loop_parent = None
            else:
                continue

            if entry_edge == back_edge:
                # No entry check (we don't support do-loops)
                # TODO: do we want to add some support for self-loops?
                continue

            # Now we make sure that there is no other way to exit this
            # cycle, by checking that there's no reachable node *not*
            # included in any cycle between the first and last node.
            if any([len(set(c) - internal_nodes) > 1 for c in cycles]):
                continue

            # This is a loop! Generate the necessary annotation objects.
            loop_scope = cflow.LoopScope(internal_nodes)

            # Annotate the loop-variable assignment on the incoming edge,
            # but only if that edge is not already claimed by a different
            # (non-parent) construct
            if ((len(previous_edge.data.assignments) > 0
                 or len(back_edge.data.assignments) > 0)
                    and (len(control_flow[previous_edge]) == 0 or
                         (len(control_flow[previous_edge]) == 1
                          and control_flow[previous_edge][0].scope ==
                          loop_parent))):
                # Generate assignment edge, if available
                control_flow[previous_edge].append(
                    cflow.LoopAssignment(loop_scope, previous_edge))
            # Assign remaining control flow constructs
            control_flow[entry_edge].append(
                cflow.LoopEntry(loop_scope, entry_edge))
            control_flow[exit_edge].append(
                cflow.LoopExit(loop_scope, exit_edge))
            control_flow[back_edge].append(
                cflow.LoopBack(loop_scope, back_edge))

        ###################################################################
        # If/then/else detection procedure: candidates are states with
        # exactly two outgoing edges whose branches reconverge at a single
        # common dominator

        candidates = [
            n for n in states_topological if sdfg.out_degree(n) == 2
        ]
        for candidate in candidates:

            # A valid if occurs when then are no reachable nodes for either
            # path that does not pass through a common dominator.
            dominators = nx.dominance.dominance_frontiers(
                sdfg.nx, candidate)

            left_entry, right_entry = sdfg.out_edges(candidate)
            if (len(control_flow[left_entry]) > 0
                    or len(control_flow[right_entry]) > 0):
                # Already assigned to a control flow construct
                # TODO: carefully allow this in some cases
                continue

            left, right = left_entry.dst, right_entry.dst
            dominator = dominators[left] & dominators[right]
            if len(dominator) != 1:
                # There must be a single dominator across both branches,
                # unless one of the nodes _is_ the next dominator
                # if (len(dominator) == 0 and dominators[left] == {right}
                #         or dominators[right] == {left}):
                #     dominator = dominators[left] | dominators[right]
                # else:
                #     continue
                continue
            dominator = next(iter(dominator))  # Exactly one dominator

            exit_edges = sdfg.in_edges(dominator)
            if len(exit_edges) != 2:
                # There must be a single entry and a single exit. This
                # could be relaxed in the future.
                continue

            left_exit, right_exit = exit_edges
            if (len(control_flow[left_exit]) > 0
                    or len(control_flow[right_exit]) > 0):
                # Already assigned to a control flow construct
                # TODO: carefully allow this in some cases
                continue

            # Now traverse from the source and verify that all possible
            # paths pass through the dominator
            left_nodes = sdfg.all_nodes_between(left, dominator)
            if left_nodes is None:
                # Not all paths lead to the next dominator
                continue
            right_nodes = sdfg.all_nodes_between(right, dominator)
            if right_nodes is None:
                # Not all paths lead to the next dominator
                continue
            # NOTE(review): all_nodes is computed but not used below —
            # possibly leftover from an earlier version; confirm.
            all_nodes = left_nodes | right_nodes

            # Make sure there is no overlap between left and right nodes
            if len(left_nodes & right_nodes) > 0:
                continue

            # This is a valid if/then/else construct. Generate annotations
            if_then_else = cflow.IfThenElse(candidate, dominator)

            # Arbitrarily assign then/else to the two branches. If one edge
            # has no dominator but leads to the dominator, it means there's
            # only a then clause (and no else).
            # NOTE(review): has_else is set but not read in this method;
            # verify whether downstream code inspects it via the scopes.
            has_else = False
            if len(dominators[left]) == 1:
                then_scope = cflow.IfThenScope(if_then_else, left_nodes)
                else_scope = cflow.IfElseScope(if_then_else, right_nodes)
                control_flow[left_entry].append(
                    cflow.IfEntry(then_scope, left_entry))
                control_flow[left_exit].append(
                    cflow.IfExit(then_scope, left_exit))
                control_flow[right_exit].append(
                    cflow.IfExit(else_scope, right_exit))
                if len(dominators[right]) == 1:
                    control_flow[right_entry].append(
                        cflow.IfEntry(else_scope, right_entry))
                    has_else = True
            else:
                then_scope = cflow.IfThenScope(if_then_else, right_nodes)
                else_scope = cflow.IfElseScope(if_then_else, left_nodes)
                control_flow[right_entry].append(
                    cflow.IfEntry(then_scope, right_entry))
                control_flow[right_exit].append(
                    cflow.IfExit(then_scope, right_exit))
                control_flow[left_exit].append(
                    cflow.IfExit(else_scope, left_exit))

    #######################################################################
    # Generate actual program body
    states_generated = set()  # For sanity check
    generated_edges = set()
    self.generate_states(sdfg, "sdfg", control_flow,
                         global_stream, callsite_stream,
                         set(states_topological), states_generated,
                         generated_edges)

    #######################################################################

    # Sanity check: every state in the SDFG must have been emitted
    if len(states_generated) != len(sdfg.nodes()):
        raise RuntimeError(
            "Not all states were generated in SDFG {}!"
            "\n Generated: {}\n Missing: {}".format(
                sdfg.label, [s.label for s in states_generated],
                [s.label for s in (set(sdfg.nodes()) - states_generated)]))

    # Deallocate transients (mirrors the allocation pass above)
    shared_transients = sdfg.shared_transients()
    deallocated = set()
    for state in sdfg.nodes():
        for node in state.data_nodes():
            if (node.data in shared_transients
                    and node.data not in deallocated):
                self._dispatcher.dispatch_deallocate(
                    sdfg, state, None, node, global_stream, callsite_stream)
                deallocated.add(node.data)

    # Now that we have all the information about dependencies, generate
    # header and footer
    if is_top_level:
        header_stream = CodeIOStream()
        header_global_stream = CodeIOStream()
        footer_stream = CodeIOStream()
        footer_global_stream = CodeIOStream()
        self.generate_header(sdfg, self._dispatcher.used_environments,
                             header_global_stream, header_stream)

        # Open program function
        function_signature = 'void __program_%s_internal(%s)\n{\n' % (
            sdfg.name, sdfg.signature())

        self.generate_footer(sdfg, self._dispatcher.used_environments,
                             footer_global_stream, footer_stream)

        # Assemble: global declarations, then signature + header + body +
        # footer in program order
        header_global_stream.write(global_stream.getvalue())
        header_global_stream.write(footer_global_stream.getvalue())
        generated_header = header_global_stream.getvalue()

        all_code = CodeIOStream()
        all_code.write(function_signature)
        all_code.write(header_stream.getvalue())
        all_code.write(callsite_stream.getvalue())
        all_code.write(footer_stream.getvalue())
        generated_code = all_code.getvalue()
    else:
        generated_header = global_stream.getvalue()
        generated_code = callsite_stream.getvalue()

    # Clean up generated code: drop empty statements and labels that no
    # goto in the generated code targets
    gotos = re.findall(r'goto (.*);', generated_code)
    clean_code = ''
    for line in generated_code.split('\n'):
        # Empty line with semicolon
        if re.match(r'^\s*;\s*', line):
            continue
        # Label that might be unused
        label = re.findall(
            r'^\s*([a-zA-Z_][a-zA-Z_0-9]*):\s*[;]?\s*////.*$', line)
        if len(label) > 0:
            if label[0] not in gotos:
                continue
        clean_code += line + '\n'

    # Return the generated global and local code strings
    return (generated_header, clean_code, self._dispatcher.used_targets,
            self._dispatcher.used_environments)