def apply(self, sdfg: SDFG): state = sdfg.node(self.state_id) map_entry = self.map_entry(sdfg) map_exit = state.exit_node(map_entry) current_map = map_entry.map # Expand the innermost map if multidimensional if len(current_map.params) > 1: ext, rem = dace.transformation.helpers.extract_map_dims( sdfg, map_entry, list(range(len(current_map.params) - 1))) map_entry = rem map_exit = state.exit_node(map_entry) current_map = map_entry.map subgraph = state.scope_subgraph(map_entry) subgraph_contents = state.scope_subgraph(map_entry, include_entry=False, include_exit=False) # Set the schedule current_map.schedule = dace.dtypes.ScheduleType.SVE_Map # Infer all connector types and apply them inferred = infer_types.infer_connector_types(sdfg, state, subgraph) infer_types.apply_connector_types(inferred) # Infer vector connectors and AccessNodes and apply them vector_inference.infer_vectors( sdfg, state, map_entry, util.SVE_LEN, flags=vector_inference.VectorInferenceFlags.Allow_Stride, apply=True)
def apply(self, state: SDFGState, sdfg: SDFG):
    """Vectorize the innermost dimension of the matched map using SVE.

    :param state: The state containing the matched map.
    :param sdfg: The SDFG to transform.
    """
    entry = self.map_entry
    target_map = entry.map

    # A multidimensional map is first split so that only the innermost
    # dimension is vectorized.
    num_outer = len(target_map.params) - 1
    if num_outer > 0:
        _, entry = dace.transformation.helpers.extract_map_dims(
            sdfg, entry, list(range(num_outer)))
        target_map = entry.map

    scope = state.scope_subgraph(entry)

    # Mark the map for SVE code generation
    target_map.schedule = dace.dtypes.ScheduleType.SVE_Map

    # Infer connector types on the scope and write them back
    infer_types.apply_connector_types(
        infer_types.infer_connector_types(sdfg, state, scope))

    # Infer vector connectors and AccessNodes and apply them
    vector_inference.infer_vectors(
        sdfg,
        state,
        entry,
        self.vec_len,
        flags=vector_inference.VectorInferenceFlags.Allow_Stride,
        apply=True)
def test_schedule_inference_simple():
    """A program with a nested call: the single generated map entry must
    receive the default CPU_Multicore schedule after inference."""

    @dace.program
    def nested_call(A: dace.float64[3, 3]):
        return A + 1

    @dace.program
    def simple_schedule_inference(A: dace.float64[3, 3]):
        return nested_call(A)

    sdfg: dace.SDFG = simple_schedule_inference.to_sdfg(strict=False)
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.apply_transformations_repeated(StateFusion)

    map_entries = [
        node for node, _ in sdfg.all_nodes_recursive()
        if isinstance(node, dace.nodes.MapEntry)
    ]
    assert map_entries[0].schedule is dace.ScheduleType.CPU_Multicore
def test_gpu(input_array, output_array, expand_first):
    """Build a small SDFG around a library AddNode, map it to GPU, expand,
    and check the result.

    :param input_array: If True, the input is a 1-element array ("unsqueezed
        scalar"); otherwise a true scalar.
    :param output_array: If True, the AddNode writes the output array
        directly; otherwise it goes through a transient scalar first.
    :param expand_first: Whether library nodes are expanded before connector
        inference or after an initial inference pass.
    """
    sdfg = dace.SDFG("test_gpu_scalars")
    state = sdfg.add_state()

    # Input data descriptor: unsqueezed scalar vs. plain scalar
    if input_array:
        sdfg.add_array("input_arr", [1], dace.float32)
    else:
        sdfg.add_scalar("input_arr", dace.float32)

    # Output data descriptor(s): direct array, or transient scalar staging
    if output_array:
        sdfg.add_array("output_arr", [1], dace.float32)
    else:
        sdfg.add_scalar("transient_output_arr", dace.float32, transient=True)
        sdfg.add_array("output_arr", [1], dace.float32)

    read_node = state.add_access("input_arr")
    add_node = addlib.AddNode("add")
    state.add_node(add_node)
    write_node = state.add_access("output_arr")

    state.add_edge(read_node, None, add_node, "_a", dace.Memlet("input_arr"))
    if output_array:
        state.add_edge(add_node, "_b", write_node, None,
                       dace.Memlet("output_arr"))
    else:
        staging = state.add_access("transient_output_arr")
        state.add_edge(add_node, "_b", staging, None,
                       sdfg.make_array_memlet("transient_output_arr"))
        state.add_edge(staging, None, write_node, None,
                       sdfg.make_array_memlet("transient_output_arr"))

    sdfg.apply_gpu_transformations()

    if expand_first:
        sdfg.expand_library_nodes()
        infer_types.infer_connector_types(sdfg)
    else:
        infer_types.infer_connector_types(sdfg)
        sdfg.expand_library_nodes()
        infer_types.infer_connector_types(sdfg)

    in_value = np.array([1]).astype(np.float32) if input_array else 1
    out_value = np.array([0]).astype(np.float32)
    sdfg(input_arr=in_value, output_arr=out_value)
    assert out_value[0] == 2
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        # Round-trip the SDFG through serialization to catch
        # save/load inconsistencies early.
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile
        with tempfile.TemporaryDirectory() as tmp_dir:
            sdfg.save(f'{tmp_dir}/test.sdfg')
            sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
            sdfg2.save(f'{tmp_dir}/test2.sdfg')
            print('Testing SDFG serialization...')
            if not filecmp.cmp(f'{tmp_dir}/test.sdfg', f'{tmp_dir}/test2.sdfg'):
                # Preserve the mismatching files in the working directory
                # for inspection before failing.
                shutil.move(f"{tmp_dir}/test.sdfg", "test.sdfg")
                shutil.move(f"{tmp_dir}/test2.sdfg", "test2.sdfg")
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Run with the deserialized version
        # NOTE: This means that all subsequent modifications to `sdfg`
        # are not reflected outside of this function (e.g., library
        # node expansion).
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_types.infer_connector_types(sdfg)

    # Set default storage/schedule types in SDFG
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    # Recursively expand library nodes that have not yet been expanded
    sdfg.expand_library_nodes()

    # After expansion, run another pass of connector/type inference
    infer_types.infer_connector_types(sdfg)
    infer_types.set_default_schedule_and_storage_types(sdfg, None)

    frame = framecode.DaCeCodeGenerator()

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in target.TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    # NOTE(review): `targets` is not read after this point; instantiation
    # presumably registers each code generator with the frame's dispatcher
    # as a side effect — confirm before removing.
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in target.TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Instantiate all instrumentation providers in SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    # Collect the provider classes needed by any instrumented node, consume
    # scope, or map scope in the SDFG.
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                provider_mapping[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                provider_mapping[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                provider_mapping[node.map.instrument]
    # SDFG-level instrumentation is collected as well
    if sdfg.instrument != dtypes.InstrumentationType.No_Instrumentation:
        frame._dispatcher.instrumentation[sdfg.instrument] = \
            provider_mapping[sdfg.instrument]
    # Instantiate each collected provider class (None stays None)
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments,
                   sdfg=sdfg)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       target_type='../../include',
                       linkable=False)
    target_objects.append(dummy)

    # Include any extra code objects contributed by used environments
    for env in dace.library.get_environments_and_dependencies(
            used_environments):
        if hasattr(env, "codeobjects"):
            target_objects.extend(env.codeobjects)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg),
                       'cpp',
                       cpu.CPUCodeGen,
                       'SampleMain',
                       target_type='../../sample',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects
def infer_connector_types(self, sdfg, state):
    """Infer connector types within this node's internal SDFG.

    NOTE(review): `sdfg` and `state` are unused here; presumably the
    signature is mandated by the node interface — confirm against the
    base class. Only `self.sdfg` (the internal SDFG) is processed.
    """
    # Avoid import loop
    from dace.sdfg.infer_types import infer_connector_types
    # Infer internal connector types
    infer_connector_types(self.sdfg)
def can_be_applied(cls, state: SDFGState, candidate, expr_index, sdfg: SDFG,
                   strict=False) -> bool:
    """Check whether the matched map can be vectorized with SVE.

    Rejects maps that are already SVE-scheduled, contain nodes other than
    Tasklets/AccessNodes, use datatypes or mixed bit widths unsupported by
    SVE, have strides/WCRs incompatible with a vector step, copy from
    streams, or fail vector inference.

    :return: True if the transformation can be applied, False otherwise.
    """
    map_entry = state.node(candidate[cls.map_entry])
    map_exit = state.exit_node(map_entry)
    current_map = map_entry.map
    subgraph = state.scope_subgraph(map_entry)
    subgraph_contents = state.scope_subgraph(map_entry,
                                             include_entry=False,
                                             include_exit=False)

    # Prevent infinite repeats
    if current_map.schedule == dace.dtypes.ScheduleType.SVE_Map:
        return False

    # Infer all connector types for later checks (without modifying the graph)
    inferred = infer_types.infer_connector_types(sdfg, state, subgraph)

    ########################
    # Ensure only Tasklets and AccessNodes are within the map
    for node, _ in subgraph_contents.all_nodes_recursive():
        if not isinstance(node, (nodes.Tasklet, nodes.AccessNode)):
            return False

    ########################
    # Check for unsupported datatypes on the connectors (including on the Map itself)
    bit_widths = set()
    for node, _ in subgraph.all_nodes_recursive():
        for conn in node.in_connectors:
            t = inferred[(node, conn, True)]
            bit_widths.add(util.get_base_type(t).bytes)
            if not t.type in sve.util.TYPE_TO_SVE:
                return False
        for conn in node.out_connectors:
            t = inferred[(node, conn, False)]
            bit_widths.add(util.get_base_type(t).bytes)
            if not t.type in sve.util.TYPE_TO_SVE:
                return False

    # Multiple different bit widths occuring (messes up the predicates)
    if len(bit_widths) > 1:
        return False

    ########################
    # Check for unsupported memlets
    param_name = current_map.params[-1]
    for e, _ in subgraph.all_edges_recursive():
        # Check for unsupported strides
        # The only unsupported strides are the ones containing the innermost
        # loop param because they are not constant during a vector step
        param_sym = symbolic.symbol(current_map.params[-1])
        if param_sym in e.data.get_stride(sdfg, map_entry.map).free_symbols:
            return False

        # Check for unsupported WCR
        if e.data.wcr is not None:
            # Unsupported reduction type
            reduction_type = dace.frontend.operations.detect_reduction_type(
                e.data.wcr)
            if reduction_type not in sve.util.REDUCTION_TYPE_TO_SVE:
                return False

            # Param in memlet during WCR is not supported
            if param_name in e.data.subset.free_symbols and e.data.wcr_nonatomic:
                return False

            # vreduce is not supported
            dst_node = state.memlet_path(e)[-1]
            if isinstance(dst_node, nodes.Tasklet):
                if isinstance(dst_node.in_connectors[e.dst_conn],
                              dtypes.vector):
                    return False
            elif isinstance(dst_node, nodes.AccessNode):
                desc = dst_node.desc(sdfg)
                if isinstance(desc, data.Scalar) and isinstance(
                        desc.dtype, dtypes.vector):
                    return False

    ########################
    # Check for invalid copies in the subgraph
    for node, _ in subgraph.all_nodes_recursive():
        if not isinstance(node, nodes.Tasklet):
            continue
        for e in state.in_edges(node):
            # Check for valid copies from other tasklets and/or streams
            if e.data.data is not None:
                src_node = state.memlet_path(e)[0].src
                if not isinstance(src_node, (nodes.Tasklet, nodes.AccessNode)):
                    # Make sure we only have Code->Code copies and from arrays
                    return False
                if isinstance(src_node, nodes.AccessNode):
                    src_desc = src_node.desc(sdfg)
                    if isinstance(src_desc, dace.data.Stream):
                        # Stream pops are not implemented
                        return False

    # Run the vector inference algorithm to check if vectorization is feasible
    # (apply=False: dry run only, graph is left untouched)
    try:
        inf_graph = vector_inference.infer_vectors(
            sdfg,
            state,
            map_entry,
            util.SVE_LEN,
            flags=vector_inference.VectorInferenceFlags.Allow_Stride,
            apply=False)
    except vector_inference.VectorInferenceException as ex:
        print(f'UserWarning: Vector inference failed! {ex}')
        return False

    return True
def generate_code(sdfg) -> List[CodeObject]:
    """ Generates code as a list of code objects for a given SDFG.
        :param sdfg: The SDFG to use
        :return: List of code objects that correspond to files to compile.
    """
    # Before compiling, validate SDFG correctness
    sdfg.validate()

    if Config.get_bool('testing', 'serialization'):
        from dace.sdfg import SDFG
        import filecmp
        import shutil
        import tempfile

        # Fix: the round-trip files previously went straight into the
        # working directory (clobbered by parallel runs) and were leaked
        # when the comparison failed, since the removal happened after the
        # raise. Use a temporary directory that is always cleaned up; on a
        # mismatch, preserve the two files in the working directory for
        # inspection before failing.
        with tempfile.TemporaryDirectory() as tmp_dir:
            sdfg.save(f'{tmp_dir}/test.sdfg')
            sdfg2 = SDFG.from_file(f'{tmp_dir}/test.sdfg')
            sdfg2.save(f'{tmp_dir}/test2.sdfg')
            print('Testing SDFG serialization...')
            if not filecmp.cmp(f'{tmp_dir}/test.sdfg',
                               f'{tmp_dir}/test2.sdfg'):
                shutil.move(f'{tmp_dir}/test.sdfg', 'test.sdfg')
                shutil.move(f'{tmp_dir}/test2.sdfg', 'test2.sdfg')
                raise RuntimeError(
                    'SDFG serialization failed - files do not match')

        # Run with the deserialized version
        sdfg = sdfg2

    # Before generating the code, run type inference on the SDFG connectors
    infer_connector_types(sdfg)

    frame = framecode.DaCeCodeGenerator()

    # Instantiate CPU first (as it is used by the other code generators)
    # TODO: Refactor the parts used by other code generators out of CPU
    default_target = cpu.CPUCodeGen
    for k, v in target.TargetCodeGenerator.extensions().items():
        # If another target has already been registered as CPU, use it instead
        if v['name'] == 'cpu':
            default_target = k
    targets = {'cpu': default_target(frame, sdfg)}

    # Instantiate the rest of the targets
    targets.update({
        v['name']: k(frame, sdfg)
        for k, v in target.TargetCodeGenerator.extensions().items()
        if v['name'] not in targets
    })

    # Instantiate all instrumentation providers in SDFG
    provider_mapping = InstrumentationProvider.get_provider_mapping()
    frame._dispatcher.instrumentation[
        dtypes.InstrumentationType.No_Instrumentation] = None
    for node, _ in sdfg.all_nodes_recursive():
        if hasattr(node, 'instrument'):
            frame._dispatcher.instrumentation[node.instrument] = \
                provider_mapping[node.instrument]
        elif hasattr(node, 'consume'):
            frame._dispatcher.instrumentation[node.consume.instrument] = \
                provider_mapping[node.consume.instrument]
        elif hasattr(node, 'map'):
            frame._dispatcher.instrumentation[node.map.instrument] = \
                provider_mapping[node.map.instrument]
    # Instantiate each collected provider class (None stays None)
    frame._dispatcher.instrumentation = {
        k: v() if v is not None else None
        for k, v in frame._dispatcher.instrumentation.items()
    }

    # Generate frame code (and the rest of the code)
    (global_code, frame_code, used_targets,
     used_environments) = frame.generate_code(sdfg, None)
    target_objects = [
        CodeObject(sdfg.name,
                   global_code + frame_code,
                   'cpp',
                   cpu.CPUCodeGen,
                   'Frame',
                   environments=used_environments)
    ]

    # Create code objects for each target
    for tgt in used_targets:
        target_objects.extend(tgt.get_generated_codeobjects())

    # add a header file for calling the SDFG
    dummy = CodeObject(sdfg.name,
                       generate_headers(sdfg),
                       'h',
                       cpu.CPUCodeGen,
                       'CallHeader',
                       linkable=False)
    target_objects.append(dummy)

    # add a dummy main function to show how to call the SDFG
    dummy = CodeObject(sdfg.name + "_main",
                       generate_dummy(sdfg),
                       'cpp',
                       cpu.CPUCodeGen,
                       'DummyMain',
                       linkable=False)
    target_objects.append(dummy)

    return target_objects