def test_nccl_reduce_symbolic():
    """Run the symbolic NCCL reduce program on all configured GPUs.

    The number of GPUs is read from the ``compiler.cuda.max_number_gpus``
    configuration entry; the set of distinct output values must equal
    ``[num_gpus * i for i in range(num_gpus)]``.
    """
    num_gpus = Config.get('compiler', 'cuda', 'max_number_gpus')
    row_length = 2

    sdfg: dace.SDFG = nccl_reduce_symbolic.to_sdfg(strict=True)

    # The map over root GPUs (if one exists) is executed sequentially,
    # the map over GPUs is distributed across devices.
    root_map = find_map_by_param(sdfg, 'root_gpu')
    if root_map:
        root_map.schedule = dtypes.ScheduleType.Sequential
    device_map = find_map_by_param(sdfg, 'gpu')
    device_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(num_gpus=num_gpus))

    # Zero-initialized output buffer
    out = np.zeros(shape=[num_gpus, row_length], dtype=np_dtype)

    sdfg(out=out, N=row_length)

    expected = np.array([num_gpus * idx for idx in range(num_gpus)])
    assert (np.unique(out) == expected).all()
def testDefaultDataTypes(self):
    """Check that the configured default data types drive type inference.

    With ``compiler.default_data_types`` set to ``python`` integers and
    floats are inferred as 64-bit, with ``c`` as 32-bit. Any other value
    fails the test instead of silently skipping those assertions.
    """
    config_data_types = Config.get('compiler', 'default_data_types').lower()

    # One assignment per line so the snippet is parseable Python.
    code_str = ("value1 = 10\n"
                "value2=3.14\n"
                "value3=5000000000")
    inf_symbols = type_inference.infer_types(code_str)

    if config_data_types == "python":
        self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int64))
        self.assertEqual(inf_symbols["value2"],
                         dtypes.typeclass(np.float64))
    elif config_data_types == "c":
        self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int32))
        self.assertEqual(inf_symbols["value2"],
                         dtypes.typeclass(np.float32))
    else:
        self.fail("Unknown configuration for default_data_types: {}".format(
            config_data_types))

    # In any case, value3 does not fit in a signed 32-bit integer and is
    # expected to be inferred as uint64.
    self.assertEqual(inf_symbols["value3"], dtypes.typeclass(np.uint64))
def __init__(self, wrapped_type):
    """Wrap a numpy type, a numpy type name, or a Python builtin number type.

    :param wrapped_type: A type object, or a string naming an attribute of
                         ``numpy``. The builtins ``int``, ``float`` and
                         ``complex`` are mapped to a numpy type whose width
                         depends on ``compiler.default_data_types``.
    :raises ValueError: If a string does not name a numpy attribute.
    :raises NameError: If a builtin type is given and the configured
                       ``default_data_types`` is neither ``python`` nor
                       ``c`` (case-insensitive).
    """
    # Strings are looked up by name in numpy
    if isinstance(wrapped_type, str):
        try:
            wrapped_type = getattr(numpy, wrapped_type)
        except AttributeError:
            raise ValueError("Unknown type: {}".format(wrapped_type))

    config_data_types = Config.get('compiler', 'default_data_types')

    # Builtin type -> numpy replacement for each configuration value
    builtin_replacements = (
        (int, {'python': numpy.int64, 'c': numpy.int32}),
        (float, {'python': numpy.float64, 'c': numpy.float32}),
        (complex, {'python': numpy.complex128, 'c': numpy.complex64}),
    )
    for builtin_type, replacement_by_mode in builtin_replacements:
        if wrapped_type is not builtin_type:
            continue
        mode = config_data_types.lower()
        if mode not in replacement_by_mode:
            raise NameError(
                "Unknown configuration for default_data_types: {}".format(
                    config_data_types))
        wrapped_type = replacement_by_mode[mode]
        break

    self.type = wrapped_type  # Type in Python
    self.ctype = _CTYPES[wrapped_type]  # Type in C
    self.ctype_unaligned = self.ctype  # Type in C (without alignment)
    self.dtype = self  # For compatibility support with numpy
    self.bytes = _BYTES[wrapped_type]  # Number of bytes for this type
def cmake_options():
    """Translate the ``compiler.xilinx`` configuration into CMake options.

    :return: List of ``-D...`` definitions to append to the CMake command.
             ``-DVITIS_ROOT_DIR`` is only included when
             ``compiler.xilinx.path`` is set to a non-empty value.
    """
    def xilinx_setting(key):
        return Config.get("compiler", "xilinx", key)

    host_flags = xilinx_setting("host_flags")
    synthesis_flags = xilinx_setting("synthesis_flags")
    build_flags = xilinx_setting("build_flags")
    mode = xilinx_setting("mode")
    target_platform = xilinx_setting("platform")
    if Config.get_bool("compiler", "xilinx", "enable_debugging"):
        enable_debugging = "ON"
    else:
        enable_debugging = "OFF"

    options = [
        "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
        "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
        "-DDACE_XILINX_MODE={}".format(mode),
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
    ]

    # Override Vitis/SDx/SDAccel installation directory; backslashes are
    # converted to forward slashes.
    installation_dir = xilinx_setting("path")
    if installation_dir:
        options.append("-DVITIS_ROOT_DIR=\"{}\"".format(
            installation_dir.replace("\\", "/")))
    return options
def render_config_dialog(self):
    """Build and show the configuration window.

    Top-level configuration entries whose metadata type is ``dict`` get a
    notebook page of their own; every other top-level entry is rendered as
    a row of the "General" page.
    """
    # Load metadata for configuration
    Config.load_schema()

    self.window = Gtk.Window()
    pages = Gtk.Notebook()
    pages.set_scrollable(True)
    self.window.add(pages)

    # General (top-level) settings
    general_label = Gtk.Label()
    general_label.set_label('General')
    general_grid = Gtk.Grid()
    general_grid.set_hexpand(True)
    pages.append_page(general_grid, general_label)

    has_columns = False
    for row, (cname, cval) in enumerate(sorted(Config.get().items())):
        meta = Config.get_metadata(cname)

        if meta['type'] != 'dict':
            # Plain setting: the two grid columns are inserted only once,
            # before the first element is rendered.
            if not has_columns:
                general_grid.insert_column(0)
                general_grid.insert_column(1)
                has_columns = True
            self.render_config_element(cval, (cname, ), general_grid, row,
                                       meta)
            continue

        # Dictionary setting: dedicated page holding the whole subtree
        page_label = Gtk.Label()
        page_label.set_label(meta['title'])
        page_grid = Gtk.Grid()
        page_grid.set_hexpand(True)
        pages.append_page(page_grid, page_label)
        self.render_config_subtree(cval, (cname, ), page_grid)

    self.window.show_all()
    self.window.connect("delete-event", self.win_close_callback, None)
def cmake_options():
    """Translate the ``compiler.xilinx`` configuration into CMake options.

    The SDAccel root directory is taken to be two directory levels above
    the configured compiler executable.

    :return: List of ``-D...`` definitions to append to the CMake command.
    """
    def xilinx_setting(key):
        return Config.get("compiler", "xilinx", key)

    compiler = make_absolute(xilinx_setting("executable"))
    host_flags = xilinx_setting("host_flags")
    synthesis_flags = xilinx_setting("synthesis_flags")
    build_flags = xilinx_setting("build_flags")
    mode = xilinx_setting("mode")
    target_platform = xilinx_setting("platform")
    if Config.get_bool("compiler", "xilinx", "enable_debugging"):
        enable_debugging = "ON"
    else:
        enable_debugging = "OFF"

    sdaccel_root = os.path.dirname(os.path.dirname(compiler))
    return [
        "-DSDACCEL_ROOT_DIR={}".format(sdaccel_root),
        "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
        "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
        "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
        "-DDACE_XILINX_MODE={}".format(mode),
        "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
        "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
    ]
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param program_name: Name of the program (used for the CMake program
                             name and the library file name). Defaults to the
                             base name of `program_folder`.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
        :raises CompilerConfigurationError: If CMake configuration fails
                                            twice in a row.
        :raises CompilationError: If the build step fails.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    # Blank lines are skipped so that a trailing newline does not break the
    # (target, file) unpacking below.
    with open(os.path.join(program_folder, "dace_files.csv"),
              "r") as dace_files:
        file_list = [
            line.strip().split(",") for line in dace_files if line.strip()
        ]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, file_name in file_list:
        path = os.path.join(src_folder, target_name, file_name)
        files.append(path)
        targets[target_name] = codegen.STRING_TO_TARGET[target_name]

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        cmake_command += target.cmake_options()
        try:
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            # Target has no "libs" entry in the configuration
            pass

    # TODO: it should be possible to use the default arguments/compilers
    # found by CMake
    cmake_command += [
        "-DDACE_LIBS=\"{}\"".format(" ".join(libraries)),
        "-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))),
        "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
            Config.get('compiler', 'linker', 'args') +
            Config.get('compiler', 'linker', 'additional_args')),
    ]

    ##############################################
    # Configure
    configure_line = " ".join(cmake_command)
    try:
        _run_liveoutput(configure_line,
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(configure_line,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
def get_generated_codeobjects(self):
    """Assemble the code objects produced for the Xilinx target.

    Builds the host file (environment setup, ``__dace_init_xilinx`` /
    ``__dace_exit_xilinx`` and the collected host code of every kernel),
    one device code object per entry of ``self._kernel_codes``, and one
    device code object per entry of ``self._other_codes``. A ``link.cfg``
    stream is created here and stored in ``self._other_codes`` before that
    dictionary is converted.

    :return: List containing the host code object, followed by the kernel
             code objects and the other code objects.
    :raises cgx.CodegenError: If ``compiler.xilinx.mode`` is not one of
                              the modes handled below.
    :raises RuntimeError: If only some of the entries in
                          ``self._bank_assignments`` are assigned.
    """

    execution_mode = Config.get("compiler", "xilinx", "mode")

    # The kernel file name is emitted as a C++ expression: the macro
    # DACE_BINARY_DIR followed by a string literal (closed per mode below).
    kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
    if execution_mode == "software_emulation":
        kernel_file_name += "_sw_emu.xclbin\""
        xcl_emulation_mode = "\"sw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware_emulation":
        kernel_file_name += "_hw_emu.xclbin\""
        xcl_emulation_mode = "\"hw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware" or execution_mode == "simulation":
        kernel_file_name += "_hw.xclbin\""
        xcl_emulation_mode = None
        xilinx_sdx = None
    else:
        raise cgx.CodegenError(
            "Unknown Xilinx execution mode: {}".format(execution_mode))

    # Generated statements that set an environment variable when a value
    # was chosen above, and unset it otherwise.
    set_env_vars = ""
    set_str = "dace::set_environment_variable(\"{}\", {});\n"
    unset_str = "dace::unset_environment_variable(\"{}\");\n"
    set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                    xcl_emulation_mode)
                     if xcl_emulation_mode is not None else
                     unset_str.format("XCL_EMULATION_MODE"))
    set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx)
                     if xilinx_sdx is not None else
                     unset_str.format("XILINX_SDX"))
    # EMCONFIG_PATH is only set in hardware emulation mode
    set_env_vars += set_str.format(
        "EMCONFIG_PATH", "DACE_BINARY_DIR"
    ) if execution_mode == 'hardware_emulation' else unset_str.format(
        "EMCONFIG_PATH")

    host_code = CodeIOStream()
    host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
""")
    # Extra headers are emitted when the dispatcher holds more than one
    # instrumentation entry.
    # NOTE(review): presumably one entry always exists for "no
    # instrumentation" - confirm against the dispatcher.
    if len(self._dispatcher.instrumentation) > 1:
        host_code.write("""\
#include "dace/perf/reporting.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <limits>
""")
    host_code.write("\n\n")

    self._frame.generate_fileheader(self._global_sdfg, host_code,
                                    'xilinx_host')

    # Non-array parameters are appended after the state pointer, hence the
    # leading comma when there are any.
    params_comma = self._global_sdfg.signature(with_arrays=False)
    if params_comma:
        params_comma = ', ' + params_comma

    host_code.write("""
DACE_EXPORTED int __dace_init_xilinx({sdfg.name}_t *__state{signature}) {{
    {environment_variables}

    __state->fpga_context = new dace::fpga::Context();
    __state->fpga_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({sdfg.name}_t *__state) {{
    delete __state->fpga_context;
}}

{host_code}""".format(signature=params_comma,
                      sdfg=self._global_sdfg,
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79,
                              kernel_name=name,
                              code=code)
                          for (name, code) in self._host_codes
                      ])))

    host_code_obj = CodeObject(self._program_name,
                               host_code.getvalue(),
                               "cpp",
                               XilinxCodeGen,
                               "Xilinx",
                               target_type="host")

    kernel_code_objs = [
        CodeObject(kernel_name,
                   code,
                   "cpp",
                   XilinxCodeGen,
                   "Xilinx",
                   target_type="device")
        for (kernel_name, code) in self._kernel_codes
    ]

    # Memory bank and streaming interfaces connectivity configuration file
    link_cfg = CodeIOStream()
    self._other_codes["link.cfg"] = link_cfg
    link_cfg.write("[connectivity]")
    are_assigned = [v is not None for v in self._bank_assignments.values()]
    if any(are_assigned):
        # Partial assignment is rejected: either all arrays are assigned
        # or none is.
        if not all(are_assigned):
            raise RuntimeError("Some, but not all global memory arrays "
                               "were assigned to memory banks: {}".format(
                                   self._bank_assignments))
        # Emit mapping from kernel memory interfaces to DRAM banks
        for (kernel_name, interface_name), (
                memory_type,
                memory_bank) in self._bank_assignments.items():
            link_cfg.write(
                f"sp={kernel_name}_1.m_axi_{interface_name}:{memory_type}[{memory_bank}]"
            )
    # Emit mapping between inter-kernel streaming interfaces
    for _, (src, dst) in self._stream_connections.items():
        link_cfg.write(f"stream_connect={src}:{dst}")

    other_objs = []
    for name, code in self._other_codes.items():
        # "<base>.<ext...>": the part before the first dot is the object
        # name, everything after it is the file extension.
        name = name.split(".")
        other_objs.append(
            CodeObject(name[0],
                       code.getvalue(),
                       ".".join(name[1:]),
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device"))

    return [host_code_obj] + kernel_code_objs + other_objs
def get_generated_codeobjects(self):
    """Assemble the code objects produced for the Xilinx target.

    Builds the host file (environment setup, the global FPGA context,
    ``__dace_init_xilinx`` / ``__dace_exit_xilinx`` and the collected host
    code of every kernel) and one device code object per entry of
    ``self._kernel_codes``.

    :return: List containing the host code object followed by the kernel
             code objects.
    :raises dace.codegen.codegen.CodegenError: If ``compiler.xilinx.mode``
                                               is not one of the modes
                                               handled below.
    """

    execution_mode = Config.get("compiler", "xilinx", "mode")

    # The kernel file name is emitted as a C++ expression: the macro
    # DACE_BINARY_DIR followed by a string literal (closed per mode below).
    kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
    if execution_mode == "software_emulation":
        kernel_file_name += "_sw_emu.xclbin\""
        xcl_emulation_mode = "\"sw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware_emulation":
        kernel_file_name += "_hw_emu.xclbin\""
        xcl_emulation_mode = "\"hw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware" or execution_mode == "simulation":
        kernel_file_name += "_hw.xclbin\""
        xcl_emulation_mode = None
        xilinx_sdx = None
    else:
        raise dace.codegen.codegen.CodegenError(
            "Unknown Xilinx execution mode: {}".format(execution_mode))

    # Generated statements that set an environment variable when a value
    # was chosen above, and unset it otherwise.
    set_env_vars = ""
    set_str = "dace::set_environment_variable(\"{}\", {});\n"
    unset_str = "dace::unset_environment_variable(\"{}\");\n"
    set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                    xcl_emulation_mode)
                     if xcl_emulation_mode is not None else
                     unset_str.format("XCL_EMULATION_MODE"))
    set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx)
                     if xilinx_sdx is not None else
                     unset_str.format("XILINX_SDX"))

    host_code = CodeIOStream()
    host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
#include <iostream>\n\n""")

    self._frame.generate_fileheader(self._global_sdfg, host_code)

    # The init and exit functions share the same signature, generated from
    # the global SDFG.
    host_code.write("""
dace::fpga::Context *dace::fpga::_context;

DACE_EXPORTED int __dace_init_xilinx({signature}) {{
    {environment_variables}
    dace::fpga::_context = new dace::fpga::Context();
    dace::fpga::_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({signature}) {{
    delete dace::fpga::_context;
}}

{host_code}""".format(signature=self._global_sdfg.signature(),
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79,
                              kernel_name=name,
                              code=code)
                          for (name, code) in self._host_codes
                      ])))

    host_code_obj = CodeObject(self._program_name,
                               host_code.getvalue(),
                               "cpp",
                               XilinxCodeGen,
                               "Xilinx",
                               target_type="host")

    kernel_code_objs = [
        CodeObject(kernel_name,
                   code,
                   "cpp",
                   XilinxCodeGen,
                   "Xilinx",
                   target_type="device")
        for (kernel_name, code) in self._kernel_codes
    ]

    return [host_code_obj] + kernel_code_objs
def test_set_temporary():
    """``set_temporary`` overrides a config value only inside its context."""
    config_path = ["compiler", "build_type"]
    temporary_value = "I'm not a build type"

    value_before = Config.get(*config_path)

    with set_temporary(*config_path, value=temporary_value):
        # Inside the context the override is visible
        assert Config.get(*config_path) == temporary_value

    # After leaving the context the previous value is back
    assert Config.get(*config_path) == value_before
def __init__(self, base_indentation=0):
    """Create an empty stream that starts at indentation level zero.

    The number of spaces per indentation level is read from the
    ``compiler.indentation_spaces`` configuration entry.

    :param base_indentation: Accepted for interface compatibility; this
                             constructor does not use it.
    """
    super().__init__()
    self._indent = 0
    configured_spaces = Config.get('compiler', 'indentation_spaces')
    self._spaces = int(configured_spaces)
def apply(self, sdfg: SDFG) -> None:
    """Distribute the matched map over multiple GPUs.

    The matched map is strip-mined into ``number_of_gpus`` tiles; the new
    outer map is scheduled as ``GPU_Multidevice`` and the inner map as
    ``GPU_Device``. Data entering and leaving the outer map is staged
    through ``GPU_Global`` transients located on the GPU given by the
    outer map parameter; outputs with a write-conflict resolution are
    accumulated through ``AccumulateTransient``.

    :param sdfg: The SDFG to transform (modified in place).
    :raises ValueError: If more GPUs are requested than
                        ``compiler.cuda.max_number_gpus`` allows.
    """
    graph: SDFGState = sdfg.nodes()[self.state_id]

    inner_map_entry: nodes.MapEntry = graph.nodes()[self.subgraph[
        GPUMultiTransformMap._map_entry]]

    # Default to the configured maximum when no GPU count was requested
    number_of_gpus = self.number_of_gpus
    ngpus = Config.get("compiler", "cuda", "max_number_gpus")
    if number_of_gpus is None:
        number_of_gpus = ngpus
    if number_of_gpus > ngpus:
        raise ValueError(
            'Requesting more gpus than specified in the dace config')

    # Avoiding import loops
    from dace.transformation.dataflow import (StripMining, InLocalStorage,
                                              OutLocalStorage,
                                              AccumulateTransient)

    # The user has responsibility for the implementation of a Library node.
    scope_subgraph = graph.scope_subgraph(inner_map_entry)
    for node in scope_subgraph.nodes():
        if isinstance(node, nodes.LibraryNode):
            warnings.warn(
                'Node %s is a library node, make sure to manually set the '
                'implementation to a GPU compliant specialization.' % node)

    # Tile map into number_of_gpus tiles
    outer_map: nodes.Map = StripMining.apply_to(
        sdfg,
        dict(dim_idx=-1,
             new_dim_prefix=self.new_dim_prefix,
             tile_size=number_of_gpus,
             tiling_type=dtypes.TilingType.NumberOfTiles),
        _map_entry=inner_map_entry)

    outer_map_entry: nodes.MapEntry = graph.scope_dict()[inner_map_entry]
    inner_map_exit: nodes.MapExit = graph.exit_node(inner_map_entry)
    outer_map_exit: nodes.MapExit = graph.exit_node(outer_map_entry)

    # Change map schedules
    inner_map_entry.map.schedule = dtypes.ScheduleType.GPU_Device
    outer_map.schedule = dtypes.ScheduleType.GPU_Multidevice

    # The outer map parameter doubles as the GPU identifier
    symbolic_gpu_id = outer_map.params[0]

    # Add the parameter of the outer map
    for node in graph.successors(inner_map_entry):
        if isinstance(node, nodes.NestedSDFG):
            map_syms = inner_map_entry.range.free_symbols
            for sym in map_syms:
                symname = str(sym)
                if symname not in node.symbol_mapping:
                    node.symbol_mapping[symname] = sym
                    node.sdfg.symbols[symname] = graph.symbols_defined_at(
                        node)[symname]

    # Add transient Data leading to the inner map
    prefix = self.new_transient_prefix
    for node in graph.predecessors(outer_map_entry):
        # Only AccessNodes are relevant
        if (isinstance(node, nodes.AccessNode)
                and not (self.skip_scalar
                         and isinstance(node.desc(sdfg), Scalar))):
            # With peer-to-peer access, data already in GPU global memory
            # is not staged through a transient.
            if self.use_p2p and node.desc(
                    sdfg).storage is dtypes.StorageType.GPU_Global:
                continue

            in_data_node = InLocalStorage.apply_to(sdfg,
                                                   dict(array=node.data,
                                                        prefix=prefix),
                                                   verify=False,
                                                   save=False,
                                                   node_a=outer_map_entry,
                                                   node_b=inner_map_entry)
            in_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
            in_data_node.desc(sdfg).storage = dtypes.StorageType.GPU_Global

    wcr_data: Dict[str, Any] = {}
    # Add transient Data leading to the outer map
    for edge in graph.in_edges(outer_map_exit):
        node = graph.memlet_path(edge)[-1].dst
        if isinstance(node, nodes.AccessNode):
            data_name = node.data
            # Transients with write-conflict resolution need to be
            # collected first as AccumulateTransient creates a nestedSDFG
            if edge.data.wcr is not None:
                dtype = sdfg.arrays[data_name].dtype
                redtype = operations.detect_reduction_type(edge.data.wcr)
                # Custom reduction can not have an accumulate transient,
                # as the accumulation from the transient to the outer
                # storage is not defined.
                if redtype == dtypes.ReductionType.Custom:
                    warnings.warn(
                        'Using custom reductions in a GPUMultitransformed '
                        'Map only works for a small data volume. For large '
                        'volume there is no guarantee.')
                    continue
                identity = dtypes.reduction_identity(dtype, redtype)
                wcr_data[data_name] = identity
            elif (not isinstance(node.desc(sdfg), Scalar)
                  or not self.skip_scalar):
                if self.use_p2p and node.desc(
                        sdfg).storage is dtypes.StorageType.GPU_Global:
                    continue
                # Transients without write-conflict resolution; an
                # already existing transient array is reused.
                create_array = prefix + '_' + data_name not in sdfg.arrays
                out_data_node = OutLocalStorage.apply_to(
                    sdfg,
                    dict(array=data_name,
                         prefix=prefix,
                         create_array=create_array),
                    verify=False,
                    save=False,
                    node_a=inner_map_exit,
                    node_b=outer_map_exit)
                out_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                out_data_node.desc(
                    sdfg).storage = dtypes.StorageType.GPU_Global

    # Add Transients for write-conflict resolution
    if wcr_data:
        nsdfg = AccumulateTransient.apply_to(
            sdfg,
            options=dict(array_identity_dict=wcr_data, prefix=prefix),
            map_exit=inner_map_exit,
            outer_map_exit=outer_map_exit)
        nsdfg.schedule = dtypes.ScheduleType.GPU_Multidevice
        nsdfg.location['gpu'] = symbolic_gpu_id
        for transient_node in graph.successors(nsdfg):
            if isinstance(transient_node, nodes.AccessNode):
                # Keep the outer descriptor and the descriptor of the same
                # name inside the nested SDFG in sync.
                transient_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                transient_node.desc(
                    sdfg).storage = dtypes.StorageType.GPU_Global
                nsdfg.sdfg.arrays[
                    transient_node.label].location['gpu'] = symbolic_gpu_id
                nsdfg.sdfg.arrays[
                    transient_node.
                    label].storage = dtypes.StorageType.GPU_Global
        infer_types.set_default_schedule_storage_types_and_location(
            nsdfg.sdfg, dtypes.ScheduleType.GPU_Multidevice,
            symbolic_gpu_id)

    # Remove the parameter of the outer_map from the sdfg symbols,
    # as it got added as a symbol in StripMining.
    if outer_map.params[0] in sdfg.free_symbols:
        sdfg.remove_symbol(outer_map.params[0])
def preprocess_dace_program(
    f: Callable[..., Any],
    argtypes: Dict[str, data.Data],
    global_vars: Dict[str, Any],
    modules: Dict[str, Any],
    resolve_functions: bool = False,
    parent_closure: Optional[SDFGClosure] = None
) -> Tuple[PreprocessedAST, SDFGClosure]:
    """
    Preprocesses a ``@dace.program`` and all its nested functions, returning
    a preprocessed AST object and the closure of the resulting SDFG.

    Note that both ``global_vars`` and ``argtypes`` are modified in place:
    module objects are added as keys to ``global_vars``, and the data
    descriptors of the used closure arrays are added to ``argtypes``.

    :param f: A Python function to parse.
    :param argtypes: A dictionary of (name, type) for the given
                     function's arguments, which may pertain to data
                     nodes or symbols (scalars).
    :param global_vars: A dictionary of global variables in the closure
                        of `f`.
    :param modules: A dictionary from an imported module name to the
                    module itself.
    :param resolve_functions: If True, treats all global functions defined
                              outside of the program as returning constant
                              values.
    :param parent_closure: If not None, represents the closure of the parent
                           of the currently processed function.
    :return: A 2-tuple of the AST and its reduced (used) closure.
    :raises DaceRecursionError: If `f` is already on the parent closure's
                                call stack, or if a nested function
                                reports recursion that does not originate
                                from `f`.
    :raises TypeError: If the implicit recursion depth configured in
                       ``frontend.implicit_recursion_depth`` is exceeded,
                       or if recursion back into `f` is detected while
                       resolving the call tree.
    """
    src_ast, src_file, src_line, src = astutils.function_to_ast(f)

    # Resolve data structures
    src_ast = StructTransformer(global_vars).visit(src_ast)

    src_ast = ModuleResolver(modules).visit(src_ast)
    # Convert modules after resolution: the value stored under the module
    # name is additionally stored under the mapped value from `modules`.
    for mod, modval in modules.items():
        if mod == 'builtins':
            continue
        newmod = global_vars[mod]
        # The original name-keyed entry is intentionally kept:
        #del global_vars[mod]
        global_vars[modval] = newmod

    # Resolve constants to their values (if they are not already defined in this scope)
    # and symbols to their names
    resolved = {
        k: v
        for k, v in global_vars.items() if k not in argtypes and k != '_'
    }
    closure_resolver = GlobalResolver(resolved, resolve_functions)

    # Append element to call stack and handle max recursion depth
    if parent_closure is not None:
        # Functions are identified on the call stack by their id()
        fid = id(f)
        if fid in parent_closure.callstack:
            raise DaceRecursionError(fid)
        if len(parent_closure.callstack) > Config.get(
                'frontend', 'implicit_recursion_depth'):
            raise TypeError(
                'Implicit (automatically parsed) recursion depth '
                'exceeded. Functions below this call will not be '
                'parsed. To change this setting, modify the value '
                '`frontend.implicit_recursion_depth` in .dace.conf')

        closure_resolver.closure.callstack = parent_closure.callstack + [fid]

    # AST passes, applied in this order
    src_ast = closure_resolver.visit(src_ast)
    src_ast = LoopUnroller(resolved, src_file).visit(src_ast)
    src_ast = ConditionalCodeResolver(resolved).visit(src_ast)
    src_ast = DeadCodeEliminator().visit(src_ast)
    try:
        ctr = CallTreeResolver(closure_resolver.closure, resolved)
        ctr.visit(src_ast)
    except DaceRecursionError as ex:
        # Recursion that leads back to this function becomes a TypeError;
        # recursion detected for another function is passed on unchanged.
        if id(f) == ex.fid:
            raise TypeError(
                'Parsing failed due to recursion in a data-centric '
                'context called from this function')
        else:
            raise ex
    used_arrays = ArrayClosureResolver(closure_resolver.closure)
    used_arrays.visit(src_ast)

    # Filter out arrays that are not used after dead code elimination
    closure_resolver.closure.closure_arrays = {
        k: v
        for k, v in closure_resolver.closure.closure_arrays.items()
        if k in used_arrays.arrays
    }

    # Filter out callbacks that were removed after dead code elimination
    closure_resolver.closure.callbacks = {
        k: v
        for k, v in closure_resolver.closure.callbacks.items()
        if k in ctr.seen_calls
    }

    # Filter remaining global variables according to type and scoping rules
    program_globals = {
        k: v
        for k, v in global_vars.items() if k not in argtypes
    }

    # Fill in data descriptors from closure arrays (second element of each
    # closure array entry)
    argtypes.update({
        arrname: v[1]
        for arrname, v in closure_resolver.closure.closure_arrays.items()
    })

    # Combine nested closures with the current one
    closure_resolver.closure.combine_nested_closures()

    past = PreprocessedAST(src_file, src_line, src, src_ast, program_globals)

    return past, closure_resolver.closure
def run(self, dace_state, fail_on_nonzero=False):
    """Compile and execute a DaCe program on the configured remote host.

    Generates the program folder locally, copies it to the remote working
    directory, compiles it there, runs the program once per multirun
    configuration, retrieves the result files and cleans up the remote
    files. Any failure is printed with its traceback; in headless mode the
    process then exits with status 1.

    :param dace_state: State object providing the SDFG, the generated code
                       objects and the DaCe source file.
    :param fail_on_nonzero: Forwarded to ``remote_exec_dace``.
    """
    dace_progname = dace_state.get_sdfg().name
    code_objects = dace_state.get_generated_code()

    # Figure out whether we should use MPI for launching
    use_mpi = False
    for code_object in code_objects:
        if code_object.target.target_name == 'mpi':
            use_mpi = True
            break

    # Check counter validity
    PerfUtils.check_performance_counters(self)

    remote_workdir = Config.get("execution", "general", "workdir")
    remote_dace_dir = remote_workdir + "/.dacecache/%s/" % dace_progname
    self.show_output("Executing DaCe program " + dace_progname + " on " +
                     Config.get("execution", "general", "host") + "\n")

    try:
        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("Generating remote workspace")
        tmpfolder = tempfile.mkdtemp()
        generate_program_folder(dace_state.get_sdfg(), code_objects,
                                tmpfolder)
        self.create_remote_directory(remote_dace_dir)
        self.copy_folder_to_remote(tmpfolder, remote_dace_dir)

        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("Compiling...")
        # call compile.py on the remote node in the copied folder
        self.remote_compile(remote_dace_dir, dace_progname)

        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("Done compiling")

        # copy the input file and the .so file (with the right name)
        # to remote_dace_dir
        so_name = "lib" + dace_progname + "." + Config.get(
            'compiler', 'library_extension')
        self.copy_file_from_remote(remote_dace_dir + "/build/" + so_name,
                                   tmpfolder + "/" + so_name)
        self.copy_file_to_remote(tmpfolder + "/" + so_name,
                                 remote_dace_dir)

        dace_file = dace_state.get_dace_tmpfile()
        if dace_file is None:
            raise ValueError("Dace file is None!")

        # copy the SDFG (best effort; a failure here does not abort the
        # run, but interrupts are no longer swallowed)
        try:
            local_sdfg = tmpfolder + "/sdfg.out"
            sdfg = dace_state.get_sdfg()
            sdfg.save(local_sdfg)
            remote_sdfg = remote_workdir + "/sdfg.out"
            self.copy_file_to_remote(local_sdfg, remote_sdfg)
        except Exception:
            print("Could NOT save the SDFG")

        remote_dace_file = remote_workdir + "/" + os.path.basename(
            dace_file)
        self.copy_file_to_remote(dace_file, remote_dace_file)

        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("All files copied to remote")

        # We got the file there, now we can run with different
        # configurations.
        for iteration in range(PerfSettings.perf_multirun_num()):
            optdict, omp_thread_num = PerfUtils.get_run_options(
                self, iteration)

            self.remote_exec_dace(remote_workdir,
                                  remote_dace_file,
                                  use_mpi,
                                  fail_on_nonzero,
                                  omp_num_threads=omp_thread_num,
                                  additional_options_dict=optdict)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Done option threads=" +
                                       str(omp_thread_num))

        self.show_output("Execution Terminated\n")

        # results.log may not exist; retrieving it is best effort
        try:
            self.copy_file_from_remote(remote_workdir + "/results.log",
                                       ".")
        except Exception:
            pass

        # Copy back the vectorization results
        PerfUtils.retrieve_vectorization_report(self, code_objects,
                                                remote_dace_dir)

        # Copy back the instrumentation results
        PerfUtils.retrieve_instrumentation_results(self, remote_workdir)

        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("Cleaning up")

        try:
            self.remote_delete_file(remote_workdir + "/results.log")
        except Exception:
            print(
                "WARNING: results.log could not be transmitted (probably not created)"
            )

        self.remote_delete_file(remote_dace_file)
        self.remote_delete_dir(remote_dace_dir)

        def deferred():
            # Update the plot from the retrieved log, then remove the
            # local copy.
            try:
                self.update_performance_plot("results.log",
                                             str(self.counter))
                os.remove("results.log")
            except FileNotFoundError:
                print("WARNING: results.log could not be read")

        self.async_host.run_sync(deferred)

        if self.running_async:
            # Add information about what is being run
            self.async_host.notify("Done cleaning")

        # Also, update the performance data.
        self.rendered_graphs.set_memspeed_target()
        self.rendered_graphs.render_performance_data(
            Config.get("instrumentation", "papi_mode"))
    except Exception:
        print("\n\n\n")
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print("Running the program failed:")
        traceback.print_exc()
        print(
            "Inspect above output for more information about executed command sequence."
        )
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        if self.headless:
            sys.exit(1)

    if self.running_async:
        self.async_host.notify("All done")
    self.counter += 1
def copy_file_from_remote(self, src, dst):
    """Copy a file from the remote host to the local machine.

    The command is built from the ``execution.general.copycmd_r2l``
    template, filling in ``host``, ``srcfile`` and ``dstfile``.

    :param src: Path of the file on the remote host.
    :param dst: Local destination path.
    """
    general_section = ("execution", "general")
    copy_template = Template(Config.get(*general_section, "copycmd_r2l"))
    remote_host = Config.get(*general_section, "host")
    copy_command = copy_template.substitute(host=remote_host,
                                            srcfile=src,
                                            dstfile=dst)
    self.exec_cmd_and_show_output(copy_command)
def remote_exec_dace(self,
                     remote_workdir,
                     dace_file,
                     use_mpi=True,
                     fail_on_nonzero=False,
                     omp_num_threads=None,
                     additional_options_dict=None):
    """Run a DaCe Python file on the remote host through a start script.

    A ``start.sh`` script is generated that exports the required
    environment variables, changes to `remote_workdir`, runs the program,
    unsets the variables again and exits with the program's return value.
    The script is copied to the configured remote working directory,
    executed there and deleted afterwards.

    :param remote_workdir: Remote directory to run the program in.
    :param dace_file: Path of the Python file to execute on the remote.
    :param use_mpi: If true, launch through the configured
                    ``execution.mpi.mpiexec`` command template.
    :param fail_on_nonzero: Forwarded to ``exec_cmd_and_show_output``.
    :param omp_num_threads: If not None, exported as ``OMP_NUM_THREADS``
                            and recorded in ``instrumentation_results.txt``.
    :param additional_options_dict: Extra environment variables (name to
                                    value) exported for the run. Defaults
                                    to no extra variables.
    """
    if additional_options_dict is None:
        additional_options_dict = {}

    run = "${command} "
    if use_mpi:
        run = Config.get("execution", "mpi", "mpiexec")
        nprocs = Config.get("execution", "mpi", "num_procs")
    else:
        nprocs = 1

    repetitions = Config.get("execution", "general", "repetitions")

    omp_num_threads_str = ""
    omp_num_threads_unset_str = ""
    perf_instrumentation_result_marker = ""
    if omp_num_threads is not None:
        omp_num_threads_str = "export OMP_NUM_THREADS=" + str(
            omp_num_threads) + "\n"
        omp_num_threads_unset_str = "unset OMP_NUM_THREADS\n"
        perf_instrumentation_result_marker = "echo '# ;%s; Running in multirun config' >> %s/instrumentation_results.txt\n" % (
            omp_num_threads_str.replace("\n", ""), remote_workdir)

    # Create string from all misc options
    miscoptstring = ""
    miscoptresetstring = ""
    for optkey, optval in additional_options_dict.items():
        miscoptstring += "export " + str(optkey) + "=" + str(optval) + "\n"
        miscoptresetstring += "unset " + str(optkey) + "\n"

    # Create a startscript which exports necessary env-vars
    start_sh = ("set -x\n" +
                "export DACE_compiler_use_cache=1\n" +
                "export DACE_optimizer_interface=''\n" +
                "export DACE_profiling=1\n" +
                "export DACE_treps=" + str(repetitions) + "\n" +
                miscoptstring +
                omp_num_threads_str +
                "cd " + remote_workdir + "\n" +
                perf_instrumentation_result_marker)
    s = Template(run + " ")
    cmd = s.substitute(command="python3 " + dace_file, num_procs=nprocs)
    start_sh += cmd + "\n"
    start_sh += "export RETVAL=$?\n"
    start_sh += (
        "unset DACE_compiler_use_cache\n" +
        "unset DACE_optimizer_interface\n" + "unset DACE_treps\n" +
        "unset DACE_profiling\n" + omp_num_threads_unset_str +
        miscoptresetstring +
        # TODO: separate program error and system error
        "exit $RETVAL\n")

    workdir = Config.get("execution", "general", "workdir")
    remote_script = workdir + "/start.sh"

    # The local copy of the script is only needed until it has been
    # copied to the remote, so the temporary directory is removed right
    # after the copy.
    with tempfile.TemporaryDirectory() as tempdir:
        startsh_file = os.path.join(tempdir, "start.sh")
        with open(startsh_file, "w") as fh:
            fh.write(start_sh)
        # Mark the script as executable for its owner
        st = os.stat(startsh_file)
        os.chmod(startsh_file, st.st_mode | stat.S_IEXEC)

        self.copy_file_to_remote(startsh_file, remote_script)

    s = Template(Config.get("execution", "general", "execcmd"))
    cmd = s.substitute(host=Config.get("execution", "general", "host"),
                       command=remote_script)
    self.exec_cmd_and_show_output(cmd, fail_on_nonzero)

    self.remote_delete_file(remote_script)
def remote_delete_dir(self, deldir):
    """Recursively delete a directory on the remote host.

    Runs ``rm -r <deldir>`` through the ``execution.general.execcmd``
    command template.

    :param deldir: Path of the remote directory to delete.
    """
    general_section = ("execution", "general")
    exec_template = Template(Config.get(*general_section, "execcmd"))
    remote_host = Config.get(*general_section, "host")
    delete_command = exec_template.substitute(host=remote_host,
                                              command="rm -r " + deldir)
    self.exec_cmd_and_show_output(delete_command)
def timethis(sdfg, title, flop_count, f, *args, **kwargs):
    """ Runs a function multiple (`DACE_treps`) times, logs the running
        times to a file, and prints the median time (with FLOPs if given).

        Each invocation is timed on its own, so neither the one-time setup
        (progress-bar import, printing) nor the per-iteration overhead of
        the progress iterator is attributed to the measured function.

        :param sdfg: The SDFG belonging to the measurement.
        :param title: A title of the measurement.
        :param flop_count: Number of floating point operations in `program`.
                           If greater than zero, produces a median FLOPS
                           report.
        :param f: The function to measure.
        :param args: Arguments to invoke the function with.
        :param kwargs: Keyword arguments to invoke the function with.
        :return: Latest return value of the function.
    """
    REPS = int(Config.get('treps'))
    ret = None
    print('\nProfiling...')

    iterator = range(REPS)
    if Config.get_bool('profiling_status'):
        try:
            from tqdm import tqdm
            iterator = tqdm(iterator, desc="Profiling", file=sys.stdout)
        except ImportError:
            print(
                'WARNING: Cannot show profiling progress, missing optional '
                'dependency tqdm...\n\tTo see a live progress bar please install '
                'tqdm (`pip install tqdm`)\n\tTo disable this feature (and '
                'this warning) set `profiling_status` to false in the dace '
                'config (~/.dace.conf).')

    durations = []
    for _ in iterator:
        # Time only the call itself
        call_start = timer()
        ret = f(*args, **kwargs)
        durations.append(timer() - call_start)

    diffs = np.array(durations)

    problem_size = sys.argv[1] if len(sys.argv) >= 2 else 0

    profiling_dir = os.path.join(sdfg.build_folder, 'profiling')
    os.makedirs(profiling_dir, exist_ok=True)
    timestamp_string = str(int(time.time() * 1000))
    outfile_path = os.path.join(profiling_dir,
                                'results-' + timestamp_string + '.csv')

    # One CSV row per repetition
    with open(outfile_path, 'w') as outfile:
        outfile.write('Program,Optimization,Problem_Size,Runtime_sec\n')
        for d in diffs:
            outfile.write('%s,%s,%s,%.8f\n' %
                          (sdfg.name, title, problem_size, d))

    time_secs = np.median(diffs)
    if flop_count > 0:
        GFLOPs = (flop_count / time_secs) * 1e-9
        print(title, GFLOPs, 'GFLOP/s (', time_secs * 1000, 'ms)')
    else:
        print(title, time_secs * 1000, 'ms')

    return ret
def expand(self, sdfg, state, *args, **kwargs) -> str:
    """
    Selects an implementation for this library node and applies the
    corresponding expansion transformation.

    The implementation is taken, in order, from: the node's own setting
    (possibly replaced by the configured default when the library's
    ``override`` option is set), the node type default, the library
    default, and finally the configured default.

    :return: the name of the expanded implementation
    """
    node_type = type(self)
    implementation = self.implementation
    library_name = getattr(node_type, '_dace_library_name', '')

    try:
        config_implementation = (Config.get("library", library_name,
                                            "default_implementation")
                                 if library_name else None)
    except KeyError:
        # Non-standard libraries are not defined in the config schema, and
        # thus might not exist in the config.
        config_implementation = None

    if config_implementation is not None:
        try:
            config_override = Config.get("library", library_name, "override")
            if config_override and implementation in self.implementations:
                if implementation is not None:
                    warnings.warn("Overriding explicitly specified "
                                  "implementation {} for {} with {}.".format(
                                      implementation, self.label,
                                      config_implementation))
                implementation = config_implementation
        except KeyError:
            # No override entry for this library: keep the current choice
            pass

    # Fallback chain: node default, then library default, then config
    if implementation is None:
        implementation = node_type.default_implementation
    if implementation is None:
        import dace.library  # Avoid cyclic dependency
        registered = dace.library._DACE_REGISTERED_LIBRARIES
        implementation = registered[
            node_type._dace_library_name].default_implementation
    if implementation is None:
        implementation = config_implementation

    # Otherwise we don't know how to expand
    if implementation is None:
        raise ValueError("No implementation or default "
                         "implementation specified.")
    if implementation not in self.implementations:
        raise KeyError("Unknown implementation for node {}: {}".format(
            node_type.__name__, implementation))

    transformation_type = node_type.implementations[implementation]
    sdfg_id = sdfg.sdfg_id
    state_id = sdfg.nodes().index(state)
    subgraph = {transformation_type._match_node: state.node_id(self)}
    transformation = transformation_type(sdfg, sdfg_id, state_id, subgraph, 0)
    if not transformation.can_be_applied(state, 0, sdfg):
        raise RuntimeError("Library node "
                           "expansion applicability check failed.")
    sdfg.append_transformation(transformation)
    transformation.apply(state, sdfg, *args, **kwargs)
    return implementation
def cmake_options():
    """
    Builds the CMake options that enable MPI support.

    :return: A list with the MPI C++ compiler definition (absolute path of
             the configured ``compiler.mpi.executable``) and the flag that
             turns MPI on.
    """
    mpi_executable = Config.get("compiler", "mpi", "executable")
    compiler = make_absolute(mpi_executable)
    options = ["-DMPI_CXX_COMPILER=\"{}\"".format(compiler)]
    options.append("-DDACE_ENABLE_MPI=ON")
    return options
def config_get(self, *key_hierarchy):
    """
    Reads a configuration value, preferring this object's own configuration.

    :param key_hierarchy: Keys forwarded unchanged to the ``get`` call.
    :return: The result of ``self._config.get`` if ``self._config`` is set,
             otherwise the result of ``Config.get``.
    """
    source = Config if self._config is None else self._config
    return source.get(*key_hierarchy)
import os
import logging

import dace.library
from dace.config import Config

log = logging.getLogger(__name__)

# Importing this module fails unless one of the two ONNX Runtime location
# variables is present in the environment.
if 'ORT_ROOT' not in os.environ and 'ORT_RELEASE' not in os.environ:
    raise ValueError("This environment expects the environment variable "
                     "ORT_ROOT or ORT_RELEASE to be set (see README.md)")

# Import-time side effect: the global configuration entry
# compiler.cuda.max_concurrent_streams is forced to -1 if it has any other
# value.
if Config.get("compiler", "cuda", "max_concurrent_streams") != -1:
    log.info("Setting compiler.cuda.max_concurrent_streams to -1")
    Config.set("compiler", "cuda", "max_concurrent_streams", value=-1)


def _get_src_includes():
    """
    Get the includes and dll path when ORT is built from source.

    Reads the ``ORT_ROOT`` environment variable (raises ``KeyError`` if it
    is unset) and looks for the build directory under ``build/Linux``.
    """
    # NOTE(review): the visible part of this function ends after computing
    # `ort_dll_path` and returns nothing -- the remainder appears to be cut
    # off; confirm against the full file.
    ort_path = os.path.abspath(os.environ['ORT_ROOT'])
    # Prefer the build directory named after the configured build type...
    cand_path = os.path.join(ort_path, "build", "Linux",
                             dace.Config.get("compiler", "build_type"))

    if os.path.isdir(cand_path):
        ort_build_path = cand_path
    else:
        # ...and fall back to the Release build directory otherwise.
        ort_build_path = os.path.join(ort_path, "build", "Linux", "Release")

    ort_dll_path = os.path.join(ort_build_path, "libonnxruntime.so")
def unparse_tasklet(sdfg, state_id, dfg, node, function_stream,
                    callsite_stream, locals, ldepth, toplevel_schedule,
                    codegen):
    """
    Writes the code of a tasklet node to ``callsite_stream``.

    Non-Python tasklets must be C++; their code is written verbatim.
    Python tasklets are transformed statement by statement and unparsed
    to C++.

    :param sdfg: The SDFG containing the tasklet.
    :param state_id: Index of the state (into ``sdfg.nodes()``) that
                     contains the tasklet.
    :param dfg: Graph passed to ``is_write_conflicted`` for output edges.
    :param node: The tasklet node to generate code for.
    :param function_stream: Not used by this function.
    :param callsite_stream: Stream that receives the generated code.
    :param locals: Passed through to the C++ unparser.
    :param ldepth: Local depth; the unparser is invoked with ``ldepth + 1``.
    :param toplevel_schedule: Passed as ``sdfg_schedule`` to
                              ``is_write_conflicted``.
    :param codegen: Passed through to ``DaCeKeywordRemover``.
    :raise ValueError: If the tasklet language is neither Python nor C++.
    """
    # NOTE(review): the early exits return "", while the C++ path and the
    # Python path return None -- confirm callers ignore the return value.

    if node.label is None or node.label == "":
        return ""

    state_dfg = sdfg.nodes()[state_id]

    # Not [], "" or None
    if not node.code:
        return ""

    # If raw C++ code, return the code directly
    if node.language != dtypes.Language.Python:
        # If this code runs on the host and is associated with a GPU stream,
        # set the stream to a local variable.
        max_streams = int(
            Config.get("compiler", "cuda", "max_concurrent_streams"))
        if (max_streams >= 0
                and not is_devicelevel_gpu(sdfg, state_dfg, node)
                and hasattr(node, "_cuda_stream")):
            callsite_stream.write(
                'int __dace_current_stream_id = %d;\n%sStream_t __dace_current_stream = dace::cuda::__streams[__dace_current_stream_id];'
                % (node._cuda_stream,
                   Config.get('compiler', 'cuda', 'backend')),
                sdfg,
                state_id,
                node,
            )

        if node.language != dtypes.Language.CPP:
            raise ValueError(
                "Only Python or C++ code supported in CPU codegen, got: {}".
                format(node.language))
        callsite_stream.write(
            type(node).__properties__["code"].to_string(node.code), sdfg,
            state_id, node)

        # Host-side tasklets tied to a stream are followed by stream
        # synchronization code.
        if hasattr(node, "_cuda_stream") and not is_devicelevel_gpu(
                sdfg, state_dfg, node):
            synchronize_streams(sdfg, state_dfg, state_id, node, node,
                                callsite_stream)
        return

    body = node.code.code

    # Map local names to memlets (for WCR detection)
    # Each entry is (memlet, is-not-write-conflicted, wcr, connector type),
    # keyed by connector name.
    memlets = {}
    for edge in state_dfg.all_edges(node):
        u, uconn, v, vconn, memlet = edge
        if u == node:
            # Outgoing edge: keyed by the tasklet's output connector
            memlet_nc = not is_write_conflicted(
                dfg, edge, sdfg_schedule=toplevel_schedule)
            memlet_wcr = memlet.wcr
            if uconn in u.out_connectors:
                conntype = u.out_connectors[uconn]
            else:
                conntype = None

            memlets[uconn] = (memlet, memlet_nc, memlet_wcr, conntype)
        elif v == node:
            # Incoming edge: keyed by the tasklet's input connector
            if vconn in v.in_connectors:
                conntype = v.in_connectors[vconn]
            else:
                conntype = None

            memlets[vconn] = (memlet, False, None, conntype)

    callsite_stream.write("// Tasklet code (%s)\n" % node.label, sdfg,
                          state_id, node)
    for stmt in body:
        # Work on a copy so the tasklet's stored code is left untouched
        stmt = copy.deepcopy(stmt)
        # NOTE(review): the result of this visit is overwritten below and
        # `stmt` (not `rk`) is passed on -- presumably the visitor modifies
        # `stmt` in place; confirm.
        rk = StructInitializer(sdfg).visit(stmt)
        if isinstance(stmt, ast.Expr):
            rk = DaCeKeywordRemover(sdfg, memlets, sdfg.constants,
                                    codegen).visit_TopLevelExpr(stmt)
        else:
            rk = DaCeKeywordRemover(sdfg, memlets, sdfg.constants,
                                    codegen).visit(stmt)

        if rk is not None:
            # Unparse to C++ and add 'auto' declarations if locals not declared
            result = StringIO()
            cppunparse.CPPUnparser(rk, ldepth + 1, locals, result)
            callsite_stream.write(result.getvalue(), sdfg, state_id, node)
def consume_programs(self):
    """
    Takes one command from the executor queue and processes it.

    Waits up to 3 seconds for a command and returns silently if none
    arrives. ``run`` commands wait for a free slot, start a runner, and --
    if the runner is still registered in ``self._current_runs`` after
    ``RUNNING_TIMEOUT`` seconds -- consume its output locally into
    ``self._orphaned_runs``. ``control`` commands perform the
    ``startgroup``, ``remove_group`` and ``endgroup`` operations on the
    per-client performance data directory.
    """
    # NOTE(review): block nesting below was reconstructed from
    # whitespace-collapsed source; in particular confirm which statements
    # sit inside `with self._run_cv:`.

    try:
        cmd = self._executor_queue.get(timeout=3)

        if cmd['cmd'] == "run":
            # Poll (every 0.5 s) until a slot is available
            while True:
                with self._run_cv:
                    if self._slot_available:
                        break
                import time
                time.sleep(0.5)

            with self._run_cv:
                self._slot_available = False
                print("Running task")

                self._task_dict[cmd['index']]['state'] = 'running'

                runner = self.run(
                    cmd['cot'], {
                        'index': cmd['index'],
                        'config_path': cmd['config_path'],
                        'client_id': cmd['cid'],
                        'reset-perfdata': cmd['reset-perfdata'],
                        'perfopts': cmd['opt']['perfopts']
                    })
                print("Wait for oplock")
                with self._oplock:
                    self._current_runs[cmd['cid']] = runner

                import time

                # Wait a predefined time for clients to catch up on the outputs
                time.sleep(RUNNING_TIMEOUT)
                with self._oplock:
                    # The run is handled locally only if its entry is still
                    # present in `_current_runs`.
                    run_locally = True
                    try:
                        x = self._current_runs[cmd['cid']]
                    except:
                        # NOTE(review): bare except; a missing key is the
                        # expected failure here.
                        run_locally = False

                if run_locally:
                    print("running locally")

                    def tmp():
                        # Move the run from `_current_runs` to a fresh
                        # output list in `_orphaned_runs`, then drain the
                        # runner into that list.
                        with self._oplock:
                            del self._current_runs[cmd['cid']]
                            try:
                                c = self._orphaned_runs[cmd['cid']]
                            except:
                                self._orphaned_runs[cmd['cid']] = []
                            self._orphaned_runs[cmd['cid']].append([])
                        print("Starting runner")
                        for x in runner():
                            self._orphaned_runs[cmd['cid']][-1] += x

                    # Because this holds locks (and the output should be generated even if nobody asks for it immediately), this is run when the timeout for direct interception expires
                    tmp()
        elif cmd['cmd'] == 'control':
            # Control operations that must be synchronous with execution (e.g. for cleanup, storage operations)
            with self._oplock:
                self._task_dict[cmd['index']]['state'] = 'running'

            if cmd['operation'] == 'startgroup':
                from diode.db_scripts.db_setup import db_setup
                perf_tmp_dir = ExecutorServer.getPerfdataDir(cmd['cid'])
                perfdata_path = os.path.join(perf_tmp_dir, "perfdata.db")

                # Clean database and create tables
                db_setup(perf_tmp_dir)

            elif cmd['operation'] == 'remove_group':
                # Delete the database file, then its (now empty) directory
                perfdir = ExecutorServer.getPerfdataDir(cmd['cid'])
                perfdata_path = os.path.join(perfdir, "perfdata.db")
                os.remove(perfdata_path)
                os.rmdir(perfdir)

            elif cmd['operation'] == 'endgroup':
                print("Ending group")
                from diode.db_scripts.sql_to_json import MergeRuns, Conserver
                from dace.config import Config

                config_path = cmd['config_path']

                # Load the client's configuration and read the repetition
                # count while holding the configuration lock.
                with config_lock:
                    Config.load(config_path)
                    repetitions = Config.get("execution", "general",
                                             "repetitions")

                perf_tmp_dir = ExecutorServer.getPerfdataDir(cmd['cid'])
                perfdata_path = os.path.join(perf_tmp_dir, "perfdata.db")
                can_path = os.path.join(perf_tmp_dir, 'current.can')

                mr = MergeRuns()
                mr.mergev2(perfdata_path)
                print("Merged into " + perfdata_path)

                cons = Conserver()
                # TODO: Add sdfgs
                cons.conserveAll(perfdata_path,
                                 can_path,
                                 "",
                                 repetitions,
                                 clear_existing=False)

                print("Merged and Conserved!")
                self._perfdata_available[cmd['cid']] = can_path

            # The control task is finished: drop it from the task table
            with self._oplock:
                del self._task_dict[cmd['index']]

    except queue.Empty:
        return
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """
    Configures and compiles a DaCe program in the specified folder into a
    shared library file.

    :param program_folder: Folder containing all files necessary to build,
                           equivalent to what was passed to
                           `generate_program_folder`.
    :param program_name: Name of the program (and of the resulting library).
                         Defaults to the base name of `program_folder`.
    :param output_stream: Additional output stream to write to (used for
                          DIODE client).
    :return: Path to the compiled shared library file.
    :raise cgx.CompilerConfigurationError: If a compiler executable cannot be
                                           found or CMake configuration fails
                                           twice.
    :raise cgx.CompilationError: If building fails.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    with open(os.path.join(program_folder, "dace_files.csv"), "r") as fp:
        file_list = [line.strip().split(",") for line in fp]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Windows-only workaround: Override Visual C++'s linker to use
    # Multi-Threaded (MT) mode. This fixes linkage in CUDA applications where
    # CMake fails to do so.
    if os.name == 'nt':
        if '_CL_' not in os.environ:
            os.environ['_CL_'] = '/MT'
        elif '/MT' not in os.environ['_CL_']:
            os.environ['_CL_'] = os.environ['_CL_'] + ' /MT'

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    with open(os.path.join(program_folder, "dace_environments.csv"),
              "r") as fp:
        environments = set(l.strip() for l in fp)

    environments = dace.library.get_environments_and_dependencies(
        environments)
    environment_flags, cmake_link_flags = get_environment_flags(environments)
    # Sorted so that the command string is stable between runs
    cmake_command += sorted(environment_flags)

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in sorted(targets.items()):
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise cgx.CompilerConfigurationError(str(ex))

    cmake_command.append("-DDACE_LIBS=\"{}\"".format(" ".join(
        sorted(libraries))))

    # Set linker and linker arguments, iff they have been specified
    cmake_linker = Config.get('compiler', 'linker', 'executable') or ''
    cmake_linker = cmake_linker.strip()
    if cmake_linker:
        cmake_linker = make_absolute(cmake_linker)
        cmake_command.append(f'-DCMAKE_LINKER="{cmake_linker}"')
    cmake_link_flags = (
        ' '.join(sorted(cmake_link_flags)) + ' ' +
        (Config.get('compiler', 'linker', 'args') or '')).strip()
    if cmake_link_flags:
        cmake_command.append(
            f'-DCMAKE_SHARED_LINKER_FLAGS="{cmake_link_flags}"')
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        # Skip configuration if the stored command matches the current one
        if not identical_file_exists(cmake_filename, cmake_command):
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise cgx.CompilerConfigurationError('Configuration failure')
            else:
                raise cgx.CompilerConfigurationError(
                    'Configuration failure:\n' + ex.output)

    # Remember the command that produced the current configuration
    with open(cmake_filename, "w") as fp:
        fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise cgx.CompilationError('Compiler failure')
        else:
            raise cgx.CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
def __getitem__(self, *key):
    """
    Subscript access to the global configuration.

    :param key: Forwarded unchanged (unpacked) to ``Config.get``.
    :return: Whatever ``Config.get`` returns for ``key``.
    """
    value = Config.get(*key)
    return value
def get_generated_codeobjects(self):
    """
    Assembles the code objects of the Xilinx backend.

    Produces one host code object (initialization/exit functions plus all
    collected host codes), one device code object per collected kernel
    code, and one ``<name>_memory_interfaces`` CSV code object per host
    code entry.

    :return: A list with the host code object first, followed by the
             kernel and memory-interface code objects.
    :raise CodegenError: If the configured ``compiler.xilinx.mode`` is not
                         one of the handled execution modes.
    :raise RuntimeError: If only some interface assignments are set.
    """
    # NOTE(review): the line breaks inside the two triple-quoted templates
    # below were reconstructed from whitespace-collapsed source; verify them
    # against the original file.

    execution_mode = Config.get("compiler", "xilinx", "mode")

    # The kernel file name is emitted as a C++ expression: the
    # DACE_BINARY_DIR macro followed by a string literal that is closed by
    # the mode-specific suffix below.
    kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
    if execution_mode == "software_emulation":
        kernel_file_name += "_sw_emu.xclbin\""
        xcl_emulation_mode = "\"sw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware_emulation":
        kernel_file_name += "_hw_emu.xclbin\""
        xcl_emulation_mode = "\"hw_emu\""
        xilinx_sdx = "DACE_VITIS_DIR"
    elif execution_mode == "hardware" or execution_mode == "simulation":
        kernel_file_name += "_hw.xclbin\""
        xcl_emulation_mode = None
        xilinx_sdx = None
    else:
        raise dace.codegen.codegen.CodegenError(
            "Unknown Xilinx execution mode: {}".format(execution_mode))

    # Generated code sets each variable when a value was chosen above and
    # unsets it otherwise.
    set_env_vars = ""
    set_str = "dace::set_environment_variable(\"{}\", {});\n"
    unset_str = "dace::unset_environment_variable(\"{}\");\n"
    set_env_vars += (set_str.format("XCL_EMULATION_MODE", xcl_emulation_mode)
                     if xcl_emulation_mode is not None else
                     unset_str.format("XCL_EMULATION_MODE"))
    set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx)
                     if xilinx_sdx is not None else
                     unset_str.format("XILINX_SDX"))

    host_code = CodeIOStream()
    host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
#include <iostream>\n\n""")

    self._frame.generate_fileheader(self._global_sdfg, host_code)

    host_code.write("""
dace::fpga::Context *dace::fpga::_context;

DACE_EXPORTED int __dace_init_xilinx({signature}) {{
    {environment_variables}
    dace::fpga::_context = new dace::fpga::Context();
    dace::fpga::_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({signature}) {{
    delete dace::fpga::_context;
}}

{host_code}""".format(signature=self._global_sdfg.signature(),
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79,
                              kernel_name=name,
                              code=code)
                          for (name, code) in self._host_codes
                      ])))

    host_code_obj = CodeObject(self._program_name,
                               host_code.getvalue(),
                               "cpp",
                               XilinxCodeGen,
                               "Xilinx",
                               target_type="host")

    kernel_code_objs = [
        CodeObject(kernel_name,
                   code,
                   "cpp",
                   XilinxCodeGen,
                   "Xilinx",
                   target_type="device")
        for (kernel_name, code) in self._kernel_codes
    ]

    # Configuration file with interface assignments
    are_assigned = [
        v is not None for v in self._interface_assignments.values()
    ]
    bank_assignment_code = []
    # Assignments must be all-or-nothing; `are_assigned` is collapsed from
    # a list of flags into a single boolean here.
    if any(are_assigned):
        if not all(are_assigned):
            raise RuntimeError("Some, but not all global memory arrays "
                               "were assigned to memory banks: {}".format(
                                   self._interface_assignments))
        are_assigned = True
    else:
        are_assigned = False
    # NOTE(review): `bank_assignment_code` is not reset between iterations,
    # so each file also contains the lines collected for earlier names --
    # confirm this accumulation is intended.
    for name, _ in self._host_codes:
        # Only iterate over assignments if any exist
        if are_assigned:
            for (kernel_name, interface_name), (
                    memory_type,
                    memory_bank) in self._interface_assignments.items():
                if kernel_name != name:
                    continue
                bank_assignment_code.append("{},{},{}".format(
                    interface_name, memory_type.name, memory_bank))
        # Create file even if there are no assignments
        kernel_code_objs.append(
            CodeObject("{}_memory_interfaces".format(name),
                       "\n".join(bank_assignment_code),
                       "csv",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device"))

    return [host_code_obj] + kernel_code_objs
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """
    Configures and compiles a DaCe program in the specified folder into a
    shared library file.

    :param program_folder: Folder containing all files necessary to build,
                           equivalent to what was passed to
                           `generate_program_folder`.
    :param program_name: Name of the program (and of the resulting library).
                         Defaults to the base name of `program_folder`.
    :param output_stream: Additional output stream to write to (used for
                          DIODE client).
    :return: Path to the compiled shared library file.
    :raise KeyError: If two environments define the same CMake variable with
                     different values.
    :raise CompilerConfigurationError: If a compiler executable cannot be
                                       found or CMake configuration fails
                                       twice.
    :raise CompilationError: If building fails.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    with open(os.path.join(program_folder, "dace_files.csv"), "r") as fp:
        file_list = [line.strip().split(",") for line in fp]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    with open(os.path.join(program_folder, "dace_environments.csv"),
              "r") as fp:
        environments = set(l.strip() for l in fp)

    cmake_minimum_version = [0]
    cmake_variables = dict()
    cmake_packages = set()
    cmake_includes = set()
    cmake_libraries = set()
    cmake_compile_flags = set()
    cmake_link_flags = set()
    cmake_files = set()
    cmake_module_paths = set()
    for env_name in environments:
        env = dace.library.get_environment(env_name)
        if (env.cmake_minimum_version is not None
                and len(env.cmake_minimum_version) > 0):
            version_list = list(map(int,
                                    env.cmake_minimum_version.split(".")))
            # Keep the highest requested version. Lists compare component
            # by component, so a smaller leading component can never be
            # outvoted by a larger trailing one (e.g. 2.5 does not beat 3.1).
            if version_list > cmake_minimum_version:
                cmake_minimum_version = version_list
        for var in env.cmake_variables:
            if (var in cmake_variables
                    and cmake_variables[var] != env.cmake_variables[var]):
                raise KeyError(
                    "CMake variable {} was redefined from {} to {}.".format(
                        var, cmake_variables[var], env.cmake_variables[var]))
            cmake_variables[var] = env.cmake_variables[var]
        cmake_packages |= set(env.cmake_packages)
        cmake_includes |= set(env.cmake_includes)
        cmake_libraries |= set(env.cmake_libraries)
        cmake_compile_flags |= set(env.cmake_compile_flags)
        cmake_link_flags |= set(env.cmake_link_flags)
        # Make path absolute
        env_dir = os.path.dirname(env._dace_file_path)
        cmake_files |= set(
            (f if os.path.isabs(f) else os.path.join(env_dir, f)) +
            (".cmake" if not f.endswith(".cmake") else "")
            for f in env.cmake_files)
        for header in env.headers:
            if os.path.isabs(header):
                # Giving an absolute path is not good practice, but allow it
                # for emergency overriding
                cmake_includes.add(os.path.dirname(header))
            abs_path = os.path.join(env_dir, header)
            if os.path.isfile(abs_path):
                # Allow includes stored with the library, specified with a
                # relative path
                cmake_includes.add(env_dir)
                break

    environment_flags = [
        "-DDACE_ENV_MINIMUM_VERSION={}".format(".".join(
            map(str, cmake_minimum_version))),
        # Make CMake list of key-value pairs
        "-DDACE_ENV_VAR_KEYS=\"{}\"".format(";".join(cmake_variables.keys())),
        "-DDACE_ENV_VAR_VALUES=\"{}\"".format(";".join(
            cmake_variables.values())),
        "-DDACE_ENV_PACKAGES=\"{}\"".format(" ".join(cmake_packages)),
        "-DDACE_ENV_INCLUDES=\"{}\"".format(" ".join(cmake_includes)),
        "-DDACE_ENV_LIBRARIES=\"{}\"".format(" ".join(cmake_libraries)),
        "-DDACE_ENV_COMPILE_FLAGS=\"{}\"".format(
            " ".join(cmake_compile_flags)),
        # Environment link flags are not passed here; they are appended to
        # CMAKE_SHARED_LINKER_FLAGS further down.
        "-DDACE_ENV_CMAKE_FILES=\"{}\"".format(";".join(cmake_files)),
    ]
    # Escape variable expansions to defer their evaluation
    environment_flags = [
        cmd.replace("$", "_DACE_CMAKE_EXPAND") for cmd in environment_flags
    ]
    cmake_command += environment_flags

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise CompilerConfigurationError(str(ex))

    # Join all linker flag sources with single spaces. Empty entries are
    # skipped so that no two flags are ever fused together.
    linker_flag_parts = [
        Config.get('compiler', 'linker', 'args'),
        Config.get('compiler', 'linker', 'additional_args'),
    ]
    linker_flag_parts.extend(cmake_link_flags)
    shared_linker_flags = " ".join(part for part in linker_flag_parts
                                   if part)

    # TODO: it should be possible to use the default arguments/compilers
    # found by CMake
    cmake_command += [
        "-DDACE_LIBS=\"{}\"".format(" ".join(libraries)),
        "-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))),
        "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(shared_linker_flags),
    ]
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        _run_liveoutput(cmake_command,
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

    # Record the command that produced the current configuration
    with open(cmake_filename, "w") as fp:
        fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
def __init__(self, base_indentation=0):
    """
    Initializes the stream with zero indentation and reads the formatting
    options from the configuration.

    :param base_indentation: Accepted by the signature; the body does not
                             read it.
    """
    super(CodeIOStream, self).__init__()
    self._indent = 0
    # Number of spaces per indentation level
    indentation_width = Config.get('compiler', 'indentation_spaces')
    self._spaces = int(indentation_width)
    # Value of the compiler.codegen_lineinfo option
    self._lineinfo = Config.get_bool('compiler', 'codegen_lineinfo')
def __init__(self, *args, **kwargs):
    """
    Runs the parent initializer only when the configured FPGA vendor is
    Xilinx (compared case-insensitively).

    :param args: Positional arguments forwarded to the parent initializer.
    :param kwargs: Keyword arguments forwarded to the parent initializer.
    """
    vendor = Config.get("compiler", "fpga_vendor").lower()
    if vendor == "xilinx":
        super().__init__(*args, **kwargs)
    # For any other vendor: don't register this code generator