def test_nccl_reduce_symbolic():
    ng = Config.get('compiler', 'cuda', 'max_number_gpus')
    n = 2
    sdfg: dace.SDFG = nccl_reduce_symbolic.to_sdfg(strict=True)
    outer_map = find_map_by_param(sdfg, 'root_gpu')
    if outer_map:
        outer_map.schedule = dtypes.ScheduleType.Sequential
    gpu_map = find_map_by_param(sdfg, 'gpu')
    gpu_map.schedule = dtypes.ScheduleType.GPU_Multidevice
    infer_types.set_default_schedule_storage_types_and_location(sdfg, None)
    sdfg.specialize(dict(num_gpus=ng))

    out = np.ndarray(shape=[ng, n], dtype=np_dtype)
    out.fill(0)

    sdfg(out=out, N=n)

    res = np.array([ng * i for i in range(ng)])
    assert (np.unique(out) == res).all()
Example #2
    def testDefaultDataTypes(self):
        # Check that the configuration for default data types is enforced
        config_data_types = Config.get('compiler', 'default_data_types')

        code_str = """value1 = 10
value2=3.14
value3=5000000000"""
        inf_symbols = type_inference.infer_types(code_str)
        if config_data_types.lower() == "python":
            self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int64))
            self.assertEqual(inf_symbols["value2"],
                             dtypes.typeclass(np.float64))
        elif config_data_types.lower() == "c":
            self.assertEqual(inf_symbols["value1"], dtypes.typeclass(np.int32))
            self.assertEqual(inf_symbols["value2"],
                             dtypes.typeclass(np.float32))

        # in any case, value3 needs uint64
        self.assertEqual(inf_symbols["value3"], dtypes.typeclass(np.uint64))
Example #3
    def __init__(self, wrapped_type):
        # Convert python basic types
        if isinstance(wrapped_type, str):
            try:
                wrapped_type = getattr(numpy, wrapped_type)
            except AttributeError:
                raise ValueError("Unknown type: {}".format(wrapped_type))

        config_data_types = Config.get('compiler', 'default_data_types')
        if wrapped_type is int:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.int64
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.int32
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))
        elif wrapped_type is float:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.float64
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.float32
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))
        elif wrapped_type is complex:
            if config_data_types.lower() == 'python':
                wrapped_type = numpy.complex128
            elif config_data_types.lower() == 'c':
                wrapped_type = numpy.complex64
            else:
                raise NameError(
                    "Unknown configuration for default_data_types: {}".format(
                        config_data_types))

        self.type = wrapped_type  # Type in Python
        self.ctype = _CTYPES[wrapped_type]  # Type in C
        self.ctype_unaligned = self.ctype  # Type in C (without alignment)
        self.dtype = self  # For compatibility support with numpy
        self.bytes = _BYTES[wrapped_type]  # Number of bytes for this type
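
The three near-identical if/elif chains above invite a table-driven variant. A sketch of that design (not the actual DaCe implementation):

# Sketch: map Python builtins to numpy types via a lookup table keyed by
# the default_data_types configuration value.
import numpy

_DEFAULT_TYPE_MAP = {
    'python': {int: numpy.int64, float: numpy.float64,
               complex: numpy.complex128},
    'c': {int: numpy.int32, float: numpy.float32, complex: numpy.complex64},
}

def _resolve_builtin(wrapped_type, config_data_types):
    try:
        type_map = _DEFAULT_TYPE_MAP[config_data_types.lower()]
    except KeyError:
        raise NameError("Unknown configuration for default_data_types: "
                        "{}".format(config_data_types))
    # Non-builtin types pass through unchanged
    return type_map.get(wrapped_type, wrapped_type)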
Example #4
 def cmake_options():
     host_flags = Config.get("compiler", "xilinx", "host_flags")
     synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
     build_flags = Config.get("compiler", "xilinx", "build_flags")
     mode = Config.get("compiler", "xilinx", "mode")
     target_platform = Config.get("compiler", "xilinx", "platform")
     enable_debugging = ("ON" if Config.get_bool(
         "compiler", "xilinx", "enable_debugging") else "OFF")
     options = [
         "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
         "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
         "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
         "-DDACE_XILINX_MODE={}".format(mode),
         "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
         "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
     ]
     # Override Vitis/SDx/SDAccel installation directory
     if Config.get("compiler", "xilinx", "path"):
         options.append("-DVITIS_ROOT_DIR=\"{}\"".format(
             Config.get("compiler", "xilinx", "path").replace("\\", "/")))
     return options
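
The returned options are consumed by the CMake invocation assembled in configure_and_compile (Example #7 below); roughly, under hypothetical paths:

# Sketch: splice the target-specific options into the cmake command line.
import subprocess

cmake_command = ["cmake", '"/path/to/dace/codegen"']  # hypothetical path
cmake_command += cmake_options()
subprocess.check_call(" ".join(cmake_command), shell=True, cwd="build")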
Example #5
    def render_config_dialog(self):
        # Load metadata for configuration
        Config.load_schema()

        self.window = Gtk.Window()
        notebook = Gtk.Notebook()
        notebook.set_scrollable(True)
        self.window.add(notebook)

        # General (top-level) settings
        gtklabel = Gtk.Label()
        gtklabel.set_label('General')
        general_grid = Gtk.Grid()
        general_grid.set_hexpand(True)
        notebook.append_page(general_grid, gtklabel)
        columized = False

        for i, (cname, cval) in enumerate(sorted(Config.get().items())):
            meta = Config.get_metadata(cname)
            if meta['type'] == 'dict':
                gtklabel = Gtk.Label()
                gtklabel.set_label(meta['title'])
                grid = Gtk.Grid()
                grid.set_hexpand(True)
                notebook.append_page(grid, gtklabel)
                self.render_config_subtree(cval, (cname, ), grid)
                continue

            if not columized:
                general_grid.insert_column(0)
                general_grid.insert_column(1)
                columized = True
            self.render_config_element(cval, (cname, ), general_grid, i, meta)

        self.window.show_all()
        self.window.connect("delete-event", self.win_close_callback, None)
Example #6
 def cmake_options():
     compiler = make_absolute(Config.get("compiler", "xilinx",
                                         "executable"))
     host_flags = Config.get("compiler", "xilinx", "host_flags")
     synthesis_flags = Config.get("compiler", "xilinx", "synthesis_flags")
     build_flags = Config.get("compiler", "xilinx", "build_flags")
     mode = Config.get("compiler", "xilinx", "mode")
     target_platform = Config.get("compiler", "xilinx", "platform")
     enable_debugging = ("ON" if Config.get_bool(
         "compiler", "xilinx", "enable_debugging") else "OFF")
     options = [
         "-DSDACCEL_ROOT_DIR={}".format(
             os.path.dirname(os.path.dirname(compiler))),
         "-DDACE_XILINX_HOST_FLAGS=\"{}\"".format(host_flags),
         "-DDACE_XILINX_SYNTHESIS_FLAGS=\"{}\"".format(synthesis_flags),
         "-DDACE_XILINX_BUILD_FLAGS=\"{}\"".format(build_flags),
         "-DDACE_XILINX_MODE={}".format(mode),
         "-DDACE_XILINX_TARGET_PLATFORM=\"{}\"".format(target_platform),
         "-DDACE_XILINX_ENABLE_DEBUGGING={}".format(enable_debugging),
     ]
     return options
Example #7
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param program_name: Name of the compiled program. If not given, the
                             basename of `program_folder` is used.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    with open(os.path.join(program_folder, "dace_files.csv"), "r") as csvfile:
        file_list = [line.strip().split(",") for line in csvfile]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, file_name in file_list:
        path = os.path.join(src_folder, target_name, file_name)
        files.append(path)
        targets[target_name] = codegen.STRING_TO_TARGET[target_name]

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        cmake_command += target.cmake_options()
        try:
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass

    # TODO: it should be possible to use the default arguments/compilers
    #       found by CMake
    cmake_command += [
        "-DDACE_LIBS=\"{}\"".format(" ".join(libraries)),
        "-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))),
        "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
            Config.get('compiler', 'linker', 'args') +
            Config.get('compiler', 'linker', 'additional_args')),
    ]

    ##############################################
    # Configure
    try:
        _run_liveoutput(" ".join(cmake_command),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(" ".join(cmake_command),
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
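
A hedged usage sketch (the cache path is hypothetical):

# Sketch: compile a generated program folder and load the resulting library.
import ctypes

lib_path = configure_and_compile('/tmp/.dacecache/myprogram')
lib = ctypes.CDLL(lib_path)  # shared library produced by the CMake build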
Example #8
    def get_generated_codeobjects(self):

        execution_mode = Config.get("compiler", "xilinx", "mode")

        kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
        if execution_mode == "software_emulation":
            kernel_file_name += "_sw_emu.xclbin\""
            xcl_emulation_mode = "\"sw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware_emulation":
            kernel_file_name += "_hw_emu.xclbin\""
            xcl_emulation_mode = "\"hw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware" or execution_mode == "simulation":
            kernel_file_name += "_hw.xclbin\""
            xcl_emulation_mode = None
            xilinx_sdx = None
        else:
            raise cgx.CodegenError(
                "Unknown Xilinx execution mode: {}".format(execution_mode))

        set_env_vars = ""
        set_str = "dace::set_environment_variable(\"{}\", {});\n"
        unset_str = "dace::unset_environment_variable(\"{}\");\n"
        set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                        xcl_emulation_mode)
                         if xcl_emulation_mode is not None else
                         unset_str.format("XCL_EMULATION_MODE"))
        set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx) if xilinx_sdx
                         is not None else unset_str.format("XILINX_SDX"))
        set_env_vars += set_str.format(
            "EMCONFIG_PATH", "DACE_BINARY_DIR"
        ) if execution_mode == 'hardware_emulation' else unset_str.format(
            "EMCONFIG_PATH")

        host_code = CodeIOStream()
        host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
""")
        if len(self._dispatcher.instrumentation) > 1:
            host_code.write("""\
#include "dace/perf/reporting.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <limits>
""")
        host_code.write("\n\n")

        self._frame.generate_fileheader(self._global_sdfg, host_code,
                                        'xilinx_host')

        params_comma = self._global_sdfg.signature(with_arrays=False)
        if params_comma:
            params_comma = ', ' + params_comma

        host_code.write("""
DACE_EXPORTED int __dace_init_xilinx({sdfg.name}_t *__state{signature}) {{
    {environment_variables}

    __state->fpga_context = new dace::fpga::Context();
    __state->fpga_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({sdfg.name}_t *__state) {{
    delete __state->fpga_context;
}}

{host_code}""".format(signature=params_comma,
                      sdfg=self._global_sdfg,
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79, kernel_name=name, code=code)
                          for (name, code) in self._host_codes
                      ])))

        host_code_obj = CodeObject(self._program_name,
                                   host_code.getvalue(),
                                   "cpp",
                                   XilinxCodeGen,
                                   "Xilinx",
                                   target_type="host")

        kernel_code_objs = [
            CodeObject(kernel_name,
                       code,
                       "cpp",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device")
            for (kernel_name, code) in self._kernel_codes
        ]

        # Memory bank and streaming interfaces connectivity configuration file
        link_cfg = CodeIOStream()
        self._other_codes["link.cfg"] = link_cfg
        link_cfg.write("[connectivity]")
        are_assigned = [v is not None for v in self._bank_assignments.values()]
        if any(are_assigned):
            if not all(are_assigned):
                raise RuntimeError("Some, but not all global memory arrays "
                                   "were assigned to memory banks: {}".format(
                                       self._bank_assignments))
            # Emit mapping from kernel memory interfaces to DRAM banks
            for (kernel_name, interface_name), (
                    memory_type,
                    memory_bank) in self._bank_assignments.items():
                link_cfg.write(
                    f"sp={kernel_name}_1.m_axi_{interface_name}:{memory_type}[{memory_bank}]"
                )
        # Emit mapping between inter-kernel streaming interfaces
        for _, (src, dst) in self._stream_connections.items():
            link_cfg.write(f"stream_connect={src}:{dst}")

        other_objs = []
        for name, code in self._other_codes.items():
            name = name.split(".")
            other_objs.append(
                CodeObject(name[0],
                           code.getvalue(),
                           ".".join(name[1:]),
                           XilinxCodeGen,
                           "Xilinx",
                           target_type="device"))

        return [host_code_obj] + kernel_code_objs + other_objs
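
Given the format strings above, the emitted link.cfg for one kernel with two banked memory interfaces and a single inter-kernel stream would resemble the following (all names are hypothetical):

# Sketch of the generated connectivity configuration file:
example_link_cfg = """[connectivity]
sp=gemm_1.m_axi_gmem0:DDR[0]
sp=gemm_1.m_axi_gmem1:DDR[1]
stream_connect=producer_1.out_stream:consumer_1.in_stream
"""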
Example #9
    def get_generated_codeobjects(self):

        execution_mode = Config.get("compiler", "xilinx", "mode")

        kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
        if execution_mode == "software_emulation":
            kernel_file_name += "_sw_emu.xclbin\""
            xcl_emulation_mode = "\"sw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware_emulation":
            kernel_file_name += "_hw_emu.xclbin\""
            xcl_emulation_mode = "\"hw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware" or execution_mode == "simulation":
            kernel_file_name += "_hw.xclbin\""
            xcl_emulation_mode = None
            xilinx_sdx = None
        else:
            raise dace.codegen.codegen.CodegenError(
                "Unknown Xilinx execution mode: {}".format(execution_mode))

        set_env_vars = ""
        set_str = "dace::set_environment_variable(\"{}\", {});\n"
        unset_str = "dace::unset_environment_variable(\"{}\");\n"
        set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                        xcl_emulation_mode)
                         if xcl_emulation_mode is not None else
                         unset_str.format("XCL_EMULATION_MODE"))
        set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx) if xilinx_sdx
                         is not None else unset_str.format("XILINX_SDX"))

        host_code = CodeIOStream()
        host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
#include <iostream>\n\n""")

        self._frame.generate_fileheader(self._global_sdfg, host_code)

        host_code.write("""
dace::fpga::Context *dace::fpga::_context;

DACE_EXPORTED int __dace_init_xilinx({signature}) {{
    {environment_variables}
    dace::fpga::_context = new dace::fpga::Context();
    dace::fpga::_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({signature}) {{
    delete dace::fpga::_context;
}}

{host_code}""".format(signature=self._global_sdfg.signature(),
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(
                              separator="/" * 79, kernel_name=name, code=code)
                          for (name, code) in self._host_codes
                      ])))

        host_code_obj = CodeObject(self._program_name,
                                   host_code.getvalue(),
                                   "cpp",
                                   XilinxCodeGen,
                                   "Xilinx",
                                   target_type="host")

        kernel_code_objs = [
            CodeObject(kernel_name,
                       code,
                       "cpp",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device")
            for (kernel_name, code) in self._kernel_codes
        ]

        return [host_code_obj] + kernel_code_objs
Example #10
def test_set_temporary():
    path = ["compiler", "build_type"]
    current_value = Config.get(*path)
    with set_temporary(*path, value="I'm not a build type"):
        assert Config.get(*path) == "I'm not a build type"
    assert Config.get(*path) == current_value
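
The helper can be approximated with contextlib; a minimal sketch (the actual dace.config.set_temporary may differ):

from contextlib import contextmanager
from dace.config import Config

@contextmanager
def set_temporary_sketch(*path, value):
    old_value = Config.get(*path)
    Config.set(*path, value=value)
    try:
        yield
    finally:
        # Restore the previous value even if the body raises
        Config.set(*path, value=old_value)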
Example #11
 def __init__(self, base_indentation=0):
     super(CodeIOStream, self).__init__()
     self._indent = 0
     self._spaces = int(Config.get('compiler', 'indentation_spaces'))
Example #12
    def apply(self, sdfg: SDFG) -> None:
        graph: SDFGState = sdfg.nodes()[self.state_id]

        inner_map_entry: nodes.MapEntry = graph.nodes()[self.subgraph[
            GPUMultiTransformMap._map_entry]]

        number_of_gpus = self.number_of_gpus
        ngpus = Config.get("compiler", "cuda", "max_number_gpus")
        if number_of_gpus is None:
            number_of_gpus = ngpus
        if number_of_gpus > ngpus:
            raise ValueError(
                'Requesting more GPUs than specified in the DaCe config')

        # Avoiding import loops
        from dace.transformation.dataflow import (StripMining, InLocalStorage,
                                                  OutLocalStorage,
                                                  AccumulateTransient)

        # The user is responsible for the implementation of a Library node.
        scope_subgraph = graph.scope_subgraph(inner_map_entry)
        for node in scope_subgraph.nodes():
            if isinstance(node, nodes.LibraryNode):
                warnings.warn(
                    'Node %s is a library node, make sure to manually set the '
                    'implementation to a GPU compliant specialization.' % node)

        # Tile map into number_of_gpus tiles
        outer_map: nodes.Map = StripMining.apply_to(
            sdfg,
            dict(dim_idx=-1,
                 new_dim_prefix=self.new_dim_prefix,
                 tile_size=number_of_gpus,
                 tiling_type=dtypes.TilingType.NumberOfTiles),
            _map_entry=inner_map_entry)

        outer_map_entry: nodes.MapEntry = graph.scope_dict()[inner_map_entry]
        inner_map_exit: nodes.MapExit = graph.exit_node(inner_map_entry)
        outer_map_exit: nodes.MapExit = graph.exit_node(outer_map_entry)

        # Change map schedules
        inner_map_entry.map.schedule = dtypes.ScheduleType.GPU_Device
        outer_map.schedule = dtypes.ScheduleType.GPU_Multidevice

        symbolic_gpu_id = outer_map.params[0]

        # Add the parameter of the outer map
        for node in graph.successors(inner_map_entry):
            if isinstance(node, nodes.NestedSDFG):
                map_syms = inner_map_entry.range.free_symbols
                for sym in map_syms:
                    symname = str(sym)
                    if symname not in node.symbol_mapping.keys():
                        node.symbol_mapping[symname] = sym
                        node.sdfg.symbols[symname] = graph.symbols_defined_at(
                            node)[symname]

        # Add transient Data leading to the inner map
        prefix = self.new_transient_prefix
        for node in graph.predecessors(outer_map_entry):
            # Only AccessNodes are relevant
            if (isinstance(node, nodes.AccessNode)
                    and not (self.skip_scalar
                             and isinstance(node.desc(sdfg), Scalar))):
                if self.use_p2p and node.desc(
                        sdfg).storage is dtypes.StorageType.GPU_Global:
                    continue

                in_data_node = InLocalStorage.apply_to(sdfg,
                                                       dict(array=node.data,
                                                            prefix=prefix),
                                                       verify=False,
                                                       save=False,
                                                       node_a=outer_map_entry,
                                                       node_b=inner_map_entry)
                in_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                in_data_node.desc(sdfg).storage = dtypes.StorageType.GPU_Global

        wcr_data: Dict[str, Any] = {}
        # Add transient Data leading to the outer map
        for edge in graph.in_edges(outer_map_exit):
            node = graph.memlet_path(edge)[-1].dst
            if isinstance(node, nodes.AccessNode):
                data_name = node.data
                # Transients with write-conflict resolution need to be
                # collected first as AccumulateTransient creates a nestedSDFG
                if edge.data.wcr is not None:
                    dtype = sdfg.arrays[data_name].dtype
                    redtype = operations.detect_reduction_type(edge.data.wcr)
                    # Custom reduction can not have an accumulate transient,
                    # as the accumulation from the transient to the outer
                    # storage is not defined.
                    if redtype == dtypes.ReductionType.Custom:
                        warnings.warn(
                            'Using custom reductions in a map transformed by '
                            'GPUMultiTransformMap only works for small data '
                            'volumes; for large volumes there is no '
                            'guarantee.')
                        continue
                    identity = dtypes.reduction_identity(dtype, redtype)
                    wcr_data[data_name] = identity
                elif (not isinstance(node.desc(sdfg), Scalar)
                      or not self.skip_scalar):
                    if self.use_p2p and node.desc(
                            sdfg).storage is dtypes.StorageType.GPU_Global:
                        continue
                    # Transients without write-conflict resolution
                    if prefix + '_' + data_name in sdfg.arrays:
                        create_array = False
                    else:
                        create_array = True
                    out_data_node = OutLocalStorage.apply_to(
                        sdfg,
                        dict(array=data_name,
                             prefix=prefix,
                             create_array=create_array),
                        verify=False,
                        save=False,
                        node_a=inner_map_exit,
                        node_b=outer_map_exit)
                    out_data_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                    out_data_node.desc(
                        sdfg).storage = dtypes.StorageType.GPU_Global

        # Add Transients for write-conflict resolution
        if len(wcr_data) != 0:
            nsdfg = AccumulateTransient.apply_to(
                sdfg,
                options=dict(array_identity_dict=wcr_data, prefix=prefix),
                map_exit=inner_map_exit,
                outer_map_exit=outer_map_exit)
            nsdfg.schedule = dtypes.ScheduleType.GPU_Multidevice
            nsdfg.location['gpu'] = symbolic_gpu_id
            for transient_node in graph.successors(nsdfg):
                if isinstance(transient_node, nodes.AccessNode):
                    transient_node.desc(sdfg).location['gpu'] = symbolic_gpu_id
                    transient_node.desc(
                        sdfg).storage = dtypes.StorageType.GPU_Global
                    nsdfg.sdfg.arrays[
                        transient_node.label].location['gpu'] = symbolic_gpu_id
                    nsdfg.sdfg.arrays[
                        transient_node.
                        label].storage = dtypes.StorageType.GPU_Global
            infer_types.set_default_schedule_storage_types_and_location(
                nsdfg.sdfg, dtypes.ScheduleType.GPU_Multidevice,
                symbolic_gpu_id)

        # Remove the parameter of the outer_map from the sdfg symbols,
        # as it got added as a symbol in StripMining.
        if outer_map.params[0] in sdfg.free_symbols:
            sdfg.remove_symbol(outer_map.params[0])
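
A hedged usage sketch (the SDFG and map entry are placeholders):

# Sketch: apply the transformation to a matching map entry, mirroring the
# apply_to pattern used for StripMining and InLocalStorage above.
GPUMultiTransformMap.apply_to(sdfg, dict(use_p2p=False),
                              _map_entry=some_map_entry)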
Example #13
def preprocess_dace_program(
    f: Callable[..., Any],
    argtypes: Dict[str, data.Data],
    global_vars: Dict[str, Any],
    modules: Dict[str, Any],
    resolve_functions: bool = False,
    parent_closure: Optional[SDFGClosure] = None
) -> Tuple[PreprocessedAST, SDFGClosure]:
    """
    Preprocesses a ``@dace.program`` and all its nested functions, returning
    a preprocessed AST object and the closure of the resulting SDFG.
    :param f: A Python function to parse.
    :param argtypes: A dictionary of (name, type) for the given
                     function's arguments, which may pertain to data
                     nodes or symbols (scalars).
    :param global_vars: A dictionary of global variables in the closure
                        of `f`.
    :param modules: A dictionary from an imported module name to the
                    module itself.
    :param resolve_functions: If True, treats all global functions defined
                              outside of the program as returning constant
                              values.
    :param parent_closure: If not None, represents the closure of the parent of
                           the currently processed function.
    :return: A 2-tuple of the AST and its reduced (used) closure.
    """
    src_ast, src_file, src_line, src = astutils.function_to_ast(f)

    # Resolve data structures
    src_ast = StructTransformer(global_vars).visit(src_ast)

    src_ast = ModuleResolver(modules).visit(src_ast)
    # Convert modules after resolution
    for mod, modval in modules.items():
        if mod == 'builtins':
            continue
        newmod = global_vars[mod]
        #del global_vars[mod]
        global_vars[modval] = newmod

    # Resolve constants to their values (if they are not already defined in this scope)
    # and symbols to their names
    resolved = {
        k: v
        for k, v in global_vars.items() if k not in argtypes and k != '_'
    }
    closure_resolver = GlobalResolver(resolved, resolve_functions)

    # Append element to call stack and handle max recursion depth
    if parent_closure is not None:
        fid = id(f)
        if fid in parent_closure.callstack:
            raise DaceRecursionError(fid)
        if len(parent_closure.callstack) > Config.get(
                'frontend', 'implicit_recursion_depth'):
            raise TypeError(
                'Implicit (automatically parsed) recursion depth '
                'exceeded. Functions below this call will not be '
                'parsed. To change this setting, modify the value '
                '`frontend.implicit_recursion_depth` in .dace.conf')

        closure_resolver.closure.callstack = parent_closure.callstack + [fid]

    src_ast = closure_resolver.visit(src_ast)
    src_ast = LoopUnroller(resolved, src_file).visit(src_ast)
    src_ast = ConditionalCodeResolver(resolved).visit(src_ast)
    src_ast = DeadCodeEliminator().visit(src_ast)
    try:
        ctr = CallTreeResolver(closure_resolver.closure, resolved)
        ctr.visit(src_ast)
    except DaceRecursionError as ex:
        if id(f) == ex.fid:
            raise TypeError(
                'Parsing failed due to recursion in a data-centric '
                'context called from this function')
        else:
            raise ex
    used_arrays = ArrayClosureResolver(closure_resolver.closure)
    used_arrays.visit(src_ast)

    # Filter out arrays that are not used after dead code elimination
    closure_resolver.closure.closure_arrays = {
        k: v
        for k, v in closure_resolver.closure.closure_arrays.items()
        if k in used_arrays.arrays
    }

    # Filter out callbacks that were removed after dead code elimination
    closure_resolver.closure.callbacks = {
        k: v
        for k, v in closure_resolver.closure.callbacks.items()
        if k in ctr.seen_calls
    }

    # Filter remaining global variables according to type and scoping rules
    program_globals = {
        k: v
        for k, v in global_vars.items() if k not in argtypes
    }

    # Fill in data descriptors from closure arrays
    argtypes.update({
        arrname: v[1]
        for arrname, v in closure_resolver.closure.closure_arrays.items()
    })

    # Combine nested closures with the current one
    closure_resolver.closure.combine_nested_closures()

    past = PreprocessedAST(src_file, src_line, src, src_ast, program_globals)

    return past, closure_resolver.closure
Example #14
    def run(self, dace_state, fail_on_nonzero=False):
        dace_progname = dace_state.get_sdfg().name
        code_objects = dace_state.get_generated_code()

        # Figure out whether we should use MPI for launching
        use_mpi = False
        for code_object in code_objects:
            if code_object.target.target_name == 'mpi':
                use_mpi = True
                break

        # Check counter validity
        PerfUtils.check_performance_counters(self)

        remote_workdir = Config.get("execution", "general", "workdir")
        remote_dace_dir = remote_workdir + "/.dacecache/%s/" % dace_progname
        self.show_output("Executing DaCe program " + dace_progname + " on " + \
                Config.get("execution", "general", "host") + "\n")

        try:
            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Generating remote workspace")
            tmpfolder = tempfile.mkdtemp()
            generate_program_folder(dace_state.get_sdfg(), code_objects,
                                    tmpfolder)
            self.create_remote_directory(remote_dace_dir)
            self.copy_folder_to_remote(tmpfolder, remote_dace_dir)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Compiling...")
            # call compile.py on the remote node in the copied folder
            self.remote_compile(remote_dace_dir, dace_progname)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Done compiling")

            # copy the input file and the .so file (with the right name)
            # to remote_dace_dir
            so_name = "lib" + dace_progname + "." + Config.get(
                'compiler', 'library_extension')
            self.copy_file_from_remote(remote_dace_dir + "/build/" + so_name,
                                       tmpfolder + "/" + so_name)
            self.copy_file_to_remote(tmpfolder + "/" + so_name,
                                     remote_dace_dir)

            dace_file = dace_state.get_dace_tmpfile()
            if dace_file is None:
                raise ValueError("Dace file is None!")

            # copy the SDFG
            try:
                local_sdfg = tmpfolder + "/sdfg.out"
                sdfg = dace_state.get_sdfg()
                sdfg.save(local_sdfg)
                remote_sdfg = remote_workdir + "/sdfg.out"
                self.copy_file_to_remote(local_sdfg, remote_sdfg)
            except Exception:
                print("Could NOT save the SDFG")

            remote_dace_file = remote_workdir + "/" + os.path.basename(
                dace_file)
            self.copy_file_to_remote(dace_file, remote_dace_file)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("All files copied to remote")

            # We got the file there, now we can run with different
            # configurations.
            for iteration in range(0, PerfSettings.perf_multirun_num()):
                optdict, omp_thread_num = PerfUtils.get_run_options(
                    self, iteration)

                self.remote_exec_dace(remote_workdir,
                                      remote_dace_file,
                                      use_mpi,
                                      fail_on_nonzero,
                                      omp_num_threads=omp_thread_num,
                                      additional_options_dict=optdict)

                if self.running_async:
                    # Add information about what is being run
                    self.async_host.notify("Done option threads=" +
                                           str(omp_thread_num))

            self.show_output("Execution Terminated\n")

            try:
                self.copy_file_from_remote(remote_workdir + "/results.log",
                                           ".")
            except Exception:
                pass

            # Copy back the vectorization results
            PerfUtils.retrieve_vectorization_report(self, code_objects,
                                                    remote_dace_dir)

            # Copy back the instrumentation results
            PerfUtils.retrieve_instrumentation_results(self, remote_workdir)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Cleaning up")

            try:
                self.remote_delete_file(remote_workdir + "/results.log")
            except Exception:
                print(
                    "WARNING: results.log could not be transmitted (probably not created)"
                )

            self.remote_delete_file(remote_dace_file)
            self.remote_delete_dir(remote_dace_dir)

            def deferred():
                try:
                    res = self.update_performance_plot("results.log",
                                                       str(self.counter))
                    os.remove("results.log")
                except FileNotFoundError:
                    print("WARNING: results.log could not be read")

            self.async_host.run_sync(deferred)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Done cleaning")

            # Also, update the performance data.
            self.rendered_graphs.set_memspeed_target()
            self.rendered_graphs.render_performance_data(
                Config.get("instrumentation", "papi_mode"))
        except Exception as e:
            print("\n\n\n")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print("Running the program failed:")
            traceback.print_exc()
            print(
                "Inspect above output for more information about executed command sequence."
            )
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            if self.headless:
                sys.exit(1)

        if self.running_async:
            self.async_host.notify("All done")
        self.counter += 1
Example #15
 def copy_file_from_remote(self, src, dst):
     s = Template(Config.get("execution", "general", "copycmd_r2l"))
     cmd = s.substitute(host=Config.get("execution", "general", "host"),
                        srcfile=src,
                        dstfile=dst)
     self.exec_cmd_and_show_output(cmd)
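
The copycmd_r2l entry is a string.Template; with an scp-style template (an assumption about the configured value), the substitution yields a plain shell command:

from string import Template

s = Template("scp $host:$srcfile $dstfile")  # assumed template value
print(s.substitute(host="node0", srcfile="/remote/a.so", dstfile="./a.so"))
# scp node0:/remote/a.so ./a.so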
Example #16
    def remote_exec_dace(self,
                         remote_workdir,
                         dace_file,
                         use_mpi=True,
                         fail_on_nonzero=False,
                         omp_num_threads=None,
                         additional_options_dict={}):
        run = "${command} "
        if use_mpi:
            run = Config.get("execution", "mpi", "mpiexec")
            nprocs = Config.get("execution", "mpi", "num_procs")
        else:
            nprocs = 1
        repetitions = Config.get("execution", "general", "repetitions")

        omp_num_threads_str = ""
        omp_num_threads_unset_str = ""
        perf_instrumentation_result_marker = ""
        if omp_num_threads is not None:
            omp_num_threads_str = "export OMP_NUM_THREADS=" + str(
                omp_num_threads) + "\n"
            omp_num_threads_unset_str = "unset OMP_NUM_THREADS\n"
            perf_instrumentation_result_marker = "echo '# ;%s; Running in multirun config' >> %s/instrumentation_results.txt\n" % (
                omp_num_threads_str.replace("\n", ""), remote_workdir)

        # Create string from all misc options
        miscoptstring = ""
        miscoptresetstring = ""
        for optkey, optval in additional_options_dict.items():
            miscoptstring += "export " + str(optkey) + "=" + str(optval) + "\n"
            miscoptresetstring += "unset " + str(optkey) + "\n"

        # Create a startscript which exports necessary env-vars

        start_sh = "set -x\n" + \
                   "export DACE_compiler_use_cache=1\n" + \
                   "export DACE_optimizer_interface=''\n" + \
                   "export DACE_profiling=1\n" + \
                   "export DACE_treps=" + str(repetitions) +"\n" + \
                   miscoptstring + \
                   omp_num_threads_str + \
                   "cd " + remote_workdir + "\n" + \
                   perf_instrumentation_result_marker
        s = Template(run + " ")
        cmd = s.substitute(command="python3 " + dace_file, num_procs=nprocs)
        start_sh += cmd + "\n"
        start_sh += "export RETVAL=$?\n"
        start_sh += (
            "unset DACE_compiler_use_cache\n" +
            "unset DACE_optimizer_interface\n" + "unset DACE_treps\n" +
            "unset DACE_profiling\n" + omp_num_threads_unset_str +
            miscoptresetstring +
            # TODO: separate program error and system error
            "exit $RETVAL\n")
        tempdir = tempfile.mkdtemp()
        startsh_file = os.path.join(tempdir, "start.sh")
        with open(startsh_file, "w") as fh:
            fh.write(start_sh)
        st = os.stat(startsh_file)
        os.chmod(startsh_file, st.st_mode | stat.S_IEXEC)

        workdir = Config.get("execution", "general", "workdir")

        self.copy_file_to_remote(startsh_file, workdir + "/start.sh")

        s = Template(Config.get("execution", "general", "execcmd"))
        cmd = s.substitute(host=Config.get("execution", "general", "host"),
                           command=workdir + "/start.sh")
        self.exec_cmd_and_show_output(cmd, fail_on_nonzero)

        self.remote_delete_file(workdir + "/start.sh")
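
For reference, with use_mpi=False, repetitions=10 and no extra options, the generated start.sh is roughly the following (reconstructed from the string concatenation above; paths are hypothetical):

example_start_sh = """set -x
export DACE_compiler_use_cache=1
export DACE_optimizer_interface=''
export DACE_profiling=1
export DACE_treps=10
cd /remote/workdir
python3 program.py
export RETVAL=$?
unset DACE_compiler_use_cache
unset DACE_optimizer_interface
unset DACE_treps
unset DACE_profiling
exit $RETVAL
"""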
Example #17
 def remote_delete_dir(self, deldir):
     s = Template(Config.get("execution", "general", "execcmd"))
     cmd = s.substitute(host=Config.get("execution", "general", "host"),
                        command="rm -r " + deldir)
     self.exec_cmd_and_show_output(cmd)
Example #18
def timethis(sdfg, title, flop_count, f, *args, **kwargs):
    """ Runs a function multiple (`DACE_treps`) times, logs the running times 
        to a file, and prints the median time (with FLOPs if given).
        :param sdfg: The SDFG belonging to the measurement.
        :param title: A title of the measurement.
        :param flop_count: Number of floating point operations in `f`.
                           If greater than zero, produces a median FLOPS 
                           report.
        :param f: The function to measure.
        :param args: Arguments to invoke the function with.
        :param kwargs: Keyword arguments to invoke the function with.
        :return: Latest return value of the function.
    """

    start = timer()
    REPS = int(Config.get('treps'))

    times = [start] * (REPS + 1)
    ret = None
    print('\nProfiling...')
    iterator = range(REPS)
    if Config.get_bool('profiling_status'):
        try:
            from tqdm import tqdm
            iterator = tqdm(iterator, desc="Profiling", file=sys.stdout)
        except ImportError:
            print(
                'WARNING: Cannot show profiling progress, missing optional '
                'dependency tqdm...\n\tTo see a live progress bar please install '
                'tqdm (`pip install tqdm`)\n\tTo disable this feature (and '
                'this warning) set `profiling_status` to false in the dace '
                'config (~/.dace.conf).')
    for i in iterator:
        # Call function
        ret = f(*args, **kwargs)
        times[i + 1] = timer()

    diffs = np.array([(times[i] - times[i - 1]) for i in range(1, REPS + 1)])

    problem_size = sys.argv[1] if len(sys.argv) >= 2 else 0

    profiling_dir = os.path.join(sdfg.build_folder, 'profiling')
    os.makedirs(profiling_dir, exist_ok=True)
    timestamp_string = str(int(time.time() * 1000))
    outfile_path = os.path.join(profiling_dir,
                                'results-' + timestamp_string + '.csv')

    with open(outfile_path, 'w') as f:
        f.write('Program,Optimization,Problem_Size,Runtime_sec\n')
        for d in diffs:
            f.write('%s,%s,%s,%.8f\n' % (sdfg.name, title, problem_size, d))

    if flop_count > 0:
        gflops_arr = (flop_count / diffs) * 1e-9
        time_secs = np.median(diffs)
        GFLOPs = (flop_count / time_secs) * 1e-9
        print(title, GFLOPs, 'GFLOP/s       (', time_secs * 1000, 'ms)')
    else:
        time_secs = np.median(diffs)
        print(title, time_secs * 1000, 'ms')

    return ret
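
A hedged usage sketch (all names are placeholders):

# Measure a compiled SDFG DACE_treps times; with a positive flop count a
# median-GFLOP/s line is printed, and per-repetition runtimes are written
# to <build_folder>/profiling/results-<timestamp>.csv.
ret = timethis(sdfg, 'baseline', 2 * 1024**3, compiled_sdfg, A=A, B=B, N=1024)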
Example #19
 def expand(self, sdfg, state, *args, **kwargs) -> str:
     """ Create and perform the expansion transformation for this library
         node.
         :return: the name of the expanded implementation
     """
     implementation = self.implementation
     library_name = getattr(type(self), '_dace_library_name', '')
     try:
         if library_name:
             config_implementation = Config.get("library", library_name,
                                                "default_implementation")
         else:
             config_implementation = None
     except KeyError:
         # Non-standard libraries are not defined in the config schema, and
         # thus might not exist in the config.
         config_implementation = None
     if config_implementation is not None:
         try:
             config_override = Config.get("library", library_name,
                                          "override")
             if config_override and implementation in self.implementations:
                 if implementation is not None:
                     warnings.warn(
                         "Overriding explicitly specified "
                         "implementation {} for {} with {}.".format(
                             implementation, self.label,
                             config_implementation))
                 implementation = config_implementation
         except KeyError:
             config_override = False
     # If not explicitly set, try the node default
     if implementation is None:
         implementation = type(self).default_implementation
         # If no node default, try library default
         if implementation is None:
             import dace.library  # Avoid cyclic dependency
             lib = dace.library._DACE_REGISTERED_LIBRARIES[type(
                 self)._dace_library_name]
             implementation = lib.default_implementation
             # Try the default specified in the config
             if implementation is None:
                 implementation = config_implementation
                 # Otherwise we don't know how to expand
                 if implementation is None:
                     raise ValueError("No implementation or default "
                                      "implementation specified.")
     if implementation not in self.implementations.keys():
         raise KeyError("Unknown implementation for node {}: {}".format(
             type(self).__name__, implementation))
     transformation_type = type(self).implementations[implementation]
     sdfg_id = sdfg.sdfg_id
     state_id = sdfg.nodes().index(state)
     subgraph = {transformation_type._match_node: state.node_id(self)}
     transformation = transformation_type(sdfg, sdfg_id, state_id, subgraph,
                                          0)
     if not transformation.can_be_applied(state, 0, sdfg):
         raise RuntimeError("Library node "
                            "expansion applicability check failed.")
     sdfg.append_transformation(transformation)
     transformation.apply(state, sdfg, *args, **kwargs)
     return implementation
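
Hypothetical usage (the node and state are placeholders):

# Explicitly pick an implementation, then expand; note that the library's
# "override" config entry may still replace the explicit choice, as the
# warning above indicates.
node.implementation = 'pure'
chosen = node.expand(sdfg, state)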
Example #20
 def cmake_options():
     compiler = make_absolute(Config.get("compiler", "mpi", "executable"))
     return [
         "-DMPI_CXX_COMPILER=\"{}\"".format(compiler),
         "-DDACE_ENABLE_MPI=ON",
     ]
Example #21
 def config_get(self, *key_hierarchy):
     if self._config is None:
         return Config.get(*key_hierarchy)
     else:
         return self._config.get(*key_hierarchy)
Example #22
import os
import logging

import dace.library
from dace.config import Config

log = logging.getLogger(__name__)

if 'ORT_ROOT' not in os.environ and 'ORT_RELEASE' not in os.environ:
    raise ValueError("This environment expects the environment variable "
                     "ORT_ROOT or ORT_RELEASE to be set (see README.md)")

if Config.get("compiler", "cuda", "max_concurrent_streams") != -1:
    log.info("Setting compiler.cuda.max_concurrent_streams to -1")
    Config.set("compiler", "cuda", "max_concurrent_streams", value=-1)


def _get_src_includes():
    """
    Get the includes and dll path when ORT is built from source
    """
    ort_path = os.path.abspath(os.environ['ORT_ROOT'])
    cand_path = os.path.join(ort_path, "build", "Linux",
                             dace.Config.get("compiler", "build_type"))

    if os.path.isdir(cand_path):
        ort_build_path = cand_path
    else:
        ort_build_path = os.path.join(ort_path, "build", "Linux", "Release")

    ort_dll_path = os.path.join(ort_build_path, "libonnxruntime.so")
Example #23
def unparse_tasklet(sdfg, state_id, dfg, node, function_stream,
                    callsite_stream, locals, ldepth, toplevel_schedule,
                    codegen):

    if node.label is None or node.label == "":
        return ""

    state_dfg = sdfg.nodes()[state_id]

    # Not [], "" or None
    if not node.code:
        return ""

    # If raw C++ code, return the code directly
    if node.language != dtypes.Language.Python:
        # If this code runs on the host and is associated with a GPU stream,
        # set the stream to a local variable.
        max_streams = int(
            Config.get("compiler", "cuda", "max_concurrent_streams"))
        if (max_streams >= 0 and not is_devicelevel_gpu(sdfg, state_dfg, node)
                and hasattr(node, "_cuda_stream")):
            callsite_stream.write(
                'int __dace_current_stream_id = %d;\n%sStream_t __dace_current_stream = dace::cuda::__streams[__dace_current_stream_id];'
                %
                (node._cuda_stream, Config.get('compiler', 'cuda', 'backend')),
                sdfg,
                state_id,
                node,
            )

        if node.language != dtypes.Language.CPP:
            raise ValueError(
                "Only Python or C++ code supported in CPU codegen, got: {}".
                format(node.language))
        callsite_stream.write(
            type(node).__properties__["code"].to_string(node.code), sdfg,
            state_id, node)

        if hasattr(node, "_cuda_stream") and not is_devicelevel_gpu(
                sdfg, state_dfg, node):
            synchronize_streams(sdfg, state_dfg, state_id, node, node,
                                callsite_stream)
        return

    body = node.code.code

    # Map local names to memlets (for WCR detection)
    memlets = {}
    for edge in state_dfg.all_edges(node):
        u, uconn, v, vconn, memlet = edge
        if u == node:
            memlet_nc = not is_write_conflicted(
                dfg, edge, sdfg_schedule=toplevel_schedule)
            memlet_wcr = memlet.wcr
            if uconn in u.out_connectors:
                conntype = u.out_connectors[uconn]
            else:
                conntype = None

            memlets[uconn] = (memlet, memlet_nc, memlet_wcr, conntype)
        elif v == node:
            if vconn in v.in_connectors:
                conntype = v.in_connectors[vconn]
            else:
                conntype = None

            memlets[vconn] = (memlet, False, None, conntype)

    callsite_stream.write("// Tasklet code (%s)\n" % node.label, sdfg,
                          state_id, node)
    for stmt in body:
        stmt = copy.deepcopy(stmt)
        rk = StructInitializer(sdfg).visit(stmt)
        if isinstance(stmt, ast.Expr):
            rk = DaCeKeywordRemover(sdfg, memlets, sdfg.constants,
                                    codegen).visit_TopLevelExpr(stmt)
        else:
            rk = DaCeKeywordRemover(sdfg, memlets, sdfg.constants,
                                    codegen).visit(stmt)

        if rk is not None:
            # Unparse to C++ and add 'auto' declarations if locals not declared
            result = StringIO()
            cppunparse.CPPUnparser(rk, ldepth + 1, locals, result)
            callsite_stream.write(result.getvalue(), sdfg, state_id, node)
Example #24
    def consume_programs(self):

        try:
            cmd = self._executor_queue.get(timeout=3)

            if cmd['cmd'] == "run":
                while True:
                    with self._run_cv:
                        if self._slot_available:
                            break
                    import time
                    time.sleep(0.5)

                with self._run_cv:
                    self._slot_available = False
                    print("Running task")

                    self._task_dict[cmd['index']]['state'] = 'running'

                    runner = self.run(
                        cmd['cot'], {
                            'index': cmd['index'],
                            'config_path': cmd['config_path'],
                            'client_id': cmd['cid'],
                            'reset-perfdata': cmd['reset-perfdata'],
                            'perfopts': cmd['opt']['perfopts']
                        })
                    print("Wait for oplock")
                    with self._oplock:
                        self._current_runs[cmd['cid']] = runner

                    # Wait a predefined time for clients to catch up on the outputs
                    time.sleep(RUNNING_TIMEOUT)
                    with self._oplock:
                        run_locally = True
                        try:
                            x = self._current_runs[cmd['cid']]
                        except KeyError:
                            run_locally = False

                    if run_locally:
                        print("running locally")

                        def tmp():
                            with self._oplock:
                                del self._current_runs[cmd['cid']]
                                self._orphaned_runs.setdefault(cmd['cid'], [])
                                self._orphaned_runs[cmd['cid']].append([])
                            print("Starting runner")
                            for x in runner():
                                self._orphaned_runs[cmd['cid']][-1] += x

                        # Because this holds locks (and the output should be
                        # generated even if nobody asks for it immediately),
                        # this is run when the timeout for direct
                        # interception expires.
                        tmp()
            elif cmd['cmd'] == 'control':
                # Control operations that must be synchronous with execution (e.g. for cleanup, storage operations)
                with self._oplock:
                    self._task_dict[cmd['index']]['state'] = 'running'

                if cmd['operation'] == 'startgroup':
                    from diode.db_scripts.db_setup import db_setup
                    perf_tmp_dir = ExecutorServer.getPerfdataDir(cmd['cid'])

                    # Clean database and create tables
                    db_setup(perf_tmp_dir)

                elif cmd['operation'] == 'remove_group':
                    perfdir = ExecutorServer.getPerfdataDir(cmd['cid'])
                    perfdata_path = os.path.join(perfdir, "perfdata.db")
                    os.remove(perfdata_path)
                    os.rmdir(perfdir)

                elif cmd['operation'] == 'endgroup':
                    print("Ending group")
                    from diode.db_scripts.sql_to_json import MergeRuns, Conserver
                    from dace.config import Config

                    config_path = cmd['config_path']

                    with config_lock:
                        Config.load(config_path)
                        repetitions = Config.get("execution", "general",
                                                 "repetitions")

                    perf_tmp_dir = ExecutorServer.getPerfdataDir(cmd['cid'])
                    perfdata_path = os.path.join(perf_tmp_dir, "perfdata.db")
                    can_path = os.path.join(perf_tmp_dir, 'current.can')

                    mr = MergeRuns()
                    mr.mergev2(perfdata_path)
                    print("Merged into " + perfdata_path)

                    cons = Conserver()
                    # TODO: Add sdfgs
                    cons.conserveAll(perfdata_path,
                                     can_path,
                                     "",
                                     repetitions,
                                     clear_existing=False)

                    print("Merged and Conserved!")
                    self._perfdata_available[cmd['cid']] = can_path

                with self._oplock:
                    del self._task_dict[cmd['index']]

        except queue.Empty:
            return
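The queue protocol can be read off the keys above; a minimal sketch of enqueuing a 'run' command (all values are placeholders and `server` is a hypothetical ExecutorServer instance):

# Minimal sketch: enqueue a 'run' command for consume_programs to pick up.
# Keys mirror those read in the snippet above; all values are placeholders.
command = {
    'cmd': 'run',
    'index': 0,
    'cot': None,                       # compile object/task handed to self.run
    'config_path': '/tmp/client.conf',
    'cid': 'client-1',
    'reset-perfdata': False,
    'opt': {'perfopts': {}},
}
server._executor_queue.put(command)    # hypothetical server instance
server.consume_programs()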
Example #25
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param program_name: Name of the program; if None, the base name of
                             `program_folder` is used.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    with open(os.path.join(program_folder, "dace_files.csv"), "r") as csv_file:
        file_list = [line.strip().split(",") for line in csv_file]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Windows-only workaround: Override Visual C++'s linker to use
    # Multi-Threaded (MT) mode. This fixes linkage in CUDA applications where
    # CMake fails to do so.
    if os.name == 'nt':
        if '_CL_' not in os.environ:
            os.environ['_CL_'] = '/MT'
        elif '/MT' not in os.environ['_CL_']:
            os.environ['_CL_'] = os.environ['_CL_'] + ' /MT'

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    with open(os.path.join(program_folder, "dace_environments.csv"),
              "r") as env_file:
        environments = set(l.strip() for l in env_file)

    environments = dace.library.get_environments_and_dependencies(environments)

    environment_flags, cmake_link_flags = get_environment_flags(environments)
    cmake_command += sorted(environment_flags)

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in sorted(targets.items()):
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise cgx.CompilerConfigurationError(str(ex))

    cmake_command.append("-DDACE_LIBS=\"{}\"".format(" ".join(
        sorted(libraries))))

    # Set linker and linker arguments, iff they have been specified
    cmake_linker = Config.get('compiler', 'linker', 'executable') or ''
    cmake_linker = cmake_linker.strip()
    if cmake_linker:
        cmake_linker = make_absolute(cmake_linker)
        cmake_command.append(f'-DCMAKE_LINKER="{cmake_linker}"')
    cmake_link_flags = (
        ' '.join(sorted(cmake_link_flags)) + ' ' +
        (Config.get('compiler', 'linker', 'args') or '')).strip()
    if cmake_link_flags:
        cmake_command.append(
            f'-DCMAKE_SHARED_LINKER_FLAGS="{cmake_link_flags}"')
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        if not identical_file_exists(cmake_filename, cmake_command):
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise cgx.CompilerConfigurationError('Configuration failure')
            else:
                raise cgx.CompilerConfigurationError(
                    'Configuration failure:\n' + ex.output)

    with open(cmake_filename, "w") as fp:
        fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise cgx.CompilationError('Compiler failure')
        else:
            raise cgx.CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
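A minimal usage sketch, assuming the program folder was produced beforehand by `generate_program_folder` (the path below is a placeholder):

import ctypes

# Configure, build, and load the resulting shared library.
library_path = configure_and_compile('/tmp/myprogram_folder')  # placeholder
lib = ctypes.CDLL(library_path)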
Example #26
    def __getitem__(self, *key):
        return Config.get(*key)
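Note that `obj['a', 'b']` reaches `__getitem__` as a single tuple, so the `*key` parameter wraps it once more and `Config.get` receives the whole tuple as one argument. A minimal sketch of an indexer that unpacks explicitly (ConfigProxy is a hypothetical wrapper, not DaCe API):

from dace.config import Config

class ConfigProxy:
    # Hypothetical wrapper: expose Config.get through subscript syntax.
    def __getitem__(self, key):
        if isinstance(key, tuple):
            return Config.get(*key)  # cfg['compiler', 'build_type']
        return Config.get(key)       # cfg['debugprint']

cfg = ConfigProxy()
build_type = cfg['compiler', 'build_type']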
Example #27
    def get_generated_codeobjects(self):

        execution_mode = Config.get("compiler", "xilinx", "mode")

        kernel_file_name = "DACE_BINARY_DIR \"/{}".format(self._program_name)
        if execution_mode == "software_emulation":
            kernel_file_name += "_sw_emu.xclbin\""
            xcl_emulation_mode = "\"sw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode == "hardware_emulation":
            kernel_file_name += "_hw_emu.xclbin\""
            xcl_emulation_mode = "\"hw_emu\""
            xilinx_sdx = "DACE_VITIS_DIR"
        elif execution_mode in ("hardware", "simulation"):
            kernel_file_name += "_hw.xclbin\""
            xcl_emulation_mode = None
            xilinx_sdx = None
        else:
            raise dace.codegen.codegen.CodegenError(
                "Unknown Xilinx execution mode: {}".format(execution_mode))

        set_env_vars = ""
        set_str = "dace::set_environment_variable(\"{}\", {});\n"
        unset_str = "dace::unset_environment_variable(\"{}\");\n"
        set_env_vars += (set_str.format("XCL_EMULATION_MODE",
                                        xcl_emulation_mode)
                         if xcl_emulation_mode is not None else
                         unset_str.format("XCL_EMULATION_MODE"))
        set_env_vars += (set_str.format("XILINX_SDX", xilinx_sdx) if xilinx_sdx
                         is not None else unset_str.format("XILINX_SDX"))

        host_code = CodeIOStream()
        host_code.write("""\
#include "dace/xilinx/host.h"
#include "dace/dace.h"
#include <iostream>\n\n""")

        self._frame.generate_fileheader(self._global_sdfg, host_code)

        host_code.write("""
dace::fpga::Context *dace::fpga::_context;

DACE_EXPORTED int __dace_init_xilinx({signature}) {{
    {environment_variables}
    dace::fpga::_context = new dace::fpga::Context();
    dace::fpga::_context->Get().MakeProgram({kernel_file_name});
    return 0;
}}

DACE_EXPORTED void __dace_exit_xilinx({signature}) {{
    delete dace::fpga::_context;
}}

{host_code}""".format(signature=self._global_sdfg.signature(),
                      environment_variables=set_env_vars,
                      kernel_file_name=kernel_file_name,
                      host_code="".join([
                          "{separator}\n// Kernel: {kernel_name}"
                          "\n{separator}\n\n{code}\n\n".format(separator="/" *
                                                               79,
                                                               kernel_name=name,
                                                               code=code)
                          for (name, code) in self._host_codes
                      ])))

        host_code_obj = CodeObject(self._program_name,
                                   host_code.getvalue(),
                                   "cpp",
                                   XilinxCodeGen,
                                   "Xilinx",
                                   target_type="host")

        kernel_code_objs = [
            CodeObject(kernel_name,
                       code,
                       "cpp",
                       XilinxCodeGen,
                       "Xilinx",
                       target_type="device")
            for (kernel_name, code) in self._kernel_codes
        ]

        # Configuration file with interface assignments
        are_assigned = [
            v is not None for v in self._interface_assignments.values()
        ]
        if any(are_assigned):
            if not all(are_assigned):
                raise RuntimeError("Some, but not all global memory arrays "
                                   "were assigned to memory banks: {}".format(
                                       self._interface_assignments))
            are_assigned = True
        else:
            are_assigned = False
        for name, _ in self._host_codes:
            # Reset per kernel so assignments do not leak across files
            bank_assignment_code = []
            # Only iterate over assignments if any exist
            if are_assigned:
                for (kernel_name, interface_name), (
                        memory_type,
                        memory_bank) in self._interface_assignments.items():
                    if kernel_name != name:
                        continue
                    bank_assignment_code.append("{},{},{}".format(
                        interface_name, memory_type.name, memory_bank))
            # Create file even if there are no assignments
            kernel_code_objs.append(
                CodeObject("{}_memory_interfaces".format(name),
                           "\n".join(bank_assignment_code),
                           "csv",
                           XilinxCodeGen,
                           "Xilinx",
                           target_type="device"))

        return [host_code_obj] + kernel_code_objs
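The branch on execution_mode above is driven purely by configuration; a minimal sketch of selecting a mode before code generation (Config.set taking a value keyword is assumed, matching its use elsewhere in DaCe):

from dace.config import Config

# Valid modes per the snippet above: software_emulation,
# hardware_emulation, simulation, hardware.
Config.set('compiler', 'xilinx', 'mode', value='hardware_emulation')
assert Config.get('compiler', 'xilinx', 'mode') == 'hardware_emulation'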
Example #28
def configure_and_compile(program_folder,
                          program_name=None,
                          output_stream=None):
    """ Configures and compiles a DaCe program in the specified folder into a
        shared library file.

        :param program_folder: Folder containing all files necessary to build,
                               equivalent to what was passed to
                               `generate_program_folder`.
        :param program_name: Name of the program; if None, the base name of
                             `program_folder` is used.
        :param output_stream: Additional output stream to write to (used for
                              DIODE client).
        :return: Path to the compiled shared library file.
    """

    if program_name is None:
        program_name = os.path.basename(program_folder)
    program_folder = os.path.abspath(program_folder)
    src_folder = os.path.join(program_folder, "src")

    # Prepare build folder
    build_folder = os.path.join(program_folder, "build")
    os.makedirs(build_folder, exist_ok=True)

    # Prepare performance report folder
    os.makedirs(os.path.join(program_folder, "perf"), exist_ok=True)

    # Read list of DaCe files to compile.
    # We do this instead of iterating over source files in the directory to
    # avoid globbing files from previous compilations, such that we don't need
    # to wipe the directory for every compilation.
    with open(os.path.join(program_folder, "dace_files.csv"), "r") as csv_file:
        file_list = [line.strip().split(",") for line in csv_file]

    # Get absolute paths and targets for all source files
    files = []
    targets = {}  # {target name: target class}
    for target_name, target_type, file_name in file_list:
        if target_type:
            path = os.path.join(target_name, target_type, file_name)
        else:
            path = os.path.join(target_name, file_name)
        files.append(path)
        targets[target_name] = next(
            k for k, v in TargetCodeGenerator.extensions().items()
            if v['name'] == target_name)

    # Start forming CMake command
    dace_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    cmake_command = [
        "cmake",
        "-A x64" if os.name == 'nt' else "",  # Windows-specific flag
        '"' + os.path.join(dace_path, "codegen") + '"',
        "-DDACE_SRC_DIR=\"{}\"".format(src_folder),
        "-DDACE_FILES=\"{}\"".format(";".join(files)),
        "-DDACE_PROGRAM_NAME={}".format(program_name),
    ]

    # Get required environments and retrieve the CMake information
    with open(os.path.join(program_folder, "dace_environments.csv"),
              "r") as env_file:
        environments = set(l.strip() for l in env_file)
    cmake_minimum_version = [0]
    cmake_variables = dict()
    cmake_packages = set()
    cmake_includes = set()
    cmake_libraries = set()
    cmake_compile_flags = set()
    cmake_link_flags = set()
    cmake_files = set()
    cmake_module_paths = set()
    for env_name in environments:
        env = dace.library.get_environment(env_name)
        if (env.cmake_minimum_version is not None
                and len(env.cmake_minimum_version) > 0):
            version_list = list(map(int, env.cmake_minimum_version.split(".")))
            # Keep the largest minimum version requirement; Python list
            # comparison is lexicographic over the version components
            if version_list > cmake_minimum_version:
                cmake_minimum_version = version_list
        for var in env.cmake_variables:
            if (var in cmake_variables
                    and cmake_variables[var] != env.cmake_variables[var]):
                raise KeyError(
                    "CMake variable {} was redefined from {} to {}.".format(
                        var, cmake_variables[var], env.cmake_variables[var]))
            cmake_variables[var] = env.cmake_variables[var]
        cmake_packages |= set(env.cmake_packages)
        cmake_includes |= set(env.cmake_includes)
        cmake_libraries |= set(env.cmake_libraries)
        cmake_compile_flags |= set(env.cmake_compile_flags)
        cmake_link_flags |= set(env.cmake_link_flags)
        # Make path absolute
        env_dir = os.path.dirname(env._dace_file_path)
        cmake_files |= set(
            (f if os.path.isabs(f) else os.path.join(env_dir, f)) +
            (".cmake" if not f.endswith(".cmake") else "")
            for f in env.cmake_files)
        for header in env.headers:
            if os.path.isabs(header):
                # Giving an absolute path is not good practice, but allow it
                # for emergency overriding
                cmake_includes.add(os.path.dirname(header))
                continue
            abs_path = os.path.join(env_dir, header)
            if os.path.isfile(abs_path):
                # Allow includes stored with the library, specified with a
                # relative path
                cmake_includes.add(env_dir)
                break
    environment_flags = [
        "-DDACE_ENV_MINIMUM_VERSION={}".format(".".join(
            map(str, cmake_minimum_version))),
        # Make CMake list of key-value pairs
        "-DDACE_ENV_VAR_KEYS=\"{}\"".format(";".join(cmake_variables.keys())),
        "-DDACE_ENV_VAR_VALUES=\"{}\"".format(";".join(
            cmake_variables.values())),
        "-DDACE_ENV_PACKAGES=\"{}\"".format(" ".join(cmake_packages)),
        "-DDACE_ENV_INCLUDES=\"{}\"".format(" ".join(cmake_includes)),
        "-DDACE_ENV_LIBRARIES=\"{}\"".format(" ".join(cmake_libraries)),
        "-DDACE_ENV_COMPILE_FLAGS=\"{}\"".format(
            " ".join(cmake_compile_flags)),
        # "-DDACE_ENV_LINK_FLAGS=\"{}\"".format(" ".join(cmake_link_flags)),
        "-DDACE_ENV_CMAKE_FILES=\"{}\"".format(";".join(cmake_files)),
    ]
    # Escape variable expansions to defer their evaluation
    environment_flags = [
        cmd.replace("$", "_DACE_CMAKE_EXPAND") for cmd in environment_flags
    ]
    cmake_command += environment_flags

    # Replace backslashes with forward slashes
    cmake_command = [cmd.replace('\\', '/') for cmd in cmake_command]

    # Generate CMake options for each compiler
    libraries = set()
    for target_name, target in targets.items():
        try:
            cmake_command += target.cmake_options()
            libraries |= unique_flags(
                Config.get("compiler", target_name, "libs"))
        except KeyError:
            pass
        except ValueError as ex:  # Cannot find compiler executable
            raise CompilerConfigurationError(str(ex))

    # TODO: it should be possible to use the default arguments/compilers
    #       found by CMake
    cmake_command += [
        "-DDACE_LIBS=\"{}\"".format(" ".join(libraries)),
        "-DCMAKE_LINKER=\"{}\"".format(
            make_absolute(Config.get('compiler', 'linker', 'executable'))),
        "-DCMAKE_SHARED_LINKER_FLAGS=\"{}\"".format(
            Config.get('compiler', 'linker', 'args') + " " +
            Config.get('compiler', 'linker', 'additional_args') +
            " ".join(cmake_link_flags)),
    ]
    cmake_command = ' '.join(cmake_command)

    cmake_filename = os.path.join(build_folder, 'cmake_configure.sh')
    ##############################################
    # Configure
    try:
        _run_liveoutput(cmake_command,
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # Clean CMake directory and try once more
        if Config.get_bool('debugprint'):
            print('Cleaning CMake build folder and retrying...')
        shutil.rmtree(build_folder)
        os.makedirs(build_folder)
        try:
            _run_liveoutput(cmake_command,
                            shell=True,
                            cwd=build_folder,
                            output_stream=output_stream)
        except subprocess.CalledProcessError as ex:
            # If still unsuccessful, print results
            if Config.get_bool('debugprint'):
                raise CompilerConfigurationError('Configuration failure')
            else:
                raise CompilerConfigurationError('Configuration failure:\n' +
                                                 ex.output)

    with open(cmake_filename, "w") as fp:
        fp.write(cmake_command)

    # Compile and link
    try:
        _run_liveoutput("cmake --build . --config %s" %
                        (Config.get('compiler', 'build_type')),
                        shell=True,
                        cwd=build_folder,
                        output_stream=output_stream)
    except subprocess.CalledProcessError as ex:
        # If unsuccessful, print results
        if Config.get_bool('debugprint'):
            raise CompilationError('Compiler failure')
        else:
            raise CompilationError('Compiler failure:\n' + ex.output)

    shared_library_path = os.path.join(
        build_folder,
        "lib{}.{}".format(program_name,
                          Config.get('compiler', 'library_extension')))

    return shared_library_path
Example #29
    def __init__(self, base_indentation=0):
        super(CodeIOStream, self).__init__()
        self._indent = base_indentation
        self._spaces = int(Config.get('compiler', 'indentation_spaces'))
        self._lineinfo = Config.get_bool('compiler', 'codegen_lineinfo')
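A minimal sketch of adjusting the indentation width that the stream reads at construction (the import path for CodeIOStream is an assumption):

from dace.config import Config
from dace.codegen.prettycode import CodeIOStream  # assumed import path

# Generated code will now be indented with 2 spaces per level.
Config.set('compiler', 'indentation_spaces', value=2)
stream = CodeIOStream()
assert stream._spaces == 2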
Example #30
    def __init__(self, *args, **kwargs):
        fpga_vendor = Config.get("compiler", "fpga_vendor")
        if fpga_vendor.lower() != "xilinx":
            # Don't register this code generator
            return
        super().__init__(*args, **kwargs)