Exemple #1
0
def build_checker():
    """Compile (once) and return the ONNX op-checker shared library.

    The compiled ``ctypes.CDLL`` is memoized on the function object itself,
    so repeated calls return the same handle without recompiling.

    Returns:
        ctypes.CDLL: Handle to the compiled ``onnx_op_checker`` library.
    """
    # Memoized result from a previous call.
    if hasattr(build_checker, "dll"):
        return build_checker.dll

    # The checker source ships as a header next to the daceml.onnx package.
    checker_code_path = os.path.join(
        os.path.dirname(inspect.getfile(daceml.onnx)), "include",
        "op_checker.h")

    with open(checker_code_path, "r") as f:
        checker_code = f.read()

    program = codeobject.CodeObject("onnx_op_checker",
                                    checker_code,
                                    "cpp",
                                    targets.cpu.CPUCodeGen,
                                    "ONNXOpChecker",
                                    environments={"ONNXRuntime"})

    # Use the configured build folder (consistent with cuda_helper below)
    # instead of a hard-coded '.dacecache' relative to the current working
    # directory, which broke when the process was started elsewhere.
    build_folder = dace.Config.get('default_build_folder')
    BUILD_PATH = os.path.join(build_folder, "onnx_op_checker")
    compiler.generate_program_folder(None, [program], BUILD_PATH)
    compiler.configure_and_compile(BUILD_PATH)

    checker_dll = ctypes.CDLL(
        compiler.get_binary_name(BUILD_PATH, "onnx_op_checker"))
    build_checker.dll = checker_dll

    return checker_dll
def cuda_helper():
    """Compile a tiny CUDA helper library at runtime and return a wrapper.

    Returns a ``CudaHelper`` instance whose ``host_to_gpu`` method copies a
    numpy array into a raw device pointer via ``cudaMemcpy``.
    """

    # C++ source with a single C-linkage entry point: copies `size` bytes
    # from host to device, then checks for errors and synchronizes.
    helper_code = """
    #include <dace/dace.h>
    
    extern "C" {
        int host_to_gpu(void* gpu, void* host, size_t size) {
            auto result = cudaMemcpy(gpu, host, size, cudaMemcpyHostToDevice);
            DACE_CUDA_CHECK(cudaGetLastError());
            DACE_CUDA_CHECK(cudaDeviceSynchronize());
            return result;
        } 
    } 
    """
    program = codeobject.CodeObject("cuda_helper", helper_code, "cpp",
                                    targets.cpu.CPUCodeGen, "CudaHelper")

    # Empty .cu code object -- presumably included so the build is configured
    # with the CUDA toolchain even though the helper itself is C++.
    # TODO confirm against DaCe's codegen behavior.
    dummy_cuda_target = codeobject.CodeObject("dummy", "", "cu",
                                              targets.cuda.CUDACodeGen,
                                              "CudaDummy")

    build_folder = dace.Config.get('default_build_folder')
    BUILD_PATH = os.path.join(build_folder, "cuda_helper")
    compiler.generate_program_folder(None, [program, dummy_cuda_target],
                                     BUILD_PATH)
    compiler.configure_and_compile(BUILD_PATH)

    checker_dll = compiled_sdfg.ReloadableDLL(
        compiler.get_binary_name(BUILD_PATH, "cuda_helper"), "cuda_helper")

    class CudaHelper:
        # Loads the DLL on construction and unloads it in __del__.
        def __init__(self):
            self.dll = checker_dll
            checker_dll.load()

            self._host_to_gpu = checker_dll.get_symbol("host_to_gpu")
            self._host_to_gpu.restype = ctypes.c_int

        def __del__(self):
            self.dll.unload()

        def host_to_gpu(self, gpu_ptr: int, numpy_array: np.ndarray):
            # Byte count: per-element ctype size (via DaCe's dtype mapping)
            # times the number of elements.
            size = ctypes.sizeof(
                dtypes._FFI_CTYPES[numpy_array.dtype.type]) * numpy_array.size
            # Pass the raw host pointer from the numpy array interface.
            result = ctypes.c_int(
                self._host_to_gpu(
                    ctypes.c_void_p(gpu_ptr),
                    ctypes.c_void_p(
                        numpy_array.__array_interface__["data"][0]),
                    ctypes.c_size_t(size)))
            # Nonzero means cudaMemcpy reported an error status.
            if result.value != 0:
                raise ValueError("host_to_gpu returned nonzero result!")

    return CudaHelper()
Exemple #3
0
    def run_local(self, sdfg: SDFG, driver_file: str):
        """Compile *sdfg* locally and run *driver_file* against it.

        Pipes stdout/stderr back to the client while the driver runs, and
        forces the compiler cache on so the driver picks up the compiled
        SDFG. Both the stream redirection and the ``use_cache`` setting are
        restored on every exit path.

        Args:
            sdfg: The SDFG to compile.
            driver_file: Path of the Python script to execute.

        Raises:
            NotImplementedError: If the generated code requires MPI.
        """
        workdir = sdfg.build_folder
        if Config.get_bool('diode', 'general', 'library_autoexpand'):
            sdfg.expand_library_nodes()
        code_objects = sdfg.generate_code()
        use_mpi = Executor._use_mpi(code_objects)
        # TODO: Implement (instead of pyrun, use mpirun/mpiexec)
        if use_mpi:
            raise NotImplementedError('Running MPI locally unimplemented')

        # Pipe stdout/stderr back to client output
        stdout = sys.stdout
        stderr = sys.stderr
        sys.stdout = FunctionStreamWrapper(self.show_output, stdout.write)
        sys.stderr = FunctionStreamWrapper(self.show_output, stderr.write)

        # Remember the cache setting up front so it can always be restored.
        old_usecache = Config.get_bool('compiler', 'use_cache')
        try:
            # Compile SDFG
            generate_program_folder(sdfg, code_objects, workdir, self._config)
            configure_and_compile(workdir, sdfg.name)

            self.show_output("Running script\n")

            # Run driver script with the compiled SDFG(s) as the default
            Config.set('compiler', 'use_cache', value=True)
            try:
                runpy.run_path(driver_file, run_name='__main__')
            # Catching all exceptions, including SystemExit
            except (Exception, SystemExit) as ex:
                # Corner case: If exited with error code 0, it is a success
                if isinstance(ex, SystemExit):
                    # If the exit code is nonzero, "raise" will not trigger a
                    # printout on the server
                    if ex.code != 0:
                        traceback.print_exc()
                        raise
                else:
                    raise

            self.show_output("Execution Terminated\n")
        finally:
            # Fix: the original leaked the redirected streams and the
            # modified use_cache setting when the driver (or compilation)
            # raised. Revert configuration and output redirection always.
            Config.set('compiler', 'use_cache', value=old_usecache)
            sys.stdout = stdout
            sys.stderr = stderr
def test_batchnorm2d_dp_gpu():
    # Lower the data-parallel batchnorm program to a strict SDFG and map it
    # onto the GPU.
    sdfg: dace.SDFG = batchnorm2d_data_parallelism_gpu.to_sdfg(strict=True)
    sdfg.apply_transformations(GPUTransformSDFG)

    # Generate code and materialize the program folder in a local cache
    # directory; this exercises the full codegen pipeline.
    objects = sdfg.generate_code()
    from dace.codegen import compiler
    cache_path = '.dacecache/local/batchnorm/' + sdfg.name
    compiler.generate_program_folder(sdfg, objects, cache_path)
Exemple #5
0
    def run_remote(self, sdfg: SDFG, dace_state, fail_on_nonzero: bool):
        """Compile *sdfg* on the configured remote host and execute it.

        Stages the generated program in a local temporary folder, mirrors it
        to the remote build directory, compiles and runs it there, then
        retrieves logs/instrumentation (best-effort) and cleans up remote
        files. The local staging folder is always removed.

        Args:
            sdfg: The SDFG to compile and run remotely.
            dace_state: Provides the driver file (``get_dace_tmpfile``) and
                the repetition count.
            fail_on_nonzero: Forwarded to ``remote_exec_dace``.

        Raises:
            ValueError: If no driver file is available.
        """
        import shutil  # local import: only needed for temp-folder cleanup

        dace_progname = sdfg.name
        code_objects = sdfg.generate_code()
        use_mpi = Executor._use_mpi(code_objects)
        remote_workdir = self.config_get("execution", "general", "workdir")
        remote_base_path = self.config_get('default_build_folder')
        remote_dace_dir = os.path.join(remote_workdir, remote_base_path,
                                       dace_progname)

        tmpfolder = None
        try:
            tmpfolder = tempfile.mkdtemp()
            generate_program_folder(sdfg,
                                    code_objects,
                                    tmpfolder,
                                    config=self._config)
            self.create_remote_directory(remote_dace_dir)
            self.copy_folder_to_remote(tmpfolder, remote_dace_dir)

            # call compile.py on the remote node in the copied folder
            self.remote_compile(remote_dace_dir, dace_progname)

            # copy the input file and the .so file (with the right name)
            # to remote_dace_dir
            so_name = "lib" + dace_progname + "." + self.config_get(
                'compiler', 'library_extension')
            self.copy_file_from_remote(
                os.path.join(remote_dace_dir, 'build', so_name),
                os.path.join(tmpfolder, so_name))
            self.copy_file_to_remote(os.path.join(tmpfolder, so_name),
                                     remote_dace_dir)

            dace_file = dace_state.get_dace_tmpfile()
            if dace_file is None:
                raise ValueError("Dace file is None!")

            remote_dace_file = os.path.join(remote_workdir,
                                            os.path.basename(dace_file))
            self.copy_file_to_remote(dace_file, remote_dace_file)

            self.remote_exec_dace(remote_workdir,
                                  remote_dace_file,
                                  use_mpi,
                                  fail_on_nonzero,
                                  repetitions=dace_state.repetitions)

            self.show_output("Execution Terminated\n")

            # Best-effort retrieval: a missing results file is not fatal.
            try:
                self.copy_file_from_remote(remote_workdir + "/results.log",
                                           ".")
            except RuntimeError:
                pass

            # Copy back the instrumentation and vectorization results
            try:
                self.copy_folder_from_remote(
                    os.path.join(remote_dace_dir, 'perf'), ".")
            except RuntimeError:
                pass

            try:
                self.remote_delete_file(remote_workdir + "/results.log")
            except RuntimeError:
                pass

            self.remote_delete_file(remote_dace_file)
            self.remote_delete_dir(remote_dace_dir)
        except BaseException:
            # Running a custom script (the driver file), which can raise
            # any exception; surface the traceback to the client, re-raise.
            self.show_output(traceback.format_exc())
            raise
        finally:
            # Fix: the original leaked the local temporary staging folder.
            if tmpfolder is not None:
                shutil.rmtree(tmpfolder, ignore_errors=True)

        self.counter += 1
    def run_remote(self, sdfg: SDFG, dace_state, fail_on_nonzero: bool):
        """Compile *sdfg* on the remote host and execute it, with optional
        PAPI multi-run instrumentation.

        Stages the generated program in a local temporary folder, copies it
        to the remote ``.dacecache``, compiles and runs it there (multiple
        configurations when PAPI is enabled), then retrieves results and
        cleans up remote files. The local staging folder is always removed.

        Args:
            sdfg: The SDFG to compile and run remotely.
            dace_state: Provides the driver file (``get_dace_tmpfile``) and
                the repetition count.
            fail_on_nonzero: Forwarded to ``remote_exec_dace``.

        Raises:
            ValueError: If no driver file is available.
        """
        import shutil  # local import: only needed for temp-folder cleanup

        dace_progname = sdfg.name
        code_objects = sdfg.generate_code()
        use_mpi = Executor._use_mpi(code_objects)
        remote_workdir = self.config_get("execution", "general", "workdir")
        remote_dace_dir = os.path.join(remote_workdir, ".dacecache",
                                       dace_progname)

        tmpfolder = None
        try:
            tmpfolder = tempfile.mkdtemp()
            generate_program_folder(sdfg,
                                    code_objects,
                                    tmpfolder,
                                    config=self._config)
            self.create_remote_directory(remote_dace_dir)
            self.copy_folder_to_remote(tmpfolder, remote_dace_dir)

            # call compile.py on the remote node in the copied folder
            self.remote_compile(remote_dace_dir, dace_progname)

            # copy the input file and the .so file (with the right name)
            # to remote_dace_dir
            so_name = "lib" + dace_progname + "." + self.config_get(
                'compiler', 'library_extension')
            self.copy_file_from_remote(remote_dace_dir + "/build/" + so_name,
                                       tmpfolder + "/" + so_name)
            self.copy_file_to_remote(tmpfolder + "/" + so_name,
                                     remote_dace_dir)

            dace_file = dace_state.get_dace_tmpfile()
            if dace_file is None:
                raise ValueError("Dace file is None!")

            remote_dace_file = remote_workdir + "/" + os.path.basename(
                dace_file)
            self.copy_file_to_remote(dace_file, remote_dace_file)

            papi = PAPIUtils.is_papi_used(sdfg)

            # We got the file there, now we can run with different
            # configurations.
            if papi:
                multirun_num = PAPISettings.perf_multirun_num(
                    config=self._config)
                for iteration in range(multirun_num):
                    optdict, omp_thread_num = PAPIUtils.get_run_options(
                        self, iteration)

                    self.remote_exec_dace(remote_workdir,
                                          remote_dace_file,
                                          use_mpi,
                                          fail_on_nonzero,
                                          omp_num_threads=omp_thread_num,
                                          repetitions=dace_state.repetitions,
                                          additional_options_dict=optdict)
            else:
                self.remote_exec_dace(remote_workdir,
                                      remote_dace_file,
                                      use_mpi,
                                      fail_on_nonzero,
                                      repetitions=dace_state.repetitions)

            self.show_output("Execution Terminated\n")

            # Best-effort retrieval: a missing results file is not fatal.
            try:
                self.copy_file_from_remote(remote_workdir + "/results.log",
                                           ".")
            except RuntimeError:
                pass

            if papi:
                # Copy back the vectorization results
                PAPIUtils.retrieve_vectorization_report(
                    self, code_objects, remote_dace_dir)

                # Copy back the instrumentation results
                PAPIUtils.retrieve_instrumentation_results(
                    self, remote_workdir)

            try:
                self.remote_delete_file(remote_workdir + "/results.log")
            except RuntimeError:
                pass

            self.remote_delete_file(remote_dace_file)
            self.remote_delete_dir(remote_dace_dir)
        except BaseException:
            # Running a custom script (the driver file), which can raise
            # any exception; surface the traceback to the client, re-raise.
            self.show_output(traceback.format_exc())
            raise
        finally:
            # Fix: the original leaked the local temporary staging folder.
            if tmpfolder is not None:
                shutil.rmtree(tmpfolder, ignore_errors=True)

        self.counter += 1
Exemple #7
0
    def run(self, dace_state, fail_on_nonzero=False):
        """Compile the current DaCe program on the remote host, run it under
        multiple performance-counter configurations, and collect results.

        Args:
            dace_state: Provides the SDFG, generated code, and driver file.
            fail_on_nonzero: Forwarded to ``remote_exec_dace``.
        """
        dace_progname = dace_state.get_sdfg().name
        code_objects = dace_state.get_generated_code()

        # Figure out whether we should use MPI for launching
        use_mpi = False
        for code_object in code_objects:
            if code_object.target.target_name == 'mpi':
                use_mpi = True
                break

        # Check counter validity
        PerfUtils.check_performance_counters(self)

        remote_workdir = Config.get("execution", "general", "workdir")
        remote_dace_dir = remote_workdir + "/.dacecache/%s/" % dace_progname
        self.show_output("Executing DaCe program " + dace_progname + " on " + \
                Config.get("execution", "general", "host") + "\n")

        try:
            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Generating remote workspace")
            # NOTE(review): this temporary folder is never deleted.
            tmpfolder = tempfile.mkdtemp()
            generate_program_folder(dace_state.get_sdfg(), code_objects,
                                    tmpfolder)
            self.create_remote_directory(remote_dace_dir)
            self.copy_folder_to_remote(tmpfolder, remote_dace_dir)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Compiling...")
            # call compile.py on the remote node in the copied folder
            self.remote_compile(remote_dace_dir, dace_progname)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Done compiling")

            # copy the input file and the .so file (with the right name)
            # to remote_dace_dir
            so_name = "lib" + dace_progname + "." + Config.get(
                'compiler', 'library_extension')
            self.copy_file_from_remote(remote_dace_dir + "/build/" + so_name,
                                       tmpfolder + "/" + so_name)
            self.copy_file_to_remote(tmpfolder + "/" + so_name,
                                     remote_dace_dir)

            dace_file = dace_state.get_dace_tmpfile()
            if dace_file is None:
                raise ValueError("Dace file is None!")

            # copy the SDFG
            try:
                local_sdfg = tmpfolder + "/sdfg.out"
                sdfg = dace_state.get_sdfg()
                sdfg.save(local_sdfg)
                remote_sdfg = remote_workdir + "/sdfg.out"
                self.copy_file_to_remote(local_sdfg, remote_sdfg)
            except:
                # Best-effort: a failed SDFG save is reported but not fatal.
                print("Could NOT save the SDFG")

            remote_dace_file = remote_workdir + "/" + os.path.basename(
                dace_file)
            self.copy_file_to_remote(dace_file, remote_dace_file)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("All files copied to remote")

            # We got the file there, now we can run with different
            # configurations.
            for iteration in range(0, PerfSettings.perf_multirun_num()):
                optdict, omp_thread_num = PerfUtils.get_run_options(
                    self, iteration)

                self.remote_exec_dace(remote_workdir,
                                      remote_dace_file,
                                      use_mpi,
                                      fail_on_nonzero,
                                      omp_num_threads=omp_thread_num,
                                      additional_options_dict=optdict)

                if self.running_async:
                    # Add information about what is being run
                    self.async_host.notify("Done option threads=" +
                                           str(omp_thread_num))

            self.show_output("Execution Terminated\n")

            # Best-effort: results.log may not exist on the remote side.
            try:
                self.copy_file_from_remote(remote_workdir + "/results.log",
                                           ".")
            except:
                pass

            # Copy back the vectorization results
            PerfUtils.retrieve_vectorization_report(self, code_objects,
                                                    remote_dace_dir)

            # Copy back the instrumentation results
            PerfUtils.retrieve_instrumentation_results(self, remote_workdir)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Cleaning up")

            try:
                self.remote_delete_file(remote_workdir + "/results.log")
            except:
                print(
                    "WARNING: results.log could not be transmitted (probably not created)"
                )

            self.remote_delete_file(remote_dace_file)
            self.remote_delete_dir(remote_dace_dir)

            def deferred():
                # Update the plot from the retrieved log, then discard it.
                # NOTE(review): `res` is never used.
                try:
                    res = self.update_performance_plot("results.log",
                                                       str(self.counter))
                    os.remove("results.log")
                except FileNotFoundError:
                    print("WARNING: results.log could not be read")

            # NOTE(review): run_sync is invoked even when not running_async,
            # unlike the notify() calls above -- confirm this is intended.
            self.async_host.run_sync(deferred)

            if self.running_async:
                # Add information about what is being run
                self.async_host.notify("Done cleaning")

            # Also, update the performance data.
            self.rendered_graphs.set_memspeed_target()
            self.rendered_graphs.render_performance_data(
                Config.get("instrumentation", "papi_mode"))
        except Exception as e:
            # Report the failure; in headless mode exit with an error code,
            # otherwise continue so the GUI stays alive.
            print("\n\n\n")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print("Running the program failed:")
            traceback.print_exc()
            print(
                "Inspect above output for more information about executed command sequence."
            )
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            if self.headless:
                sys.exit(1)

        if self.running_async:
            self.async_host.notify("All done")
        self.counter += 1