예제 #1
0
def build_checker():
    """Compile the ONNX op-checker library and return the loaded handle.

    The source is read from ``include/op_checker.h`` inside the directory
    that contains ``daceml.onnx``, wrapped in a C++ code object for the CPU
    code generator (with the ``ONNXRuntime`` environment), then generated and
    built under the relative folder ``.dacecache/onnx_op_checker``.

    The resulting ``ctypes.CDLL`` is stored on the function itself as
    ``build_checker.dll``; later calls return that object without rebuilding.

    :return: the ``ctypes.CDLL`` handle of the compiled checker library.
    """
    # Fast path: a previous call already built and loaded the library.
    try:
        return build_checker.dll
    except AttributeError:
        pass

    onnx_package_dir = os.path.dirname(inspect.getfile(daceml.onnx))
    header_path = os.path.join(onnx_package_dir, "include", "op_checker.h")
    with open(header_path, "r") as header_file:
        header_source = header_file.read()

    checker_object = codeobject.CodeObject(
        "onnx_op_checker",
        header_source,
        "cpp",
        targets.cpu.CPUCodeGen,
        "ONNXOpChecker",
        environments={"ONNXRuntime"},
    )

    build_dir = os.path.join('.dacecache', "onnx_op_checker")
    compiler.generate_program_folder(None, [checker_object], build_dir)
    compiler.configure_and_compile(build_dir)

    library_path = compiler.get_binary_name(build_dir, "onnx_op_checker")
    build_checker.dll = ctypes.CDLL(library_path)
    return build_checker.dll
예제 #2
0
def cuda_helper():
    """Build a small CUDA helper library and return an object wrapping it.

    A C++ code object exposing ``host_to_gpu`` (a host-to-device
    ``cudaMemcpy`` followed by CUDA error checks) is generated together with
    an empty ``.cu`` code object, compiled in
    ``<default_build_folder>/cuda_helper`` and loaded as a reloadable DLL.

    :return: a ``CudaHelper`` instance; the library is loaded when the
             instance is created and unloaded when it is garbage collected.
    """

    helper_code = """
    #include <dace/dace.h>
    
    extern "C" {
        int host_to_gpu(void* gpu, void* host, size_t size) {
            auto result = cudaMemcpy(gpu, host, size, cudaMemcpyHostToDevice);
            DACE_CUDA_CHECK(cudaGetLastError());
            DACE_CUDA_CHECK(cudaDeviceSynchronize());
            return result;
        } 
    } 
    """
    program = codeobject.CodeObject("cuda_helper", helper_code, "cpp",
                                    targets.cpu.CPUCodeGen, "CudaHelper")

    # Empty CUDA code object; presumably present so that the build is
    # configured with the CUDA target -- TODO confirm.
    dummy_cuda_target = codeobject.CodeObject("dummy", "", "cu",
                                              targets.cuda.CUDACodeGen,
                                              "CudaDummy")

    build_folder = dace.Config.get('default_build_folder')
    BUILD_PATH = os.path.join(build_folder, "cuda_helper")
    compiler.generate_program_folder(None, [program, dummy_cuda_target],
                                     BUILD_PATH)
    compiler.configure_and_compile(BUILD_PATH)

    helper_dll = compiled_sdfg.ReloadableDLL(
        compiler.get_binary_name(BUILD_PATH, "cuda_helper"), "cuda_helper")

    class CudaHelper:
        """Thin Python wrapper around the compiled ``host_to_gpu`` symbol."""
        def __init__(self):
            self.dll = helper_dll
            helper_dll.load()

            self._host_to_gpu = helper_dll.get_symbol("host_to_gpu")
            self._host_to_gpu.restype = ctypes.c_int

        def __del__(self):
            self.dll.unload()

        def host_to_gpu(self, gpu_ptr: int, numpy_array: np.ndarray):
            """Copy the contents of ``numpy_array`` to device memory.

            :param gpu_ptr: integer address of the destination buffer, passed
                            to the native function as a ``void*``.
            :param numpy_array: host array to copy; must be C- or
                                Fortran-contiguous.
            :raises ValueError: if the array is not contiguous, or if the
                                native call returns a nonzero result.
            """
            # The native side copies ``size`` consecutive bytes starting at
            # the array's data pointer. That only matches the array's
            # elements when they occupy a single contiguous block; a strided
            # or broadcast view would copy unrelated bytes or read past the
            # underlying buffer.
            flags = numpy_array.flags
            if not (flags.c_contiguous or flags.f_contiguous):
                raise ValueError(
                    "host_to_gpu requires a contiguous numpy array")

            size = ctypes.sizeof(
                dtypes._FFI_CTYPES[numpy_array.dtype.type]) * numpy_array.size
            result = ctypes.c_int(
                self._host_to_gpu(
                    ctypes.c_void_p(gpu_ptr),
                    ctypes.c_void_p(
                        numpy_array.__array_interface__["data"][0]),
                    ctypes.c_size_t(size)))
            if result.value != 0:
                raise ValueError("host_to_gpu returned nonzero result!")

    return CudaHelper()
예제 #3
0
    def run_local(self, sdfg: SDFG, driver_file: str):
        """Compile ``sdfg`` and run ``driver_file`` in this process.

        While compiling and running, ``sys.stdout`` and ``sys.stderr`` are
        replaced by wrappers that forward output to ``self.show_output``, and
        the ``compiler.use_cache`` configuration entry is forced to ``True``
        for the duration of the driver script. Both are restored on exit,
        including when compilation or the script raises.

        :param sdfg: the SDFG to generate code for; it is compiled into
                     ``sdfg.build_folder``.
        :param driver_file: path of the Python script executed with
                            ``runpy.run_path`` as ``__main__``.
        :raises NotImplementedError: if ``Executor._use_mpi`` reports that
                                     the generated code objects use MPI.
        :raises SystemExit: re-raised if the script exits with a nonzero
                            code. Other exceptions from the script propagate
                            unchanged.
        """
        workdir = sdfg.build_folder
        if Config.get_bool('diode', 'general', 'library_autoexpand'):
            sdfg.expand_library_nodes()
        code_objects = sdfg.generate_code()
        use_mpi = Executor._use_mpi(code_objects)
        # TODO: Implement (instead of pyrun, use mpirun/mpiexec)
        if use_mpi:
            raise NotImplementedError('Running MPI locally unimplemented')

        # Pipe stdout/stderr back to client output
        stdout = sys.stdout
        stderr = sys.stderr
        sys.stdout = FunctionStreamWrapper(self.show_output, stdout.write)
        sys.stderr = FunctionStreamWrapper(self.show_output, stderr.write)
        try:
            # Compile SDFG
            generate_program_folder(sdfg, code_objects, workdir, self._config)
            configure_and_compile(workdir, sdfg.name)

            self.show_output("Running script\n")

            # Run driver script with the compiled SDFG(s) as the default
            old_usecache = Config.get_bool('compiler', 'use_cache')
            Config.set('compiler', 'use_cache', value=True)
            try:
                try:
                    runpy.run_path(driver_file, run_name='__main__')
                except SystemExit as ex:
                    # An exit code of 0 or None (bare ``sys.exit()``) means
                    # success. Otherwise print the traceback here, because
                    # "raise" alone will not trigger a printout on the
                    # server.
                    if ex.code not in (0, None):
                        traceback.print_exc()
                        raise

                self.show_output("Execution Terminated\n")
            finally:
                # Revert configuration even if the script failed
                Config.set('compiler', 'use_cache', value=old_usecache)
        finally:
            # Revert output redirection even if compiling or running failed
            sys.stdout = stdout
            sys.stderr = stderr