def build_checker():
    """Compile and return the ONNX op-checker DLL, caching it across calls.

    The C++ source is taken from ``op_checker.h`` next to the
    :mod:`daceml.onnx` package, compiled through DaCe's CPU code generator
    with the ONNXRuntime environment, and loaded via :mod:`ctypes`.

    :return: a :class:`ctypes.CDLL` handle to the compiled checker.
    """
    # Memoize on the function object so the (expensive) compilation
    # happens at most once per process.
    if hasattr(build_checker, "dll"):
        return build_checker.dll

    checker_code_path = os.path.join(
        os.path.dirname(inspect.getfile(daceml.onnx)), "include",
        "op_checker.h")
    with open(checker_code_path, "r") as f:
        checker_code = f.read()

    program = codeobject.CodeObject("onnx_op_checker",
                                    checker_code,
                                    "cpp",
                                    targets.cpu.CPUCodeGen,
                                    "ONNXOpChecker",
                                    environments={"ONNXRuntime"})

    # Consistency fix: use the configured build folder (as cuda_helper does)
    # instead of a hard-coded '.dacecache', which ignored user configuration.
    build_folder = dace.Config.get('default_build_folder')
    BUILD_PATH = os.path.join(build_folder, "onnx_op_checker")
    compiler.generate_program_folder(None, [program], BUILD_PATH)
    compiler.configure_and_compile(BUILD_PATH)

    checker_dll = ctypes.CDLL(
        compiler.get_binary_name(BUILD_PATH, "onnx_op_checker"))
    build_checker.dll = checker_dll
    return checker_dll
def cuda_helper():
    """Build and load a tiny CUDA utility library for tests.

    Compiles a C++ snippet exposing ``host_to_gpu`` (a checked
    ``cudaMemcpy`` host-to-device copy) and returns an object wrapping
    the loaded DLL with a ``host_to_gpu(gpu_ptr, numpy_array)`` method.
    """
    helper_code = """
#include <dace/dace.h>

extern "C" {
int host_to_gpu(void* gpu, void* host, size_t size) {
    auto result = cudaMemcpy(gpu, host, size, cudaMemcpyHostToDevice);
    DACE_CUDA_CHECK(cudaGetLastError());
    DACE_CUDA_CHECK(cudaDeviceSynchronize());
    return result;
}
}
"""
    # The helper itself compiles with the CPU code generator; an empty
    # CUDA code object is added so the build links against the CUDA toolchain.
    host_obj = codeobject.CodeObject("cuda_helper", helper_code, "cpp",
                                     targets.cpu.CPUCodeGen, "CudaHelper")
    cuda_obj = codeobject.CodeObject("dummy", "", "cu",
                                     targets.cuda.CUDACodeGen, "CudaDummy")

    build_path = os.path.join(dace.Config.get('default_build_folder'),
                              "cuda_helper")
    compiler.generate_program_folder(None, [host_obj, cuda_obj], build_path)
    compiler.configure_and_compile(build_path)

    helper_dll = compiled_sdfg.ReloadableDLL(
        compiler.get_binary_name(build_path, "cuda_helper"), "cuda_helper")

    class CudaHelper:
        """Thin wrapper owning the loaded DLL and its symbols."""

        def __init__(self):
            self.dll = helper_dll
            helper_dll.load()
            # Resolve the symbol once and pin its return type.
            self._host_to_gpu = helper_dll.get_symbol("host_to_gpu")
            self._host_to_gpu.restype = ctypes.c_int

        def __del__(self):
            # Release the shared library when the wrapper is collected.
            self.dll.unload()

        def host_to_gpu(self, gpu_ptr: int, numpy_array: np.ndarray):
            """Copy *numpy_array* into device memory at raw pointer *gpu_ptr*."""
            elem_size = ctypes.sizeof(
                dtypes._FFI_CTYPES[numpy_array.dtype.type])
            nbytes = elem_size * numpy_array.size
            host_ptr = numpy_array.__array_interface__["data"][0]
            status = ctypes.c_int(
                self._host_to_gpu(ctypes.c_void_p(gpu_ptr),
                                  ctypes.c_void_p(host_ptr),
                                  ctypes.c_size_t(nbytes)))
            if status.value != 0:
                raise ValueError("host_to_gpu returned nonzero result!")

    return CudaHelper()
def run_local(self, sdfg: SDFG, driver_file: str):
    """Compile *sdfg* locally and execute *driver_file* against it.

    Redirects stdout/stderr through ``self.show_output`` for the duration
    of the run and temporarily forces ``compiler.use_cache`` so the driver
    script picks up the freshly compiled SDFG.

    :param sdfg: the SDFG to expand, code-generate and compile.
    :param driver_file: path of the Python script run via :mod:`runpy`.
    :raises NotImplementedError: if the generated code requires MPI.
    """
    workdir = sdfg.build_folder
    if Config.get_bool('diode', 'general', 'library_autoexpand'):
        sdfg.expand_library_nodes()

    code_objects = sdfg.generate_code()
    use_mpi = Executor._use_mpi(code_objects)
    # TODO: Implement (instead of pyrun, use mpirun/mpiexec)
    if use_mpi:
        raise NotImplementedError('Running MPI locally unimplemented')

    # Pipe stdout/stderr back to client output
    stdout = sys.stdout
    stderr = sys.stderr
    sys.stdout = FunctionStreamWrapper(self.show_output, stdout.write)
    sys.stderr = FunctionStreamWrapper(self.show_output, stderr.write)

    # BUG FIX: restoration of the streams and of the use_cache setting
    # previously ran only on the success path; a raising driver script
    # left stdout/stderr redirected and the global config mutated.
    # Both reverts now happen in `finally` blocks.
    try:
        # Compile SDFG
        generate_program_folder(sdfg, code_objects, workdir, self._config)
        configure_and_compile(workdir, sdfg.name)

        self.show_output("Running script\n")

        # Run driver script with the compiled SDFG(s) as the default
        old_usecache = Config.get_bool('compiler', 'use_cache')
        Config.set('compiler', 'use_cache', value=True)
        try:
            runpy.run_path(driver_file, run_name='__main__')
        # Catching all exceptions, including SystemExit
        except (Exception, SystemExit) as ex:
            # Corner case: If exited with error code 0, it is a success
            if isinstance(ex, SystemExit):
                # If the exit code is nonzero, "raise" will not trigger a
                # printout on the server
                if ex.code != 0:
                    traceback.print_exc()
                    raise
            else:
                raise
        finally:
            # Revert configuration even if the driver raised
            Config.set('compiler', 'use_cache', value=old_usecache)

        self.show_output("Execution Terminated\n")
    finally:
        # Always undo the output redirection
        sys.stdout = stdout
        sys.stderr = stderr