def test():
    """Build a one-function C++ module in two stages and call it.

    The source text is first compiled to an object file. The bytes of that
    object file are then passed back to ``compile_from_string`` as a binary
    source to obtain the loadable library, whose ``greet`` symbol is expected
    to return 1.
    """
    toolchain = guess_toolchain()
    cxx_source = """ extern "C" { int const greet() { return 1; } } """

    # Stage 1: source text -> object file.
    _, _, object_path, _ = compile_from_string(
        toolchain, 'module', cxx_source, object=True)

    # Stage 2: raw object bytes -> shared library.
    with open(object_path, 'rb') as object_file:
        object_bytes = object_file.read()
    _, _, library_path, _ = compile_from_string(
        toolchain, 'module', object_bytes,
        source_name=['module.o'],
        object=False,
        source_is_binary=True)

    # Load the result and exercise the exported function.
    dll = CDLL(library_path)
    greet = dll.greet
    greet.restype = int
    assert greet() == 1
def test():
    """Check that an object file can be re-fed to the JIT as a binary source.

    A small ``extern "C"`` function is compiled to an object file, the object
    file is read back as bytes and linked into a shared library, and the
    library's ``greet`` function is called and must return 1.
    """
    toolchain = guess_toolchain()
    source_text = """ extern "C" { int const greet() { return 1; } } """

    # First pass: only produce the object file.
    first_pass = compile_from_string(toolchain, "module", source_text, object=True)
    _, _, obj_path, _ = first_pass

    # Second pass: hand the object file's bytes over as a binary source.
    with open(obj_path, "rb") as handle:
        obj_bytes = handle.read()
    second_pass = compile_from_string(
        toolchain, "module", obj_bytes,
        source_name=["module.o"], object=False, source_is_binary=True)
    _, _, shared_lib, _ = second_pass

    # Call into the freshly built library.
    library = CDLL(shared_lib)
    entry_point = library.greet
    entry_point.restype = int
    assert entry_point() == 1
def compile(self, host_toolchain, nvcc_toolchain,
            host_kwargs=None, nvcc_kwargs=None, **kwargs):
    """Return the extension module generated from the code described by *self*.

    The host code (``self.boost_module``) and the device code (*self*) are
    each compiled to a plain object file with
    :func:`codepy.jit.compile_from_string` and then linked into a single
    extension with :func:`codepy.jit.link_extension`. When neither object had
    to be recompiled, loading the previously linked module is attempted
    first, and linking is only redone if that fails.

    :arg host_toolchain: toolchain for the host code. It is copied, and the
        copy is passed to ``add_boost_python`` and ``add_cuda``.
    :arg nvcc_toolchain: toolchain for the device code. It is copied, and
        the copy is passed to ``add_cuda``.
    :arg host_kwargs: optional keyword arguments for the host compilation
        only; they override same-named entries of *kwargs*.
    :arg nvcc_kwargs: optional keyword arguments for the device compilation
        only; they override same-named entries of *kwargs*.
    :arg kwargs: keyword arguments forwarded to both
        ``compile_from_string`` calls and to ``link_extension``.
    :returns: the value returned by ``link_extension`` or ``load_dynamic``.
    """
    # ``None`` defaults avoid sharing one dict object between calls.
    if host_kwargs is None:
        host_kwargs = {}
    if nvcc_kwargs is None:
        nvcc_kwargs = {}

    from codepy.libraries import add_boost_python, add_cuda

    host_toolchain = host_toolchain.copy()
    add_boost_python(host_toolchain)
    add_cuda(host_toolchain)

    nvcc_toolchain = nvcc_toolchain.copy()
    add_cuda(nvcc_toolchain)

    host_code = str(self.boost_module.generate()) + "\n"
    device_code = str(self.generate()) + "\n"

    from codepy.jit import compile_from_string, link_extension

    local_host_kwargs = kwargs.copy()
    local_host_kwargs.update(host_kwargs)
    local_nvcc_kwargs = kwargs.copy()
    local_nvcc_kwargs.update(nvcc_kwargs)

    # Don't compile shared objects, just normal objects
    # (on some platforms, they're different)
    host_checksum, _, host_object, host_compiled = compile_from_string(
        host_toolchain, self.boost_module.name, host_code,
        object=True, **local_host_kwargs)
    device_checksum, _, device_object, device_compiled = compile_from_string(
        nvcc_toolchain, 'gpu', device_code, 'gpu.cu',
        object=True, **local_nvcc_kwargs)

    # The name of the shared lib depends on the hex checksums of both
    # host and device code, to prevent accidentally returning a cached
    # module with the wrong linkage.
    mod_name = "codepy.temp.%s.%s.module" % (host_checksum, device_checksum)

    def relink():
        return link_extension(host_toolchain,
                              [host_object, device_object],
                              mod_name, **kwargs)

    if host_compiled or device_compiled:
        return relink()

    import os.path
    destination_base = os.path.dirname(host_object)
    module_path = os.path.join(destination_base,
                               mod_name + host_toolchain.so_ext)
    try:
        # NOTE: ``imp`` was removed in Python 3.12; there this import fails
        # and the code falls back to relinking below.
        from imp import load_dynamic
        return load_dynamic(mod_name, module_path)
    except Exception:
        # Any failure to load the cached module is answered by relinking.
        return relink()
def jit_compile(soname, code, compiler):
    """
    JIT compile the given C/C++ ``code``.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processes trying to compile the same object.

    :param soname: A unique name for the jit-compiled shared object.
    :param code: String of C source code.
    :param compiler: The toolchain used for compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    # Suppress codepy complaining that it's taking too long to acquire the
    # cache lock. This warning can only appear in a multiprocess session,
    # typically (but not necessarily) when many processes are frequently
    # attempting jit-compilation (e.g., when running the test suite in
    # parallel).
    with warnings.catch_warnings():
        # ``catch_warnings`` alone only saves and restores the filter state;
        # the explicit filter is what actually silences the warning.
        warnings.simplefilter('ignore')
        tic = time()
        _, _, _, recompiled = compile_from_string(
            compiler, target, code, src_file,
            cache_dir=get_codepy_dir(),
            debug=configuration['debug_compiler'])
        toc = time()

    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc - tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc - tic))
def jit_compile(self, soname, code):
    """
    JIT compile a string of source code through codepy.

    The actual work is delegated to codepy's ``compile_from_string``, which
    caches compilation units and guards against several processes compiling
    the same object at once.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code to be JIT compiled. Ignored when the JIT backdoor
        is active, in which case the content of the source file on disk is
        compiled instead.

    Raises
    ------
    ValueError
        If the JIT backdoor is active but the source file cannot be found.
    """
    target = str(self.get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, self.src_ext)
    cache_dir = self.get_codepy_dir().joinpath(soname[:7])

    if configuration['jit-backdoor'] is not False:
        # Backdoor: `code` is discarded in favour of whatever is stored in
        # `src_file`
        try:
            with open(src_file, 'r') as handle:
                code = handle.read()
            # Stay clear of the regular JIT cache.
            # A `mkdtemp()` directory is not an option: with MPI, every rank
            # would end up with a different cache dir
            cache_dir = cache_dir.joinpath('jit-backdoor')
            cache_dir.mkdir(parents=True, exist_ok=True)
        except FileNotFoundError:
            raise ValueError("Trying to use the JIT backdoor for `%s`, but "
                             "the file isn't present" % src_file)
    else:
        # The usual path: one cache directory per soname prefix
        cache_dir.mkdir(parents=True, exist_ok=True)

    # Silence codepy's warning about the cache lock taking too long to
    # acquire. It can only show up in multiprocess sessions, typically (but
    # not necessarily) when many processes JIT-compile frequently (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        started = time()
        # Spinlock in case of MPI
        delay = 0 if configuration['mpi'] else 1
        outcome = compile_from_string(self, target, code, src_file,
                                      cache_dir=cache_dir,
                                      debug=configuration['debug-compiler'],
                                      sleep_delay=delay)
        _, _, _, recompiled = outcome
        finished = time()

    if recompiled:
        template = "%s: compiled `%s` [%.2f s]"
    else:
        template = "%s: cache hit `%s` [%.2f s]"
    debug(template % (self, src_file, finished-started))
def _compile_cuda(self, soname):
    """
    Compile the CUDA kernels and the host source, then link them together.

    The kernel file ``CUDA/<soname>_kernels.cu`` under the JIT directory is
    compiled to an object with a ``CUDADeviceCompiler``; ``self.code`` is
    compiled to an object with a ``CudaHostCompiler``; both objects are then
    linked into ``self.target`` plus the host compiler's ``so_ext``.

    Parameters
    ----------
    soname : str
        Base name used to locate the kernel file and to name its target.

    Raises
    ------
    ValueError
        If the kernel file does not exist.
    """
    kernels_src = '%s/CUDA/%s_kernels.cu' % (self.get_jit_dir(), soname)
    kernels_target = '%s/%s_kernels_cu' % (self.get_jit_dir(), soname)

    try:
        with open(kernels_src, 'r') as handle:
            kernels_code = handle.read()
    except FileNotFoundError:
        raise ValueError("Couldn't find file: %s" % kernels_src)

    device_compiler = CUDADeviceCompiler()
    host_compiler = CudaHostCompiler()

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        # Options shared by both object compilations
        shared_options = dict(
            cache_dir=self.cache_dir,
            debug=configuration['debug-compiler'],
            # Spinlock in case of MPI
            sleep_delay=0 if configuration['mpi'] else 1,
            object=True,
        )

        _, _, kernels_obj, _ = compile_from_string(
            device_compiler, kernels_target, kernels_code, kernels_src,
            **shared_options)
        _, _, host_obj, _ = compile_from_string(
            host_compiler, self.target, self.code, self.ops_src,
            **shared_options)

        library = '%s%s' % (self.target, host_compiler.so_ext)
        host_compiler.link_extension(library, [host_obj, kernels_obj],
                                     debug=configuration['debug-compiler'])
def build(self, name, code, debug=False, wait_on_error=None,
          debug_recompile=True):
    """Compile *code* into a shared library and load it.

    The source is logged at debug level, compiled through
    ``compile_from_string`` using ``self.toolchain`` and ``self.tempdir``,
    and the resulting file is opened with ``ctypes.CDLL``.

    :arg name: name handed to ``compile_from_string``; also used in the
        log messages.
    :arg code: the source code to compile.
    :arg debug: forwarded positionally to ``compile_from_string``.
    :arg wait_on_error: forwarded positionally to ``compile_from_string``.
    :arg debug_recompile: forwarded positionally to ``compile_from_string``.
    :returns: the loaded ``ctypes.CDLL`` object.
    """
    logger.debug(code)
    source_path = self._tempname("code." + self.source_suffix)

    # The trailing ``False`` is passed positionally -- presumably codepy's
    # ``object`` flag, i.e. build a loadable library rather than a bare
    # object file; confirm against the codepy signature.
    result = compile_from_string(
        self.toolchain, name, code, source_path, self.tempdir,
        debug, wait_on_error, debug_recompile, False)
    _, _, library_path, was_rebuilt = result

    if was_rebuilt:
        logger.debug(f"Kernel {name} compiled from source")
    else:
        logger.debug(f"Kernel {name} retrieved from cache")

    return ctypes.CDLL(library_path)
def build(self, name, code, debug=False, wait_on_error=None,
          debug_recompile=True):
    """Build a shared library from *code* and return it as a ``ctypes.CDLL``.

    *code* is written to the debug log first. Compilation goes through
    ``compile_from_string`` with ``self.toolchain``, a temporary source file
    name derived from ``self.source_suffix``, and ``self.tempdir``.

    :arg name: name handed to ``compile_from_string``; also reported in the
        log messages.
    :arg code: the source code to compile.
    :arg debug: forwarded positionally to ``compile_from_string``.
    :arg wait_on_error: forwarded positionally to ``compile_from_string``.
    :arg debug_recompile: forwarded positionally to ``compile_from_string``.
    :returns: the loaded ``ctypes.CDLL`` object.
    """
    logger.debug(code)
    c_source_name = self._tempname('code.' + self.source_suffix)

    # All options are passed positionally, in the order the callee expects.
    positional = (self.toolchain, name, code, c_source_name, self.tempdir,
                  debug, wait_on_error, debug_recompile, False)
    _, _, shared_object, fresh_build = compile_from_string(*positional)

    if fresh_build:
        logger.debug('Kernel {0} compiled from source'.format(name))
    else:
        logger.debug('Kernel {0} retrieved from cache'.format(name))

    return ctypes.CDLL(shared_object)
def call_function(source: str, fn_name: str, args: List[Any],
                  argtypes: Optional[List[ctypes._SimpleCData]] = None):
    """
    Calls the function *fn_name* in *source*.

    The MLIR code is run through ``mlir_opt`` with
    ``-convert-std-to-llvm=emit-c-wrappers``, translated to LLVM IR and then
    to object code, linked into a library via ``compile_from_string`` and
    loaded with :class:`ctypes.CDLL`. The symbol that is finally invoked is
    ``_mlir_ciface_<fn_name>``; its result type is set to ``None``, so
    nothing is returned.

    :arg source: The MLIR code whose function is to be called.
    :arg fn_name: Name of the function op which is to be called.
    :arg args: A list of args to be passed to the function. Each arg can
        have one of the following types:

        - :class:`numpy.ndarray`
        - :class:`numpy.number`
        - :class:`Memref`
    :arg argtypes: The ctypes argument types of the function. When *None*,
        they are obtained from ``guess_argtypes(args)``.
    """
    lowered = mlir_opt(source, ["-convert-std-to-llvm=emit-c-wrappers"])
    symbol = f"_mlir_ciface_{fn_name}"

    # Argument types are inferred from the raw arguments, i.e. before they
    # are converted for the call.
    if argtypes is None:
        argtypes = guess_argtypes(args)
    call_args = list(map(preprocess_arg, args))

    llvm_ir = mlir_to_llvmir(lowered)
    object_code = llvmir_to_obj(llvm_ir)

    toolchain = guess_toolchain()
    _, _, library_path, _ = compile_from_string(
        toolchain, symbol, object_code, ["module.o"],
        source_is_binary=True)

    library = ctypes.CDLL(library_path)
    entry = getattr(library, symbol)
    entry.argtypes = argtypes
    entry.restype = None
    entry(*call_args)
def compile(self, host_toolchain, nvcc_toolchain,
            host_kwargs=None, nvcc_kwargs=None, **kwargs):
    """Return the extension module generated from the code described by *self*.

    Host code (from ``self.boost_module``) and device code (from *self*) are
    compiled separately to object files with
    :func:`codepy.jit.compile_from_string` and linked with
    :func:`codepy.jit.link_extension`. If both objects came out of the cache,
    the previously linked module is loaded instead, with relinking as the
    fallback. Entries of *kwargs* are passed to both compilations and to the
    link step; *host_kwargs* and *nvcc_kwargs* add to, or override, them for
    the respective compilation only.
    """
    host_kwargs = {} if host_kwargs is None else host_kwargs
    nvcc_kwargs = {} if nvcc_kwargs is None else nvcc_kwargs

    from codepy.libraries import add_boost_python, add_cuda

    # Work on copies so the callers' toolchains stay as they were passed in.
    host_toolchain = host_toolchain.copy()
    add_boost_python(host_toolchain)
    add_cuda(host_toolchain)

    nvcc_toolchain = nvcc_toolchain.copy()
    add_cuda(nvcc_toolchain)

    host_code = "{}\n".format(self.boost_module.generate())
    device_code = "{}\n".format(self.generate())

    from codepy.jit import compile_from_string, link_extension

    host_options = dict(kwargs)
    host_options.update(host_kwargs)
    nvcc_options = dict(kwargs)
    nvcc_options.update(nvcc_kwargs)

    # Produce plain objects rather than shared objects
    # (the two differ on some platforms)
    host_result = compile_from_string(
        host_toolchain, self.boost_module.name, host_code,
        object=True, **host_options)
    host_checksum, _, host_object, host_compiled = host_result

    device_result = compile_from_string(
        nvcc_toolchain, "gpu", device_code, "gpu.cu",
        object=True, **nvcc_options)
    device_checksum, _, device_object, device_compiled = device_result

    # Both checksums go into the library name, so that a cached module
    # with the wrong linkage is never picked up by accident
    mod_name = f"codepy.temp.{host_checksum}.{device_checksum}.module"

    def link():
        return link_extension(host_toolchain,
                              [host_object, device_object],
                              mod_name, **kwargs)

    if host_compiled or device_compiled:
        return link()

    import os.path

    cache_location = os.path.split(host_object)[0]
    module_path = os.path.join(cache_location,
                               mod_name + host_toolchain.so_ext)
    try:
        from imp import load_dynamic
        return load_dynamic(mod_name, module_path)
    except Exception:
        return link()
def jit_compile(soname, code, compiler):
    """
    JIT compile a string of source code with the given compiler.

    Compilation is delegated to codepy's ``compile_from_string``, which
    caches compilation units and guards against several processes compiling
    the same object at once.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code to be JIT compiled. Ignored when the JIT backdoor
        is active, in which case the content of the source file on disk is
        compiled instead.
    compiler : Compiler
        The toolchain used for JIT compilation.

    Raises
    ------
    ValueError
        If the JIT backdoor is active but the source file cannot be found.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)
    cache_dir = get_codepy_dir().joinpath(soname[:7])

    use_backdoor = configuration['jit-backdoor'] is not False
    if not use_backdoor:
        # The common case: a family of cache directories keyed by the
        # leading characters of the soname
        cache_dir.mkdir(parents=True, exist_ok=True)
    else:
        # The `code` argument is thrown away; what gets compiled is the
        # current content of `src_file`
        try:
            with open(src_file, 'r') as source:
                code = source.read()
            # Keep out of the devito JIT cache.
            # Python's `mkdtemp()` won't do here since, with MPI, different
            # ranks would create different cache dirs
            cache_dir = cache_dir.joinpath('jit-backdoor')
            cache_dir.mkdir(parents=True, exist_ok=True)
        except FileNotFoundError:
            raise ValueError("Trying to use the JIT backdoor for `%s`, but "
                             "the file isn't present" % src_file)

    # codepy warns when acquiring the cache lock takes too long; that is
    # silenced here. The warning can only occur in a multiprocess session,
    # typically (but not necessarily) when many processes attempt
    # jit-compilation frequently (e.g., a parallel run of the test suite)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        t_start = time()
        # Spinlock in case of MPI
        spin = 0 if configuration['mpi'] else 1
        _, _, _, rebuilt = compile_from_string(
            compiler, target, code, src_file,
            cache_dir=cache_dir,
            debug=configuration['debug-compiler'],
            sleep_delay=spin)
        t_end = time()

    message = ("%s: compiled `%s` [%.2f s]" if rebuilt
               else "%s: cache hit `%s` [%.2f s]")
    debug(message % (compiler, src_file, t_end-t_start))
def jit_compile(soname, code, h_code, compiler):
    """
    JIT compile OPS source code given as strings.

    The header and the source are written into the JIT directory, the OPS
    translator (``ops.py`` below ``$OPS_INSTALL_PATH``) is run on the source,
    and the translated file is compiled together with the generated kernels
    through codepy's ``compile_from_string``. The backend is selected by
    ``configuration.ops['target']`` (``'CUDA'``, ``'OpenMP'`` or ``'MPI'``);
    for any other value nothing is compiled.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code handed to the OPS translator.
    h_code : str
        The content of the header file written next to the source.
    compiler : Compiler
        Accepted for interface compatibility only; the compiler actually
        used is chosen from ``configuration.ops['target']``.

    Raises
    ------
    FileNotFoundError
        If ``OPS_INSTALL_PATH`` is not set, or if a file expected from the
        translator is missing.
    """
    import glob
    import shutil

    # Fail fast, before touching the JIT directory: without the install
    # path the translator would be looked up as "None/../ops_translator/...".
    ops_install_path = os.environ.get("OPS_INSTALL_PATH")
    if not ops_install_path:
        raise FileNotFoundError(
            "The OPS_INSTALL_PATH environment variable is not set; cannot "
            "locate the OPS translator")

    jit_dir = get_jit_dir()
    target = str(jit_dir.joinpath(soname))
    src_file = "%s.cpp" % target
    h_file = "%s.h" % target

    # Make a suite of cache directories based on the soname
    cache_dir = get_codepy_dir().joinpath(soname[:7])
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Drop stale headers left by earlier runs (best effort, like `rm *.h`)
    stale_headers = glob.glob(os.path.join(glob.escape(str(jit_dir)), "*.h"))
    for stale_header in stale_headers:
        try:
            os.remove(stale_header)
        except OSError:
            # Deliberately ignored: cleanup must not abort the compilation
            pass

    with open(h_file, 'w') as f:
        f.write("\n")
        f.write(h_code)
    with open(src_file, 'w') as f:
        f.write(code)

    # OPS translation; the translator's exit status is not checked, a
    # failure shows up as a missing output file right below
    subprocess.run(["%s/../ops_translator/c/ops.py" % ops_install_path,
                    "%s.cpp" % soname],
                   cwd=jit_dir)

    ops_src = '%s/%s_ops.cpp' % (jit_dir, soname)
    with open(ops_src, 'r') as f:
        ops_code = f.read()

    backend = configuration.ops['target']

    if backend == 'CUDA':
        cuda_src = '%s/CUDA/%s_kernels.cu' % (jit_dir, soname)
        cuda_target = '%s/%s_kernels_cu' % (jit_dir, soname)
        with open(cuda_src, 'r') as f:
            cuda_code = f.read()

        cuda_device_compiler = OPSCUDADeviceCompiler()
        cuda_host_compiler = OPSCudaHostCompiler()

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            # Spinlock in case of MPI
            sleep_delay = 0 if configuration['mpi'] else 1

            _, _, cuda_o, _ = compile_from_string(
                cuda_device_compiler, cuda_target, cuda_code, cuda_src,
                cache_dir=cache_dir,
                debug=configuration['debug-compiler'],
                sleep_delay=sleep_delay, object=True)
            _, _, src_o, _ = compile_from_string(
                cuda_host_compiler, target, ops_code, ops_src,
                cache_dir=cache_dir,
                debug=configuration['debug-compiler'],
                sleep_delay=sleep_delay, object=True)

            cuda_host_compiler.link_extension(
                '%s%s' % (target, cuda_host_compiler.so_ext),
                [src_o, cuda_o],
                debug=configuration['debug-compiler'])

        # Remove the generated CUDA kernels to avoid reuse
        shutil.rmtree(os.path.join(str(jit_dir), "CUDA"), ignore_errors=True)

    elif backend in ('OpenMP', 'MPI'):
        # Both backends consume the same generated kernel file and differ
        # only in the compiler class
        kernel_src = '%s/MPI_OpenMP/%s_omp_kernels.cpp' % (jit_dir, soname)
        with open(kernel_src, 'r') as f:
            kernel_code = f.read()

        if backend == 'OpenMP':
            backend_compiler = OPSOpenMPCompiler()
        else:
            backend_compiler = OPSMPICompiler()

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            # Spinlock in case of MPI
            sleep_delay = 0 if configuration['mpi'] else 1

            compile_from_string(
                backend_compiler, target,
                [ops_code, kernel_code], [ops_src, kernel_src],
                cache_dir=cache_dir,
                debug=configuration['debug-compiler'],
                sleep_delay=sleep_delay)

        # Remove the generated kernels to avoid reuse
        shutil.rmtree(os.path.join(str(jit_dir), "MPI_OpenMP"),
                      ignore_errors=True)
def compile(self, host_toolchain, nvcc_toolchain,
            host_kwargs=None, nvcc_kwargs=None, **kwargs):
    """Return the extension module generated from the code described by *self*.

    The host code (``self.boost_module``) and the device code (*self*) are
    each compiled to a plain object file with
    :func:`codepy.jit.compile_from_string` and then linked into a single
    extension with :func:`codepy.jit.link_extension`, under the module name
    reported for the host object. When neither object had to be recompiled,
    loading the previously linked module is attempted first, and linking is
    only redone if that fails.

    :arg host_toolchain: toolchain for the host code. It is copied, and the
        copy is passed to ``add_boost_python`` and ``add_cuda``.
    :arg nvcc_toolchain: toolchain for the device code. It is copied, and
        the copy is passed to ``add_cuda``.
    :arg host_kwargs: optional keyword arguments for the host compilation
        only; they override same-named entries of *kwargs*.
    :arg nvcc_kwargs: optional keyword arguments for the device compilation
        only; they override same-named entries of *kwargs*.
    :arg kwargs: keyword arguments forwarded to both
        ``compile_from_string`` calls and to ``link_extension``.
    :returns: the value returned by ``link_extension`` or ``load_dynamic``.
    """
    # ``None`` defaults avoid sharing one dict object between calls.
    if host_kwargs is None:
        host_kwargs = {}
    if nvcc_kwargs is None:
        nvcc_kwargs = {}

    from codepy.libraries import add_boost_python, add_cuda

    host_toolchain = host_toolchain.copy()
    add_boost_python(host_toolchain)
    add_cuda(host_toolchain)

    nvcc_toolchain = nvcc_toolchain.copy()
    add_cuda(nvcc_toolchain)

    host_code = str(self.boost_module.generate()) + "\n"
    device_code = str(self.generate()) + "\n"

    from codepy.jit import compile_from_string, link_extension

    local_host_kwargs = kwargs.copy()
    local_host_kwargs.update(host_kwargs)
    local_nvcc_kwargs = kwargs.copy()
    local_nvcc_kwargs.update(nvcc_kwargs)

    # Don't compile shared objects, just normal objects
    # (on some platforms, they're different)
    host_mod_name, host_object, host_compiled = compile_from_string(
        host_toolchain, self.boost_module.name, host_code,
        object=True, **local_host_kwargs)
    _, device_object, device_compiled = compile_from_string(
        nvcc_toolchain, 'gpu', device_code, 'gpu.cu',
        object=True, **local_nvcc_kwargs)

    def relink():
        return link_extension(host_toolchain,
                              [host_object, device_object],
                              host_mod_name, **kwargs)

    if host_compiled or device_compiled:
        return relink()

    import os.path
    destination_base = os.path.dirname(host_object)
    module_path = os.path.join(destination_base,
                               host_mod_name + host_toolchain.so_ext)
    try:
        # NOTE: ``imp`` was removed in Python 3.12; there this import fails
        # and the code falls back to relinking below.
        from imp import load_dynamic
        return load_dynamic(host_mod_name, module_path)
    except Exception:
        # Any failure to load the cached module is answered by relinking.
        return relink()