def compile_cuda(pyfunc, return_type, args, debug=False, inline=False):
    """Lower *pyfunc* to LLVM IR for the CUDA target and return the
    compile result.  No native code is generated here (``no_compile``).

    :param pyfunc: Python function to compile.
    :param return_type: Numba return type, or None to infer it.
    :param args: tuple of Numba argument types.
    :param debug: when true, emit debug info in the lowered code.
    :param inline: when true, force inlining.
    """
    # First compilation will trigger the initialization of the CUDA backend.
    from .descriptor import CUDATargetDesc
    typingctx = CUDATargetDesc.typingctx
    targetctx = CUDATargetDesc.targetctx
    flags = compiler.Flags()
    # Do not compile (generate native code), just lower (to LLVM)
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')
    flags.set('no_cfunc_wrapper')
    # `debug` maps onto the debuginfo flag (the old "TODO handle debug
    # flag" note was stale — it is handled right here).
    if debug:
        flags.set('debuginfo')
    if inline:
        flags.set('forceinline')
    # Run compilation pipeline
    cres = compiler.compile_extra(typingctx=typingctx,
                                  targetctx=targetctx,
                                  func=pyfunc,
                                  args=args,
                                  return_type=return_type,
                                  flags=flags,
                                  locals={})
    library = cres.library
    # Finalize the library before handing the result back.
    library.finalize()
    return cres
def compile_hsa(pyfunc, return_type, args, debug=False):
    """Lower *pyfunc* to LLVM IR for the HSA target and return the
    compile result.  No native code is generated here (``no_compile``).

    :param pyfunc: Python function to compile.
    :param return_type: Numba return type, or None to infer it.
    :param args: tuple of Numba argument types.
    :param debug: when true, emit debug info (mirrors ``compile_cuda``).
        Previously accepted but ignored; now honored.
    """
    # First compilation will trigger the initialization of the HSA backend.
    from .descriptor import HSATargetDesc
    typingctx = HSATargetDesc.typingctx
    targetctx = HSATargetDesc.targetctx
    flags = compiler.Flags()
    # Do not compile (generate native code), just lower (to LLVM)
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')
    flags.set('no_cfunc_wrapper')
    flags.unset('nrt')
    # Fix: the `debug` argument used to be silently dropped (the stale
    # "TODO handle debug flag" note).  Handle it the same way as
    # compile_cuda() in this file.  A default of False keeps existing
    # positional callers working.
    if debug:
        flags.set('debuginfo')
    # Run compilation pipeline
    cres = compiler.compile_extra(typingctx=typingctx,
                                  targetctx=targetctx,
                                  func=pyfunc,
                                  args=args,
                                  return_type=return_type,
                                  flags=flags,
                                  locals={})
    # Linking depending libraries
    # targetctx.link_dependencies(cres.llvm_module, cres.target_context.linking)
    library = cres.library
    library.finalize()
    return cres
def compile_cuda(pyfunc, return_type, args, debug=False, inline=False,
                 fastmath=False, nvvm_options=None):
    """Lower *pyfunc* to LLVM IR for the CUDA target.

    :param pyfunc: Python function to compile.
    :param return_type: Numba return type, or None to infer it.
    :param args: tuple of Numba argument types.
    :param debug: emit debug info when true.
    :param inline: force inlining when true.
    :param fastmath: enable fast-math flags when true.
    :param nvvm_options: optional mapping forwarded to NVVM.

    Returns the finalized compile result; native code generation is
    deliberately skipped (``no_compile``).
    """
    # Deferred import: first compilation triggers CUDA backend init.
    from .descriptor import cuda_target

    flags = CUDAFlags()
    # Stop after lowering: no native code, no CPython/cfunc wrappers.
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.no_cfunc_wrapper = True
    # Optional boolean flags, applied only when requested.
    for flag_name, requested in (('debuginfo', debug),
                                 ('forceinline', inline),
                                 ('fastmath', fastmath)):
        if requested:
            setattr(flags, flag_name, True)
    if nvvm_options:
        flags.nvvm_options = nvvm_options

    # Run the CUDA compilation pipeline.
    compile_result = compiler.compile_extra(
        typingctx=cuda_target.typing_context,
        targetctx=cuda_target.target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    compile_result.library.finalize()
    return compile_result
def compile_cuda(pyfunc, return_type, args, debug=False, inline=False,
                 fastmath=False):
    """Lower *pyfunc* to LLVM IR for the CUDA target and return the
    finalized compile result (no native code is generated).

    :param debug: emit debug info when true.
    :param inline: force inlining when true.
    :param fastmath: enable fast-math flags when true.
    """
    # Deferred import: first compilation triggers CUDA backend init.
    from .descriptor import cuda_target

    flags = compiler.Flags()
    # Stop after lowering: no native code, no CPython/cfunc wrappers.
    for always_on in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        flags.set(always_on)
    # Optional flags, applied only when the caller asked for them.
    for flag_name, requested in (('debuginfo', debug),
                                 ('forceinline', inline),
                                 ('fastmath', fastmath)):
        if requested:
            flags.set(flag_name)

    # Run the CUDA compilation pipeline.
    compile_result = compiler.compile_extra(
        typingctx=cuda_target.typingctx,
        targetctx=cuda_target.targetctx,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    compile_result.library.finalize()
    return compile_result
def compile(self, func, args, return_type=None, flags=DEFAULT_FLAGS):
    """
    Compile the function or retrieve an already compiled result
    from the cache.
    """
    from numba.core.registry import cpu_target

    key = (func, args, return_type, flags)
    if key not in self.cr_cache:
        # Register the contexts in case for nested @jit or @overload calls
        # (same as compile_isolated()).
        with cpu_target.nested_context(self.typingctx, self.targetctx):
            self.cr_cache[key] = compile_extra(
                self.typingctx,
                self.targetctx,
                func,
                args,
                return_type,
                flags,
                locals={},
            )
    return self.cr_cache[key]
def test_no_copy_usm_shared(capfd):
    """Lower `fn` once per input kind and inspect the generated LLVM:
    a USM-shared array must not trigger a queue memcpy, while a plain
    NumPy array must."""
    usm_data = usmarray.ones(10, dtype=np.int64)
    host_data = np.ones(10, dtype=np.int64)

    flags = compiler.Flags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False
    flags.auto_parallel = cpu.ParallelOptions(True)

    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context

    def lowered_ir(numba_type):
        # Run the DPPY pipeline on `fn` and return the LLVM IR text.
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=tuple([numba_type]),
            return_type=numba_type,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
        return cres.library.get_llvm_str()

    usm_type = typingctx.resolve_argument_type(usm_data)

    try:
        device = dpctl.SyclDevice("opencl:gpu:0")
    except ValueError:
        pytest.skip("Device not found")

    with dppy.offload_to_sycl_device(device):
        assert "DPCTLQueue_Memcpy" not in lowered_ir(usm_type)
        host_type = typingctx.resolve_argument_type(host_data)
        assert "DPCTLQueue_Memcpy" in lowered_ir(host_type)
def compile_isolated(pyfunc, argtypes, **kwargs):
    """Compile *pyfunc* for *argtypes* with the global CPU target.

    Extra keyword arguments are passed through to ``compile_extra``;
    ``return_type`` defaults to None (inferred) and ``locals`` to ``{}``.
    """
    from numba.core.registry import cpu_target

    # Work on a local copy of the options so the defaults are explicit.
    options = dict(kwargs)
    options.setdefault('return_type', None)
    options.setdefault('locals', {})
    return compile_extra(
        cpu_target.typing_context,
        cpu_target.target_context,
        pyfunc,
        argtypes,
        **options,
    )
def test_scalar(self): flags = Flags() # Compile the inner function global cnd_jitted cr1 = compile_isolated(cnd, (types.float64, )) cnd_jitted = cr1.entry_point # Manually type the compiled function for calling into tyctx = cr1.typing_context ctx = cr1.target_context signature = typing.make_concrete_template("cnd_jitted", cnd_jitted, [cr1.signature]) tyctx.insert_user_function(cnd_jitted, signature) # Compile the outer function array = types.Array(types.float64, 1, "C") argtys = (array, ) * 5 + (types.float64, types.float64) cr2 = compile_extra( tyctx, ctx, blackscholes_scalar_jitted, args=argtys, return_type=None, flags=flags, locals={}, ) jitted_bs = cr2.entry_point OPT_N = 400 iterations = 10 callResultGold = np.zeros(OPT_N) putResultGold = np.zeros(OPT_N) callResultNumba = np.zeros(OPT_N) putResultNumba = np.zeros(OPT_N) stockPrice = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0) optionStrike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0) optionYears = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0) args = stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY blackscholes_scalar(callResultGold, putResultGold, *args) jitted_bs(callResultNumba, putResultNumba, *args) delta = np.abs(callResultGold - callResultNumba) L1norm = delta.sum() / np.abs(callResultGold).sum() print("L1 norm: %E" % L1norm) print("Max absolute error: %E" % delta.max()) self.assertAlmostEqual(delta.max(), 0)
def try_lift(self, pyfunc, argtypes):
    """Compile *pyfunc* with loop-lifting flags, assert that exactly one
    loop was lifted, and return the compile result."""
    from numba.core.registry import cpu_target

    lifted_cres = compile_extra(
        cpu_target.typing_context,
        cpu_target.target_context,
        pyfunc,
        argtypes,
        return_type=None,
        flags=looplift_flags,
        locals={},
    )
    # Exactly one lifted loop is expected.
    self.assertEqual(len(lifted_cres.lifted), 1)
    return lifted_cres
def _compile_core(self, args, return_type):
    """Run the full compiler pipeline over the implementation selected
    for *args* and return the compile result.

    Raises the recorded typing error unless object mode is enabled.
    """
    descr = self.targetdescr

    # Build flags from the target options, then let subclasses adjust.
    flags = compiler.Flags()
    descr.options.parse_as_flags(flags, self.targetoptions)
    flags = self._customize_flags(flags)

    impl = self._get_implementation(args, {})
    cres = compiler.compile_extra(descr.typing_context,
                                  descr.target_context,
                                  impl,
                                  args=args,
                                  return_type=return_type,
                                  flags=flags,
                                  locals=self.locals,
                                  pipeline_class=self.pipeline_class)

    # Check typing error if object mode is used
    typing_error = cres.typing_error
    if typing_error is not None and not flags.enable_pyobject:
        raise typing_error
    return cres
def compile_cuda(pyfunc, return_type, args, debug=False, lineinfo=False,
                 inline=False, fastmath=False, nvvm_options=None):
    """Lower *pyfunc* to LLVM IR for the CUDA target and return the
    finalized compile result (no native code is generated).

    :param debug: emit debug info and use the 'python' error model.
    :param lineinfo: emit debug info for line mapping only.
    :param inline: force inlining when true.
    :param fastmath: enable fast-math flags when true.
    :param nvvm_options: optional mapping forwarded to NVVM.
    """
    # Deferred import: first compilation triggers CUDA backend init.
    from .descriptor import cuda_target

    flags = CUDAFlags()
    # Stop after lowering: no native code, no CPython/cfunc wrappers.
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.no_cfunc_wrapper = True

    # Note both debug and lineinfo turn on debug information in the
    # compiled code, but we keep them separate arguments in case we
    # later want to overload some other behavior on the debug flag.
    # In particular, -opt=3 is not supported with -g.
    emit_debuginfo = debug or lineinfo
    if emit_debuginfo:
        flags.debuginfo = True
    flags.error_model = 'python' if emit_debuginfo else 'numpy'

    if inline:
        flags.forceinline = True
    if fastmath:
        flags.fastmath = True
    if nvvm_options:
        flags.nvvm_options = nvvm_options

    # Run the CUDA compilation pipeline.
    compile_result = compiler.compile_extra(
        typingctx=cuda_target.typing_context,
        targetctx=cuda_target.target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    compile_result.library.finalize()
    return compile_result
def _compile_core(self, sig, flags, locals):
    """
    Trigger the compiler on the core function or load a previously
    compiled version from the cache. Returns the CompileResult.
    """
    typingctx = self.targetdescr.typing_context
    targetctx = self.targetdescr.target_context

    @contextmanager
    def store_overloads_on_success():
        # use to ensure overloads are stored on success
        # NOTE: this closure reads `cres` from the enclosing scope;
        # `cres` is bound inside the `with` body below.  The else-branch
        # runs whenever the body exits without raising — including via
        # an early `return` — so both the cache-hit and the
        # freshly-compiled result get recorded in self.overloads.
        try:
            yield
        except Exception:
            raise
        else:
            exists = self.overloads.get(cres.signature)
            if exists is None:
                self.overloads[cres.signature] = cres

    # Use cache and compiler in a critical section
    with global_compiler_lock:
        with store_overloads_on_success():
            # attempt look up of existing
            cres = self.cache.load_overload(sig, targetctx)
            if cres is not None:
                return cres

            # Compile
            args, return_type = sigutils.normalize_signature(sig)
            cres = compiler.compile_extra(
                typingctx,
                targetctx,
                self.py_func,
                args=args,
                return_type=return_type,
                flags=flags,
                locals=locals,
            )

            # cache lookup failed before so safe to save
            self.cache.save_overload(sig, cres)
            return cres
def _cull_exports(self):
    """Read all the exported functions/modules in the translator
    environment, and join them into a single LLVM module.

    Returns the finalized library; also populates
    self.exported_function_types / function_environments / environment_gvs
    and self.dll_exports as a side effect.
    """
    self.exported_function_types = {}
    self.function_environments = {}
    self.environment_gvs = {}

    codegen = self.context.codegen()
    library = codegen.create_library(self.module_name)

    # Generate IR for all exported functions
    flags = Flags()
    flags.set("no_compile")
    if not self.export_python_wrap:
        # No Python wrapper requested, so skip the CPython/cfunc shims.
        flags.set("no_cpython_wrapper")
        flags.set("no_cfunc_wrapper")
    if self.use_nrt:
        flags.set("nrt")
        # Compile NRT helpers
        nrt_module, _ = nrtdynmod.create_nrt_module(self.context)
        library.add_ir_module(nrt_module)

    for entry in self.export_entries:
        cres = compile_extra(self.typing_context, self.context,
                             entry.function,
                             entry.signature.args,
                             entry.signature.return_type, flags,
                             locals={}, library=library)

        func_name = cres.fndesc.llvm_func_name
        llvm_func = cres.library.get_function(func_name)

        if self.export_python_wrap:
            # Hide the raw compiled function and export the CPython
            # wrapper under the (mangled) entry symbol instead.
            llvm_func.linkage = lc.LINKAGE_INTERNAL
            wrappername = cres.fndesc.llvm_cpython_wrapper_name
            wrapper = cres.library.get_function(wrappername)
            wrapper.name = self._mangle_method_symbol(entry.symbol)
            wrapper.linkage = lc.LINKAGE_EXTERNAL
            fnty = cres.target_context.call_conv.get_function_type(
                cres.fndesc.restype, cres.fndesc.argtypes)
            # Record metadata needed later for wrapper emission.
            self.exported_function_types[entry] = fnty
            self.function_environments[entry] = cres.environment
            self.environment_gvs[entry] = cres.fndesc.env_name
        else:
            # Export the compiled function directly under its symbol.
            llvm_func.name = entry.symbol
            self.dll_exports.append(entry.symbol)

    if self.export_python_wrap:
        wrapper_module = library.create_ir_module("wrapper")
        self._emit_python_wrapper(wrapper_module)
        library.add_ir_module(wrapper_module)

    # Hide all functions in the DLL except those explicitly exported
    library.finalize()
    for fn in library.get_defined_functions():
        if fn.name not in self.dll_exports:
            if fn.linkage in {Linkage.private, Linkage.internal}:
                # Private/Internal linkage must have "default" visibility
                fn.visibility = "default"
            else:
                fn.visibility = 'hidden'
    return library
def compile_with_dppy(pyfunc, return_type, args, is_kernel, debug=None):
    """
    Compiles with Numba_dppy's pipeline and returns the compiled result.

    Args:
        pyfunc: The Python function (or Numba FunctionIR) to be compiled.
        return_type: The Numba type of the return value.
        args: The list of arguments sent to the Python function.
        is_kernel (bool): Indicates whether the function is decorated
            with @dppy.kernel or not.
        debug (bool): Flag to turn debug mode ON/OFF.

    Returns:
        cres: Compiled result.

    Raises:
        TypeError: @dppy.kernel does not allow users to return any
            value. TypeError is raised when users do.
        TypeError: If pyfunc is neither a Python function nor a Numba
            FunctionIR object.
    """
    # First compilation will trigger the initialization of the OpenCL backend.
    from .descriptor import dppy_target

    typingctx = dppy_target.typing_context
    targetctx = dppy_target.target_context

    flags = compiler.Flags()
    # Do not compile (generate native code), just lower (to LLVM)
    flags.debuginfo = config.DEBUGINFO_DEFAULT
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False

    # An explicit debug argument overrides the configured default.
    if debug is not None:
        flags.debuginfo = debug

    # Run compilation pipeline
    if isinstance(pyfunc, FunctionType):
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=pyfunc,
            args=args,
            return_type=return_type,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
    elif isinstance(pyfunc, ir.FunctionIR):
        cres = compiler.compile_ir(
            typingctx=typingctx,
            targetctx=targetctx,
            func_ir=pyfunc,
            args=args,
            return_type=return_type,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
    else:
        # Fix: `assert 0` is stripped under `python -O` and carries no
        # message; raise an explicit, descriptive error instead.
        raise TypeError(
            "Unsupported function type: %s" % (type(pyfunc),)
        )

    if is_kernel:
        # Kernels must not return a value; this raises TypeError if so.
        assert_no_return(cres.signature.return_type)

    # Linking depending libraries
    library = cres.library
    library.finalize()

    return cres
def compile_instance(func, sig, target: TargetInfo,
                     typing_context, target_context, pipeline_class,
                     main_library, debug=False):
    """Compile a function with given signature. Return function name when
    successful, otherwise return None after emitting a warning.
    """
    flags = compiler.Flags()
    # Numba >= 0.54 switched Flags from set()/unset() to attributes.
    if get_version('numba') >= (0, 54):
        flags.no_compile = True
        flags.no_cpython_wrapper = True
        flags.no_cfunc_wrapper = True
    else:
        flags.set('no_compile')
        flags.set('no_cpython_wrapper')
        flags.set('no_cfunc_wrapper')

    # Mangled name identifies this (function, signature) pair uniquely.
    fname = func.__name__ + sig.mangling()
    args, return_type = sigutils.normalize_signature(
        sig.tonumba(bool_is_int8=True))
    try:
        cres = compiler.compile_extra(typingctx=typing_context,
                                      targetctx=target_context,
                                      func=func,
                                      args=args,
                                      return_type=return_type,
                                      flags=flags,
                                      library=main_library,
                                      locals={},
                                      pipeline_class=pipeline_class)
    except (UnsupportedError, nb_errors.TypingError,
            nb_errors.LoweringError) as msg:
        # Skip (with a warning) only when an UnsupportedError message is
        # embedded in the exception text; the for/else re-raises any
        # other typing/lowering failure unchanged.
        for m in re.finditer(r'UnsupportedError(.*?)\n', str(msg), re.S):
            warnings.warn(f'Skipping {fname}:{m.group(0)[18:]}')
            break
        else:
            raise
        return
    except Exception:
        raise

    result = get_called_functions(cres.library, cres.fndesc.llvm_func_name)

    # Skip functions referencing declarations the target cannot resolve.
    for f in result['declarations']:
        if target.supports(f):
            continue
        warnings.warn(f'Skipping {fname} that uses undefined function `{f}`')
        return

    nvvmlib = libfuncs.Library.get('nvvm')
    llvmlib = libfuncs.Library.get('llvm')
    # Skip functions using intrinsics not available on the target device.
    for f in result['intrinsics']:
        if target.is_gpu:
            if f in nvvmlib:
                continue
        if target.is_cpu:
            if f in llvmlib:
                continue
        warnings.warn(
            f'Skipping {fname} that uses unsupported intrinsic `{f}`')
        return

    make_wrapper(fname, args, return_type, cres, target, verbose=debug)

    # Link dependent libraries discovered during compilation into the
    # main module.
    main_module = main_library._final_module
    for lib in result['libraries']:
        main_module.link_in(
            lib._get_module_for_linking(), preserve=True,
        )
    return fname
def compile_to_LLVM(functions_and_signatures,
                    target: TargetInfo,
                    pipeline_class=compiler.Compiler,
                    debug=False):
    """Compile functions with given signatures to target specific LLVM IR.

    Parameters
    ----------
    functions_and_signatures : list
      Specify a list of Python function and its signatures pairs.
    target : TargetInfo
      Specify target device information.
    pipeline_class : compiler pipeline class, optional
    debug : bool

    Returns
    -------
    module : llvmlite.binding.ModuleRef
      LLVM module instance. To get the IR string, use `str(module)`.
    """
    target_desc = registry.cpu_target
    if target is None:
        # Compile for the host CPU using the default contexts.
        target = TargetInfo.host()
        typing_context = target_desc.typing_context
        target_context = target_desc.target_context
    else:
        # Compile for a remote target with a fresh typing context.
        typing_context = typing.Context()
        target_context = RemoteCPUContext(typing_context, target)
        # Bring over Array overloads (a hack):
        target_context._defns = target_desc.target_context._defns
    typing_context.target_info = target
    target_context.target_info = target

    codegen = target_context.codegen()
    main_library = codegen.create_library('rbc.irtools.compile_to_IR')
    main_module = main_library._final_module

    # Lower only; native code generation is not wanted here.
    flags = compiler.Flags()
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')

    # Compile every (function, signature) pair into the shared library.
    function_names = []
    for func, signatures in functions_and_signatures:
        for sig in signatures:
            fname = func.__name__ + sig.mangling
            function_names.append(fname)
            args, return_type = sigutils.normalize_signature(
                sig.tonumba(bool_is_int8=True))
            cres = compiler.compile_extra(typingctx=typing_context,
                                          targetctx=target_context,
                                          func=func,
                                          args=args,
                                          return_type=return_type,
                                          flags=flags,
                                          library=main_library,
                                          locals={},
                                          pipeline_class=pipeline_class)
            make_wrapper(fname, args, return_type, cres)

    # Link each dependency library in exactly once.
    seen = set()
    for _library in main_library._linking_libraries:
        if _library not in seen:
            seen.add(_library)
            main_module.link_in(
                _library._get_module_for_linking(), preserve=True,
            )

    main_library._optimize_final_module()

    # Catch undefined functions:
    used_functions = set(function_names)
    for fname in function_names:
        deps = get_function_dependencies(main_module, fname)
        for fn, descr in deps.items():
            used_functions.add(fn)
            if descr == 'undefined':
                # numba_* / Py* symbols are resolved by the runtime when
                # the target provides Numba / CPython support.
                if fn.startswith('numba_') and target.has_numba:
                    continue
                if fn.startswith('Py') and target.has_cpython:
                    continue
                raise RuntimeError('function `%s` is undefined' % (fn))

    # for global_variable in main_module.global_variables:
    #     global_variable.linkage = llvm.Linkage.private

    unused_functions = [f.name for f in main_module.functions
                        if f.name not in used_functions]

    if debug:
        print('compile_to_IR: the following functions are used')
        for fname in used_functions:
            lf = main_module.get_function(fname)
            print('  [ALIVE]', fname, 'with', lf.linkage)

    if unused_functions:
        if debug:
            print('compile_to_IR: the following functions are not used'
                  ' and will be removed:')
        for fname in unused_functions:
            lf = main_module.get_function(fname)
            if lf.is_declaration:
                # if the function is a declaration,
                # we just put the linkage as external
                lf.linkage = llvm.Linkage.external
            else:
                # but if the function is not a declaration,
                # we change the linkage to private
                lf.linkage = llvm.Linkage.private
            if debug:
                print('  [DEAD]', fname, 'with', lf.linkage)

    # Re-optimize so private (dead) functions can be stripped.
    main_library._optimize_final_module()

    # TODO: determine unused global_variables and struct_types
    main_module.verify()
    main_library._finalized = True

    main_module.triple = target.triple
    main_module.data_layout = target.datalayout

    return main_module