def _compile_subroutine_no_cache(self, builder, impl, sig, locals=None,
                                 flags=None):
    """
    Invoke the compiler to compile a function to be used inside a
    nopython function, but without generating code to call that
    function.

    Note this context's flags are not inherited.

    Parameters
    ----------
    builder
        Accepted for interface compatibility; this method does not use it.
    impl
        Python function to compile. Its ``__name__`` is used as the name
        of the library created for the compilation.
    sig
        Signature whose ``args`` and ``return_type`` drive the compilation.
    locals : dict, optional
        Forwarded to ``compiler.compile_internal`` as ``locals``. A fresh
        empty dict is used when omitted.
    flags : optional
        Compiler flags. A new ``compiler.Flags()`` is created when omitted.
        ``no_compile``, ``no_cpython_wrapper`` and ``no_cfunc_wrapper`` are
        set on whichever flags object ends up being used, including one
        supplied by the caller.

    Returns
    -------
    The object returned by ``compiler.compile_internal``. Its ``library``
    is also registered as a linking library of ``self.active_code_library``.
    """
    # Imported here rather than at module level (kept from the original).
    from numba.core import compiler

    # A literal ``{}`` default would be a single dict shared by every call;
    # build a new one per call instead.
    if locals is None:
        locals = {}

    with global_compiler_lock:
        codegen = self.codegen()
        library = codegen.create_library(impl.__name__)
        if flags is None:
            flags = compiler.Flags()
        # Lower only: no native code generation and no wrappers.
        flags.set("no_compile")
        flags.set("no_cpython_wrapper")
        flags.set("no_cfunc_wrapper")
        cres = compiler.compile_internal(
            self.typing_context,
            self,
            library,
            impl,
            sig.args,
            sig.return_type,
            flags,
            locals=locals,
        )
        # Allow inlining the function inside callers.
        self.active_code_library.add_linking_library(cres.library)
        return cres
def generic(self, args, kws):
    """
    Type the overloaded function by compiling the appropriate
    implementation for the given args.

    Returns the resolved signature, or ``None`` when ``self._get_impl``
    yields no dispatcher. As a side effect, entries keyed by the
    signature's ``args`` are stored in ``self._compiled_overloads`` and,
    unless inlining is disabled, in ``self._inline_overloads``.
    """
    disp, new_args = self._get_impl(args, kws)
    if disp is None:
        return None

    disp_type = types.Dispatcher(disp)

    # No inlining at all: compile the overload now and keep it for the
    # lowering phase.
    if self._inline.is_never_inline:
        call_sig = disp_type.get_call_type(self.context, new_args, kws)
        self._compiled_overloads[call_sig.args] = disp_type.get_overload(
            call_sig)
        return call_sig

    # Inlining may happen, so only the compiler front end is run (up to
    # type inference) to obtain a signature; compiling here would produce
    # functions that are never used once they are inlined.
    from numba.core import typed_passes, compiler
    from numba.core.inline_closurecall import InlineWorker

    func_compiler = disp._compiler
    # NOTE: the flags are deliberately left at their defaults. The original
    # code carries a remark that feeding the dispatcher's target options
    # into them (parse_as_flags / _customize_flags) "causes problems".
    flags = compiler.Flags()

    # Spoof a compiler pipeline like the one that will be in use.
    typing_ctx = func_compiler.targetdescr.typing_context
    target_ctx = func_compiler.targetdescr.target_context
    pipeline = func_compiler.pipeline_class(
        typing_ctx, target_ctx, None, None, None, flags, None)
    worker = InlineWorker(
        typing_ctx, target_ctx, func_compiler.locals, pipeline, flags, None)

    # If the inlinee contains something that triggers literal arg dispatch,
    # the pipeline call below would unconditionally fail with a raised
    # ForceLiteralArg. Type resolution has to happen at some point anyway,
    # so it is done first: going through the dispatcher hits any
    # `literally` calls and lets ForceLiteralArg propagate correctly, which
    # ensures the pipeline call is only made when it can succeed.
    # For context see #5887.
    #
    # Note that ``kws`` is rebound here and the rebound value is what the
    # later ``get_call_type`` call receives.
    _template, _pysig, folded_args, kws = (
        disp_type.dispatcher.get_call_template(new_args, kws))

    func_ir = worker.run_untyped_passes(disp_type.dispatcher.py_func)
    typemap, return_type, calltypes, _ = typed_passes.type_inference_stage(
        self.context, func_ir, folded_args, None)
    sig = Signature(return_type, folded_args, None)

    # Information for the cost model function, if one was supplied (by
    # default there is none).
    self._inline_overloads[sig.args] = {'folded_args': folded_args}

    # The key must exist for type resolution even when there is no compiled
    # overload because the function is always inlined.
    # NOTE: if lowering fails on an `_EmptyImplementationEntry`, the inliner
    # has failed to inline this entry correctly.
    self._compiled_overloads[sig.args] = _EmptyImplementationEntry(
        'always inlined')

    if self._inline.is_always_inline:
        return sig

    # A user-supplied function decides whether to inline, so both the
    # compiled function and the inliner info are needed. Delaying this
    # computation leads to an internal state mess at present. TODO: Fix!
    sig = disp_type.get_call_type(self.context, new_args, kws)
    self._compiled_overloads[sig.args] = disp_type.get_overload(sig)
    # The inliner information is used later in the cost model call.
    self._inline_overloads[sig.args] = {
        'folded_args': folded_args,
        'iinfo': _inline_info(func_ir, typemap, calltypes, sig),
    }
    return sig
def compile_with_dppy(pyfunc, return_type, args, is_kernel, debug=None):
    """
    Compiles with Numba_dppy's pipeline and returns the compiled result.

    Args:
        pyfunc: The Python function to be compiled, or an
            ``ir.FunctionIR`` to be compiled from IR.
        return_type: The Numba type of the return value.
        args: The list of arguments sent to the Python function.
        is_kernel (bool): Indicates whether the function is decorated
            with @dppy.kernel or not.
        debug (bool): Flag to turn debug mode ON/OFF. When ``None`` (the
            default) ``config.DEBUGINFO_DEFAULT`` is used.

    Returns:
        cres: Compiled result. Its library has been finalized.

    Raises:
        TypeError: If ``pyfunc`` is neither a Python function nor an
            ``ir.FunctionIR``.
        TypeError: @dppy.kernel does not allow users to return any value.
            TypeError is raised when users do (the check is delegated to
            ``assert_no_return``).
    """
    # First compilation will trigger the initialization of the OpenCL
    # backend.
    from .descriptor import dppy_target

    typingctx = dppy_target.typing_context
    targetctx = dppy_target.target_context

    flags = compiler.Flags()
    # Do not compile (generate native code), just lower (to LLVM)
    flags.debuginfo = config.DEBUGINFO_DEFAULT
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False
    if debug is not None:
        # An explicit request overrides the configured default.
        flags.debuginfo = debug

    # Keyword arguments common to both compilation entry points.
    shared = dict(
        typingctx=typingctx,
        targetctx=targetctx,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=DPPYCompiler,
    )

    # Run compilation pipeline
    if isinstance(pyfunc, FunctionType):
        cres = compiler.compile_extra(func=pyfunc, **shared)
    elif isinstance(pyfunc, ir.FunctionIR):
        cres = compiler.compile_ir(func_ir=pyfunc, **shared)
    else:
        # An ``assert`` would vanish under ``python -O`` and let execution
        # continue with ``cres`` unbound, so fail explicitly instead.
        raise TypeError(
            "compile_with_dppy expects a Python function or an "
            f"ir.FunctionIR, got {type(pyfunc).__name__}"
        )

    if is_kernel:
        assert_no_return(cres.signature.return_type)

    # Linking depending libraries
    library = cres.library
    library.finalize()

    return cres
def _lower_array_expr(lowerer, expr):
    '''Lower an array expression built by RewriteArrayExprs.

    A Python function equivalent to the expression is synthesised through
    the ``ast`` module, compiled as a scalar kernel, and then applied by
    ``npyimpl.numpy_ufunc_kernel``, whose result is returned.
    '''
    # A negative hash would put a '-' into the name; swap it for '_'.
    kernel_name = "__numba_array_expr_%s" % (hex(hash(expr)).replace("-", "_"))
    source_file = expr.loc.filename

    # A variable may occur several times in the expression, but it must
    # become a single parameter only.
    unique_vars = sorted(set(expr.list_vars()), key=lambda var: var.name)
    # Names of the arguments as seen outside of the new closure.
    outer_names = [var.name for var in unique_vars]

    # 1. Create an AST tree from the array expression.
    with _legalize_parameter_names(unique_vars) as legal_names:
        params = [ast.arg(legal, None) for legal in legal_names]
        # Starting from a parsed stub guarantees that the AST carries
        # reasonable defaults for the running Python version.
        module_ast = ast.parse('def {0}(): return'.format(kernel_name),
                               source_file, 'exec')
        assert hasattr(module_ast, 'body') and len(module_ast.body) == 1
        stub = module_ast.body[0]
        stub.args.args = params
        stub.body[0].value, namespace = _arr_expr_to_ast(expr.expr)
        ast.fix_missing_locations(module_ast)

    # 2. Compile the AST module and extract the Python function.
    exec(compile(module_ast, source_file, 'exec'), namespace)
    impl = namespace[kernel_name]

    # 3. Now compile a ufunc using the Python function as kernel.
    context = lowerer.context
    builder = lowerer.builder
    outer_sig = expr.ty(*(lowerer.typeof(name) for name in outer_names))

    def _kernel_arg_type(argty):
        # Optionals are unwrapped and arrays contribute their dtype.
        if isinstance(argty, types.Optional):
            argty = argty.type
        return argty.dtype if isinstance(argty, types.Array) else argty

    inner_sig_args = [_kernel_arg_type(argty) for argty in outer_sig.args]
    inner_sig = outer_sig.return_type.dtype(*inner_sig_args)

    # Work on a clone of the active flags, or on a fresh instance when no
    # flags are active, so that nothing shared is modified.
    active = targetconfig.ConfigStack().top_or_none()
    if active is None:
        flags = compiler.Flags()
    else:
        flags = active.copy()
    # Follow the Numpy error model.  Note this also allows e.g. vectorizing
    # division (issue #1223).
    flags.error_model = 'numpy'
    cres = context.compile_subroutine(builder, impl, inner_sig,
                                      flags=flags, caching=False)

    # Create kernel subclass calling our native function
    from numba.np import npyimpl

    class ExprKernel(npyimpl._Kernel):
        def generate(self, *args):
            triples = zip(args, self.outer_sig.args, inner_sig.args)
            cast_args = [self.cast(value, from_ty, to_ty)
                         for value, from_ty, to_ty in triples]
            result = self.context.call_internal(
                builder, cres.fndesc, inner_sig, cast_args)
            return self.cast(result, inner_sig.return_type,
                             self.outer_sig.return_type)

    # A stand-in ufunc object, enough to satisfy numpy_ufunc_kernel.
    nin = len(outer_names)
    ufunc = SimpleNamespace(nin=nin, nout=1, __name__=kernel_name)
    ufunc.nargs = ufunc.nin + ufunc.nout

    loaded = [lowerer.loadvar(name) for name in outer_names]
    return npyimpl.numpy_ufunc_kernel(
        context, builder, outer_sig, loaded, ufunc, ExprKernel)
def compile_instance(func, sig, target: TargetInfo, typing_context,
                     target_context, pipeline_class, main_library,
                     debug=False):
    """Compile a function with given signature.

    Parameters
    ----------
    func
        Python function to compile.
    sig
        Signature object; ``sig.mangling()`` contributes to the generated
        name and ``sig.tonumba(bool_is_int8=True)`` provides the Numba
        signature.
    target : TargetInfo
        Target queried for supported functions and for ``is_gpu`` /
        ``is_cpu``.
    typing_context, target_context
        Contexts handed to ``compiler.compile_extra``.
    pipeline_class
        Compiler pipeline class handed to ``compiler.compile_extra``.
    main_library
        Library the function is compiled into; libraries reported by
        ``get_called_functions`` are linked into its final module.
    debug : bool
        Forwarded to ``make_wrapper`` as ``verbose``.

    Returns
    -------
    str or None
        The function name when successful. ``None`` when the function is
        skipped, in which case a warning describing the reason is issued.

    Raises
    ------
    UnsupportedError, TypingError, LoweringError
        Re-raised when the error text contains no ``UnsupportedError``
        line. Any other exception from compilation propagates unchanged.
    """
    flags = compiler.Flags()
    if get_version('numba') >= (0, 54):
        flags.no_compile = True
        flags.no_cpython_wrapper = True
        flags.no_cfunc_wrapper = True
    else:
        # Older Numba: flags are set through ``Flags.set``.
        flags.set('no_compile')
        flags.set('no_cpython_wrapper')
        flags.set('no_cfunc_wrapper')

    fname = func.__name__ + sig.mangling()
    args, return_type = sigutils.normalize_signature(
        sig.tonumba(bool_is_int8=True))
    try:
        cres = compiler.compile_extra(typingctx=typing_context,
                                      targetctx=target_context,
                                      func=func,
                                      args=args,
                                      return_type=return_type,
                                      flags=flags,
                                      library=main_library,
                                      locals={},
                                      pipeline_class=pipeline_class)
    except (UnsupportedError, nb_errors.TypingError,
            nb_errors.LoweringError) as msg:
        # Only errors that mention an UnsupportedError are downgraded to a
        # warning; anything else is re-raised as is.
        m = re.search(r'UnsupportedError(.*?)\n', str(msg), re.S)
        if m is None:
            raise
        # The slice drops the first 18 characters of the matched text.
        warnings.warn(f'Skipping {fname}:{m.group(0)[18:]}')
        return

    result = get_called_functions(cres.library, cres.fndesc.llvm_func_name)

    # Give up on the first declaration the target does not support.
    for f in result['declarations']:
        if target.supports(f):
            continue
        warnings.warn(f'Skipping {fname} that uses undefined function `{f}`')
        return

    nvvmlib = libfuncs.Library.get('nvvm')
    llvmlib = libfuncs.Library.get('llvm')
    # Likewise for the first intrinsic missing from the library that
    # matches the target kind.
    for f in result['intrinsics']:
        if (target.is_gpu and f in nvvmlib) or (
                target.is_cpu and f in llvmlib):
            continue
        warnings.warn(
            f'Skipping {fname} that uses unsupported intrinsic `{f}`')
        return

    make_wrapper(fname, args, return_type, cres, target, verbose=debug)

    main_module = main_library._final_module
    for lib in result['libraries']:
        main_module.link_in(
            lib._get_module_for_linking(), preserve=True,
        )

    return fname
def _lower_array_expr(lowerer, expr):
    """Lower an array expression built by RewriteArrayExprs.

    The expression is turned into a Python function through the ``ast``
    module, that function is compiled as a scalar kernel, and the kernel
    is applied by ``npyimpl.numpy_ufunc_kernel``, whose result is returned.
    """
    # A negative hash would put a '-' into the name; swap it for '_'.
    fn_name = "__numba_array_expr_%s" % (hex(hash(expr)).replace("-", "_"))
    fn_filename = expr.loc.filename

    # Even if the expression mentions a variable several times, only one
    # parameter is created for it.
    distinct_vars = sorted(set(expr.list_vars()), key=lambda var: var.name)
    # Argument names as they exist outside of the new closure.
    arg_names = [var.name for var in distinct_vars]

    # 1. Create an AST tree from the array expression.
    with _legalize_parameter_names(distinct_vars) as legal_names:
        if hasattr(ast, "arg"):
            # Should be Python 3.x
            def make_param(name):
                return ast.arg(name, None)
        else:
            # Should be Python 2.x
            def make_param(name):
                return ast.Name(name, ast.Param())
        params = [make_param(name) for name in legal_names]

        # A parsed stub function gives an AST that is populated with
        # reasonable defaults for the running Python version.
        tree = ast.parse("def {0}(): return".format(fn_name), fn_filename,
                         "exec")
        assert hasattr(tree, "body") and len(tree.body) == 1
        fn_def = tree.body[0]
        fn_def.args.args = params
        fn_def.body[0].value, namespace = _arr_expr_to_ast(expr.expr)
        ast.fix_missing_locations(tree)

    # 2. Compile the AST module and extract the Python function.
    exec(compile(tree, fn_filename, "exec"), namespace)
    impl = namespace[fn_name]

    # 3. Now compile a ufunc using the Python function as kernel.
    context = lowerer.context
    builder = lowerer.builder
    outer_sig = expr.ty(*(lowerer.typeof(name) for name in arg_names))

    def _scalar_type(argty):
        # Optionals are unwrapped and arrays contribute their dtype.
        if isinstance(argty, types.Optional):
            argty = argty.type
        if isinstance(argty, types.Array):
            return argty.dtype
        return argty

    inner_sig_args = [_scalar_type(argty) for argty in outer_sig.args]
    inner_sig = outer_sig.return_type.dtype(*inner_sig_args)

    # Follow the Numpy error model.  Note this also allows e.g. vectorizing
    # division (issue #1223).
    flags = compiler.Flags()
    flags.set("error_model", "numpy")
    cres = context.compile_subroutine(builder, impl, inner_sig, flags=flags,
                                      caching=False)

    # Create kernel subclass calling our native function
    from numba.np import npyimpl

    class ExprKernel(npyimpl._Kernel):
        def generate(self, *args):
            cast_args = []
            for val, inty, outty in zip(args, self.outer_sig.args,
                                        inner_sig.args):
                cast_args.append(self.cast(val, inty, outty))
            result = self.context.call_internal(builder, cres.fndesc,
                                                inner_sig, cast_args)
            return self.cast(result, inner_sig.return_type,
                             self.outer_sig.return_type)

    loaded = [lowerer.loadvar(name) for name in arg_names]
    return npyimpl.numpy_ufunc_kernel(context, builder, outer_sig, loaded,
                                      ExprKernel, explicit_output=False)
def compile_to_LLVM(functions_and_signatures, target: TargetInfo,
                    pipeline_class=compiler.Compiler, debug=False):
    """Compile functions with given signatures to target specific LLVM IR.

    Parameters
    ----------
    functions_and_signatures : list
      Pairs of a Python function and an iterable of its signatures.
    target : TargetInfo
      Target device information. When None, ``TargetInfo.host()`` is used
      together with the contexts of the registered CPU target.
    pipeline_class
      Compiler pipeline class passed to ``compiler.compile_extra``.
    debug : bool
      When true, print the used and the unused functions of the module.

    Returns
    -------
    module : llvmlite.binding.ModuleRef
      LLVM module instance. To get the IR string, use `str(module)`.

    Raises
    ------
    RuntimeError
      When a dependency is reported as undefined and the target provides
      neither Numba (``numba_*`` names) nor CPython (``Py*`` names) for it.
    """
    cpu_desc = registry.cpu_target
    if target is not None:
        typing_context = typing.Context()
        target_context = RemoteCPUContext(typing_context, target)
        # Bring over Array overloads (a hack):
        target_context._defns = cpu_desc.target_context._defns
    else:
        target = TargetInfo.host()
        typing_context = cpu_desc.typing_context
        target_context = cpu_desc.target_context

    typing_context.target_info = target
    target_context.target_info = target

    main_library = target_context.codegen().create_library(
        'rbc.irtools.compile_to_IR')
    main_module = main_library._final_module

    # Lower only: no native code generation, no CPython wrapper.
    flags = compiler.Flags()
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')

    # Compile every (function, signature) pair into the main library.
    function_names = []
    for func, signatures in functions_and_signatures:
        for sig in signatures:
            fname = func.__name__ + sig.mangling
            function_names.append(fname)
            args, return_type = sigutils.normalize_signature(
                sig.tonumba(bool_is_int8=True))
            cres = compiler.compile_extra(
                typingctx=typing_context,
                targetctx=target_context,
                func=func,
                args=args,
                return_type=return_type,
                flags=flags,
                library=main_library,
                locals={},
                pipeline_class=pipeline_class,
            )
            make_wrapper(fname, args, return_type, cres)

    # Link each linking library into the main module exactly once.
    linked = set()
    for lib in main_library._linking_libraries:
        if lib in linked:
            continue
        linked.add(lib)
        main_module.link_in(lib._get_module_for_linking(), preserve=True)

    main_library._optimize_final_module()

    # Catch undefined functions:
    used_functions = set(function_names)
    for fname in function_names:
        deps = get_function_dependencies(main_module, fname)
        for fn, descr in deps.items():
            used_functions.add(fn)
            if descr != 'undefined':
                continue
            if fn.startswith('numba_') and target.has_numba:
                continue
            if fn.startswith('Py') and target.has_cpython:
                continue
            raise RuntimeError('function `%s` is undefined' % (fn))

    unused_functions = [
        f.name for f in main_module.functions
        if f.name not in used_functions
    ]

    if debug:
        print('compile_to_IR: the following functions are used')
        for fname in used_functions:
            print(' [ALIVE]', fname, 'with',
                  main_module.get_function(fname).linkage)

    if unused_functions:
        if debug:
            print('compile_to_IR: the following functions are not used'
                  ' and will be removed:')
        for fname in unused_functions:
            lf = main_module.get_function(fname)
            # Declarations get external linkage, definitions get private
            # linkage.
            lf.linkage = (llvm.Linkage.external if lf.is_declaration
                          else llvm.Linkage.private)
            if debug:
                print(' [DEAD]', fname, 'with', lf.linkage)
        # Optimize again now that the linkage of unused functions changed.
        main_library._optimize_final_module()
    # TODO: determine unused global_variables and struct_types

    main_module.verify()
    main_library._finalized = True
    main_module.triple = target.triple
    main_module.data_layout = target.datalayout

    return main_module