def jit(signature_or_function=None, locals={}, cache=False,
        pipeline_class=None, boundscheck=False, **options):
    """
    This decorator is used to compile a Python function into native code.

    Args
    -----
    signature_or_function:
        The (optional) signature or list of signatures to be compiled.
        If not passed, required signatures will be compiled when the
        decorated function is called, depending on the argument values.
        As a convenience, you can directly pass the function to be compiled
        instead.

    locals: dict
        Mapping of local variable names to Numba types. Used to override the
        types deduced by Numba's type inference engine.

    target (deprecated): str
        Specifies the target platform to compile for. Valid targets are cpu,
        gpu, npyufunc, and cuda. Defaults to cpu.

    pipeline_class: type numba.compiler.CompilerBase
        The compiler pipeline type for customizing the compilation stages.

    options:
        For a cpu target, valid options are:
            nopython: bool
                Set to True to disable the use of PyObjects and Python API
                calls. The default behavior is to allow the use of PyObjects
                and Python API. Default value is False.

            forceobj: bool
                Set to True to force the use of PyObjects for every value.
                Default value is False.

            looplift: bool
                Set to True to enable jitting loops in nopython mode while
                leaving surrounding code in object mode. This allows
                functions to allocate NumPy arrays and use Python objects,
                while the tight loops in the function can still be compiled
                in nopython mode. Any arrays that the tight loop uses should
                be created before the loop is entered. Default value is True.

            error_model: str
                The error-model affects divide-by-zero behavior.
                Valid values are 'python' and 'numpy'. The 'python' model
                raises an exception. The 'numpy' model sets the result to
                *+/-inf* or *nan*. Default value is 'python'.

            inline: str or callable
                The inline option determines whether a function is inlined
                into its caller when called. String options are 'never'
                (default), which will never inline, and 'always', which will
                always inline. If a callable is provided, it will be called
                with the call expression node that is requesting inlining,
                the caller's IR, and the callee's IR as arguments, and is
                expected to return a truthy value indicating whether to
                inline.
                NOTE: This inlining is performed at the Numba IR level and is
                in no way related to LLVM inlining.

            boundscheck: bool
                Set to True to enable bounds checking for array indices. Out
                of bounds accesses will raise IndexError. The default is to
                not do bounds checking. If bounds checking is disabled, out
                of bounds accesses can produce garbage results or segfaults.
                However, enabling bounds checking will slow down typical
                functions, so it is recommended to only use this flag for
                debugging. You can also set the NUMBA_BOUNDSCHECK environment
                variable to 0 or 1 to globally override this flag.

    Returns
    --------
    A callable usable as a compiled function. Actual compiling will be
    done lazily if no explicit signatures are passed.

    Examples
    --------
    The function can be used in the following ways:

    1) jit(signatures, target='cpu', **targetoptions) -> jit(function)

        Equivalent to:

            d = dispatcher(function, targetoptions)
            for signature in signatures:
                d.compile(signature)

        Create a dispatcher object for a python function. Then, compile
        the function with the given signature(s).

        Example:

            @jit("int32(int32, int32)")
            def foo(x, y):
                return x + y

            @jit(["int32(int32, int32)", "float32(float32, float32)"])
            def bar(x, y):
                return x + y

    2) jit(function, target='cpu', **targetoptions) -> dispatcher

        Create a dispatcher function object that specializes at call site.
        Examples:

            @jit
            def foo(x, y):
                return x + y

            @jit(target='cpu', nopython=True)
            def bar(x, y):
                return x + y

    """
    if 'argtypes' in options:
        raise DeprecationError(
            _msg_deprecated_signature_arg.format('argtypes'))
    if 'restype' in options:
        raise DeprecationError(
            _msg_deprecated_signature_arg.format('restype'))
    if options.get('nopython', False) and options.get('forceobj', False):
        raise ValueError("Only one of 'nopython' or 'forceobj' can be True.")
    if 'target' in options:
        target = options.pop('target')
        warnings.warn("The 'target' keyword argument is deprecated.",
                      NumbaDeprecationWarning)
    else:
        target = options.pop('_target', 'cpu')

    options['boundscheck'] = boundscheck

    # Handle signature
    if signature_or_function is None:
        # No signature, no function
        pyfunc = None
        sigs = None
    elif isinstance(signature_or_function, list):
        # A list of signatures is passed
        pyfunc = None
        sigs = signature_or_function
    elif sigutils.is_signature(signature_or_function):
        # A single signature is passed
        pyfunc = None
        sigs = [signature_or_function]
    else:
        # A function is passed
        pyfunc = signature_or_function
        sigs = None

    dispatcher_args = {}
    if pipeline_class is not None:
        dispatcher_args['pipeline_class'] = pipeline_class
    wrapper = _jit(sigs, locals=locals, target=target, cache=cache,
                   targetoptions=options, **dispatcher_args)
    if pyfunc is not None:
        return wrapper(pyfunc)
    else:
        return wrapper
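
# --- Illustrative usage sketch; not part of the original source. ---
# A minimal demonstration of the two calling conventions documented above:
# eager compilation from explicit signatures, and lazy compilation at the
# first call. The function names and the wrapper `_example_cpu_jit_usage`
# are hypothetical; this assumes `numba` is importable.
def _example_cpu_jit_usage():
    from numba import jit

    # Eager: both signatures are compiled when the decorator is applied.
    @jit(["int64(int64, int64)", "float64(float64, float64)"],
         nopython=True)
    def add(x, y):
        return x + y

    # Lazy: a specialization is compiled on first call, per argument types.
    @jit(nopython=True)
    def mul(x, y):
        return x * y

    assert add(2, 3) == 5          # uses the precompiled int64 version
    assert mul(1.5, 2.0) == 3.0    # compiles a float64 specialization here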
def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
        opt=True, **kws):
    """
    JIT compile a Python function conforming to the CUDA Python
    specification. If a signature is supplied, then a function is returned
    that takes a function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a
       function to compile. If a function is supplied, then a
       :class:`numba.cuda.compiler.AutoJitCUDAKernel` is returned. If a
       signature is supplied, then a function is returned. The returned
       function accepts another function, which it will compile and then
       return a :class:`numba.cuda.compiler.AutoJitCUDAKernel`.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param link: A list of files containing PTX source to link with the
        function.
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
        kernel. Since this degrades performance, this should only be used
        for debugging purposes. Defaults to False. (The default value can be
        overridden by setting the environment variable
        ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: When True, enables fastmath optimizations as outlined
        in the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Request that the kernel is limited to using at
        most this number of registers per thread. The limit may not be
        respected if the ABI requires a greater number of registers than
        that requested. Useful for increasing occupancy.
    :param opt: Whether to compile from LLVM IR to PTX with optimization
        enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
        ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
    :type opt: bool
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    if kws.get('argtypes') is not None:
        msg = _msg_deprecated_signature_arg.format('argtypes')
        raise DeprecationError(msg)
    if kws.get('restype') is not None:
        msg = _msg_deprecated_signature_arg.format('restype')
        raise DeprecationError(msg)
    if kws.get('bind') is not None:
        msg = _msg_deprecated_signature_arg.format('bind')
        raise DeprecationError(msg)

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
    fastmath = kws.get('fastmath', False)

    if sigutils.is_signature(func_or_sig):
        if config.ENABLE_CUDASIM:
            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath,
                                      debug=debug)
            return jitwrapper

        argtypes, restype = sigutils.normalize_signature(func_or_sig)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            targetoptions = kws.copy()
            targetoptions['debug'] = debug
            targetoptions['link'] = link
            targetoptions['opt'] = opt
            targetoptions['fastmath'] = fastmath
            return Dispatcher(func, [func_or_sig],
                              targetoptions=targetoptions)

        def device_jit(func):
            return compile_device(func, restype, argtypes, inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
    else:
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:
                def autojitwrapper(func):
                    return FakeCUDAKernel(func, device=device,
                                          fastmath=fastmath, debug=debug)
            else:
                def autojitwrapper(func):
                    return jit(func, device=device, debug=debug, opt=opt,
                               **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig, device=device,
                                      fastmath=fastmath, debug=debug)
            elif device:
                return jitdevice(func_or_sig, debug=debug, opt=opt, **kws)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                targetoptions['opt'] = opt
                targetoptions['link'] = link
                targetoptions['fastmath'] = fastmath
                sigs = None
                return Dispatcher(func_or_sig, sigs,
                                  targetoptions=targetoptions)
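
# --- Illustrative usage sketch; not part of the original source. ---
# A minimal sketch of the kernel/device-function split handled above:
# `device=False` (the default) yields a launchable kernel, which must have a
# void return type, while `device=True` yields a function callable from other
# device code. The names here are hypothetical; this assumes `numba` plus
# either a CUDA-capable GPU or the simulator (NUMBA_ENABLE_CUDASIM=1).
def _example_cuda_jit_usage():
    import numpy as np
    from numba import cuda

    # Device function: lazily compiled, callable only from device code.
    @cuda.jit(device=True)
    def plus_one(x):
        return x + 1.0

    # Kernel: eagerly compiled from an explicit void signature.
    @cuda.jit("void(float32[:])")
    def increment(arr):
        i = cuda.grid(1)          # absolute index of this thread
        if i < arr.size:          # guard threads beyond the array bounds
            arr[i] = plus_one(arr[i])

    data = np.zeros(256, dtype=np.float32)
    increment[4, 64](data)        # launch 4 blocks of 64 threads each
    assert (data == 1.0).all()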
def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
        opt=True, **kws):
    """
    JIT compile a Python function conforming to the CUDA Python
    specification. If a signature is supplied, then a function is returned
    that takes a function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a
       function to compile. If a function is supplied, then a
       :class:`numba.cuda.compiler.AutoJitCUDAKernel` is returned. If a
       signature is supplied, then a function is returned. The returned
       function accepts another function, which it will compile and then
       return a :class:`numba.cuda.compiler.AutoJitCUDAKernel`.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param link: A list of files containing PTX source to link with the
        function.
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
        kernel. Since this degrades performance, this should only be used
        for debugging purposes. If set to True, then ``opt`` should be set
        to False. Defaults to False. (The default value can be overridden by
        setting the environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: When True, enables fastmath optimizations as outlined
        in the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Request that the kernel is limited to using at
        most this number of registers per thread. The limit may not be
        respected if the ABI requires a greater number of registers than
        that requested. Useful for increasing occupancy.
    :param opt: Whether to compile from LLVM IR to PTX with optimization
        enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
        ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
    :type opt: bool
    :param lineinfo: If True, generate a line mapping between source code
        and assembly code. This enables inspection of the source code in
        NVIDIA profiling tools and correlation with program counter
        sampling.
    :type lineinfo: bool
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    if kws.get('argtypes') is not None:
        msg = _msg_deprecated_signature_arg.format('argtypes')
        raise DeprecationError(msg)
    if kws.get('restype') is not None:
        msg = _msg_deprecated_signature_arg.format('restype')
        raise DeprecationError(msg)
    if kws.get('bind') is not None:
        msg = _msg_deprecated_signature_arg.format('bind')
        raise DeprecationError(msg)

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
    fastmath = kws.get('fastmath', False)
    extensions = kws.get('extensions', [])

    if debug and opt:
        msg = ("debug=True with opt=True (the default) is not supported by "
               "CUDA. This may result in a crash - set debug=False or "
               "opt=False.")
        warn(NumbaInvalidConfigWarning(msg))

    # Device functions cannot be linked against PTX files. Note that `link`
    # is an explicit parameter, so it never appears in `kws`.
    if device and link:
        raise ValueError("link keyword invalid for device function")

    if sigutils.is_signature(func_or_sig):
        if config.ENABLE_CUDASIM:
            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath)
            return jitwrapper

        argtypes, restype = sigutils.normalize_signature(func_or_sig)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def _jit(func):
            targetoptions = kws.copy()
            targetoptions['debug'] = debug
            targetoptions['link'] = link
            targetoptions['opt'] = opt
            targetoptions['fastmath'] = fastmath
            targetoptions['device'] = device
            targetoptions['extensions'] = extensions

            disp = CUDADispatcher(func, targetoptions=targetoptions)

            if device:
                disp.compile_device(argtypes)
            else:
                disp.compile(argtypes)

            disp._specialized = True
            disp.disable_compile()

            return disp

        return _jit
    else:
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:
                def autojitwrapper(func):
                    return FakeCUDAKernel(func, device=device,
                                          fastmath=fastmath)
            else:
                def autojitwrapper(func):
                    return jit(func, device=device, debug=debug, opt=opt,
                               link=link, **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig, device=device,
                                      fastmath=fastmath)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                targetoptions['opt'] = opt
                targetoptions['link'] = link
                targetoptions['fastmath'] = fastmath
                targetoptions['device'] = device
                targetoptions['extensions'] = extensions
                return CUDADispatcher(func_or_sig,
                                      targetoptions=targetoptions)
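
# --- Illustrative usage sketch; not part of the original source. ---
# The overload above warns when debug=True is combined with opt=True (the
# default), and it disables further compilation once an eager signature has
# been supplied. A minimal sketch of both behaviors, with hypothetical
# names, assuming `numba` plus a CUDA device or the simulator:
def _example_cuda_debug_usage():
    import numpy as np
    from numba import cuda

    # Pass opt=False alongside debug=True to avoid the
    # NumbaInvalidConfigWarning emitted by the check above.
    @cuda.jit("void(int32[:])", debug=True, opt=False)
    def fill_index(arr):
        i = cuda.grid(1)
        if i < arr.size:
            arr[i] = i

    data = np.zeros(128, dtype=np.int32)
    fill_index[2, 64](data)       # 2 blocks of 64 threads cover 128 elements
    assert data[127] == 127

    # Because a signature was supplied, the dispatcher is specialized and
    # disable_compile() has been called: invoking it with a differently
    # typed array (e.g. int64) raises instead of compiling a new version.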