Example #1
def add(self, sig):
    """
    Compile the DUFunc for the given signature.
    """
    args, return_type = sigutils.normalize_signature(sig)
    return self._compile_for_argtys(args, return_type)
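Every example on this page centers on sigutils.normalize_signature, which accepts a signature (as a string or a numba.types signature object) and returns a normalized (args, return_type) pair of Numba types. A minimal, self-contained illustration (the signature string below is just an example):

from numba.core import sigutils, types

args, return_type = sigutils.normalize_signature("int64(int64, float64)")
assert args == (types.int64, types.float64)
assert return_type == types.int64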
Example #2
def jit(func_or_sig=None,
        device=False,
        inline=False,
        link=[],
        debug=None,
        opt=True,
        cache=False,
        **kws):
    """
    JIT compile a Python function conforming to the CUDA Python specification.
    If a signature is supplied, then a function is returned that takes a
    function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a function
       to compile. If a function is supplied, then a
       :class:`numba.cuda.compiler.AutoJitCUDAKernel` is returned. If a
       signature is supplied, then a function is returned. The returned
       function accepts another function, which it will compile and then return
       a :class:`numba.cuda.compiler.AutoJitCUDAKernel`.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param link: A list of files containing PTX source to link with the function.
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes. If set to True, then ``opt`` should be set to False.
       Defaults to False.  (The default value can be overridden by setting
       environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: When True, enables fastmath optimizations as outlined in
       the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Request that the kernel is limited to using at most
       this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
    :param opt: Whether to compile from LLVM IR to PTX with optimization
                enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
                ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
    :type opt: bool
    :param lineinfo: If True, generate a line mapping between source code and
       assembly code. This enables inspection of the source code in NVIDIA
       profiling tools and correlation with program counter sampling.
    :type lineinfo: bool
    :param cache: If True, enables the file-based cache for this function.
    :type cache: bool
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    if kws.get('argtypes') is not None:
        msg = _msg_deprecated_signature_arg.format('argtypes')
        raise DeprecationError(msg)
    if kws.get('restype') is not None:
        msg = _msg_deprecated_signature_arg.format('restype')
        raise DeprecationError(msg)
    if kws.get('bind') is not None:
        msg = _msg_deprecated_signature_arg.format('bind')
        raise DeprecationError(msg)

    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
    fastmath = kws.get('fastmath', False)
    extensions = kws.get('extensions', [])

    if debug and opt:
        msg = ("debug=True with opt=True (the default) "
               "is not supported by CUDA. This may result in a crash"
               " - set debug=False or opt=False.")
        warn(NumbaInvalidConfigWarning(msg))

    if device and link:
        raise ValueError("link keyword invalid for device function")

    if sigutils.is_signature(func_or_sig):
        if config.ENABLE_CUDASIM:

            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath)

            return jitwrapper

        argtypes, restype = sigutils.normalize_signature(func_or_sig)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def _jit(func):
            targetoptions = kws.copy()
            targetoptions['debug'] = debug
            targetoptions['link'] = link
            targetoptions['opt'] = opt
            targetoptions['fastmath'] = fastmath
            targetoptions['device'] = device
            targetoptions['extensions'] = extensions

            disp = CUDADispatcher(func, targetoptions=targetoptions)

            if cache:
                disp.enable_caching()

            if device:
                from numba.core import typeinfer
                with typeinfer.register_dispatcher(disp):
                    disp.compile_device(argtypes)
            else:
                disp.compile(argtypes)

            disp._specialized = True
            disp.disable_compile()

            return disp

        return _jit
    else:
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:

                def autojitwrapper(func):
                    return FakeCUDAKernel(func,
                                          device=device,
                                          fastmath=fastmath)
            else:

                def autojitwrapper(func):
                    return jit(func,
                               device=device,
                               debug=debug,
                               opt=opt,
                               link=link,
                               cache=cache,
                               **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig,
                                      device=device,
                                      fastmath=fastmath)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                targetoptions['opt'] = opt
                targetoptions['link'] = link
                targetoptions['fastmath'] = fastmath
                targetoptions['device'] = device
                targetoptions['extensions'] = extensions
                disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)

                if cache:
                    disp.enable_caching()

                return disp
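A minimal usage sketch of the two forms the docstring describes, lazy and eager compilation; running it requires a CUDA-capable GPU or the simulator (NUMBA_ENABLE_CUDASIM=1):

import numpy as np
from numba import cuda

# Lazy: no signature, so the kernel specializes at first launch.
@cuda.jit
def increment(arr):
    i = cuda.grid(1)
    if i < arr.size:
        arr[i] += 1

# Eager: explicit signature; kernels must have a void return type.
@cuda.jit("void(float32[:])")
def double(arr):
    i = cuda.grid(1)
    if i < arr.size:
        arr[i] *= 2

x = np.arange(16, dtype=np.float32)
increment[1, 16](x)  # launch with one block of 16 threads
double[1, 16](x)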
Example #3
def get_overload(self, sig):
    """
    Return the compiled function for the given signature.
    """
    args, return_type = sigutils.normalize_signature(sig)
    return self.overloads[tuple(args)].entry_point
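A short usage sketch, assuming a dispatcher disp that has already compiled the requested signature:

# Hypothetical: disp compiled int64(int64, int64) earlier.
entry_point = disp.get_overload("int64(int64, int64)")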
Example #4
    def __init__(self, py_func, sigs, targetoptions):
        self.py_func = py_func
        self.sigs = []
        self.link = targetoptions.pop('link', ())
        self._can_compile = True
        self._type = self._numba_type_

        # The compiling counter is only used when compiling device functions as
        # it is used to detect recursion - recursion is not possible when
        # compiling a kernel.
        self._compiling_counter = CompilingCounter()

        # Specializations for given sets of argument types
        self.specializations = {}

        # A mapping of signatures to compile results
        self.overloads = collections.OrderedDict()

        self.targetoptions = targetoptions

        # defensive copy
        self.targetoptions['extensions'] = \
            list(self.targetoptions.get('extensions', []))

        self.typingctx = self.targetdescr.typing_context

        self._tm = default_type_manager

        pysig = utils.pysignature(py_func)
        arg_count = len(pysig.parameters)
        argnames = tuple(pysig.parameters)
        default_values = self.py_func.__defaults__ or ()
        defargs = tuple(OmittedArg(val) for val in default_values)
        can_fallback = False  # CUDA cannot fall back to object mode

        try:
            lastarg = list(pysig.parameters.values())[-1]
        except IndexError:
            has_stararg = False
        else:
            has_stararg = lastarg.kind == lastarg.VAR_POSITIONAL

        exact_match_required = False

        _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(),
                                        arg_count, self._fold_args, argnames,
                                        defargs, can_fallback, has_stararg,
                                        exact_match_required)

        if sigs:
            if len(sigs) > 1:
                raise TypeError("Only one signature supported at present")
            if targetoptions.get('device'):
                argtypes, restype = sigutils.normalize_signature(sigs[0])
                self.compile_device(argtypes)
            else:
                self.compile(sigs[0])

            self._can_compile = False

        if targetoptions.get('device'):
            self._register_device_function()
Example #5
def wrapped(func):
    fn_argtys, fn_retty = sigutils.normalize_signature(sig)
    signature = typing.signature(fn_retty, *fn_argtys)
    entry = ExportEntry(symbol=sym, signature=signature, function=func)
    export_registry.append(entry)
    return func
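This is the inner function of a decorator factory: sym and sig are closed over from the enclosing scope, and each decorated function is recorded in a module-level export_registry. A hedged sketch of the surrounding pattern (the factory below is illustrative, not the project's exact code):

export_registry = []

def export(sym, sig):
    def wrapped(func):
        # Record the entry, then hand the function back unchanged so it
        # remains usable as defined.
        export_registry.append((sym, sig, func))
        return func
    return wrapped

@export("mult", "float64(float64, float64)")
def mult(a, b):
    return a * b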
Example #6
File: irtools.py  Project: pearu/rbc
def compile_instance(func,
                     sig,
                     target: TargetInfo,
                     typing_context,
                     target_context,
                     pipeline_class,
                     main_library,
                     debug=False):
    """Compile a function with given signature. Return function name when
    succesful.
    """
    flags = compiler.Flags()
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')
    if get_version('numba') >= (0, 49):
        flags.set('no_cfunc_wrapper')

    fname = func.__name__ + sig.mangling()
    args, return_type = sigutils.normalize_signature(
        sig.tonumba(bool_is_int8=True))
    try:
        cres = compiler.compile_extra(typingctx=typing_context,
                                      targetctx=target_context,
                                      func=func,
                                      args=args,
                                      return_type=return_type,
                                      flags=flags,
                                      library=main_library,
                                      locals={},
                                      pipeline_class=pipeline_class)
    except UnsupportedError as msg:
        for m in re.finditer(r'[|]UnsupportedError[|](.*?)\n', str(msg), re.S):
            warnings.warn(f'Skipping {fname}: {m.group(0)[18:]}')
        return
    except nb_errors.TypingError as msg:
        # Warn and skip only when the typing error wraps an UnsupportedError;
        # any other typing error is re-raised.
        for m in re.finditer(r'[|]UnsupportedError[|](.*?)\n', str(msg), re.S):
            warnings.warn(f'Skipping {fname}: {m.group(0)[18:]}')
            break
        else:
            raise
        return
    except Exception:
        raise

    result = get_called_functions(cres.library, cres.fndesc.llvm_func_name)

    for f in result['declarations']:
        if target.supports(f):
            continue
        warnings.warn(f'Skipping {fname} that uses undefined function `{f}`')
        return

    nvvmlib = libfuncs.Library.get('nvvm')
    llvmlib = libfuncs.Library.get('llvm')
    for f in result['intrinsics']:
        if target.is_gpu:
            if f in nvvmlib:
                continue

        if target.is_cpu:
            if f in llvmlib:
                continue

        warnings.warn(
            f'Skipping {fname} that uses unsupported intrinsic `{f}`')
        return

    make_wrapper(fname, args, return_type, cres, target, verbose=debug)

    main_module = main_library._final_module
    for lib in result['libraries']:
        main_module.link_in(
            lib._get_module_for_linking(),
            preserve=True,
        )

    return fname
Example #7
def jit(func_or_sig=None,
        device=False,
        inline=False,
        link=[],
        debug=None,
        opt=True,
        **kws):
    """
    JIT compile a Python function conforming to the CUDA Python specification.
    If a signature is supplied, then a function is returned that takes a
    function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a function
       to compile. If a function is supplied, then a
       :class:`numba.cuda.compiler.AutoJitCUDAKernel` is returned. If a
       signature is supplied, then a function is returned. The returned
       function accepts another function, which it will compile and then return
       a :class:`numba.cuda.compiler.AutoJitCUDAKernel`.

       .. note:: A kernel cannot have any return value.
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param link: A list of files containing PTX source to link with the function.
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes.  Defaults to False.  (The default value can be
       overridden by setting environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: If True, enables flush-to-zero and fused multiply-add and
       disables precise division and square root. This parameter has no effect
       on device functions, whose fastmath setting depends on the kernel
       function from which they are called.
    :param max_registers: Request that the kernel is limited to using at most
       this number of registers per thread. The limit may not be respected if
       the ABI requires a greater number of registers than that requested.
       Useful for increasing occupancy.
    :param opt: Whether to compile from LLVM IR to PTX with optimization
                enabled. When ``True``, ``-opt=3`` is passed to NVVM. When
                ``False``, ``-opt=0`` is passed to NVVM. Defaults to ``True``.
    :type opt: bool
    """
    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    if kws.get('argtypes') is not None:
        msg = _msg_deprecated_signature_arg.format('argtypes')
        raise DeprecationError(msg)
    if kws.get('restype') is not None:
        msg = _msg_deprecated_signature_arg.format('restype')
        raise DeprecationError(msg)
    if kws.get('bind') is not None:
        msg = _msg_deprecated_signature_arg.format('bind')
        raise DeprecationError(msg)

    fastmath = kws.get('fastmath', False)
    if not sigutils.is_signature(func_or_sig):
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:

                def autojitwrapper(func):
                    return FakeCUDAKernel(func,
                                          device=device,
                                          fastmath=fastmath,
                                          debug=debug)
            else:

                def autojitwrapper(func):
                    return jit(func,
                               device=device,
                               debug=debug,
                               opt=opt,
                               **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig,
                                      device=device,
                                      fastmath=fastmath,
                                      debug=debug)
            elif device:
                return jitdevice(func_or_sig, debug=debug, opt=opt, **kws)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                targetoptions['opt'] = opt
                targetoptions['link'] = link
                sigs = None
                return Dispatcher(func_or_sig,
                                  sigs,
                                  targetoptions=targetoptions)

    else:
        if config.ENABLE_CUDASIM:

            def jitwrapper(func):
                return FakeCUDAKernel(func,
                                      device=device,
                                      fastmath=fastmath,
                                      debug=debug)

            return jitwrapper

        if isinstance(func_or_sig, list):
            msg = 'Lists of signatures are not yet supported in CUDA'
            raise ValueError(msg)
        elif sigutils.is_signature(func_or_sig):
            sigs = [func_or_sig]
        else:
            raise ValueError("Expecting signature or list of signatures")

        for sig in sigs:
            argtypes, restype = sigutils.normalize_signature(sig)

            if restype and not device and restype != types.void:
                raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            targetoptions = kws.copy()
            targetoptions['debug'] = debug
            targetoptions['link'] = link
            targetoptions['opt'] = opt
            return Dispatcher(func, sigs, targetoptions=targetoptions)

        def device_jit(func):
            return compile_device(func,
                                  restype,
                                  argtypes,
                                  inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
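Because device=True routes through compile_device here, a usage sketch of a device function is worth contrasting with a kernel: device functions may return values and are callable only from other CUDA-compiled code (requires a GPU or NUMBA_ENABLE_CUDASIM=1):

from numba import cuda

@cuda.jit(device=True)
def clamp(x, lo, hi):
    return min(max(x, lo), hi)

@cuda.jit
def apply_clamp(arr, lo, hi):
    i = cuda.grid(1)
    if i < arr.size:
        arr[i] = clamp(arr[i], lo, hi)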
Example #8
def declare_device(name, sig):
    argtypes, restype = sigutils.normalize_signature(sig)
    return declare_device_function(name, restype, argtypes)
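declare_device pairs naturally with the link= option of cuda.jit: it declares an externally defined device function whose symbol is resolved at link time. A hedged sketch (mul.ptx and the symbol mul_impl are hypothetical):

from numba import cuda

# Declare a device function implemented outside Python, e.g. in PTX.
mul = cuda.declare_device('mul_impl', 'float32(float32, float32)')

@cuda.jit(link=['mul.ptx'])  # hypothetical PTX file providing mul_impl
def multiply_kernel(out, a, b):
    i = cuda.grid(1)
    if i < out.size:
        out[i] = mul(a[i], b[i])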
Example #9
def _compile_element_wise_function(nb_func, targetoptions, sig):
    # Do compilation
    # Return CompileResult to test
    cres = nb_func.compile(sig, **targetoptions)
    args, return_type = sigutils.normalize_signature(sig)
    return cres, args, return_type
Example #10
def compile(self, sig):
    args, _ = sigutils.normalize_signature(sig)
    sig = (types.ffi_forced_object,) * len(args)
    return super().compile(sig)
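This override discards the requested argument types: whatever signature is passed in, every argument is re-typed as ffi_forced_object, i.e. treated as an opaque Python object, so only the arity survives. A small illustration of the coercion:

from numba.core import sigutils, types

args, _ = sigutils.normalize_signature("float64(float64, int64)")
obj_sig = (types.ffi_forced_object,) * len(args)
# obj_sig == (ffi_forced_object, ffi_forced_object)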
Example #11
File: irtools.py  Project: krunalkharat/rbc
def compile_to_LLVM(functions_and_signatures,
                    target: TargetInfo,
                    pipeline_class=compiler.Compiler,
                    debug=False):
    """Compile functions with given signatures to target specific LLVM IR.

    Parameters
    ----------
    functions_and_signatures : list
      A list of (Python function, signatures) pairs.
    target : TargetInfo
      Specify target device information.
    debug : bool
      When True, print which functions are kept in and which are removed from
      the final module.

    Returns
    -------
    module : llvmlite.binding.ModuleRef
      LLVM module instance. To get the IR string, use `str(module)`.

    """
    target_desc = registry.cpu_target
    if target is None:
        target = TargetInfo.host()
        typing_context = target_desc.typing_context
        target_context = target_desc.target_context
    else:
        typing_context = typing.Context()
        target_context = RemoteCPUContext(typing_context, target)
        # Bring over Array overloads (a hack):
        target_context._defns = target_desc.target_context._defns

    typing_context.target_info = target
    target_context.target_info = target

    codegen = target_context.codegen()
    main_library = codegen.create_library('rbc.irtools.compile_to_IR')
    main_module = main_library._final_module

    flags = compiler.Flags()
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')

    function_names = []
    for func, signatures in functions_and_signatures:
        for sig in signatures:
            fname = func.__name__ + sig.mangling
            function_names.append(fname)
            args, return_type = sigutils.normalize_signature(
                sig.tonumba(bool_is_int8=True))
            cres = compiler.compile_extra(typingctx=typing_context,
                                          targetctx=target_context,
                                          func=func,
                                          args=args,
                                          return_type=return_type,
                                          flags=flags,
                                          library=main_library,
                                          locals={},
                                          pipeline_class=pipeline_class)
            make_wrapper(fname, args, return_type, cres)

    seen = set()
    for _library in main_library._linking_libraries:
        if _library not in seen:
            seen.add(_library)
            main_module.link_in(
                _library._get_module_for_linking(),
                preserve=True,
            )

    main_library._optimize_final_module()

    # Catch undefined functions:
    used_functions = set(function_names)
    for fname in function_names:
        deps = get_function_dependencies(main_module, fname)
        for fn, descr in deps.items():
            used_functions.add(fn)
            if descr == 'undefined':
                if fn.startswith('numba_') and target.has_numba:
                    continue
                if fn.startswith('Py') and target.has_cpython:
                    continue
                raise RuntimeError('function `%s` is undefined' % (fn))

    # for global_variable in main_module.global_variables:
    #    global_variable.linkage = llvm.Linkage.private

    unused_functions = [
        f.name for f in main_module.functions if f.name not in used_functions
    ]

    if debug:
        print('compile_to_IR: the following functions are used')
        for fname in used_functions:
            lf = main_module.get_function(fname)
            print('  [ALIVE]', fname, 'with', lf.linkage)

    if unused_functions:
        if debug:
            print('compile_to_IR: the following functions are not used'
                  ' and will be removed:')
        for fname in unused_functions:
            lf = main_module.get_function(fname)
            if lf.is_declaration:
                # if the function is a declaration,
                # we just put the linkage as external
                lf.linkage = llvm.Linkage.external
            else:
                # but if the function is not a declaration,
                # we change the linkage to private
                lf.linkage = llvm.Linkage.private
            if debug:
                print('  [DEAD]', fname, 'with', lf.linkage)

        main_library._optimize_final_module()
    # TODO: determine unused global_variables and struct_types

    main_module.verify()
    main_library._finalized = True

    main_module.triple = target.triple
    main_module.data_layout = target.datalayout

    return main_module
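A hedged usage sketch, following the docstring above (my_func and my_sig are placeholders; the signature objects are assumed to provide the mangling attribute and tonumba() method used in the loop):

# Hypothetical call compiling one function for the host (target=None):
# module = compile_to_LLVM([(my_func, [my_sig])], target=None, debug=True)
# print(str(module))  # the generated LLVM IR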