Esempio n. 1
0
def compile_cuda(pyfunc, return_type, args, debug=False, inline=False):
    """Lower ``pyfunc`` to LLVM IR using the CUDA target contexts.

    Native code generation and the CPython/cfunc wrappers are switched
    off.  A truthy ``debug`` sets the ``debuginfo`` flag and a truthy
    ``inline`` sets ``forceinline``.  The compile result is returned after
    its library has been finalized.
    """
    # First compilation will trigger the initialization of the CUDA backend.
    from .descriptor import CUDATargetDesc

    typing_context = CUDATargetDesc.typingctx
    target_context = CUDATargetDesc.targetctx

    flags = compiler.Flags()
    # Lowering only (to LLVM): no native code and no wrappers.
    for option in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        flags.set(option)
    if debug:
        flags.set('debuginfo')
    if inline:
        flags.set('forceinline')

    # Run compilation pipeline
    result = compiler.compile_extra(
        typingctx=typing_context,
        targetctx=target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
    )
    result.library.finalize()
    return result
Esempio n. 2
0
def compile_hsa(pyfunc, return_type, args, debug):
    """Lower ``pyfunc`` to LLVM IR using the HSA target contexts.

    Native code generation and the CPython/cfunc wrappers are switched
    off and the ``nrt`` flag is unset.  The compile result is returned
    after its library has been finalized.

    ``debug`` is accepted but not used by this function yet.
    """
    # First compilation will trigger the initialization of the HSA backend.
    from .descriptor import HSATargetDesc

    typing_context = HSATargetDesc.typingctx
    target_context = HSATargetDesc.targetctx

    # TODO handle debug flag
    flags = compiler.Flags()
    # Lowering only (to LLVM): no native code and no wrappers.
    for option in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        flags.set(option)
    flags.unset('nrt')

    # Run compilation pipeline
    result = compiler.compile_extra(
        typingctx=typing_context,
        targetctx=target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
    )

    # No dependency linking is performed here; the library is only
    # finalized.
    result.library.finalize()
    return result
Esempio n. 3
0
def compile_cuda(pyfunc, return_type, args, debug=False, inline=False,
                 fastmath=False, nvvm_options=None):
    """Lower ``pyfunc`` to LLVM IR through the ``CUDACompiler`` pipeline.

    Native code generation and the CPython/cfunc wrappers are switched
    off.  Truthy ``debug``, ``inline`` and ``fastmath`` arguments set the
    ``debuginfo``, ``forceinline`` and ``fastmath`` flags respectively; a
    truthy ``nvvm_options`` is stored on the flags as-is.  The compile
    result is returned after its library has been finalized.
    """
    from .descriptor import cuda_target
    typing_context = cuda_target.typing_context
    target_context = cuda_target.target_context

    flags = CUDAFlags()
    # Lowering only (to LLVM): no native code and no wrappers.
    for option in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        setattr(flags, option, True)

    # Optional flags are only touched when the caller asked for them.
    requested = (
        (debug, 'debuginfo'),
        (inline, 'forceinline'),
        (fastmath, 'fastmath'),
    )
    for enabled, option in requested:
        if enabled:
            setattr(flags, option, True)
    if nvvm_options:
        flags.nvvm_options = nvvm_options

    # Run compilation pipeline
    result = compiler.compile_extra(
        typingctx=typing_context,
        targetctx=target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    result.library.finalize()
    return result
Esempio n. 4
0
def compile_cuda(pyfunc,
                 return_type,
                 args,
                 debug=False,
                 inline=False,
                 fastmath=False):
    """Lower ``pyfunc`` to LLVM IR through the ``CUDACompiler`` pipeline.

    Native code generation and the CPython/cfunc wrappers are switched
    off.  Truthy ``debug``, ``inline`` and ``fastmath`` arguments set the
    ``debuginfo``, ``forceinline`` and ``fastmath`` flags respectively.
    The compile result is returned after its library has been finalized.
    """
    from .descriptor import cuda_target
    typing_context = cuda_target.typingctx
    target_context = cuda_target.targetctx

    flags = compiler.Flags()
    # Lowering only (to LLVM): no native code and no wrappers.
    for option in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        flags.set(option)

    # Optional flags are only set when the caller asked for them.
    requested = (
        (debug, 'debuginfo'),
        (inline, 'forceinline'),
        (fastmath, 'fastmath'),
    )
    for enabled, option in requested:
        if enabled:
            flags.set(option)

    # Run compilation pipeline
    result = compiler.compile_extra(
        typingctx=typing_context,
        targetctx=target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    result.library.finalize()
    return result
Esempio n. 5
0
    def compile(self, func, args, return_type=None, flags=DEFAULT_FLAGS):
        """
        Return the compile result for ``func``, compiling it only when no
        entry for ``(func, args, return_type, flags)`` is in ``cr_cache``.
        """
        from numba.core.registry import cpu_target

        key = (func, args, return_type, flags)
        try:
            return self.cr_cache[key]
        except KeyError:
            # Cache miss.  The contexts are registered for the duration of
            # the compilation so nested @jit or @overload calls see them
            # (same as compile_isolated()).
            with cpu_target.nested_context(self.typingctx, self.targetctx):
                result = compile_extra(
                    self.typingctx,
                    self.targetctx,
                    func,
                    args,
                    return_type,
                    flags,
                    locals={},
                )
            self.cr_cache[key] = result
            return result
Esempio n. 6
0
def test_no_copy_usm_shared(capfd):
    """Check for ``DPCTLQueue_Memcpy`` in the LLVM IR generated for ``fn``.

    The string must be absent when ``fn`` is compiled for a ``usmarray``
    argument and present when it is compiled for a plain NumPy array.
    The test is skipped when the ``opencl:gpu:0`` device is unavailable.
    """
    usm_input = usmarray.ones(10, dtype=np.int64)
    numpy_input = np.ones(10, dtype=np.int64)

    flags = compiler.Flags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False
    flags.auto_parallel = cpu.ParallelOptions(True)

    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context
    usm_type = typingctx.resolve_argument_type(usm_input)

    try:
        device = dpctl.SyclDevice("opencl:gpu:0")
    except ValueError:
        pytest.skip("Device not found")

    def lowered_ir(argtype):
        # Compile ``fn`` taking and returning ``argtype``; return the IR text.
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=(argtype,),
            return_type=argtype,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
        return cres.library.get_llvm_str()

    with dppy.offload_to_sycl_device(device):
        assert "DPCTLQueue_Memcpy" not in lowered_ir(usm_type)

        numpy_type = typingctx.resolve_argument_type(numpy_input)
        assert "DPCTLQueue_Memcpy" in lowered_ir(numpy_type)
Esempio n. 7
0
def compile_isolated(pyfunc, argtypes, **kwargs):
    """Compile ``pyfunc`` for ``argtypes`` with the CPU target contexts.

    Extra keyword arguments are forwarded to ``compile_extra``.  When the
    caller does not supply them, ``return_type`` defaults to ``None`` and
    ``locals`` to an empty dict.
    """
    from numba.core.registry import cpu_target

    # Start from the defaults and let caller-supplied options override them.
    options = {'return_type': None, 'locals': {}}
    options.update(kwargs)

    typing_context = cpu_target.typing_context
    target_context = cpu_target.target_context
    return compile_extra(
        typing_context,
        target_context,
        pyfunc,
        argtypes,
        **options,
    )
Esempio n. 8
0
    def test_scalar(self):
        """Compare ``blackscholes_scalar`` with a compiled counterpart.

        ``cnd`` is compiled first and registered in the typing context so
        that ``blackscholes_scalar_jitted`` can be compiled against it.
        Both implementations are then run on the same random inputs and
        the maximum absolute difference of the call results is asserted
        to be almost zero.
        """
        flags = Flags()

        # Compile the inner function
        # The module-level name ``cnd_jitted`` is rebound to the compiled
        # entry point.
        global cnd_jitted
        cr1 = compile_isolated(cnd, (types.float64, ))
        cnd_jitted = cr1.entry_point
        # Manually type the compiled function for calling into
        tyctx = cr1.typing_context
        ctx = cr1.target_context
        signature = typing.make_concrete_template("cnd_jitted", cnd_jitted,
                                                  [cr1.signature])
        tyctx.insert_user_function(cnd_jitted, signature)

        # Compile the outer function
        # Signature: five 1-d C-contiguous float64 arrays followed by two
        # float64 scalars.
        array = types.Array(types.float64, 1, "C")
        argtys = (array, ) * 5 + (types.float64, types.float64)
        cr2 = compile_extra(
            tyctx,
            ctx,
            blackscholes_scalar_jitted,
            args=argtys,
            return_type=None,
            flags=flags,
            locals={},
        )
        jitted_bs = cr2.entry_point

        OPT_N = 400
        # NOTE(review): ``iterations`` is not used anywhere in this test.
        iterations = 10

        # Output buffers for the reference ("Gold") and compiled ("Numba")
        # runs.
        callResultGold = np.zeros(OPT_N)
        putResultGold = np.zeros(OPT_N)

        callResultNumba = np.zeros(OPT_N)
        putResultNumba = np.zeros(OPT_N)

        # Random inputs drawn from self.random and passed through randfloat
        # with the given bounds.
        stockPrice = randfloat(self.random.random_sample(OPT_N), 5.0, 30.0)
        optionStrike = randfloat(self.random.random_sample(OPT_N), 1.0, 100.0)
        optionYears = randfloat(self.random.random_sample(OPT_N), 0.25, 10.0)

        args = stockPrice, optionStrike, optionYears, RISKFREE, VOLATILITY

        # Run both implementations on identical inputs.
        blackscholes_scalar(callResultGold, putResultGold, *args)
        jitted_bs(callResultNumba, putResultNumba, *args)

        # Only the call results are compared; the put results are computed
        # but not checked.
        delta = np.abs(callResultGold - callResultNumba)
        L1norm = delta.sum() / np.abs(callResultGold).sum()
        print("L1 norm: %E" % L1norm)
        print("Max absolute error: %E" % delta.max())
        self.assertAlmostEqual(delta.max(), 0)
Esempio n. 9
0
    def try_lift(self, pyfunc, argtypes):
        """Compile ``pyfunc`` with ``looplift_flags`` on the CPU target.

        Asserts that the compile result reports exactly one lifted loop
        and returns the compile result.
        """
        from numba.core.registry import cpu_target

        typing_context = cpu_target.typing_context
        target_context = cpu_target.target_context
        result = compile_extra(
            typing_context,
            target_context,
            pyfunc,
            argtypes,
            return_type=None,
            flags=looplift_flags,
            locals={},
        )
        # Exactly one loop is expected to have been lifted.
        self.assertEqual(len(result.lifted), 1)
        return result
Esempio n. 10
0
    def _compile_core(self, args, return_type):
        """Compile the implementation selected for ``args``.

        Flags are built from ``self.targetoptions`` and then passed through
        ``self._customize_flags``.  The compile result is returned; if it
        carries a typing error and ``enable_pyobject`` is not set on the
        flags, that typing error is raised instead.
        """
        flags = compiler.Flags()
        self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
        flags = self._customize_flags(flags)

        impl = self._get_implementation(args, {})
        result = compiler.compile_extra(
            self.targetdescr.typing_context,
            self.targetdescr.target_context,
            impl,
            args=args,
            return_type=return_type,
            flags=flags,
            locals=self.locals,
            pipeline_class=self.pipeline_class,
        )

        # A typing error is only tolerated when object mode is enabled.
        if result.typing_error is None or flags.enable_pyobject:
            return result
        raise result.typing_error
Esempio n. 11
0
def compile_cuda(pyfunc,
                 return_type,
                 args,
                 debug=False,
                 lineinfo=False,
                 inline=False,
                 fastmath=False,
                 nvvm_options=None):
    """Lower ``pyfunc`` to LLVM IR through the ``CUDACompiler`` pipeline.

    Native code generation and the CPython/cfunc wrappers are switched
    off.  If ``debug`` or ``lineinfo`` is truthy, ``debuginfo`` is enabled
    and the error model is ``'python'``; otherwise the error model is
    ``'numpy'``.  Truthy ``inline`` and ``fastmath`` set the
    ``forceinline`` and ``fastmath`` flags, and a truthy ``nvvm_options``
    is stored on the flags as-is.  The compile result is returned after
    its library has been finalized.
    """
    from .descriptor import cuda_target
    typing_context = cuda_target.typing_context
    target_context = cuda_target.target_context

    flags = CUDAFlags()
    # Lowering only (to LLVM): no native code and no wrappers.
    for option in ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper'):
        setattr(flags, option, True)

    if not (debug or lineinfo):
        flags.error_model = 'numpy'
    else:
        # Both debug and lineinfo turn on debug information in the compiled
        # code.  They stay separate arguments in case other behavior is
        # later overloaded on the debug flag; in particular, -opt=3 is not
        # supported with -g.
        flags.debuginfo = True
        flags.error_model = 'python'

    for enabled, option in ((inline, 'forceinline'), (fastmath, 'fastmath')):
        if enabled:
            setattr(flags, option, True)
    if nvvm_options:
        flags.nvvm_options = nvvm_options

    # Run compilation pipeline
    result = compiler.compile_extra(
        typingctx=typing_context,
        targetctx=target_context,
        func=pyfunc,
        args=args,
        return_type=return_type,
        flags=flags,
        locals={},
        pipeline_class=CUDACompiler,
    )
    result.library.finalize()
    return result
Esempio n. 12
0
    def _compile_core(self, sig, flags, locals):
        """
        Trigger the compiler on the core function or load a previously
        compiled version from the cache.  Returns the CompileResult.

        ``sig`` is the signature to compile for; ``flags`` and ``locals``
        are forwarded to ``compiler.compile_extra``.  On success the result
        is also recorded in ``self.overloads`` under its signature, unless
        an entry for that signature already exists.
        """
        typingctx = self.targetdescr.typing_context
        targetctx = self.targetdescr.target_context

        @contextmanager
        def store_overloads_on_success():
            # use to ensure overloads are stored on success
            # ``cres`` is not a parameter: it is read from the enclosing
            # function's scope once the ``with`` body has finished.
            try:
                yield
            except Exception:
                # Failure: propagate without touching self.overloads.
                raise
            else:
                # Reached when the ``with`` body exits without an
                # exception, which includes leaving it via ``return``.
                exists = self.overloads.get(cres.signature)
                if exists is None:
                    self.overloads[cres.signature] = cres

        # Use cache and compiler in a critical section
        with global_compiler_lock:
            with store_overloads_on_success():
                # attempt look up of existing
                cres = self.cache.load_overload(sig, targetctx)
                if cres is not None:
                    return cres

                # Compile
                args, return_type = sigutils.normalize_signature(sig)
                cres = compiler.compile_extra(
                    typingctx,
                    targetctx,
                    self.py_func,
                    args=args,
                    return_type=return_type,
                    flags=flags,
                    locals=locals,
                )

                # cache lookup failed before so safe to save
                self.cache.save_overload(sig, cres)

                return cres
Esempio n. 13
0
    def _cull_exports(self):
        """Read all the exported functions/modules in the translator
        environment, and join them into a single LLVM module.

        Resets ``exported_function_types``, ``function_environments`` and
        ``environment_gvs``, compiles every entry of ``export_entries``
        into one library, finalizes that library, adjusts the visibility
        of its functions and returns it.
        """
        self.exported_function_types = {}
        self.function_environments = {}
        self.environment_gvs = {}

        codegen = self.context.codegen()
        library = codegen.create_library(self.module_name)

        # Generate IR for all exported functions
        flags = Flags()
        flags.set("no_compile")
        if not self.export_python_wrap:
            # No Python wrappers requested: skip both wrapper kinds.
            flags.set("no_cpython_wrapper")
            flags.set("no_cfunc_wrapper")
        if self.use_nrt:
            flags.set("nrt")
            # Compile NRT helpers
            nrt_module, _ = nrtdynmod.create_nrt_module(self.context)
            library.add_ir_module(nrt_module)

        for entry in self.export_entries:
            # Every entry is compiled into the same shared ``library``.
            cres = compile_extra(self.typing_context,
                                 self.context,
                                 entry.function,
                                 entry.signature.args,
                                 entry.signature.return_type,
                                 flags,
                                 locals={},
                                 library=library)

            func_name = cres.fndesc.llvm_func_name
            llvm_func = cres.library.get_function(func_name)

            if self.export_python_wrap:
                # The compiled function becomes internal; its CPython
                # wrapper is renamed to the mangled method symbol and made
                # external instead.
                llvm_func.linkage = lc.LINKAGE_INTERNAL
                wrappername = cres.fndesc.llvm_cpython_wrapper_name
                wrapper = cres.library.get_function(wrappername)
                wrapper.name = self._mangle_method_symbol(entry.symbol)
                wrapper.linkage = lc.LINKAGE_EXTERNAL
                fnty = cres.target_context.call_conv.get_function_type(
                    cres.fndesc.restype, cres.fndesc.argtypes)
                # Per-entry data recorded on the instance.
                self.exported_function_types[entry] = fnty
                self.function_environments[entry] = cres.environment
                self.environment_gvs[entry] = cres.fndesc.env_name
            else:
                # Without wrappers the compiled function itself is exported
                # under the entry's symbol.
                llvm_func.name = entry.symbol
                self.dll_exports.append(entry.symbol)

        if self.export_python_wrap:
            wrapper_module = library.create_ir_module("wrapper")
            self._emit_python_wrapper(wrapper_module)
            library.add_ir_module(wrapper_module)

        # Hide all functions in the DLL except those explicitly exported
        library.finalize()
        for fn in library.get_defined_functions():
            if fn.name not in self.dll_exports:
                # NOTE(review): linkage is spelled ``lc.LINKAGE_*`` above and
                # ``Linkage.*`` here -- confirm both names are in scope.
                if fn.linkage in {Linkage.private, Linkage.internal}:
                    # Private/Internal linkage must have "default" visibility
                    fn.visibility = "default"
                else:
                    fn.visibility = 'hidden'
        return library
Esempio n. 14
0
def compile_with_dppy(pyfunc, return_type, args, is_kernel, debug=None):
    """
    Compiles with Numba_dppy's pipeline and returns the compiled result.

    Args:
        pyfunc: The Python function (``FunctionType``) or the
            ``ir.FunctionIR`` to be compiled.
        return_type: The Numba type of the return value.
        args: The list of arguments sent to the Python function.
        is_kernel (bool): Indicates whether the function is decorated
            with @dppy.kernel or not.
        debug (bool): Flag to turn debug mode ON/OFF. When ``None`` (the
            default), ``config.DEBUGINFO_DEFAULT`` is used.

    Returns:
        cres: Compiled result, with its library finalized.

    Raises:
        TypeError: @dppy.kernel does not allow users to return any
            value. TypeError is raised when users do.
        AssertionError: ``pyfunc`` is neither a ``FunctionType`` nor an
            ``ir.FunctionIR``.

    """
    # First compilation will trigger the initialization of the OpenCL backend.
    from .descriptor import dppy_target

    typingctx = dppy_target.typing_context
    targetctx = dppy_target.target_context

    flags = compiler.Flags()
    flags.debuginfo = config.DEBUGINFO_DEFAULT
    # Do not compile (generate native code), just lower (to LLVM)
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False

    # An explicit ``debug`` argument overrides the configured default.
    if debug is not None:
        flags.debuginfo = debug

    # Run compilation pipeline
    if isinstance(pyfunc, FunctionType):
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=pyfunc,
            args=args,
            return_type=return_type,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
    elif isinstance(pyfunc, ir.FunctionIR):
        cres = compiler.compile_ir(
            typingctx=typingctx,
            targetctx=targetctx,
            func_ir=pyfunc,
            args=args,
            return_type=return_type,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
    else:
        # Raised explicitly rather than via ``assert 0``: assert statements
        # are removed under ``python -O``, which would let execution fall
        # through to the use of an unbound ``cres`` below.
        raise AssertionError(
            "compile_with_dppy expects a FunctionType or ir.FunctionIR, "
            "got {}".format(type(pyfunc).__name__)
        )

    if is_kernel:
        assert_no_return(cres.signature.return_type)

    # Finalize the library before handing the result back.
    library = cres.library
    library.finalize()

    return cres
Esempio n. 15
0
def compile_instance(func,
                     sig,
                     target: TargetInfo,
                     typing_context,
                     target_context,
                     pipeline_class,
                     main_library,
                     debug=False):
    """Compile ``func`` for the signature ``sig`` into ``main_library``.

    Returns the mangled function name on success.  Returns ``None``, after
    emitting a warning, when the function is skipped: because compilation
    failed with a message containing an ``UnsupportedError`` line, because
    it uses a declaration that ``target`` does not support, or because it
    uses an intrinsic missing from the library matching the target kind.
    Any other compilation error propagates to the caller.
    """
    lowering_only = ('no_compile', 'no_cpython_wrapper', 'no_cfunc_wrapper')
    flags = compiler.Flags()
    if get_version('numba') >= (0, 54):
        # numba >= 0.54: flags are set through attributes.
        for option in lowering_only:
            setattr(flags, option, True)
    else:
        for option in lowering_only:
            flags.set(option)

    fname = func.__name__ + sig.mangling()
    args, return_type = sigutils.normalize_signature(
        sig.tonumba(bool_is_int8=True))
    try:
        cres = compiler.compile_extra(
            typingctx=typing_context,
            targetctx=target_context,
            func=func,
            args=args,
            return_type=return_type,
            flags=flags,
            library=main_library,
            locals={},
            pipeline_class=pipeline_class,
        )
    except (UnsupportedError, nb_errors.TypingError,
            nb_errors.LoweringError) as msg:
        # Only errors whose text carries an UnsupportedError line are
        # downgraded to a warning; everything else is re-raised.
        m = re.search(r'UnsupportedError(.*?)\n', str(msg), re.S)
        if m is None:
            raise
        warnings.warn(f'Skipping {fname}:{m.group(0)[18:]}')
        return

    result = get_called_functions(cres.library, cres.fndesc.llvm_func_name)

    # Every declaration the function calls must be supported by the target.
    for f in result['declarations']:
        if not target.supports(f):
            warnings.warn(
                f'Skipping {fname} that uses undefined function `{f}`')
            return

    # Every intrinsic must be known to the library for the target kind.
    nvvmlib = libfuncs.Library.get('nvvm')
    llvmlib = libfuncs.Library.get('llvm')
    for f in result['intrinsics']:
        if target.is_gpu and f in nvvmlib:
            continue
        if target.is_cpu and f in llvmlib:
            continue
        warnings.warn(
            f'Skipping {fname} that uses unsupported intrinsic `{f}`')
        return

    make_wrapper(fname, args, return_type, cres, target, verbose=debug)

    # Link the libraries the function depends on into the main module.
    final_module = main_library._final_module
    for dependency in result['libraries']:
        final_module.link_in(
            dependency._get_module_for_linking(),
            preserve=True,
        )

    return fname
Esempio n. 16
0
def compile_to_LLVM(functions_and_signatures,
                    target: TargetInfo,
                    pipeline_class=compiler.Compiler,
                    debug=False):
    """Compile functions with given signatures to target specific LLVM IR.

    Parameters
    ----------
    functions_and_signatures : list
      Specify a list of (Python function, signatures) pairs.
    target : TargetInfo
      Specify target device information. When None, the host target
      (`TargetInfo.host()`) and the registry's CPU contexts are used.
    pipeline_class
      Compiler pipeline class forwarded to `compiler.compile_extra`.
    debug : bool
      When true, print the used and the unused functions together with
      their linkage.

    Returns
    -------
    module : llvmlite.binding.ModuleRef
      LLVM module instance. To get the IR string, use `str(module)`.

    Raises
    ------
    RuntimeError
      When a dependency of a compiled function is reported as undefined,
      unless its name starts with `numba_` and the target has numba, or
      starts with `Py` and the target has cpython.

    """
    target_desc = registry.cpu_target
    if target is None:
        target = TargetInfo.host()
        typing_context = target_desc.typing_context
        target_context = target_desc.target_context
    else:
        # An explicit target gets fresh contexts of its own.
        typing_context = typing.Context()
        target_context = RemoteCPUContext(typing_context, target)
        # Bring over Array overloads (a hack):
        target_context._defns = target_desc.target_context._defns

    typing_context.target_info = target
    target_context.target_info = target

    codegen = target_context.codegen()
    main_library = codegen.create_library('rbc.irtools.compile_to_IR')
    main_module = main_library._final_module

    flags = compiler.Flags()
    flags.set('no_compile')
    flags.set('no_cpython_wrapper')

    # Compile every (function, signature) combination into main_library and
    # remember the mangled names.
    function_names = []
    for func, signatures in functions_and_signatures:
        for sig in signatures:
            fname = func.__name__ + sig.mangling
            function_names.append(fname)
            args, return_type = sigutils.normalize_signature(
                sig.tonumba(bool_is_int8=True))
            cres = compiler.compile_extra(typingctx=typing_context,
                                          targetctx=target_context,
                                          func=func,
                                          args=args,
                                          return_type=return_type,
                                          flags=flags,
                                          library=main_library,
                                          locals={},
                                          pipeline_class=pipeline_class)
            make_wrapper(fname, args, return_type, cres)

    # Link each linking library into the main module exactly once.
    seen = set()
    for _library in main_library._linking_libraries:
        if _library not in seen:
            seen.add(_library)
            main_module.link_in(
                _library._get_module_for_linking(),
                preserve=True,
            )

    main_library._optimize_final_module()

    # Catch undefined functions:
    used_functions = set(function_names)
    for fname in function_names:
        deps = get_function_dependencies(main_module, fname)
        for fn, descr in deps.items():
            used_functions.add(fn)
            if descr == 'undefined':
                # numba_* and Py* symbols are tolerated when the target
                # reports having numba / cpython respectively.
                if fn.startswith('numba_') and target.has_numba:
                    continue
                if fn.startswith('Py') and target.has_cpython:
                    continue
                raise RuntimeError('function `%s` is undefined' % (fn))

    # for global_variable in main_module.global_variables:
    #    global_variable.linkage = llvm.Linkage.private

    # Everything in the module that is neither a requested function nor
    # one of its dependencies.
    unused_functions = [
        f.name for f in main_module.functions if f.name not in used_functions
    ]

    if debug:
        print('compile_to_IR: the following functions are used')
        for fname in used_functions:
            lf = main_module.get_function(fname)
            print('  [ALIVE]', fname, 'with', lf.linkage)

    if unused_functions:
        if debug:
            print('compile_to_IR: the following functions are not used'
                  ' and will be removed:')
        for fname in unused_functions:
            lf = main_module.get_function(fname)
            if lf.is_declaration:
                # if the function is a declaration,
                # we just put the linkage as external
                lf.linkage = llvm.Linkage.external
            else:
                # but if the function is not a declaration,
                # we change the linkage to private
                lf.linkage = llvm.Linkage.private
            if debug:
                print('  [DEAD]', fname, 'with', lf.linkage)

        # Second optimization pass, run after the linkage changes above.
        main_library._optimize_final_module()
    # TODO: determine unused global_variables and struct_types

    main_module.verify()
    # The library is marked finalized by setting the private attribute
    # directly.
    main_library._finalized = True

    main_module.triple = target.triple
    main_module.data_layout = target.datalayout

    return main_module