Code example #1
    def _generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        arginfo = self.get_arg_packer(argtypes)
        wrapperfnty = lc.Type.function(lc.Type.void(), arginfo.argument_types)
        wrapper_module = self.create_module("dppy.kernel.wrapper")
        wrappername = "dppyPy_{name}".format(name=func.name)
        argtys = list(arginfo.argument_types)
        fnty = lc.Type.function(
            lc.Type.int(),
            [self.call_conv.get_return_type(types.pyobject)] + argtys,
        )
        func = wrapper_module.add_function(fnty, name=func.name)
        func.calling_convention = CC_SPIR_FUNC
        wrapper = wrapper_module.add_function(wrapperfnty, name=wrappername)
        builder = lc.Builder(wrapper.append_basic_block(""))

        callargs = arginfo.from_arguments(builder, wrapper.args)

        # XXX handle error status
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)
        builder.ret_void()

        self._finalize_wrapper_module(wrapper)

        # Link the wrapper module into the module containing the spir_func
        module.link_in(ll.parse_assembly(str(wrapper_module)))
        # Make sure the spir_func has internal linkage to be inlinable.
        func.linkage = "internal"
        wrapper = module.get_function(wrapper.name)
        module.get_function(func.name).linkage = "internal"
        return wrapper
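
A minimal sketch of the same wrap-call-link pattern using plain llvmlite, outside the Numba target machinery (the module names, the `inner` function and its body, and `py_wrapper` are invented for illustration and are not part of the snippet above): a void wrapper calls a function declared in a separate module, the wrapper module is linked in with parse_assembly, and the callee is then given internal linkage so the optimizer is free to inline it.

from llvmlite import ir, binding as llvm

llvm.initialize()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()

int32 = ir.IntType(32)

# Module holding the "inner" function (stands in for the compiled spir_func).
main_mod = ir.Module(name="main")
inner_fnty = ir.FunctionType(int32, [int32])
inner = ir.Function(main_mod, inner_fnty, name="inner")
b = ir.IRBuilder(inner.append_basic_block("entry"))
b.ret(b.add(inner.args[0], ir.Constant(int32, 1)))

# Separate wrapper module, mirroring create_module() above.
wrap_mod = ir.Module(name="wrapper")
wrapper = ir.Function(wrap_mod, ir.FunctionType(ir.VoidType(), [int32]),
                      name="py_wrapper")
inner_decl = ir.Function(wrap_mod, inner_fnty, name="inner")  # declaration only
wb = ir.IRBuilder(wrapper.append_basic_block(""))
wb.call(inner_decl, [wrapper.args[0]])  # return status ignored, as in the XXX above
wb.ret_void()

# Link the wrapper into the main module and hide the inner function so it can
# be inlined away.
llmod = llvm.parse_assembly(str(main_mod))
llmod.link_in(llvm.parse_assembly(str(wrap_mod)))
for fn in llmod.functions:
    if fn.name == "inner":
        fn.linkage = "internal"
print(llmod)
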
Code example #2
File: parallel.py  Project: stefanseefeld/numba
def _make_cas_function():
    """
    Generate a compare-and-swap function for portability's sake.
    """
    from numba.targets.registry import cpu_target

    codegen = cpu_target.target_context.codegen()

    # Generate IR
    library = codegen.create_library('cas_for_parallel_ufunc')
    mod = library.create_ir_module('cas_module')

    llint = lc.Type.int()
    llintp = lc.Type.pointer(llint)
    fnty = lc.Type.function(llint, [llintp, llint, llint])
    fn = mod.add_function(fnty, name='.numba.parallel.ufunc.cas')
    ptr, old, repl = fn.args
    bb = fn.append_basic_block('')
    builder = lc.Builder(bb)
    outpack = builder.cmpxchg(ptr, old, repl, ordering='monotonic')
    out = builder.extract_value(outpack, 0)
    failed = builder.extract_value(outpack, 1)
    builder.ret(builder.select(failed, old, out))

    # Build & Link
    library.add_ir_module(mod)
    library.finalize()

    ptr = library.get_pointer_to_function(fn.name)

    return library, ptr
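
A possible usage sketch of the `(library, ptr)` pair returned above, assuming the snippet's environment is available: the raw pointer is wrapped with ctypes (the IR uses the default 32-bit integer type, hence c_int32) and the compare-and-swap semantics are exercised directly. This is illustrative only, not Numba API.

import ctypes

library, ptr = _make_cas_function()          # keep `library` alive; it owns the code
cas = ctypes.CFUNCTYPE(ctypes.c_int32,       # returns the value seen in memory
                       ctypes.POINTER(ctypes.c_int32),
                       ctypes.c_int32,       # expected old value
                       ctypes.c_int32)(ptr)  # replacement value

slot = ctypes.c_int32(0)
assert cas(ctypes.byref(slot), 0, 42) == 0   # expected 0, found 0: swap happens
assert slot.value == 42
assert cas(ctypes.byref(slot), 0, 99) == 42  # expected 0, found 42: no swap
assert slot.value == 42
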
Code example #3
File: compiler.py  Project: zxsted/numba
    def _emit_python_wrapper(self, llvm_module):

        # Define the module initialization function.
        mod_init_fn = llvm_module.add_function(*self.module_init_definition)
        entry = mod_init_fn.append_basic_block('Entry')
        builder = lc.Builder(entry)
        pyapi = self.context.get_python_api(builder)

        # Get an LLVM function for the Python C API module creation function.
        create_module_fn = llvm_module.add_function(
            *self.module_create_definition)
        create_module_fn.linkage = lc.LINKAGE_EXTERNAL

        # Define a constant string for the module name.
        mod_name_const = self.context.insert_const_string(
            llvm_module, self.module_name)

        method_array = self._emit_method_array(llvm_module)

        mod = builder.call(create_module_fn,
                           (mod_name_const, method_array, NULL,
                            lc.Constant.null(lt._pyobject_head_p),
                            lc.Constant.int(lt._int32, sys.api_version)))

        env_array = self._emit_environment_array(llvm_module, builder, pyapi)
        self._emit_module_init_code(llvm_module, builder, mod, method_array,
                                    env_array)
        # XXX No way to notify failure to caller...

        builder.ret_void()

        self.dll_exports.append(mod_init_fn.name)
Code example #4
    def generate_kernel_wrapper(self, func, argtypes):
        module = func.module
        arginfo = self.get_arg_packer(argtypes)

        def sub_gen_with_global(lty):
            if isinstance(lty, llvmir.PointerType):
                return (lty.pointee.as_pointer(SPIR_GLOBAL_ADDRSPACE),
                        lty.addrspace)
            return lty, None

        if len(arginfo.argument_types) > 0:
            llargtys, changed = zip(
                *map(sub_gen_with_global, arginfo.argument_types))
        else:
            llargtys = changed = ()
        wrapperfnty = lc.Type.function(lc.Type.void(), llargtys)

        wrapper_module = self.create_module("hsa.kernel.wrapper")
        wrappername = 'hsaPy_{name}'.format(name=func.name)

        argtys = list(arginfo.argument_types)
        fnty = lc.Type.function(
            lc.Type.int(),
            [self.call_conv.get_return_type(types.pyobject)] + argtys)

        func = llvmir.Function(wrapper_module, fnty, func.name)
        func.calling_convention = CC_SPIR_FUNC

        wrapper = llvmir.Function(wrapper_module,
                                  wrapperfnty,
                                  name=wrappername)

        builder = lc.Builder(wrapper.append_basic_block(''))

        # Adjust address space of each kernel argument
        fixed_args = []
        for av, adrsp in zip(wrapper.args, changed):
            if adrsp is not None:
                casted = self.addrspacecast(builder, av, adrsp)
                fixed_args.append(casted)
            else:
                fixed_args.append(av)

        callargs = arginfo.from_arguments(builder, fixed_args)

        # XXX handle error status
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)
        builder.ret_void()

        set_hsa_kernel(wrapper)

        # Link
        module.link_in(ll.parse_assembly(str(wrapper_module)))
        # Internal linkage enables inlining, which is essential because
        # addrspacecast 1->0 is illegal; inlining will optimize the
        # addrspacecast out.
        func.linkage = 'internal'
        wrapper = module.get_function(wrapper.name)
        module.get_function(func.name).linkage = 'internal'
        return wrapper
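
The address-space fix-up performed by sub_gen_with_global and the addrspacecast loop can be shown in isolation with plain llvmlite.ir. In the sketch below the module, the function, and the value 1 for the global address space are stand-ins for the SPIR constants used above:

from llvmlite import ir

GLOBAL_ADDRSPACE = 1                        # stands in for SPIR_GLOBAL_ADDRSPACE

int32 = ir.IntType(32)
generic_ptr = int32.as_pointer()            # address space 0
global_ptr = int32.as_pointer(GLOBAL_ADDRSPACE)

mod = ir.Module(name="addrspace_demo")
fn = ir.Function(mod, ir.FunctionType(ir.VoidType(), [global_ptr]),
                 name="wrapper")
builder = ir.IRBuilder(fn.append_basic_block(""))

# The kernel argument arrives in addrspace(1); cast it back to the generic
# address space before handing it to code that expects plain pointers.
fixed = builder.addrspacecast(fn.args[0], generic_ptr)
builder.store(ir.Constant(int32, 0), fixed)
builder.ret_void()

print(mod)   # the printed IR contains an addrspacecast instruction
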
Code example #5
File: emitter.py  Project: mattpaletta/numpile
    def start_function(self, name, module, rettype, argtypes):
        func_type = ll_core.Type.function(rettype, argtypes, False)
        function = ll_core.Function.new(module, func_type, name)
        entry_block = function.append_basic_block("entry")
        builder = ll_core.Builder(entry_block)
        self.exit_block = function.append_basic_block("exit")
        self.function = function
        self.builder = builder
Code example #6
    def _context_builder_sig_args(self):
        typing_context = typing.Context()
        context = cpu.CPUContext(typing_context)
        module = lc.Module("test_module")

        sig = typing.signature(types.int32, types.int32)
        llvm_fnty = context.call_conv.get_function_type(
            sig.return_type, sig.args)
        function = module.get_or_insert_function(llvm_fnty, name='test_fn')
        args = context.call_conv.get_arguments(function)
        assert function.is_declaration
        entry_block = function.append_basic_block('entry')
        builder = lc.Builder(entry_block)

        return context, builder, sig, args
Code example #7
    def _context_builder_sig_args(self):
        typing_context = typing.Context()
        context = cpu.CPUContext(typing_context)
        lib = context.codegen().create_library("testing")
        with context.push_code_library(lib):
            module = lc.Module("test_module")

            sig = typing.signature(types.int32, types.int32)
            llvm_fnty = context.call_conv.get_function_type(
                sig.return_type, sig.args)
            function = module.get_or_insert_function(llvm_fnty, name="test_fn")
            args = context.call_conv.get_arguments(function)
            assert function.is_declaration
            entry_block = function.append_basic_block("entry")
            builder = lc.Builder(entry_block)

            yield context, builder, sig, args
Code example #8
    def test_cache(self):
        def times2(i):
            return 2 * i

        def times3(i):
            return i * 3

        with self._context_builder_sig_args() as (
                context,
                builder,
                sig,
                args,
        ):
            # Ensure the cache is empty to begin with
            self.assertEqual(0, len(context.cached_internal_func))

            # After one compile, it should contain one entry
            context.compile_internal(builder, times2, sig, args)
            self.assertEqual(1, len(context.cached_internal_func))

            # After a second compilation of the same thing, it should still contain
            # one entry
            context.compile_internal(builder, times2, sig, args)
            self.assertEqual(1, len(context.cached_internal_func))

            # After compilation of another function, the cache should have grown by
            # one more.
            context.compile_internal(builder, times3, sig, args)
            self.assertEqual(2, len(context.cached_internal_func))

            sig2 = typing.signature(types.float64, types.float64)
            llvm_fnty2 = context.call_conv.get_function_type(
                sig2.return_type, sig2.args)
            function2 = builder.module.get_or_insert_function(llvm_fnty2,
                                                              name='test_fn_2')
            args2 = context.call_conv.get_arguments(function2)
            assert function2.is_declaration
            entry_block2 = function2.append_basic_block('entry')
            builder2 = lc.Builder(entry_block2)

            # Ensure that the same function with a different signature does not
            # reuse an entry from the cache in error
            context.compile_internal(builder2, times3, sig2, args2)
            self.assertEqual(3, len(context.cached_internal_func))
Code example #9
def real_divmod(context, builder, x, y):
    assert x.type == y.type
    floatty = x.type

    module = builder.module
    fname = context.mangler(".numba.python.rem", [x.type])
    fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty)))
    fn = module.get_or_insert_function(fnty, fname)

    if fn.is_declaration:
        fn.linkage = lc.LINKAGE_LINKONCE_ODR
        fnbuilder = lc.Builder(fn.append_basic_block('entry'))
        fx, fy, pmod = fn.args
        div, mod = real_divmod_func_body(context, fnbuilder, fx, fy)
        fnbuilder.store(mod, pmod)
        fnbuilder.ret(div)

    pmod = cgutils.alloca_once(builder, floatty)
    quotient = builder.call(fn, (x, y, pmod))
    return quotient, builder.load(pmod)
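
The declare-on-demand idiom above (look the function up, emit a body only while it is still a declaration, and use linkonce_odr linkage so duplicate definitions merge at link time) can be sketched with plain llvmlite.ir. The get_or_insert_function helper below is hypothetical and only mimics the llvmpy method used in the snippet:

from llvmlite import ir

def get_or_insert_function(module, fnty, name):
    """Return the named function, declaring it in `module` if it is missing."""
    fn = module.globals.get(name)
    if fn is None:
        fn = ir.Function(module, fnty, name)
    return fn

mod = ir.Module(name="demo")
dbl = ir.DoubleType()
fnty = ir.FunctionType(dbl, (dbl, dbl))

fn = get_or_insert_function(mod, fnty, ".demo.fmul")
if fn.is_declaration:                     # only the first caller emits the body
    fn.linkage = "linkonce_odr"           # duplicate definitions merge when linking
    b = ir.IRBuilder(fn.append_basic_block("entry"))
    b.ret(b.fmul(*fn.args))

# A second lookup returns the already-defined function untouched.
assert get_or_insert_function(mod, fnty, ".demo.fmul") is fn
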
Code example #10
    def compile_function(self, nargs):
        llvm_fnty = lc.Type.function(machine_int, [machine_int] * nargs)
        ctypes_fnty = ctypes.CFUNCTYPE(ctypes.c_size_t,
                                       *(ctypes.c_size_t, ) * nargs)
        module = self.context.create_module("")

        function = module.get_or_insert_function(llvm_fnty, name=self.id())
        assert function.is_declaration
        entry_block = function.append_basic_block('entry')
        builder = lc.Builder(entry_block)

        first = [True]

        def call_func(*args):
            codegen = self.context.codegen()
            library = codegen.create_library("test_module.%s" % self.id())
            library.add_ir_module(module)
            cptr = library.get_pointer_to_function(function.name)
            cfunc = ctypes_fnty(cptr)
            return cfunc(*args)

        yield self.context, builder, function.args, call_func
Code example #11
    def _emit_python_wrapper(self, llvm_module):
        # Figure out the Python C API module creation function, and
        # get an LLVM function for it.
        create_module_fn = llvm_module.add_function(
            *self.module_create_definition)
        create_module_fn.linkage = lc.LINKAGE_EXTERNAL

        # Define a constant string for the module name.
        mod_name_const = self.context.insert_const_string(
            llvm_module, self.module_name)

        mod_def_base_init = lc.Constant.struct((
            lt._pyobject_head_init,  # PyObject_HEAD
            lc.Constant.null(self.m_init_ty),  # m_init
            lc.Constant.null(lt._llvm_py_ssize_t),  # m_index
            lc.Constant.null(lt._pyobject_head_p),  # m_copy
        ))
        mod_def_base = llvm_module.add_global_variable(mod_def_base_init.type,
                                                       '.module_def_base')
        mod_def_base.initializer = mod_def_base_init
        mod_def_base.linkage = lc.LINKAGE_INTERNAL

        method_array = self._emit_method_array(llvm_module)

        mod_def_init = lc.Constant.struct((
            mod_def_base_init,  # m_base
            mod_name_const,  # m_name
            lc.Constant.null(self._char_star),  # m_doc
            lc.Constant.int(lt._llvm_py_ssize_t, -1),  # m_size
            method_array,  # m_methods
            lc.Constant.null(self.inquiry_ty),  # m_reload
            lc.Constant.null(self.traverseproc_ty),  # m_traverse
            lc.Constant.null(self.inquiry_ty),  # m_clear
            lc.Constant.null(self.freefunc_ty)  # m_free
        ))

        # Define the module definition structure.
        mod_def = llvm_module.add_global_variable(mod_def_init.type,
                                                  '.module_def')
        mod_def.initializer = mod_def_init
        mod_def.linkage = lc.LINKAGE_INTERNAL

        # Define the module initialization function.
        mod_init_fn = llvm_module.add_function(*self.module_init_definition)
        entry = mod_init_fn.append_basic_block('Entry')
        builder = lc.Builder(entry)
        pyapi = self.context.get_python_api(builder)

        mod = builder.call(
            create_module_fn,
            (mod_def, lc.Constant.int(lt._int32, sys.api_version)))

        # Test if module has been created correctly.
        # (XXX for some reason comparing with the NULL constant fails llvm
        #  with an assertion in pydebug mode)
        with builder.if_then(cgutils.is_null(builder, mod)):
            builder.ret(NULL.bitcast(mod_init_fn.type.pointee.return_type))

        env_array = self._emit_environment_array(llvm_module, builder, pyapi)
        envgv_array = self._emit_envgvs_array(llvm_module, builder, pyapi)
        ret = self._emit_module_init_code(llvm_module, builder, mod,
                                          method_array, env_array, envgv_array)
        if ret is not None:
            with builder.if_then(cgutils.is_not_null(builder, ret)):
                # Init function errored out
                builder.ret(lc.Constant.null(mod.type))

        builder.ret(mod)

        self.dll_exports.append(mod_init_fn.name)
Code example #12
File: parallel.py  Project: zhaijf1992/numba
def build_gufunc_kernel(library, ctx, info, sig, inner_ndim):
    """Wrap the original CPU ufunc/gufunc with a parallel dispatcher.
    This function wraps both gufuncs and ufuncs; the kernel it generates is
    described under Details below.

    Args
    ----
    ctx
        numba's codegen context

    info: (library, env, name)
        inner function info

    sig
        type signature of the gufunc

    inner_ndim
        inner dimension of the gufunc (this is len(sig.args) in the case of a
        ufunc)

    Returns
    -------
    wrapper_info : (library, env, name)
        The info for the gufunc wrapper.

    Details
    -------

    The kernel signature looks like this:

    void kernel(char **args, npy_intp *dimensions, npy_intp* steps, void* data)

    args - the input arrays + output arrays
    dimensions - the dimensions of the arrays
    steps - the step size for the array (this is like sizeof(type))
    data - any additional data

    The parallel backend then stages multiple calls to this kernel concurrently
    across a number of threads. Practically, for each item of work, the backend
    duplicates `dimensions` and adjusts the first entry to reflect the size of
    that item. It also forms an array of pointers into `args`, offset so that
    each item reads from and writes to its own position within the overall
    work. This allows the same kernel to be used for every item of work, with
    only the reads/writes and domain sizes adjusted, and it is safe by virtue
    of the domain partitioning.

    NOTE: The execution backend is passed the requested thread count, but it can
    choose to ignore it (TBB)!
    """
    assert isinstance(info, tuple)  # guard against old usage
    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)
    byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t)

    intp_t = ctx.get_value_type(types.intp)
    intp_ptr_t = lc.Type.pointer(intp_t)

    fnty = lc.Type.function(lc.Type.void(), [
        lc.Type.pointer(byte_ptr_t),
        lc.Type.pointer(intp_t),
        lc.Type.pointer(intp_t), byte_ptr_t
    ])
    wrapperlib = ctx.codegen().create_library('parallelgufuncwrapper')
    mod = wrapperlib.create_ir_module('parallel.gufunc.wrapper')
    kernel_name = ".kernel.{}_{}".format(id(info.env), info.name)
    lfunc = ir.Function(mod, fnty, name=kernel_name)

    bb_entry = lfunc.append_basic_block('')

    # Function body starts
    builder = lc.Builder(bb_entry)

    args, dimensions, steps, data = lfunc.args

    # Release the GIL (and ensure we have the GIL)
    # Note: numpy ufunc may not always release the GIL; thus,
    #       we need to ensure we have the GIL.
    pyapi = ctx.get_python_api(builder)
    gil_state = pyapi.gil_ensure()
    thread_state = pyapi.save_thread()

    def as_void_ptr(arg):
        return builder.bitcast(arg, byte_ptr_t)

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    parallel_for_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5 + [
        intp_t,
    ] * 3)
    parallel_for = cgutils.get_or_insert_function(mod, parallel_for_ty,
                                                  'numba_parallel_for')

    # Reference inner-function and link
    innerfunc_fnty = lc.Type.function(
        lc.Type.void(),
        [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t],
    )
    tmp_voidptr = cgutils.get_or_insert_function(
        mod,
        innerfunc_fnty,
        info.name,
    )
    wrapperlib.add_linking_library(info.library)

    get_num_threads = cgutils.get_or_insert_function(
        builder.module, lc.Type.function(lc.Type.int(types.intp.bitwidth), []),
        "get_num_threads")

    num_threads = builder.call(get_num_threads, [])

    # Prepare call
    fnptr = builder.bitcast(tmp_voidptr, byte_ptr_t)
    innerargs = [as_void_ptr(x) for x in [args, dimensions, steps, data]]
    builder.call(parallel_for, [fnptr] + innerargs +
                 [intp_t(x)
                  for x in (inner_ndim, array_count)] + [num_threads])

    # Release the GIL
    pyapi.restore_thread(thread_state)
    pyapi.gil_release(gil_state)

    builder.ret_void()

    wrapperlib.add_ir_module(mod)
    wrapperlib.add_linking_library(library)
    return _wrapper_info(library=wrapperlib, name=lfunc.name, env=info.env)
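
The work-item staging described in the docstring above can be illustrated with a short pure-Python sketch. This is not Numba code; the function name and the example numbers are invented. Each item gets its own copy of `dimensions` with the first entry replaced by the item size, and each `args` pointer is offset by start * step:

def stage_work_items(total, num_threads, args, steps, dimensions):
    """args: base addresses as ints; steps: bytes per element, one per array."""
    chunk = total // num_threads
    items = []
    start = 0
    for i in range(num_threads):
        size = total - start if i == num_threads - 1 else chunk
        dims = list(dimensions)
        dims[0] = size                              # this item's loop count
        offset_args = [base + start * step          # per-array pointer offsets
                       for base, step in zip(args, steps)]
        items.append((offset_args, dims))
        start += size
    return items

# 10 elements, 3 threads, one int64 input and one int64 output array:
for offset_args, dims in stage_work_items(10, 3, args=[0x1000, 0x2000],
                                          steps=[8, 8], dimensions=[10]):
    print(dims, [hex(a) for a in offset_args])
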
Code example #13
def build_ufunc_kernel(library, ctx, innerfunc, sig):
    """Wrap the original CPU ufunc with a parallel dispatcher.

    Args
    ----
    ctx
        numba's codegen context

    innerfunc
        llvm function of the original CPU ufunc

    sig
        type signature of the ufunc

    Details
    -------

    Generate a function of the following signature:

    void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps,
                      void* data)

    Divide the work equally across all threads and let the last thread take
    all the leftover work.
    """
    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)

    intp_t = ctx.get_value_type(types.intp)

    fnty = lc.Type.function(lc.Type.void(), [
        lc.Type.pointer(byte_ptr_t),
        lc.Type.pointer(intp_t),
        lc.Type.pointer(intp_t), byte_ptr_t
    ])
    wrapperlib = ctx.codegen().create_library('parallelufuncwrapper')
    mod = wrapperlib.create_ir_module('parallel.ufunc.wrapper')
    lfunc = mod.add_function(fnty, name=".kernel." + str(innerfunc))

    bb_entry = lfunc.append_basic_block('')

    # Function body starts
    builder = lc.Builder(bb_entry)

    args, dimensions, steps, data = lfunc.args

    # Release the GIL (and ensure we have the GIL)
    # Note: numpy ufunc may not always release the GIL; thus,
    #       we need to ensure we have the GIL.
    pyapi = ctx.get_python_api(builder)
    gil_state = pyapi.gil_ensure()
    thread_state = pyapi.save_thread()

    # Distribute work
    total = builder.load(dimensions)
    ncpu = lc.Constant.int(total.type, NUM_THREADS)

    count = builder.udiv(total, ncpu)

    count_list = []
    remain = total

    for i in range(NUM_THREADS):
        space = builder.alloca(intp_t)
        count_list.append(space)

        if i == NUM_THREADS - 1:
            # Last thread takes all leftover
            builder.store(remain, space)
        else:
            builder.store(count, space)
            remain = builder.sub(remain, count)

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    # Get the increment step for each array
    steps_list = []
    for i in range(array_count):
        ptr = builder.gep(steps, [lc.Constant.int(lc.Type.int(), i)])
        step = builder.load(ptr)
        steps_list.append(step)

    # Get the array argument set for each thread
    args_list = []
    for i in range(NUM_THREADS):
        space = builder.alloca(byte_ptr_t,
                               size=lc.Constant.int(lc.Type.int(),
                                                    array_count))
        args_list.append(space)

        for j in range(array_count):
            # For each array, compute subarray pointer
            dst = builder.gep(space, [lc.Constant.int(lc.Type.int(), j)])
            src = builder.gep(args, [lc.Constant.int(lc.Type.int(), j)])

            baseptr = builder.load(src)
            base = builder.ptrtoint(baseptr, intp_t)
            multiplier = lc.Constant.int(count.type, i)
            offset = builder.mul(steps_list[j], builder.mul(count, multiplier))
            addr = builder.inttoptr(builder.add(base, offset), baseptr.type)

            builder.store(addr, dst)

    # Declare external functions
    add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5)
    empty_fnty = lc.Type.function(lc.Type.void(), ())
    add_task = mod.get_or_insert_function(add_task_ty, name='numba_add_task')
    synchronize = mod.get_or_insert_function(empty_fnty,
                                             name='numba_synchronize')
    ready = mod.get_or_insert_function(empty_fnty, name='numba_ready')

    # Add tasks for queue; one per thread
    as_void_ptr = lambda arg: builder.bitcast(arg, byte_ptr_t)

    # Note: the runtime address is taken and used as a constant in the function.
    fnptr = ctx.get_constant(types.uintp, innerfunc).inttoptr(byte_ptr_t)
    for each_args, each_dims in zip(args_list, count_list):
        innerargs = [
            as_void_ptr(x) for x in [each_args, each_dims, steps, data]
        ]

        builder.call(add_task, [fnptr] + innerargs)

    # Signal worker that we are ready
    builder.call(ready, ())

    # Wait for workers
    builder.call(synchronize, ())

    # Work is done. Restore the thread state and release the GIL
    pyapi.restore_thread(thread_state)
    pyapi.gil_release(gil_state)

    builder.ret_void()

    # Link and compile
    wrapperlib.add_ir_module(mod)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(lfunc.name)
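
The work split computed above, one alloca per thread, reduces to the following arithmetic (a pure-Python sketch, not Numba code): every thread receives total // NUM_THREADS items, and the last thread also takes the remainder.

def split_counts(total, num_threads):
    count = total // num_threads
    counts = [count] * (num_threads - 1)
    counts.append(total - count * (num_threads - 1))  # last thread takes the leftover
    return counts

assert split_counts(10, 4) == [2, 2, 2, 4]
assert sum(split_counts(10, 4)) == 10
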
Code example #14
File: parallel.py  Project: zxsted/numba
def build_gufunc_kernel(library, ctx, innerfunc, sig, inner_ndim):
    """Wrap the original CPU gufunc with a parallel dispatcher.

    Args
    ----
    ctx
        numba's codegen context

    innerfunc
        llvm function of the original CPU gufunc

    sig
        type signature of the gufunc

    inner_ndim
        inner dimension of the gufunc

    Details
    -------

    Generate a function of the following signature:

    void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps,
                      void* data)

    Divide the work equally across all threads and let the last thread take
    all the leftover work.
    """
    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)

    intp_t = ctx.get_value_type(types.intp)

    fnty = lc.Type.function(lc.Type.void(), [lc.Type.pointer(byte_ptr_t),
                                             lc.Type.pointer(intp_t),
                                             lc.Type.pointer(intp_t),
                                             byte_ptr_t])

    mod = library.create_ir_module('parallel.gufunc.wrapper')
    lfunc = mod.add_function(fnty, name=".kernel")
    innerfunc = mod.add_function(fnty, name=innerfunc.name)

    bb_entry = lfunc.append_basic_block('')

    # Function body starts
    builder = lc.Builder(bb_entry)

    args, dimensions, steps, data = lfunc.args

    # Distribute work
    total = builder.load(dimensions)
    ncpu = lc.Constant.int(total.type, NUM_THREADS)

    count = builder.udiv(total, ncpu)

    count_list = []
    remain = total

    for i in range(NUM_THREADS):
        space = cgutils.alloca_once(builder, intp_t, size=inner_ndim + 1)
        cgutils.memcpy(builder, space, dimensions,
                       count=lc.Constant.int(intp_t, inner_ndim + 1))
        count_list.append(space)

        if i == NUM_THREADS - 1:
            # Last thread takes all leftover
            builder.store(remain, space)
        else:
            builder.store(count, space)
            remain = builder.sub(remain, count)

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    # Get the increment step for each array
    steps_list = []
    for i in range(array_count):
        ptr = builder.gep(steps, [lc.Constant.int(lc.Type.int(), i)])
        step = builder.load(ptr)
        steps_list.append(step)

    # Get the array argument set for each thread
    args_list = []
    for i in range(NUM_THREADS):
        space = builder.alloca(byte_ptr_t,
                               size=lc.Constant.int(lc.Type.int(), array_count))
        args_list.append(space)

        for j in range(array_count):
            # For each array, compute subarray pointer
            dst = builder.gep(space, [lc.Constant.int(lc.Type.int(), j)])
            src = builder.gep(args, [lc.Constant.int(lc.Type.int(), j)])

            baseptr = builder.load(src)
            base = builder.ptrtoint(baseptr, intp_t)
            multiplier = lc.Constant.int(count.type, i)
            offset = builder.mul(steps_list[j], builder.mul(count, multiplier))
            addr = builder.inttoptr(builder.add(base, offset), baseptr.type)

            builder.store(addr, dst)

    # Declare external functions
    add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5)
    empty_fnty = lc.Type.function(lc.Type.void(), ())
    add_task = mod.get_or_insert_function(add_task_ty, name='numba_add_task')
    synchronize = mod.get_or_insert_function(empty_fnty,
                                             name='numba_synchronize')
    ready = mod.get_or_insert_function(empty_fnty, name='numba_ready')

    # Add tasks for queue; one per thread
    as_void_ptr = lambda arg: builder.bitcast(arg, byte_ptr_t)

    for each_args, each_dims in zip(args_list, count_list):
        innerargs = [as_void_ptr(x) for x
                     in [innerfunc, each_args, each_dims, steps, data]]
        builder.call(add_task, innerargs)

    # Signal worker that we are ready
    builder.call(ready, ())
    # Wait for workers
    builder.call(synchronize, ())

    builder.ret_void()

    return lfunc
Code example #15
    def test_cache(self):
        def times2(i):
            return 2*i

        def times3(i):
            return i*3

        def make_closure(x, y):
            def f(z):
                return y + z
            return f

        typing_context = typing.Context()
        context = cpu.CPUContext(typing_context)
        module = lc.Module("test_module")

        sig = typing.signature(types.int32, types.int32)
        llvm_fnty = context.call_conv.get_function_type(sig.return_type,
                                                        sig.args)
        function = module.get_or_insert_function(llvm_fnty, name='test_fn')
        args = context.call_conv.get_arguments(function)
        assert function.is_declaration
        entry_block = function.append_basic_block('entry')
        builder = lc.Builder(entry_block)

        # Ensure the cache is empty to begin with
        self.assertEqual(0, len(context.cached_internal_func))
        
        # After one compile, it should contain one entry
        context.compile_internal(builder, times2, sig, args)
        self.assertEqual(1, len(context.cached_internal_func))

        # After a second compilation of the same thing, it should still contain
        # one entry
        context.compile_internal(builder, times2, sig, args)
        self.assertEqual(1, len(context.cached_internal_func))

        # After compilation of another function, the cache should have grown by
        # one more.
        context.compile_internal(builder, times3, sig, args)
        self.assertEqual(2, len(context.cached_internal_func))

        sig2 = typing.signature(types.float64, types.float64)
        llvm_fnty2 = context.call_conv.get_function_type(sig2.return_type,
                                                         sig2.args)
        function2 = module.get_or_insert_function(llvm_fnty2, name='test_fn_2')
        args2 = context.call_conv.get_arguments(function2)
        assert function2.is_declaration
        entry_block2 = function2.append_basic_block('entry')
        builder2 = lc.Builder(entry_block2)
        
        # Ensure that the same function with a different signature does not
        # reuse an entry from the cache in error
        context.compile_internal(builder2, times3, sig2, args2)
        self.assertEqual(3, len(context.cached_internal_func))

        # Closures with distinct cell contents must each be compiled.
        clo11 = make_closure(1, 1)
        clo12 = make_closure(1, 2)
        clo22 = make_closure(2, 2)
        res1 = context.compile_internal(builder, clo11, sig, args)
        self.assertEqual(4, len(context.cached_internal_func))
        res2 = context.compile_internal(builder, clo12, sig, args)
        self.assertEqual(5, len(context.cached_internal_func))
        # Same cell contents as above (first parameter isn't captured)
        res3 = context.compile_internal(builder, clo22, sig, args)
        self.assertEqual(5, len(context.cached_internal_func))
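
A sketch of why clo12 and clo22 above share a cache entry while clo11 does not (this is an illustration, not Numba's actual cache key): if the key is derived from the code object, the captured cell contents, and the signature, then closures whose cells hold equal values compare equal, and the uncaptured first parameter never enters the key.

def cache_key(func, sig):
    cells = tuple(c.cell_contents for c in (func.__closure__ or ()))
    return (func.__code__, cells, sig)

def make_closure(x, y):
    def f(z):
        return y + z        # only y is captured; x never enters the closure
    return f

assert cache_key(make_closure(1, 2), "sig") == cache_key(make_closure(2, 2), "sig")
assert cache_key(make_closure(1, 1), "sig") != cache_key(make_closure(1, 2), "sig")
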