def _generate_kernel_wrapper(self, func, argtypes):
    """Build a void wrapper around ``func`` and link it into ``func``'s module.

    The wrapper is emitted into a temporary module, takes the packed
    argument types produced by ``self.get_arg_packer(argtypes)``, unpacks
    them, and calls a re-declaration of ``func`` through ``self.call_conv``.
    The temporary module is then linked into ``func.module``.

    Returns the wrapper function as looked up in ``func.module`` after
    linking.
    """
    target_module = func.module
    packer = self.get_arg_packer(argtypes)
    kernel_fnty = lc.Type.function(lc.Type.void(), packer.argument_types)

    scratch_module = self.create_module("dppy.kernel.wrapper")
    kernel_name = "dppyPy_{name}".format(name=func.name)

    # Re-declare the wrapped function inside the scratch module: it returns
    # an int and takes a return-value slot followed by the unpacked arguments.
    unpacked_tys = list(packer.argument_types)
    device_fnty = lc.Type.function(
        lc.Type.int(),
        [self.call_conv.get_return_type(types.pyobject)] + unpacked_tys,
    )
    device_fn = scratch_module.add_function(device_fnty, name=func.name)
    device_fn.calling_convention = CC_SPIR_FUNC

    kernel = scratch_module.add_function(kernel_fnty, name=kernel_name)
    builder = lc.Builder(kernel.append_basic_block(""))

    call_args = packer.from_arguments(builder, kernel.args)
    # XXX handle error status
    _status, _ = self.call_conv.call_function(
        builder, device_fn, types.void, argtypes, call_args
    )
    builder.ret_void()

    self._finalize_wrapper_module(kernel)

    # Link the spir_func module to the wrapper module
    target_module.link_in(ll.parse_assembly(str(scratch_module)))

    # Make sure the spir_func has internal linkage to be inlinable.
    device_fn.linkage = "internal"
    linked_kernel = target_module.get_function(kernel.name)
    target_module.get_function(device_fn.name).linkage = "internal"
    return linked_kernel
def _make_cas_function():
    """
    Generate a compare-and-swap function for portability sake.

    Emits ``.numba.parallel.ufunc.cas(ptr, old, repl)`` into a fresh code
    library of the CPU target, finalizes the library, and returns
    ``(library, address_of_function)``.
    """
    from numba.targets.registry import cpu_target

    cpu_codegen = cpu_target.target_context.codegen()

    # Generate IR
    library = cpu_codegen.create_library('cas_for_parallel_ufunc')
    ir_module = library.create_ir_module('cas_module')

    int_ty = lc.Type.int()
    int_ptr_ty = lc.Type.pointer(int_ty)
    cas_fnty = lc.Type.function(int_ty, [int_ptr_ty, int_ty, int_ty])
    cas_fn = ir_module.add_function(cas_fnty, name='.numba.parallel.ufunc.cas')

    target, expected, replacement = cas_fn.args
    builder = lc.Builder(cas_fn.append_basic_block(''))

    result_pair = builder.cmpxchg(
        target, expected, replacement, ordering='monotonic'
    )
    loaded = builder.extract_value(result_pair, 0)
    # NOTE(review): element 1 of a cmpxchg result is presumably the status
    # flag of the exchange -- confirm its polarity against the LLVM docs.
    flag = builder.extract_value(result_pair, 1)
    builder.ret(builder.select(flag, expected, loaded))

    # Build & Link
    library.add_ir_module(ir_module)
    library.finalize()
    address = library.get_pointer_to_function(cas_fn.name)
    return library, address
def _emit_python_wrapper(self, llvm_module):
    """Emit the module initialization function into ``llvm_module``.

    The generated function calls the module creation function with the
    module name, the method table and the API version, runs the
    module-init code, and returns void.  Its name is appended to
    ``self.dll_exports``.
    """
    # Define the module initialization function.
    init_fn = llvm_module.add_function(*self.module_init_definition)
    builder = lc.Builder(init_fn.append_basic_block('Entry'))
    pyapi = self.context.get_python_api(builder)

    # Python C API module creation function.
    create_fn = llvm_module.add_function(*self.module_create_definition)
    create_fn.linkage = lc.LINKAGE_EXTERNAL

    # Define a constant string for the module name.
    name_const = self.context.insert_const_string(
        llvm_module, self.module_name)

    method_array = self._emit_method_array(llvm_module)

    create_args = (
        name_const,
        method_array,
        NULL,
        lc.Constant.null(lt._pyobject_head_p),
        lc.Constant.int(lt._int32, sys.api_version),
    )
    mod = builder.call(create_fn, create_args)

    env_array = self._emit_environment_array(llvm_module, builder, pyapi)
    self._emit_module_init_code(
        llvm_module, builder, mod, method_array, env_array)

    # XXX No way to notify failure to caller...
    builder.ret_void()

    self.dll_exports.append(init_fn.name)
def generate_kernel_wrapper(self, func, argtypes):
    """Build an HSA kernel wrapper around ``func`` and link it in.

    Pointer arguments of the wrapper are re-typed to
    ``SPIR_GLOBAL_ADDRSPACE``; inside the wrapper they are cast back to
    their original address space before ``func`` is called.  The wrapper is
    emitted into a temporary module which is linked into ``func.module``.

    Returns the wrapper function as looked up in ``func.module`` after
    linking.
    """
    target_module = func.module
    packer = self.get_arg_packer(argtypes)

    def retarget(lty):
        # Pointer types move to the global address space; the original
        # address space is remembered so the argument can be cast back.
        if not isinstance(lty, llvmir.PointerType):
            return lty, None
        return (lty.pointee.as_pointer(SPIR_GLOBAL_ADDRSPACE),
                lty.addrspace)

    retargeted = [retarget(lty) for lty in packer.argument_types]
    kernel_argtys = tuple(pair[0] for pair in retargeted)
    orig_addrspaces = tuple(pair[1] for pair in retargeted)

    kernel_fnty = lc.Type.function(lc.Type.void(), kernel_argtys)
    scratch_module = self.create_module("hsa.kernel.wrapper")
    kernel_name = 'hsaPy_{name}'.format(name=func.name)

    unpacked_tys = list(packer.argument_types)
    device_fnty = lc.Type.function(
        lc.Type.int(),
        [self.call_conv.get_return_type(types.pyobject)] + unpacked_tys)
    device_fn = llvmir.Function(scratch_module, device_fnty, func.name)
    device_fn.calling_convention = CC_SPIR_FUNC

    kernel = llvmir.Function(scratch_module, kernel_fnty, name=kernel_name)
    builder = lc.Builder(kernel.append_basic_block(''))

    # Adjust address space of each kernel argument
    fixed_args = [
        arg if space is None else self.addrspacecast(builder, arg, space)
        for arg, space in zip(kernel.args, orig_addrspaces)
    ]

    call_args = packer.from_arguments(builder, fixed_args)

    # XXX handle error status
    _status, _ = self.call_conv.call_function(
        builder, device_fn, types.void, argtypes, call_args)
    builder.ret_void()

    set_hsa_kernel(kernel)

    # Link
    target_module.link_in(ll.parse_assembly(str(scratch_module)))

    # To enable inlining which is essential because addrspacecast 1->0 is
    # illegal.  Inlining will optimize the addrspacecast out.
    device_fn.linkage = 'internal'
    linked_kernel = target_module.get_function(kernel.name)
    target_module.get_function(device_fn.name).linkage = 'internal'
    return linked_kernel
def start_function(self, name, module, rettype, argtypes):
    """Create function ``name`` in ``module`` and prepare it for emission.

    Appends an "entry" block and then an "exit" block to the new function.
    The function, a builder on the entry block, and the exit block are
    stored on ``self`` as ``function``, ``builder`` and ``exit_block``.
    """
    signature = ll_core.Type.function(rettype, argtypes, False)
    new_fn = ll_core.Function.new(module, signature, name)

    # The entry block must be appended before the exit block.
    entry_builder = ll_core.Builder(new_fn.append_basic_block("entry"))
    exit_bb = new_fn.append_basic_block("exit")

    self.exit_block = exit_bb
    self.function = new_fn
    self.builder = entry_builder
def _context_builder_sig_args(self):
    """Create a fresh CPU context and a builder inside a new test function.

    Returns ``(context, builder, sig, args)`` where ``sig`` is
    ``int32(int32)``, ``builder`` is positioned in the entry block of the
    new ``test_fn`` function, and ``args`` are that function's arguments
    as seen through the calling convention.
    """
    context = cpu.CPUContext(typing.Context())
    test_module = lc.Module("test_module")

    sig = typing.signature(types.int32, types.int32)
    fnty = context.call_conv.get_function_type(sig.return_type, sig.args)
    test_fn = test_module.get_or_insert_function(fnty, name='test_fn')
    args = context.call_conv.get_arguments(test_fn)

    # The function must not have a body yet.
    assert test_fn.is_declaration
    builder = lc.Builder(test_fn.append_basic_block('entry'))

    return context, builder, sig, args
def _context_builder_sig_args(self):
    """Yield ``(context, builder, sig, args)`` with a code library pushed.

    A "testing" code library is created and pushed on the context for as
    long as the generator is suspended at its ``yield``.  ``sig`` is
    ``int32(int32)`` and ``builder`` is positioned in the entry block of a
    new ``test_fn`` function.

    NOTE(review): this is a plain generator here; it is presumably meant to
    be wrapped with ``contextlib.contextmanager`` -- confirm at the
    definition site.
    """
    context = cpu.CPUContext(typing.Context())
    testing_lib = context.codegen().create_library("testing")

    with context.push_code_library(testing_lib):
        test_module = lc.Module("test_module")

        sig = typing.signature(types.int32, types.int32)
        fnty = context.call_conv.get_function_type(
            sig.return_type, sig.args)
        test_fn = test_module.get_or_insert_function(fnty, name="test_fn")
        args = context.call_conv.get_arguments(test_fn)

        # The function must not have a body yet.
        assert test_fn.is_declaration
        builder = lc.Builder(test_fn.append_basic_block("entry"))

        yield context, builder, sig, args
def test_cache(self):
    """Check how ``compile_internal`` populates ``cached_internal_func``."""

    def times2(i):
        return 2 * i

    def times3(i):
        return i * 3

    with self._context_builder_sig_args() as (context, builder, sig, args):

        def cache_size():
            return len(context.cached_internal_func)

        # Ensure the cache is empty to begin with
        self.assertEqual(0, cache_size())

        # After one compile, it should contain one entry
        context.compile_internal(builder, times2, sig, args)
        self.assertEqual(1, cache_size())

        # After a second compilation of the same thing, it should still
        # contain one entry
        context.compile_internal(builder, times2, sig, args)
        self.assertEqual(1, cache_size())

        # After compilation of another function, the cache should have
        # grown by one more.
        context.compile_internal(builder, times3, sig, args)
        self.assertEqual(2, cache_size())

        # Set up a second function with a float64 signature in the same
        # module.
        sig2 = typing.signature(types.float64, types.float64)
        fnty2 = context.call_conv.get_function_type(
            sig2.return_type, sig2.args)
        test_fn2 = builder.module.get_or_insert_function(
            fnty2, name='test_fn_2')
        args2 = context.call_conv.get_arguments(test_fn2)
        assert test_fn2.is_declaration
        builder2 = lc.Builder(test_fn2.append_basic_block('entry'))

        # Ensure that the same function with a different signature does
        # not reuse an entry from the cache in error
        context.compile_internal(builder2, times3, sig2, args2)
        self.assertEqual(3, cache_size())
def real_divmod(context, builder, x, y):
    """Emit a call computing the quotient and remainder of ``x`` and ``y``.

    A helper function (named by mangling ".numba.python.rem" with the
    operand type) is fetched from the current module; its body is emitted
    the first time it is requested.  The helper returns the quotient and
    writes the remainder through a pointer argument.

    Returns ``(quotient, remainder)`` as LLVM values.
    """
    assert x.type == y.type

    float_ty = x.type
    helper_name = context.mangler(".numba.python.rem", [float_ty])
    helper_ty = Type.function(
        float_ty, (float_ty, float_ty, Type.pointer(float_ty)))
    helper = builder.module.get_or_insert_function(helper_ty, helper_name)

    if helper.is_declaration:
        # First request for this type: emit the helper's body.
        helper.linkage = lc.LINKAGE_LINKONCE_ODR
        body_builder = lc.Builder(helper.append_basic_block('entry'))
        lhs, rhs, mod_out = helper.args
        div, mod = real_divmod_func_body(context, body_builder, lhs, rhs)
        body_builder.store(mod, mod_out)
        body_builder.ret(div)

    # Stack slot in the caller that receives the remainder.
    mod_slot = cgutils.alloca_once(builder, float_ty)
    quotient = builder.call(helper, (x, y, mod_slot))
    return quotient, builder.load(mod_slot)
def compile_function(self, nargs):
    """Yield the pieces needed to emit and then run a test function.

    A function taking ``nargs`` ``machine_int`` arguments and returning a
    ``machine_int`` is declared in a fresh module, named after
    ``self.id()``.

    Yields ``(context, builder, args, call_func)`` where ``builder`` is
    positioned in the function's entry block, ``args`` are the function's
    LLVM arguments, and ``call_func(*args)`` adds the module to a new code
    library and calls the function through ctypes with ``c_size_t``
    arguments and return value.

    NOTE(review): this is a plain generator here; it is presumably wrapped
    as a context manager where it is defined -- confirm.
    """
    llvm_fnty = lc.Type.function(machine_int, [machine_int] * nargs)
    ctypes_fnty = ctypes.CFUNCTYPE(ctypes.c_size_t,
                                   *(ctypes.c_size_t,) * nargs)
    module = self.context.create_module("")

    function = module.get_or_insert_function(llvm_fnty, name=self.id())
    assert function.is_declaration
    entry_block = function.append_basic_block('entry')
    builder = lc.Builder(entry_block)

    def call_func(*args):
        # The library is created at call time, after the caller has
        # finished emitting the body.
        codegen = self.context.codegen()
        library = codegen.create_library("test_module.%s" % self.id())
        library.add_ir_module(module)
        cptr = library.get_pointer_to_function(function.name)
        cfunc = ctypes_fnty(cptr)
        return cfunc(*args)

    yield self.context, builder, function.args, call_func
def _emit_python_wrapper(self, llvm_module):
    """Emit the module definition globals and the module init function.

    Two internal globals are created (``.module_def_base`` and
    ``.module_def``).  The init function calls the module creation
    function with the module definition and the API version, returns a
    null pointer if creation failed or if the module-init code returned a
    non-null value, and otherwise returns the module.  The init function's
    name is appended to ``self.dll_exports``.
    """
    # Figure out the Python C API module creation function, and
    # get a LLVM function for it.
    create_fn = llvm_module.add_function(*self.module_create_definition)
    create_fn.linkage = lc.LINKAGE_EXTERNAL

    # Define a constant string for the module name.
    name_const = self.context.insert_const_string(
        llvm_module, self.module_name)

    # Static initializer for the base part of the module definition.
    base_fields = (
        lt._pyobject_head_init,                  # PyObject_HEAD
        lc.Constant.null(self.m_init_ty),        # m_init
        lc.Constant.null(lt._llvm_py_ssize_t),   # m_index
        lc.Constant.null(lt._pyobject_head_p),   # m_copy
    )
    base_init = lc.Constant.struct(base_fields)
    base_gv = llvm_module.add_global_variable(
        base_init.type, '.module_def_base')
    base_gv.initializer = base_init
    base_gv.linkage = lc.LINKAGE_INTERNAL

    method_array = self._emit_method_array(llvm_module)

    def_fields = (
        base_init,                                    # m_base
        name_const,                                   # m_name
        lc.Constant.null(self._char_star),            # m_doc
        lc.Constant.int(lt._llvm_py_ssize_t, -1),     # m_size
        method_array,                                 # m_methods
        lc.Constant.null(self.inquiry_ty),            # m_reload
        lc.Constant.null(self.traverseproc_ty),       # m_traverse
        lc.Constant.null(self.inquiry_ty),            # m_clear
        lc.Constant.null(self.freefunc_ty),           # m_free
    )
    def_init = lc.Constant.struct(def_fields)

    # Define the global variable holding the module definition.
    mod_def = llvm_module.add_global_variable(def_init.type, '.module_def')
    mod_def.initializer = def_init
    mod_def.linkage = lc.LINKAGE_INTERNAL

    # Define the module initialization function.
    init_fn = llvm_module.add_function(*self.module_init_definition)
    builder = lc.Builder(init_fn.append_basic_block('Entry'))
    pyapi = self.context.get_python_api(builder)

    api_version = lc.Constant.int(lt._int32, sys.api_version)
    mod = builder.call(create_fn, (mod_def, api_version))

    # Test if module has been created correctly.
    # (XXX for some reason comparing with the NULL constant fails llvm
    # with an assertion in pydebug mode)
    with builder.if_then(cgutils.is_null(builder, mod)):
        builder.ret(NULL.bitcast(init_fn.type.pointee.return_type))

    env_array = self._emit_environment_array(llvm_module, builder, pyapi)
    envgv_array = self._emit_envgvs_array(llvm_module, builder, pyapi)
    init_status = self._emit_module_init_code(
        llvm_module, builder, mod, method_array, env_array, envgv_array)

    if init_status is not None:
        with builder.if_then(cgutils.is_not_null(builder, init_status)):
            # Init function errored out
            builder.ret(lc.Constant.null(mod.type))

    builder.ret(mod)

    self.dll_exports.append(init_fn.name)
def build_gufunc_kernel(library, ctx, info, sig, inner_ndim):
    """Wrap the original CPU ufunc/gufunc with a parallel dispatcher.

    Args
    ----
    library
        code library added as a linking library of the wrapper library
    ctx
        numba's codegen context
    info: (library, env, name)
        inner function info
    sig
        type signature of the gufunc
    inner_ndim
        inner dimension of the gufunc (this is len(sig.args) in the case of
        a ufunc)

    Returns
    -------
    wrapper_info : (library, env, name)
        The info for the gufunc wrapper.

    Details
    -------
    The kernel signature looks like this:

    void kernel(char **args, npy_intp *dimensions, npy_intp* steps,
                void* data)

    args - the input arrays + output arrays
    dimensions - the dimensions of the arrays
    steps - the step size for the array (this is like sizeof(type))
    data - any additional data

    The parallel backend then stages multiple calls to this kernel
    concurrently across a number of threads. Practically, for each item of
    work, the backend duplicates `dimensions` and adjusts the first entry
    to reflect the size of the item of work, it also forms up an array of
    pointers into the args for offsets to read/write from/to with respect
    to its position in the items of work. This allows the same kernel to be
    used for each item of work, with simply adjusted reads/writes/domain
    sizes and is safe by virtue of the domain partitioning.

    NOTE: The execution backend is passed the requested thread count, but
    it can choose to ignore it (TBB)!
    """
    assert isinstance(info, tuple)  # guard against old usage

    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)
    byte_ptr_ptr_t = lc.Type.pointer(byte_ptr_t)
    intp_t = ctx.get_value_type(types.intp)
    intp_ptr_t = lc.Type.pointer(intp_t)

    kernel_argtys = [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]
    kernel_fnty = lc.Type.function(lc.Type.void(), kernel_argtys)

    wrapperlib = ctx.codegen().create_library('parallelgufuncwrapper')
    mod = wrapperlib.create_ir_module('parallel.gufunc.wrapper')
    kernel_name = ".kernel.{}_{}".format(id(info.env), info.name)
    kernel = ir.Function(mod, kernel_fnty, name=kernel_name)

    # Function body starts
    builder = lc.Builder(kernel.append_basic_block(''))
    args, dimensions, steps, data = kernel.args

    # Release the GIL (and ensure we have the GIL)
    # Note: numpy ufunc may not always release the GIL; thus,
    #       we need to ensure we have the GIL.
    pyapi = ctx.get_python_api(builder)
    gil_state = pyapi.gil_ensure()
    thread_state = pyapi.save_thread()

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    parallel_for_ty = lc.Type.function(
        lc.Type.void(), [byte_ptr_t] * 5 + [intp_t] * 3)
    parallel_for = cgutils.get_or_insert_function(
        mod, parallel_for_ty, 'numba_parallel_for')

    # Reference inner-function and link
    inner_fnty = lc.Type.function(
        lc.Type.void(),
        [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t],
    )
    inner_decl = cgutils.get_or_insert_function(mod, inner_fnty, info.name)
    wrapperlib.add_linking_library(info.library)

    get_num_threads = cgutils.get_or_insert_function(
        builder.module,
        lc.Type.function(lc.Type.int(types.intp.bitwidth), []),
        "get_num_threads")
    num_threads = builder.call(get_num_threads, [])

    # Prepare call: every pointer is passed as a byte pointer.
    call_args = [builder.bitcast(inner_decl, byte_ptr_t)]
    for value in (args, dimensions, steps, data):
        call_args.append(builder.bitcast(value, byte_ptr_t))
    call_args.extend(intp_t(n) for n in (inner_ndim, array_count))
    call_args.append(num_threads)
    builder.call(parallel_for, call_args)

    # Undo the save_thread / gil_ensure pair from the top of the kernel.
    pyapi.restore_thread(thread_state)
    pyapi.gil_release(gil_state)

    builder.ret_void()

    wrapperlib.add_ir_module(mod)
    wrapperlib.add_linking_library(library)
    return _wrapper_info(library=wrapperlib, name=kernel.name, env=info.env)
def build_ufunc_kernel(library, ctx, innerfunc, sig):
    """Wrap the original CPU ufunc with a parallel dispatcher.

    Args
    ----
    library
        code library added as a linking library of the wrapper library
    ctx
        numba's codegen context
    innerfunc
        the original CPU ufunc; it is embedded as a ``uintp`` constant and
        cast to a pointer, so it is presumably the function's runtime
        address -- TODO confirm against callers
    sig
        type signature of the ufunc

    Returns
    -------
    The pointer obtained from the wrapper library for the generated kernel.

    Details
    -------
    Generate a function of the following signature:

    void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps,
                      void* data)

    Divide the work equally across all threads and let the last thread take
    all the left over.
    """
    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)
    intp_t = ctx.get_value_type(types.intp)

    kernel_fnty = lc.Type.function(lc.Type.void(), [
        lc.Type.pointer(byte_ptr_t),
        lc.Type.pointer(intp_t),
        lc.Type.pointer(intp_t),
        byte_ptr_t,
    ])
    wrapperlib = ctx.codegen().create_library('parallelufuncwrapper')
    mod = wrapperlib.create_ir_module('parallel.ufunc.wrapper')
    kernel = mod.add_function(kernel_fnty, name=".kernel." + str(innerfunc))

    # Function body starts
    builder = lc.Builder(kernel.append_basic_block(''))
    args, dimensions, steps, data = kernel.args

    def index_const(k):
        return lc.Constant.int(lc.Type.int(), k)

    def as_void_ptr(value):
        return builder.bitcast(value, byte_ptr_t)

    # Release the GIL (and ensure we have the GIL)
    # Note: numpy ufunc may not always release the GIL; thus,
    #       we need to ensure we have the GIL.
    pyapi = ctx.get_python_api(builder)
    gil_state = pyapi.gil_ensure()
    thread_state = pyapi.save_thread()

    # Distribute work
    total = builder.load(dimensions)
    ncpu = lc.Constant.int(total.type, NUM_THREADS)
    count = builder.udiv(total, ncpu)

    last_thread = NUM_THREADS - 1
    count_list = []
    remain = total
    for tid in range(NUM_THREADS):
        slot = builder.alloca(intp_t)
        count_list.append(slot)
        if tid != last_thread:
            builder.store(count, slot)
            remain = builder.sub(remain, count)
        else:
            # Last thread takes all leftover
            builder.store(remain, slot)

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    # Get the increment step for each array
    steps_list = [
        builder.load(builder.gep(steps, [index_const(k)]))
        for k in range(array_count)
    ]

    # Get the array argument set for each thread
    args_list = []
    for tid in range(NUM_THREADS):
        thread_args = builder.alloca(byte_ptr_t,
                                     size=index_const(array_count))
        args_list.append(thread_args)
        multiplier = lc.Constant.int(count.type, tid)

        for k in range(array_count):
            # For each array, compute subarray pointer
            dst = builder.gep(thread_args, [index_const(k)])
            src = builder.gep(args, [index_const(k)])

            baseptr = builder.load(src)
            base = builder.ptrtoint(baseptr, intp_t)
            items_before = builder.mul(count, multiplier)
            offset = builder.mul(steps_list[k], items_before)
            shifted = builder.add(base, offset)
            addr = builder.inttoptr(shifted, baseptr.type)

            builder.store(addr, dst)

    # Declare external functions
    add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5)
    empty_fnty = lc.Type.function(lc.Type.void(), ())
    add_task = mod.get_or_insert_function(add_task_ty,
                                          name='numba_add_task')
    synchronize = mod.get_or_insert_function(empty_fnty,
                                             name='numba_synchronize')
    ready = mod.get_or_insert_function(empty_fnty, name='numba_ready')

    # Add tasks for queue; one per thread
    # Note: the runtime address is taken and used as a constant in the
    #       function.
    fnptr = ctx.get_constant(types.uintp, innerfunc).inttoptr(byte_ptr_t)
    for thread_args, thread_dims in zip(args_list, count_list):
        task_args = [fnptr]
        for value in (thread_args, thread_dims, steps, data):
            task_args.append(as_void_ptr(value))
        builder.call(add_task, task_args)

    # Signal worker that we are ready
    builder.call(ready, ())
    # Wait for workers
    builder.call(synchronize, ())

    # Work is done. Reacquire the GIL
    pyapi.restore_thread(thread_state)
    pyapi.gil_release(gil_state)

    builder.ret_void()

    # Link and compile
    wrapperlib.add_ir_module(mod)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(kernel.name)
def build_gufunc_kernel(library, ctx, innerfunc, sig, inner_ndim):
    """Wrap the original CPU gufunc with a parallel dispatcher.

    Args
    ----
    library
        code library in which the wrapper IR module is created
    ctx
        numba's codegen context
    innerfunc
        llvm function of the original CPU gufunc
    sig
        type signature of the gufunc
    inner_ndim
        inner dimension of the gufunc

    Returns
    -------
    The generated ``.kernel`` LLVM function.

    Details
    -------
    Generate a function of the following signature:

    void ufunc_kernel(char **args, npy_intp *dimensions, npy_intp* steps,
                      void* data)

    Divide the work equally across all threads and let the last thread take
    all the left over.
    """
    # Declare types and function
    byte_t = lc.Type.int(8)
    byte_ptr_t = lc.Type.pointer(byte_t)
    intp_t = ctx.get_value_type(types.intp)

    kernel_fnty = lc.Type.function(lc.Type.void(), [
        lc.Type.pointer(byte_ptr_t),
        lc.Type.pointer(intp_t),
        lc.Type.pointer(intp_t),
        byte_ptr_t,
    ])
    mod = library.create_ir_module('parallel.gufunc.wrapper')
    kernel = mod.add_function(kernel_fnty, name=".kernel")
    # Declaration of the inner function inside the wrapper module.
    inner_decl = mod.add_function(kernel_fnty, name=innerfunc.name)

    # Function body starts
    builder = lc.Builder(kernel.append_basic_block(''))
    args, dimensions, steps, data = kernel.args

    def index_const(k):
        return lc.Constant.int(lc.Type.int(), k)

    def as_void_ptr(value):
        return builder.bitcast(value, byte_ptr_t)

    # Distribute work
    total = builder.load(dimensions)
    ncpu = lc.Constant.int(total.type, NUM_THREADS)
    count = builder.udiv(total, ncpu)

    dims_len = inner_ndim + 1
    last_thread = NUM_THREADS - 1
    count_list = []
    remain = total
    for tid in range(NUM_THREADS):
        # Each thread gets its own copy of `dimensions`; the first entry is
        # then overwritten with that thread's share of the work.
        dims_copy = cgutils.alloca_once(builder, intp_t, size=dims_len)
        cgutils.memcpy(builder, dims_copy, dimensions,
                       count=lc.Constant.int(intp_t, dims_len))
        count_list.append(dims_copy)
        if tid != last_thread:
            builder.store(count, dims_copy)
            remain = builder.sub(remain, count)
        else:
            # Last thread takes all leftover
            builder.store(remain, dims_copy)

    # Array count is input signature plus 1 (due to output array)
    array_count = len(sig.args) + 1

    # Get the increment step for each array
    steps_list = [
        builder.load(builder.gep(steps, [index_const(k)]))
        for k in range(array_count)
    ]

    # Get the array argument set for each thread
    args_list = []
    for tid in range(NUM_THREADS):
        thread_args = builder.alloca(byte_ptr_t,
                                     size=index_const(array_count))
        args_list.append(thread_args)
        multiplier = lc.Constant.int(count.type, tid)

        for k in range(array_count):
            # For each array, compute subarray pointer
            dst = builder.gep(thread_args, [index_const(k)])
            src = builder.gep(args, [index_const(k)])

            baseptr = builder.load(src)
            base = builder.ptrtoint(baseptr, intp_t)
            items_before = builder.mul(count, multiplier)
            offset = builder.mul(steps_list[k], items_before)
            shifted = builder.add(base, offset)
            addr = builder.inttoptr(shifted, baseptr.type)

            builder.store(addr, dst)

    # Declare external functions
    add_task_ty = lc.Type.function(lc.Type.void(), [byte_ptr_t] * 5)
    empty_fnty = lc.Type.function(lc.Type.void(), ())
    add_task = mod.get_or_insert_function(add_task_ty,
                                          name='numba_add_task')
    synchronize = mod.get_or_insert_function(empty_fnty,
                                             name='numba_synchronize')
    ready = mod.get_or_insert_function(empty_fnty, name='numba_ready')

    # Add tasks for queue; one per thread
    for thread_args, thread_dims in zip(args_list, count_list):
        task_args = []
        for value in (inner_decl, thread_args, thread_dims, steps, data):
            task_args.append(as_void_ptr(value))
        builder.call(add_task, task_args)

    # Signal worker that we are ready
    builder.call(ready, ())
    # Wait for workers
    builder.call(synchronize, ())

    builder.ret_void()

    return kernel
def test_cache(self):
    """Check how ``compile_internal`` populates ``cached_internal_func``.

    Covers recompiling the same function, a different function, the same
    function with a different signature, and closures with equal or
    distinct captured values.
    """

    def times2(i):
        return 2*i

    def times3(i):
        return i*3

    def make_closure(x, y):
        # Only `y` is captured by the inner function.
        def f(z):
            return y + z
        return f

    context = cpu.CPUContext(typing.Context())
    module = lc.Module("test_module")

    def cache_size():
        return len(context.cached_internal_func)

    sig = typing.signature(types.int32, types.int32)
    fnty = context.call_conv.get_function_type(sig.return_type, sig.args)
    test_fn = module.get_or_insert_function(fnty, name='test_fn')
    args = context.call_conv.get_arguments(test_fn)
    assert test_fn.is_declaration
    builder = lc.Builder(test_fn.append_basic_block('entry'))

    # Ensure the cache is empty to begin with
    self.assertEqual(0, cache_size())

    # After one compile, it should contain one entry
    context.compile_internal(builder, times2, sig, args)
    self.assertEqual(1, cache_size())

    # After a second compilation of the same thing, it should still contain
    # one entry
    context.compile_internal(builder, times2, sig, args)
    self.assertEqual(1, cache_size())

    # After compilation of another function, the cache should have grown by
    # one more.
    context.compile_internal(builder, times3, sig, args)
    self.assertEqual(2, cache_size())

    # Set up a second function with a float64 signature in the same module.
    sig2 = typing.signature(types.float64, types.float64)
    fnty2 = context.call_conv.get_function_type(sig2.return_type, sig2.args)
    test_fn2 = module.get_or_insert_function(fnty2, name='test_fn_2')
    args2 = context.call_conv.get_arguments(test_fn2)
    assert test_fn2.is_declaration
    builder2 = lc.Builder(test_fn2.append_basic_block('entry'))

    # Ensure that the same function with a different signature does not
    # reuse an entry from the cache in error
    context.compile_internal(builder2, times3, sig2, args2)
    self.assertEqual(3, cache_size())

    # Closures with distinct cell contents must each be compiled.
    clo11 = make_closure(1, 1)
    clo12 = make_closure(1, 2)
    clo22 = make_closure(2, 2)

    context.compile_internal(builder, clo11, sig, args)
    self.assertEqual(4, cache_size())

    context.compile_internal(builder, clo12, sig, args)
    self.assertEqual(5, cache_size())

    # Same cell contents as above (first parameter isn't captured)
    context.compile_internal(builder, clo22, sig, args)
    self.assertEqual(5, cache_size())