def check_numba_carray_farray(self, usecase, dtype_usecase):
    """
    Run a carray()/farray() usecase through cfunc for each pointer flavour.

    *usecase* leaves the array dtype implicit, *dtype_usecase* passes it
    explicitly.  Every signature produced by ``self.make_carray_sigs`` is
    compiled and checked with ``self.check_carray_usecase``.
    """
    make_pointer = self.make_float32_pointer

    # Typed pointers, dtype left implicit
    for signature in self.make_carray_sigs(carray_float32_usecase_sig):
        compiled = cfunc(signature)(usecase)
        self.check_carray_usecase(make_pointer, usecase, compiled.ctypes)

    # Typed pointers, explicit dtype that matches the pointee type
    for signature in self.make_carray_sigs(carray_float32_usecase_sig):
        compiled = cfunc(signature)(dtype_usecase)
        self.check_carray_usecase(make_pointer, dtype_usecase,
                                  compiled.ctypes)

    # Typed pointers, explicit dtype that contradicts the pointee type
    with self.assertTypingError() as raises:
        cfunc(carray_float64_usecase_sig)(dtype_usecase)
    self.assertIn(
        "mismatching dtype 'float32' for pointer type 'float64*'",
        str(raises.exception))

    # Untyped (void *) pointers, explicit dtype
    for signature in self.make_carray_sigs(carray_voidptr_usecase_sig):
        compiled = cfunc(signature)(dtype_usecase)
        self.check_carray_usecase(make_pointer, dtype_usecase,
                                  compiled.ctypes)
def test_object_mode(self):
    """
    Object mode is currently unsupported.
    """
    # Asking for object mode explicitly must be refused.
    with self.assertRaises(NotImplementedError):
        decorate = cfunc(add_sig, forceobj=True)
        decorate(add_usecase)

    # A function that cannot be typed must fail typing, not fall back.
    with self.assertTypingError() as typing_failure:
        cfunc(add_sig)(objmode_usecase)
    message = str(typing_failure.exception)
    self.assertIn("Untyped global name 'object'", message)
def test_cffi(self):
    """Hand a cfunc to C code through its ``cffi`` function pointer."""
    from numba.tests import cffi_usecases

    _ffi, lib = cffi_usecases.load_inline_module()
    squarer = cfunc(square_sig)(square_usecase)
    result = lib._numba_test_funcptr(squarer.cffi)
    # Expected value noted by the original author as 1.5 ** 2
    self.assertPreciseEqual(result, 2.25)
def test_cffi(self):
    """Hand a cfunc to C code through its ``cffi`` function pointer."""
    from . import cffi_usecases

    _ffi, lib = cffi_usecases.load_inline_module()
    squarer = cfunc(square_sig)(square_usecase)
    result = lib._numba_test_funcptr(squarer.cffi)
    # Expected value noted by the original author as 1.5 ** 2
    self.assertPreciseEqual(result, 2.25)
def test_numba_assembly():
    """
    Assemble a matrix and a vector from Numba-compiled tabulate kernels.

    The kernels ``tabulate_tensor_A`` / ``tabulate_tensor_b`` are compiled
    with ``cfunc`` and registered by address on two forms; the norms of the
    assembled operator and vector are compared with reference values.
    """
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    bilinear = cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    linear = cpp.fem.Form([Q._cpp_object])

    # C signature shared by both kernels:
    # void(scalar*, scalar**, double*, int)
    scalar_ptr = types.CPointer(typeof(ScalarType()))
    kernel_sig = types.void(
        scalar_ptr,
        types.CPointer(scalar_ptr),
        types.CPointer(types.double),
        types.intc)

    kernel_A = cfunc(kernel_sig, cache=True)(tabulate_tensor_A)
    bilinear.set_cell_tabulate(0, kernel_A.address)
    kernel_b = cfunc(kernel_sig, cache=True)(tabulate_tensor_b)
    linear.set_cell_tabulate(0, kernel_b.address)

    # Disabled alternative kept from the original: build the forms with FFC.
    if False:
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        bilinear = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        linear = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[bilinear]], [linear], [])
    A = PETScMatrix()
    b = PETScVector()
    block_type = cpp.fem.Assembler.BlockType.monolithic
    assembler.assemble(A, block_type)
    assembler.assemble(b, block_type)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)
    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)

    list_timings([TimingType.wall])
def check_numba_carray_farray(self, usecase, dtype_usecase):
    """
    Run a carray()/farray() usecase through cfunc for each pointer flavour.

    *usecase* leaves the array dtype implicit, *dtype_usecase* passes it
    explicitly.
    """
    make_pointer = self.make_float32_pointer

    # Typed pointers, dtype left implicit
    compiled = cfunc(carray_float32_usecase_sig)(usecase)
    self.check_carray_usecase(make_pointer, usecase, compiled.ctypes)

    # Typed pointers, explicit dtype that matches the pointee type
    compiled = cfunc(carray_float32_usecase_sig)(dtype_usecase)
    self.check_carray_usecase(make_pointer, dtype_usecase, compiled.ctypes)

    # Typed pointers, explicit dtype that contradicts the pointee type
    with self.assertTypingError() as raises:
        cfunc(carray_float64_usecase_sig)(dtype_usecase)
    self.assertIn(
        "mismatching dtype 'float32' for pointer type 'float64*'",
        str(raises.exception))

    # Untyped (void *) pointers, explicit dtype
    compiled = cfunc(carray_voidptr_usecase_sig)(dtype_usecase)
    self.check_carray_usecase(make_pointer, dtype_usecase, compiled.ctypes)
def test_refcount_pycallable(self, numba_only=True):
    """
    Test refcount of decorated callable
    """
    import numba

    def f1(x):
        return x

    def f2(x):
        return x

    # Both wrappers are kept in locals so that whatever references they
    # hold on f1 / f2 are still alive when the counts are read.
    root_wrapped = ROOT.DeclareCppCallable(["float"], "float")(f1)
    numba_wrapped = numba.cfunc("float32(float32)", nopython=True)(f2)

    # ROOT holds an additional reference compared to plain numba
    count_root = sys.getrefcount(f1)
    count_numba = sys.getrefcount(f2)
    self.assertEqual(count_root, count_numba + 1)
def codegen(cgctx, builder, sig, args):
    """
    Emit a constant holding the address of a C comparison-style callback.

    The callback takes two ``void *`` arguments, dereferences them, and
    forwards the loaded values to the implementation that ``fnty`` resolves
    to for the type of the first argument in *sig*.
    """
    arg_ty = sig.args[0]
    # Trigger resolution to get a "custom_hash" impl based on the call type
    # "arg_ty" and its literal value.
    call_sig = fnty.get_call_type(tyctx, (arg_ty, arg_ty), {})
    resolved_impl = cgctx.get_function(fnty, call_sig)

    # Close over the resolved function; this is to deal with python scoping.
    def resolved_codegen(cgctx, builder, sig, args):
        return resolved_impl(builder, args)

    # A python function "wrapper" is made for the `@cfunc` arg, this calls
    # the jitted function "wrappee", which will be compiled as part of the
    # compilation chain for the cfunc. In turn the wrappee jitted function
    # has an intrinsic call which is holding reference to the resolved type
    # specialised custom_hash call above.
    @intrinsic
    def dispatcher(_ityctx, _a, _b):
        return types.int8(thing, another), resolved_codegen

    @intrinsic
    def deref(_ityctx, _x):
        # Dereferences the void * passed in. TODO: nrt awareness
        deref_sig = thing(_x)

        def deref_codegen(cgctx, builder, sig, args):
            pointee_ptr_ty = cgctx.get_value_type(sig.return_type).as_pointer()
            typed_ptr = builder.bitcast(args[0], pointee_ptr_ty)
            index_zero = cgctx.get_constant(types.intp, 0)
            element_ptr = builder.gep(typed_ptr, [index_zero], inbounds=True)
            return builder.load(element_ptr)

        return deref_sig, deref_codegen

    @njit
    def wrappee(ap, bp):
        a = deref(ap)
        b = deref(bp)
        return dispatcher(a, b)

    def wrapper(a, b):
        return wrappee(a, b)

    callback_sig = types.int8(types.voidptr, types.voidptr)
    callback = cfunc(callback_sig)(wrapper)
    # Bake in the address as an int constant.
    # NOTE(review): nothing here keeps `callback` alive after this function
    # returns; confirm the compiled code outlives the baked-in address.
    return cgctx.get_constant(types.intp, callback.address)
def test_errors(self):
    """Check that an exception raised inside a cfunc is reported on stderr."""
    divide = cfunc(div_sig)(div_usecase).ctypes

    # A successful call produces the quotient and writes nothing to stderr.
    with captured_stderr() as stream:
        self.assertPreciseEqual(divide(5, 2), 2.5)
    self.assertEqual(stream.getvalue(), "")

    with captured_stderr() as stream:
        quotient = divide(5, 0)
        # This is just a side effect of Numba zero-initializing
        # stack variables, and could change in the future.
        self.assertPreciseEqual(quotient, 0.0)
    reported = stream.getvalue()
    self.assertIn("ZeroDivisionError:", reported)
    self.assertIn("Exception ignored", reported)
def test_refcount_pycallable(self):
    """
    Test refcount of decorated callable
    """
    def f1(x):
        return x

    def f2(x):
        return x

    # Both wrappers are kept in locals so that whatever references they
    # hold on f1 / f2 are still alive when the counts are read.
    root_wrapped = ROOT.Numba.Declare(["float"], "float")(f1)
    numba_wrapped = nb.cfunc("float32(float32)", nopython=True)(f2)

    # The expected surplus of references on the ROOT side depends on the
    # Python major version.
    surplus = 1 if sys.version_info.major == 2 else 2
    self.assertEqual(sys.getrefcount(f1), sys.getrefcount(f2) + surplus)
def Simulator(ticks, onTick, money=5000, tick_value=0.2, order_cost=4.0,
              cnumba=True):
    """
    Run ``_simulator`` over *ticks*, invoking *onTick* as a C callback.

    With ``cnumba`` true, *onTick* is expected to expose a ``.ctypes``
    attribute (as a Numba cfunc does); otherwise it is wrapped in a ctypes
    ``void(double *)`` callback.  Returns the float64 array that
    ``_simulator`` fills, one entry per tick.
    """
    nticks = len(ticks)
    pmoney = np.empty(nticks, dtype=np.float64)

    # Original note: Numba callback 1000x faster, but call back print BUG
    callback = (
        onTick.ctypes
        if cnumba
        else c.CFUNCTYPE(None, c.POINTER(c.c_double))(onTick)
    )

    _simulator(money, tick_value, order_cost,
               ticks.ctypes.data, nticks,
               pmoney.ctypes.data, callback)
    return pmoney
def jit_integrand(integrand_function):
    """
    Compile *integrand_function* into a ``scipy.LowLevelCallable``.

    The integrand is jitted in nopython mode and wrapped in a C callback
    with signature ``double(int, double *)`` that unpacks the pointed-to
    values into positional arguments.

    Parameters
    ----------
    integrand_function : callable
        Python function taking between 4 and 11 positional arguments.

    Returns
    -------
    LowLevelCallable
        Wrapper around the compiled callback's ctypes function pointer.

    Raises
    ------
    ValueError
        If the integrand does not take between 4 and 11 positional
        arguments.  Previously this case left the wrapper undefined and
        failed later, inside ``cfunc``, with an unrelated error.
    """
    no_args = len(inspect.getfullargspec(integrand_function).args)
    # Validate before compiling anything so the failure is cheap and clear.
    if not 4 <= no_args <= 11:
        raise ValueError(
            "jit_integrand supports integrands taking 4 to 11 positional "
            "arguments, got {}".format(no_args))

    jitted_function = numba.jit(integrand_function, nopython=True)

    # One explicit wrapper per arity: the indices must be literal so the
    # call can be compiled in nopython mode.
    if no_args == 4:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3])
    elif no_args == 5:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4])
    elif no_args == 6:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5])
    elif no_args == 7:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6])
    elif no_args == 8:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7])
    elif no_args == 9:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8])
    elif no_args == 10:
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8], xx[9])
    else:  # no_args == 11, guaranteed by the range check above
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8], xx[9], xx[10])

    cf = cfunc(float64(intc, CPointer(float64)))
    return LowLevelCallable(cf(wrapped).ctypes)
def add_external_function(self, function, signature, number_of_args,
                          target_ids):
    """
    Wrap the function and make it available in the LLVM module

    *function* is compiled with ``cfunc`` using *signature*, its address is
    registered under the function's qualified name, and a matching LLVM
    declaration is stored in ``self.ext_funcs``.  All *number_of_args*
    parameters are declared as doubles, except the positions listed in
    *target_ids*, which are declared as pointers to double.
    """
    compiled = cfunc(sig=signature)(function)
    symbol_name = function.__qualname__
    symbol_handle = llvm.add_symbol(symbol_name, compiled.address)

    param_types = np.tile(ll.DoubleType(), number_of_args).tolist()
    for position in target_ids:
        param_types[position] = ll.DoubleType().as_pointer()

    fn_type = ll.FunctionType(ll.VoidType(), param_types)
    # NOTE(review): the result of this call is discarded, as in the
    # original; confirm whether it is needed at all.
    fn_type.as_pointer(symbol_handle)

    declaration = ll.Function(self.module, fn_type, name=symbol_name)
    self.ext_funcs[symbol_name] = declaration
def test_errors(self):
    """Check that an exception raised inside a cfunc is reported on stderr."""
    divide = cfunc(div_sig)(div_usecase).ctypes

    # A successful call produces the quotient and writes nothing to stderr.
    with captured_stderr() as stream:
        self.assertPreciseEqual(divide(5, 2), 2.5)
    self.assertEqual(stream.getvalue(), "")

    with captured_stderr() as stream:
        quotient = divide(5, 0)
        # This is just a side effect of Numba zero-initializing
        # stack variables, and could change in the future.
        self.assertPreciseEqual(quotient, 0.0)
    reported = stream.getvalue()

    # The wording of the report differs between Python 2 and Python 3.
    if sys.version_info < (3,):
        self.assertIn("ZeroDivisionError('division by zero',)", reported)
        self.assertIn(" ignored", reported)
    else:
        self.assertIn("Exception ignored", reported)
        self.assertIn("ZeroDivisionError: division by zero", reported)
def test_basic(self):
    """
    Basic usage and properties of a cfunc.
    """
    compiled = cfunc(add_sig)(add_usecase)

    # Python-level metadata mirrors the wrapped function.
    self.assertEqual(compiled.__name__, "add_usecase")
    self.assertEqual(compiled.__qualname__, "add_usecase")
    self.assertIs(compiled.__wrapped__, add_usecase)

    # The native symbol name embeds the Python name.
    native_symbol = compiled.native_name
    self.assertIsInstance(native_symbol, str)
    self.assertIn("add_usecase", native_symbol)

    address = compiled.address
    self.assertIsInstance(address, utils.INT_TYPES)

    # The ctypes callable points at that address and is directly usable.
    as_ctypes = compiled.ctypes
    self.assertEqual(ctypes.cast(as_ctypes, ctypes.c_void_p).value, address)
    self.assertPreciseEqual(as_ctypes(2.0, 3.5), 5.5)
def test_basic(self):
    """
    Basic usage and properties of a cfunc.
    """
    compiled = cfunc(add_sig)(add_usecase)

    # Python-level metadata mirrors the wrapped function.
    self.assertEqual(compiled.__name__, "add_usecase")
    self.assertEqual(compiled.__qualname__, "add_usecase")
    self.assertIs(compiled.__wrapped__, add_usecase)

    # The native symbol name embeds the Python name.
    native_symbol = compiled.native_name
    self.assertIsInstance(native_symbol, str)
    self.assertIn("add_usecase", native_symbol)

    address = compiled.address
    self.assertIsInstance(address, int)

    # The ctypes callable points at that address and is directly usable.
    as_ctypes = compiled.ctypes
    self.assertEqual(ctypes.cast(as_ctypes, ctypes.c_void_p).value, address)
    self.assertPreciseEqual(as_ctypes(2.0, 3.5), 5.5)
def _compile_func_cpu(self):
    """
    Compile ``self._func`` as a C callback.

    The callback signature is ``void(void **, uint64)``; the compiled cfunc
    object is returned.
    """
    nbt = numba.types
    parameter_types = (nbt.CPointer(nbt.voidptr), nbt.uint64)
    signature = nbt.void(*parameter_types)
    return numba.cfunc(signature)(self._func)
def test_locals(self):
    """Check that the ``locals`` option of cfunc is honoured."""
    # By forcing the intermediate result into an integer, we
    # truncate the ultimate function result
    forced_types = {'c': types.int64}
    compiled = cfunc(div_sig, locals=forced_types)(div_usecase)
    quotient = compiled.ctypes(8, 3)
    self.assertPreciseEqual(quotient, 2.0)
def write_bitcode_with_cfunc(pyfunc, sig, filename_suffix=""):
    """
    Compile *pyfunc* with cfunc and dump its LLVM bitcode to a file.

    The file is created in the current directory and named after the
    function: ``<pyfunc.__name__><filename_suffix>.bc``.
    """
    bitcode_path = pyfunc.__name__ + filename_suffix + ".bc"
    with open(bitcode_path, "wb") as fout:
        compiled = cfunc(sig, nopython=False, cache=False)(pyfunc)
        bitcode = compiled._library._final_module.as_bitcode()
        fout.write(bitcode)
def assembly():
    """
    Assemble a matrix and vector on a 13x13 unit square and check their norms.

    Two local switches select where the element kernels come from: custom
    kernels (CFFI-compiled C or Numba cfuncs) or FFC-generated forms.  The
    CFFI module and the Numba kernels are both built regardless of the
    switches, as in the original.
    """
    # Whether to use custom kernels instead of FFC
    use_custom_kernel = True
    # Whether to use CFFI kernels instead of Numba kernels
    use_cffi_kernel = False

    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Variant 1: compile the Poisson kernel using CFFI, then import it
    kernel_name = "_poisson_kernel"
    compile_kernels(kernel_name)
    kernel_mod = importlib.import_module(kernel_name)
    ffi = kernel_mod.ffi
    lib = kernel_mod.lib

    # Variant 2: get pointers to the Numba kernels
    double_ptr = nb.types.CPointer(nb.types.double)
    kernel_sig = nb.types.void(
        double_ptr,
        nb.types.CPointer(double_ptr),
        double_ptr,
        nb.types.intc)
    numba_A = nb.cfunc(kernel_sig, cache=True, nopython=True)(tabulate_tensor_A)
    numba_b = nb.cfunc(kernel_sig, cache=True, nopython=True)(tabulate_tensor_b)

    if not use_custom_kernel:
        # Use FFC
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])
    else:
        if use_cffi_kernel:
            # Use the cffi kernel, compiled from raw C
            ptr_A = ffi.cast("uintptr_t",
                             ffi.addressof(lib, "tabulate_tensor_A"))
            ptr_b = ffi.cast("uintptr_t",
                             ffi.addressof(lib, "tabulate_tensor_b"))
        else:
            # Use the numba generated kernels
            ptr_A = numba_A.address
            ptr_b = numba_b.address
        a.set_cell_tabulate(0, ptr_A)
        L.set_cell_tabulate(0, ptr_b)

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    block_type = cpp.fem.Assembler.BlockType.monolithic
    assembler.assemble(A, block_type)
    assembler.assemble(b, block_type)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)
    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)
import numpy as np
import scipy.integrate as si
from numba import cfunc


def integrand(t):
    """Evaluate exp(-t) / t**2."""
    numerator = np.exp(-t)
    return numerator / t**2


# Same integrand compiled to a C callback taking and returning a double.
nb_integrand = cfunc("float64(float64)")(integrand)


def do_integrate(func):
    """
    Integrate the given function from 1.0 to +inf.
    """
    lower, upper = 1, np.inf
    return si.quad(func, lower, upper)

# Session transcript kept from the original:
# >>> do_integrate(integrand)
# (0.14849550677592208, 3.8736750296130505e-10)
# >>> do_integrate(nb_integrand.ctypes)
# (0.14849550677592208, 3.8736750296130505e-10)
# >>> %timeit do_integrate(integrand)
# 1000 loops, best of 3: 242 µs per loop
# >>> %timeit do_integrate(nb_integrand.ctypes)
# 100000 loops, best of 3: 13.5 µs per loop
def inner(func, input_types=input_types, return_type=return_type, name=name):
    '''
    Inner decorator without arguments, see outer decorator for documentation

    Steps performed, in order:

    1. jit *func* with ``numba.jit`` for the requested signature,
    2. generate and ``exec`` a Python wrapper with a C-friendly signature
       (RVec arguments become pointer + size pairs),
    3. compile that wrapper with ``numba.cfunc`` and take its address,
    4. generate a C++ wrapper in namespace ``Numba`` that calls the address
       and declare it through the interpreter.

    The generated sources and compiled objects are attached to *func* as
    ``numba_func``, ``__py_wrapper__``, ``__numba_cfunc__`` and
    ``__cpp_wrapper__``.  Returns *func* itself.

    Raises ``Exception`` if any of the compilation steps fails.
    '''
    # Jit the given Python callable with numba
    nb_return_type, nb_input_types = get_numba_signature(
        input_types, return_type)
    try:
        nbjit = nb.jit(nb_return_type(*nb_input_types), nopython=True,
                       inline='always')(func)
    except Exception:
        # `except Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit are not converted into this error.
        raise Exception(
            'Failed to jit Python callable {} with numba.jit'.format(func))
    func.numba_func = nbjit

    # Create Python wrapper with C++ friendly signature

    # Define signature
    pywrapper_signature = [
        'ptr_{0}, size_{0}'.format(i) if 'RVec' in t else 'x_{}'.format(i)
        for i, t in enumerate(input_types)]
    if 'RVec' in return_type:
        # If we return an RVec, we return via pointer the pointer of the
        # allocated data and the size in elements.
        pywrapper_signature += ['ptrptr_r, ptrsize_r']

    # Define arguments for jit function
    pywrapper_args_def = [
        'x_{0} = nb.carray(ptr_{0}, (size_{0},))'.format(i)
        if 'RVec' in t else 'x_{}'.format(i)
        for i, t in enumerate(input_types)]
    pywrapper_args = ['x_{}'.format(i) for i in range(len(input_types))]

    # Define return operation
    if 'RVec' in return_type:
        innert = get_inner_type(return_type)
        dtypesize = 1 if innert == 'bool' else int(
            get_numba_type(innert).bitwidth / 8)
        pywrapper_return = '\n    '.join([
            '# Because we cannot manipulate the memory management of the numpy array we copy the data',
            'ptr = malloc(r.size * {})'.format(dtypesize),
            'cp = nb.carray(ptr, r.size, dtype_r)',
            'cp[:] = r[:]',
            '# Return size of the array and the pointer to the copied data',
            'ptrsize_r[0] = r.size',
            'ptrptr_r[0] = cp.ctypes.data'
        ])
    else:
        pywrapper_return = 'return r'

    # Build wrapper code
    pywrappercode = '''\
def pywrapper({SIGNATURE}):
    """
    Wrapper function for the jitted Python callable with special treatment of arrays
    """
    # If an RVec is given, define numba carray wrapper for the input types
    {ARGS_DEF}
    # Call the jitted Python function
    r = nbjit({ARGS})
    # Return the result
    {RETURN}
'''.format(SIGNATURE=', '.join(pywrapper_signature),
           ARGS_DEF='\n    '.join(pywrapper_args_def),
           ARGS=', '.join(pywrapper_args),
           RETURN=pywrapper_return)

    # Make a shallow copy of the dictionary so we don't pollute the global
    # scope
    glob = dict(globals())
    glob['nb'] = nb
    glob['nbjit'] = nbjit

    ffi = cffi.FFI()
    ffi.cdef('void* malloc(long size);')
    C = ffi.dlopen(None)
    glob['malloc'] = C.malloc

    if 'RVec' in return_type:
        glob['dtype_r'] = get_numba_type(get_inner_type(return_type))

    # Execute the generated source into an explicit namespace.  The previous
    # code passed `locals()` to exec and then looked the wrapper up in a
    # second `locals()` call; since Python 3.13 (PEP 667) each `locals()`
    # call inside a function returns an independent snapshot, so the wrapper
    # was never found.  The three-argument call form of exec is accepted by
    # both Python 2 and Python 3, so no version switch is needed.
    namespace = {}
    exec(pywrappercode, glob, namespace)

    if 'pywrapper' not in namespace:
        raise Exception(
            'Failed to create Python wrapper function:\n{}'.format(
                pywrappercode))
    pywrapper = namespace['pywrapper']

    # Jit the Python wrapper code
    c_return_type, c_input_types = get_c_signature(input_types, return_type)
    try:
        nbcfunc = nb.cfunc(c_return_type(*c_input_types),
                           nopython=True)(pywrapper)
    except Exception:
        raise Exception('Failed to jit Python wrapper with numba.cfunc')
    func.__py_wrapper__ = pywrappercode
    func.__numba_cfunc__ = nbcfunc

    # Get address of jitted wrapper function
    address = nbcfunc.address

    # Infer name of the C++ wrapper function
    if not name:
        name = func.__name__

    # Build C++ wrapper for jitting with cling

    # Define input signature
    input_types_ref = [
        'ROOT::{}&'.format(t) if 'RVec' in t else t for t in input_types
    ]
    input_signature = ', '.join('{} x_{}'.format(t, i)
                                for i, t in enumerate(input_types_ref))

    # Define function pointer types
    func_ptr_input_types = []
    for t in input_types:
        if 'RVec' in t:
            innert = get_inner_type(t)
            if innert == 'bool':
                # Special treatment for bool: In numpy, bools have 1 byte
                func_ptr_input_types += ['char*, int']
            else:
                func_ptr_input_types += ['{}*, int'.format(innert)]
        else:
            func_ptr_input_types += [t]
    if 'RVec' in return_type:
        # See C++ wrapper code for the reason using these types
        innert = get_inner_type(return_type)
        func_ptr_input_types += [
            '{}**, long*'.format('char' if innert == 'bool' else innert)
        ]
    func_ptr_type = '{RETURN_TYPE}(*)({INPUT_TYPES})'.format(
        RETURN_TYPE='void*' if 'RVec' in return_type else return_type,
        INPUT_TYPES=', '.join(func_ptr_input_types))

    # Define function call
    vecbool_conversion = []
    func_args = []
    for i, t in enumerate(input_types):
        if 'RVec' in t:
            func_args += ['x_{0}.data(), x_{0}.size()'.format(i)]
            if get_inner_type(t) == 'bool':
                # Copy the RVec<bool> to a RVec<char> to match the numpy
                # memory layout
                func_args[-1] = func_args[-1].replace('x_', 'xb_')
                vecbool_conversion += [
                    'ROOT::RVec<char> xb_{0} = x_{0};'.format(i)
                ]
        else:
            func_args += ['x_{}'.format(i)]
    if 'RVec' in return_type:
        # See C++ wrapper code for the reason using these arguments
        func_args += ['&ptr, &size']

    # Define return operation
    if 'RVec' in return_type:
        innert = get_inner_type(return_type)
        if innert == 'bool':
            innert = 'char'
        return_op = '\n    '.join([
            '// Because an RVec cannot take the ownership of external data, we have to copy the returned array',
            'long size; // Size of the returned array',
            '{}* ptr; // Pointer to the data of the returned array'.format(
                innert),
            'funcptr({});'.format(', '.join(func_args)),
            # TODO: Remove this copy as soon as RVec can adopt the ownership
            'ROOT::RVec<{}> x_r(ptr, ptr + size);'.format(innert),
            'free(ptr);',
            # If we return a RVec<bool>, we rely here on the automatic
            # conversion of RVec<char> to RVec<bool>
            'return x_r;'
        ])
    else:
        return_op = 'return funcptr({});'.format(', '.join(func_args))

    # Build wrapper code
    cppwrappercode = """\
namespace Numba {{
/*
 * C++ wrapper function around the jitted Python wrapping which calls the jitted Python callable
 */
{RETURN_TYPE} {FUNC_NAME}({INPUT_SIGNATURE}) {{
    // Create a function pointer from the jitted Python wrapper
    const auto funcptr = reinterpret_cast<{FUNC_PTR_TYPE}>({FUNC_PTR});
    // Perform conversion of RVec<bool>
    {VECBOOL_CONVERSION}
    // Return the result
    {RETURN_OP}
}}
}}""".format(RETURN_TYPE='ROOT::' + return_type
             if 'RVec' in return_type else return_type,
             FUNC_NAME=name,
             INPUT_SIGNATURE=input_signature,
             FUNC_PTR=address,
             FUNC_PTR_TYPE=func_ptr_type,
             VECBOOL_CONVERSION='\n    '.join(vecbool_conversion),
             RETURN_OP=return_op)

    # Jit wrapper C++ code; a falsy result from Declare is treated as failure
    err = gbl_namespace.gInterpreter.Declare(cppwrappercode)
    if not err:
        raise Exception(
            'Failed to jit C++ wrapper code with cling:\n{}'.format(
                cppwrappercode))
    func.__cpp_wrapper__ = cppwrappercode

    return func
# Integration limits
a = 0.01
b = 100


@contextlib.contextmanager
def timing():
    """Print the wall-clock time spent inside the ``with`` body."""
    started = time.time()
    yield
    elapsed = time.time() - started
    print('Time: {}s'.format(elapsed))


def func(x):
    """Evaluate sin(1 / x)."""
    return sin(1 / x)


optimized_func = cfunc("float64(float64)")(func)

# The compiled integrand is timed twice in a row, as in the original.
for _ in range(2):
    print('Optimized version')
    with timing():
        out1 = quad(LowLevelCallable(optimized_func.ctypes), a, b)
    print(out1)

print('Standard version')
with timing():
    out2 = quad(func, a, b)
print(out2)
def solve():
    """
    Assemble and solve a problem on a unit cube, then write the solution.

    The bilinear-form kernel comes from a CFFI-compiled C module and the
    right-hand-side kernel from a Numba cfunc, unless the local switch
    selects FFC-generated forms.  Assembly of the matrix is timed, the
    norms are checked against reference values, and the solution is written
    to ``poisson_3d.xdmf``.
    """
    # Whether to use custom Numba kernels instead of FFC
    use_custom_kernels = True

    # Generate a unit cube with (n+1)^3 vertices
    n = 22
    mesh = UnitCubeMesh(MPI.comm_world, n, n, n)
    Q = FunctionSpace(mesh, "Lagrange", 1)
    u = TrialFunction(Q)
    v = TestFunction(Q)

    # Define the boundary: vertices where any component is in machine
    # precision accuracy 0 or 1
    def boundary(x):
        on_face = np.logical_or(x < DOLFIN_EPS, x > 1.0 - DOLFIN_EPS)
        return np.sum(on_face, axis=1) > 0

    u0 = Constant(0.0)
    bc = DirichletBC(Q, u0, boundary)

    # Initialize bilinear form and rhs
    a = dolfin.cpp.fem.Form([Q._cpp_object, Q._cpp_object])
    L = dolfin.cpp.fem.Form([Q._cpp_object])

    # Signature of tabulate_tensor functions
    double_ptr = nb.types.CPointer(nb.types.double)
    kernel_sig = nb.types.void(
        double_ptr,
        nb.types.CPointer(double_ptr),
        double_ptr,
        nb.types.intc)

    # Compile the python functions using Numba.  The Numba version of the
    # bilinear kernel is compiled but its address is not used below.
    numba_A = nb.cfunc(kernel_sig, cache=True, nopython=True)(tabulate_tensor_A)
    numba_L = nb.cfunc(kernel_sig, cache=True, nopython=True)(tabulate_tensor_L)

    # Build the C kernel with CFFI
    module_name = "_laplace_kernel"
    builder = cffi.FFI()
    builder.set_source(module_name, TABULATE_C)
    builder.cdef(TABULATE_H)
    builder.compile()

    # Import the compiled kernel
    kernel_mod = importlib.import_module(module_name)
    ffi = kernel_mod.ffi
    lib = kernel_mod.lib

    # Pointer to the compiled C function (bilinear form) and to the Numba
    # function (rhs)
    ptr_A = ffi.cast("uintptr_t", ffi.addressof(lib, "tabulate_tensor_A"))
    ptr_L = numba_L.address

    if not use_custom_kernels:
        # Use FFC
        # Bilinear form
        jit_result = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        a = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])

        # Rhs
        f = Expression("2.0", element=Q.ufl_element())
        jit_result = ffc_jit(f * v * dx)
        ufc_form = dolfin.cpp.fem.make_ufc_form(jit_result[0])
        L = dolfin.cpp.fem.Form(ufc_form, [Q._cpp_object])
        # Attach rhs expression as coefficient
        L.set_coefficient(0, f._cpp_object)
    else:
        # Configure Forms to use own tabulate functions
        a.set_cell_tabulate(0, ptr_A)
        L.set_cell_tabulate(0, ptr_L)

    assembler = dolfin.cpp.fem.Assembler([[a]], [L], [bc])
    A = PETScMatrix()
    b = PETScVector()
    block_type = dolfin.cpp.fem.Assembler.BlockType.monolithic

    # Perform assembly; only the matrix assembly is timed
    started = time.time()
    assembler.assemble(A, block_type)
    finished = time.time()

    # We don't care about the RHS
    assembler.assemble(b, block_type)

    elapsed = finished - started
    print(f"Time for assembly: {elapsed*1000.0}ms")

    Anorm = A.norm(dolfin.cpp.la.Norm.frobenius)
    bnorm = b.norm(dolfin.cpp.la.Norm.l2)
    print(Anorm, bnorm)

    # NOTE(review): the original comment says "Norms obtained with FFC and
    # n=13", but n is 22 above - confirm which n these references belong to.
    assert np.isclose(Anorm, 60.86192203436385)
    assert np.isclose(bnorm, 0.018075523965828778)

    comm = L.mesh().mpi_comm()
    solver = PETScKrylovSolver(comm)

    u = Function(Q)
    solver.set_operator(A)
    solver.solve(u.vector(), b)

    # Export result
    xdmf = XDMFFile(MPI.comm_world, "poisson_3d.xdmf")
    xdmf.write(u, XDMFFile.Encoding.HDF5)
def print_llvm(pyfunc, sig):
    """
    Compile *pyfunc* with cfunc for signature *sig* and print its LLVM IR.

    Compilation uses ``nopython=False`` and ``cache=False``.
    """
    f = cfunc(sig, nopython=False, cache=False)(pyfunc)
    # Single-argument call form: valid on Python 3 and prints the same text
    # under the Python 2 print statement.
    print(f.inspect_llvm())
def test_llvm_ir(self):
    """Check that the LLVM IR of a cfunc can be inspected."""
    compiled = cfunc(add_sig)(add_usecase)
    llvm_text = compiled.inspect_llvm()
    # The IR mentions the native symbol and a double-precision add.
    for expected in (compiled.native_name, "fadd double"):
        self.assertIn(expected, llvm_text)
def print_jvm(pyfunc, sig, optLevel=0, sizeLevel=0, debugLevel=0):
    """
    Compile *pyfunc* with cfunc and print it as JVM assembly code.

    The LLVM bitcode of the compiled function is handed to
    ``lljvmapi.printAsJVMAssemblyCode`` together with its length and the
    three level arguments, which are passed through unchanged.
    """
    compiled = cfunc(sig, nopython=False, cache=False)(pyfunc)
    bitcode = compiled._library._final_module.as_bitcode()
    bitcode_size = len(bitcode)
    lljvmapi.printAsJVMAssemblyCode(
        bitcode, bitcode_size, optLevel, sizeLevel, debugLevel)
def __init__(self, func, sig):
    """Store *func*, its cfunc-compiled counterpart for *sig*, and *sig*."""
    self.pyfunc = func
    compile_for_sig = cfunc(sig)
    self.cfunc = compile_for_sig(func)
    self.sig = sig
def test_cfunc(self):
    """Compile a testcase built with a non-ASCII name as a cfunc and call it."""
    testcase = self.make_testcase(unicode_name2, 'Ծ_Ծ')
    compiled = cfunc("int32(int32, int32)")(testcase)
    outcome = compiled.ctypes(1, 2)
    self.assertEqual(outcome, 3)
def EvalValue(self, p):
    # Build a 3x3 upper-triangular array from the first three entries of p.
    return np.array([[p[0], p[1], p[2]], [0.0, p[1], p[2]], [0.0, 0.0, p[2]]])


# Scalar helper compiled with the decorator form of cfunc; returns its first
# argument.
@cfunc("float64(float64, float64, float64)")
def s_func0(x, y, z):
    return x


# Same helper as a plain Python function ...
def s_func1(x, y, z):
    return x


# ... compiled with the call form of cfunc.  This rebinds s_func0, replacing
# the decorated definition above.
s_func0 = cfunc("float64(float64, float64, float64)")(s_func1)


# Callback with the mfem scalar signature: forwards the first three entries
# of ptx to s_func0.  Caching is disabled for this one.
@cfunc(mfem.scalar_sig, cache=False)
def s_func(ptx, sdim):
    return s_func0(ptx[0], ptx[1], ptx[2])


# Callback with the mfem vector signature: copies the first sdim entries of
# ptx into the output buffer, viewed as an array of length vdim.
@cfunc(mfem.vector_sig)
def v_func(ptx, out, sdim, vdim):
    out_array = carray(out, (vdim, ))
    for i in range(sdim):
        out_array[i] = ptx[i]


# NOTE(review): the function this decorator applies to lies outside the
# visible part of the file.
@cfunc(mfem.matrix_sig)
def jit_integrand(integrand_function):
    """
    Compile *integrand_function* into a ``scipy.LowLevelCallable``.

    The integrand is jitted in nopython mode (with caching) and wrapped in
    a C callback with signature ``double(int, double *)`` that unpacks the
    pointed-to values into positional arguments.

    Parameters
    ----------
    integrand_function : callable
        Python function taking between 4 and 11 positional arguments.

    Returns
    -------
    LowLevelCallable
        Wrapper around the compiled callback's ctypes function pointer.

    Raises
    ------
    ValueError
        If the integrand does not take between 4 and 11 positional
        arguments.  Previously this case left the wrapper undefined and
        failed later, inside ``cfunc``, with an unrelated error.
    """
    no_args = len(inspect.getfullargspec(integrand_function).args)
    # Validate before compiling anything so the failure is cheap and clear.
    if not 4 <= no_args <= 11:
        raise ValueError(
            "jit_integrand supports integrands taking 4 to 11 positional "
            "arguments, got {}".format(no_args))

    jitted_function = decorator_util.jit(nopython=True, cache=True)(
        integrand_function)

    # One explicit wrapper per arity: the indices must be literal so the
    # call can be compiled in nopython mode.
    if no_args == 4:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3])
    elif no_args == 5:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4])
    elif no_args == 6:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5])
    elif no_args == 7:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6])
    elif no_args == 8:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7])
    elif no_args == 9:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8])
    elif no_args == 10:
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8], xx[9])
    else:  # no_args == 11, guaranteed by the range check above
        # noinspection PyUnusedLocal
        def wrapped(n, xx):
            return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5],
                                   xx[6], xx[7], xx[8], xx[9], xx[10])

    cf = cfunc(float64(intc, CPointer(float64)))
    return LowLevelCallable(cf(wrapped).ctypes)
def B_rz_polygons_norm(r, z, R, Z, k=10, phi1=-np.pi / 4, phi2=np.pi / 4):
    """Integrate the polygon-train integrand over phi and difference the result.

    Parameters
    ----------
    r, z : scalar or array_like
        Coordinates at which the field is evaluated (per the original
        documentation; shapes are not checked here).
    R, Z : array_like
        Vertex coordinates of the train of polygons (per the original
        documentation, 2D meshgrid-like).
    k : int, optional
        If truthy, Romberg integration over ``2**k + 1`` equally spaced
        samples is used, by default k=10.
        If falsy (``None`` or 0), ``quad`` is used instead, through a
        ``numba.cfunc`` callback when numba is available.
    phi1 : float, optional
        bottom integration limit, by default -pi/4
    phi2 : float, optional
        top integration limit, by default pi/4

    Returns
    -------
    A, B : tuple
        With ``H`` the integral divided by ``4*pi`` and differenced once
        along axis 1 and once along axis 2: ``A`` is ``H[0]`` and ``B`` is
        ``mu_0 * H[1:]``.

    Notes
    -----
    Romberg numerical integration is used because quad and other cannot
    efficiently integrate vector functions.

    NOTE(review): in the quad branch the vectorized function returns the
    whole ``quad`` result (value and error estimate), and the original
    carries a TODO about refactoring ``multi_integrand`` for it; that
    branch looks unfinished - confirm before relying on ``k=None``.
    """
    # Computed as in the original; neither value is used further down.
    b1v = b1_edge_slopes(R, Z, axis=0)  # vertical
    b1h = b1_edge_slopes(R, Z, axis=1)  # horizontal

    # prepare for broadcasting against each other
    args_ = [
        add_last_dims(r, 1),
        add_last_dims(z, 1),
        add_last_dims(R, 2),
        add_last_dims(Z, 2),
    ]

    if k:
        # sample-based Romberg integration
        phi, dphi = np.linspace(phi1, phi2, 2**k + 1, retstep=True)
        samples = multi_integrand(phi, *args_)
        # probably should cancel out anyways
        samples[np.logical_not(np.isfinite(samples))] = 0
        H = scipy.integrate.romb(samples, dphi)
    else:
        if numba:
            # TODO refactor multi_integrand into separate function for quad
            # to work
            nb_mint = numba.cfunc(
                'float64(float64, float64, float64, float64, float64)')(
                    multi_integrand).ctypes

            callback_sig = numba.types.double(
                numba.types.intc,
                numba.types.CPointer(numba.types.double))

            @numba.cfunc(callback_sig)
            def integrand_c(n, xx_ptr):
                xx = numba.carray(xx_ptr, (n, ))
                return nb_mint(xx[0], xx[1], xx[2], xx[3], xx[4])

            integrand = integrand_c.ctypes
        else:
            integrand = multi_integrand

        def integrate_one(r, z, R, Z):
            return quad(integrand, phi1, phi2, args=(r, z, R, Z))

        do_integrate = np.vectorize(integrate_one)
        H = do_integrate(*args_)

    H /= 4 * np.pi  # common for both B and A
    H = np.diff(H, axis=1)
    H = np.diff(H, axis=2)
    B = mu_0 * H[1:]
    A = H[0]
    return A, B
def cfunc_func(func):
    """
    Compile the plain Python function *func* with cfunc.

    The signature ``sig`` comes from the enclosing scope.  The original
    function is attached to the result as ``pyfunc``.
    """
    assert isinstance(func, pytypes.FunctionType), repr(func)
    compiled = cfunc(sig)(func)
    compiled.pyfunc = func
    return compiled
def _computeParameters(self):
    """
    Build the symbolic expressions and compiled callables of the equation.

    From ``self.theta``, ``self.b``, ``self.delta`` and ``self.epsilon``
    (sympified first) this sets:

    * symbolic P(V, T), Z(V, T) and dZ/dT(V, T),
    * njit-compiled numeric versions of Z and dZ/dT,
    * symbolic and njit-compiled coefficients a0, a1, a2 of
      ``Z**3 + a0*Z**2 + a1*Z + a2 = 0`` as functions of (P, T),
    * two ``LowLevelCallable`` integrands (``self._qnf`` and
      ``self._numf_UR``) built from Python source generated out of the
      symbolic expressions.
    """
    self.theta = sp.sympify(self.theta)
    self.b = sp.sympify(self.b)
    self.delta = sp.sympify(self.delta)
    self.epsilon = sp.sympify(self.epsilon)
    # Self-assignments kept from the original.
    self.eosDisplayName = self.eosDisplayName
    self.eosInfo = self.eosInfo

    V, T, P = self.V, self.T, self.P
    RT = R_IG * T

    self._PsymbolicFromVT = RT / (V - self.b) - self.theta / (
        (V - self.b) * (V ** 2 + self.delta * V + self.epsilon)
    )
    self._ZsymbolicFromVT = V / (V - self.b) - (
        V * self.theta / RT
    ) / (V ** 2 + V * self.delta + self.epsilon)
    self._dZdTsymbolicFromVT = sp.diff(self._ZsymbolicFromVT, T)

    def _compile(symbols, expression):
        # Turn a symbolic expression into an njit-compiled numpy function.
        return njit()(sp.lambdify(symbols, expression, modules="numpy"))

    self._numf_ZfromVT = _compile([V, T], self._ZsymbolicFromVT)
    self._numf_dZdTfromVT = _compile([V, T], self._dZdTsymbolicFromVT)

    self._Bl = self.b * P / RT
    self._deltal = self.delta * P / RT
    self._thetal = self.theta * P / RT ** 2
    self._epsilonl = self.epsilon * (P / RT) ** 2

    # coefficients Z**3 + a0*Z**2 + a1*Z + a2 = 0
    self._a0 = self._deltal - self._Bl - 1
    self._a1 = self._thetal + self._epsilonl - self._deltal * (self._Bl + 1)
    self._a2 = -(self._epsilonl * (self._Bl + 1) + self._thetal * self._Bl)

    self._numf_a0 = _compile([P, T], self._a0)
    self._numf_a1 = _compile([P, T], self._a1)
    self._numf_a2 = _compile([P, T], self._a2)

    self.tmp_cfunc = None
    self.tmp_cfunc2 = None
    c_sig = types.double(types.intc, types.CPointer(types.double))

    def _as_callback_source(expression):
        # Rewrite the printed expression so that it reads its inputs from
        # the callback's data array and uses numpy functions.  The
        # replacements are plain substring substitutions applied in this
        # order.
        text = str(expression)
        for old, new in (
            ("V", "data[0]"),
            ("T", "data[1]"),
            ("Abs", "np.abs"),
            ("sign", "np.sign"),
        ):
            text = text.replace(old, new)
        return text

    # NOTE(review): exec on generated source; the text derives from the
    # symbolic expressions above - confirm those never come from untrusted
    # input.
    residual_source = _as_callback_source((1 - self._ZsymbolicFromVT) / V)
    exec("self.tmp_cfunc = lambda n, data: {:s}".format(residual_source))
    qf = cfunc(c_sig)(self.tmp_cfunc)
    self._qnf = LowLevelCallable(qf.ctypes)

    derivative_source = _as_callback_source(T * self._dZdTsymbolicFromVT / V)
    exec("self.tmp_cfunc2 = lambda n, data: {:s}".format(derivative_source))
    tf = cfunc(c_sig)(self.tmp_cfunc2)
    self._numf_UR = LowLevelCallable(tf.ctypes)
from cffi import FFI
import numba as nb
import numpy as np

# Load the Rust library and declare the one function called from it.
ffi = FFI()
lib = ffi.dlopen('./target/release/libnumba_test.dylib')
ffi.cdef('double rust_double_callback(void*, int);')


def func(n):
    """Return the mean of sin(x) for x = 2.0, 3.0, ..., n + 1."""
    x = np.zeros(n, dtype=np.float64)
    # `range` instead of the Python 2-only `xrange`
    for i in range(n):
        x[i] = 1.0 * (i + 2)
    return np.mean(np.sin(x))


# The same function compiled twice: as a C callback for the Rust side and
# as a regular jitted function for the Python-side reference.
f_cfunc = nb.cfunc('float64(int64)', nopython=True)(func)
f_nb = nb.jit(nopython=True)(func)

# print() calls instead of Python 2 print statements.  The Python-side
# reference is 2.0 * f_nb(4), as in the original.
print('Result from Rust via CFFI: ', lib.rust_double_callback(f_cfunc.cffi, 4))
print('Result from Python: ', 2.0 * f_nb(4))