def __init__(self, dmm, fe_type):
    """Initialise the struct model with two ``offset_typ`` pointer members.

    The members are, in order, ``index_offsets`` and ``data_offsets``.
    """
    member_names = ('index_offsets', 'data_offsets')
    members = [(name, types.CPointer(offset_typ)) for name in member_names]
    # A further member is sketched but currently disabled:
    # ('null_bitmap', types.CPointer(char_typ)),
    models.StructModel.__init__(self, dmm, fe_type, members)
def resize_overload(A_t, resize_shape_t):
    """Return an implementation of ``cv2.resize`` for the given argument types.

    Only 3-dimensional ``uint8`` arrays combined with a 2-tuple of ``int64``
    are accepted; any other combination raises ``ValueError``.

    The returned implementation forwards to the external ``cv_resize``
    function and returns a newly allocated output array.
    """
    required_ndim = 3
    supported = (
        isinstance(A_t, types.Array)
        and A_t.ndim == required_ndim
        and A_t.dtype == types.uint8
        and resize_shape_t == types.UniTuple(types.int64, 2)
    )
    if not supported:
        raise ValueError("Unsupported cv2.resize() with types {} {}".format(
            A_t, resize_shape_t))

    pixel_type = A_t.dtype
    # Argument order of the external function:
    #   new num rows, new num cols, output data, input data,
    #   num rows, num cols
    sig = types.void(
        types.intp,
        types.intp,
        types.CPointer(pixel_type),
        types.CPointer(pixel_type),
        types.intp,
        types.intp,
    )
    cv_resize = types.ExternalFunction("cv_resize", sig)

    def resize_imp(in_arr, resize_shape):
        A = np.ascontiguousarray(in_arr)
        n_channels = A.shape[2]
        # cv Size object has column first, so the row count is
        # resize_shape[1] and the column count is resize_shape[0].
        new_rows = resize_shape[1]
        new_cols = resize_shape[0]
        B = np.empty((new_rows, new_cols, n_channels), A.dtype)
        cv_resize(new_rows, new_cols, B.ctypes, A.ctypes,
                  A.shape[0], A.shape[1])
        return B

    return resize_imp
def make_signature(f, td=False, return_complex=False, shape=None):
    """Build a numba signature from the parameter count of ``f``.

    Parameters
    ----------
    f : callable
        Function whose parameters are counted with ``inspect.signature``.
    td : bool
        When true, one parameter is taken out of the leading block of
        doubles and appended again as a separate trailing double
        (presumably a time argument -- confirm against callers).
    return_complex : bool
        Selects ``complex128`` instead of ``float64`` as the value type.
    shape : object or None
        When ``None`` the signature returns the value type directly.
        Otherwise the signature returns ``void``, the last parameter of
        ``f`` is treated as an output argument and a ``CPointer`` to the
        value type is appended.

    Returns
    -------
    tuple
        ``(l, signature)`` where ``l`` is the number of leading double
        arguments and ``signature`` is the numba signature object.
    """
    from numba import types

    n_params = len(inspect.signature(f).parameters)
    value_type = types.complex128 if return_complex else types.float64

    if shape is None:
        return_type = value_type
        l = n_params
    else:
        return_type = types.void
        l = n_params - 1

    if td:
        # The original read ``l = l=1``, a chained assignment that forced
        # ``l`` to 1 regardless of the parameter count.  The extra double is
        # appended below, so the leading block has to shrink by one.
        l -= 1

    args = [types.double] * l
    if td:
        args.append(types.double)
    if shape is not None:
        args.append(types.CPointer(value_type))
    return l, return_type(*args)
def _populate_setup_args(args_no, is_input_args=False): ret = [] for _ in range(args_no): ret.append(types.CPointer(types.int64)) ret.append(types.int32) ret.append( types.int32 if is_input_args else types.CPointer(types.int32)) return ret
class NdIndexIter(cgutils.Structure):
    """
    .ndindex() implementation.

    The structure holds the iteration shape, a pointer to the array of
    current per-dimension indices and a pointer to an "exhausted" flag.
    `ndim` is a free variable taken from the enclosing scope, which is
    not visible in this block.
    """
    _fields = [
        ('shape', types.UniTuple(types.intp, ndim)),
        ('indices', types.CPointer(types.intp)),
        ('exhausted', types.CPointer(types.boolean)),
    ]

    def init_specific(self, context, builder, shapes):
        """
        Emit the iterator set-up code: allocate the index array and the
        exhausted flag, zero every index, and set the flag immediately if
        any entry of *shapes* equals zero.
        """
        zero = context.get_constant(types.intp, 0)
        # One intp slot per dimension.
        indices = cgutils.alloca_once(builder, zero.type,
                                      size=context.get_constant(
                                          types.intp, ndim))
        exhausted = cgutils.alloca_once_value(builder, cgutils.false_byte)

        for dim in range(ndim):
            idxptr = cgutils.gep(builder, indices, dim)
            builder.store(zero, idxptr)
            # 0-sized dimensions really indicate an empty array,
            # but we have to catch that condition early to avoid
            # a bug inside the iteration logic.
            dim_size = shapes[dim]
            dim_is_empty = builder.icmp(lc.ICMP_EQ, dim_size, zero)
            with cgutils.if_unlikely(builder, dim_is_empty):
                builder.store(cgutils.true_byte, exhausted)

        self.indices = indices
        self.exhausted = exhausted
        self.shape = cgutils.pack_array(builder, shapes)

    def iternext_specific(self, context, builder, result):
        """
        Emit the code for one iteration step.

        If the exhausted flag is set, *result* is marked invalid and control
        branches to the 'end' block.  Otherwise the current indices are
        yielded as a packed array, *result* is marked valid and the indices
        are advanced through `_increment_indices`.
        """
        # NOTE(review): `zero` and `one` are not used below.
        zero = context.get_constant(types.intp, 0)
        one = context.get_constant(types.intp, 1)

        # Both the exhausted and the regular path branch to this block.
        bbend = cgutils.append_basic_block(builder, 'end')

        exhausted = cgutils.as_bool_bit(builder, builder.load(self.exhausted))
        with cgutils.if_unlikely(builder, exhausted):
            result.set_valid(False)
            builder.branch(bbend)

        # Load the current index of every dimension before advancing.
        indices = [
            builder.load(cgutils.gep(builder, self.indices, dim))
            for dim in range(ndim)
        ]
        result.yield_(cgutils.pack_array(builder, indices))
        result.set_valid(True)

        shape = cgutils.unpack_tuple(builder, self.shape, ndim)
        _increment_indices(context, builder, ndim, shape,
                           self.indices, self.exhausted)

        builder.branch(bbend)
        builder.position_at_end(bbend)
def _populate_run_args(types_list): ret = [] types_list = [types_list ] if not isinstance(types_list, list) else types_list for type in types_list: ret.append(types.CPointer(type)) ret.append(types.CPointer(types.int64)) ret.append(types.int32) return ret
def __init__(self, dmm, fe_type):
    """Initialise the struct model with its five members.

    In order: ``num_items`` and ``num_total_chars`` (both ``uint64``),
    ``offsets`` (pointer to ``offset_typ``), ``data`` (pointer to
    ``char_typ``) and ``meminfo`` (meminfo pointer to
    ``str_arr_payload_type``).
    """
    names = ('num_items', 'num_total_chars', 'offsets', 'data', 'meminfo')
    member_types = (
        types.uint64,
        types.uint64,
        types.CPointer(offset_typ),
        types.CPointer(char_typ),
        types.MemInfoPointer(str_arr_payload_type),
    )
    members = list(zip(names, member_types))
    models.StructModel.__init__(self, dmm, fe_type, members)
def __init__(self, dmm, fe_type):
    """Initialise the data model for the flat iterator over ``fe_type.array_type``.

    Members: a pointer to the array, plus ephemeral pointers for the
    per-dimension data pointers, the indices and the exhausted flag.
    """
    arr_ty = fe_type.array_type
    elem_ptr = types.CPointer(arr_ty.dtype)
    members = [
        ('array', types.CPointer(arr_ty)),
        # NOTE: pointers and indices are arrays
        ('pointers', types.EphemeralPointer(elem_ptr)),
        ('indices', types.EphemeralPointer(types.intp)),
        ('exhausted', types.EphemeralPointer(types.boolean)),
    ]
    super(FlatIter, self).__init__(dmm, fe_type, members)
def __init__(self, dmm, fe_type):
    """Initialise the data model for the flat iterator over a C-layout array.

    Asserts that ``fe_type.array_type.layout`` is ``'C'``.  Members: a
    pointer to the array, the stride, and ephemeral pointers for the
    current data pointer, the flat index and the indices array.
    """
    assert fe_type.array_type.layout == 'C'
    arr_ty = fe_type.array_type
    elem_ptr = types.CPointer(arr_ty.dtype)
    members = [
        ('array', types.CPointer(arr_ty)),
        ('stride', types.intp),
        ('pointer', types.EphemeralPointer(elem_ptr)),
        ('index', types.EphemeralPointer(types.intp)),
        # NOTE: indices is an array
        ('indices', types.EphemeralPointer(types.intp)),
    ]
    super(CContiguousFlatIter, self).__init__(dmm, fe_type, members)
def _conversion_tests(self, check):
    """Call ``check(ctypes_object, numba_type)`` for each known pairing.

    The pairs are visited in a fixed order, from scalar types through
    pointer types to ``None`` / ``void``.
    """
    cases = (
        (c_double, types.float64),
        (c_int, types.intc),
        (c_uint16, types.uint16),
        (c_size_t, types.uintp),
        (c_ssize_t, types.intp),
        (c_void_p, types.voidptr),
        (POINTER(c_float), types.CPointer(types.float32)),
        (POINTER(POINTER(c_float)),
         types.CPointer(types.CPointer(types.float32))),
        (None, types.void),
    )
    for ctypes_obj, numba_type in cases:
        check(ctypes_obj, numba_type)
def map_type(cffi_type):
    """
    Map CFFI type to numba type.

    Unions and vararg functions raise ``TypeError``.  Functions become a
    signature built from the mapped result and argument types, ``void``
    pointers become ``types.voidptr``, other pointers become a ``CPointer``
    to the mapped pointee, and arrays map to their mapped item type.
    Anything else is looked up in ``_type_map()``; a missing entry raises
    ``TypeError``.
    """
    kind = getattr(cffi_type, 'kind', '')

    if kind == 'union':
        raise TypeError("No support for CFFI union")

    if kind == 'function':
        if cffi_type.ellipsis:
            raise TypeError("vararg function is not supported")
        restype = map_type(cffi_type.result)
        argtypes = [map_type(arg) for arg in cffi_type.args]
        return templates.signature(restype, *argtypes)

    if kind == 'pointer':
        target = cffi_type.item
        if target.kind == 'void':
            return types.voidptr
        return types.CPointer(map_type(target))

    if kind == 'array':
        return map_type(cffi_type.item)

    mapped = _type_map().get(cffi_type)
    if mapped is None:
        raise TypeError(cffi_type)
    return mapped
def array_impl_unsafe_get_ctypes_pointer(arrtype):
    """Return an implementation casting ``arr.ctypes.data`` to a typed pointer.

    For a ``types.Array`` the returned function passes ``arr.ctypes.data``
    through ``unsafe_caster`` with a ``CPointer`` to the array's dtype as
    target.  For any other type nothing is returned.
    """
    if not isinstance(arrtype, types.Array):
        return None

    unsafe_cast = unsafe_caster(types.CPointer(arrtype.dtype))

    def array_impl(arr):
        return unsafe_cast(src=arr.ctypes.data)

    return array_impl
def __init__(self, dmm, fe_type):
    """Initialise the struct model with a ``uint8`` pointer and a meminfo.

    Members, in order: ``table_ptr`` and ``meminfo`` (meminfo pointer to
    ``voidptr``).
    """
    table_ptr_member = ('table_ptr', types.CPointer(types.uint8))
    meminfo_member = ('meminfo', types.MemInfoPointer(types.voidptr))
    members = [table_ptr_member, meminfo_member]
    models.StructModel.__init__(self, dmm, fe_type, members)
class ArrayTemplate(cgutils.Structure):
    """
    Structure describing an array: data pointer, shape, strides, parent.

    `dtype` and `nd` are free variables taken from the enclosing scope,
    which is not visible in this block.
    """
    _fields = [
        # Pointer to the element data.
        ('data', types.CPointer(dtype)),
        # One intp per dimension for each of shape and strides.
        ('shape', types.UniTuple(types.intp, nd)),
        ('strides', types.UniTuple(types.intp, nd)),
        ('parent', types.pyobject),
    ]
def _convert_internal(ctypeobj):
    """Recursive helper mapping a ctypes type to a numba type.

    Pointer types are converted by converting their pointee and wrapping
    the result in ``types.CPointer``.  Non-pointer types are looked up in
    ``_FROM_CTYPES``.  ``None`` is returned when no mapping is found.
    """
    if not issubclass(ctypeobj, ctypes._Pointer):
        return _FROM_CTYPES.get(ctypeobj)

    pointee = _convert_internal(ctypeobj._type_)
    if pointee is None:
        return None
    return types.CPointer(pointee)
def generic(self, args, kws):
    """Type a one-argument call taking a 'C' or 'F' layout array.

    Returns a signature mapping the array type to a ``CPointer`` of its
    dtype.  Returns ``None`` when keyword arguments are given, when the
    argument count is not exactly one, or when the argument is not a
    ``types.Array`` with layout 'C' or 'F'.
    """
    if kws or len(args) != 1:
        return None

    (ary,) = args
    is_contiguous_array = (
        isinstance(ary, types.Array) and ary.layout in ('C', 'F')
    )
    if not is_contiguous_array:
        return None

    return templates.signature(types.CPointer(ary.dtype), ary)
def jit_integrand_function(integrand_function):
    """Wrap ``integrand_function`` as a ``LowLevelCallable``.

    The function is jitted in nopython/nogil mode and exposed through a C
    callback of signature ``float64(intc, float64*)`` that forwards the
    first six entries of the double array as positional arguments.

    Based on https://stackoverflow.com/a/49732825/4779220
    """
    jitted_function = numba.jit(integrand_function, nopython=True, nogil=True)
    c_signature = nut.float64(nut.intc, nut.CPointer(nut.float64))

    def wrapped(n, xx):
        # TODO: nicer way to not hard code number of args? `*carray()` may
        # not expand correctly
        return jitted_function(xx[0], xx[1], xx[2], xx[3], xx[4], xx[5])

    c_callback = numba.cfunc(c_signature)(wrapped)
    return LowLevelCallable(c_callback.ctypes)
def __init__(self, dmm, fe_type):
    """Initialise the data model for the flat iterator over ``fe_type.array_type``.

    Members: the array itself, ephemeral arrays of length ``ndim`` for the
    per-dimension data pointers and indices, and an ephemeral pointer for
    the exhausted flag.
    """
    arr_ty = fe_type.array_type
    elem_ptr = types.CPointer(arr_ty.dtype)
    rank = arr_ty.ndim
    members = [
        ('array', arr_ty),
        ('pointers', types.EphemeralArray(elem_ptr, rank)),
        ('indices', types.EphemeralArray(types.intp, rank)),
        ('exhausted', types.EphemeralPointer(types.boolean)),
    ]
    super(FlatIter, self).__init__(dmm, fe_type, members)
def test_numba_assembly():
    """Assemble a matrix and a vector from numba-compiled tabulate kernels.

    The kernels ``tabulate_tensor_A`` and ``tabulate_tensor_b`` are compiled
    with ``cfunc`` and attached to the forms by address; the resulting norms
    are compared against fixed reference values.
    """
    mesh = UnitSquareMesh(MPI.comm_world, 13, 13)
    Q = FunctionSpace(mesh, "Lagrange", 1)

    u = TrialFunction(Q)
    v = TestFunction(Q)

    space = Q._cpp_object
    a = cpp.fem.Form([space, Q._cpp_object])
    L = cpp.fem.Form([Q._cpp_object])

    # Shared C signature of both tabulate kernels.
    scalar_type = typeof(ScalarType())
    sig = types.void(
        types.CPointer(scalar_type),
        types.CPointer(types.CPointer(scalar_type)),
        types.CPointer(types.double),
        types.intc,
    )

    fnA = cfunc(sig, cache=True)(tabulate_tensor_A)
    a.set_cell_tabulate(0, fnA.address)

    fnb = cfunc(sig, cache=True)(tabulate_tensor_b)
    L.set_cell_tabulate(0, fnb.address)

    # Disabled alternative: build the same forms through the FFC JIT.
    if False:
        ufc_form = ffc_jit(dot(grad(u), grad(v)) * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        a = cpp.fem.Form(ufc_form, [Q._cpp_object, Q._cpp_object])
        ufc_form = ffc_jit(v * dx)
        ufc_form = cpp.fem.make_ufc_form(ufc_form[0])
        L = cpp.fem.Form(ufc_form, [Q._cpp_object])

    assembler = cpp.fem.Assembler([[a]], [L], [])
    A = PETScMatrix()
    b = PETScVector()
    block_type = cpp.fem.Assembler.BlockType.monolithic
    assembler.assemble(A, block_type)
    assembler.assemble(b, cpp.fem.Assembler.BlockType.monolithic)

    Anorm = A.norm(cpp.la.Norm.frobenius)
    bnorm = b.norm(cpp.la.Norm.l2)
    print(Anorm, bnorm)

    assert np.isclose(Anorm, 56.124860801609124)
    assert np.isclose(bnorm, 0.0739710713711999)

    list_timings([TimingType.wall])
class ArrayTemplate(cgutils.Structure):
    """
    Structure describing an array: parent object, item count, item size,
    data pointer, shape and strides.

    `dtype` and `nd` are free variables taken from the enclosing scope,
    which is not visible in this block.
    """
    _fields = [
        ('parent', types.pyobject),
        ('nitems', types.intp),
        ('itemsize', types.intp),
        # These three fields comprise the unofficial llarray ABI
        # (used by the GPU backend)
        ('data', types.CPointer(dtype)),
        ('shape', types.UniTuple(types.intp, nd)),
        ('strides', types.UniTuple(types.intp, nd)),
    ]
def generic(self, args, kws):
    """Type a one-argument ``from_buffer()`` call on a contiguous buffer.

    Returns a signature mapping the buffer type to a ``CPointer`` of its
    dtype.  Returns ``None`` when keyword arguments are given or the
    argument count is not exactly one.

    Raises ``TypingError`` when the argument is not a ``types.Buffer`` or
    its layout is neither 'C' nor 'F'.
    """
    if kws or len(args) != 1:
        return None

    (ary,) = args
    if not isinstance(ary, types.Buffer):
        raise TypingError(
            "from_buffer() expected a buffer object, got %s" % (ary,))

    contiguous_layouts = ('C', 'F')
    if ary.layout not in contiguous_layouts:
        raise TypingError(
            "from_buffer() unsupported on non-contiguous buffers (got %s)"
            % (ary,))

    return templates.signature(types.CPointer(ary.dtype), ary)
def __init__(self, dmm, fe_typ):
    """Initialise the instance model with a meminfo and a data pointer.

    Members, in order: ``meminfo`` (meminfo pointer to an opaque type named
    after the class data type) and ``data`` (pointer to the class data
    type).
    """
    cls_data_ty = types.ClassDataType(fe_typ)
    # MemInfoPointer uses the `dtype` attribute to traverse for nested
    # NRT MemInfo.  Since we handle nested NRT MemInfo ourselves, we give
    # MemInfoPointer an opaque type instead, so that it does not raise an
    # exception for nested meminfo.
    opaque_name = 'Opaque.' + str(cls_data_ty)
    dtype = types.Opaque(opaque_name)
    members = [
        ('meminfo', types.MemInfoPointer(dtype)),
        ('data', types.CPointer(cls_data_ty)),
    ]
    super(InstanceModel, self).__init__(dmm, fe_typ, members)
def __init__(self, dmm, fe_type):
    """Initialise the array model with its seven members.

    In order: ``meminfo``, ``parent``, ``nitems``, ``itemsize``, ``data``,
    ``shape`` and ``strides``.  Shape and strides are ``intp`` tuples of
    length ``fe_type.ndim``.
    """
    rank = fe_type.ndim
    elem_ty = fe_type.dtype
    names = ('meminfo', 'parent', 'nitems', 'itemsize',
             'data', 'shape', 'strides')
    member_types = (
        types.MemInfoPointer(elem_ty),
        types.pyobject,
        types.intp,
        types.intp,
        types.CPointer(elem_ty),
        types.UniTuple(types.intp, rank),
        types.UniTuple(types.intp, rank),
    )
    members = list(zip(names, member_types))
    super(ArrayModel, self).__init__(dmm, fe_type, members)
def next_raw(self):
    """Compile and return a C callback producing a ``uint64`` per call.

    The callback has signature ``uint64(uint64*)``; it views the pointer as
    a two-element ``uint64`` array and passes it to ``splitmix_next``.  The
    compiled callback is also stored on ``self._next_64``.
    """
    sig = types.uint64(types.CPointer(types.uint64))

    def next_64(st):
        bit_gen_state = carray(st, (2, ), dtype=np.uint64)
        return splitmix_next(bit_gen_state)

    compiled = cfunc(sig)(next_64)
    # Ensure a reference is held
    self._next_64 = compiled
    return compiled
def next_double(self):
    """Compile and return a C callback producing a double per call.

    The callback has signature ``double(uint64*)``; it views the pointer as
    a two-element ``uint64`` array, draws a value from ``splitmix_next``,
    shifts it right by 11 bits and divides by 9007199254740992.0.  The
    compiled callback is also stored on ``self._next_double``.
    """
    sig = types.double(types.CPointer(types.uint64))

    def next_double(st):
        bit_gen_state = carray(st, (2, ), dtype=np.uint64)
        return (np.uint64(splitmix_next(bit_gen_state)) >> np.uint64(11)) / 9007199254740992.0

    compiled = cfunc(sig)(next_double)
    # Ensure a reference is held
    self._next_double = compiled
    return compiled
def _type_map():
    """
    Lazily compute type map, as calling ffi.typeof() involves costly
    parsing of C code...

    The map is built on first use and cached in the module-level
    ``_cached_type_map``; later calls return the cached dict.
    """
    global _cached_type_map
    if _cached_type_map is not None:
        return _cached_type_map

    # (C declaration, numba type) pairs, in insertion order.
    c_to_numba = (
        ('char', types.int8),
        ('short', types.short),
        ('int', types.intc),
        ('long', types.long_),
        ('long long', types.longlong),
        ('unsigned char', types.uchar),
        ('unsigned short', types.ushort),
        ('unsigned int', types.uintc),
        ('unsigned long', types.ulong),
        ('unsigned long long', types.ulonglong),
        ('int8_t', types.char),
        ('uint8_t', types.uchar),
        ('int16_t', types.short),
        ('uint16_t', types.ushort),
        ('int32_t', types.intc),
        ('uint32_t', types.uintc),
        ('int64_t', types.longlong),
        ('uint64_t', types.ulonglong),
        ('float', types.float_),
        ('double', types.double),
        ('char *', types.voidptr),
        ('void *', types.voidptr),
        ('uint8_t *', types.CPointer(types.uint8)),
        ('float *', types.CPointer(types.float32)),
        ('double *', types.CPointer(types.float64)),
        ('ssize_t', types.intp),
        ('size_t', types.uintp),
        ('void', types.void),
    )
    _cached_type_map = {
        ffi.typeof(c_decl): numba_type for c_decl, numba_type in c_to_numba
    }
    return _cached_type_map
def __init__(self, dmm, fe_type):
    """Initialise the series model, choosing the layout by dtype.

    When ``fe_type.dtype`` equals ``string_type`` the members are
    ``num_items``, ``num_total_chars``, ``offsets``, ``data`` and
    ``meminfo``.  Otherwise the members are ``meminfo``, ``parent``,
    ``nitems``, ``itemsize``, ``data``, ``shape`` and ``strides`` with
    one-dimensional shape and strides.
    """
    # TODO: types other than Array and StringArray?
    if fe_type.dtype == string_type:
        names = ('num_items', 'num_total_chars', 'offsets', 'data', 'meminfo')
        member_types = (
            types.uint64,
            types.uint64,
            types.CPointer(offset_typ),
            types.CPointer(char_typ),
            types.MemInfoPointer(str_arr_payload_type),
        )
    else:
        ndim = 1
        names = ('meminfo', 'parent', 'nitems', 'itemsize',
                 'data', 'shape', 'strides')
        member_types = (
            types.MemInfoPointer(fe_type.dtype),
            types.pyobject,
            types.intp,
            types.intp,
            types.CPointer(fe_type.dtype),
            types.UniTuple(types.intp, ndim),
            types.UniTuple(types.intp, ndim),
        )
    members = list(zip(names, member_types))
    super(SeriesModel, self).__init__(dmm, fe_type, members)
def _integrand_function(integrand_function):
    """Wrap `integrand_function` as a `LowLevelCallable` to be used with quad.

    `integrand_function` has to have the signature (float, complex) -> float.
    This speeds up integration by removing call overhead. However only float
    arguments can be passed to and from the function, so the complex argument
    is rebuilt here from two doubles.

    The C callback has signature ``float64(intc, float64*)``.  ``xx[0]`` is
    forwarded as the float argument, and ``xx[1] + 1j*xx[2]`` as the complex
    argument.

    NOTE(review): this assumes callers pass ``(x, real, imag)`` as the extra
    arguments -- confirm against the call sites.
    """
    @numba.cfunc(nb_t.float64(nb_t.intc, nb_t.CPointer(nb_t.float64)))
    def wrapped(__, xx):
        # The original passed ``xx[1] + xx[2]``, a real sum, which cannot
        # produce the complex value the documented signature requires.
        return integrand_function(xx[0], xx[1] + 1j*xx[2])

    return LowLevelCallable(wrapped.ctypes)
class CContiguousFlatIter(cgutils.Structure):
    """
    .flat() implementation for C-contiguous arrays.

    The structure holds a pointer to the array, the step between
    consecutive items, a pointer to the current data pointer and a pointer
    to the current flat index.  `array_type` and `dtype` are free variables
    taken from the enclosing scope, which is not visible in this block.
    """
    _fields = [
        ('array', types.CPointer(array_type)),
        ('stride', types.intp),
        ('pointer', types.CPointer(types.CPointer(dtype))),
        ('index', types.CPointer(types.intp)),
    ]

    def init_specific(self, context, builder, arrty, arr):
        """
        Emit the iterator set-up code: the index starts at zero, the
        pointer at `arr.data`, and the stride is `arr.itemsize`.
        """
        zero = context.get_constant(types.intp, 0)
        self.index = cgutils.alloca_once_value(builder, zero)
        self.pointer = cgutils.alloca_once_value(builder, arr.data)
        # We can't trust strides[-1] to always contain the right
        # step value, see
        # http://docs.scipy.org/doc/numpy-dev/release.html#npy-relaxed-strides-checking
        self.stride = arr.itemsize

    def iternext_specific(self, context, builder, arrty, arr, result):
        """
        Emit the code for one iteration step.

        *result* is valid while the index is below `arr.nitems` (signed
        comparison).  In that case the value at the current pointer is
        yielded, the index is incremented by one and the pointer is
        advanced by the stride.
        """
        nitems = arr.nitems

        index = builder.load(self.index)
        is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
        result.set_valid(is_valid)

        with cgutils.if_likely(builder, is_valid):
            ptr = builder.load(self.pointer)
            value = context.unpack_value(builder, arrty.dtype, ptr)
            result.yield_(value)

            # Store the advanced index and pointer for the next step.
            index = builder.add(index, context.get_constant(types.intp, 1))
            builder.store(index, self.index)
            ptr = cgutils.pointer_add(builder, ptr, self.stride)
            builder.store(ptr, self.pointer)
def imdecode_overload(A_t, flags_t):
    """Return an implementation of image decoding for the given argument types.

    Only a one-dimensional ``uint8`` array combined with ``intp`` flags is
    supported; for any other combination nothing is returned.

    The returned implementation calls the external ``cv_imdecode`` function,
    which fills in a two-element shape buffer and returns a data pointer,
    then wraps that pointer as a three-channel array via ``wrap_array``.
    """
    supported = (
        isinstance(A_t, types.Array)
        and A_t.ndim == 1
        and A_t.dtype == types.uint8
        and flags_t == types.intp
    )
    if not supported:
        return None

    in_dtype = A_t.dtype
    out_dtype = A_t.dtype
    # Argument order of the external function:
    #   output shape, input array, input size (num_bytes), flags
    arg_types = (
        types.CPointer(types.intp),
        types.CPointer(in_dtype),
        types.intp,
        types.intp,
    )
    sig = types.CPointer(out_dtype)(*arg_types)
    cv_imdecode = types.ExternalFunction("cv_imdecode", sig)

    def imdecode_imp(A, flags):
        out_shape = np.empty(2, dtype=np.int64)
        data = cv_imdecode(out_shape.ctypes, A.ctypes, len(A), flags)
        n_channels = 3
        out_shape_tup = (out_shape[0], out_shape[1], n_channels)
        return wrap_array(data, out_shape_tup)

    return imdecode_imp