def test_unsigned_prange(self):
    """Sum a typed list inside a ``prange`` whose bound is a ``uint64``."""

    @njit(parallel=True)
    def foo(a):
        # Both the loop bound and the accumulator are explicitly unsigned.
        r = types.uint64(3)
        s = types.uint64(0)
        for i in prange(r):
            s = s + a[i]
        return s

    # Build the uint64 typed list element by element: 12 + 1 + 7 == 20.
    values = List.empty_list(types.uint64)
    for item in (12, 1, 7):
        values.append(types.uint64(item))

    self.assertEqual(foo(values), 20)
def test_ouput_ctypes():
    """Compare user-compiled output functions with the named built-in ones.

    For both "upper" and "lower", an ``LCG128Mix`` driven by the compiled
    callback's ``ctypes`` handle must produce the same 10 raw values as one
    configured with the corresponding string name.
    """
    output_sig = types.uint64(types.uint64, types.uint64)

    @cfunc(output_sig)
    def upper(high, low):
        return high

    @cfunc(output_sig)
    def lower(high, low):
        return low

    for name, compiled in (("upper", upper), ("lower", lower)):
        from_callback = LCG128Mix(SEED, output=compiled.ctypes).random_raw(10)
        from_name = LCG128Mix(SEED, output=name).random_raw(10)
        np.testing.assert_equal(from_callback, from_name)
def sizeof(context, t):
    """Return a (signature, codegen) pair producing a size as ``uint64``.

    The signature takes one argument of type ``t`` and returns ``uint64``.
    The generated code ignores the runtime argument value and emits a
    compile-time constant obtained from ``get_abi_sizeof``.
    """

    def codegen(cgctx, builder, sig, args):
        # NOTE(review): ``t`` is handed to ``get_abi_sizeof`` as-is; confirm
        # whether it needs converting with ``get_value_type`` first.
        nbytes = cgctx.get_abi_sizeof(t)
        return cgctx.get_constant(types.uint64, nbytes)

    return types.uint64(t), codegen
def _Py_HashBytes(val, _len):
    """Hash ``_len`` bytes read from ``val``.

    Three paths are taken depending on the length:

    * ``_len == 0``: the result is ``process_return(0)``.
    * ``_len < _Py_HASH_CUTOFF``: an inline DJBX33A loop over the bytes.
    * otherwise: ``_siphash24`` keyed with the two ``_Py_HashSecret_siphash``
      words.

    Every path hands its value to ``process_return`` before returning.
    """
    if (_len == 0):
        return process_return(0)

    if (_len < _Py_HASH_CUTOFF):
        # TODO: this branch needs testing, needs a CPython setup for it!
        # /* Optimize hashing of very small strings with inline DJBX33A. */
        _hash = _Py_uhash_t(5381)  # /* DJBX33A starts with 5381 */
        for idx in range(_len):
            # (_hash << 5) + _hash is _hash * 33; then add the next byte.
            _hash = ((_hash << 5) + _hash) + np.uint8(grab_byte(val, idx))

        # Fold in the length and the secret suffix after the byte loop.
        _hash ^= _len
        _hash ^= _Py_HashSecret_djbx33a_suffix
    else:
        tmp = _siphash24(types.uint64(_Py_HashSecret_siphash_k0),
                         types.uint64(_Py_HashSecret_siphash_k1),
                         val, _len)
        # NOTE(review): the siphash result goes through process_return here
        # and again in the final return below - confirm that is intended.
        _hash = process_return(tmp)
    return process_return(_hash)
def grab_uint64_t(typingctx, data, offset):
    """Return a (signature, impl) pair that loads one 64-bit word.

    The signature is ``uint64(voidptr, intp)``. The generated code
    reinterprets the pointer as a pointer to 64-bit integers and loads the
    element at the given index, so the index counts 64-bit words rather
    than bytes.
    """

    def impl(context, builder, signature, args):
        base, index = args
        word_ptr_type = ir.IntType(64).as_pointer()
        words = builder.bitcast(base, word_ptr_type)
        element_ptr = builder.gep(words, [index])
        return builder.load(element_ptr)

    return types.uint64(types.voidptr, types.intp), impl
def num_total_chars(typingctx, str_arr_typ=None):
    """Return a (signature, codegen) pair reading ``num_total_chars``.

    The signature is ``uint64(string_array_type)``. The generated code wraps
    the incoming value in a ``string_array_type`` helper struct and returns
    its ``num_total_chars`` field.
    """
    # None default to make IntelliSense happy
    assert is_str_arr_typ(str_arr_typ)

    def codegen(context, builder, sig, args):
        (arr_value,) = args
        arr_struct = context.make_helper(builder, string_array_type, arr_value)
        return arr_struct.num_total_chars

    signature = types.uint64(string_array_type)
    return signature, codegen
def list_itemsize(tyctx, list_ty):
    """Return a (signature, codegen) pair giving a list's item size.

    The signature is ``uint64(list_ty)``. The generated code emits a
    compile-time constant: the ABI size of the LLVM type that backs the
    list's ``item_type``.
    """

    def codegen(cgctx, builder, sig, args):
        # The list type is recovered from the signature, not the closure.
        item_type = sig.args[0].item_type
        llvm_item_type = cgctx.get_value_type(item_type)
        nbytes = cgctx.get_abi_sizeof(llvm_item_type)
        return cgctx.get_constant(sig.return_type, nbytes)

    return types.uint64(list_ty), codegen
def next_raw(self):
    """Compile and return a C callback that draws one 64-bit value.

    The callback has signature ``uint64(uint64*)``. It views the pointed-to
    memory as a 2-element ``uint64`` array and passes it to
    ``splitmix_next``. The compiled object is also stored on the instance.
    """
    callback_sig = types.uint64(types.CPointer(types.uint64))

    @cfunc(callback_sig)
    def next_64(st):
        return splitmix_next(carray(st, (2,), dtype=np.uint64))

    # Ensure a reference is held
    self._next_64 = next_64
    return next_64
def hashmap_size(typingctx, dict_type):
    """Return a (signature, codegen) pair calling an external size function.

    The signature is ``uint64(dict_type)``. The generated code declares (or
    reuses) a function named ``hashmap_size_<key>_to_<value>``, where the two
    postfixes come from ``_get_types_postfixes``. That function takes an
    ``i8*`` and returns an ``i64``; it is called with the dict struct's
    ``data_ptr``.
    """
    key_type_postfix, value_type_postfix = _get_types_postfixes(
        dict_type.key_type, dict_type.value_type)

    def codegen(context, builder, sig, args):
        (dict_val,) = args
        proxy_cls = cgutils.create_struct_proxy(dict_type)
        cdict = proxy_cls(context, builder, value=dict_val)

        byte_ptr = lir.IntType(8).as_pointer()
        fnty = lir.FunctionType(lir.IntType(64), [byte_ptr])
        fn_hashmap_size = cgutils.get_or_insert_function(
            builder.module,
            fnty,
            name=f"hashmap_size_{key_type_postfix}_to_{value_type_postfix}",
        )
        return builder.call(fn_hashmap_size, [cdict.data_ptr])

    return types.uint64(dict_type), codegen
def _siphash24(k0, k1, src, src_sz):
    """Compute a keyed 64-bit hash over ``src_sz`` bytes read from ``src``.

    ``k0`` and ``k1`` are the two key words. Input is consumed as whole
    64-bit words while at least 8 bytes remain; the 0-7 trailing bytes are
    packed into one final word together with the length. The result is the
    XOR of the four state words after finalization.

    NOTE(review): the round function ``_DOUBLE_ROUND`` is defined elsewhere;
    presumably it performs two SipRounds, which would make this
    SipHash-2-4 - confirm against its definition.
    """
    # The original byte count is placed in the top byte of the last word.
    b = types.uint64(src_sz) << 56
    # Initial state: the key words XORed with four fixed constants.
    v0 = k0 ^ types.uint64(0x736f6d6570736575)
    v1 = k1 ^ types.uint64(0x646f72616e646f6d)
    v2 = k0 ^ types.uint64(0x6c7967656e657261)
    v3 = k1 ^ types.uint64(0x7465646279746573)

    # idx counts 64-bit words consumed; src_sz counts bytes still unread.
    idx = 0
    while (src_sz >= 8):
        mi = grab_uint64_t(src, idx)
        idx += 1
        src_sz -= 8
        v3 ^= mi
        v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
        v0 ^= mi

    # this is the switch fallthrough:
    # https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L390-L400
    t = types.uint64(0x0)
    # Byte offset of the first trailing byte (words consumed * 8).
    boffset = idx * 8
    ohexefef = types.uint64(0xff)
    # Each test below is independent (not elif), so every byte position
    # below the remaining count is filled in, as in a C switch fallthrough.
    if src_sz >= 7:
        jmp = (6 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 6)) << jmp)
    if src_sz >= 6:
        jmp = (5 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 5)) << jmp)
    if src_sz >= 5:
        jmp = (4 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 4)) << jmp)
    if src_sz >= 4:
        # Keep the upper four bytes and refill the lower four byte by byte.
        t &= types.uint64(0xffffffff00000000)
        for i in range(4):
            jmp = i * 8
            mask = ~types.uint64(ohexefef << jmp)
            t = (t & mask) | (
                types.uint64(grab_byte(src, boffset + i)) << jmp)
    if src_sz >= 3:
        jmp = (2 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 2)) << jmp)
    if src_sz >= 2:
        jmp = (1 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 1)) << jmp)
    if src_sz >= 1:
        mask = ~(ohexefef)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 0)))

    # Final word: trailing bytes in the low positions, length in the top byte.
    b |= t
    v3 ^= b
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    v0 ^= b
    # Finalization: flip the low byte of v2, then two more double rounds.
    v2 ^= ohexefef
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    t = (v0 ^ v1) ^ (v2 ^ v3)
    return t
def _PyHASH_XXROTATE(x):
    # Rotate left 31 bits: the shift amounts 31 and 33 sum to 64.
    shifted_up = x << types.uint64(31)
    wrapped_around = x >> types.uint64(33)
    return shifted_up | wrapped_around
def foo(a):
    """Sum the first three elements of ``a`` in a ``prange`` loop.

    Both the loop bound and the accumulator are created as ``uint64``.
    """
    count = types.uint64(3)
    total = types.uint64(0)
    for position in prange(count):
        total = total + a[position]
    return total
def _PyHASH_XXROTATE(x):
    # Rotate left 13 bits within a 32-bit word.
    #
    # For a rotate, the left and right shift amounts must sum to the word
    # width: 13 + 19 == 32. This matches CPython's 32-bit definition,
    # ``(x << 13) | (x >> 19)``. The previous right shift of 16 summed to
    # 29, so bits were dropped and overlapped instead of rotated.
    #
    # NOTE(review): assumes the caller keeps ``x`` within 32 bits (or
    # truncates the result) - confirm against the declared type of ``x``.
    return ((x << types.uint64(13)) | (x >> types.uint64(19)))
from .base_general import basis_general
from ._basis_general_core import user_core_wrap
import numpy as _np
from numba import cfunc, types, njit

# CFunc moved between numba releases; try the old location first.
try:
    from numba.ccallback import CFunc  # numba < 0.49.0
except ModuleNotFoundError:
    from numba.core.ccallback import CFunc  # numba >= 0.49.0

# numba signatures, each defined in a 32-bit and a 64-bit state flavour.
# NOTE(review): presumably these describe the user-supplied C callbacks
# (map / next_state / pre_check_state) - confirm against where they are used.

# state(state, intc, int8*, state*)
map_sig_32 = types.uint32(types.uint32, types.intc, types.CPointer(types.int8), types.CPointer(types.uint32))
map_sig_64 = types.uint64(types.uint64, types.intc, types.CPointer(types.int8), types.CPointer(types.uint64))

# state(state, state, state, state*)
next_state_sig_32 = types.uint32(types.uint32, types.uint32, types.uint32, types.CPointer(types.uint32))
next_state_sig_64 = types.uint64(types.uint64, types.uint64, types.uint64, types.CPointer(types.uint64))

# state(state, state, state*)
pre_check_state_sig_32 = types.uint32(types.uint32, types.uint32, types.CPointer(types.uint32))
pre_check_state_sig_64 = types.uint64(types.uint64, types.uint64, types.CPointer(types.uint64))

# C-layout record types pairing a complex128 'matrix_ele' with an integer
# 'state' of the matching width.
op_results_32 = types.Record.make_c_struct([
    ('matrix_ele', types.complex128), ('state', types.uint32),
])
op_results_64 = types.Record.make_c_struct([('matrix_ele', types.complex128), ('state', types.uint64)])
import numpy as np
import pytest

from randomgen.generator import Generator
from randomgen.wrapper import UserBitGenerator

# numba is optional: without it the flag stays False and the module-level
# ``pytestmark`` is set to the skip marker.
HAS_NUMBA = False
try:
    from numba import carray, cfunc, jit, types

    HAS_NUMBA = True
except ImportError:
    pytestmark = pytest.mark.skip

if HAS_NUMBA:
    # uint64 -> uint64
    murmur_hash_3_sig = types.uint64(types.uint64)

    @jit(signature_or_function=murmur_hash_3_sig, inline="always")
    def murmur_hash_3(z):
        # Two xor-shift-multiply steps followed by a final xor-shift.
        # NOTE(review): the multipliers look like the published SplitMix64
        # finalizer constants - confirm against the reference.
        z = (z ^ (z >> np.uint64(30))) * np.uint64(0xBF58476D1CE4E5B9)
        z = (z ^ (z >> np.uint64(27))) * np.uint64(0x94D049BB133111EB)
        return z ^ (z >> np.uint64(31))

    # uint64[:] -> uint64
    split_mix_next_sig = types.uint64(types.uint64[:])

    @jit(signature_or_function=split_mix_next_sig, inline="always")
    def splitmix_next(state):
        # Advance the counter in state[0] in place by a fixed increment,
        # then return the mixed value of the updated counter.
        state[0] += 0x9E3779B97F4A7C15
        return murmur_hash_3(state[0])
def _ROTATE(x, b):
    # Rotate x left by b bits within a 64-bit word: the bits shifted out at
    # the top are brought back in at the bottom by the (64 - b) right shift.
    upper = x << b
    lower = x >> (types.uint64(64) - b)
    return types.uint64(upper | lower)
def _siphash24(k0, k1, src, src_sz):
    """Compute a keyed 64-bit hash over ``src_sz`` bytes read from ``src``.

    ``k0`` and ``k1`` are the two key words. Whole 64-bit words are absorbed
    while at least 8 bytes remain. The remaining 0-7 bytes are assembled
    into a last word whose top byte carries the length. The returned value
    is the XOR of the four state words after finalization.

    NOTE(review): ``_DOUBLE_ROUND`` is not visible here; presumably it is
    two SipRounds, giving SipHash-2-4 - confirm against its definition.
    """
    # Length goes into bits 56-63 of the final message word.
    b = types.uint64(src_sz) << 56
    # Seed the four state words from the key and fixed constants.
    v0 = k0 ^ types.uint64(0x736f6d6570736575)
    v1 = k1 ^ types.uint64(0x646f72616e646f6d)
    v2 = k0 ^ types.uint64(0x6c7967656e657261)
    v3 = k1 ^ types.uint64(0x7465646279746573)

    # idx is a word index for grab_uint64_t; src_sz is decremented in bytes.
    idx = 0
    while (src_sz >= 8):
        mi = grab_uint64_t(src, idx)
        idx += 1
        src_sz -= 8
        v3 ^= mi
        v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
        v0 ^= mi

    # this is the switch fallthrough:
    # https://github.com/python/cpython/blob/d1dd6be613381b996b9071443ef081de8e5f3aff/Python/pyhash.c#L390-L400
    t = types.uint64(0x0)
    # Convert the word index into the byte offset of the tail.
    boffset = idx * 8
    ohexefef = types.uint64(0xff)
    # Separate ``if`` tests (not ``elif``) reproduce the fallthrough: for a
    # tail of k bytes, every position 0..k-1 is written into t.
    if src_sz >= 7:
        jmp = (6 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 6)) << jmp)
    if src_sz >= 6:
        jmp = (5 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 5)) << jmp)
    if src_sz >= 5:
        jmp = (4 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 4)) << jmp)
    if src_sz >= 4:
        # Preserve bytes 4-7 and rewrite bytes 0-3 one at a time.
        t &= types.uint64(0xffffffff00000000)
        for i in range(4):
            jmp = i * 8
            mask = ~types.uint64(ohexefef << jmp)
            t = (t & mask) | (types.uint64(grab_byte(src, boffset + i)) << jmp)
    if src_sz >= 3:
        jmp = (2 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 2)) << jmp)
    if src_sz >= 2:
        jmp = (1 * 8)
        mask = ~types.uint64(ohexefef << jmp)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 1)) << jmp)
    if src_sz >= 1:
        mask = ~(ohexefef)
        t = (t & mask) | (types.uint64(grab_byte(src, boffset + 0)))

    # Merge the tail bytes with the length byte and absorb the last word.
    b |= t
    v3 ^= b
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    v0 ^= b
    # Finalization: XOR 0xff into v2, then run two more double rounds.
    v2 ^= ohexefef
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    v0, v1, v2, v3 = _DOUBLE_ROUND(v0, v1, v2, v3)
    t = (v0 ^ v1) ^ (v2 ^ v3)
    return t
GrB_UnaryOp = OpContainer() GrB_BinaryOp = OpContainer() ################################## # Useful collections of signatures ################################## _unary_bool = [nt.boolean(nt.boolean)] _unary_int = [ nt.uint8(nt.uint8), nt.int8(nt.int8), nt.uint16(nt.uint16), nt.int16(nt.int16), nt.uint32(nt.uint32), nt.int32(nt.int32), nt.uint64(nt.uint64), nt.int64(nt.int64) ] _unary_float = [nt.float32(nt.float32), nt.float64(nt.float64)] _unary_all = _unary_bool + _unary_int + _unary_float _binary_bool = [nt.boolean(nt.boolean, nt.boolean)] _binary_int = [ nt.uint8(nt.uint8, nt.uint8), nt.int8(nt.int8, nt.int8), nt.uint16(nt.uint16, nt.uint16), nt.int16(nt.int16, nt.int16), nt.uint32(nt.uint32, nt.uint32), nt.int32(nt.int32, nt.int32), nt.uint64(nt.uint64, nt.uint64), nt.int64(nt.int64, nt.int64)