Esempio n. 1
0
def _long_impl(val):
    """Hash an integer value by folding it in ``_PyLong_SHIFT``-bit digits.

    ``val`` is shifted and masked to extract successive
    ``_PyLong_SHIFT``-bit chunks, most significant chunk first.  For every
    chunk the running value ``x`` is shifted left by ``_PyLong_SHIFT`` and
    masked with ``_PyHASH_MODULUS``, the bits shifted out past
    ``_PyHASH_BITS`` are OR-ed back in at the bottom, the chunk is added and
    a single conditional subtraction of ``_PyHASH_MODULUS`` is applied.

    Returns the accumulated value converted with ``_Py_hash_t``.
    """
    # This function assumes val came from a long int repr, with val being a
    # uint64_t. The input therefore has to be split into PyLong_SHIFT sized
    # chunks held in an unsigned, hash-wide type; the widest value numba can
    # handle is a 64bit int.

    # mask to select low _PyLong_SHIFT bits: all 32 bits set, then shifted
    # right so that only _PyLong_SHIFT ones remain
    _tmp_shift = 32 - _PyLong_SHIFT
    mask_shift = (~types.uint32(0x0)) >> _tmp_shift

    # a 64bit wide max means Numba only needs 3 x 30 bit values max,
    # or 5 x 15 bit values max on 32bit platforms
    i = (64 // _PyLong_SHIFT) + 1

    # alg as per hash_long
    x = 0
    p3 = (_PyHASH_BITS - _PyLong_SHIFT)
    # walk the chunks from the most significant (idx == i - 1) down to 0
    for idx in range(i - 1, -1, -1):
        # p2 | p4 behaves as a left-rotate of x by _PyLong_SHIFT bits if
        # _PyHASH_MODULUS is the all-ones mask of _PyHASH_BITS bits
        # NOTE(review): both constants are defined outside this block — confirm
        p1 = x << _PyLong_SHIFT
        p2 = p1 & _PyHASH_MODULUS
        p4 = x >> p3
        x = p2 | p4
        # the shift and mask splits out the `ob_digit` parts of a Long repr
        x += types.uint32((val >> idx * _PyLong_SHIFT) & mask_shift)
        # single reduction step after adding the digit
        if x >= _PyHASH_MODULUS:
            x -= _PyHASH_MODULUS
    return _Py_hash_t(x)
Esempio n. 2
0
File: hashing.py Progetto: esc/numba
def _long_impl(val):
    """Hash an integer value by folding it in ``_PyLong_SHIFT``-bit digits.

    ``val`` is shifted and masked to extract successive
    ``_PyLong_SHIFT``-bit chunks, most significant chunk first.  For every
    chunk the running value ``x`` is shifted left by ``_PyLong_SHIFT`` and
    masked with ``_PyHASH_MODULUS``, the bits shifted out past
    ``_PyHASH_BITS`` are OR-ed back in at the bottom, the chunk is added and
    a single conditional subtraction of ``_PyHASH_MODULUS`` is applied.

    Returns the accumulated value converted with ``_Py_hash_t``.
    """
    # This function assumes val came from a long int repr, with val being a
    # uint64_t. The input therefore has to be split into PyLong_SHIFT sized
    # chunks held in an unsigned, hash-wide type; the widest value numba can
    # handle is a 64bit int.

    # mask to select low _PyLong_SHIFT bits: all 32 bits set, then shifted
    # right so that only _PyLong_SHIFT ones remain
    _tmp_shift = 32 - _PyLong_SHIFT
    mask_shift = (~types.uint32(0x0)) >> _tmp_shift

    # a 64bit wide max means Numba only needs 3 x 30 bit values max,
    # or 5 x 15 bit values max on 32bit platforms
    i = (64 // _PyLong_SHIFT) + 1

    # alg as per hash_long
    x = 0
    p3 = (_PyHASH_BITS - _PyLong_SHIFT)
    # walk the chunks from the most significant (idx == i - 1) down to 0
    for idx in range(i - 1, -1, -1):
        # p2 | p4 behaves as a left-rotate of x by _PyLong_SHIFT bits if
        # _PyHASH_MODULUS is the all-ones mask of _PyHASH_BITS bits
        # NOTE(review): both constants are defined outside this block — confirm
        p1 = x << _PyLong_SHIFT
        p2 = p1 & _PyHASH_MODULUS
        p4 = x >> p3
        x = p2 | p4
        # the shift and mask splits out the `ob_digit` parts of a Long repr
        x += types.uint32((val >> idx * _PyLong_SHIFT) & mask_shift)
        # single reduction step after adding the digit
        if x >= _PyHASH_MODULUS:
            x -= _PyHASH_MODULUS
    return _Py_hash_t(x)
Esempio n. 3
0
def grind_seeds(matches, results, seedoffs, seedstep) -> int:
    """Scan seed offsets and count those accepted by ``calc_all_from_seed``.

    A 0x80-entry buffer ``R`` is filled from a linear congruential sequence
    that starts at ``BASE_SEED``.  Offsets ``seedoffs, seedoffs + seedstep,
    ...`` below ``1 << 31`` are then visited; for each one
    ``calc_all_from_seed`` is called and, when it returns a true value, the
    result slot counter is advanced.  The scan stops early once 100 slots
    have been used.

    Parameters
    ----------
    matches, results
        Passed through unchanged to ``calc_all_from_seed``.
        NOTE(review): their layout is defined by that helper — not visible
        here.
    seedoffs
        First offset to test; also the number of extra generator steps
        applied before the scan starts.
    seedstep
        Stride between tested offsets and number of generator steps applied
        after each test.

    Returns
    -------
    int
        The number of result slots used (``rslot``).
    """
    R = numpy.full(0x80, uint32(0))
    arsonist_shuffle = numpy.full(6, uint32(0))
    rslot = 0

    # Prime the buffer: each generated value is stored twice, 0x40 slots apart.
    seed = BASE_SEED
    for i in range(0x40):
        R[i] = seed
        R[i + 0x40] = seed
        seed = ((seed * 1103515245) + 12345) & 0x7FFFFFFF

    # Advance by `seedoffs` further steps.  The two indices below are again
    # 0x40 apart modulo 0x80, so every value still lands in two slots.
    for j in range(seedoffs):
        R[(j + 0x40) & 0x7F] = seed
        R[(j + 0x80) & 0x7F] = seed
        seed = ((seed * 1103515245) + 12345) & 0x7FFFFFFF

    for r in range(seedoffs, 1 << 31, seedstep):
        # A true return value means the current result slot was consumed.
        if calc_all_from_seed(R, arsonist_shuffle, matches, results, rslot, r):
            rslot += 1

        # Step the generator forward to the next offset that will be tested.
        for j in range(seedstep):
            R[(r + j + 0x40) & 0x7F] = seed
            R[(r + j + 0x80) & 0x7F] = seed
            seed = ((seed * 1103515245) + 12345) & 0x7FFFFFFF

        # Print progress (percent of the 2**31 range) whenever the number of
        # processed offsets is a multiple of 0x100000.
        if ((r - seedoffs + seedstep) & 0xFFFFF) == 0:
            print((r - seedoffs + seedstep) * 100.0 / (1 << 31))

        # Stop after 100 accepted results.
        if rslot >= 100:
            return rslot

    return rslot
Esempio n. 4
0
def euclidean_map_kernel(x: ArrayLike, y: ArrayLike,
                         out: ArrayLike) -> None:  # pragma: no cover.
    """Euclidean map CUDA kernel.

    Each thread takes its 2-D grid position ``(i1, i2)`` and, when that
    position lies inside the first two dimensions of ``out``, hands the pair
    ``x[i1]``, ``y[i2]`` to ``_euclidean_distance_map`` together with
    ``out[i1][i2]`` as the destination.

    Parameters
    ----------
    x
        [array-like, shape: (m, n)]
    y
        [array-like, shape: (p, n)]
    out
        [array-like, shape: (m, p, 1)]
        The zeros array of shape (m, p, 1) that receives the result.

    Returns
    -------
    None
        Nothing is returned; the result for every pair of vectors from ``x``
        and ``y`` is written into ``out``.
    """
    # Aggressive typecasting of all the variables is done to improve
    # performance.

    # Unique index of the thread in the whole grid.
    i1 = types.uint32(cuda.grid(2)[types.uint32(0)])
    i2 = types.uint32(cuda.grid(2)[types.uint32(1)])

    out_shape_0 = types.uint32(out.shape[types.uint32(0)])
    out_shape_1 = types.uint32(out.shape[types.uint32(1)])

    if i1 >= out_shape_0 or i2 >= out_shape_1:
        # Quit if (i1, i2) is outside of the valid output array boundary.
        # This is required because we may spin up more threads than we need.
        return
    _euclidean_distance_map(x[i1], y[i2], out[i1][i2])
Esempio n. 5
0
def _euclidean_distance_map(a: ArrayLike, b: ArrayLike,
                            out: ArrayLike) -> None:  # pragma: no cover.
    """Helper function for the map step of euclidean distance which runs on
    the device (GPU) itself.

    Accumulates ``(a[i] - b[i]) ** 2`` over every index ``i`` at which both
    ``a[i]`` and ``b[i]`` are non-negative, and stores the sum in ``out[0]``.

    Parameters
    ----------
    a
        [array-like] Indexed here as a 1-D vector of length ``a.shape[0]``.
    b
        [array-like] Indexed with the same positions as ``a``.
        NOTE(review): assumed to be at least as long as ``a`` — the loop
        bound comes from ``a`` only.
    out
        [array-like, shape: (1)]
        The output array for returning the result.

    Returns
    -------
    None
        The squared sum of the differences of the corresponding elements of
        the given pair of vectors is written to ``out[0]``.
    """
    square_sum = types.float32(0)

    zero = types.uint32(0)
    a_shape_0 = types.uint32(a.shape[types.uint32(0)])
    i = types.uint32(0)

    # Explicitly typed counter loop over the elements of `a`.
    while i < a_shape_0:
        # Positions where either value is negative do not contribute.
        # NOTE(review): presumably negatives mark missing entries — confirm.
        if a[i] >= zero and b[i] >= zero:
            square_sum += (a[i] - b[i])**types.uint32(2)
        i = types.uint32(i + types.uint32(1))
    out[0] = square_sum
Esempio n. 6
0
def _boss_distance_dict(first, second, best_dist):
    dist = 0
    for word, val_a in first.items():
        val_b = second.get(word, types.uint32(0))
        buf = val_a - val_b
        dist += buf * buf

        if dist > best_dist:
            return 0x7FFFFFFFFFFFFFFF
    return dist
Esempio n. 7
0
    def test_unsigned_access(self):
        """Check that a typed list works with ``uint32`` values and indices."""
        lst = List.empty_list(int32)
        idx = [types.uint32(k) for k in range(3)]

        # insert
        initial = (10, 11, 12)
        for value in initial:
            lst.append(types.uint32(value))
        self.assertEqual(len(lst), 3)

        # getitem
        for position, expected in zip(idx, initial):
            self.assertEqual(lst[position], expected)

        # setitem
        replacements = (123, 456, 789)
        for position, value in zip(idx, replacements):
            lst[position] = value
        for position, value in zip(idx, replacements):
            self.assertEqual(lst[position], value)

        # index
        needles = [types.uint32(value) for value in replacements]
        for expected_position, needle in enumerate(needles):
            self.assertEqual(lst.index(needle), expected_position)

        # delitem
        lst.__delitem__(idx[2])
        del lst[idx[1]]
        self.assertEqual(len(lst), 1)
        self.assertEqual(lst[idx[0]], 123)

        # pop
        for value in (2, 3, 4):
            lst.append(value)
        self.assertEqual(len(lst), 4)
        self.assertEqual(lst.pop(), 4)
        self.assertEqual(lst.pop(idx[2]), 3)
        self.assertEqual(lst.pop(idx[1]), 2)
        self.assertEqual(lst.pop(idx[0]), 123)
Esempio n. 8
0
def correlation_map_kernel(x: ArrayLike, y: ArrayLike,
                           out: ArrayLike) -> None:  # pragma: no cover.
    """Correlation map CUDA kernel.

    Each thread takes its 2-D grid position ``(i1, i2)`` and, when that
    position lies inside the first two dimensions of ``out``, calls
    ``_correlation`` with ``x[i1]``, ``y[i2]`` and ``out[i1][i2]`` as the
    destination.

    Parameters
    ----------
    x
        [array-like] Indexed along its first axis by ``i1``.
    y
        [array-like] Indexed along its first axis by ``i2``.
    out
        [array-like] Indexed as ``out[i1][i2]``; its first two dimensions
        bound the valid thread positions.

    Returns
    -------
    None
        Results are written into ``out`` by ``_correlation``.
    """
    # Position of this thread in the whole grid, explicitly typed as uint32.
    i1 = types.uint32(cuda.grid(2)[types.uint32(0)])
    i2 = types.uint32(cuda.grid(2)[types.uint32(1)])

    out_shape_0 = types.uint32(out.shape[types.uint32(0)])
    out_shape_1 = types.uint32(out.shape[types.uint32(1)])

    if i1 >= out_shape_0 or i2 >= out_shape_1:
        # Quit if (i1, i2) is outside of the valid output array boundary.
        return

    _correlation(x[i1], y[i2], out[i1][i2])
Esempio n. 9
0
def getitem_str_offset(typingctx, str_arr_typ, ind_t):
    """Build the signature and code generator that read one offset entry.

    NOTE(review): the ``(typingctx, *arg_types) -> (signature, codegen)``
    shape suggests this is wrapped by numba's ``@intrinsic`` — the decorator
    is not visible here, confirm.

    Parameters
    ----------
    typingctx
        Typing context; not used by this function.
    str_arr_typ
        Type of the first argument.  It is not used when building the
        signature, which names ``string_array_type`` directly.
    ind_t
        Type of the index argument.

    Returns
    -------
    tuple
        ``(signature, codegen)`` where the signature is
        ``uint32(string_array_type, ind_t)``.
    """
    def codegen(context, builder, sig, args):
        in_str_arr, ind = args

        # Wrap the raw value in a helper to reach the `offsets` field.
        string_array = context.make_helper(builder, string_array_type,
                                           in_str_arr)
        # Reinterpret `offsets` as a pointer to 32-bit integers ...
        offsets = builder.bitcast(string_array.offsets,
                                  lir.IntType(32).as_pointer())
        # ... and load the element at position `ind`.
        return builder.load(builder.gep(offsets, [ind]))

    return types.uint32(string_array_type, ind_t), codegen
Esempio n. 10
0
def _correlation(x: ArrayLike, y: ArrayLike,
                 out: ArrayLike) -> None:  # pragma: no cover.
    """Accumulate the sums used for a correlation between ``x`` and ``y``.

    Only positions ``i`` where both ``x[i]`` and ``y[i]`` are non-negative
    are included.  Six values are written to ``out``:

    * ``out[0]`` -- sum of ``x[i]``
    * ``out[1]`` -- sum of ``y[i]``
    * ``out[2]`` -- sum of ``x[i] * x[i]``
    * ``out[3]`` -- sum of ``y[i] * y[i]``
    * ``out[4]`` -- sum of ``x[i] * y[i]``
    * ``out[5]`` -- number of positions included

    Parameters
    ----------
    x
        [array-like] Indexed as a 1-D vector of length ``x.shape[0]``.
    y
        [array-like] Indexed with the same positions as ``x``.
        NOTE(review): assumed to be at least as long as ``x`` — the loop
        bound comes from ``x`` only.
    out
        [array-like] Must provide at least six writable elements.

    Returns
    -------
    None
    """
    # Note: assigning variable and only saving the final value in the
    # array made this significantly faster.

    # aggressively making all variables explicitly typed
    # makes it more performant by a factor of ~2-3x
    v0 = types.float32(0)
    v1 = types.float32(0)
    v2 = types.float32(0)
    v3 = types.float32(0)
    v4 = types.float32(0)
    v5 = types.float32(0)

    m = types.uint32(x.shape[types.uint32(0)])
    i = types.uint32(0)

    zero = types.uint32(0)

    while i < m:
        # Positions where either value is negative are skipped.
        # NOTE(review): presumably negatives mark missing entries — confirm.
        if x[i] >= zero and y[i] >= zero:
            v0 += x[i]
            v1 += y[i]
            v2 += x[i] * x[i]
            v3 += y[i] * y[i]
            v4 += x[i] * y[i]
            v5 += 1
        i = types.uint32(i + types.uint32(1))

    # Store the accumulated values once, after the loop.
    out[0] = v0
    out[1] = v1
    out[2] = v2
    out[3] = v3
    out[4] = v4
    out[5] = v5
Esempio n. 11
0
def param_lookup(param_dict, default_val, dtype):
    """
    Build and return the ufunc ``lookup(channel, val)``.

    The returned ufunc writes, for each channel, the value stored for that
    channel in ``param_dict``; channels without an entry receive
    ``default_val``. Keys are converted to ``uint32`` and values to the numba
    type matching ``dtype`` once, up front.
    """
    value_type = from_dtype(np.dtype(dtype))

    # Convert keys and values ahead of time so that no casting is needed
    # when the ufunc runs.
    typed_table = {}
    for key, value in param_dict.items():
        typed_table[types.uint32(key)] = value_type(value)
    fallback = value_type(default_val)

    signature = "void(uint32, " + value_type.name + "[:])"

    @guvectorize([signature], "()->()", forceobj=True)
    def lookup(channel, val):
        """Look up a value for the provided channel from a dictionary provided
        at compile time"""
        val[0] = typed_table.get(channel, fallback)

    return lookup
Esempio n. 12
0
    def next_32(self):
        """Compile and return a C callback that yields 32-bit outputs.

        The callback has the signature ``uint32(uint64*)`` and views its
        argument as two ``uint64`` values.  Element 1 is used as a one-entry
        cache: after a fresh value ``z`` is drawn with ``splitmix_next`` the
        low 32 bits are returned and ``z | 1`` is stored, so the next call
        can return the upper 32 bits without drawing again.

        The compiled callback is also stored on ``self._next_32`` so that a
        reference to it is kept.
        """

        sig = types.uint32(types.CPointer(types.uint64))

        @cfunc(sig)
        def next_32(st):
            bit_gen_state = carray(st, (2, ), dtype=np.uint64)
            # Lowest bit of element 1 set => a cached upper half is available.
            if bit_gen_state[1] & np.uint64(0x1):
                out = bit_gen_state[1] >> np.uint64(32)
                # Clear the cache so the next call draws a new value.
                bit_gen_state[1] = 0
                return out
            z = splitmix_next(bit_gen_state)
            # Remember z, with the lowest bit forced on as the "cached" flag.
            bit_gen_state[1] = z | np.uint64(0x1)
            # Hand out the low 32 bits now.
            return z & 0xFFFFFFFF

        # Ensure a reference is held
        self._next_32 = next_32

        return next_32
Esempio n. 13
0
def deref_uint16(typingctx, data, offset):
    """Return the ``(signature, codegen)`` pair for a 16-bit dereference.

    The signature is ``uint32(voidptr, intp)`` and the code generator is
    whatever ``make_deref_codegen(16)`` returns.
    """
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(16)
Esempio n. 14
0
def _pick_ascii(is_ascii1, is_ascii2):
    if is_ascii1 == 1 and is_ascii2 == 1:
        return types.uint32(1)
    return types.uint32(0)
Esempio n. 15
0
def node_update_count(tree, idx_node, idx_sample):
    """Increment the count of a sample's label in a node.

    The label of sample ``idx_sample`` is read from ``tree.samples.labels``,
    converted with ``uint32`` and used as the column of
    ``tree.nodes.counts`` whose entry in row ``idx_node`` is increased by one.
    """
    # TODO: Don't do it twice...
    label = uint32(tree.samples.labels[idx_sample])
    counts = tree.nodes.counts
    counts[idx_node, label] += 1
Esempio n. 16
0
def _pick_ascii(is_ascii1, is_ascii2):
    if is_ascii1 == 1 and is_ascii2 == 1:
        return types.uint32(1)
    return types.uint32(0)
Esempio n. 17
0
File: unicode.py Progetto: esc/numba
def deref_uint32(typingctx, data, offset):
    """Return the ``(signature, codegen)`` pair for a 32-bit dereference.

    The signature is ``uint32(voidptr, intp)`` and the code generator is
    whatever ``make_deref_codegen(32)`` returns.
    """
    return types.uint32(types.voidptr, types.intp), make_deref_codegen(32)
Esempio n. 18
0
def float_to_unsigned(x):
    """Convert ``x`` to an unsigned 32-bit integer via ``types.uint32``."""
    as_unsigned = types.uint32(x)
    return as_unsigned
Esempio n. 19
0
    node_update_downwards,
    node_split,
    node_update_depth,
    node_update_weight_tree,
)
from .tree import TreeClassifier
from .utils import sample_discrete


# TODO: an overall task is to minimize the O(#n_features) complexity: pass few
#  times over the features

# TODO: write all the docstrings


@njit(uint32(TreeClassifier.class_type.instance_type, uint32))
def tree_go_downwards(tree, idx_sample):
    # We update the nodes along the path which leads to the leaf containing
    # x_t. For each node on the path, we consider the possibility of
    # splitting it, following the Mondrian process definition.
    # Index of the root is 0
    idx_current_node = 0
    x_t = tree.samples.features[idx_sample]

    if tree.iteration == 0:
        # If it's the first iteration, we just put x_t in the range of root
        node_update_downwards(tree, idx_current_node, idx_sample, False)
        return idx_current_node
    else:
        while True:
            # If it's not the first iteration (otherwise the current node
Esempio n. 20
0
        "--arsonist",
        type=int,
        default=None,
        help="Force an arsonist location from 1 to 6.",
    )

    parser.add_argument(
        "--case",
        type=lambda x: briefcase_words.index(x),
        default=None,
        help="Force a 4-letter briefcase word.",
    )

    args = parser.parse_args()

    matches = numpy.full(9, uint32(0x10000))

    has_constraint = False

    clock_str = args.clock
    clock = None
    if clock_str is not None:
        clock_h_str, _, clock_m_str, = clock_str.partition(":")
        clock_h = int(clock_h_str)
        clock_m = int(clock_m_str)
        assert 0 <= clock_h < 12
        assert 0 <= clock_m < 60
        clock = 60 * clock_h + clock_m
        matches[0] = clock
        has_constraint = True
Esempio n. 21
0
            new = np.ones((size, ), dtype=arr.dtype)
        else:
            new = np.zeros((size, ), dtype=arr.dtype)
        new[:keep] = arr[:keep]
        return new
    elif arr.ndim == 2:
        _, n_cols = arr.shape
        new = np.zeros((size, n_cols), dtype=arr.dtype)
        new[:keep] = arr[:keep]
        return new
    else:
        raise ValueError("resize_array can resize only 1D and 2D arrays")


# Sadly there is no function to sample for a discrete distribution in numba
@njit(uint32(float32[::1]))
def sample_discrete(distribution):
    """Samples according to the given discrete distribution.

    Parameters
    ----------
    distribution : `np.array', shape=(size,), dtype='float32'
        The discrete distribution we want to sample from. This must contain
        non-negative entries that sum to one.

    Returns
    -------
    output : `uint32`
        Output sampled in {0, 1, 2, distribution.size} according to the given
        distribution
Esempio n. 22
0
from .base_general import basis_general
from ._basis_general_core import user_core_wrap
import numpy as _np
from numba import cfunc, types, njit
try:
    from numba.ccallback import CFunc  # numba < 0.49.0
except ModuleNotFoundError:
    from numba.core.ccallback import CFunc  # numba >= 0.49.0

# Numba signatures and record layouts, each provided in a 32-bit and a
# 64-bit flavour that differ only in the unsigned integer width used for
# states.
# NOTE(review): presumably these are handed to ``cfunc`` (imported above) to
# compile user callbacks — the consuming code is not visible here, confirm.

# (state, intc, int8*, state*) -> state
map_sig_32 = types.uint32(types.uint32, types.intc, types.CPointer(types.int8),
                          types.CPointer(types.uint32))
map_sig_64 = types.uint64(types.uint64, types.intc, types.CPointer(types.int8),
                          types.CPointer(types.uint64))

# (state, state, state, state*) -> state
next_state_sig_32 = types.uint32(types.uint32, types.uint32, types.uint32,
                                 types.CPointer(types.uint32))
next_state_sig_64 = types.uint64(types.uint64, types.uint64, types.uint64,
                                 types.CPointer(types.uint64))

# (state, state, state*) -> state
pre_check_state_sig_32 = types.uint32(types.uint32, types.uint32,
                                      types.CPointer(types.uint32))
pre_check_state_sig_64 = types.uint64(types.uint64, types.uint64,
                                      types.CPointer(types.uint64))

# C-struct record pairing a complex128 `matrix_ele` with an unsigned `state`.
op_results_32 = types.Record.make_c_struct([
    ('matrix_ele', types.complex128),
    ('state', types.uint32),
])

op_results_64 = types.Record.make_c_struct([('matrix_ele', types.complex128),
                                            ('state', types.uint64)])
Esempio n. 23
0
def float_to_unsigned(x):
    """Convert ``x`` to an unsigned 32-bit integer via ``types.uint32``."""
    as_unsigned = types.uint32(x)
    return as_unsigned
Esempio n. 24
0
def box_index(x, y, step, nb_x):
    """Return the flat box index ``i_box + nb_x * j_box`` as ``uint32``.

    ``i_box`` is ``(x % 360) // step`` and ``j_box`` is ``(y + 90) // step``.
    """
    i_box = (x % 360) // step
    j_box = (y + 90) // step
    return numba_types.uint32(i_box + nb_x * j_box)
Esempio n. 25
0
def deref_uint32(typingctx, data, offset):
    """Return the ``(signature, codegen)`` pair for a 32-bit dereference.

    The signature is ``uint32(data, intp)``, i.e. it uses the type passed in
    as ``data`` for the first argument, and the code generator is whatever
    ``make_deref_codegen(32)`` returns.
    """
    return types.uint32(data, types.intp), make_deref_codegen(32)
Esempio n. 26
0
def box_indexes(x, y, step):
    """Return the ``(i_box, j_box)`` box indices, each as ``uint32``.

    ``i_box`` is ``(x % 360) // step`` and ``j_box`` is ``(y + 90) // step``.
    """
    i_box = numba_types.uint32((x % 360) // step)
    j_box = numba_types.uint32((y + 90) // step)
    return i_box, j_box
Esempio n. 27
0
def _histogram_intersection_dict(first, second):
    sim = 0
    for word, val_a in first.items():
        val_b = second.get(word, types.uint32(0))
        sim += min(val_a, val_b)
    return sim
Esempio n. 28
0
        return ncompiler


# Containers for the unary and binary operators.
# NOTE(review): `OpContainer` is defined outside this view — what it stores
# and how it is populated should be confirmed there.
GrB_UnaryOp = OpContainer()
GrB_BinaryOp = OpContainer()

##################################
# Useful collections of signatures
##################################
# Every unary entry has the form T(T): one argument, and a result of the
# same type as that argument.
_unary_bool = [nt.boolean(nt.boolean)]
_unary_int = [
    nt.uint8(nt.uint8),
    nt.int8(nt.int8),
    nt.uint16(nt.uint16),
    nt.int16(nt.int16),
    nt.uint32(nt.uint32),
    nt.int32(nt.int32),
    nt.uint64(nt.uint64),
    nt.int64(nt.int64)
]
_unary_float = [nt.float32(nt.float32), nt.float64(nt.float64)]
# All unary signatures: boolean first, then integer, then floating point.
_unary_all = _unary_bool + _unary_int + _unary_float

# Binary entries have the form T(T, T).
_binary_bool = [nt.boolean(nt.boolean, nt.boolean)]
_binary_int = [
    nt.uint8(nt.uint8, nt.uint8),
    nt.int8(nt.int8, nt.int8),
    nt.uint16(nt.uint16, nt.uint16),
    nt.int16(nt.int16, nt.int16),
    nt.uint32(nt.uint32, nt.uint32),
    nt.int32(nt.int32, nt.int32),