def barrier_no_arg_impl(context, builder, sig, args):
    """Lower a zero-argument barrier() call as barrier(CLK_GLOBAL_MEM_FENCE)."""
    assert not args
    # Signature of the underlying OpenCL barrier: void barrier(unsigned int).
    barrier_sig = types.void(types.uint32)
    fence_flags = context.get_constant(types.uint32, enums.CLK_GLOBAL_MEM_FENCE)
    barrier_fn = _declare_function(context, builder, "barrier", barrier_sig,
                                   ["unsigned int"])
    builder.call(barrier_fn, [fence_flags])
    return _void_value
def void_func(typingctx, a):
    """Intrinsic with a void(int32) signature whose codegen emits nothing.

    Returning None from codegen is turned into a dummy value by the caller.
    """
    def codegen(context, builder, signature, args):
        # Intentionally empty — no IR is emitted for this call.
        pass

    return types.void(types.int32), codegen
def gdb_internal(tyctx):
    """Typing/codegen pair for the gdb intrinsic; delegates to init_gdb_codegen."""
    def codegen(cgctx, builder, signature, args):
        # const_args / do_break are taken from the enclosing scope.
        init_gdb_codegen(cgctx, builder, signature, args, const_args,
                         do_break=do_break)
        return cgctx.get_constant(types.none, None)

    return types.void(), codegen
def bp_internal(tyctx):
    """Emit a call to the native numba_gdb_breakpoint helper (void -> void)."""
    sig = types.void()

    def codegen(cgctx, builder, signature, args):
        fnty = ir.FunctionType(ir.VoidType(), ())
        bp_fn = builder.module.get_or_insert_function(fnty,
                                                      "numba_gdb_breakpoint")
        builder.call(bp_fn, ())
        return cgctx.get_constant(types.none, None)

    return sig, codegen
def memcpy_region(typingctx, dst, dst_offset, src, src_offset, nbytes, align):
    '''Copy nbytes from *(src + src_offset) to *(dst + dst_offset)'''
    sig = types.void(types.voidptr, types.intp, types.voidptr, types.intp,
                     types.intp, types.intp)

    def codegen(context, builder, signature, args):
        dst_v, dst_off, src_v, src_off, nbytes_v, align_v = args
        # Offset both pointers, then perform the raw byte copy.
        src_ptr = builder.gep(src_v, [src_off])
        dst_ptr = builder.gep(dst_v, [dst_off])
        cgutils.raw_memcpy(builder, dst_ptr, src_ptr, nbytes_v, align_v)
        return context.get_dummy_value()

    return sig, codegen
def ctor_impl(context, builder, sig, args):
    # Generic jitclass constructor: allocate NRT-managed storage, null it,
    # call the jitted __init__, and return a new reference to the instance.
    # Allocate the instance
    inst_typ = sig.return_type
    alloc_type = context.get_data_type(inst_typ.get_data_type())
    alloc_size = context.get_abi_sizeof(alloc_type)
    meminfo = context.nrt_meminfo_alloc_dtor(
        builder,
        context.get_constant(types.uintp, alloc_size),
        imp_dtor(context, builder.module, inst_typ),
    )
    data_pointer = context.nrt_meminfo_data(builder, meminfo)
    data_pointer = builder.bitcast(data_pointer, alloc_type.as_pointer())

    # Nullify all data
    builder.store(cgutils.get_null_value(alloc_type), data_pointer)

    inst_struct_typ = cgutils.create_struct_proxy(inst_typ)
    inst_struct = inst_struct_typ(context, builder)
    inst_struct.meminfo = meminfo
    inst_struct.data = data_pointer

    # Call the __init__
    # TODO: extract the following into a common util
    init_sig = (sig.return_type,) + sig.args
    init = inst_typ.jitmethods['__init__']
    init.compile(init_sig)
    cres = init._compileinfos[init_sig]
    realargs = [inst_struct._getvalue()] + list(args)
    context.call_internal(builder, cres.fndesc, types.void(*init_sig),
                          realargs)

    # Prepare return value
    ret = inst_struct._getvalue()

    # Add function to link so the __init__'s library is pulled in.
    codegen = context.codegen()
    codegen.add_linking_library(cres.library)

    return imputils.impl_ret_new_ref(context, builder, inst_typ, ret)
def ctor_impl(context, builder, sig, args): """ Generic constructor (__new__) for jitclasses. """ # Allocate the instance inst_typ = sig.return_type alloc_type = context.get_data_type(inst_typ.get_data_type()) alloc_size = context.get_abi_sizeof(alloc_type) meminfo = context.nrt.meminfo_alloc_dtor( builder, context.get_constant(types.uintp, alloc_size), imp_dtor(context, builder.module, inst_typ), ) data_pointer = context.nrt.meminfo_data(builder, meminfo) data_pointer = builder.bitcast(data_pointer, alloc_type.as_pointer()) # Nullify all data builder.store(cgutils.get_null_value(alloc_type), data_pointer) inst_struct = context.make_helper(builder, inst_typ) inst_struct.meminfo = meminfo inst_struct.data = data_pointer # Call the jitted __init__ # TODO: extract the following into a common util init_sig = (sig.return_type,) + sig.args init = inst_typ.jitmethods['__init__'] disp_type = types.Dispatcher(init) call = context.get_function(disp_type, types.void(*init_sig)) _add_linking_libs(context, call) realargs = [inst_struct._getvalue()] + list(args) call(builder, realargs) # Prepare return value ret = inst_struct._getvalue() return imputils.impl_ret_new_ref(context, builder, inst_typ, ret)
def _hashmap_dump(typingctx, dict_type):
    """Intrinsic calling the native hashmap_dump for this dict's key/value types."""
    # The native symbol only exists in debug builds, so it is loaded lazily
    # here instead of at module import time.
    load_native_func('hashmap_dump', hconc_dict, hashmap_func_suffixes)
    key_type_postfix, value_type_postfix = _get_types_postfixes(
        dict_type.key_type, dict_type.value_type)

    def codegen(context, builder, sig, args):
        dict_val, = args
        proxy = cgutils.create_struct_proxy(dict_type)(context, builder,
                                                       value=dict_val)
        fnty = lir.FunctionType(lir.VoidType(), [lir.IntType(8).as_pointer()])
        symbol = f"hashmap_dump_{key_type_postfix}_to_{value_type_postfix}"
        dump_fn = cgutils.get_or_insert_function(builder.module, fnty,
                                                 name=symbol)
        builder.call(dump_fn, [proxy.data_ptr])

    return types.void(dict_type), codegen
def copy_non_null_offsets(typingctx, str_arr_typ, out_str_arr_typ=None):
    # Compact the offsets of non-null strings from the input array into the
    # output array's offsets buffer.
    # precondition: output is allocated with offset the size non-nulls in input
    def codegen(context, builder, sig, args):
        out_str_arr, in_str_arr = args
        in_string_array = context.make_helper(builder, string_array_type, in_str_arr)
        out_string_array = context.make_helper(builder, string_array_type, out_str_arr)
        n = in_string_array.num_items
        zero = context.get_constant(offset_typ, 0)
        # Running output position; advanced only for non-null entries.
        curr_offset_ptr = cgutils.alloca_once_value(builder, zero)
        # XXX: assuming last offset is already set by allocate_string_array
        # for i in range(n)
        #   if not isna():
        #     out_offset[curr] = offset[i]
        with cgutils.for_range(builder, n) as loop:
            isna = lower_is_na(context, builder, in_string_array.null_bitmap,
                               loop.index)
            with cgutils.if_likely(builder, builder.not_(isna)):
                in_val = builder.load(
                    builder.gep(in_string_array.offsets, [loop.index]))
                curr_offset = builder.load(curr_offset_ptr)
                builder.store(
                    in_val,
                    builder.gep(out_string_array.offsets, [curr_offset]))
                builder.store(
                    builder.add(
                        curr_offset,
                        lir.Constant(context.get_data_type(offset_typ), 1)),
                    curr_offset_ptr)
        return context.get_dummy_value()

    return types.void(string_array_type, string_array_type), codegen
def lower_setattr(typingctx, inst_type, attr_type, val_type):
    # Intrinsic setting a StructRef field; only handles literal attribute
    # names on StructRef instances (returns None otherwise -> typing fails).
    if (isinstance(attr_type, types.Literal)
            and isinstance(inst_type, types.StructRef)):
        attr = attr_type.literal_value

        def codegen(context, builder, sig, args):
            [instance, attr_v, val] = args
            utils = _Utils(context, builder, inst_type)
            dataval = utils.get_data_struct(instance)
            # cast val to the correct type
            field_type = inst_type.field_dict[attr]
            casted = context.cast(builder, val, val_type, field_type)
            # read old
            old_value = getattr(dataval, attr)
            # incref new value
            context.nrt.incref(builder, val_type, casted)
            # decref old value (must be last in case new value is old value)
            context.nrt.decref(builder, val_type, old_value)
            # write new
            setattr(dataval, attr, casted)

        sig = types.void(inst_type, types.literal(attr), val_type)
        return sig, codegen
def get_type_enum(arr):
    # Non-jitted stub; the real mapping is supplied by the overload below.
    return np.int32(-1)


@overload(get_type_enum)
def get_type_enum_overload(arr):
    # Resolve the C type enum for the array dtype at compile time.
    dtype = arr.dtype
    if isinstance(dtype, hpat.hiframes.pd_categorical_ext.PDCategoricalDtype):
        dtype = hpat.hiframes.pd_categorical_ext.get_categories_int_type(dtype)

    typ_val = _numba_to_c_type_map[dtype]
    return lambda arr: np.int32(typ_val)


INT_MAX = np.iinfo(np.int32).max

_send = types.ExternalFunction(
    "c_send",
    types.void(types.voidptr, types.int32, types.int32, types.int32, types.int32))


@numba.njit
def send(val, rank, tag):
    # dummy array for val
    send_arr = np.full(1, val)
    type_enum = get_type_enum(send_arr)
    _send(send_arr.ctypes, 1, type_enum, rank, tag)


_recv = types.ExternalFunction(
    "c_recv",
    types.void(types.voidptr, types.int32, types.int32, types.int32, types.int32))


@numba.njit
def recv(dtype, rank, tag):
    # NOTE(review): fragment — the rest of recv lies outside this chunk.
    # dummy array for val
    recv_arr = np.empty(1, dtype)
# NOTE(review): fragment — these loops belong to create_gaussian_kernel,
# whose header lies outside this chunk.
for x in range(-2, 3):
    for y in range(-2, 3):
        r = math.sqrt(x*x + y*y)
        kernel[x + 2][y + 2] = (math.exp(-(r*r)/s))/(math.pi * s)
        sum_ += kernel[x + 2][y + 2]

# normalize the Kernel
for i in range(5):
    for j in range(5):
        kernel[i][j] /= sum_


gKernel = np.zeros((5, 5))
create_gaussian_kernel(gKernel)

# C signature: void(uchar*, uchar*, int, int, int, int)
c_sig = types.void(types.CPointer(types.uchar), types.CPointer(types.uchar),
                   types.intc, types.intc, types.intc, types.intc)


@cfunc(c_sig)
def gaussian_filter(in_, out, y, x, width, height):
    # Interpret the raw pointers as (height, width, 3) image arrays.
    in_array = carray(in_, (height, width, 3))
    out_array = carray(out, (height, width, 3))
    # Convolve the 5x5 kernel around (y, x) for each channel.
    for k in range(3):
        sum_ = 0.
        for p in range(-2, 3):
            for q in range(-2, 3):
                sum_ += gKernel[p+2, q+2] * in_array[y+p,x+q,k]
        out_array[y, x, k] = sum_


ifilter = cdll.LoadLibrary("libifilter.dylib")
# NOTE(review): fragment — these statements are the tail of a cfarray usecase
# defined before this chunk.
out[3:5] = in_.strides
out[5] = in_.flags.c_contiguous
out[6] = in_.flags.f_contiguous
s = 0
for i, j in np.ndindex(m, n):
    s += in_[i, j] * (i - j)
out[7] = s

return cfarray_usecase


carray_dtype_usecase = make_cfarray_dtype_usecase(carray)
farray_dtype_usecase = make_cfarray_dtype_usecase(farray)

# Signatures for the usecase cfuncs (in pointer, out pointer, dims).
carray_float32_usecase_sig = types.void(types.CPointer(types.float32),
                                        types.CPointer(types.float32),
                                        types.intp, types.intp)
carray_float64_usecase_sig = types.void(types.CPointer(types.float64),
                                        types.CPointer(types.float64),
                                        types.intp, types.intp)
carray_voidptr_usecase_sig = types.void(types.voidptr, types.voidptr,
                                        types.intp, types.intp)


class TestCFunc(TestCase):

    @tag('important')
    def test_basic(self):
        """
        Basic usage and properties of a cfunc.
so numba dependency can be made optional
"""

import numpy as np
import itertools

from numba import jit, autojit
from numba.types import void, float32, float64, int32, pyobject

from escheresque import util
from escheresque import geometry
from escheresque import harmonics


@jit(void(pyobject, float32[:,:,:,:], float32[:,:,:]))
def boundify_normals_numba(complex, old, new):
    # NOTE(review): fragment — the function body continues past this chunk.
    group = complex.group
    topology = complex.topology
    """
    boundify normals using rotation information in the edge datastucture
    can also rotate on the fly. probably more efficient. or maybe not.
    only if relative rotation matrix is passed in with type information
    can also optimize bound structures. can broadcast I over I;
    only compute each edge once
    this could be a nopython module
    """
    for e in range(3):
        E = group.edges[e]
        T = group.edge_transforms[e]
        be = topology.BE[e]
        for i in range(len(E)):
            for b in range(len(be)):
register_model(XeDSetType)(models.OpaqueModel)

get_column_size_xenon = types.ExternalFunction(
    "get_column_size_xenon",
    types.int64(xe_connect_type, xe_dset_type, types.intp))

# read_xenon_col = types.ExternalFunction(
#     "c_read_xenon",
#     types.void(
#         string_type,
#         types.intp,
#         types.voidptr,
#         types.CPointer(
#             types.int64)))

# Native xenon connection lifecycle helpers.
xe_connect = types.ExternalFunction("c_xe_connect", xe_connect_type(types.voidptr))
xe_open = types.ExternalFunction("c_xe_open", xe_dset_type(xe_connect_type, types.voidptr))
xe_close = types.ExternalFunction("c_xe_close", types.void(xe_connect_type, xe_dset_type))


# TODO: fix liveness/alias in Numba to be able to use arr.ctypes directly
@intrinsic
def read_xenon_col(typingctx, connect_tp, dset_tp, col_id_tp, column_tp, schema_arr_tp):
    def codegen(context, builder, sig, args):
        # Unpack the numpy arrays (args[3]/args[4]) to reach their raw data.
        arr_info = context.make_array(column_tp)(context, builder, value=args[3])
        ctinfo = context.make_array(schema_arr_tp)(context, builder, value=args[4])
        # NOTE(review): fragment — the call using fnty continues past this chunk.
        fnty = lir.FunctionType(lir.VoidType(),
                                [lir.IntType(8).as_pointer(),
                                 lir.IntType(8).as_pointer(),
                                 lir.IntType(64),
                                 lir.IntType(8).as_pointer(),
                                 lir.IntType(64).as_pointer()])
# NOTE(review): fragment — this return belongs to a function before this chunk.
return cycle_vertices_mask, cycle_edges_mask, next_edge_indices_in_path_to_cycle


@jit(int32(int32, planar_graph_nb_type, boolean[:]), nopython=True)
def _get_cycle_vertex(edge_index, graph, cycle_vertices_mask):
    # Return whichever endpoint of the edge lies on the cycle.
    edge_vertex1 = graph.edges.vertex1[edge_index]
    edge_vertex2 = graph.edges.vertex2[edge_index]
    if cycle_vertices_mask[edge_vertex1]:
        return edge_vertex1
    return edge_vertex2


@jit(void(planar_graph_nb_type, boolean[:], float32[:], int32[:], int32, int32,
          boolean[:], boolean[:], boolean), nopython=True)
def iterate_tree_adjacency_costs_on_tree_cycle_side(
        graph, tree_edges_mask, total_descendants_costs, parent_edge_indices,
        start_vertex_on_cycle, start_edge_index_on_cycle, cycle_vertices_mask,
        cycle_edges_mask, add_end_marker):
    # NOTE(review): fragment — loop body continues past this chunk.
    total_vertices_cost = graph.vertex_costs.sum()
    for edge_index in utils.iterate_subgraph_incidence_indices(
            graph, cycle_edges_mask, tree_edges_mask,
            start_vertex_on_cycle, start_edge_index_on_cycle):
        cycle_vertex = _get_cycle_vertex(edge_index, graph, cycle_vertices_mask)
        vertex_on_cycle_side = graph.edges.get_opposite_vertex(
# NOTE(review): fragment — tail of an opt_*_numba_single function defined
# before this chunk.
change_log_likelihood = x_change_log_likelihood + (y_scale * y_change_log_likelihood)
if change_log_likelihood - stay_log_likelihood > .0001:
    i_cells[i, k] = 1.0 - i_cells[i, k]
    z_count[i,:] = changed_z_count
    #return True
    return changed_z_vals
else:
    #return False
    return z_arr


@jit(void(float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:],float64,float64,float64[:],float64[:,:]),nopython=True)
def greedy_optimize_j_numba_single(i_cells: np.ndarray, j_genes: np.ndarray, prob_matrix_a: np.ndarray, prob_matrix_b: np.ndarray, y: np.ndarray, sigma_y: float, y_scale: float,z_arr:np.ndarray, z_count:np.ndarray):
    # Coordinate-descent sweep over every entry of j_genes.
    for j in range(j_genes.shape[0]):
        for k in range(j_genes.shape[1]):
            opt_jk_numba_single(i_cells,j_genes,j,k,prob_matrix_a,prob_matrix_b,y,sigma_y,y_scale,z_arr,z_count)


@jit(void(float64[:,:],float64[:,:],float64[:,:],float64[:,:],float64[:],float64,float64,float64[:],float64[:,:]),nopython=True)
def greedy_optimize_i_numba_single(i_cells:np.ndarray,j_genes:np.ndarray, prob_matrix_a:np.ndarray, prob_matrix_b:np.ndarray, y: np.ndarray, sigma_y:float, y_scale: float, z_arr:np.ndarray, z_count:np.ndarray):
    # NOTE(review): fragment — inner loop body continues past this chunk.
    for i in range(i_cells.shape[0]):
        for k in range(i_cells.shape[1]):
import numpy as np

import numba
import sdc
from numba import types, cgutils
from numba.targets.arrayobj import make_array
from numba.extending import overload, intrinsic, overload_method
from sdc.str_ext import string_type
from numba.ir_utils import (compile_to_numba_ir, replace_arg_nodes,
                            find_callname, guard)

# Native file-I/O helpers implemented in the SDC C extension.
_get_file_size = types.ExternalFunction("get_file_size",
                                        types.int64(types.voidptr))
_file_read = types.ExternalFunction(
    "file_read", types.void(types.voidptr, types.voidptr, types.intp))
_file_read_parallel = types.ExternalFunction(
    "file_read_parallel",
    types.void(types.voidptr, types.voidptr, types.intp, types.intp))

file_write = types.ExternalFunction(
    "file_write", types.void(types.voidptr, types.voidptr, types.intp))

_file_write_parallel = types.ExternalFunction(
    "file_write_parallel",
    types.void(types.voidptr, types.voidptr, types.intp, types.intp,
               types.intp))


def _handle_np_fromfile(assign, lhs, rhs):
    """translate np.fromfile() to native
def _setup_fn_sig(self):
    """Build the numba signature of the native setup callback (returns void)."""
    arg_types = (
        numba_types.uint64,
        numba_types.uint64,
        numba_types.int32,
        numba_types.uint64,
        numba_types.uint64,
        numba_types.int32,
        numba_types.int32,
    )
    return numba_types.void(*arg_types)
@box(CharType)
def box_char(typ, val, c):
    """Box a native char value as a length-1 Python string."""
    # Convert the native char to a C string pointer via the helper,
    # then wrap exactly one byte as a Python string.
    fnty = lir.FunctionType(lir.IntType(8).as_pointer(), [lir.IntType(8)])
    fn = c.builder.module.get_or_insert_function(fnty, name="get_char_ptr")
    c_str = c.builder.call(fn, [val])
    pystr = c.pyapi.string_from_string_and_size(
        c_str, c.context.get_constant(types.intp, 1))
    # TODO: delete ptr
    return pystr


# Native string helpers.
del_str = types.ExternalFunction("del_str", types.void(string_type))
_hash_str = types.ExternalFunction("_hash_str", types.int64(string_type))
get_c_str = types.ExternalFunction("get_c_str", types.voidptr(string_type))


@overload_method(StringType, 'c_str')
def str_c_str(str_typ):
    return lambda s: get_c_str(s)


@overload_method(StringType, 'join')
def str_join(str_typ, iterable_typ):
    # TODO: more efficient implementation (e.g. C++ string buffer)
    # NOTE(review): fragment — str_join_impl continues past this chunk.
    def str_join_impl(sep_str, str_container):
        res = ""
        counter = 0
# NOTE(review): fragment — this conditional belongs to a get_tuple_prod
# overload defined before this chunk.
if t == numba.types.containers.Tuple(()):
    return lambda t: 1

def get_tuple_prod_impl(t):
    # Product of all tuple elements (1 for the empty tuple).
    res = 1
    for a in t:
        res *= a
    return res

return get_tuple_prod_impl


sig = types.void(
    types.voidptr,  # output array
    types.voidptr,  # input array
    types.intp,  # old_len
    types.intp,  # new_len
    types.intp,  # input lower_dim size in bytes
    types.intp,  # output lower_dim size in bytes
)
oneD_reshape_shuffle = types.ExternalFunction("oneD_reshape_shuffle", sig)


@numba.njit
def dist_oneD_reshape_shuffle(lhs, in_arr, new_0dim_global_len,
                              old_0dim_global_len, dtype_size):  # pragma: no cover
    # NOTE(review): fragment — body continues past this chunk.
    c_in_arr = np.ascontiguousarray(in_arr)
    in_lower_dims_size = get_tuple_prod(c_in_arr.shape[1:])
    out_lower_dims_size = get_tuple_prod(lhs.shape[1:])
    # print(c_in_arr)
import numpy as np

from numba import jit
from numba.types import void, Tuple, boolean, int32, float32

from . import planar_graph_constructor, utils
from .planar_graph import PlanarGraph, planar_graph_nb_type
from .planar_graph_edges import PlanarGraphEdges, planar_graph_edges_nb_type


@jit(void(int32, planar_graph_edges_nb_type, int32, int32[:]), nopython=True)
def _set_adjacent_vertex_level(vertex, edges, incident_edge_index, levels):
    # BFS visitor: a neighbour sits one level deeper than the current vertex.
    adjacent_vertex = edges.get_opposite_vertex(incident_edge_index, vertex)
    levels[adjacent_vertex] = levels[vertex] + 1


_set_levels = utils.make_traverse_graph_via_bfs(_set_adjacent_vertex_level, int32[:])


@jit(int32[:](int32, planar_graph_nb_type), nopython=True)
def construct_bfs_levels(root, graph):
    # Levels start at -1 (unvisited); the root is level 0.
    levels = utils.repeat_int(-1, graph.size)
    levels[root] = 0
    used_vertex_flags = utils.repeat_bool(False, graph.size)
    _set_levels(root, graph, used_vertex_flags, levels)
    return levels


@jit(void(int32, planar_graph_edges_nb_type, int32, boolean[:]), nopython=True)
def _add_edge_to_tree(vertex, edges, incident_edge_index, tree_edges_mask):
import gym
import gym.spaces as spaces
import numpy as np
import numba
import numba.types as nt


@numba.njit(nt.void(nt.bool_[:, ::1], nt.int_, nt.int_, nt.float_, nt.float_), cache=True)
def maze_generation(maze, width=81, height=51, complexity=.75, density=.75):
    """
    Simple maze generation algorithm I've taken from Wikipedia
    https://en.wikipedia.org/wiki/Maze_generation_algorithm#Python_code_example
    Should do the job, maybe some other algorithm can be fasteer, but this
    one was free in development time ;)
    """
    # Only odd shapes
    assert width % 2 == 1, "Only odd maze shapes are supported"
    assert height % 2 == 1, "Only odd maze shapes are supported"
    # Adjust complexity and density relative to maze size
    complexity = int(complexity * (5 * (width + height)))  # number of components
    density = int(density * ((width // 2) * (height // 2)))  # size of components
    # Fill borders
    maze[0, :] = maze[-1, :] = 1
    maze[:, 0] = maze[:, -1] = 1
    # Make aisles
    # NOTE(review): fragment — the aisle-carving loop continues past this chunk.
    for i in range(density):
        # pick a random position
# splitted the current node with the data point) leaf = idx_current_node node_update_downwards(tree, leaf, idx_sample, False) return leaf else: # There is no split, so we just update the node and go to # the next one node_update_downwards(tree, idx_current_node, idx_sample, True) is_leaf = tree.nodes.is_leaf[idx_current_node] if is_leaf: return idx_current_node else: idx_current_node = node_get_child(tree, idx_current_node, x_t) @njit(void(TreeClassifier.class_type.instance_type, uint32)) def tree_go_upwards(tree, leaf): idx_current_node = leaf if tree.iteration >= 1: while True: node_update_weight_tree(tree, idx_current_node) if idx_current_node == 0: # We arrived at the root break # Note that the root node is updated as well # We go up to the root in the tree idx_current_node = tree.nodes.parent[idx_current_node] @njit(void(TreeClassifier.class_type.instance_type, uint32)) def tree_partial_fit(tree, idx_sample):
def _rm_pd_index(col_names, col_types):
    """remove pandas index if found in columns
    """
    # Mutates both lists in place; ValueError means no index column exists.
    try:
        pd_index_loc = col_names.index('__index_level_0__')
        del col_names[pd_index_loc]
        del col_types[pd_index_loc]
    except ValueError:
        pass


# Native arrow reader lifecycle helpers.
_get_arrow_readers = types.ExternalFunction(
    "get_arrow_readers", types.Opaque('arrow_reader')(types.voidptr))
_del_arrow_readers = types.ExternalFunction(
    "del_arrow_readers", types.void(types.Opaque('arrow_reader')))


@infer_global(get_column_size_parquet)
class SizeParquetInfer(AbstractTemplate):
    def generic(self, args, kws):
        assert not kws
        assert len(args) == 2
        return signature(types.intp, args[0], types.unliteral(args[1]))


@infer_global(read_parquet)
class ReadParquetInfer(AbstractTemplate):
    # NOTE(review): fragment — generic() continues past this chunk.
    def generic(self, args, kws):
        assert not kws
        assert len(args) == 4
# NOTE(review): fragment — these loops belong to a get_submatrix callback
# defined before this chunk.
for i in range(0, n):
    for j in range(0, m):
        coef[j * n + i] = math.exp(-norm(points[I[i], :] - points[J[j], :])) / (
            1e-5 + norm(points[I[i], :] - points[J[j], :]))


with timer("building HMatrix with Python get_submatrix"):
    H2 = HMatrix.from_submatrices(get_submatrix, points, **params)
H2.print_infos()

###############################################################################
#  PASSING A NUMBA COMPILED GET_COEF
#  https://numba.pydata.org/numba-doc/dev/user/cfunc.html


@cfunc(types.void(types.intc, types.intc, types.CPointer(types.double)),
       nopython=True)
def get_coef_2(i, j, coef):
    # Same kernel as the Python version, compiled to a C callback.
    coef[0] = math.exp(-norm(points[i, :] - points[j, :])) / (
        1e-5 + norm(points[i, :] - points[j, :]))


with timer("building HMatrix with Numba get_coef"):
    H3 = HMatrix.from_coefs(get_coef_2.ctypes, points, **params)
H3.print_infos()

###############################################################################
#  PASSING A NUMBA COMPILED GET_SUBMATRIX


@cfunc(types.void(types.CPointer(types.intc), types.CPointer(types.intc),
                  types.intc, types.intc, types.CPointer(types.double)),
dict_byte_vec_int64_type = DictType(byte_vec_type, types.int64)
dict_byte_vec_int64_init = types.ExternalFunction('dict_byte_vec_int64_init',
                                                  dict_byte_vec_int64_type())
_add_dict_symbols('byte_vec', 'int64')

# Expose the native byte_vec helpers to the LLVM linker.
ll.add_symbol('byte_vec_init', hdict_ext.byte_vec_init)
ll.add_symbol('byte_vec_set', hdict_ext.byte_vec_set)
ll.add_symbol('byte_vec_free', hdict_ext.byte_vec_free)
ll.add_symbol('byte_vec_resize', hdict_ext.byte_vec_resize)

byte_vec_init = types.ExternalFunction('byte_vec_init',
                                       byte_vec_type(types.int64, types.voidptr))
byte_vec_set = types.ExternalFunction(
    'byte_vec_set',
    types.void(
        byte_vec_type,
        types.int64,
        types.voidptr,
        types.int64))
byte_vec_resize = types.ExternalFunction('byte_vec_resize',
                                         types.void(byte_vec_type, types.int64))
byte_vec_free = types.ExternalFunction('byte_vec_free',
                                       types.void(byte_vec_type))


class MultiMapType(types.Opaque):
    # Opaque numba type wrapping a native multimap for the given key/value types.
    def __init__(self, key_typ, val_typ):
        self.key_typ = key_typ
        self.val_typ = val_typ
        super(MultiMapType, self).__init__(
            name='MultiMapType{}{}'.format(key_typ, val_typ))

    @property
    def key(self):
# NOTE(review): fragment — this return belongs to a function before this chunk.
return col_names, col_types


def _rm_pd_index(col_names, col_types):
    """remove pandas index if found in columns
    """
    # Mutates both lists in place; ValueError means no index column exists.
    try:
        pd_index_loc = col_names.index('__index_level_0__')
        del col_names[pd_index_loc]
        del col_types[pd_index_loc]
    except ValueError:
        pass


_get_arrow_readers = types.ExternalFunction("get_arrow_readers",
                                            types.Opaque('arrow_reader')(types.voidptr))
_del_arrow_readers = types.ExternalFunction("del_arrow_readers",
                                            types.void(types.Opaque('arrow_reader')))


@infer_global(get_column_size_parquet)
class SizeParquetInfer(AbstractTemplate):
    def generic(self, args, kws):
        assert not kws
        assert len(args) == 2
        return signature(types.intp, args[0], types.unliteral(args[1]))


@infer_global(read_parquet)
class ReadParquetInfer(AbstractTemplate):
    # NOTE(review): fragment — generic() continues past this chunk.
    def generic(self, args, kws):
        assert not kws
        assert len(args) == 4
@overload(get_type_enum)
def get_type_enum_overload(arr):
    # Resolve the C type enum for the array dtype at compile time.
    dtype = arr.dtype
    if isinstance(dtype, hpat.hiframes.pd_categorical_ext.PDCategoricalDtype):
        dtype = hpat.hiframes.pd_categorical_ext.get_categories_int_type(dtype)

    typ_val = _numba_to_c_type_map[dtype]
    return lambda arr: np.int32(typ_val)


INT_MAX = np.iinfo(np.int32).max

_send = types.ExternalFunction(
    "c_send",
    types.void(types.voidptr, types.int32, types.int32, types.int32,
               types.int32))


@numba.njit
def send(val, rank, tag):
    # dummy array for val
    send_arr = np.full(1, val)
    type_enum = get_type_enum(send_arr)
    _send(send_arr.ctypes, 1, type_enum, rank, tag)


_recv = types.ExternalFunction(
    "c_recv",
    types.void(types.voidptr, types.int32, types.int32, types.int32,
               types.int32))
# NOTE(review): fragment — these are the trailing arguments of an
# ExternalFunction declaration that starts before this chunk.
"get_column_size_xenon", types.int64(xe_connect_type, xe_dset_type, types.intp))

# read_xenon_col = types.ExternalFunction(
#     "c_read_xenon",
#     types.void(
#         string_type,
#         types.intp,
#         types.voidptr,
#         types.CPointer(
#             types.int64)))

# Native xenon connection lifecycle helpers.
xe_connect = types.ExternalFunction("c_xe_connect", xe_connect_type(types.voidptr))
xe_open = types.ExternalFunction("c_xe_open", xe_dset_type(xe_connect_type, types.voidptr))
xe_close = types.ExternalFunction("c_xe_close", types.void(xe_connect_type, xe_dset_type))


# TODO: fix liveness/alias in Numba to be able to use arr.ctypes directly
@intrinsic
def read_xenon_col(typingctx, connect_tp, dset_tp, col_id_tp, column_tp, schema_arr_tp):
    def codegen(context, builder, sig, args):
        # Unpack the numpy arrays (args[3]/args[4]) to reach their raw data.
        arr_info = context.make_array(column_tp)(context, builder, value=args[3])
        ctinfo = context.make_array(schema_arr_tp)(context, builder, value=args[4])
        # NOTE(review): fragment — fnty argument list continues past this chunk.
        fnty = lir.FunctionType(lir.VoidType(), [
            lir.IntType(8).as_pointer(),
# NOTE(review): fragment — trailing argument of a signature built before this chunk.
types.CPointer(types.uint64))

# C-struct records filled by user-defined operator callbacks.
op_results_32 = types.Record.make_c_struct([
    ('matrix_ele', types.complex128),
    ('state', types.uint32),
])
op_results_64 = types.Record.make_c_struct([('matrix_ele', types.complex128),
                                            ('state', types.uint64)])

op_sig_32 = types.intc(types.CPointer(op_results_32), types.char,
                       types.intc, types.intc, types.CPointer(types.uint32))
op_sig_64 = types.intc(types.CPointer(op_results_64), types.char,
                       types.intc, types.intc, types.CPointer(types.uint64))

count_particles_sig_32 = types.void(types.uint32, types.CPointer(types.intc),
                                    types.CPointer(types.intc))
count_particles_sig_64 = types.void(types.uint64, types.CPointer(types.intc),
                                    types.CPointer(types.intc))

__all__ = [
    "map_sig_32", "map_sig_64", "next_state_sig_32", "next_state_sig_64",
    "op_func_sig_32", "op_func_sig_64", "user_basis"
]


@njit
def _is_sorted_decending(a):
    # True when `a` is non-increasing; body may continue past this chunk.
    for i in range(a.size - 1):
        if (a[i] < a[i + 1]):
            return False
dict_byte_vec_int64_type = DictType(byte_vec_type, types.int64)
dict_byte_vec_int64_init = types.ExternalFunction('dict_byte_vec_int64_init',
                                                  dict_byte_vec_int64_type())
_add_dict_symbols('byte_vec', 'int64')

# Expose the native byte_vec helpers to the LLVM linker.
ll.add_symbol('byte_vec_init', hdict_ext.byte_vec_init)
ll.add_symbol('byte_vec_set', hdict_ext.byte_vec_set)
ll.add_symbol('byte_vec_free', hdict_ext.byte_vec_free)
ll.add_symbol('byte_vec_resize', hdict_ext.byte_vec_resize)

byte_vec_init = types.ExternalFunction(
    'byte_vec_init', byte_vec_type(types.int64, types.voidptr))
byte_vec_set = types.ExternalFunction(
    'byte_vec_set',
    types.void(byte_vec_type, types.int64, types.voidptr, types.int64))
byte_vec_resize = types.ExternalFunction(
    'byte_vec_resize', types.void(byte_vec_type, types.int64))
byte_vec_free = types.ExternalFunction('byte_vec_free',
                                       types.void(byte_vec_type))

# XXX: needs Numba #3014 resolved
# @overload("in")
# def in_dict(key_typ, dict_typ):
#     def f(k, dict_int):
#         return dict_int_int_in(dict_int, k)
#     return f

# XXX possible overload bug
# @overload("setitem")
# def setitem_dict(dict_typ, key_typ, val_typ):
# generate 5x5 kernel for x in range(-2, 3): for y in range(-2, 3): r = math.sqrt(x * x + y * y) kernel[x + 2][y + 2] = (math.exp(-(r * r) / s)) / (math.pi * s) sum_ += kernel[x + 2][y + 2] # normalize the Kernel for i in range(5): for j in range(5): kernel[i][j] /= sum_ gKernel = np.zeros((5, 5)) create_gaussian_kernel(gKernel) c_sig = types.void(types.CPointer(types.uchar), types.CPointer(types.uchar), types.intc, types.intc, types.intc, types.intc) @cfunc(c_sig) def gaussian_filter(in_, out, y, x, width, height): in_array = carray(in_, (height, width, 3)) out_array = carray(out, (height, width, 3)) for k in range(3): sum_ = 0. for p in range(-2, 3): for q in range(-2, 3): sum_ += gKernel[p + 2, q + 2] * in_array[y + p, x + q, k] out_array[y, x, k] = sum_ ifilter = cdll.LoadLibrary("libifilter.dylib")
dict_byte_vec_int64_type = DictType(byte_vec_type, types.int64)
dict_byte_vec_int64_init = types.ExternalFunction('dict_byte_vec_int64_init',
                                                  dict_byte_vec_int64_type())
_add_dict_symbols('byte_vec', 'int64')

# Expose the native byte_vec helpers to the LLVM linker.
ll.add_symbol('byte_vec_init', hdict_ext.byte_vec_init)
ll.add_symbol('byte_vec_set', hdict_ext.byte_vec_set)
ll.add_symbol('byte_vec_free', hdict_ext.byte_vec_free)
ll.add_symbol('byte_vec_resize', hdict_ext.byte_vec_resize)

byte_vec_init = types.ExternalFunction(
    'byte_vec_init', byte_vec_type(types.int64, types.voidptr))
byte_vec_set = types.ExternalFunction(
    'byte_vec_set',
    types.void(byte_vec_type, types.int64, types.voidptr, types.int64))
byte_vec_resize = types.ExternalFunction(
    'byte_vec_resize', types.void(byte_vec_type, types.int64))
byte_vec_free = types.ExternalFunction('byte_vec_free',
                                       types.void(byte_vec_type))


class MultiMapType(types.Opaque):
    # Opaque numba type wrapping a native multimap for the given key/value types.
    def __init__(self, key_typ, val_typ):
        self.key_typ = key_typ
        self.val_typ = val_typ
        super(MultiMapType, self).__init__(name='MultiMapType{}{}'.format(key_typ, val_typ))

    @property
    def key(self):
def set_uint32(typingctx, data, idx, ch):
    """Intrinsic storing a uint32 value at int64 index *idx* of a raw buffer."""
    return (types.void(types.voidptr, types.int64, types.uint32),
            make_set_codegen(32))
register_model(SetType)(models.OpaqueModel)

_init_set_string = types.ExternalFunction("init_set_string", set_string_type())


def init_set_string():
    # Pure-Python fallback; the overload below supplies the jitted version.
    return set()


@overload(init_set_string)
def init_set_overload():
    return lambda: _init_set_string()


# Native string-set helpers.
add_set_string = types.ExternalFunction(
    "insert_set_string", types.void(set_string_type, types.voidptr))
len_set_string = types.ExternalFunction("len_set_string",
                                        types.intp(set_string_type))
num_total_chars_set_string = types.ExternalFunction(
    "num_total_chars_set_string", types.intp(set_string_type))

# TODO: box set(string)


@generated_jit(nopython=True, cache=True)
def build_set(A):
    # Pick the implementation at compile time based on the element type.
    # NOTE(review): fragment — the else branch continues past this chunk.
    if is_str_arr_typ(A):
        return _build_str_set_impl
    else:
from spead2.numba import intp_to_voidptr
import spead2.recv
from spead2.recv.numba import chunk_place_data

import numba
from numba import types
import numpy as np
import scipy

# Chunk geometry: 64 heaps of 64 KiB each per chunk.
HEAP_PAYLOAD_SIZE = 65536
HEAPS_PER_CHUNK = 64
CHUNK_PAYLOAD_SIZE = HEAPS_PER_CHUNK * HEAP_PAYLOAD_SIZE


@numba.cfunc(types.void(types.CPointer(chunk_place_data), types.uintp),
             nopython=True)
def chunk_place(data_ptr, data_size):
    data = numba.carray(data_ptr, 1)
    # Two int64 items: heap counter and payload size.
    items = numba.carray(intp_to_voidptr(data[0].items), 2, dtype=np.int64)
    heap_cnt = items[0]
    payload_size = items[1]
    # If the payload size doesn't match, discard the heap (could be descriptors etc).
    if payload_size == HEAP_PAYLOAD_SIZE:
        data[0].chunk_id = heap_cnt // HEAPS_PER_CHUNK
        data[0].heap_index = heap_cnt % HEAPS_PER_CHUNK
        data[0].heap_offset = data[0].heap_index * HEAP_PAYLOAD_SIZE


def main():
    # NOTE(review): fragment — main() continues past this chunk.
    MAX_CHUNKS = 4
_init_set_string = types.ExternalFunction("init_set_string", set_string_type())


def init_set_string():
    # Pure-Python fallback; the overload below supplies the jitted version.
    return set()


@overload(init_set_string)
def init_set_overload():
    return lambda: _init_set_string()


# Native string-set helpers.
add_set_string = types.ExternalFunction("insert_set_string",
                                        types.void(set_string_type, types.voidptr))
len_set_string = types.ExternalFunction("len_set_string",
                                        types.intp(set_string_type))
num_total_chars_set_string = types.ExternalFunction("num_total_chars_set_string",
                                                    types.intp(set_string_type))

# TODO: box set(string)


@generated_jit(nopython=True, cache=True)
def build_set(A):
    # Pick the implementation at compile time based on the element type.
    # NOTE(review): fragment — the else branch continues past this chunk.
    if is_str_arr_typ(A):
        return _build_str_set_impl
    else:
# NOTE(review): fragment — these statements are the tail of a cfarray usecase
# defined before this chunk.
out[3:5] = in_.strides
out[5] = in_.flags.c_contiguous
out[6] = in_.flags.f_contiguous
s = 0
for i, j in np.ndindex(m, n):
    s += in_[i, j] * (i - j)
out[7] = s

return cfarray_usecase


carray_dtype_usecase = make_cfarray_dtype_usecase(carray)
farray_dtype_usecase = make_cfarray_dtype_usecase(farray)

# Signatures for the usecase cfuncs (in pointer, out pointer, dims).
carray_float32_usecase_sig = types.void(types.CPointer(types.float32),
                                        types.CPointer(types.float32),
                                        types.intp, types.intp)
carray_float64_usecase_sig = types.void(types.CPointer(types.float64),
                                        types.CPointer(types.float64),
                                        types.intp, types.intp)
carray_voidptr_usecase_sig = types.void(types.voidptr, types.voidptr,
                                        types.intp, types.intp)


class TestCFunc(TestCase):

    @tag('important')
    def test_basic(self):
        """
# NOTE(review): fragment — this conditional belongs to a get_tuple_prod
# overload defined before this chunk.
if t == numba.types.containers.Tuple(()):
    return lambda a: 1

def get_tuple_prod_impl(t):
    # Product of all tuple elements (1 for the empty tuple).
    res = 1
    for a in t:
        res *= a
    return res

return get_tuple_prod_impl


sig = types.void(
    types.voidptr,  # output array
    types.voidptr,  # input array
    types.intp,  # old_len
    types.intp,  # new_len
    types.intp,  # input lower_dim size in bytes
    types.intp,  # output lower_dim size in bytes
)
oneD_reshape_shuffle = types.ExternalFunction("oneD_reshape_shuffle", sig)


@numba.njit
def dist_oneD_reshape_shuffle(lhs, in_arr, new_0dim_global_len,
                              old_0dim_global_len, dtype_size):  # pragma: no cover
    c_in_arr = np.ascontiguousarray(in_arr)
    in_lower_dims_size = get_tuple_prod(c_in_arr.shape[1:])
    out_lower_dims_size = get_tuple_prod(lhs.shape[1:])
    #print(c_in_arr)
    # print(new_0dim_global_len, old_0dim_global_len, out_lower_dims_size, in_lower_dims_size)
    # NOTE(review): fragment — this call's argument list continues past this chunk.
    oneD_reshape_shuffle(lhs.ctypes, c_in_arr.ctypes,
                         new_0dim_global_len, old_0dim_global_len,
from slumba.cyslumba import (
    register_scalar_function,
    register_aggregate_function,
)
from numba import cfunc
from numba.types import void, voidptr, CPointer, intc


@pytest.fixture
def con():
    # Fresh in-memory SQLite connection per test.
    return sqlite3.connect(':memory:')


# Minimal no-op scalar UDF with the sqlite3 callback signature
# void(sqlite3_context*, int argc, sqlite3_value** argv).
@cfunc(void(voidptr, intc, CPointer(voidptr)))
def add_one(ctx, argc, argv):
    pass


def test_register_scalar_function(con):
    # The compiled callback must have a real native address before registration.
    assert add_one.address > 0
    register_scalar_function(
        con,
        b'add_one',
        1,
        add_one.address
    )