def _prepare_argument(ctxt, bld, inp, tyinp, where='input operand'):
    """Wrap an argument in the Helper class that matches its type.

    ``_ScalarHelper`` and ``_ArrayHelper`` share a polymorphic interface, so
    callers can handle scalar and array arguments with the same code.

    An ``Optional`` type is first cast to its underlying type.  Raises
    ``NotImplementedError`` (mentioning ``where``) when the resulting type
    is neither array-compatible nor one of the accepted scalar types.
    """
    # Strip the Optional wrapper before dispatching on the concrete type.
    if isinstance(tyinp, types.Optional):
        optional_ty, tyinp = tyinp, tyinp.type
        inp = ctxt.cast(bld, inp, optional_ty, tyinp)

    # Array case: unpack shape and strides for the array helper.
    if isinstance(tyinp, types.ArrayCompatible):
        array_struct = ctxt.make_array(tyinp)(ctxt, bld, inp)
        dims = cgutils.unpack_tuple(bld, array_struct.shape, tyinp.ndim)
        steps = cgutils.unpack_tuple(bld, array_struct.strides, tyinp.ndim)
        return _ArrayHelper(ctxt, bld, dims, steps, array_struct.data,
                            tyinp.layout, tyinp.dtype, tyinp.ndim, inp)

    # Scalar case: numbers, booleans and _NPDatetimeBase instances.
    is_scalar = (
        types.unliteral(tyinp) in types.number_domain | {types.boolean}
        or isinstance(tyinp, types.scalars._NPDatetimeBase)
    )
    if is_scalar:
        return _ScalarHelper(ctxt, bld, inp, tyinp)

    raise NotImplementedError(
        'unsupported type for {0}: {1}'.format(where, str(tyinp)))
def iternext_series_array(context, builder, sig, args, result):
    """
    Implementation of iternext() for the ArrayIterator type.

    Only 1D arrays are supported; any other dimensionality raises
    NotImplementedError at lowering time.

    :param context: context descriptor
    :param builder: llvmlite IR Builder
    :param sig: iterator signature
    :param args: tuple holding the single iterator argument
    :param result: iternext result
    """
    (iterty,) = sig.args
    (iter_val,) = args
    arrayty = iterty.array_type

    if arrayty.ndim != 1:
        raise NotImplementedError("iterating over %dD array" % arrayty.ndim)

    iter_struct = context.make_helper(builder, iterty, value=iter_val)
    array_struct = make_array(arrayty)(context, builder,
                                       value=iter_struct.array)
    (length,) = cgutils.unpack_tuple(builder, array_struct.shape, count=1)

    # The iterator is valid while its current index is below the length.
    position = builder.load(iter_struct.index)
    in_bounds = builder.icmp(lc.ICMP_SLT, position, length)
    result.set_valid(in_bounds)

    with builder.if_then(in_bounds):
        item = _getitem_array_single_int(context, builder,
                                         iterty.yield_type, arrayty,
                                         array_struct, position)
        result.yield_(item)
        # Advance the stored index for the next call.
        builder.store(cgutils.increment_index(builder, position),
                      iter_struct.index)
def _define_atomic_cas(module, ordering):
    """Define an LLVM function performing an atomic compare-and-swap.

    The generated ``nrt_atomic_cas`` function is a thin wrapper around the
    LLVM ``cmpxchg`` instruction.  It returns a 32-bit int (the success flag
    zero-extended: 1 on success, 0 on failure) and writes the old value
    through its last pointer argument.

    Note
    ----
    On failure, the generated function behaves like an atomic load: the
    loaded value is stored to the last argument.
    """
    word_ptr = _word_type.as_pointer()
    signature = ir.FunctionType(
        ir.IntType(32),
        [word_ptr, _word_type, _word_type, _word_type.as_pointer()],
    )
    fn_cas = ir.Function(module, signature, name="nrt_atomic_cas")
    target_ptr, expected, replacement, old_out = fn_cas.args

    entry = fn_cas.append_basic_block()
    builder = ir.IRBuilder(entry)

    # cmpxchg yields an (old value, success flag) pair.
    exchange = builder.cmpxchg(target_ptr, expected, replacement,
                               ordering=ordering)
    previous, succeeded = cgutils.unpack_tuple(builder, exchange, 2)
    builder.store(previous, old_out)
    builder.ret(builder.zext(succeeded, signature.return_type))

    return fn_cas
def _gauss_impl(context, builder, sig, args, state):
    """Lower a gaussian draw, returning ``mu + sigma * sample``.

    A cached value is used when the state's ``has_gauss`` flag is set;
    otherwise a fresh pair is computed, one value is cached and the other
    is used as the sample.  ``state`` is ``"py"`` or ``"np"`` and selects
    the underlying uniform generator.
    """
    # All computations use the return type (either float or double).
    ty = sig.return_type
    llty = context.get_data_type(ty)

    state_ptr = get_state_ptr(context, builder, state)
    uniform = {"py": random.random, "np": np.random.random}[state]

    result_slot = cgutils.alloca_once(builder, llty, name="result")

    gauss_ptr = get_gauss_ptr(builder, state_ptr)
    has_gauss_ptr = get_has_gauss_ptr(builder, state_ptr)
    cached = cgutils.is_true(builder, builder.load(has_gauss_ptr))

    with builder.if_else(cached) as (use_cached, compute_pair):
        with use_cached:
            # A value is cached: use it and clear the flag.
            builder.store(builder.load(gauss_ptr), result_slot)
            builder.store(const_int(0), has_gauss_ptr)
        with compute_pair:
            # Nothing cached: compute a pair of numbers using the
            # Box-Muller transform; keep one and return the other.
            pair = context.compile_internal(
                builder,
                _gauss_pair_impl(uniform),
                signature(types.UniTuple(ty, 2)),
                (),
            )
            kept, used = cgutils.unpack_tuple(builder, pair, 2)
            builder.store(kept, gauss_ptr)
            builder.store(used, result_slot)
            builder.store(const_int(1), has_gauss_ptr)

    mu, sigma = args
    sample = builder.load(result_slot)
    scaled = builder.fmul(sigma, sample)
    return builder.fadd(mu, scaled)
def _box_class_instance(typ, val, c):
    """Box a native class instance into a Python ``Box`` object.

    ``val`` is unpacked into a meminfo pointer and a data pointer; both are
    stored, as void pointers, into the freshly created box at the byte
    offsets given by ``_box``.  Returns the new box object.
    """
    builder = c.builder
    meminfo, dataptr = cgutils.unpack_tuple(builder, val)

    # Create the Box instance.
    # Note: the specialized box class is kept alive by the cache.
    box_subclassed = _specialize_box(typ)
    boxcls_addr = c.context.add_dynamic_addr(
        builder, id(box_subclassed), info="box_class_instance",
    )
    box_cls = builder.bitcast(boxcls_addr, c.pyapi.pyobj)
    box = c.pyapi.call_function_objargs(box_cls, ())

    # Initialize the Box instance.
    llvoidptr = ir.IntType(8).as_pointer()
    addr_meminfo = builder.bitcast(meminfo, llvoidptr)
    addr_data = builder.bitcast(dataptr, llvoidptr)

    members = (
        (_box.box_meminfoptr_offset, addr_meminfo),
        (_box.box_dataptr_offset, addr_data),
    )
    for member_offset, address in members:
        # Access each member by byte offset from the box pointer.
        offset = c.context.get_constant(types.uintp, member_offset)
        member_ptr = cgutils.pointer_add(builder, box, offset)
        slot = builder.bitcast(member_ptr, llvoidptr.as_pointer())
        builder.store(address, slot)

    return box
def hsail_atomic_add_tuple(context, builder, sig, args):
    """Lower an atomic add of ``val`` into ``ary`` at the given index.

    The index is either a single ``intp`` or a tuple whose members are cast
    to ``intp``.  Raises ``TypeError`` when the value type differs from the
    array dtype or the number of indices differs from the array's ndim.
    """
    aryty, indty, valty = sig.args
    ary, inds, val = args
    elem_ty = aryty.dtype

    if indty == types.intp:
        # Just a single integer.
        index_types = [indty]
        index_values = [inds]
    else:
        index_types = indty
        unpacked = cgutils.unpack_tuple(builder, inds,
                                        count=len(index_types))
        index_values = [
            context.cast(builder, value, ty, types.intp)
            for ty, value in zip(index_types, unpacked)
        ]

    if elem_ty != valty:
        raise TypeError("expecting %s but got %s" % (elem_ty, valty))

    if aryty.ndim != len(index_types):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(index_types)))

    array_struct = context.make_array(aryty)(context, builder, ary)
    item_ptr = cgutils.get_item_pointer(context, builder, aryty,
                                        array_struct, index_values)

    return builder.atomic_rmw("add", item_ptr, val, ordering='monotonic')
def codegen(context, builder, signature, args):
    """Allocate a new array of the signature's return type.

    The first argument is a tuple of extents; allocation is delegated to
    the ``np.empty`` implementation and returned as a new reference.
    """
    # The return type must be fully defined by now.
    array_type = signature.return_type
    assert array_type.is_precise()

    extents = unpack_tuple(builder, args[0])
    # Redirect implementation to np.empty.
    new_array = _empty_nd_impl(context, builder, array_type, extents)
    return impl_ret_new_ref(context, builder, array_type,
                            new_array._getvalue())
def codegen(context, builder, sig, args):
    """Return the first extent of the array behind an array iterator."""
    [iterty] = sig.args
    [iter_value] = args
    intp_t = context.get_value_type(types.intp)

    iter_struct = context.make_helper(builder, iterty, value=iter_value)
    array_struct = make_array(iterty.array_type)(context, builder,
                                                 value=iter_struct.array)
    extents = cgutils.unpack_tuple(builder, array_struct.shape)

    # The array iterates along the outer dimension.
    outer_extent = extents[0]
    return impl_ret_untracked(context, builder, intp_t, outer_extent)
def atomic_add(context, builder, sig, args, name):
    """Lower an atomic operation ``name`` on an array element.

    Raises ``ImportError`` when atomic support is not present, and
    ``TypeError`` when the value type differs from the array dtype or the
    number of indices differs from the array's ndim.  The call is emitted
    for the LOCAL address space when the array is a ``DPPYArray`` living
    there, otherwise for the GLOBAL address space.
    """
    from .atomics import atomic_support_present

    if not atomic_support_present():
        raise ImportError(
            "Atomic support is not present, can not perform atomic_add")

    context.extra_compile_options[target.LINK_ATOMIC] = True

    aryty, indty, valty = sig.args
    ary, inds, val = args
    elem_ty = aryty.dtype

    if indty == types.intp:
        # Just a single integer.
        index_types = [indty]
        index_values = [inds]
    else:
        index_types = indty
        unpacked = cgutils.unpack_tuple(builder, inds,
                                        count=len(index_types))
        index_values = [
            context.cast(builder, value, ty, types.intp)
            for ty, value in zip(index_types, unpacked)
        ]

    if elem_ty != valty:
        raise TypeError("expecting %s but got %s" % (elem_ty, valty))

    if aryty.ndim != len(index_types):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(index_types)))

    array_struct = context.make_array(aryty)(context, builder, ary)
    item_ptr = cgutils.get_item_pointer(context, builder, aryty,
                                        array_struct, index_values)

    in_local_memory = (isinstance(aryty, DPPYArray)
                       and aryty.addrspace == address_space.LOCAL)
    if in_local_memory:
        space = address_space.LOCAL
    else:
        space = address_space.GLOBAL

    return insert_and_call_atomic_fn(
        context, builder, sig, name, elem_ty, item_ptr, val, space,
    )
def literallist_to_literallist(context, builder, fromty, toty, val):
    """Cast a literal list to another literal list of the same length.

    Each item is cast from its source type to the matching target type.
    Raises ``NotImplementedError`` when the lengths differ.
    """
    if len(fromty) != len(toty):
        # Disallowed by typing layer
        raise NotImplementedError

    unpacked = cgutils.unpack_tuple(builder, val, len(fromty))
    converted = []
    for item, src_ty, dst_ty in zip(unpacked, fromty, toty):
        converted.append(context.cast(builder, item, src_ty, dst_ty))
    return context.make_tuple(builder, toty, converted)
def codegen(context: CodegenContext, builder: ir.IRBuilder,
            signature: Signature, args: Sequence[ir.Value]):
    """Build a tuple by applying one mapped function per position.

    For position ``i``, the implementation resolved for ``fn_sigs[i]`` is
    called with the ``i``-th element extracted from each unpacked input
    tuple.  The results form a tuple of the signature's return type.
    """
    map_fn_ty, _ = signature.args
    _, packed = args
    columns = cgutils.unpack_tuple(builder, packed)

    def mapped_elements():
        # Lazily resolve and call each implementation, one per position.
        for position, fn_sig in enumerate(fn_sigs):
            impl = context.get_function(map_fn_ty, fn_sig)
            operands = [builder.extract_value(column, position)
                        for column in columns]
            yield impl(builder, operands)

    return context.make_tuple(builder, signature.return_type,
                              mapped_elements())
def _normalize_indices(context, builder, indty, inds):
    """
    Convert integer indices into tuple of intp.

    A single integer index is treated as a one-element ``UniTuple``.
    Returns the (possibly wrapped) index type and the list of indices,
    each cast to ``intp``.
    """
    is_single_integer = indty in types.integer_domain
    if is_single_integer:
        indty = types.UniTuple(dtype=indty, count=1)
        raw_indices = [inds]
    else:
        raw_indices = cgutils.unpack_tuple(builder, inds, count=len(indty))

    indices = []
    for index_ty, index_val in zip(indty, raw_indices):
        indices.append(context.cast(builder, index_val, index_ty,
                                    types.intp))
    return indty, indices
def tuple_to_tuple(context, builder, fromty, toty, val):
    """Cast a tuple to another tuple type of the same length.

    Raises ``NotImplementedError`` when either side is a named tuple or
    when the lengths differ.
    """
    involves_named_tuple = (isinstance(fromty, types.BaseNamedTuple)
                            or isinstance(toty, types.BaseNamedTuple))
    if involves_named_tuple:
        # Disallowed by typing layer
        raise NotImplementedError

    if len(fromty) != len(toty):
        # Disallowed by typing layer
        raise NotImplementedError

    unpacked = cgutils.unpack_tuple(builder, val, len(fromty))
    converted = []
    for item, src_ty, dst_ty in zip(unpacked, fromty, toty):
        converted.append(context.cast(builder, item, src_ty, dst_ty))
    return context.make_tuple(builder, toty, converted)
def getitem(self, index):
    """Fetch the list item at ``index`` and cast it to its raw type.

    The item comes from ``_list_getitem_borrowed``, which yields a pair
    whose second member is the item.  The raw type is the list dtype when
    that dtype is ``NoneType``; otherwise it is the ``type`` attribute of
    the returned item type.
    """
    context = self._context
    builder = self._builder
    list_ty = self._list_ty

    sig, impl = _list_getitem_borrowed._defn(context.typing_context,
                                             list_ty, types.intp)
    status_and_item = impl(context, builder, sig,
                           (self._iter.parent, index))
    _, item = cgutils.unpack_tuple(builder, status_and_item)

    item_ty = sig.return_type[1]
    if isinstance(list_ty.dtype, types.NoneType):
        raw_ty = list_ty.dtype
    else:
        raw_ty = item_ty.type
    return context.cast(builder, item, item_ty, raw_ty)
def static_getitem_tuple(context, builder, sig, args):
    """Lower ``tup[idx]`` for a compile-time constant ``idx``.

    An int index (negative values count from the end) extracts a single
    element; a slice builds a new tuple from the selected elements.
    Raises ``IndexError`` for an out-of-range int and
    ``NotImplementedError`` for any other kind of index.
    """
    tupty, _ = sig.args
    tup, idx = args

    if isinstance(idx, int):
        size = len(tupty)
        position = idx + size if idx < 0 else idx
        if position < 0 or position >= size:
            raise IndexError("cannot index at %d in %s" % (position, tupty))
        res = builder.extract_value(tup, position)
    elif isinstance(idx, slice):
        selected = cgutils.unpack_tuple(builder, tup)[idx]
        res = context.make_tuple(builder, sig.return_type, selected)
    else:
        raise NotImplementedError("unexpected index %r for %s"
                                  % (idx, sig.args[0]))

    return impl_ret_borrowed(context, builder, sig.return_type, res)
def func_impl(context, builder, sig, args):
    """Lower an atomic read-modify-write on a single array location.

    Handles both forms::

        array[a] = scalar_or_array
        array[a,..,b] = scalar_or_array

    Raises ``NotImplementedError`` when the index does not denote a single
    location, and ``TypeError`` when no atomic operation is available for
    the array's dtype.
    """
    aryty, idxty, valty = sig.args
    ary, idx, val = args

    if isinstance(idxty, types.BaseTuple):
        index_types = idxty.types
        indices = cgutils.unpack_tuple(builder, idx, count=len(idxty))
    else:
        index_types = (idxty,)
        indices = (idx,)

    ary = make_array(aryty)(context, builder, ary)

    # First try basic indexing to see if a single array location is denoted.
    index_types, indices = normalize_indices(context, builder,
                                             index_types, indices)
    dataptr, shapes, _strides = basic_indexing(
        context, builder, aryty, ary, index_types, indices,
        boundscheck=context.enable_boundscheck,
    )
    if shapes:
        raise NotImplementedError("Complex shapes are not supported")

    # Cast the source value to the array's element type.
    val = context.cast(builder, val, valty, aryty.dtype)

    # Pick the operation matching the element type.
    elem_ty = aryty.dtype
    if isinstance(elem_ty, types.Integer):
        operation = iop if elem_ty.signed else uop
    elif isinstance(elem_ty, types.Float):
        operation = fop
    else:
        operation = None

    if operation is None:
        raise TypeError("Atomic operation not supported on " + str(aryty))

    return _atomic_rmw(context, builder, operation, aryty, val, dataptr)
def _image_to_array(context, builder, shapes_array, arrtype, data, img):
    """Copy image data into a newly allocated array and release the image.

    The extents are loaded from ``shapes_array``, the data pointer loaded
    from ``data`` is copied into the new array, and ``cv_mat_release`` is
    called on ``img``.  Returns the array as a new reference.
    """
    # Allocate the array.
    extents = cgutils.unpack_tuple(builder, builder.load(shapes_array))
    new_array = _empty_nd_impl(context, builder, arrtype, extents)

    destination = new_array.data
    source = builder.load(data)
    cgutils.raw_memcpy(builder, destination, source, new_array.nitems,
                       new_array.itemsize, align=1)

    # Clean up the cv::Mat image.
    release_fnty = lir.FunctionType(lir.VoidType(),
                                    [lir.IntType(8).as_pointer()])
    release_fn = builder.module.get_or_insert_function(
        release_fnty, name="cv_mat_release")
    builder.call(release_fn, [img])

    return impl_ret_new_ref(context, builder, arrtype,
                            new_array._getvalue())
def static_getitem_tuple(context, builder, sig, args):
    """Lower ``tup[idx]`` for a compile-time constant ``idx``.

    An int index (negative values count from the end) extracts a single
    element; a slice builds a new tuple from the selected elements.  For a
    ``LiteralStrKeyDict`` the index type's literal value is looked up in
    the dict's fields and the element at that offset is extracted.
    Raises ``IndexError`` for an out-of-range int and
    ``NotImplementedError`` for any other kind of index.
    """
    tupty, idxty = sig.args
    tup, idx = args

    if isinstance(idx, int):
        size = len(tupty)
        position = idx + size if idx < 0 else idx
        if position < 0 or position >= size:
            raise IndexError("cannot index at %d in %s" % (position, tupty))
        res = builder.extract_value(tup, position)
    elif isinstance(idx, slice):
        selected = cgutils.unpack_tuple(builder, tup)[idx]
        res = context.make_tuple(builder, sig.return_type, selected)
    elif isinstance(tupty, types.LiteralStrKeyDict):
        # Pretend to be a dictionary: map the key to its field offset.
        key = idxty.literal_value
        field_offset = tupty.fields.index(key)
        res = builder.extract_value(tup, field_offset)
    else:
        raise NotImplementedError("unexpected index %r for %s"
                                  % (idx, sig.args[0]))

    return impl_ret_borrowed(context, builder, sig.return_type, res)
def cast_LiteralStrKeyDict_LiteralStrKeyDict(context, builder, fromty, toty,
                                             val):
    """Cast one LiteralStrKeyDict to another, item by item.

    The key/value-type pairs of both sides are first checked: keys must be
    equal and value types must unify, otherwise ``LoweringError`` is
    raised.  The value is then cast as a plain tuple of the member types.
    """
    # These checks are just guards; typing should have picked up any
    # problems already.
    paired_items = zip(fromty.literal_value.items(),
                       toty.literal_value.items())
    for (src_key, src_val_ty), (dst_key, dst_val_ty) in paired_items:
        # Keys must be the same.
        if src_key != dst_key:
            msg = "LiteralDictionary keys are not the same {} != {}"
            raise LoweringError(msg.format(src_key, dst_key))
        # Values must be of the same (unifiable) type.
        unified = context.typing_context.unify_pairs(src_val_ty, dst_val_ty)
        if unified is None:
            msg = "LiteralDictionary values cannot by unified, have {} and {}"
            raise LoweringError(msg.format(src_val_ty, dst_val_ty))

    # The loop never breaks, so the cast always runs once the guards pass.
    src_tuple_ty = types.Tuple(fromty.types)
    dst_tuple_ty = types.Tuple(toty.types)
    unpacked = cgutils.unpack_tuple(builder, val, len(src_tuple_ty))
    converted = [
        context.cast(builder, item, src_ty, dst_ty)
        for item, src_ty, dst_ty in zip(unpacked, src_tuple_ty, dst_tuple_ty)
    ]
    return context.make_tuple(builder, dst_tuple_ty, converted)
def codegen(context, builder, sig, args):
    """Copy an image buffer into a new array and delete the buffer.

    ``args`` holds the data pointer and the shape tuple.  The data is
    copied into a freshly allocated array of type ``arr_typ``, then
    ``cv_delete_buf`` is called on the data pointer.
    """
    assert len(args) == 2
    data, shape = args[0], args[1]

    # XXX: unnecessary allocation and copy, reuse data pointer
    extents = cgutils.unpack_tuple(builder, shape, shape.type.count)
    new_array = _empty_nd_impl(context, builder, arr_typ, extents)
    cgutils.raw_memcpy(builder, new_array.data, data, new_array.nitems,
                       new_array.itemsize, align=1)

    # Clean up the image buffer.
    release_fnty = lir.FunctionType(lir.VoidType(),
                                    [lir.IntType(8).as_pointer()])
    release_fn = builder.module.get_or_insert_function(
        release_fnty, name="cv_delete_buf")
    builder.call(release_fn, [data])

    return impl_ret_new_ref(context, builder, sig.return_type,
                            new_array._getvalue())
def min_iterable(context, builder, sig, args):
    """Lower ``min`` over a single tuple argument using ``operator.lt``."""
    (tuple_value, *_) = args
    member_types = list(sig.args[0])
    members = cgutils.unpack_tuple(builder, tuple_value)
    return do_minmax(context, builder, member_types, members, operator.lt)
def _unpack_output_values(ufunc, builder, values): if ufunc.nout == 1: return [values] else: return cgutils.unpack_tuple(builder, values)
def _build_array(context, builder, array_ty, input_types, inputs):
    """Allocate the implicit output array for a set of inputs.

    Takes the target context, the builder, the output array type and the
    inputs (array inputs expose ``ndim``; anything else is skipped as a
    scalar).  The output shape is obtained by broadcasting every array
    input onto a destination shape that starts as all ones.  Returns an
    ``_ArrayHelper`` for the allocated, possibly ``__array_wrap__``-ed,
    output array.
    """
    # First, strip optional types; ufunc loops are typed on concrete types.
    concrete_types = [
        ty.type if isinstance(ty, types.Optional) else ty
        for ty in input_types
    ]

    intp_ty = context.get_value_type(types.intp)

    def intp_const(value):
        return context.get_constant(types.intp, value)

    one = intp_const(1)

    src_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim,
                                    "src_shape")
    dest_ndim = intp_const(array_ty.ndim)
    dest_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim,
                                     "dest_shape")
    dest_slots = tuple([
        cgutils.gep_inbounds(builder, dest_shape, axis)
        for axis in range(array_ty.ndim)
    ])

    # Initialize the destination shape with all ones.
    for slot in dest_slots:
        builder.store(one, slot)

    # For each argument, try to broadcast onto the destination shape,
    # mutating along any axis where the argument shape is not one and
    # the destination shape is one.
    for arg_number, arg in enumerate(inputs):
        if not hasattr(arg, "ndim"):
            # Skip scalar arguments.
            continue

        arg_ndim = intp_const(arg.ndim)
        for axis in range(arg.ndim):
            extent = arg.shape[axis]
            src_slot = cgutils.gep_inbounds(builder, src_shape, axis)
            builder.store(extent, src_slot)

        status = context.compile_internal(
            builder, _broadcast_onto, _broadcast_onto_sig,
            [arg_ndim, src_shape, dest_ndim, dest_shape])
        failed = builder.icmp(lc.ICMP_SLT, status, one)
        with cgutils.if_unlikely(builder, failed):
            msg = "unable to broadcast argument %d to output array" % (
                arg_number,)
            loc = errors.loc_info.get('loc', None)
            if loc is not None:
                msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line)
            context.call_conv.return_user_exc(builder, ValueError, (msg,))

    real_array_ty = array_ty.as_array

    dest_extents = tuple([builder.load(slot) for slot in dest_slots])
    array_val = arrayobj._empty_nd_impl(context, builder, real_array_ty,
                                        dest_extents)

    # Get the best argument to call __array_wrap__ on.
    wrapper_index = select_array_wrapper(concrete_types)
    wrapper_ty = concrete_types[wrapper_index]
    try:
        # __array_wrap__(source wrapped array, out array) -> out wrapped array
        array_wrap = context.get_function(
            '__array_wrap__', array_ty(wrapper_ty, real_array_ty))
    except NotImplementedError:
        # If it's the same priority as a regular array, assume we
        # should use the allocated array unchanged.
        if wrapper_ty.array_priority != types.Array.array_priority:
            raise
        out_val = array_val._getvalue()
    else:
        wrap_args = (inputs[wrapper_index].return_val,
                     array_val._getvalue())
        out_val = array_wrap(builder, wrap_args)

    ndim = array_ty.ndim
    out_shape = cgutils.unpack_tuple(builder, array_val.shape, ndim)
    out_strides = cgutils.unpack_tuple(builder, array_val.strides, ndim)
    return _ArrayHelper(context, builder, out_shape, out_strides,
                        array_val.data, array_ty.layout, array_ty.dtype,
                        ndim, out_val)
def range_to_range(context, builder, fromty, toty, val):
    """Cast a range to another range type.

    The three members of the range are each cast from ``fromty.dtype`` to
    ``toty.dtype`` and repacked into an anonymous struct.
    """
    members = cgutils.unpack_tuple(builder, val, 3)
    converted = []
    for member in members:
        converted.append(
            context.cast(builder, member, fromty.dtype, toty.dtype))
    return cgutils.make_anonymous_struct(builder, converted)
def codegen(context, builder, sig, args):
    """Return the member at position ``index`` of a three-member value."""
    [packed] = args
    members = cgutils.unpack_tuple(builder, packed, 3)
    selected = members[index]
    return impl_ret_untracked(context, builder, sig.return_type, selected)
def native_atomic_add(context, builder, sig, args):
    """Lower an atomic add as a call to a SPIR-V atomic function.

    float32/float64 arrays use ``__spirv_AtomicFAddEXT`` (and request the
    ``SPV_EXT_shader_atomic_float_add`` extension); int32/int64 arrays use
    ``__spirv_AtomicIAdd``.  Raises ``TypeError`` for any other dtype, when
    the value type differs from the array dtype, or when the number of
    indices differs from the array's ndim.
    """
    aryty, indty, valty = sig.args
    ary, inds, val = args
    elem_ty = aryty.dtype

    if indty == types.intp:
        # Just a single integer.
        index_types = [indty]
        index_values = [inds]
    else:
        index_types = indty
        unpacked = cgutils.unpack_tuple(builder, inds,
                                        count=len(index_types))
        index_values = [
            context.cast(builder, value, ty, types.intp)
            for ty, value in zip(index_types, unpacked)
        ]

    if elem_ty != valty:
        raise TypeError("expecting %s but got %s" % (elem_ty, valty))

    if aryty.ndim != len(index_types):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (aryty.ndim, len(index_types)))

    array_struct = context.make_array(aryty)(context, builder, ary)
    item_ptr = cgutils.get_item_pointer(context, builder, aryty,
                                        array_struct, index_values)

    # Choose the SPIR-V function from the element type.
    if elem_ty in (types.float32, types.float64):
        context.extra_compile_options[target.LLVM_SPIRV_ARGS] = [
            "--spirv-ext=+SPV_EXT_shader_atomic_float_add"
        ]
        name = "__spirv_AtomicFAddEXT"
    elif elem_ty in (types.int32, types.int64):
        name = "__spirv_AtomicIAdd"
    else:
        raise TypeError("Unsupported type")

    assert name != ""

    # The pointer argument carries the array's address space.
    ptr_type = context.get_value_type(elem_ty).as_pointer()
    ptr_type.addrspace = aryty.addrspace

    retty = context.get_value_type(sig.return_type)
    i32 = ir.IntType(32)
    spirv_fn_arg_types = [
        ptr_type,
        i32,
        ir.IntType(32),
        context.get_value_type(sig.args[2]),
    ]

    from numba_dppy import extended_numba_itanium_mangler as ext_itanium_mangler

    numba_ptr_ty = types.CPointer(elem_ty, addrspace=ptr_type.addrspace)
    mangled_fn_name = ext_itanium_mangler.mangle(
        name,
        [
            numba_ptr_ty,
            "__spv.Scope.Flag",
            "__spv.MemorySemanticsMask.Flag",
            valty,
        ],
    )

    fnty = ir.FunctionType(retty, spirv_fn_arg_types)
    fn = cgutils.get_or_insert_function(builder.module, fnty,
                                        mangled_fn_name)
    fn.calling_convention = target.CC_SPIR_FUNC

    # Relaxed ordering at device scope.
    memory_order = atomic_helper.sycl_memory_order.relaxed
    memory_scope = atomic_helper.sycl_memory_scope.device
    spirv_scope = atomic_helper.get_scope(memory_scope)
    spirv_semantics = atomic_helper.get_memory_semantics_mask(memory_order)

    scope_const = context.get_constant(types.int32, spirv_scope)
    semantics_const = context.get_constant(types.int32, spirv_semantics)
    return builder.call(fn, [item_ptr, scope_const, semantics_const, val])
def tuple_add(context, builder, sig, args):
    """Lower tuple concatenation (``left + right``)."""
    unpacked_operands = [cgutils.unpack_tuple(builder, operand)
                         for operand in args]
    left_items, right_items = unpacked_operands
    combined = left_items + right_items
    result = context.make_tuple(builder, sig.return_type, combined)
    # The tuple's contents are borrowed
    return impl_ret_borrowed(context, builder, sig.return_type, result)