def numpy_ufunc_kernel(context, builder, sig, args, kernel_class,
                       explicit_output=True):
    # Code generator emitting the loop nest that applies an element-wise
    # kernel over every position of the output operand (scalar operands
    # are handled through the same path).
    #
    # context         - the code generation context
    # builder         - the code emitter
    # sig             - signature of the ufunc call
    # args            - the values passed to the ufunc
    # kernel_class    - class instantiated as
    #                   kernel_class(context, builder, outer_sig); its
    #                   generate(*input_values) yields the value stored
    #                   into the output for the current loop position
    # explicit_output - true when the caller passed the output operand
    #                   itself (ie: np.add(x, y, r)); it is then the last
    #                   entry of args
    #
    # Returns the output's return value wrapped by impl_ret_new_ref.
    prepared = [_prepare_argument(context, builder, value, ty)
                for value, ty in zip(args, sig.args)]

    if explicit_output:
        if context.enable_nrt:
            # The caller-provided output is handed back as a new
            # reference, so take one on it.
            context.nrt.incref(builder, sig.return_type, args[-1])
    else:
        # No output operand was given: create one and make it the last
        # prepared operand.
        result_ty = sig.return_type
        if isinstance(result_ty, types.ArrayCompatible):
            result = _build_array(context, builder, result_ty, sig.args,
                                  prepared)
        else:
            null_value = lc.Constant.null(context.get_value_type(result_ty))
            result = _prepare_argument(context, builder, null_value,
                                       result_ty)
        prepared.append(result)

    operands = prepared[:-1]
    destination = prepared[-1]

    # typing.signature takes the return type first, whereas the output is
    # kept last in the prepared operand list.
    base_types = [operand.base_type for operand in prepared]
    outer_sig = typing.signature(base_types[-1], *base_types[:-1])
    kernel = kernel_class(context, builder, outer_sig)
    intpty = context.get_value_type(types.intp)

    trackers = [operand.create_iter_indices() for operand in operands]

    with cgutils.loop_nest(builder, destination.shape,
                           intp=intpty) as loop_indices:
        loaded = []
        for position, (tracker, operand) in enumerate(zip(trackers,
                                                          operands)):
            tracker.update_indices(loop_indices, position)
            loaded.append(operand.load_data(tracker.as_values()))

        destination.store_data(loop_indices, kernel.generate(*loaded))

    return impl_ret_new_ref(context, builder, sig.return_type,
                            destination.return_val)
def box_charseq(typ, val, c):
    # Box a fixed-size char sequence value into a Python bytes object.
    #
    # typ - the char sequence type; typ.count is the storage size scanned
    # val - the native value to box
    # c   - the boxing context (provides builder, context and pyapi)
    builder = c.builder

    # Spill the value to the stack so it can be addressed byte by byte.
    storage = cgutils.alloca_once_value(builder, value=val)
    data = builder.bitcast(storage, c.pyapi.cstring)

    capacity = c.context.get_constant(types.intp, typ.count)
    index_ty = capacity.type
    length_slot = cgutils.alloca_once_value(builder, index_ty(0))

    # Find the length of the string, mimicking Numpy's behaviour:
    # search for the last non-null byte in the underlying storage
    # (e.g. b'A\0\0B\0\0\0' will return the logical string b'A\0\0B')
    with cgutils.loop_nest(builder, [capacity], index_ty) as [pos]:
        current = builder.load(builder.gep(data, [pos]))
        # Every non-null byte pushes the recorded length to just past it;
        # the last such store wins.
        with builder.if_then(cgutils.is_not_null(builder, current)):
            builder.store(builder.add(pos, index_ty(1)), length_slot)

    length = builder.load(length_slot)
    return c.pyapi.bytes_from_string_and_size(data, length)
def box_unicodecharseq(typ, val, c):
    # Box a fixed-size unicode char sequence value into a Python str.
    #
    # typ - the unicode char sequence type; typ.count is the number of
    #       character slots scanned
    # val - the native value to box
    # c   - the boxing context (provides builder, context and pyapi)
    #
    # The logical length is one past the last non-NUL character in the
    # storage, so embedded NUL characters are kept and trailing ones are
    # dropped.
    builder = c.builder
    char_size = numpy_support.sizeof_unicode_char

    # XXX could kind be determined from strptr?
    unicode_kind = {
        1: c.pyapi.py_unicode_1byte_kind,
        2: c.pyapi.py_unicode_2byte_kind,
        4: c.pyapi.py_unicode_4byte_kind}[char_size]
    kind = c.context.get_constant(types.int32, unicode_kind)

    # Spill the value to the stack so it can be addressed byte by byte.
    # NOTE(review): the offsets below assume c.pyapi.cstring is a
    # byte-sized pointer, as the original idx * char_size indexing did.
    rawptr = cgutils.alloca_once_value(builder, value=val)
    strptr = builder.bitcast(rawptr, c.pyapi.cstring)

    fullsize = c.context.get_constant(types.intp, typ.count)
    zero = fullsize.type(0)
    one = fullsize.type(1)
    step = fullsize.type(char_size)
    count = cgutils.alloca_once_value(builder, zero)
    with cgutils.loop_nest(builder, [fullsize], fullsize.type) as [idx]:
        # Byte offset of the character at idx.
        base = builder.mul(idx, step)
        # A character is non-NUL if ANY of its bytes is non-zero.  Testing
        # only the first byte would misclassify characters such as U+0100,
        # whose first stored byte is zero in little-endian order, and
        # truncate strings that end with them.
        nonzero = None
        for offset in range(char_size):
            byte_ptr = builder.gep(
                strptr, [builder.add(base, fullsize.type(offset))])
            byte_set = cgutils.is_not_null(builder, builder.load(byte_ptr))
            if nonzero is None:
                nonzero = byte_set
            else:
                nonzero = builder.or_(nonzero, byte_set)
        # If the char is non-NUL, store the next index as count
        with builder.if_then(nonzero):
            builder.store(builder.add(idx, one), count)
    strlen = builder.load(count)
    return c.pyapi.string_from_kind_and_data(kind, strptr, strlen)
def numpy_ufunc_kernel(context, builder, sig, args, ufunc, kernel_class):
    # Code generator emitting the loop nest that applies an element-wise
    # kernel over every position of the output operands (scalar operands
    # are handled through the same path).
    #
    # context      - the code generation context
    # builder      - the code emitter
    # sig          - signature of the ufunc call
    # args         - the values passed to the ufunc: the inputs, optionally
    #                followed by explicitly supplied outputs
    # ufunc        - the ufunc itself (its nin, nout and __name__ are used)
    # kernel_class - class instantiated as
    #                kernel_class(context, builder, outer_sig); the result
    #                of its generate(*input_values) is unpacked into one
    #                value per output
    #
    # Raises RuntimeError when fewer than ufunc.nin arguments are given.
    # Returns the packed output value(s) wrapped by impl_ret_new_ref.
    n_inputs = ufunc.nin

    prepared = [_prepare_argument(context, builder, value, ty)
                for value, ty in zip(args, sig.args)]

    if len(prepared) < n_inputs:
        raise RuntimeError(
            "Not enough inputs to {}, expected {} got {}".format(
                ufunc.__name__, ufunc.nin, len(prepared)))

    for out_pos, out_ty in enumerate(_unpack_output_types(ufunc, sig)):
        arg_pos = n_inputs + out_pos

        if arg_pos < len(prepared):
            # This output was supplied by the caller; it is handed back as
            # a new reference, so take one on it.
            if context.enable_nrt:
                context.nrt.incref(builder, out_ty, args[arg_pos])
            continue

        # This output was not supplied: create it and append it so that
        # outputs line up after the inputs.
        if isinstance(out_ty, types.ArrayCompatible):
            created = _build_array(context, builder, out_ty, sig.args,
                                   prepared)
        else:
            null_value = lc.Constant.null(context.get_value_type(out_ty))
            created = _prepare_argument(context, builder, null_value, out_ty)
        prepared.append(created)

    sources = prepared[:n_inputs]
    destinations = prepared[n_inputs:]
    assert len(destinations) == ufunc.nout

    destination_types = [dest.base_type for dest in destinations]
    source_types = [src.base_type for src in sources]
    outer_sig = _ufunc_loop_sig(destination_types, source_types)
    kernel = kernel_class(context, builder, outer_sig)
    intpty = context.get_value_type(types.intp)

    trackers = [src.create_iter_indices() for src in sources]

    # assume outputs are all the same size, which numpy requires
    loopshape = destinations[0].shape

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as loop_indices:
        loaded = []
        for position, (tracker, src) in enumerate(zip(trackers, sources)):
            tracker.update_indices(loop_indices, position)
            loaded.append(src.load_data(tracker.as_values()))

        produced = _unpack_output_values(ufunc, builder,
                                         kernel.generate(*loaded))
        for value, dest in zip(produced, destinations):
            dest.store_data(loop_indices, value)

    packed = _pack_output_values(ufunc, context, builder, sig.return_type,
                                 [dest.return_val for dest in destinations])
    return impl_ret_new_ref(context, builder, sig.return_type, packed)