def timedelta_mod_timedelta(context, builder, sig, args):
    """
    Lower ``timedelta % timedelta`` using Python's integer modulus rules.

    Inspired by
    https://github.com/numpy/numpy/blob/fe8072a12d65e43bd2e0b0f9ad67ab0108cc54b3/numpy/core/src/umath/loops.c.src#L1424

    The algorithm for ``a % b`` is:

    * if ``a`` or ``b`` is NaT, the result is NaT;
    * else if ``b`` is 0, the result is NaT;
    * else ``a`` and ``b`` are treated as integers and the Pythonic modulus
      is computed (a non-zero result takes the sign of the divisor).

    Parameters are the usual lowering arguments: *sig* carries the operand
    and return types, *args* the two operand values.
    """
    [va, vb] = args
    [ta, tb] = sig.args
    not_nan = are_not_nat(builder, [va, vb])
    ll_ret_type = context.get_value_type(sig.return_type)
    ret = alloc_timedelta_result(builder)
    # NaT is the result unless both guards below succeed.
    builder.store(NAT, ret)
    zero = Constant.int(ll_ret_type, 0)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            vapos = builder.icmp_signed('>', va, zero)
            vbpos = builder.icmp_signed('>', vb, zero)
            # srem truncates toward zero, so the remainder carries the sign
            # of the dividend.
            rem = builder.srem(va, vb)
            # The truncated remainder is already the Pythonic one when both
            # operands are on the same side of zero (both positive OR both
            # non-positive), or when the division is exact.  Testing only
            # "both positive" would wrongly adjust e.g. -7 % -3.
            same_side = builder.not_(builder.xor(vapos, vbpos))
            cond = builder.or_(same_side,
                               builder.icmp_signed('==', rem, zero))
            with builder.if_else(cond) as (then, otherwise):
                with then:
                    builder.store(rem, ret)
                with otherwise:
                    # Mixed signs with a non-zero remainder: shift the
                    # remainder so that it takes the divisor's sign.
                    builder.store(builder.add(rem, vb), ret)
    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def timedelta_floor_div_timedelta(context, builder, sig, args):
    """
    Lower ``timedelta // timedelta`` using Python's floor-division rules.

    The result slot is initialised to 0 and is only overwritten when neither
    operand is NaT and the (normalized) divisor is non-zero.  Otherwise the
    quotient is rounded toward negative infinity, as Python's ``//`` does.
    """
    [va, vb] = args
    [ta, tb] = sig.args
    ll_ret_type = context.get_value_type(sig.return_type)
    not_nan = are_not_nat(builder, [va, vb])
    ret = cgutils.alloca_once(builder, ll_ret_type, name='ret')
    zero = Constant.int(ll_ret_type, 0)
    one = Constant.int(ll_ret_type, 1)
    builder.store(zero, ret)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            # sdiv/srem truncate toward zero.  The truncated quotient is one
            # too large exactly when the division is inexact and the operands
            # have opposite signs.  Nudging the dividend by +/-1 before
            # dividing (the previous approach) gives wrong answers for
            # e.g. -7 // -2, -1 // 3 and 1 // -3.
            quot = builder.sdiv(va, vb)
            rem = builder.srem(va, vb)
            inexact = builder.icmp_signed('!=', rem, zero)
            vaneg = builder.icmp_signed('<', va, zero)
            vbneg = builder.icmp_signed('<', vb, zero)
            opposite_signs = builder.xor(vaneg, vbneg)
            needs_fixup = builder.and_(inexact, opposite_signs)
            floored = builder.select(needs_fixup,
                                     builder.sub(quot, one), quot)
            builder.store(floored, ret)
    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def _increment_indices(context, builder, ndim, shape, indices, end_flag=None):
    """
    Emit code that advances the index vector *indices* by one position.

    Axes are visited from the last to the first.  The first axis whose
    incremented index is still below ``shape[axis]`` keeps the new value and
    control jumps to the end block; every axis passed before that is reset
    to zero.  When *end_flag* is given, it is cleared up front and set to
    true only if no axis could be incremented.
    """
    intp_zero = context.get_constant(types.intp, 0)
    intp_one = context.get_constant(types.intp, 1)
    done_block = cgutils.append_basic_block(builder, 'end_increment')
    has_flag = end_flag is not None

    if has_flag:
        builder.store(cgutils.false_byte, end_flag)

    for axis in range(ndim - 1, -1, -1):
        slot = cgutils.gep(builder, indices, axis)
        bumped = builder.add(builder.load(slot), intp_one)
        extent = shape[axis]
        fits = builder.icmp(lc.ICMP_SLT, bumped, extent)
        with cgutils.if_likely(builder, fits):
            # No carry needed: commit the new index and finish.
            builder.store(bumped, slot)
            builder.branch(done_block)
        # Carry: wrap this axis and move on to the next outer one.
        builder.store(intp_zero, slot)

    # Reached only when every axis wrapped around.
    if has_flag:
        builder.store(cgutils.true_byte, end_flag)
    builder.branch(done_block)

    builder.position_at_end(done_block)
def raise_error(self, builder, api, status):
    """
    Emit code raising the Python exception matching a non-ok *status*.

    Each recognised status kind sets (or leaves) the error indicator and
    jumps to a common end block, where the builder is positioned on return.
    """
    done = builder.function.append_basic_block()

    with builder.if_then(status.is_user_exc):
        # User exception: clear any pending error first so that it cannot
        # interfere, then unserialize and raise the stored exception.
        api.err_clear()
        exc_obj = api.unserialize(status.excinfoptr)
        unserialized = cgutils.is_not_null(builder, exc_obj)
        with cgutils.if_likely(builder, unserialized):
            api.raise_object(exc_obj)  # steals ref
        builder.branch(done)

    with builder.if_then(status.is_stop_iteration):
        api.err_set_none("PyExc_StopIteration")
        builder.branch(done)

    with builder.if_then(status.is_python_exc):
        # A Python error is already set: nothing more to do.
        builder.branch(done)

    # Fallback for any status not handled above.
    api.err_set_string("PyExc_SystemError",
                       "unknown error when calling native function")
    builder.branch(done)

    builder.position_at_end(done)
def codegen(context, builder, sig, args):
    """
    Copy the offsets of the non-NA entries of the input string array into
    consecutive slots of the output string array's offsets.

    *args* is ``(out_str_arr, in_str_arr)``.  A dummy value is returned.
    """
    out_str_arr, in_str_arr = args
    src = context.make_helper(builder, string_array_type, in_str_arr)
    dst = context.make_helper(builder, string_array_type, out_str_arr)
    n_items = src.num_items
    start = context.get_constant(offset_typ, 0)
    write_pos_ptr = cgutils.alloca_once_value(builder, start)

    # XXX: assuming last offset is already set by allocate_string_array
    # Equivalent pseudo-code:
    #   for i in range(n):
    #       if not isna(i):
    #           out_offset[curr] = offset[i]
    #           curr += 1
    with cgutils.for_range(builder, n_items) as loop:
        pos = loop.index
        is_null = lower_is_na(context, builder, src.null_bitmap, pos)
        with cgutils.if_likely(builder, builder.not_(is_null)):
            src_slot = builder.gep(src.offsets, [pos])
            offset_val = builder.load(src_slot)
            write_pos = builder.load(write_pos_ptr)
            dst_slot = builder.gep(dst.offsets, [write_pos])
            builder.store(offset_val, dst_slot)
            step = lir.Constant(context.get_data_type(offset_typ), 1)
            builder.store(builder.add(write_pos, step), write_pos_ptr)

    return context.get_dummy_value()
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit the "next item" step of a linear iteration over *arr*.

    The iterator is valid while the stored index is below ``arr.nitems``.
    When valid, the element at the stored pointer is unpacked and yielded
    through *result*, then the index is incremented by one and the pointer
    advanced by ``self.stride``.

    ``kind`` is read from the enclosing scope (not visible in this block):
    for ``'flat'`` only the value is yielded; otherwise an
    ``(index tuple, value)`` struct is yielded and the per-dimension
    indices are advanced as well.
    """
    one = context.get_constant(types.intp, 1)

    ndim = arrty.ndim
    nitems = arr.nitems

    index = builder.load(self.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        ptr = builder.load(self.pointer)
        value = context.unpack_value(builder, arrty.dtype, ptr)
        if kind == 'flat':
            result.yield_(value)
        else:
            # ndenumerate(): fetch and increment indices
            indices = self.indices
            idxvals = [
                builder.load(cgutils.gep(builder, indices, dim))
                for dim in range(ndim)
            ]
            idxtuple = cgutils.pack_array(builder, idxvals)
            result.yield_(
                cgutils.make_anonymous_struct(builder, [idxtuple, value]))
            _increment_indices_array(context, builder, arrty, arr, indices)

        # Advance the linear position and the data pointer.
        index = builder.add(index, one)
        builder.store(index, self.index)
        ptr = cgutils.pointer_add(builder, ptr, self.stride)
        builder.store(ptr, self.pointer)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit the "next item" step of a linear iteration over *arr*.

    *result* is marked valid while the stored index is below
    ``arr.nitems``.  In that case the element at the stored pointer is
    unpacked and yielded, after which the index grows by one and the
    pointer by ``self.stride``.

    ``kind`` comes from the enclosing scope (not visible in this block).
    With ``'flat'`` the bare value is yielded; otherwise the yielded item is
    a struct of (index tuple, value) and the per-dimension indices are
    incremented too.
    """
    one = context.get_constant(types.intp, 1)

    ndim = arrty.ndim
    nitems = arr.nitems

    index = builder.load(self.index)
    is_valid = builder.icmp(lc.ICMP_SLT, index, nitems)
    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        ptr = builder.load(self.pointer)
        value = context.unpack_value(builder, arrty.dtype, ptr)
        if kind == 'flat':
            result.yield_(value)
        else:
            # ndenumerate(): fetch and increment indices
            indices = self.indices
            idxvals = [builder.load(cgutils.gep(builder, indices, dim))
                       for dim in range(ndim)]
            idxtuple = cgutils.pack_array(builder, idxvals)
            result.yield_(
                cgutils.make_anonymous_struct(builder, [idxtuple, value]))
            _increment_indices_array(context, builder, arrty, arr, indices)

        # Step to the next element.
        index = builder.add(index, one)
        builder.store(index, self.index)
        ptr = cgutils.pointer_add(builder, ptr, self.stride)
        builder.store(ptr, self.pointer)
def timedelta_sub_impl(context, builder, sig, args):
    """
    Lower ``timedelta - timedelta``.

    When neither operand is NaT, both are scaled to the return type's unit
    and their difference is stored in the result slot.  Otherwise the slot
    keeps whatever ``alloc_timedelta_result`` put there (presumably NaT;
    that helper is not visible here).
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args
    out_type = sig.return_type

    out = alloc_timedelta_result(builder)
    both_valid = are_not_nat(builder, [lhs, rhs])
    with cgutils.if_likely(builder, both_valid):
        lhs_scaled = scale_timedelta(context, builder, lhs, lhs_type, out_type)
        rhs_scaled = scale_timedelta(context, builder, rhs, rhs_type, out_type)
        difference = builder.sub(lhs_scaled, rhs_scaled)
        builder.store(difference, out)

    return builder.load(out)
def iternext_numpy_flatiter(context, builder, sig, args, result):
    """
    Emit the "next item" step for a flat iterator over an array.

    The per-axis indices stored in the iterator are loaded and compared with
    the array shape.  When every index is in bounds, the element they
    designate is yielded through *result* and the stored indices are
    advanced by one position, last axis first, with carry into the
    preceding axes.  Once the carry propagates out of the first axis, the
    first index is set to the first dimension's size so that the next call
    reports the iterator as invalid.
    """
    [flatiterty] = sig.args
    [flatiter] = args

    flatitercls = make_array_flat_cls(flatiterty)
    flatiter = flatitercls(context, builder, value=flatiter)

    arrty = flatiterty.array_type
    arrcls = context.make_array(arrty)
    arr = arrcls(context, builder, value=builder.load(flatiter.array))

    ndim = arrty.ndim
    shapes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    indptr = flatiter.iters

    # Load indices and check if they are valid
    indices = []
    is_valid = cgutils.true_bit
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    for ax in range(ndim):
        axsize = shapes[ax]
        idxptr = builder.gep(indptr, [context.get_constant(types.intp, ax)])
        idx = builder.load(idxptr)
        ax_valid = builder.icmp(lc.ICMP_SLT, idx, axsize)

        indices.append(idx)
        # Valid only if every axis index is in bounds.
        is_valid = builder.and_(is_valid, ax_valid)

    result.set_valid(is_valid)

    with cgutils.if_likely(builder, is_valid):
        # Get yielded value
        valptr = cgutils.get_item_pointer(builder, arrty, arr, indices)
        yield_value = builder.load(valptr)
        result.yield_(yield_value)

        # Increment iterator indices
        # carry_flags[-1] tells whether the axis being processed must be
        # incremented; it starts true so the last axis always is.
        carry_flags = [cgutils.true_bit]
        for ax, (idx, axsize) in reversed(list(enumerate(zip(indices,
                                                             shapes)))):
            idxptr = builder.gep(indptr,
                                 [context.get_constant(types.intp, ax)])
            lastcarry = carry_flags[-1]
            idxp1 = builder.add(idx, one)
            carry = builder.icmp(lc.ICMP_SGE, idxp1, axsize)
            # With an incoming carry: wrap to zero on overflow, else take
            # idx + 1.  Without an incoming carry: leave the index as is.
            idxfinal = builder.select(lastcarry,
                                      builder.select(carry, zero, idxp1),
                                      idx)
            builder.store(idxfinal, idxptr)
            carry_flags.append(builder.and_(carry, lastcarry))

        with cgutils.if_unlikely(builder, carry_flags[-1]):
            # If we have iterated all elements,
            # Set first index to out-of-bound
            idxptr = builder.gep(indptr,
                                 [context.get_constant(types.intp, 0)])
            builder.store(shapes[0], idxptr)
def list_pack(self, items):
    """
    Emit code building a new list holding *items*.

    A list of ``len(items)`` slots is created; if creation succeeded
    (non-null), each item is incref'ed and stored at its position.  The list
    value is returned either way, so it may be null.
    """
    count = len(items)
    size_const = self.context.get_constant(types.intp, count)
    new_list = self.list_new(size_const)
    created = cgutils.is_not_null(self.builder, new_list)
    with cgutils.if_likely(self.builder, created):
        for position, item in enumerate(items):
            slot = self.context.get_constant(types.intp, position)
            # Incref before handing the item to list_setitem.
            self.incref(item)
            self.list_setitem(new_list, slot, item)
    return new_list
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    Steps: unpack the positional argument tuple, convert each argument
    through the argument manager, call the native function, emit cleanup,
    then either return the boxed result or report an error by returning a
    null object.
    """
    n_params = len(self.fndesc.args)

    slots = [api.alloca_obj() for _ in range(n_params)]
    unpacked = api.unpack_tuple(args, self.fndesc.qualname,
                                n_params, n_params, *slots)

    unpack_failed = builder.icmp(lc.ICMP_EQ, unpacked,
                                 Constant.null(unpacked.type))
    with cgutils.if_unlikely(builder, unpack_failed):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    error_exit = builder.append_basic_block("arg.end")
    with builder.goto_block(error_exit):
        builder.ret(api.get_null_object())

    cleanup_manager = _ArgManager(self.context, builder, api,
                                  error_exit, n_params)

    native_args = [cleanup_manager.add_arg(slot, argty)
                   for slot, argty in zip(slots, self.fndesc.argtypes)]

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # Extract the Environment object from the Closure
    envptr, env_manager = self.get_env(api, builder, closure)

    status, res = self.context.call_conv.call_function(
        builder, self.func, self.fndesc.restype, self.fndesc.argtypes,
        native_args, envptr)

    # Do clean up
    self.debug_print(builder, "# callwrapper: emit_cleanup")
    cleanup_manager.emit_cleanup()
    self.debug_print(builder, "# callwrapper: emit_cleanup end")

    # Determine return status
    with cgutils.if_likely(builder, status.is_ok):
        # Ok => return boxed Python value
        with builder.if_then(status.is_none):
            api.return_none()

        boxed = api.from_native_return(res, self._simplified_return_type(),
                                       env_manager)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.is_python_exc)
    with builder.if_then(not_python_exc):
        # User exception raised
        self.make_exception_switch(api, builder, status)

    # Error out
    builder.ret(api.get_null_object())
def impl(context, builder, dt_arg, dt_unit, td_arg, td_unit, ret_unit):
    """
    Combine a datetime and a timedelta with the builder operation named by
    ``ll_op_name`` (taken from the enclosing scope, not visible here).

    When neither operand is NaT, the datetime is converted to *ret_unit*,
    the timedelta is scaled by the *td_unit* -> *ret_unit* factor, and the
    operation's result is stored.  The loaded result slot is returned.
    """
    out = alloc_timedelta_result(builder)
    operands_valid = are_not_nat(builder, [dt_arg, td_arg])
    with cgutils.if_likely(builder, operands_valid):
        dt_val = convert_datetime_for_arith(builder, dt_arg, dt_unit,
                                            ret_unit)
        factor = npdatetime.get_timedelta_conversion_factor(td_unit,
                                                            ret_unit)
        td_val = scale_by_constant(builder, td_arg, factor)
        arith_op = getattr(builder, ll_op_name)
        builder.store(arith_op(dt_val, td_val), out)
    return builder.load(out)
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    Steps: parse positional and keyword arguments into object slots, convert
    them through the argument manager, call the native function with the
    closure's environment, emit cleanup, then return the boxed result or a
    null object on error.
    """
    n_params = len(self.fndesc.args)
    kwlist = self.make_keywords(self.fndesc.args)
    parse_fmt = self.make_const_string("O" * n_params)

    slots = [api.alloca_obj() for _ in range(n_params)]
    parsed = api.parse_tuple_and_keywords(args, kws, parse_fmt, kwlist,
                                          *slots)

    parse_failed = builder.icmp(lc.ICMP_EQ, parsed,
                                Constant.null(parsed.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    error_exit = cgutils.append_basic_block(builder, "arg.end")
    with cgutils.goto_block(builder, error_exit):
        builder.ret(api.get_null_object())

    cleanup_manager = _ArgManager(builder, api, error_exit, n_params)

    native_args = [cleanup_manager.add_arg(slot, argty)
                   for slot, argty in zip(slots, self.fndesc.argtypes)]

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # The wrapped function doesn't take a full closure, only
    # the Environment object.
    env = self.context.get_env_from_closure(builder, closure)

    status, res = self.context.call_function(builder, self.func,
                                             self.fndesc.restype,
                                             self.fndesc.argtypes,
                                             native_args, env)
    # Do clean up
    cleanup_manager.emit_cleanup()

    # Determine return status
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()

        boxed = api.from_native_return(res, self.fndesc.restype)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # !ok && !exc
        # User exception raised
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def dict_pack(self, keyvalues):
    """
    Emit code building a new dict from *keyvalues*.

    Args
    -----
    keyvalues: iterable of (str, llvm.Value of PyObject*)

    The entries are inserted only if the dict was created (non-null); the
    dict value is returned either way, so it may be null.
    """
    new_dict = self.dict_new()
    created = cgutils.is_not_null(self.builder, new_dict)
    with cgutils.if_likely(self.builder, created):
        for key, value in keyvalues:
            self.dict_setitem_string(new_dict, key, value)
    return new_dict
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    The positional argument tuple is unpacked into object slots, each slot
    is converted through the argument manager, the native function is
    called, cleanup is emitted, and finally the native result is boxed and
    returned.  A null object is returned on any failure.
    """
    arg_count = len(self.fndesc.args)

    obj_slots = [api.alloca_obj() for _ in range(arg_count)]
    unpack_result = api.unpack_tuple(args, self.fndesc.qualname,
                                     arg_count, arg_count, *obj_slots)

    is_null_result = builder.icmp(lc.ICMP_EQ, unpack_result,
                                  Constant.null(unpack_result.type))
    with cgutils.if_unlikely(builder, is_null_result):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    bail_out = builder.append_basic_block("arg.end")
    with builder.goto_block(bail_out):
        builder.ret(api.get_null_object())

    cleanup_manager = _ArgManager(self.context, builder, api,
                                  bail_out, arg_count)

    converted = []
    for obj_slot, argty in zip(obj_slots, self.fndesc.argtypes):
        converted.append(cleanup_manager.add_arg(obj_slot, argty))

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # Extract the Environment object from the Closure
    envptr, env_manager = self.get_env(api, builder, closure)

    status, native_result = self.context.call_conv.call_function(
        builder, self.func, self.fndesc.restype, self.fndesc.argtypes,
        converted, envptr)

    # Do clean up
    self.debug_print(builder, "# callwrapper: emit_cleanup")
    cleanup_manager.emit_cleanup()
    self.debug_print(builder, "# callwrapper: emit_cleanup end")

    # Determine return status
    with cgutils.if_likely(builder, status.is_ok):
        # Ok => return boxed Python value
        with builder.if_then(status.is_none):
            api.return_none()

        return_type = self._simplified_return_type()
        boxed = api.from_native_return(native_result, return_type,
                                       env_manager)
        builder.ret(boxed)

    user_raised = builder.not_(status.is_python_exc)
    with builder.if_then(user_raised):
        # User exception raised
        self.make_exception_switch(api, builder, status)

    # Error out
    builder.ret(api.get_null_object())
def datetime_minus_datetime(context, builder, sig, args):
    """
    Lower ``datetime - datetime``.

    When neither operand is NaT, both are converted to the return type's
    unit and their difference is stored in the result slot, whose loaded
    value is returned.
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args
    lhs_unit = lhs_type.unit
    rhs_unit = rhs_type.unit
    out_unit = sig.return_type.unit

    out = alloc_timedelta_result(builder)
    both_valid = are_not_nat(builder, [lhs, rhs])
    with cgutils.if_likely(builder, both_valid):
        lhs_conv = convert_datetime_for_arith(builder, lhs, lhs_unit,
                                              out_unit)
        rhs_conv = convert_datetime_for_arith(builder, rhs, rhs_unit,
                                              out_unit)
        builder.store(builder.sub(lhs_conv, rhs_conv), out)
    return builder.load(out)
def timedelta_over_timedelta(context, builder, sig, args):
    """
    Lower ``timedelta / timedelta`` as a floating-point division.

    The result slot starts as NaN.  When neither operand is NaT, both are
    normalized, converted to the return float type and divided.
    """
    lhs, rhs = args
    lhs_type, rhs_type = sig.args

    both_valid = are_not_nat(builder, [lhs, rhs])
    float_type = context.get_value_type(sig.return_type)
    out = cgutils.alloca_once(builder, float_type, name='ret')
    nan_const = Constant.real(float_type, float('nan'))
    builder.store(nan_const, out)

    with cgutils.if_likely(builder, both_valid):
        lhs, rhs = normalize_timedeltas(context, builder, lhs, rhs,
                                        lhs_type, rhs_type)
        numerator = builder.sitofp(lhs, float_type)
        denominator = builder.sitofp(rhs, float_type)
        builder.store(builder.fdiv(numerator, denominator), out)

    return builder.load(out)
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    Arguments are parsed from the positional tuple and keywords, converted
    to native values (each conversion supplying a cleanup callable), and
    passed to the native function together with the closure's environment.
    The cleanups run after the call; then the boxed result, or a null
    object on error, is returned.
    """
    n_params = len(self.fndesc.args)
    kwlist = self.make_keywords(self.fndesc.args)
    parse_fmt = self.make_const_string("O" * n_params)

    slots = [api.alloca_obj() for _ in range(n_params)]
    parsed = api.parse_tuple_and_keywords(args, kws, parse_fmt, kwlist,
                                          *slots)

    parse_failed = builder.icmp(lc.ICMP_EQ, parsed,
                                Constant.null(parsed.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    native_args = []
    destructors = []
    for slot, argty in zip(slots, self.fndesc.argtypes):
        native_val, destructor = api.to_native_arg(builder.load(slot), argty)
        native_args.append(native_val)
        destructors.append(destructor)

    # The wrapped function doesn't take a full closure, only
    # the Environment object.
    env = self.context.get_env_from_closure(builder, closure)

    status, res = self.context.call_function(builder, self.func,
                                             self.fndesc.restype,
                                             self.fndesc.argtypes,
                                             native_args, env)
    # Do clean up
    for destructor in destructors:
        destructor()

    # Determine return status
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()

        boxed = api.from_native_return(res, self.fndesc.restype)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # !ok && !exc
        # User exception raised
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def _timedelta_times_number(context, builder, td_arg, td_type,
                            number_arg, number_type, return_type):
    """
    Multiply a timedelta by a number.

    When the timedelta is not NaT, the product is computed (through a
    float round-trip when *number_type* is a float type), scaled from
    *td_type* to *return_type*, and stored in the result slot, whose loaded
    value is returned.
    """
    out = alloc_timedelta_result(builder)
    td_valid = is_not_nat(builder, td_arg)
    with cgutils.if_likely(builder, td_valid):
        if not isinstance(number_type, types.Float):
            product = builder.mul(td_arg, number_arg)
        else:
            as_float = builder.sitofp(td_arg, number_arg.type)
            float_product = builder.fmul(as_float, number_arg)
            product = builder.fptosi(float_product, TIMEDELTA64)
        # The scaling is required for ufunc np.multiply() with an explicit
        # output in a different unit.
        scaled = scale_timedelta(context, builder, product, td_type,
                                 return_type)
        builder.store(scaled, out)
    return builder.load(out)
def make_exception_switch(self, api, builder, status):
    """
    Handle user exceptions.  Unserialize the exception info and raise it.

    When *status* denotes a user exception, the stored exception is
    unserialized, raised if unserialization produced a non-null object, and
    the wrapper returns a null object.  For any other status, a
    ``SystemError`` naming the native function is set and control falls
    through to the caller's code.
    """
    # Handle user exceptions
    with cgutils.ifthen(builder, status.is_user_exc):
        exc = api.unserialize(status.excinfoptr)
        with cgutils.if_likely(builder,
                               cgutils.is_not_null(builder, exc)):
            api.raise_object(exc)  # steals ref
        builder.ret(api.get_null_object())

    msg = "unknown error in native function: %s" % self.fndesc.mangled_name
    api.err_set_string("PyExc_SystemError", msg)
def iternext_zip(context, builder, sig, args, result):
    """
    Emit the "next item" step by calling the generator implementation for
    the single argument type in *sig*.

    An ok status yields the returned value, a stop-iteration status marks
    *result* exhausted, and any other error status is propagated to the
    caller.
    """
    (genty,) = sig.args
    (gen,) = args  # unpacked but not used below

    # XXX We should link with the generator's library.
    # Currently, this doesn't make a difference as the library has already
    # been linked for the generator init function.
    gen_impl = context.get_generator_impl(genty)
    status, value = gen_impl(context, builder, sig, args)

    with cgutils.if_likely(builder, status.is_ok):
        result.set_valid(True)
        result.yield_(value)

    with cgutils.if_unlikely(builder, status.is_stop_iteration):
        result.set_exhausted()

    real_error = builder.and_(status.is_error,
                              builder.not_(status.is_stop_iteration))
    with cgutils.if_unlikely(builder, real_error):
        context.call_conv.return_status_propagate(builder, status)
def iternext_zip(context, builder, sig, args, result):
    """
    Emit the "next item" step by calling the generator implementation for
    the single argument type in *sig*, registering the implementation's
    ``libs`` (if any) for linking.

    An ok status yields the returned value, a stop-iteration status marks
    *result* exhausted, and any other error status is propagated to the
    caller.
    """
    (genty,) = sig.args
    (gen,) = args  # unpacked but not used below

    gen_impl = context.get_generator_impl(genty)
    status, value = gen_impl(context, builder, sig, args)
    extra_libs = getattr(gen_impl, 'libs', ())
    context.add_linking_libs(extra_libs)

    with cgutils.if_likely(builder, status.is_ok):
        result.set_valid(True)
        result.yield_(value)

    with cgutils.if_unlikely(builder, status.is_stop_iteration):
        result.set_exhausted()

    real_error = builder.and_(status.is_error,
                              builder.not_(status.is_stop_iteration))
    with cgutils.if_unlikely(builder, real_error):
        context.call_conv.return_status_propagate(builder, status)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit the "next item" step of a linear iteration over *arr*.

    *result* is valid while the stored index is below ``arr.nitems``.  In
    that case the element at the stored pointer is unpacked and yielded,
    then the index is incremented by one and the pointer advanced by
    ``self.stride``.
    """
    total = arr.nitems

    position = builder.load(self.index)
    has_item = builder.icmp(lc.ICMP_SLT, position, total)
    result.set_valid(has_item)

    with cgutils.if_likely(builder, has_item):
        cursor = builder.load(self.pointer)
        item = context.unpack_value(builder, arrty.dtype, cursor)
        result.yield_(item)

        # Step to the next element.
        step = context.get_constant(types.intp, 1)
        builder.store(builder.add(position, step), self.index)
        next_cursor = cgutils.pointer_add(builder, cursor, self.stride)
        builder.store(next_cursor, self.pointer)
def build_wrapper(self, api, builder, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    Arguments are parsed from the positional tuple and keywords, converted
    to native values (each conversion supplying a cleanup callable), and
    passed to the native function.  The cleanups run after the call; then
    the boxed result, or a null object on error, is returned.
    """
    n_params = len(self.fndesc.args)
    kwlist = self.make_keywords(self.fndesc.args)
    parse_fmt = self.make_const_string("O" * n_params)

    slots = [api.alloca_obj() for _ in range(n_params)]
    parsed = api.parse_tuple_and_keywords(args, kws, parse_fmt, kwlist,
                                          *slots)

    parse_failed = builder.icmp(lc.ICMP_EQ, parsed,
                                Constant.null(parsed.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    native_args = []
    destructors = []
    for slot, argty in zip(slots, self.fndesc.argtypes):
        native_val, destructor = api.to_native_arg(builder.load(slot), argty)
        native_args.append(native_val)
        destructors.append(destructor)

    status, res = self.context.call_function(builder, self.func,
                                             self.fndesc.restype,
                                             self.fndesc.argtypes,
                                             native_args)
    # Do clean up
    for destructor in destructors:
        destructor()

    # Determine return status
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()

        boxed = api.from_native_return(res, self.fndesc.restype)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # !ok && !exc
        # User exception raised
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def make_exception_switch(self, api, builder, status):
    """
    Handle user exceptions.  Unserialize the exception info and raise it.

    * User exception: the stored exception is unserialized, raised if
      unserialization produced a non-null object, and a null object is
      returned.
    * Stop iteration: ``StopIteration`` is set and a null object is
      returned.
    * Anything else: a ``SystemError`` naming the native function is set
      and control falls through to the caller's code.
    """
    # Handle user exceptions
    with builder.if_then(status.is_user_exc):
        exc = api.unserialize(status.excinfoptr)
        with cgutils.if_likely(builder,
                               cgutils.is_not_null(builder, exc)):
            api.raise_object(exc)  # steals ref
        builder.ret(api.get_null_object())

    with builder.if_then(status.is_stop_iteration):
        api.err_set_none("PyExc_StopIteration")
        builder.ret(api.get_null_object())

    msg = "unknown error in native function: %s" % self.fndesc.mangled_name
    api.err_set_string("PyExc_SystemError", msg)
def to_native_buffer(self, obj, typ):
    """
    Emit code converting the buffer-providing object *obj* into a native
    array of type *typ*.

    A non-null result from ``get_buffer`` is treated as an error.  When
    there is no error, the native array structure is filled in by
    ``numba_buffer_adaptor``.  The returned ``NativeValue`` carries the
    loaded array, the error flag, and a cleanup callable releasing the
    buffer.
    """
    builder = self.builder

    buf = self.alloca_buffer()
    status = self.get_buffer(obj, buf)
    is_error = cgutils.is_not_null(builder, status)

    array_cls = self.context.make_array(typ)
    native_array = array_cls(self.context, builder)
    array_ptr = native_array._getpointer()

    acquired = builder.not_(is_error)
    with cgutils.if_likely(builder, acquired):
        raw_ptr = builder.bitcast(array_ptr, self.voidptr)
        self.numba_buffer_adaptor(buf, raw_ptr)

    def cleanup():
        self.release_buffer(buf)

    return NativeValue(builder.load(array_ptr), is_error=is_error,
                       cleanup=cleanup)
def timedelta_over_number(context, builder, sig, args):
    """
    Lower ``timedelta / number``.

    The division is performed only when the timedelta is not NaT and the
    number is neither zero nor NaN (through a float round-trip when the
    number's type is a float type).  The quotient is scaled from the first
    argument's type to the return type and stored in the result slot, whose
    loaded value is returned.
    """
    td_arg, number_arg = args
    td_type, number_type = sig.args[0], sig.args[1]

    out = alloc_timedelta_result(builder)
    td_valid = is_not_nat(builder, td_arg)
    bad_denominator = cgutils.is_scalar_zero_or_nan(builder, number_arg)
    can_divide = builder.and_(td_valid, builder.not_(bad_denominator))

    with cgutils.if_likely(builder, can_divide):
        # Denominator is non-zero, non-NaN
        if not isinstance(number_type, types.Float):
            quotient = builder.sdiv(td_arg, number_arg)
        else:
            as_float = builder.sitofp(td_arg, number_arg.type)
            float_quotient = builder.fdiv(as_float, number_arg)
            quotient = builder.fptosi(float_quotient, TIMEDELTA64)
        # The scaling is required for ufuncs np.*divide() with an explicit
        # output in a different unit.
        scaled = scale_timedelta(context, builder, quotient, td_type,
                                 sig.return_type)
        builder.store(scaled, out)

    return builder.load(out)
def object_richcompare(self, lhs, rhs, opstr):
    """
    Emit code comparing two Python objects with the operator *opstr*.

    The six rich-comparison operators call ``PyObject_RichCompare``; their
    position in the operator list is used as the opid (refer to Python
    source Include/object.h for the opid macros).  ``is`` / ``is not``
    compare the two pointers, and ``in`` calls ``PySequence_Contains``.

    Raises NotImplementedError for any other operator.
    """
    rich_ops = ['<', '<=', '==', '!=', '>', '>=']

    if opstr in rich_ops:
        opid = rich_ops.index(opstr)
        fnty = Type.function(
            self.pyobj, [self.pyobj, self.pyobj, Type.int()])
        fn = self._get_function(fnty, name="PyObject_RichCompare")
        opid_const = self.context.get_constant(types.int32, opid)
        return self.builder.call(fn, (lhs, rhs, opid_const))

    if opstr == 'is':
        same = self.builder.icmp(lc.ICMP_EQ, lhs, rhs)
        return self.from_native_value(same, types.boolean)

    if opstr == 'is not':
        different = self.builder.icmp(lc.ICMP_NE, lhs, rhs)
        return self.from_native_value(different, types.boolean)

    if opstr == 'in':
        fnty = Type.function(Type.int(), [self.pyobj, self.pyobj])
        fn = self._get_function(fnty, name="PySequence_Contains")
        # Note the operand order: the container is the right-hand side.
        status = self.builder.call(fn, (rhs, lhs))
        minus_one = self.context.get_constant(types.int32, -1)
        no_error = self.builder.icmp(lc.ICMP_NE, status, minus_one)
        # Stack allocate output and initialize to Null
        out_slot = cgutils.alloca_once_value(self.builder,
                                             Constant.null(self.pyobj))
        # If PySequence_Contains returns non-error value
        with cgutils.if_likely(self.builder, no_error):
            # Store the status as a boolean object
            as_bit = self.builder.trunc(status, Type.int(1))
            self.builder.store(self.bool_from_bool(as_bit), out_slot)
        return self.builder.load(out_slot)

    raise NotImplementedError(
        "Unknown operator {op!r}".format(op=opstr))
def build_wrapper(self, api, builder, args, kws):
    """
    Emit the body of the Python-callable wrapper around the native function.

    Arguments are parsed from the positional tuple and keywords, converted
    to native values and passed to the native function.  On success the
    boxed result is returned; otherwise a null object is returned, after
    raising a native error when the failure is not a Python exception.
    """
    n_params = len(self.fndesc.args)
    kwlist = self.make_keywords(self.fndesc.args)
    parse_fmt = self.make_const_string("O" * n_params)

    slots = [api.alloca_obj() for _ in range(n_params)]
    parsed = api.parse_tuple_and_keywords(args, kws, parse_fmt, kwlist,
                                          *slots)

    parse_failed = builder.icmp(lc.ICMP_EQ, parsed,
                                Constant.null(parsed.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    native_args = [api.to_native_arg(builder.load(slot), argty)
                   for slot, argty in zip(slots, self.fndesc.argtypes)]

    status, res = self.context.call_function(builder, self.func,
                                             self.fndesc.restype,
                                             self.fndesc.argtypes,
                                             native_args)

    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()

        boxed = api.from_native_return(res, self.fndesc.restype)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # User exception raised
        # TODO we will just raise a RuntimeError for now.
        api.raise_native_error("error in native function: %s" %
                               self.fndesc.mangled_name)

    builder.ret(api.get_null_object())
def object_richcompare(self, lhs, rhs, opstr): """ Refer to Python source Include/object.h for macros definition of the opid. """ ops = ['<', '<=', '==', '!=', '>', '>='] if opstr in ops: opid = ops.index(opstr) fnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, Type.int()]) fn = self._get_function(fnty, name="PyObject_RichCompare") lopid = self.context.get_constant(types.int32, opid) return self.builder.call(fn, (lhs, rhs, lopid)) elif opstr == 'is': bitflag = self.builder.icmp(lc.ICMP_EQ, lhs, rhs) return self.from_native_value(bitflag, types.boolean) elif opstr == 'is not': bitflag = self.builder.icmp(lc.ICMP_NE, lhs, rhs) return self.from_native_value(bitflag, types.boolean) elif opstr == 'in': fnty = Type.function(Type.int(), [self.pyobj, self.pyobj]) fn = self._get_function(fnty, name="PySequence_Contains") status = self.builder.call(fn, (rhs, lhs)) negone = self.context.get_constant(types.int32, -1) is_good = self.builder.icmp(lc.ICMP_NE, status, negone) # Stack allocate output and initialize to Null outptr = cgutils.alloca_once_value(self.builder, Constant.null(self.pyobj)) # If PySequence_Contains returns non-error value with cgutils.if_likely(self.builder, is_good): # Store the status as a boolean object truncated = self.builder.trunc(status, Type.int(1)) self.builder.store(self.bool_from_bool(truncated), outptr) return self.builder.load(outptr) else: raise NotImplementedError("Unknown operator {op!r}".format( op=opstr))
def generate_kernel_wrapper(self, library, fname, argtypes, debug):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped has the name ``fname`` and argument types
    ``argtypes``.  The wrapper function is returned.

    The wrapper is built in a fresh module ("cuda.kernel.wrapper") that
    declares the wrapped function and calls it with the unpacked arguments.
    When ``debug`` is true, the call status is checked: for a non-ok status
    that is not a Python exception, the status code is recorded in an error
    global through a compare-and-swap helper, and the thread and block IDs
    are stored if that swap took effect.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    # Declaration of the wrapped function: an int return plus a leading
    # return-value argument ahead of the packed arguments.
    fnty = Type.function(Type.int(),
                         [self.call_conv.get_return_type(types.pyobject)]
                         + argtys)
    func = wrapper_module.add_function(fnty, name=fname)

    prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
    wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
    builder = Builder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        # Null-initialised int global named after the wrapper + postfix.
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, func, types.void, argtypes, callargs)

    if debug:
        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded

            # The swap is emitted as a call to a function declared under
            # the name "___numba_cas_hack"; its definition is not visible
            # in this block.
            casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                                  old.type])

            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    # Return the function as looked up in the finalized library.
    wrapfn = library.get_function(wrapfn.name)
    return wrapfn
def generate_kernel_wrapper(self, func, argtypes):
    """
    Generate, in *func*'s module, a void kernel wrapper named
    ``"cudaPy_" + func.name`` that calls *func* and records errors.

    For a non-ok status that is not a Python exception, the status code is
    written to an error global with an atomic compare-and-exchange (so only
    the first error is kept), and the thread and block IDs are stored when
    that exchange took effect.  The wrapper function is returned after the
    module has been verified.
    """
    module = func.module
    llvm_argtys = self.get_arguments(func.type.pointee)
    wrapper_type = Type.function(Type.void(), llvm_argtys)
    wrapfn = module.add_function(wrapper_type, name="cudaPy_" + func.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        error_gv = module.add_global_variable(Type.int(),
                                              name=wrapfn.name + postfix)
        error_gv.initializer = Constant.null(error_gv.type.pointee)
        return error_gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for axis in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % axis))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % axis))

    callargs = [self.get_argument_value(builder, argty, argval)
                for argty, argval in zip(argtypes, wrapfn.args)]

    status, _ = self.call_function(builder, func, types.void, argtypes,
                                   callargs)

    # Check error status
    with cgutils.if_likely(builder, status.ok):
        builder.ret_void()

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # User exception raised
        no_error = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded
        previous = builder.atomic_cmpxchg(gv_exc, no_error, status.code,
                                          "monotonic")
        recorded = builder.icmp(ICMP_EQ, previous, no_error)

        # If the xchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with cgutils.ifthen(builder, recorded):
            for dim, slot in zip("xyz", gv_tid):
                builder.store(sreg.tid(dim), slot)

            for dim, slot in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(dim), slot)

    builder.ret_void()

    # force inline
    inline_function(status.code)

    module.verify()
    return wrapfn
def if_object_ok(self, obj):
    """
    Generator that yields once inside an ``if_likely`` block guarded by
    "*obj* is not null".

    NOTE(review): presumably wrapped as a context manager by a decorator
    that is not visible in this block -- confirm.
    """
    is_valid = cgutils.is_not_null(self.builder, obj)
    with cgutils.if_likely(self.builder, is_valid):
        yield
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit the code for a single iteration step over ``arr``.

    ``result`` is marked invalid when the iterator's ``empty`` flag is
    set or when no dimension has an index below its extent.  Otherwise
    the value found at the current pointer is yielded, after the stored
    indices and pointers have been moved on to the next position.
    """
    ndim = arrty.ndim
    data = arr.data
    extents = cgutils.unpack_tuple(builder, arr.shape, ndim)
    steps = cgutils.unpack_tuple(builder, arr.strides, ndim)
    index_slots = self.indices
    pointer_slots = self.pointers

    # NOTE(review): `zero` and `minus_one` are not referenced below.
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    minus_one = context.get_constant(types.intp, -1)
    result.set_valid(True)

    fetch_bb = cgutils.append_basic_block(builder, 'continued')
    exit_bb = cgutils.append_basic_block(builder, 'end')

    # An iterator already flagged as empty produces nothing
    empty_flag = builder.load(self.empty)
    nothing_left = cgutils.as_bool_bit(builder, empty_flag)
    with cgutils.if_unlikely(builder, nothing_left):
        result.set_valid(False)
        builder.branch(exit_bb)

    # Slot (from alloca_once) receiving the pointer to the yielded element
    current_slot = cgutils.alloca_once(builder, data.type)

    # Innermost dimension first, outermost last
    for axis in reversed(range(ndim)):
        idx_addr = cgutils.gep(builder, index_slots, axis)
        cur_idx = builder.load(idx_addr)

        extent = extents[axis]
        step = steps[axis]
        has_room = builder.icmp(lc.ICMP_SLT, cur_idx, extent)
        with cgutils.if_likely(builder, has_room):
            # The stored pointer of this dimension is the element to yield
            ptr_addr = cgutils.gep(builder, pointer_slots, axis)
            here = builder.load(ptr_addr)
            builder.store(here, current_slot)
            # Advance this dimension by one stride / one index
            builder.store(cgutils.pointer_add(builder, here, step),
                          ptr_addr)
            builder.store(builder.add(cur_idx, one), idx_addr)
            # Restart every inner dimension one stride past `here`,
            # with its index set to one
            for inner in range(axis + 1, ndim):
                inner_idx_addr = cgutils.gep(builder, index_slots, inner)
                inner_ptr_addr = cgutils.gep(builder, pointer_slots, inner)
                restart = cgutils.pointer_add(builder, here, steps[inner])
                builder.store(restart, inner_ptr_addr)
                builder.store(one, inner_idx_addr)
            builder.branch(fetch_bb)

    # No dimension had room left: the array is finished
    result.set_valid(False)
    builder.branch(exit_bb)

    builder.position_at_end(fetch_bb)
    # Indices and pointers are updated; load the element and yield it.
    element_ptr = builder.load(current_slot)
    result.yield_(context.unpack_value(builder, arrty.dtype, element_ptr))
    builder.branch(exit_bb)

    builder.position_at_end(exit_bb)
def generate_kernel_wrapper(self, func, argtypes):
    """
    Build the ``"cudaPy_" + func.name`` wrapper in a separate module,
    link that module into ``func``'s module and return the linked wrapper.

    The wrapped function is declared by name in a module obtained from
    ``self.create_module("cuda.kernel.wrapper")``.  On a user exception
    the status code is stored in an ``__errcode__`` global through a call
    to a function declared as ``___numba_cas_hack``; the thread whose
    exchange succeeded also stores its ``tid`` and ``ctaid`` values.
    """
    module = func.module

    wrapper_argtys = [self.get_argument_type(argty) for argty in argtypes]
    wrapper_fnty = Type.function(Type.void(), wrapper_argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    # Declaration of the wrapped function: returns an int and takes
    # get_return_type(types.pyobject) followed by the wrapper's arguments
    callee_fnty = Type.function(
        Type.int(),
        [self.get_return_type(types.pyobject)] + wrapper_argtys)
    callee = wrapper_module.add_function(callee_fnty, name=func.name)
    wrapfn = wrapper_module.add_function(wrapper_fnty,
                                         name="cudaPy_" + callee.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    def new_error_global(suffix):
        # Null-initialised int global whose name is derived from the wrapper
        gvar = wrapper_module.add_global_variable(
            Type.int(), name=wrapfn.name + suffix)
        gvar.initializer = Constant.null(gvar.type.pointee)
        return gvar

    gv_exc = new_error_global("__errcode__")
    # Globals are created axis by axis: tid first, then ctaid.
    per_axis = [(new_error_global("__tid%s__" % axis),
                 new_error_global("__ctaid%s__" % axis))
                for axis in 'xyz']
    gv_tid = [pair[0] for pair in per_axis]
    gv_ctaid = [pair[1] for pair in per_axis]

    callargs = [self.get_argument_value(builder, argty, argval)
                for argty, argval in zip(argtypes, wrapfn.args)]

    status, _ = self.call_function(builder, callee, types.void, argtypes,
                                   callargs)

    # Nothing went wrong: leave the kernel right away
    with cgutils.if_likely(builder, status.ok):
        builder.ret_void()

    user_exc_raised = builder.not_(status.exc)
    with cgutils.ifthen(builder, user_exc_raised):
        no_error = Constant.null(gv_exc.type.pointee)

        # Compare-and-swap through the declared helper so that the error
        # status is not rewritten: only the first error is recorded.
        cas_fnty = lc.Type.function(
            no_error.type, [gv_exc.type, no_error.type, no_error.type])
        cas_fn = wrapper_module.add_function(cas_fnty,
                                             name="___numba_cas_hack")
        previous = builder.call(cas_fn, [gv_exc, no_error, status.code])
        swapped = builder.icmp(ICMP_EQ, previous, no_error)

        # The thread that performed the swap records its thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with cgutils.ifthen(builder, swapped):
            for axis, slot in zip("xyz", gv_tid):
                builder.store(sreg.tid(axis), slot)

            for axis, slot in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(axis), slot)

    builder.ret_void()
    # NOTE: a former `inline_function(status.code)` call is disabled here.
    nvvm.set_cuda_kernel(wrapfn)
    # Link via the textual assembly of the wrapper module
    module.link_in(ll.parse_assembly(str(wrapper_module)))
    module.verify()

    return module.get_function(wrapfn.name)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Emit the code for a single iteration step over ``arr``.

    When the ``exhausted`` flag is already set, ``result`` is marked
    invalid.  Otherwise the element addressed by the innermost stored
    pointer is yielded -- on its own when ``kind == 'flat'``, else as an
    ``(indices, value)`` struct -- and the stored indices and pointers
    are then advanced; running off the end sets the ``exhausted`` flag.

    ``kind`` is not defined in this block; it is read from an enclosing
    scope.
    """
    ndim = arrty.ndim
    # NOTE(review): `data` is not referenced below.
    data = arr.data
    extents = cgutils.unpack_tuple(builder, arr.shape, ndim)
    steps = cgutils.unpack_tuple(builder, arr.strides, ndim)
    index_slots = self.indices
    pointer_slots = self.pointers

    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    exit_bb = cgutils.append_basic_block(builder, 'end')

    # Bail out when exhaustion was recorded by an earlier step
    exhausted_flag = builder.load(self.exhausted)
    already_done = cgutils.as_bool_bit(builder, exhausted_flag)
    with cgutils.if_unlikely(builder, already_done):
        result.set_valid(False)
        builder.branch(exit_bb)
    result.set_valid(True)

    # The innermost dimension's pointer addresses the current element
    innermost_addr = cgutils.gep(builder, pointer_slots, ndim - 1)
    element_ptr = builder.load(innermost_addr)
    value = context.unpack_value(builder, arrty.dtype, element_ptr)
    if kind == 'flat':
        payload = value
    else:
        # ndenumerate() => yield (indices, value)
        idxvals = []
        for axis in range(ndim):
            idxvals.append(
                builder.load(cgutils.gep(builder, index_slots, axis)))
        idxtuple = cgutils.pack_array(builder, idxvals)
        payload = cgutils.make_anonymous_struct(builder, [idxtuple, value])
    result.yield_(payload)

    # Advance to the next position, innermost dimension first.
    for axis in reversed(range(ndim)):
        idx_addr = cgutils.gep(builder, index_slots, axis)
        bumped_idx = builder.add(builder.load(idx_addr), one)

        extent = extents[axis]
        step = steps[axis]
        has_room = builder.icmp(lc.ICMP_SLT, bumped_idx, extent)
        with cgutils.if_likely(builder, has_room):
            # Still inside this dimension: keep the new index and move
            # the pointer by one stride.
            builder.store(bumped_idx, idx_addr)
            ptr_addr = cgutils.gep(builder, pointer_slots, axis)
            advanced = cgutils.pointer_add(
                builder, builder.load(ptr_addr), step)
            builder.store(advanced, ptr_addr)
            # Every inner dimension restarts from the advanced pointer
            for inner in range(axis + 1, ndim):
                inner_ptr_addr = cgutils.gep(builder, pointer_slots, inner)
                builder.store(advanced, inner_ptr_addr)
            builder.branch(exit_bb)
        # This dimension wrapped around: zero its index, try the next one
        builder.store(zero, idx_addr)

    # Every dimension wrapped around: remember that we are finished
    builder.store(cgutils.true_byte, self.exhausted)
    builder.branch(exit_bb)

    builder.position_at_end(exit_bb)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Generate one iteration step over ``arr``.

    ``result`` is set invalid if the ``exhausted`` flag is already set.
    Otherwise the current element is yielded first (bare for
    ``kind == 'flat'``, else packed with its indices), and only then are
    the per-dimension indices and pointers moved forward.  When no
    dimension can advance, the ``exhausted`` flag is stored as true.

    ``kind`` is not defined in this block; it is read from an enclosing
    scope.
    """
    ndim = arrty.ndim
    # NOTE(review): `data` is not referenced below.
    data = arr.data
    dim_sizes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    dim_strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    idx_array = self.indices
    ptr_array = self.pointers

    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)

    done_block = cgutils.append_basic_block(builder, 'end')

    # Exhaustion computed by a previous step short-circuits this one
    was_exhausted = cgutils.as_bool_bit(
        builder, builder.load(self.exhausted))
    with cgutils.if_unlikely(builder, was_exhausted):
        result.set_valid(False)
        builder.branch(done_block)
    result.set_valid(True)

    # Read the element through the last dimension's stored pointer
    cur_ptr = builder.load(cgutils.gep(builder, ptr_array, ndim - 1))
    value = context.unpack_value(builder, arrty.dtype, cur_ptr)
    if kind != 'flat':
        # ndenumerate() => yield (indices, value)
        idxvals = [
            builder.load(cgutils.gep(builder, idx_array, d))
            for d in range(ndim)
        ]
        idxtuple = cgutils.pack_array(builder, idxvals)
        result.yield_(
            cgutils.make_anonymous_struct(builder, [idxtuple, value]))
    else:
        result.yield_(value)

    # Move on to the following position: inner dimensions are tried
    # before outer ones.
    for d in reversed(range(ndim)):
        idx_slot = cgutils.gep(builder, idx_array, d)
        next_idx = builder.add(builder.load(idx_slot), one)

        size = dim_sizes[d]
        stride = dim_strides[d]
        fits = builder.icmp(lc.ICMP_SLT, next_idx, size)
        with cgutils.if_likely(builder, fits):
            # The incremented index is valid, so commit it and bump the
            # pointer by this dimension's stride.
            builder.store(next_idx, idx_slot)
            ptr_slot = cgutils.gep(builder, ptr_array, d)
            old_ptr = builder.load(ptr_slot)
            new_ptr = cgutils.pointer_add(builder, old_ptr, stride)
            builder.store(new_ptr, ptr_slot)
            # Inner dimensions start over at the new pointer
            for inner_d in range(d + 1, ndim):
                inner_slot = cgutils.gep(builder, ptr_array, inner_d)
                builder.store(new_ptr, inner_slot)
            builder.branch(done_block)
        # Out of range: reset this index and fall through to the next
        # (outer) dimension
        builder.store(zero, idx_slot)

    # No dimension could advance: the array is finished
    builder.store(cgutils.true_byte, self.exhausted)
    builder.branch(done_block)

    builder.position_at_end(done_block)
def generate_kernel_wrapper(self, func, argtypes):
    """
    Build the ``"cudaPy_" + func.name`` wrapper in a separate module,
    link that module into ``func``'s module and return the linked wrapper.

    Argument types and values come from ``self.get_arg_packer(argtypes)``
    and the call goes through ``self.call_conv``.  Unless the status is a
    Python exception, a failing call stores its status code in an
    ``__errcode__`` global through a call to a function declared as
    ``___numba_cas_hack``; the thread whose exchange succeeded also
    stores its ``tid`` and ``ctaid`` values.
    """
    module = func.module

    packer = self.get_arg_packer(argtypes)
    wrapper_argtys = list(packer.argument_types)
    wrapper_fnty = Type.function(Type.void(), wrapper_argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    # Declaration of the wrapped function: returns an int and takes
    # call_conv.get_return_type(types.pyobject) followed by the wrapper's
    # argument types
    callee_fnty = Type.function(
        Type.int(),
        [self.call_conv.get_return_type(types.pyobject)] + wrapper_argtys)
    callee = wrapper_module.add_function(callee_fnty, name=func.name)
    wrapfn = wrapper_module.add_function(wrapper_fnty,
                                         name="cudaPy_" + callee.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    def make_error_gv(tail):
        # Null-initialised int global whose name is derived from the wrapper
        error_gv = wrapper_module.add_global_variable(
            Type.int(), name=wrapfn.name + tail)
        error_gv.initializer = Constant.null(error_gv.type.pointee)
        return error_gv

    gv_exc = make_error_gv("__errcode__")
    # Globals are created axis by axis: tid first, then ctaid.
    tid_ctaid_pairs = [(make_error_gv("__tid%s__" % axis),
                        make_error_gv("__ctaid%s__" % axis))
                       for axis in 'xyz']
    gv_tid = [tid_gv for tid_gv, _ in tid_ctaid_pairs]
    gv_ctaid = [ctaid_gv for _, ctaid_gv in tid_ctaid_pairs]

    callargs = packer.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, callee, types.void, argtypes, callargs)

    # Nothing went wrong: leave the kernel right away
    with cgutils.if_likely(builder, status.is_ok):
        builder.ret_void()

    not_python_exc = builder.not_(status.is_python_exc)
    with builder.if_then(not_python_exc):
        # A user exception was raised
        unset = Constant.null(gv_exc.type.pointee)

        # Compare-and-swap through the declared helper so that the error
        # status is not rewritten: only the first error is recorded.
        cas_fnty = lc.Type.function(unset.type,
                                    [gv_exc.type, unset.type, unset.type])
        cas_fn = wrapper_module.add_function(cas_fnty,
                                             name="___numba_cas_hack")
        seen = builder.call(cas_fn, [gv_exc, unset, status.code])
        first_error = builder.icmp(ICMP_EQ, seen, unset)

        # The thread that performed the swap records its thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with builder.if_then(first_error):
            for axis, target in zip("xyz", gv_tid):
                builder.store(sreg.tid(axis), target)

            for axis, target in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(axis), target)

    builder.ret_void()
    # NOTE: a former `inline_function(status.code)` call is disabled here.
    nvvm.set_cuda_kernel(wrapfn)
    # Link via the textual assembly of the wrapper module
    module.link_in(ll.parse_assembly(str(wrapper_module)))
    module.verify()

    return module.get_function(wrapfn.name)
def generate_kernel_wrapper(self, library, fname, argtypes):
    """
    Create the kernel wrapper for the function called ``fname`` (taking
    ``argtypes``) and add it to ``library``.

    The wrapper lives in its own IR module, which is added to ``library``
    before the library is finalized.  The value returned is the wrapper
    as looked up in ``library`` afterwards.
    """
    packer = self.get_arg_packer(argtypes)
    wrapper_argtys = list(packer.argument_types)
    wrapper_fnty = Type.function(Type.void(), wrapper_argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    # Declaration of the wrapped function: returns an int and takes
    # call_conv.get_return_type(types.pyobject) followed by the wrapper's
    # argument types
    callee_fnty = Type.function(
        Type.int(),
        [self.call_conv.get_return_type(types.pyobject)] + wrapper_argtys)
    func = wrapper_module.add_function(callee_fnty, name=fname)
    wrapfn = wrapper_module.add_function(wrapper_fnty,
                                         name="cudaPy_" + func.name)
    builder = Builder(wrapfn.append_basic_block(''))

    def make_error_gv(tail):
        # Null-initialised int global whose name is derived from the wrapper
        error_gv = wrapper_module.add_global_variable(
            Type.int(), name=wrapfn.name + tail)
        error_gv.initializer = Constant.null(error_gv.type.pointee)
        return error_gv

    gv_exc = make_error_gv("__errcode__")
    # Globals are created axis by axis: tid first, then ctaid.
    tid_ctaid_pairs = [(make_error_gv("__tid%s__" % axis),
                        make_error_gv("__ctaid%s__" % axis))
                       for axis in 'xyz']
    gv_tid = [pair[0] for pair in tid_ctaid_pairs]
    gv_ctaid = [pair[1] for pair in tid_ctaid_pairs]

    callargs = packer.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, func, types.void, argtypes, callargs)

    # Nothing went wrong: leave the kernel right away
    with cgutils.if_likely(builder, status.is_ok):
        builder.ret_void()

    not_python_exc = builder.not_(status.is_python_exc)
    with builder.if_then(not_python_exc):
        # A user exception was raised
        unset = Constant.null(gv_exc.type.pointee)

        # Compare-and-swap through the declared helper so that the error
        # status is not rewritten: only the first error is recorded.
        cas_fnty = lc.Type.function(unset.type,
                                    [gv_exc.type, unset.type, unset.type])
        cas_fn = wrapper_module.add_function(cas_fnty,
                                             name="___numba_cas_hack")
        seen = builder.call(cas_fn, [gv_exc, unset, status.code])
        first_error = builder.icmp(ICMP_EQ, seen, unset)

        # The thread that performed the swap records its thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with builder.if_then(first_error):
            for axis, target in zip("xyz", gv_tid):
                builder.store(sreg.tid(axis), target)

            for axis, target in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(axis), target)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    return library.get_function(wrapfn.name)
def iternext_specific(self, context, builder, arrty, arr, result):
    """
    Generate one iteration step over ``arr``.

    The step is invalid when the ``empty`` flag is set or when every
    dimension's index has reached its size.  Otherwise the first
    dimension (scanning inner to outer) with a valid index supplies the
    pointer to yield; that dimension is advanced, the dimensions inside
    it are restarted, and the value is then loaded and yielded.
    """
    ndim = arrty.ndim
    data = arr.data
    dim_sizes = cgutils.unpack_tuple(builder, arr.shape, ndim)
    dim_strides = cgutils.unpack_tuple(builder, arr.strides, ndim)
    idx_array = self.indices
    ptr_array = self.pointers

    # NOTE(review): `zero` and `minus_one` are not referenced below.
    zero = context.get_constant(types.intp, 0)
    one = context.get_constant(types.intp, 1)
    minus_one = context.get_constant(types.intp, -1)
    result.set_valid(True)

    yield_block = cgutils.append_basic_block(builder, 'continued')
    done_block = cgutils.append_basic_block(builder, 'end')

    # The `empty` flag short-circuits the whole step
    was_empty = cgutils.as_bool_bit(builder, builder.load(self.empty))
    with cgutils.if_unlikely(builder, was_empty):
        result.set_valid(False)
        builder.branch(done_block)

    # Slot (from alloca_once) that will hold the pointer to yield from
    chosen_ptr_slot = cgutils.alloca_once(builder, data.type)

    # Scan the dimensions from the innermost to the outermost
    for d in reversed(range(ndim)):
        idx_slot = cgutils.gep(builder, idx_array, d)
        idx_now = builder.load(idx_slot)

        size = dim_sizes[d]
        stride = dim_strides[d]
        fits = builder.icmp(lc.ICMP_SLT, idx_now, size)
        with cgutils.if_likely(builder, fits):
            # Valid index: this dimension's pointer is the one to yield
            ptr_slot = cgutils.gep(builder, ptr_array, d)
            base = builder.load(ptr_slot)
            builder.store(base, chosen_ptr_slot)
            # Prepare this dimension's next pointer and index
            following_ptr = cgutils.pointer_add(builder, base, stride)
            builder.store(following_ptr, ptr_slot)
            following_idx = builder.add(idx_now, one)
            builder.store(following_idx, idx_slot)
            # Dimensions inside this one restart at index one, one of
            # their own strides past `base`
            for inner_d in range(d + 1, ndim):
                inner_idx_slot = cgutils.gep(builder, idx_array, inner_d)
                inner_ptr_slot = cgutils.gep(builder, ptr_array, inner_d)
                inner_start = cgutils.pointer_add(
                    builder, base, dim_strides[inner_d])
                builder.store(inner_start, inner_ptr_slot)
                builder.store(one, inner_idx_slot)
            builder.branch(yield_block)

    # Fell through every dimension: end of array
    result.set_valid(False)
    builder.branch(done_block)

    builder.position_at_end(yield_block)
    # Bookkeeping is done; fetch the value and hand it out.
    chosen_ptr = builder.load(chosen_ptr_slot)
    value = context.unpack_value(builder, arrty.dtype, chosen_ptr)
    result.yield_(value)
    builder.branch(done_block)

    builder.position_at_end(done_block)