def _gauss_impl(context, builder, sig, args, state):
    """
    Lower a Gaussian-distribution sampler (mu, sigma) for the given RNG
    *state* ("py" or "np").

    Uses the classic Box-Muller trick: each transform produces a pair of
    normal variates, so one is returned and the other is cached in the RNG
    state (``gauss``/``has_gauss`` slots) for the next call.
    """
    # The type for all computations (either float or double)
    ty = sig.return_type
    llty = context.get_data_type(ty)

    state_ptr = get_state_ptr(context, builder, state)
    # Pick the uniform generator matching the requested RNG state.
    _random = {"py": random.random,
               "np": np.random.random}[state]

    ret = cgutils.alloca_once(builder, llty, name="result")

    gauss_ptr = get_gauss_ptr(builder, state_ptr)
    has_gauss_ptr = get_has_gauss_ptr(builder, state_ptr)
    has_gauss = cgutils.is_true(builder, builder.load(has_gauss_ptr))
    with cgutils.ifelse(builder, has_gauss) as (then, otherwise):
        with then:
            # if has_gauss: return it
            builder.store(builder.load(gauss_ptr), ret)
            builder.store(const_int(0), has_gauss_ptr)
        with otherwise:
            # if not has_gauss: compute a pair of numbers using the Box-Muller
            # transform; keep one and return the other
            pair = context.compile_internal(builder,
                                            _gauss_pair_impl(_random),
                                            signature(types.UniTuple(ty, 2)),
                                            ())
            first, second = cgutils.unpack_tuple(builder, pair, 2)
            builder.store(first, gauss_ptr)
            builder.store(second, ret)
            builder.store(const_int(1), has_gauss_ptr)

    mu, sigma = args
    # Scale and shift the unit normal: mu + sigma * z
    return builder.fadd(mu, builder.fmul(sigma, builder.load(ret)))
def timedelta_sign_impl(context, builder, sig, args):
    """
    Lower the sign() of a timedelta64 value: 1 if positive, 0 if zero,
    -1 if negative (NaT, being the smallest int64, falls in the negative
    branch -- presumably matching NumPy; confirm against callers).
    """
    val, = args
    ret = alloc_timedelta_result(builder)
    zero = Constant.int(TIMEDELTA64, 0)
    with cgutils.ifelse(builder, builder.icmp(lc.ICMP_SGT, val, zero)
                        ) as (gt_zero, le_zero):
        with gt_zero:
            builder.store(Constant.int(TIMEDELTA64, 1), ret)
        with le_zero:
            # val <= 0: distinguish exactly-zero from negative.
            with cgutils.ifelse(builder, builder.icmp(lc.ICMP_EQ, val, zero)
                                ) as (eq_zero, lt_zero):
                with eq_zero:
                    builder.store(Constant.int(TIMEDELTA64, 0), ret)
                with lt_zero:
                    builder.store(Constant.int(TIMEDELTA64, -1), ret)
    return builder.load(ret)
def _long_from_native_int(self, ival, func_name, native_int_type, signed):
    """
    Box the native integer *ival* as a Python int object by calling the
    C-API function *func_name* (e.g. PyLong_FromLongLong).

    Under Python 2, prefer PyInt_FromLong when the value fits in a C long,
    which avoids allocating a PyLong.
    """
    fnty = Type.function(self.pyobj, [native_int_type])
    fn = self._get_function(fnty, name=func_name)
    resptr = cgutils.alloca_once(self.builder, self.pyobj)

    if PYVERSION < (3, 0):
        # Under Python 2, we try to return a PyInt object whenever
        # the given number fits in a C long.
        pyint_fnty = Type.function(self.pyobj, [self.long])
        pyint_fn = self._get_function(pyint_fnty, name="PyInt_FromLong")
        long_max = Constant.int(native_int_type, _helperlib.long_max)
        if signed:
            long_min = Constant.int(native_int_type, _helperlib.long_min)
            # Signed: the value must lie in [LONG_MIN, LONG_MAX].
            use_pyint = self.builder.and_(
                self.builder.icmp(lc.ICMP_SGE, ival, long_min),
                self.builder.icmp(lc.ICMP_SLE, ival, long_max),
            )
        else:
            # Unsigned: only the upper bound needs checking.
            use_pyint = self.builder.icmp(lc.ICMP_ULE, ival, long_max)

        with cgutils.ifelse(self.builder, use_pyint) as (then, otherwise):
            with then:
                # Fits in a C long => use the cheaper PyInt constructor.
                downcast_ival = self.builder.trunc(ival, self.long)
                res = self.builder.call(pyint_fn, [downcast_ival])
                self.builder.store(res, resptr)
            with otherwise:
                # Fall back to the requested (PyLong) constructor.
                res = self.builder.call(fn, [ival])
                self.builder.store(res, resptr)
    else:
        # NOTE(review): this re-fetches the same function as *fn* above;
        # appears redundant but harmless.
        fn = self._get_function(fnty, name=func_name)
        self.builder.store(self.builder.call(fn, [ival]), resptr)

    return self.builder.load(resptr)
def to_native_optional(self, obj, typ):
    """
    Convert object *obj* to a native optional structure.

    Emits a runtime branch on whether *obj* is None: the "some" path
    unboxes the payload (propagating its error flag and cleanup), the
    "none" path stores the prebuilt none value.  Returns a NativeValue
    whose cleanup (if any) only runs when the payload was actually
    unboxed.
    """
    noneval = self.context.make_optional_none(self.builder, typ.type)
    is_not_none = self.builder.icmp(lc.ICMP_NE, obj, self.borrow_none())

    retptr = cgutils.alloca_once(self.builder, noneval.type)
    errptr = cgutils.alloca_once_value(self.builder, cgutils.false_bit)

    with cgutils.ifelse(self.builder, is_not_none) as (then, orelse):
        with then:
            native = self.to_native_value(obj, typ.type)
            just = self.context.make_optional_value(self.builder,
                                                    typ.type, native.value)
            self.builder.store(just, retptr)
            self.builder.store(native.is_error, errptr)

        with orelse:
            # Removed a dead store of an undef constant here: it was
            # immediately overwritten by the store of *noneval* below.
            self.builder.store(noneval, retptr)

    # *native* is always bound at codegen time since both with-bodies
    # execute while emitting IR.
    if native.cleanup is not None:
        def cleanup():
            # Only run the payload's cleanup on the path that unboxed it.
            with cgutils.ifthen(self.builder, is_not_none):
                native.cleanup()
    else:
        cleanup = None

    ret = self.builder.load(retptr)
    return NativeValue(ret, is_error=self.builder.load(errptr),
                       cleanup=cleanup)
def getiter_range_generic(context, builder, iterobj, start, stop, step):
    """
    Initialize *iterobj*'s trip count for a range(start, stop, step)
    iterator and return the iterator value.

    count = 0 when the step sign disagrees with (stop - start); otherwise
    ceil-div of the difference by the step.  A zero step raises an error.
    """
    diff = builder.sub(stop, start)
    intty = start.type
    zero = Constant.int(intty, 0)
    one = Constant.int(intty, 1)
    pos_diff = builder.icmp(lc.ICMP_SGT, diff, zero)
    pos_step = builder.icmp(lc.ICMP_SGT, step, zero)
    sign_differs = builder.xor(pos_diff, pos_step)
    zero_step = builder.icmp(lc.ICMP_EQ, step, zero)

    with cgutils.if_unlikely(builder, zero_step):
        # step shouldn't be zero
        context.return_errcode(builder, 1)

    with cgutils.ifelse(builder, sign_differs) as (then, orelse):
        with then:
            # Empty range: step moves away from stop.
            builder.store(zero, iterobj.count)

        with orelse:
            rem = builder.srem(diff, step)
            # srem's result takes the sign of *diff*; normalize it to a
            # non-negative value so the "uneven division" test below also
            # works for negative steps (same fix as
            # RangeIter.from_range_state).  Without this, e.g.
            # range(5, 0, -2) would compute 2 instead of 3.
            rem = builder.select(pos_diff, rem, builder.neg(rem))
            uneven = builder.icmp(lc.ICMP_SGT, rem, zero)
            # Ceil division: add one when the division has a remainder.
            newcount = builder.add(builder.sdiv(diff, step),
                                   builder.select(uneven, one, zero))
            builder.store(newcount, iterobj.count)

    return iterobj._getvalue()
def get_next_int(context, builder, state_ptr, nbits):
    """
    Get the next integer with width *nbits*.
    """
    c32 = ir.Constant(nbits.type, 32)

    def get_shifted_int(nbits):
        # Draw 32 random bits and keep only the top *nbits* of them
        # (right-shifted into the low bits).
        shift = builder.sub(c32, nbits)
        y = get_next_int32(context, builder, state_ptr)
        return builder.lshr(y, builder.zext(shift, y.type))

    ret = cgutils.alloca_once_value(builder, ir.Constant(int64_t, 0))
    is_32b = builder.icmp_unsigned('<=', nbits, c32)
    with cgutils.ifelse(builder, is_32b) as (ifsmall, iflarge):
        with ifsmall:
            # nbits <= 32: a single 32-bit draw suffices.
            low = get_shifted_int(nbits)
            builder.store(builder.zext(low, int64_t), ret)
        with iflarge:
            # XXX This assumes nbits <= 64
            # Combine a full 32-bit low word with (nbits - 32) high bits.
            low = get_next_int32(context, builder, state_ptr)
            high = get_shifted_int(builder.sub(nbits, c32))
            total = builder.add(
                builder.zext(low, int64_t),
                builder.shl(builder.zext(high, int64_t),
                            ir.Constant(int64_t, 32)))
            builder.store(total, ret)
    return builder.load(ret)
def complex128_power_impl(context, builder, sig, args):
    """
    Lower complex128 ** complex128.

    The exponent value 2+0j is special-cased as a plain complex multiply;
    all other exponents call the external "numba.math.cpow" helper.
    """
    [ca, cb] = args
    a = Complex128(context, builder, value=ca)
    b = Complex128(context, builder, value=cb)
    c = Complex128(context, builder)
    module = cgutils.get_module(builder)
    pa = a._getpointer()
    pb = b._getpointer()
    pc = c._getpointer()

    # Optimize for square because cpow loses a lot of precision
    TWO = context.get_constant(types.float64, 2)
    ZERO = context.get_constant(types.float64, 0)

    b_real_is_two = builder.fcmp(lc.FCMP_OEQ, b.real, TWO)
    b_imag_is_zero = builder.fcmp(lc.FCMP_OEQ, b.imag, ZERO)
    b_is_two = builder.and_(b_real_is_two, b_imag_is_zero)

    with cgutils.ifelse(builder, b_is_two) as (then, otherwise):
        with then:
            # Lower as multiplication
            res = complex_mul_impl(context, builder, sig, (ca, ca))
            cres = Complex128(context, builder, value=res)
            c.real = cres.real
            c.imag = cres.imag

        with otherwise:
            # Lower with call to external function
            fnty = Type.function(Type.void(), [pa.type] * 3)
            cpow = module.get_or_insert_function(fnty, name="numba.math.cpow")
            builder.call(cpow, (pa, pb, pc))

    return builder.load(pc)
def timedelta_abs_impl(context, builder, sig, args):
    """
    Lower abs() of a timedelta64 value: negate if negative, otherwise
    return unchanged.
    """
    val, = args
    ret = alloc_timedelta_result(builder)
    with cgutils.ifelse(builder,
                        cgutils.is_scalar_neg(builder, val)) as (then, otherwise):
        with then:
            builder.store(builder.neg(val), ret)
        with otherwise:
            builder.store(val, ret)
    return builder.load(ret)
def year_to_days(builder, year_val): """ Given a year *year_val* (offset to 1970), return the number of days since the 1970 epoch. """ # The algorithm below is copied from Numpy's get_datetimestruct_days() # (src/multiarray/datetime.c) ret = cgutils.alloca_once(builder, TIMEDELTA64) # First approximation days = scale_by_constant(builder, year_val, 365) # Adjust for leap years with cgutils.ifelse(builder, cgutils.is_neg_int(builder, year_val)) \ as (if_neg, if_pos): with if_pos: # At or after 1970: # 1968 is the closest leap year before 1970. # Exclude the current year, so add 1. from_1968 = add_constant(builder, year_val, 1) # Add one day for each 4 years p_days = builder.add(days, unscale_by_constant(builder, from_1968, 4)) # 1900 is the closest previous year divisible by 100 from_1900 = add_constant(builder, from_1968, 68) # Subtract one day for each 100 years p_days = builder.sub(p_days, unscale_by_constant(builder, from_1900, 100)) # 1600 is the closest previous year divisible by 400 from_1600 = add_constant(builder, from_1900, 300) # Add one day for each 400 years p_days = builder.add(p_days, unscale_by_constant(builder, from_1600, 400)) builder.store(p_days, ret) with if_neg: # Before 1970: # NOTE `year_val` is negative, and so will be `from_1972` and `from_2000`. # 1972 is the closest later year after 1970. # Include the current year, so subtract 2. from_1972 = add_constant(builder, year_val, -2) # Subtract one day for each 4 years (`from_1972` is negative) n_days = builder.add(days, unscale_by_constant(builder, from_1972, 4)) # 2000 is the closest later year divisible by 100 from_2000 = add_constant(builder, from_1972, -28) # Add one day for each 100 years n_days = builder.sub(n_days, unscale_by_constant(builder, from_2000, 100)) # 2000 is also the closest later year divisible by 400 # Subtract one day for each 400 years n_days = builder.add(n_days, unscale_by_constant(builder, from_2000, 400)) builder.store(n_days, ret) return builder.load(ret)
def store(retval):
    # Closure over the enclosing wrapper's codegen state (builder, context,
    # pyapi, out, store_offset, signature).
    # If the object-mode call returned NULL (an error), report it as
    # unraisable and skip the store; otherwise unbox and write the result
    # into the output array at the current offset.
    is_error = cgutils.is_null(builder, retval)
    with cgutils.ifelse(builder, is_error) as (if_error, if_ok):
        with if_error:
            # Can't propagate the exception from a ufunc inner loop:
            # write it out as unraisable and drop our reference.
            msg = context.insert_const_string(pyapi.module,
                                              "object mode ufunc")
            msgobj = pyapi.string_from_string(msg)
            pyapi.err_write_unraisable(msgobj)
            pyapi.decref(msgobj)
        with if_ok:
            # Unbox
            retval = pyapi.to_native_value(retval, signature.return_type)
            # Store
            out.store_direct(retval, builder.load(store_offset))
def impl(context, builder, sig, args):
    # Lower an ordering comparison (closure variable *ll_op*) between two
    # timedelta64 values, scaling them to a common unit first unless one
    # of them is NaT.
    [va, vb] = args
    [ta, tb] = sig.args
    ret = alloc_boolean_result(builder)
    with cgutils.ifelse(builder,
                        are_not_nat(builder, [va, vb])) as (then, otherwise):
        with then:
            # Both values are regular: normalize to a common unit before
            # comparing.
            norm_a, norm_b = normalize_timedeltas(context, builder,
                                                  va, vb, ta, tb)
            builder.store(builder.icmp(ll_op, norm_a, norm_b), ret)
        with otherwise:
            # No scaling when comparing NaT with something else
            # (i.e. NaT is <= everything else, since it's the smallest
            # int64 value)
            builder.store(builder.icmp(ll_op, va, vb), ret)
    return builder.load(ret)
def impl(context, builder, sig, args):
    # Lower an (in)equality comparison (closure variables *ll_op* and
    # *default_value*) between two timedelta64 values.  Incompatible units
    # cannot be normalized, in which case the values compare unequal and
    # the closure-provided *default_value* is stored.
    [va, vb] = args
    [ta, tb] = sig.args
    ret = alloc_boolean_result(builder)
    with cgutils.ifelse(builder,
                        are_not_nat(builder, [va, vb])) as (then, otherwise):
        with then:
            try:
                norm_a, norm_b = normalize_timedeltas(context, builder,
                                                      va, vb, ta, tb)
            except RuntimeError:
                # Cannot normalize units => the values are unequal
                # (except if NaT)
                builder.store(default_value, ret)
            else:
                builder.store(builder.icmp(ll_op, norm_a, norm_b), ret)
        with otherwise:
            # No scaling when comparing NaTs
            builder.store(builder.icmp(ll_op, va, vb), ret)
    return builder.load(ret)
def from_range_state(cls, context, builder, state):
    """
    Create a RangeIter initialized from the given RangeState *state*.

    Precomputes the trip count: zero when the step sign disagrees with
    (stop - start), otherwise the ceil-division of the difference by the
    step.  A zero step raises ValueError at runtime.
    """
    self = cls(context, builder)
    start = state.start
    stop = state.stop
    step = state.step

    # The current position is kept in a stack slot so next() can update it.
    startptr = cgutils.alloca_once(builder, start.type)
    builder.store(start, startptr)

    countptr = cgutils.alloca_once(builder, start.type)

    self.iter = startptr
    self.stop = stop
    self.step = step
    self.count = countptr

    diff = builder.sub(stop, start)
    zero = context.get_constant(int_type, 0)
    one = context.get_constant(int_type, 1)
    pos_diff = builder.icmp(lc.ICMP_SGT, diff, zero)
    pos_step = builder.icmp(lc.ICMP_SGT, step, zero)
    sign_differs = builder.xor(pos_diff, pos_step)
    zero_step = builder.icmp(lc.ICMP_EQ, step, zero)

    with cgutils.if_unlikely(builder, zero_step):
        # step shouldn't be zero
        context.call_conv.return_user_exc(builder, ValueError,
                                          ("range() arg 3 must not be zero",))

    with cgutils.ifelse(builder, sign_differs) as (then, orelse):
        with then:
            # Empty range.
            builder.store(zero, self.count)

        with orelse:
            rem = builder.srem(diff, step)
            # srem takes the sign of *diff*; normalize so the uneven test
            # below also works for negative steps.
            rem = builder.select(pos_diff, rem, builder.neg(rem))
            uneven = builder.icmp(lc.ICMP_SGT, rem, zero)
            # Ceil division: add one iteration when there is a remainder.
            newcount = builder.add(builder.sdiv(diff, step),
                                   builder.select(uneven, one, zero))
            builder.store(newcount, self.count)

    return self
def from_range_state(cls, context, builder, state):
    """
    Create a RangeIter initialized from the given RangeState *state*.

    Trip count is zero when the step sign disagrees with (stop - start),
    otherwise ceil((stop - start) / step); a zero step raises ValueError.
    """
    self = cls(context, builder)
    start = state.start
    stop = state.stop
    step = state.step

    # Current position lives in a stack slot so next() can advance it.
    startptr = cgutils.alloca_once(builder, start.type)
    builder.store(start, startptr)

    countptr = cgutils.alloca_once(builder, start.type)

    self.iter = startptr
    self.stop = stop
    self.step = step
    self.count = countptr

    diff = builder.sub(stop, start)
    zero = context.get_constant(int_type, 0)
    one = context.get_constant(int_type, 1)
    pos_diff = builder.icmp(lc.ICMP_SGT, diff, zero)
    pos_step = builder.icmp(lc.ICMP_SGT, step, zero)
    sign_differs = builder.xor(pos_diff, pos_step)
    zero_step = builder.icmp(lc.ICMP_EQ, step, zero)

    with cgutils.if_unlikely(builder, zero_step):
        # step shouldn't be zero
        context.call_conv.return_user_exc(
            builder, ValueError,
            ("range() arg 3 must not be zero", ))

    with cgutils.ifelse(builder, sign_differs) as (then, orelse):
        with then:
            # Empty range.
            builder.store(zero, self.count)

        with orelse:
            rem = builder.srem(diff, step)
            # srem takes the sign of *diff*; normalize so the uneven test
            # below also works for negative steps.
            rem = builder.select(pos_diff, rem, builder.neg(rem))
            uneven = builder.icmp(lc.ICMP_SGT, rem, zero)
            # Ceil division: one extra iteration on a remainder.
            newcount = builder.add(builder.sdiv(diff, step),
                                   builder.select(uneven, one, zero))
            builder.store(newcount, self.count)

    return self
def reduce_datetime_for_unit(builder, dt_val, src_unit, dest_unit):
    """
    Convert datetime64 value *dt_val* from *src_unit* down to at most
    day (or week) resolution when the destination unit requires it.

    Returns a (value, unit) pair; values already at day resolution or
    finer, or destinations coarser than days, are passed through
    unchanged.
    """
    dest_unit_code = npdatetime.DATETIME_UNITS[dest_unit]
    src_unit_code = npdatetime.DATETIME_UNITS[src_unit]
    if dest_unit_code < 2 or src_unit_code >= 2:
        # Destination is coarser than days, or source already at day
        # resolution or finer: nothing to do.
        return dt_val, src_unit
    # Need to compute the day ordinal for *dt_val*
    if src_unit_code == 0:
        # Years to days
        year_val = dt_val
        days_val = year_to_days(builder, year_val)

    else:
        # Months to days
        leap_array = cgutils.global_constant(builder, "leap_year_months_acc",
                                             leap_year_months_acc)
        normal_array = cgutils.global_constant(builder,
                                               "normal_year_months_acc",
                                               normal_year_months_acc)

        days = cgutils.alloca_once(builder, TIMEDELTA64)

        # First compute year number and month number
        year, month = cgutils.divmod_by_constant(builder, dt_val, 12)

        # Then deduce the number of days
        with cgutils.ifelse(builder,
                            is_leap_year(builder, year)) as (then, otherwise):
            with then:
                # Accumulated day count at the start of *month* in a leap
                # year.
                addend = builder.load(cgutils.gep(builder, leap_array,
                                                  0, month))
                builder.store(addend, days)
            with otherwise:
                # Same lookup for a normal year.
                addend = builder.load(cgutils.gep(builder, normal_array,
                                                  0, month))
                builder.store(addend, days)

        days_val = year_to_days(builder, year)
        days_val = builder.add(days_val, builder.load(days))

    if dest_unit_code == 2:
        # Need to scale back to weeks
        weeks, _ = cgutils.divmod_by_constant(builder, days_val, 7)
        return weeks, 'W'
    else:
        return days_val, 'D'
def impl(context, builder, sig, args):
    # Lower a comparison (closure variable *ll_op*) between two datetime64
    # values: convert both operands to the best common unit before
    # comparing, unless one of them is NaT.
    va, vb = args
    ta, tb = sig.args
    unit_a = ta.unit
    unit_b = tb.unit
    ret_unit = npdatetime.get_best_unit(unit_a, unit_b)
    ret = alloc_boolean_result(builder)
    with cgutils.ifelse(builder,
                        are_not_nat(builder, [va, vb])) as (then, otherwise):
        with then:
            # Both regular values: scale to the common unit first.
            norm_a = convert_datetime_for_arith(builder, va,
                                                unit_a, ret_unit)
            norm_b = convert_datetime_for_arith(builder, vb,
                                                unit_b, ret_unit)
            ret_val = builder.icmp(ll_op, norm_a, norm_b)
            builder.store(ret_val, ret)
        with otherwise:
            # No scaling when comparing NaTs
            ret_val = builder.icmp(ll_op, va, vb)
            builder.store(ret_val, ret)
    return builder.load(ret)
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments

    Emits a function with the NumPy ufunc inner-loop signature
    (args, dims, steps, data).  In object mode a single generic loop runs
    under the GIL; otherwise a fast contiguous loop is emitted alongside a
    generic strided loop, selected at runtime on the strides.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    # NumPy ufunc inner-loop prototype: (char **args, npy_intp *dims,
    # npy_intp *steps, void *data)
    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    oldfunc = func
    # Re-declare the kernel inside the wrapper module so it can be called
    # (and inlined) from the wrapper.
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # dims[0] is the number of elements to process.
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.call_conv.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(
            UArrayArg(context, builder, arg_args, arg_steps, i,
                      context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps,
                    len(actual_args), valty)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # The fast loop is only valid when every input is unit-strided.
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        # Object-mode calls touch Python objects: hold the GIL for the
        # whole loop.
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder,
                                           arrays, out, offsets,
                                           store_offset, signature,
                                           pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()
    else:
        with cgutils.ifelse(builder, unit_strided) as (is_unit_strided,
                                                       is_strided):

            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as ind:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature)
                builder.ret_void()

        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)
    # Hide the scalar kernel from the final module's exports.
    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
def to_native_value(self, obj, typ):
    """
    Unbox the Python object *obj* into a native value of Numba type *typ*.

    Dispatches on *typ*: pass-through for raw objects, C-API conversions
    for booleans, integers, floats and complexes, dedicated extractors for
    NumPy datetime/timedelta, array adaptors for arrays, and a runtime
    None-check for Optional types (which returns a *pointer* to the
    optional structure).  Raises NotImplementedError for unsupported
    types.
    """
    if isinstance(typ, types.Object) or typ == types.pyobject:
        return obj

    elif typ == types.boolean:
        istrue = self.object_istrue(obj)
        zero = Constant.null(istrue.type)
        return self.builder.icmp(lc.ICMP_NE, istrue, zero)

    elif typ in types.unsigned_domain:
        longobj = self.number_long(obj)
        ullval = self.long_as_ulonglong(longobj)
        self.decref(longobj)
        return self.builder.trunc(ullval,
                                  self.context.get_argument_type(typ))

    elif typ in types.signed_domain:
        longobj = self.number_long(obj)
        llval = self.long_as_longlong(longobj)
        self.decref(longobj)
        return self.builder.trunc(llval,
                                  self.context.get_argument_type(typ))

    elif typ == types.float32:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return self.builder.fptrunc(fval,
                                    self.context.get_argument_type(typ))

    elif typ == types.float64:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return fval

    elif typ in (types.complex128, types.complex64):
        # Always extract as complex128, then downcast if needed.
        cplxcls = self.context.make_complex(types.complex128)
        cplx = cplxcls(self.context, self.builder)
        pcplx = cplx._getpointer()
        ok = self.complex_adaptor(obj, pcplx)
        failed = cgutils.is_false(self.builder, ok)

        with cgutils.if_unlikely(self.builder, failed):
            # Conversion failed: propagate the Python error.
            self.builder.ret(self.get_null_object())

        if typ == types.complex64:
            c64cls = self.context.make_complex(typ)
            c64 = c64cls(self.context, self.builder)
            freal = self.context.cast(self.builder, cplx.real,
                                      types.float64, types.float32)
            fimag = self.context.cast(self.builder, cplx.imag,
                                      types.float64, types.float32)
            c64.real = freal
            c64.imag = fimag
            return c64._getvalue()
        else:
            return cplx._getvalue()

    elif isinstance(typ, types.NPDatetime):
        val = self.extract_np_datetime(obj)
        return val

    elif isinstance(typ, types.NPTimedelta):
        val = self.extract_np_timedelta(obj)
        return val

    elif isinstance(typ, types.Array):
        return self.to_native_array(typ, obj)

    elif isinstance(typ, types.Optional):
        isnone = self.builder.icmp(lc.ICMP_EQ, obj, self.borrow_none())
        # Hoist the none value and the result slot out of the branches:
        # previously the alloca was emitted inside the "then" block and
        # then stored to from the "orelse" block, which neither branch
        # dominates -- hoisting matches to_native_optional() and keeps
        # the IR well-formed regardless of where alloca_once places the
        # instruction.
        noneval = self.context.make_optional_none(self.builder, typ.type)
        ret = cgutils.alloca_once(self.builder, noneval.type)
        with cgutils.ifelse(self.builder, isnone) as (then, orelse):
            with then:
                self.builder.store(noneval, ret)

            with orelse:
                val = self.to_native_value(obj, typ.type)
                just = self.context.make_optional_value(self.builder,
                                                        typ.type, val)
                self.builder.store(just, ret)
        # NOTE: returns the pointer to the optional struct, not its value.
        return ret

    raise NotImplementedError(typ)
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments

    Emits a function with the NumPy ufunc inner-loop signature
    (args, dims, steps, data).  In object mode a single generic loop runs
    under the GIL; otherwise a fast contiguous loop and a generic strided
    loop are emitted, selected at runtime on the input strides.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    # NumPy ufunc inner-loop prototype: (char **args, npy_intp *dims,
    # npy_intp *steps, void *data)
    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    oldfunc = func
    # Re-declare the kernel inside the wrapper module so it can be called
    # (and inlined) from the wrapper.
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # dims[0] is the number of elements to process.
    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                typ))

    # Prepare output
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # The fast loop is only valid when every input is unit-strided.
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        # Object-mode calls touch Python objects: hold the GIL for the
        # whole loop.
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder,
                                           arrays, out, offsets,
                                           store_offset, signature,
                                           pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()
    else:
        with cgutils.ifelse(builder, unit_strided) as (is_unit_strided,
                                                       is_strided):

            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as ind:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature)
                builder.ret_void()

        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)

    return wrapper
def setitem_array1d_slice(context, builder, sig, args):
    """
    Lower ``ary[start:stop:step] = val`` for a 1d array with a scalar
    *val*.

    Clamps start/stop to the array bounds following CPython's
    PySlice_GetIndicesEx (Objects/sliceobject.c), then loops over the
    selected indices (forward or backward depending on the step sign) and
    stores *val* at each position.
    """
    aryty, idxty, valty = sig.args
    ary, idx, val = args
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)
    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)
    slicestruct = Slice(context, builder, value=idx)

    # the logic here follows that of Python's Objects/sliceobject.c
    # in particular PySlice_GetIndicesEx function
    ZERO = Constant.int(slicestruct.step.type, 0)
    NEG_ONE = Constant.int(slicestruct.start.type, -1)

    b_step_eq_zero = builder.icmp(lc.ICMP_EQ, slicestruct.step, ZERO)
    # bail if step is 0
    with cgutils.ifthen(builder, b_step_eq_zero):
        context.return_errcode(builder, errcode.ASSERTION_ERROR)

    # adjust for negative indices for start
    start = cgutils.alloca_once_value(builder, slicestruct.start)
    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        # Negative start counts from the end of the array.
        add = builder.add(builder.load(start), shapes[0])
        builder.store(add, start)

    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        # Still negative: clamp to -1 (backward) or 0 (forward).
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        builder.store(cond, start)

    b_start_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(start),
                                   shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_start_geq_len):
        # Past the end: clamp to len-1 (backward) or len (forward).
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, start)

    # adjust stop for negative value
    stop = cgutils.alloca_once_value(builder, slicestruct.stop)
    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        # Negative stop counts from the end of the array.
        add = builder.add(builder.load(stop), shapes[0])
        builder.store(add, stop)

    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        # BUGFIX: this clamped value belongs to *stop*; it was previously
        # (and incorrectly) stored into *start*, clobbering the already
        # adjusted start index.
        builder.store(cond, stop)

    b_stop_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(stop),
                                  shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_stop_geq_len):
        # Past the end: clamp to len-1 (backward) or len (forward).
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, stop)

    b_step_gt_zero = builder.icmp(lc.ICMP_SGT, slicestruct.step, ZERO)
    with cgutils.ifelse(builder, b_step_gt_zero) as (then0, otherwise0):
        with then0:
            # Forward (incrementing) loop.
            with cgutils.for_range_slice(
                    builder, builder.load(start), builder.load(stop),
                    slicestruct.step, slicestruct.start.type) as loop_idx1:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx1],
                                               wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
        with otherwise0:
            # Backward (decrementing) loop.
            with cgutils.for_range_slice(
                    builder, builder.load(start), builder.load(stop),
                    slicestruct.step, slicestruct.start.type,
                    inc=False) as loop_idx2:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx2],
                                               wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
def impl(context, builder, sig, args):
    # Lower an element-wise binary array operation out[i] = core(x[i], y[i])
    # over same-shaped arrays (closure variables: *core*, the scalar kernel,
    # and *divbyzero*, whether division-by-zero needs special handling).
    [tyvx, tywy, tyout] = sig.args
    [vx, wy, out] = args
    assert tyvx.dtype == tywy.dtype
    ndim = tyvx.ndim

    xary = context.make_array(tyvx)(context, builder, vx)
    yary = context.make_array(tywy)(context, builder, wy)
    oary = context.make_array(tyout)(context, builder, out)

    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    loopshape = cgutils.unpack_tuple(builder, xary.shape, ndim)

    xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                 for ary in (xary, yary, oary)]
    xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                   for ary in (xary, yary, oary)]
    xyo_data = [ary.data for ary in (xary, yary, oary)]
    xyo_layout = [ty.layout for ty in (tyvx, tywy, tyout)]

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
        # Element pointers into x, y and out at the current indices.
        [px, py, po] = [cgutils.get_item_pointer2(builder,
                                                  data=data, shape=shape,
                                                  strides=strides,
                                                  layout=layout,
                                                  inds=indices)
                        for data, shape, strides, layout
                        in zip(xyo_data, xyo_shape, xyo_strides,
                               xyo_layout)]

        x = builder.load(px)
        y = builder.load(py)
        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero,
                                expect=False) as (then, orelse):
                with then:
                    # Divide by zero
                    if tyout.dtype in types.real_domain:
                        # If x is float and is 0 also, return Nan; else
                        # return Inf
                        outltype = context.get_data_type(tyout.dtype)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        res = builder.select(shouldretnan, nan, inf)
                    elif (tyout.dtype in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        # Mimic C overflow behavior: INT_MIN of the type.
                        res = Constant.int(y.type,
                                           0x1 << (y.type.width - 1))
                    else:
                        res = Constant.null(y.type)
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    res = core(builder, (x, y))
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
        else:
            # Handle other operations
            res = core(builder, (x, y))
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def lower_expr(self, expr):
    """
    Lower an IR expression node in object mode: every operand is a Python
    object and operations go through the CPython C-API (via self.pyapi).
    Returns a new reference to the resulting object; error returns from
    the C-API are converted to exception propagation via check_error /
    check_int_status.
    """
    if expr.op == 'binop':
        return self.lower_binop(expr, inplace=False)
    elif expr.op == 'inplace_binop':
        return self.lower_binop(expr, inplace=True)
    elif expr.op == 'unary':
        value = self.loadvar(expr.value.name)
        if expr.fn == '-':
            res = self.pyapi.number_negative(value)
        elif expr.fn == '+':
            res = self.pyapi.number_positive(value)
        elif expr.fn == 'not':
            # object_not returns a C int (-1 on error); convert it back
            # to a Python bool object.
            res = self.pyapi.object_not(value)
            self.check_int_status(res)
            longval = self.builder.zext(res, self.pyapi.long)
            res = self.pyapi.bool_from_long(longval)
        elif expr.fn == '~':
            res = self.pyapi.number_invert(value)
        else:
            raise NotImplementedError(expr)
        self.check_error(res)
        return res
    elif expr.op == 'call':
        argvals = [self.loadvar(a.name) for a in expr.args]
        fn = self.loadvar(expr.func.name)
        if not expr.kws:
            # No keyword
            ret = self.pyapi.call_function_objargs(fn, argvals)
        else:
            # Have Keywords
            keyvalues = [(k, self.loadvar(v.name)) for k, v in expr.kws]
            args = self.pyapi.tuple_pack(argvals)
            kws = self.pyapi.dict_pack(keyvalues)
            ret = self.pyapi.call(fn, args, kws)
            self.decref(kws)
            self.decref(args)
        self.check_error(ret)
        return ret
    elif expr.op == 'getattr':
        obj = self.loadvar(expr.value.name)
        res = self.pyapi.object_getattr_string(obj, expr.attr)
        self.check_error(res)
        return res
    elif expr.op == 'build_tuple':
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.tuple_pack(items)
        self.check_error(res)
        return res
    elif expr.op == 'build_list':
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.list_pack(items)
        self.check_error(res)
        return res
    elif expr.op == 'build_map':
        # Entries are filled in by subsequent setitem operations.
        res = self.pyapi.dict_new(expr.size)
        self.check_error(res)
        return res
    elif expr.op == 'build_set':
        # Build empty set and add items one by one.
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.set_new()
        self.check_error(res)
        for it in items:
            ok = self.pyapi.set_add(res, it)
            self.check_int_status(ok)
        return res
    elif expr.op == 'getiter':
        obj = self.loadvar(expr.value.name)
        res = self.pyapi.object_getiter(obj)
        self.check_error(res)
        return res
    elif expr.op == 'iternext':
        iterobj = self.loadvar(expr.value.name)
        item = self.pyapi.iter_next(iterobj)
        is_valid = cgutils.is_not_null(self.builder, item)
        # Result is a (value, is_valid) pair.
        pair = self.pyapi.tuple_new(2)
        with cgutils.ifelse(self.builder, is_valid) as (then, otherwise):
            with then:
                self.pyapi.tuple_setitem(pair, 0, item)
            with otherwise:
                # Iterator exhausted or an exception occurred; raise the
                # latter now if so.
                self.check_occurred()
                # Make the tuple valid by inserting None as dummy
                # iteration "result" (it will be ignored).
                self.pyapi.tuple_setitem(pair, 0, self.pyapi.make_none())
        self.pyapi.tuple_setitem(pair, 1,
                                 self.pyapi.bool_from_bool(is_valid))
        return pair
    elif expr.op == 'pair_first':
        pair = self.loadvar(expr.value.name)
        first = self.pyapi.tuple_getitem(pair, 0)
        # tuple_getitem returns a borrowed reference: take ownership.
        self.incref(first)
        return first
    elif expr.op == 'pair_second':
        pair = self.loadvar(expr.value.name)
        second = self.pyapi.tuple_getitem(pair, 1)
        # tuple_getitem returns a borrowed reference: take ownership.
        self.incref(second)
        return second
    elif expr.op == 'exhaust_iter':
        iterobj = self.loadvar(expr.value.name)
        tup = self.pyapi.sequence_tuple(iterobj)
        self.check_error(tup)
        # Check tuple size is as expected
        tup_size = self.pyapi.tuple_size(tup)
        expected_size = self.context.get_constant(types.intp, expr.count)
        has_wrong_size = self.builder.icmp(lc.ICMP_NE,
                                           tup_size, expected_size)
        with cgutils.if_unlikely(self.builder, has_wrong_size):
            excid = self.add_exception(ValueError)
            self.context.return_user_exc(self.builder, excid)
        return tup
    elif expr.op == 'getitem':
        value = self.loadvar(expr.value.name)
        index = self.loadvar(expr.index.name)
        res = self.pyapi.object_getitem(value, index)
        self.check_error(res)
        return res
    elif expr.op == 'static_getitem':
        value = self.loadvar(expr.value.name)
        # The constant index must be boxed before the object call.
        index = self.context.get_constant(types.intp, expr.index)
        indexobj = self.pyapi.long_from_ssize_t(index)
        self.check_error(indexobj)
        res = self.pyapi.object_getitem(value, indexobj)
        self.decref(indexobj)
        self.check_error(res)
        return res
    elif expr.op == 'getslice':
        target = self.loadvar(expr.target.name)
        start = self.loadvar(expr.start.name)
        stop = self.loadvar(expr.stop.name)

        slicefn = self.get_builtin_obj("slice")
        sliceobj = self.pyapi.call_function_objargs(slicefn, (start, stop))
        self.decref(slicefn)
        self.check_error(sliceobj)

        res = self.pyapi.object_getitem(target, sliceobj)
        self.check_error(res)

        return res

    elif expr.op == 'cast':
        # No-op in object mode: everything is already a Python object.
        val = self.loadvar(expr.value.name)
        self.incref(val)
        return val

    else:
        raise NotImplementedError(expr)
def build_ufunc_wrapper(context, func, signature):
    """
    Wrap the scalar function with a loop that iterates over the arguments

    Emits an LLVM function with the NumPy ufunc inner-loop signature
    ``void(char **args, intp *dims, intp *steps, void *data)`` that calls
    the compiled scalar kernel *func* once per element.  Returns the new
    LLVM wrapper function.
    """
    module = func.module

    # LLVM types for the ufunc loop parameters (byte pointers + intp arrays).
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper = module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # dims[0] is the element count for a 1-d inner loop.
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # Prepare inputs: one UArrayArg per scalar argument, carrying its
    # data pointer and step (stride) from args/steps.
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                context.get_argument_type(typ)))

    # Prepare output: the output operand sits after the inputs in args/steps.
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices: one zero-initialised byte-offset slot per input,
    # plus one for the output store position.
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # Fold together the unit-stride flags of all inputs; if every input is
    # contiguous we can take the fast indexed loop.
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    with cgutils.ifelse(builder, unit_strided) as (is_unit_strided, is_strided):
        with is_unit_strided:
            # Fast path: direct indexing, no per-iteration offset updates.
            with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                fastloop = build_fast_loop_body(context, func, builder,
                                                arrays, out, offsets,
                                                store_offset, signature, ind)
            builder.ret_void()
        with is_strided:
            # General loop
            with cgutils.for_range(builder, loopcount, intp=intp_t):
                slowloop = build_slow_loop_body(context, func, builder,
                                                arrays, out, offsets,
                                                store_offset, signature)
            builder.ret_void()

    # Terminator for the join block after the ifelse (both branches already
    # returned; this keeps the IR well-formed).
    builder.ret_void()
    del builder

    # Set core function to internal so that it is not generated
    func.linkage = LINKAGE_INTERNAL
    # Force inline of code function
    inline_function(slowloop)
    inline_function(fastloop)
    # Run optimizer
    context.optimize(module)

    if config.DUMP_OPTIMIZED:
        print(module)

    return wrapper
def impl(context, builder, sig, args):
    """
    Lower a binary elementwise operation with NumPy-style broadcasting.

    Closure over ``asfloat``, ``funckey`` and ``divbyzero`` from the
    enclosing factory (not visible in this chunk).  Either input may be a
    scalar or an array; results are written into the output array *out*,
    which is also the return value.
    """
    [tyinp1, tyinp2, tyout] = sig.args
    [inp1, inp2, out] = args

    # Classify each input as scalar or array and record its dtype/ndim.
    if isinstance(tyinp1, types.Array):
        scalar_inp1 = False
        scalar_tyinp1 = tyinp1.dtype
        inp1_ndim = tyinp1.ndim
    elif tyinp1 in types.number_domain:
        scalar_inp1 = True
        scalar_tyinp1 = tyinp1
        inp1_ndim = 1
    else:
        raise TypeError('unknown type for first input operand')

    if isinstance(tyinp2, types.Array):
        scalar_inp2 = False
        scalar_tyinp2 = tyinp2.dtype
        inp2_ndim = tyinp2.ndim
    elif tyinp2 in types.number_domain:
        scalar_inp2 = True
        scalar_tyinp2 = tyinp2
        inp2_ndim = 1
    else:
        raise TypeError('unknown type for second input operand')

    out_ndim = tyout.ndim

    # Promote operands to a common computation type: float64 for float ops
    # (or when asfloat is forced), else int64/uint64 by signedness.
    if asfloat:
        promote_type = types.float64
    elif scalar_tyinp1 in types.real_domain or \
            scalar_tyinp2 in types.real_domain:
        promote_type = types.float64
    elif scalar_tyinp1 in types.signed_domain or \
            scalar_tyinp2 in types.signed_domain:
        promote_type = types.int64
    else:
        promote_type = types.uint64

    result_type = promote_type

    # Temporary hack for __ftol2 llvm bug. Don't allow storing
    # float results in uint64 array on windows.
    if result_type in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    # Rebind sig to the promoted scalar signature used to pick the kernel.
    sig = typing.signature(result_type, promote_type, promote_type)

    if not scalar_inp1:
        i1ary = context.make_array(tyinp1)(context, builder, inp1)
    if not scalar_inp2:
        i2ary = context.make_array(tyinp2)(context, builder, inp2)
    oary = context.make_array(tyout)(context, builder, out)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # Unpack shape/strides/data/layout for each array operand.
    if not scalar_inp1:
        inp1_shape = cgutils.unpack_tuple(builder, i1ary.shape, inp1_ndim)
        inp1_strides = cgutils.unpack_tuple(builder, i1ary.strides, inp1_ndim)
        inp1_data = i1ary.data
        inp1_layout = tyinp1.layout
    if not scalar_inp2:
        inp2_shape = cgutils.unpack_tuple(builder, i2ary.shape, inp2_ndim)
        inp2_strides = cgutils.unpack_tuple(builder, i2ary.strides, inp2_ndim)
        inp2_data = i2ary.data
        inp2_layout = tyinp2.layout
    out_shape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
    out_strides = cgutils.unpack_tuple(builder, oary.strides, out_ndim)
    out_data = oary.data
    out_layout = tyout.layout

    ZERO = Constant.int(Type.int(intpty.width), 0)
    ONE = Constant.int(Type.int(intpty.width), 1)

    # Per-input index slots, zero-initialised; dimensions of size 1 keep
    # index 0 so they broadcast against the output indices.
    inp1_indices = None
    if not scalar_inp1:
        inp1_indices = []
        for i in range(inp1_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp1_indices.append(x)

    inp2_indices = None
    if not scalar_inp2:
        inp2_indices = []
        for i in range(inp2_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp2_indices.append(x)

    loopshape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
        # Increment input indices.
        # Since the output dimensions are already being incremented,
        # we'll use that to set the input indices. In order to
        # handle broadcasting, any input dimension of size 1 won't be
        # incremented.
        def build_increment_blocks(inp_indices, inp_shape, inp_ndim, inp_num):
            # One basic block per input dimension, chained together, then a
            # common end block.  Input dims align to the *trailing* output
            # dims (offset out_ndim - inp_ndim), per broadcasting rules.
            bb_inc_inp_index = [cgutils.append_basic_block(builder,
                '.inc_inp{0}_index{1}'.format(inp_num, str(i)))
                for i in range(inp_ndim)]
            bb_end_inc_index = cgutils.append_basic_block(builder,
                '.end_inc{0}_index'.format(inp_num))

            builder.branch(bb_inc_inp_index[0])
            for i in range(inp_ndim):
                with cgutils.goto_block(builder, bb_inc_inp_index[i]):
                    # If the shape of this dimension is 1, then leave the
                    # index at 0 so that this dimension is broadcasted over
                    # the corresponding input and output dimensions.
                    cond = builder.icmp(ICMP_UGT, inp_shape[i], ONE)
                    with cgutils.ifthen(builder, cond):
                        builder.store(indices[out_ndim-inp_ndim+i],
                                      inp_indices[i])
                    if i + 1 == inp_ndim:
                        builder.branch(bb_end_inc_index)
                    else:
                        builder.branch(bb_inc_inp_index[i+1])
            builder.position_at_end(bb_end_inc_index)

        if not scalar_inp1:
            build_increment_blocks(inp1_indices, inp1_shape, inp1_ndim, '1')
        if not scalar_inp2:
            build_increment_blocks(inp2_indices, inp2_shape, inp2_ndim, '2')

        # Load the current element (or use the scalar value directly).
        if scalar_inp1:
            x = inp1
        else:
            inds = [builder.load(index) for index in inp1_indices]
            px = cgutils.get_item_pointer2(builder,
                                           data=inp1_data,
                                           shape=inp1_shape,
                                           strides=inp1_strides,
                                           layout=inp1_layout,
                                           inds=inds)
            x = builder.load(px)

        if scalar_inp2:
            y = inp2
        else:
            inds = [builder.load(index) for index in inp2_indices]
            py = cgutils.get_item_pointer2(builder,
                                           data=inp2_data,
                                           shape=inp2_shape,
                                           strides=inp2_strides,
                                           layout=inp2_layout,
                                           inds=inds)
            y = builder.load(py)

        po = cgutils.get_item_pointer2(builder,
                                       data=out_data,
                                       shape=out_shape,
                                       strides=out_strides,
                                       layout=out_layout,
                                       inds=indices)

        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                   orelse):
                with then:
                    # Divide by zero
                    if (scalar_tyinp1 in types.real_domain or
                            scalar_tyinp2 in types.real_domain) or \
                            not numpy_support.int_divbyzero_returns_zero:
                        # If y is float and is 0 also, return Nan; else
                        # return Inf
                        outltype = context.get_data_type(result_type)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        tempres = builder.select(shouldretnan, nan, inf)
                        res = context.cast(builder, tempres, result_type,
                                           tyout.dtype)
                    elif tyout.dtype in types.signed_domain and \
                            not numpy_support.int_divbyzero_returns_zero:
                        # Signed int // 0 mimics NumPy: most-negative value.
                        res = Constant.int(context.get_data_type(tyout.dtype),
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(context.get_data_type(tyout.dtype))
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
                    d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
                    tempres = fnwork(builder, [d_x, d_y])
                    res = context.cast(builder, tempres, result_type,
                                       tyout.dtype)
                    assert res.type == po.type.pointee, (res.type,
                                                         po.type.pointee)
                    builder.store(res, po)
        else:
            # Handle non-division operations
            d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
            d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
            tempres = fnwork(builder, [d_x, d_y])
            res = context.cast(builder, tempres, result_type, tyout.dtype)
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def setitem_array1d_slice(context, builder, sig, args):
    """
    Lower ``ary[start:stop:step] = val`` for a 1-d array.

    Clamps/normalises the slice bounds following CPython's
    ``PySlice_GetIndicesEx`` (Objects/sliceobject.c), then stores *val*
    into every selected element, iterating forward for positive steps
    and backward for negative steps.

    Raises (at runtime, via the call convention) ValueError when the
    slice step is zero.
    """
    aryty, idxty, valty = sig.args
    ary, idx, val = args
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)
    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)
    slicestruct = Slice(context, builder, value=idx)

    # the logic here follows that of Python's Objects/sliceobject.c
    # in particular PySlice_GetIndicesEx function
    ZERO = Constant.int(slicestruct.step.type, 0)
    NEG_ONE = Constant.int(slicestruct.start.type, -1)

    b_step_eq_zero = builder.icmp(lc.ICMP_EQ, slicestruct.step, ZERO)
    # bail if step is 0
    with cgutils.ifthen(builder, b_step_eq_zero):
        context.call_conv.return_user_exc(builder, ValueError,
                                          ("slice step cannot be zero",))

    # adjust for negative indices for start
    start = cgutils.alloca_once_value(builder, slicestruct.start)
    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        # Negative start counts from the end of the array.
        add = builder.add(builder.load(start), shapes[0])
        builder.store(add, start)

    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        # Still negative after adjustment: clamp to -1 (descending) or 0.
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        builder.store(cond, start)

    b_start_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(start),
                                   shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_start_geq_len):
        # Past the end: clamp to len-1 (descending) or len.
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, start)

    # adjust stop for negative value
    stop = cgutils.alloca_once_value(builder, slicestruct.stop)
    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        add = builder.add(builder.load(stop), shapes[0])
        builder.store(add, stop)

    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        # BUGFIX: was `builder.store(cond, start)` — clamping a
        # still-negative *stop* must update stop, not start (cf.
        # PySlice_GetIndicesEx).  The old code corrupted the start bound
        # and left stop negative.
        builder.store(cond, stop)

    b_stop_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(stop), shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_stop_geq_len):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, stop)

    # Fill the selected range, ascending or descending by step sign.
    b_step_gt_zero = builder.icmp(lc.ICMP_SGT, slicestruct.step, ZERO)
    with cgutils.ifelse(builder, b_step_gt_zero) as (then0, otherwise0):
        with then0:
            with cgutils.for_range_slice(builder, builder.load(start),
                                         builder.load(stop),
                                         slicestruct.step,
                                         slicestruct.start.type) as loop_idx1:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx1],
                                               wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
        with otherwise0:
            with cgutils.for_range_slice(builder, builder.load(start),
                                         builder.load(stop),
                                         slicestruct.step,
                                         slicestruct.start.type,
                                         inc=False) as loop_idx2:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx2],
                                               wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
def build_ufunc_wrapper(context, func, signature):
    """
    Wrap the scalar kernel *func* in a NumPy-ufunc-style inner loop.

    The generated LLVM function has the standard loop signature
    ``void(char **args, intp *dims, intp *steps, void *data)`` and calls
    the kernel once per element, choosing a fast indexed loop when every
    input is unit-strided and a generic offset-advancing loop otherwise.
    Returns the LLVM wrapper function.
    """
    mod = func.module

    # LLVM types appearing in the ufunc loop signature.
    i8_t = Type.int(8)
    i8_ptr_t = Type.pointer(i8_t)
    i8_ptr_ptr_t = Type.pointer(i8_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    wrapper_fnty = Type.function(Type.void(),
                                 [i8_ptr_ptr_t, intp_ptr_t, intp_ptr_t,
                                  i8_ptr_t])

    wrapper = mod.add_function(wrapper_fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    bld = Builder.new(wrapper.append_basic_block("entry"))

    # dims[0] holds the iteration count for the inner loop.
    num_loops = bld.load(arg_dims, name="loopcount")

    actual_args = context.get_arguments(func)

    # One UArrayArg per input, carrying its base pointer and stride.
    inputs = [UArrayArg(context, bld, arg_args, arg_steps, pos,
                        context.get_argument_type(argty))
              for pos, argty in enumerate(signature.args)]

    # The output operand follows the inputs in args/steps.
    out_llty = context.get_data_type(signature.return_type)
    output = UArrayArg(context, bld, arg_args, arg_steps, len(actual_args),
                       out_llty)

    # Zero-initialised byte-offset slots: one per input, one for the store.
    zero_const = context.get_constant(types.intp, 0)
    offset_ptrs = []
    for _ in inputs:
        slot = cgutils.alloca_once(bld, intp_t)
        offset_ptrs.append(slot)
        bld.store(zero_const, slot)

    store_offset = cgutils.alloca_once(bld, intp_t)
    bld.store(zero_const, store_offset)

    # AND together every input's unit-stride flag.
    all_unit = cgutils.true_bit
    for inp in inputs:
        all_unit = bld.and_(all_unit, inp.is_unit_strided)

    with cgutils.ifelse(bld, all_unit) as (fast_branch, generic_branch):
        with fast_branch:
            # All inputs contiguous: index directly by loop counter.
            with cgutils.for_range(bld, num_loops, intp=intp_t) as idx:
                fast_body = build_fast_loop_body(context, func, bld,
                                                 inputs, output,
                                                 offset_ptrs, store_offset,
                                                 signature, idx)
            bld.ret_void()
        with generic_branch:
            # Generic strided loop.
            with cgutils.for_range(bld, num_loops, intp=intp_t):
                slow_body = build_slow_loop_body(context, func, bld,
                                                 inputs, output,
                                                 offset_ptrs, store_offset,
                                                 signature)
            bld.ret_void()

    # Terminate the join block; both branches already returned.
    bld.ret_void()
    del bld

    # Keep only the wrapper externally visible.
    func.linkage = LINKAGE_INTERNAL
    # Inline the kernel calls inside both loop bodies.
    inline_function(slow_body)
    inline_function(fast_body)
    # Optimise the whole module.
    context.optimize(mod)

    if config.DUMP_OPTIMIZED:
        print(mod)

    return wrapper
def impl(context, builder, sig, args):
    """
    Lower a binary elementwise operation where both inputs are either
    scalars or arrays of matching shape (no broadcasting here).

    Closure over ``scalar_inputs``, ``asfloat``, ``divbyzero`` and
    ``funckey`` from the enclosing factory (not visible in this chunk).
    Writes into the output array *out* and returns it.
    """
    [tyinp1, tyinp2, tyout] = sig.args
    [inp1, inp2, out] = args
    if scalar_inputs:
        ndim = 1
    else:
        ndim = tyinp1.ndim

    # Temporary hack for __ftol2 llvm bug. Don't allow storing
    # float results in uint64 array on windows.
    if scalar_inputs and tyinp1 in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    if not scalar_inputs and tyinp1.dtype in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    if not scalar_inputs:
        i1ary = context.make_array(tyinp1)(context, builder, inp1)
        i2ary = context.make_array(tyinp2)(context, builder, inp2)
    oary = context.make_array(tyout)(context, builder, out)

    # Pick the scalar-kernel signature used to resolve funckey.
    if asfloat and not divbyzero:
        sig = typing.signature(types.float64, types.float64, types.float64)
    else:
        if scalar_inputs:
            sig = typing.signature(tyout.dtype, tyinp1, tyinp2)
        else:
            sig = typing.signature(tyout.dtype, tyinp1.dtype, tyinp2.dtype)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    loopshape = cgutils.unpack_tuple(builder, oary.shape, ndim)

    # Gather (data, shape, strides, layout) per operand; with scalar
    # inputs only the output array participates.
    if scalar_inputs:
        xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                     for ary in (oary,)]
        xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                       for ary in (oary,)]
        xyo_data = [ary.data for ary in (oary,)]
        xyo_layout = [ty.layout for ty in (tyout,)]
    else:
        xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                     for ary in (i1ary, i2ary, oary)]
        xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                       for ary in (i1ary, i2ary, oary)]
        xyo_data = [ary.data for ary in (i1ary, i2ary, oary)]
        xyo_layout = [ty.layout for ty in (tyinp1, tyinp2, tyout)]

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
        # Element pointers for the current indices.
        if scalar_inputs:
            [po] = [cgutils.get_item_pointer2(builder,
                                              data=data, shape=shape,
                                              strides=strides,
                                              layout=layout,
                                              inds=indices)
                    for data, shape, strides, layout
                    in zip(xyo_data, xyo_shape, xyo_strides, xyo_layout)]
        else:
            [px, py, po] = [cgutils.get_item_pointer2(builder,
                                                      data=data, shape=shape,
                                                      strides=strides,
                                                      layout=layout,
                                                      inds=indices)
                            for data, shape, strides, layout
                            in zip(xyo_data, xyo_shape, xyo_strides,
                                   xyo_layout)]

        if scalar_inputs:
            x = inp1
            y = inp2
        else:
            x = builder.load(px)
            y = builder.load(py)

        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                   orelse):
                with then:
                    # Divide by zero
                    if ((scalar_inputs and tyinp2 in types.real_domain) or
                            (not scalar_inputs and
                             tyinp2.dtype in types.real_domain) or
                            not numpy_support.int_divbyzero_returns_zero):
                        # If y is float and is 0 also, return Nan; else
                        # return Inf
                        outltype = context.get_data_type(tyout.dtype)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        res = builder.select(shouldretnan, nan, inf)
                    elif (scalar_inputs and tyout in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        # Signed int // 0: most-negative representable value.
                        res = Constant.int(context.get_data_type(tyout),
                                           0x1 << (y.type.width-1))
                    elif (not scalar_inputs and
                            tyout.dtype in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        res = Constant.int(context.get_data_type(tyout.dtype),
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(context.get_data_type(tyout.dtype))
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    tempres = fnwork(builder, (x, y))
                    if scalar_inputs and tyinp1 in types.real_domain:
                        res = context.cast(builder, tempres,
                                           tyinp1, tyout.dtype)
                    elif (not scalar_inputs and
                            tyinp1.dtype in types.real_domain):
                        res = context.cast(builder, tempres,
                                           tyinp1.dtype, tyout.dtype)
                    else:
                        # NOTE(review): integer-input division casts the
                        # result from float64 — presumably the division
                        # kernel yields float64 here; confirm against the
                        # enclosing factory's funckey.
                        res = context.cast(builder, tempres, types.float64,
                                           tyout.dtype)
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
        else:
            # Handle non-division operations
            if asfloat:
                # Compute in float64 and cast back to the output dtype.
                if scalar_inputs:
                    d_x = context.cast(builder, x, tyinp1, types.float64)
                    d_y = context.cast(builder, y, tyinp2, types.float64)
                else:
                    d_x = context.cast(builder, x, tyinp1.dtype,
                                       types.float64)
                    d_y = context.cast(builder, y, tyinp2.dtype,
                                       types.float64)
                tempres = fnwork(builder, [d_x, d_y])
                res = context.cast(builder, tempres, types.float64,
                                   tyout.dtype)
            elif scalar_inputs:
                # Cast only when input and output dtypes differ.
                if tyinp1 != tyout.dtype:
                    tempres = fnwork(builder, [x, y])
                    res = context.cast(builder, tempres, tyinp1, tyout.dtype)
                else:
                    res = fnwork(builder, (x, y))
            elif tyinp1.dtype != tyout.dtype:
                tempres = fnwork(builder, [x, y])
                res = context.cast(builder, tempres, tyinp1.dtype,
                                   tyout.dtype)
            else:
                res = fnwork(builder, (x, y))
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def lower_expr(self, expr):
    """
    Lower an IR expression node in object (pyobject) mode.

    Dispatches on ``expr.op`` and emits the corresponding CPython C-API
    calls via ``self.pyapi``.  Returns a new reference to the resulting
    PyObject; raises NotImplementedError for unsupported ops.
    """
    if expr.op == 'binop':
        return self.lower_binop(expr, inplace=False)
    elif expr.op == 'inplace_binop':
        return self.lower_binop(expr, inplace=True)
    elif expr.op == 'unary':
        value = self.loadvar(expr.value.name)
        if expr.fn == '-':
            res = self.pyapi.number_negative(value)
        elif expr.fn == '+':
            res = self.pyapi.number_positive(value)
        elif expr.fn == 'not':
            # object_not returns a C int; convert to a Python bool object.
            res = self.pyapi.object_not(value)
            self.check_int_status(res)
            longval = self.builder.zext(res, self.pyapi.long)
            res = self.pyapi.bool_from_long(longval)
        elif expr.fn == '~':
            res = self.pyapi.number_invert(value)
        else:
            raise NotImplementedError(expr)
        self.check_error(res)
        return res
    elif expr.op == 'call':
        argvals = [self.loadvar(a.name) for a in expr.args]
        fn = self.loadvar(expr.func.name)
        if not expr.kws:
            # No keyword
            ret = self.pyapi.call_function_objargs(fn, argvals)
        else:
            # Have Keywords
            keyvalues = [(k, self.loadvar(v.name)) for k, v in expr.kws]
            args = self.pyapi.tuple_pack(argvals)
            kws = self.pyapi.dict_pack(keyvalues)
            ret = self.pyapi.call(fn, args, kws)
            # Release the temporary tuple/dict built for the call.
            self.decref(kws)
            self.decref(args)
        self.check_error(ret)
        return ret
    elif expr.op == 'getattr':
        obj = self.loadvar(expr.value.name)
        res = self.pyapi.object_getattr(obj, self._freeze_string(expr.attr))
        self.check_error(res)
        return res
    elif expr.op == 'build_tuple':
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.tuple_pack(items)
        self.check_error(res)
        return res
    elif expr.op == 'build_list':
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.list_pack(items)
        self.check_error(res)
        return res
    elif expr.op == 'build_map':
        # Items are inserted by later setitem instructions; only the dict
        # is created here.
        res = self.pyapi.dict_new(expr.size)
        self.check_error(res)
        return res
    elif expr.op == 'build_set':
        items = [self.loadvar(it.name) for it in expr.items]
        res = self.pyapi.set_new()
        self.check_error(res)
        for it in items:
            ok = self.pyapi.set_add(res, it)
            self.check_int_status(ok)
        return res
    elif expr.op == 'getiter':
        obj = self.loadvar(expr.value.name)
        res = self.pyapi.object_getiter(obj)
        self.check_error(res)
        return res
    elif expr.op == 'iternext':
        # Produce a (item, is_valid) pair; is_valid is False when the
        # iterator is exhausted.
        iterobj = self.loadvar(expr.value.name)
        item = self.pyapi.iter_next(iterobj)
        is_valid = cgutils.is_not_null(self.builder, item)
        pair = self.pyapi.tuple_new(2)
        with cgutils.ifelse(self.builder, is_valid) as (then, otherwise):
            with then:
                self.pyapi.tuple_setitem(pair, 0, item)
            with otherwise:
                self.check_occurred()
                # Make the tuple valid by inserting None as dummy
                # iteration "result" (it will be ignored).
                self.pyapi.tuple_setitem(pair, 0, self.pyapi.make_none())
        self.pyapi.tuple_setitem(pair, 1, self.pyapi.bool_from_bool(is_valid))
        return pair
    elif expr.op == 'pair_first':
        pair = self.loadvar(expr.value.name)
        first = self.pyapi.tuple_getitem(pair, 0)
        # tuple_getitem borrows; take our own reference.
        self.incref(first)
        return first
    elif expr.op == 'pair_second':
        pair = self.loadvar(expr.value.name)
        second = self.pyapi.tuple_getitem(pair, 1)
        self.incref(second)
        return second
    elif expr.op == 'exhaust_iter':
        iterobj = self.loadvar(expr.value.name)
        tup = self.pyapi.sequence_tuple(iterobj)
        self.check_error(tup)
        # Check tuple size is as expected
        tup_size = self.pyapi.tuple_size(tup)
        expected_size = self.context.get_constant(types.intp, expr.count)
        has_wrong_size = self.builder.icmp(lc.ICMP_NE,
                                           tup_size, expected_size)
        with cgutils.if_unlikely(self.builder, has_wrong_size):
            excid = self.add_exception(ValueError)
            self.context.return_user_exc(self.builder, excid)
        return tup
    elif expr.op == 'getitem':
        value = self.loadvar(expr.value.name)
        index = self.loadvar(expr.index.name)
        res = self.pyapi.object_getitem(value, index)
        self.check_error(res)
        return res
    elif expr.op == 'static_getitem':
        # Constant integer index: box it as a Python int first.
        value = self.loadvar(expr.value.name)
        index = self.context.get_constant(types.intp, expr.index)
        indexobj = self.pyapi.long_from_ssize_t(index)
        self.check_error(indexobj)
        res = self.pyapi.object_getitem(value, indexobj)
        self.decref(indexobj)
        self.check_error(res)
        return res
    elif expr.op == 'getslice':
        target = self.loadvar(expr.target.name)
        start = self.loadvar(expr.start.name)
        stop = self.loadvar(expr.stop.name)
        # Build slice(start, stop) via the builtin and index with it.
        slicefn = self.get_builtin_obj("slice")
        sliceobj = self.pyapi.call_function_objargs(slicefn, (start, stop))
        self.decref(slicefn)
        self.check_error(sliceobj)
        res = self.pyapi.object_getitem(target, sliceobj)
        self.check_error(res)
        return res
    elif expr.op == 'cast':
        # Casts are no-ops in object mode; just add a reference.
        val = self.loadvar(expr.value.name)
        self.incref(val)
        return val
    else:
        raise NotImplementedError(expr)
def to_native_value(self, obj, typ):
    """
    Unbox the PyObject *obj* into its native representation for *typ*.

    Returns the native LLVM value (not a pointer to it) for every
    supported type; raises NotImplementedError for unsupported types.
    On conversion failure in the complex path, returns NULL from the
    enclosing generated function.
    """
    if isinstance(typ, types.Object) or typ == types.pyobject:
        # Already a PyObject; pass through unchanged.
        return obj
    elif typ == types.boolean:
        istrue = self.object_istrue(obj)
        zero = Constant.null(istrue.type)
        return self.builder.icmp(lc.ICMP_NE, istrue, zero)
    elif typ in types.unsigned_domain:
        longobj = self.number_long(obj)
        ullval = self.long_as_ulonglong(longobj)
        self.decref(longobj)
        return self.builder.trunc(ullval,
                                  self.context.get_argument_type(typ))
    elif typ in types.signed_domain:
        longobj = self.number_long(obj)
        llval = self.long_as_longlong(longobj)
        self.decref(longobj)
        return self.builder.trunc(llval,
                                  self.context.get_argument_type(typ))
    elif typ == types.float32:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return self.builder.fptrunc(fval,
                                    self.context.get_argument_type(typ))
    elif typ == types.float64:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return fval
    elif typ in (types.complex128, types.complex64):
        # Extract as complex128 first, then narrow to complex64 if needed.
        cplxcls = self.context.make_complex(types.complex128)
        cplx = cplxcls(self.context, self.builder)
        pcplx = cplx._getpointer()
        ok = self.complex_adaptor(obj, pcplx)
        failed = cgutils.is_false(self.builder, ok)
        with cgutils.if_unlikely(self.builder, failed):
            self.builder.ret(self.get_null_object())
        if typ == types.complex64:
            c64cls = self.context.make_complex(typ)
            c64 = c64cls(self.context, self.builder)
            freal = self.context.cast(self.builder, cplx.real,
                                      types.float64, types.float32)
            fimag = self.context.cast(self.builder, cplx.imag,
                                      types.float64, types.float32)
            c64.real = freal
            c64.imag = fimag
            return c64._getvalue()
        else:
            return cplx._getvalue()
    elif isinstance(typ, types.NPDatetime):
        val = self.extract_np_datetime(obj)
        return val
    elif isinstance(typ, types.NPTimedelta):
        val = self.extract_np_timedelta(obj)
        return val
    elif isinstance(typ, types.Array):
        return self.to_native_array(typ, obj)
    elif isinstance(typ, types.Optional):
        # None maps to the "none" variant; anything else is unboxed
        # recursively and wrapped as the "just" variant.
        isnone = self.builder.icmp(lc.ICMP_EQ, obj, self.borrow_none())
        with cgutils.ifelse(self.builder, isnone) as (then, orelse):
            with then:
                noneval = self.context.make_optional_none(self.builder,
                                                          typ.type)
                ret = cgutils.alloca_once(self.builder, noneval.type)
                self.builder.store(noneval, ret)
            with orelse:
                val = self.to_native_value(obj, typ.type)
                just = self.context.make_optional_value(self.builder,
                                                        typ.type, val)
                self.builder.store(just, ret)
        # BUGFIX: was `return ret`, which handed back the alloca *pointer*.
        # Every other branch of this method returns the native value
        # itself, so load the stored optional value before returning.
        return self.builder.load(ret)
    raise NotImplementedError(typ)