def get_constant_struct(self, builder, ty, val):
    assert self.is_struct_type(ty)

    if ty in types.complex_domain:
        if ty == types.complex64:
            innertype = types.float32
        elif ty == types.complex128:
            innertype = types.float64
        else:
            raise Exception("unreachable")

        real = self.get_constant(innertype, val.real)
        imag = self.get_constant(innertype, val.imag)
        const = Constant.struct([real, imag])
        return const

    elif isinstance(ty, types.Tuple):
        consts = [self.get_constant_generic(builder, ty.types[i], v)
                  for i, v in enumerate(val)]
        return Constant.struct(consts)

    elif isinstance(ty, types.Record):
        consts = [self.get_constant(types.int8, b)
                  for b in bytearray(val.tostring())]
        return Constant.array(consts[0].type, consts)

    else:
        raise NotImplementedError("%s as constant unsupported" % ty)
def make_exception_switch(self, api, builder, code):
    """Handle user defined exceptions.
    Build a switch to check which exception class was raised.
    """
    nexc = len(self.exceptions)
    elseblk = cgutils.append_basic_block(builder, ".invalid.user.exception")
    swt = builder.switch(code, elseblk, n=nexc)
    for num, exc in self.exceptions.items():
        bb = cgutils.append_basic_block(builder,
                                        ".user.exception.%d" % num)
        swt.add_case(Constant.int(code.type, num), bb)
        builder.position_at_end(bb)
        api.raise_exception(exc, exc)
        builder.ret(api.get_null_object())

    builder.position_at_end(elseblk)

    # Handle native error
    elseblk = cgutils.append_basic_block(builder, ".invalid.native.error")
    swt = builder.switch(code, elseblk, n=len(errcode.error_names))
    msgfmt = "{error} in native function: {fname}"
    for errnum, errname in errcode.error_names.items():
        bb = cgutils.append_basic_block(builder,
                                        ".native.error.%d" % errnum)
        swt.add_case(Constant.int(code.type, errnum), bb)
        builder.position_at_end(bb)
        api.raise_native_error(msgfmt.format(error=errname,
                                             fname=self.fndesc.mangled_name))
        builder.ret(api.get_null_object())

    builder.position_at_end(elseblk)
    msg = "unknown error in native function: %s" % self.fndesc.mangled_name
    api.raise_native_error(msg)
def get_constant(self, ty, val):
    assert not self.is_struct_type(ty)
    lty = self.get_value_type(ty)

    if ty == types.none:
        assert val is None
        return self.get_dummy_value()

    elif ty == types.boolean:
        return Constant.int(Type.int(1), int(val))

    elif ty in types.signed_domain:
        return Constant.int_signextend(lty, val)

    elif ty in types.unsigned_domain:
        return Constant.int(lty, val)

    elif ty in types.real_domain:
        return Constant.real(lty, val)

    elif isinstance(ty, types.UniTuple):
        consts = [self.get_constant(ty.dtype, v) for v in val]
        return Constant.array(consts[0].type, consts)

    raise NotImplementedError("cannot lower constant of type '%s'" % (ty,))
def for_range(builder, count, intp):
    start = Constant.int(intp, 0)
    stop = count

    bbcond = append_basic_block(builder, "for.cond")
    bbbody = append_basic_block(builder, "for.body")
    bbend = append_basic_block(builder, "for.end")

    bbstart = builder.basic_block
    builder.branch(bbcond)

    ONE = Constant.int(intp, 1)

    with goto_block(builder, bbcond):
        index = builder.phi(intp, name="loop.index")
        pred = builder.icmp(lc.ICMP_SLT, index, stop)
        builder.cbranch(pred, bbbody, bbend)

    with goto_block(builder, bbbody):
        yield index
        bbbody = builder.basic_block
        incr = builder.add(index, ONE)
        terminate(builder, bbcond)

    index.add_incoming(start, bbstart)
    index.add_incoming(incr, bbbody)

    builder.position_at_end(bbend)
def __init__(self, context, builder, value=None, ref=None, cast_ref=False):
    self._type = context.get_struct_type(self)
    self._context = context
    self._builder = builder
    if ref is None:
        self._value = alloca_once(builder, self._type)
        if value is not None:
            assert not is_pointer(value.type)
            assert value.type == self._type, (value.type, self._type)
            builder.store(value, self._value)
    else:
        assert value is None
        assert is_pointer(ref.type)
        if self._type != ref.type.pointee:
            if cast_ref:
                ref = builder.bitcast(ref, Type.pointer(self._type))
            else:
                raise TypeError(
                    "mismatching pointer type: got %s, expected %s"
                    % (ref.type.pointee, self._type))
        self._value = ref

    self._namemap = {}
    self._fdmap = []
    self._typemap = []
    base = Constant.int(Type.int(), 0)
    for i, (k, tp) in enumerate(self._fields):
        self._namemap[k] = i
        self._fdmap.append((base, Constant.int(Type.int(), i)))
        self._typemap.append(tp)
def divmod_by_constant(builder, val, divisor):
    """
    Compute the (quotient, remainder) of *val* divided by the constant
    positive *divisor*.  The semantics reflects those of Python integer
    floor division, rather than C's / LLVM's signed division and modulo.
    The difference lies with a negative *val*.
    """
    assert divisor > 0
    divisor = Constant.int(val.type, divisor)
    one = Constant.int(val.type, 1)

    quot = alloca_once(builder, val.type)

    with ifelse(builder, is_neg_int(builder, val)) as (if_neg, if_pos):
        with if_pos:
            # quot = val / divisor
            quot_val = builder.sdiv(val, divisor)
            builder.store(quot_val, quot)
        with if_neg:
            # quot = -1 + (val + 1) / divisor
            val_plus_one = builder.add(val, one)
            quot_val = builder.sdiv(val_plus_one, divisor)
            builder.store(builder.sub(quot_val, one), quot)

    # rem = val - quot * divisor
    # (should be slightly faster than a separate modulo operation)
    quot_val = builder.load(quot)
    rem_val = builder.sub(val, builder.mul(quot_val, divisor))
    return quot_val, rem_val
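# The docstring of divmod_by_constant() distinguishes Python floor division
# from C/LLVM truncating division; the two only differ for a negative *val*.
# A minimal pure-Python sketch of the same adjustment the emitted IR performs
# (illustration only, not part of Numba's API):

def _floor_divmod_by_positive_sketch(val, divisor):
    def trunc_div(a, b):
        # C-style division: truncate toward zero, as LLVM's sdiv does.
        q = abs(a) // abs(b)
        return q if (a >= 0) == (b >= 0) else -q

    assert divisor > 0
    if val >= 0:
        quot = trunc_div(val, divisor)
    else:
        # quot = -1 + (val + 1) / divisor, using truncating division
        quot = trunc_div(val + 1, divisor) - 1
    rem = val - quot * divisor
    return quot, rem

assert _floor_divmod_by_positive_sketch(-7, 3) == divmod(-7, 3)   # (-3, 2)
assert _floor_divmod_by_positive_sketch(7, 3) == divmod(7, 3)     # (2, 1)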
def int_power_func_body(context, builder, x, y):
    pcounter = cgutils.alloca_once(builder, y.type)
    presult = cgutils.alloca_once(builder, x.type)
    result = Constant.int(x.type, 1)
    counter = y
    builder.store(counter, pcounter)
    builder.store(result, presult)

    bbcond = cgutils.append_basic_block(builder, ".cond")
    bbbody = cgutils.append_basic_block(builder, ".body")
    bbexit = cgutils.append_basic_block(builder, ".exit")

    del counter
    del result

    builder.branch(bbcond)

    with cgutils.goto_block(builder, bbcond):
        counter = builder.load(pcounter)
        ONE = Constant.int(counter.type, 1)
        ZERO = Constant.null(counter.type)
        builder.store(builder.sub(counter, ONE), pcounter)
        pred = builder.icmp(lc.ICMP_SGT, counter, ZERO)
        builder.cbranch(pred, bbbody, bbexit)

    with cgutils.goto_block(builder, bbbody):
        result = builder.load(presult)
        builder.store(builder.mul(result, x), presult)
        builder.branch(bbcond)

    builder.position_at_end(bbexit)

    return builder.load(presult)
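# int_power_func_body() builds a loop that multiplies the accumulator by x
# exactly y times (a no-op for y <= 0).  A hedged pure-Python rendering of the
# same control flow, for illustration only:

def _int_power_sketch(x, y):
    result = 1
    counter = y
    while counter > 0:
        counter -= 1
        result *= x
    return result

assert _int_power_sketch(3, 4) == 81
assert _int_power_sketch(5, 0) == 1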
def _long_from_native_int(self, ival, func_name, native_int_type,
                          signed):
    fnty = Type.function(self.pyobj, [native_int_type])
    fn = self._get_function(fnty, name=func_name)
    resptr = cgutils.alloca_once(self.builder, self.pyobj)

    if PYVERSION < (3, 0):
        # Under Python 2, we try to return a PyInt object whenever
        # the given number fits in a C long.
        pyint_fnty = Type.function(self.pyobj, [self.long])
        pyint_fn = self._get_function(pyint_fnty, name="PyInt_FromLong")
        long_max = Constant.int(native_int_type, _helperlib.long_max)
        if signed:
            long_min = Constant.int(native_int_type, _helperlib.long_min)
            use_pyint = self.builder.and_(
                self.builder.icmp(lc.ICMP_SGE, ival, long_min),
                self.builder.icmp(lc.ICMP_SLE, ival, long_max),
                )
        else:
            use_pyint = self.builder.icmp(lc.ICMP_ULE, ival, long_max)

        with self.builder.if_else(use_pyint) as (then, otherwise):
            with then:
                downcast_ival = self.builder.trunc(ival, self.long)
                res = self.builder.call(pyint_fn, [downcast_ival])
                self.builder.store(res, resptr)
            with otherwise:
                res = self.builder.call(fn, [ival])
                self.builder.store(res, resptr)
    else:
        fn = self._get_function(fnty, name=func_name)
        self.builder.store(self.builder.call(fn, [ival]), resptr)

    return self.builder.load(resptr)
def timedelta_floor_div_timedelta(context, builder, sig, args):
    [va, vb] = args
    [ta, tb] = sig.args
    ll_ret_type = context.get_value_type(sig.return_type)
    not_nan = are_not_nat(builder, [va, vb])
    ret = cgutils.alloca_once(builder, ll_ret_type, name='ret')
    zero = Constant.int(ll_ret_type, 0)
    one = Constant.int(ll_ret_type, 1)
    builder.store(zero, ret)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero or NaT?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            # is either arg negative?
            vaneg = builder.icmp_signed('<', va, zero)
            neg = builder.or_(vaneg, builder.icmp_signed('<', vb, zero))
            with builder.if_else(neg) as (then, otherwise):
                with then:
                    # one or more value negative
                    with builder.if_else(vaneg) as (negthen, negotherwise):
                        with negthen:
                            top = builder.sub(va, one)
                            div = builder.sdiv(top, vb)
                            builder.store(div, ret)
                        with negotherwise:
                            top = builder.add(va, one)
                            div = builder.sdiv(top, vb)
                            builder.store(div, ret)
                with otherwise:
                    div = builder.sdiv(va, vb)
                    builder.store(div, ret)

    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def real_sign_impl(context, builder, sig, args):
    [x] = args
    POS = Constant.real(x.type, 1)
    NEG = Constant.real(x.type, -1)
    ZERO = Constant.real(x.type, 0)

    presult = cgutils.alloca_once(builder, x.type)

    is_pos = builder.fcmp(lc.FCMP_OGT, x, ZERO)
    is_neg = builder.fcmp(lc.FCMP_OLT, x, ZERO)

    with builder.if_else(is_pos) as (gt_zero, not_gt_zero):
        with gt_zero:
            builder.store(POS, presult)
        with not_gt_zero:
            with builder.if_else(is_neg) as (lt_zero, not_lt_zero):
                with lt_zero:
                    builder.store(NEG, presult)
                with not_lt_zero:
                    # For both NaN and 0, the result of sign() is simply
                    # the input value.
                    builder.store(x, presult)

    res = builder.load(presult)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def make_constant_array(self, builder, typ, ary):
    assert typ.layout == "C"                # assumed in typeinfer.py
    ary = numpy.ascontiguousarray(ary)
    flat = ary.flatten()

    # Handle data
    if self.is_struct_type(typ.dtype):
        values = [self.get_constant_struct(builder, typ.dtype, flat[i])
                  for i in range(flat.size)]
    else:
        values = [self.get_constant(typ.dtype, flat[i])
                  for i in range(flat.size)]

    lldtype = values[0].type
    consts = Constant.array(lldtype, values)
    data = cgutils.global_constant(builder, ".const.array.data", consts)

    # Handle shape
    llintp = self.get_value_type(types.intp)
    shapevals = [self.get_constant(types.intp, s) for s in ary.shape]
    cshape = Constant.array(llintp, shapevals)

    # Handle strides
    stridevals = [self.get_constant(types.intp, s) for s in ary.strides]
    cstrides = Constant.array(llintp, stridevals)

    # Create array structure
    cary = self.make_array(typ)(self, builder)
    cary.data = builder.bitcast(data, cary.data.type)
    cary.shape = cshape
    cary.strides = cstrides
    return cary._getvalue()
def unpack_tuple(self, args, name, n_min, n_max, *objs):
    charptr = Type.pointer(Type.int(8))
    argtypes = [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t]
    fnty = Type.function(Type.int(), argtypes, var_arg=True)
    fn = self._get_function(fnty, name="PyArg_UnpackTuple")

    n_min = Constant.int(self.py_ssize_t, n_min)
    n_max = Constant.int(self.py_ssize_t, n_max)
    if isinstance(name, str):
        name = self.context.insert_const_string(self.builder.module, name)
    return self.builder.call(fn, [args, name, n_min, n_max] + list(objs))
def make_keywords(self, kws):
    strings = []
    stringtype = Type.pointer(Type.int(8))
    for k in kws:
        strings.append(self.make_const_string(k))

    strings.append(Constant.null(stringtype))
    kwlist = Constant.array(stringtype, strings)
    kwlist = cgutils.global_constant(self.module, ".kwlist", kwlist)
    return Constant.bitcast(kwlist, Type.pointer(stringtype))
def is_true(self, builder, typ, val):
    if typ in types.integer_domain:
        return builder.icmp(lc.ICMP_NE, val, Constant.null(val.type))
    elif typ in types.real_domain:
        return builder.fcmp(lc.FCMP_UNE, val, Constant.real(val.type, 0))
    elif typ in types.complex_domain:
        cmplx = self.make_complex(typ)(self, builder, val)
        real_istrue = self.is_true(builder, typ.underlying_float, cmplx.real)
        imag_istrue = self.is_true(builder, typ.underlying_float, cmplx.imag)
        return builder.or_(real_istrue, imag_istrue)
    raise NotImplementedError("is_true", val, typ)
def insert_const_string(self, mod, string):
    stringtype = GENERIC_POINTER
    text = Constant.stringz(string)
    name = ".const.%s" % string
    for gv in mod.global_variables:
        if gv.name == name and gv.type.pointee == text.type:
            break
    else:
        gv = cgutils.global_constant(mod, name, text)
        gv.linkage = lc.LINKAGE_INTERNAL
    return Constant.bitcast(gv, stringtype)
def make_constant_array(self, builder, typ, ary):
    """
    Create an array structure reifying the given constant array.
    A low-level contiguous array constant is created in the LLVM IR.
    """
    datatype = self.get_data_type(typ.dtype)
    # don't freeze ary of non-contig or bigger than 1MB
    size_limit = 10**6

    if (self.allow_dynamic_globals and
            (typ.layout not in 'FC' or ary.nbytes > size_limit)):
        # get pointer from the ary
        dataptr = ary.ctypes.data
        data = self.add_dynamic_addr(builder, dataptr, info=str(type(dataptr)))
        rt_addr = self.add_dynamic_addr(builder, id(ary), info=str(type(ary)))
    else:
        # Handle data: reify the flattened array in "C" or "F" order as a
        # global array of bytes.
        flat = ary.flatten(order=typ.layout)
        # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to
        #       workaround issue #1850 which is due to numpy issue #3147
        consts = Constant.array(Type.int(8), bytearray(flat.data))
        data = cgutils.global_constant(builder, ".const.array.data", consts)
        # Ensure correct data alignment (issue #1933)
        data.align = self.get_abi_alignment(datatype)
        # No reference to parent ndarray
        rt_addr = None

    # Handle shape
    llintp = self.get_value_type(types.intp)
    shapevals = [self.get_constant(types.intp, s) for s in ary.shape]
    cshape = Constant.array(llintp, shapevals)

    # Handle strides
    stridevals = [self.get_constant(types.intp, s) for s in ary.strides]
    cstrides = Constant.array(llintp, stridevals)

    # Create array structure
    cary = self.make_array(typ)(self, builder)

    intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize)
    self.populate_array(cary,
                        data=builder.bitcast(data, cary.data.type),
                        shape=cshape,
                        strides=cstrides,
                        itemsize=intp_itemsize,
                        parent=rt_addr,
                        meminfo=None)

    return cary._getvalue()
def get_item_pointer2(builder, data, shape, strides, layout, inds,
                      wraparound=False):
    if wraparound:
        # Wraparound
        indices = []
        for ind, dimlen in zip(inds, shape):
            ZERO = Constant.null(ind.type)
            negative = builder.icmp(lc.ICMP_SLT, ind, ZERO)
            wrapped = builder.add(dimlen, ind)
            selected = builder.select(negative, wrapped, ind)
            indices.append(selected)
    else:
        indices = inds
    if not indices:
        # Indexing with empty tuple
        return builder.gep(data, [get_null_value(Type.int(32))])
    intp = indices[0].type
    # Indexing code
    if layout in 'CF':
        steps = []
        # Compute steps for each dimension
        if layout == 'C':
            # C contiguous
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[i + 1:]:
                    last = builder.mul(last, j)
                steps.append(last)
        elif layout == 'F':
            # F contiguous
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[:i]:
                    last = builder.mul(last, j)
                steps.append(last)
        else:
            raise Exception("unreachable")

        # Compute index
        loc = Constant.int(intp, 0)
        for i, s in zip(indices, steps):
            tmp = builder.mul(i, s)
            loc = builder.add(loc, tmp)
        ptr = builder.gep(data, [loc])
        return ptr
    else:
        # Any layout
        dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
        offset = functools.reduce(builder.add, dimoffs)
        return pointer_add(builder, data, offset)
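# For the contiguous layouts above, the element offset is a dot product of the
# indices with per-dimension "steps".  A minimal pure-Python sketch of that
# arithmetic (illustration only; the IR computes the same products with mul/add):

def _flat_index_sketch(inds, shape, layout):
    steps = []
    for i in range(len(shape)):
        # Step for dim i: product of the trailing dims (C) or leading dims (F).
        extent = shape[i + 1:] if layout == 'C' else shape[:i]
        step = 1
        for j in extent:
            step *= j
        steps.append(step)
    return sum(i * s for i, s in zip(inds, steps))

assert _flat_index_sketch((1, 2), (3, 4), 'C') == 1 * 4 + 2    # row-major
assert _flat_index_sketch((1, 2), (3, 4), 'F') == 1 + 2 * 3    # column-major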
def is_leap_year(builder, year_val):
    """
    Return a predicate indicating whether *year_val* (offset by 1970) is a
    leap year.
    """
    actual_year = builder.add(year_val, Constant.int(DATETIME64, 1970))
    multiple_of_4 = cgutils.is_null(
        builder, builder.and_(actual_year, Constant.int(DATETIME64, 3)))
    not_multiple_of_100 = cgutils.is_not_null(
        builder, builder.srem(actual_year, Constant.int(DATETIME64, 100)))
    multiple_of_400 = cgutils.is_null(
        builder, builder.srem(actual_year, Constant.int(DATETIME64, 400)))
    return builder.and_(multiple_of_4,
                        builder.or_(not_multiple_of_100, multiple_of_400))
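# The predicate above uses `year & 3` as a cheap `year % 4` and srem for the
# century rules.  A hedged pure-Python equivalent, for illustration only:

def _is_leap_year_sketch(year_offset_1970):
    year = year_offset_1970 + 1970
    return (year & 3) == 0 and (year % 100 != 0 or year % 400 == 0)

assert _is_leap_year_sketch(2000 - 1970)        # divisible by 400
assert not _is_leap_year_sketch(1900 - 1970)    # divisible by 100, not 400
assert _is_leap_year_sketch(2004 - 1970)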
def populate_array(array, data, shape, strides, itemsize, meminfo,
                   parent=None):
    """
    Helper function for populating array structures.
    This avoids forgetting to set fields.
    """
    context = array._context
    builder = array._builder
    datamodel = array._datamodel
    required_fields = set(datamodel._fields)

    if meminfo is None:
        meminfo = Constant.null(context.get_value_type(
            datamodel.get_type('meminfo')))

    attrs = dict(shape=shape,
                 strides=strides,
                 data=data,
                 itemsize=itemsize,
                 meminfo=meminfo,)

    # Set `parent` attribute
    if parent is None:
        attrs['parent'] = Constant.null(context.get_value_type(
            datamodel.get_type('parent')))
    else:
        attrs['parent'] = parent

    # Calc num of items from shape
    nitems = context.get_constant(types.intp, 1)
    unpacked_shape = cgutils.unpack_tuple(builder, shape, shape.type.count)
    if unpacked_shape:
        # Shape is not empty
        for axlen in unpacked_shape:
            nitems = builder.mul(nitems, axlen)
    else:
        # Shape is empty
        nitems = context.get_constant(types.intp, 0)
    attrs['nitems'] = nitems

    # Make sure that we have all the fields
    got_fields = set(attrs.keys())
    if got_fields != required_fields:
        raise ValueError("missing {0}".format(required_fields - got_fields))

    # Set field value
    for k, v in attrs.items():
        setattr(array, k, v)

    return array
def make_constant_array(self, builder, typ, ary):
    """
    Create an array structure reifying the given constant array.
    A low-level contiguous array constant is created in the LLVM IR.
    """
    assert typ.layout == 'C'                # assumed in typeinfer.py
    datatype = self.get_data_type(typ.dtype)

    # Handle data: reify the flattened array in "C" order as a
    # global array of bytes.
    flat = ary.flatten()
    # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to
    #       workaround issue #1850 which is due to numpy issue #3147
    consts = Constant.array(Type.int(8), bytearray(flat.data))
    data = cgutils.global_constant(builder, ".const.array.data", consts)
    # Ensure correct data alignment (issue #1933)
    data.align = self.get_abi_alignment(datatype)

    # Handle shape
    llintp = self.get_value_type(types.intp)
    shapevals = [self.get_constant(types.intp, s) for s in ary.shape]
    cshape = Constant.array(llintp, shapevals)

    # Handle strides
    if ary.ndim > 0:
        # Use strides of the equivalent C-contiguous array.
        contig = np.ascontiguousarray(ary)
        stridevals = [self.get_constant(types.intp, s) for s in contig.strides]
    else:
        stridevals = []
    cstrides = Constant.array(llintp, stridevals)

    # Create array structure
    cary = self.make_array(typ)(self, builder)

    rt_addr = self.get_constant(types.uintp, id(ary)).inttoptr(
        self.get_value_type(types.pyobject))

    intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize)
    self.populate_array(cary,
                        data=builder.bitcast(data, cary.data.type),
                        shape=cshape,
                        strides=cstrides,
                        itemsize=intp_itemsize,
                        parent=rt_addr,
                        meminfo=None)

    return cary._getvalue()
def timedelta_mod_timedelta(context, builder, sig, args):
    # inspired by https://github.com/numpy/numpy/blob/fe8072a12d65e43bd2e0b0f9ad67ab0108cc54b3/numpy/core/src/umath/loops.c.src#L1424
    # alg is basically as `a % b`:
    # if a or b is NaT return NaT
    # elseif b is 0 return NaT
    # else pretend a and b are int and do pythonic int modulus

    [va, vb] = args
    [ta, tb] = sig.args
    not_nan = are_not_nat(builder, [va, vb])
    ll_ret_type = context.get_value_type(sig.return_type)
    ret = alloc_timedelta_result(builder)
    builder.store(NAT, ret)
    zero = Constant.int(ll_ret_type, 0)
    with cgutils.if_likely(builder, not_nan):
        va, vb = normalize_timedeltas(context, builder, va, vb, ta, tb)
        # is the denominator zero or NaT?
        denom_ok = builder.not_(builder.icmp_signed('==', vb, zero))
        with cgutils.if_likely(builder, denom_ok):
            # is either arg negative?
            vapos = builder.icmp_signed('>', va, zero)
            vbpos = builder.icmp_signed('>', vb, zero)
            rem = builder.srem(va, vb)
            cond = builder.or_(builder.and_(vapos, vbpos),
                               builder.icmp_signed('==', rem, zero))
            with builder.if_else(cond) as (then, otherwise):
                with then:
                    builder.store(rem, ret)
                with otherwise:
                    builder.store(builder.add(rem, vb), ret)

    res = builder.load(ret)
    return impl_ret_untracked(context, builder, sig.return_type, res)
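# The comment in timedelta_mod_timedelta() describes turning srem's C-style
# remainder (sign of the numerator) into a Python-style modulus.  A hedged
# pure-Python sketch of that adjustment for mixed-sign operands (illustration
# only; the NaT and zero-denominator guards above are omitted here):

def _pythonic_mod_sketch(a, b):
    rem = abs(a) % abs(b)
    rem = rem if a >= 0 else -rem          # srem: sign follows the numerator
    if (a > 0 and b > 0) or rem == 0:
        return rem
    return rem + b                         # shift so the sign follows b

assert _pythonic_mod_sketch(7, 3) == 7 % 3       # 1
assert _pythonic_mod_sketch(-7, 3) == -7 % 3     # 2
assert _pythonic_mod_sketch(7, -3) == 7 % -3     # -2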
def int_invert_impl(context, builder, sig, args):
    [typ] = sig.args
    [val] = args
    # Invert before upcasting, for unsigned numbers
    res = builder.xor(val, Constant.all_ones(val.type))
    res = context.cast(builder, res, typ, sig.return_type)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def init_generator_state(self, lower):
    """
    NULL-initialize all generator state variables, to avoid spurious
    decref's on cleanup.
    """
    lower.builder.store(Constant.null(self.gen_state_ptr.type.pointee),
                        self.gen_state_ptr)
def int_abs_impl(context, builder, sig, args):
    [x] = args
    ZERO = Constant.null(x.type)
    ltz = builder.icmp(lc.ICMP_SLT, x, ZERO)
    negated = builder.neg(x)
    res = builder.select(ltz, negated, x)
    return impl_ret_untracked(context, builder, sig.return_type, res)
def memset(builder, ptr, size, value):
    """
    Fill *size* bytes starting from *ptr* with *value*.
    """
    sizety = size.type
    memset = "llvm.memset.p0i8.i%d" % (sizety.width)
    i32 = lc.Type.int(32)
    i8 = lc.Type.int(8)
    i8_star = i8.as_pointer()
    i1 = lc.Type.int(1)
    fn = builder.module.declare_intrinsic('llvm.memset', (i8_star, size.type))
    ptr = builder.bitcast(ptr, i8_star)
    if isinstance(value, int):
        value = Constant.int(i8, value)
    builder.call(fn, [ptr, value, size, Constant.int(i32, 0),
                      Constant.int(i1, 0)])
def timedelta_sign_impl(context, builder, sig, args):
    val, = args
    ret = alloc_timedelta_result(builder)
    zero = Constant.int(TIMEDELTA64, 0)
    with cgutils.ifelse(builder,
                        builder.icmp(lc.ICMP_SGT, val, zero)
                        ) as (gt_zero, le_zero):
        with gt_zero:
            builder.store(Constant.int(TIMEDELTA64, 1), ret)
        with le_zero:
            with cgutils.ifelse(builder,
                                builder.icmp(lc.ICMP_EQ, val, zero)
                                ) as (eq_zero, lt_zero):
                with eq_zero:
                    builder.store(Constant.int(TIMEDELTA64, 0), ret)
                with lt_zero:
                    builder.store(Constant.int(TIMEDELTA64, -1), ret)
    return builder.load(ret)
def pack_array(builder, values):
    n = len(values)
    ty = values[0].type
    ary = Constant.undef(Type.array(ty, n))
    for i, v in enumerate(values):
        ary = builder.insert_value(ary, v, i)
    return ary
def insert_string_const_addrspace(self, builder, string):
    """
    Insert a constant string in the constant address space and return
    a generic i8 pointer to the data.

    This function attempts to deduplicate.
    """
    lmod = builder.basic_block.function.module
    text = Constant.stringz(string)
    name = "__conststring__.%s" % string
    charty = Type.int(8)

    for gv in lmod.global_variables:
        if gv.name == name and gv.type.pointee == text.type:
            break
    else:
        gv = lmod.add_global_variable(text.type, name=name,
                                      addrspace=nvvm.ADDRSPACE_CONSTANT)
        gv.linkage = LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = text

    constcharptrty = Type.pointer(charty, nvvm.ADDRSPACE_CONSTANT)
    charptr = builder.bitcast(gv, constcharptrty)

    conv = nvvmutils.insert_addrspace_conv(lmod, charty,
                                           nvvm.ADDRSPACE_CONSTANT)
    return builder.call(conv, [charptr])
def lower_finalize_func_body(self, builder, genptr):
    """
    Lower the body of the generator's finalizer: decref all live
    state variables.
    """
    pyapi = self.context.get_python_api(builder)
    resume_index_ptr = self.get_resume_index_ptr(builder, genptr)
    resume_index = builder.load(resume_index_ptr)
    # If resume_index is 0, next() was never called
    # If resume_index is -1, generator terminated cleanly
    # (note function arguments are saved in state variables,
    #  so they don't need a separate cleanup step)
    need_cleanup = builder.icmp_signed(
        '>', resume_index, Constant.int(resume_index.type, 0))

    with cgutils.if_unlikely(builder, need_cleanup):
        # Decref all live vars (some may be NULL)
        gen_state_ptr = self.get_state_ptr(builder, genptr)
        for state_index in range(len(self.gentype.state_types)):
            state_slot = cgutils.gep_inbounds(builder, gen_state_ptr,
                                              0, state_index)
            ty = self.gentype.state_types[state_index]
            val = self.context.unpack_value(builder, ty, state_slot)
            pyapi.decref(val)

    builder.ret_void()
def return_from_generator(self, lower):
    """
    Emit a StopIteration at generator end and mark the generator exhausted.
    """
    indexval = Constant.int(self.resume_index_ptr.type.pointee, -1)
    lower.builder.store(indexval, self.resume_index_ptr)
    self.call_conv.return_stop_iteration(lower.builder)
def get_constant_null(self, ty):
    lty = self.get_value_type(ty)
    return Constant.null(lty)
def get_constant_undef(self, ty):
    lty = self.get_value_type(ty)
    return Constant.undef(lty)
def int_invert_impl(context, builder, sig, args):
    [typ] = sig.args
    [val] = args
    val = context.cast(builder, val, typ, sig.return_type)
    return builder.xor(val, Constant.all_ones(val.type))
def real_divmod_func_body(context, builder, vx, wx):
    # Reference Objects/floatobject.c
    #
    # float_divmod(PyObject *v, PyObject *w)
    # {
    #     double vx, wx;
    #     double div, mod, floordiv;
    #     CONVERT_TO_DOUBLE(v, vx);
    #     CONVERT_TO_DOUBLE(w, wx);
    #     mod = fmod(vx, wx);
    #     /* fmod is typically exact, so vx-mod is *mathematically* an
    #        exact multiple of wx.  But this is fp arithmetic, and fp
    #        vx - mod is an approximation; the result is that div may
    #        not be an exact integral value after the division, although
    #        it will always be very close to one.
    #     */
    #     div = (vx - mod) / wx;
    #     if (mod) {
    #         /* ensure the remainder has the same sign as the denominator */
    #         if ((wx < 0) != (mod < 0)) {
    #             mod += wx;
    #             div -= 1.0;
    #         }
    #     }
    #     else {
    #         /* the remainder is zero, and in the presence of signed zeroes
    #            fmod returns different results across platforms; ensure
    #            it has the same sign as the denominator; we'd like to do
    #            "mod = wx * 0.0", but that may get optimized away */
    #         mod *= mod;  /* hide "mod = +0" from optimizer */
    #         if (wx < 0.0)
    #             mod = -mod;
    #     }
    #     /* snap quotient to nearest integral value */
    #     if (div) {
    #         floordiv = floor(div);
    #         if (div - floordiv > 0.5)
    #             floordiv += 1.0;
    #     }
    #     else {
    #         /* div is zero - get the same sign as the true quotient */
    #         div *= div;                /* hide "div = +0" from optimizers */
    #         floordiv = div * vx / wx;  /* zero w/ sign of vx/wx */
    #     }
    #     return Py_BuildValue("(dd)", floordiv, mod);
    # }
    pmod = cgutils.alloca_once(builder, vx.type)
    pdiv = cgutils.alloca_once(builder, vx.type)
    pfloordiv = cgutils.alloca_once(builder, vx.type)
    mod = builder.frem(vx, wx)
    div = builder.fdiv(builder.fsub(vx, mod), wx)

    builder.store(mod, pmod)
    builder.store(div, pdiv)

    ZERO = Constant.real(vx.type, 0)
    ONE = Constant.real(vx.type, 1)
    mod_istrue = builder.fcmp(lc.FCMP_ONE, mod, ZERO)
    wx_ltz = builder.fcmp(lc.FCMP_OLT, wx, ZERO)
    mod_ltz = builder.fcmp(lc.FCMP_OLT, mod, ZERO)

    with cgutils.ifthen(builder, mod_istrue):
        wx_ltz_ne_mod_ltz = builder.icmp(lc.ICMP_NE, wx_ltz, mod_ltz)

        with cgutils.ifthen(builder, wx_ltz_ne_mod_ltz):
            mod = builder.fadd(mod, wx)
            div = builder.fsub(div, ONE)
            builder.store(mod, pmod)
            builder.store(div, pdiv)

    del mod
    del div

    with cgutils.ifnot(builder, mod_istrue):
        mod = builder.load(pmod)
        mod = builder.fmul(mod, mod)
        builder.store(mod, pmod)

        del mod

        with cgutils.ifthen(builder, wx_ltz):
            mod = builder.load(pmod)
            mod = builder.fsub(ZERO, mod)
            builder.store(mod, pmod)

            del mod

    div = builder.load(pdiv)
    div_istrue = builder.fcmp(lc.FCMP_ONE, div, ZERO)

    with cgutils.ifthen(builder, div_istrue):
        module = cgutils.get_module(builder)
        floorfn = lc.Function.intrinsic(module, lc.INTR_FLOOR, [wx.type])
        floordiv = builder.call(floorfn, [div])
        floordivdiff = builder.fsub(div, floordiv)
        floordivincr = builder.fadd(floordiv, ONE)
        HALF = Constant.real(wx.type, 0.5)
        pred = builder.fcmp(lc.FCMP_OGT, floordivdiff, HALF)
        floordiv = builder.select(pred, floordivincr, floordiv)
        builder.store(floordiv, pfloordiv)

    with cgutils.ifnot(builder, div_istrue):
        div = builder.fmul(div, div)
        builder.store(div, pdiv)
        floordiv = builder.fdiv(builder.fmul(div, vx), wx)
        builder.store(floordiv, pfloordiv)

    return builder.load(pfloordiv), builder.load(pmod)
def real_divmod_func_body(context, builder, vx, wx):
    # Reference Objects/floatobject.c
    #
    # float_divmod(PyObject *v, PyObject *w)
    # {
    #     double vx, wx;
    #     double div, mod, floordiv;
    #     CONVERT_TO_DOUBLE(v, vx);
    #     CONVERT_TO_DOUBLE(w, wx);
    #     mod = fmod(vx, wx);
    #     /* fmod is typically exact, so vx-mod is *mathematically* an
    #        exact multiple of wx.  But this is fp arithmetic, and fp
    #        vx - mod is an approximation; the result is that div may
    #        not be an exact integral value after the division, although
    #        it will always be very close to one.
    #     */
    #     div = (vx - mod) / wx;
    #     if (mod) {
    #         /* ensure the remainder has the same sign as the denominator */
    #         if ((wx < 0) != (mod < 0)) {
    #             mod += wx;
    #             div -= 1.0;
    #         }
    #     }
    #     else {
    #         /* the remainder is zero, and in the presence of signed zeroes
    #            fmod returns different results across platforms; ensure
    #            it has the same sign as the denominator; we'd like to do
    #            "mod = wx * 0.0", but that may get optimized away */
    #         mod *= mod;  /* hide "mod = +0" from optimizer */
    #         if (wx < 0.0)
    #             mod = -mod;
    #     }
    #     /* snap quotient to nearest integral value */
    #     if (div) {
    #         floordiv = floor(div);
    #         if (div - floordiv > 0.5)
    #             floordiv += 1.0;
    #     }
    #     else {
    #         /* div is zero - get the same sign as the true quotient */
    #         div *= div;                /* hide "div = +0" from optimizers */
    #         floordiv = div * vx / wx;  /* zero w/ sign of vx/wx */
    #     }
    #     return Py_BuildValue("(dd)", floordiv, mod);
    # }
    pmod = cgutils.alloca_once(builder, vx.type)
    pdiv = cgutils.alloca_once(builder, vx.type)
    pfloordiv = cgutils.alloca_once(builder, vx.type)
    mod = builder.frem(vx, wx)
    div = builder.fdiv(builder.fsub(vx, mod), wx)

    builder.store(mod, pmod)
    builder.store(div, pdiv)

    # Note the use of negative zero for proper negating with `ZERO - x`
    ZERO = vx.type(0.0)
    NZERO = vx.type(-0.0)
    ONE = vx.type(1.0)
    mod_istrue = builder.fcmp_unordered('!=', mod, ZERO)
    wx_ltz = builder.fcmp_ordered('<', wx, ZERO)
    mod_ltz = builder.fcmp_ordered('<', mod, ZERO)

    with builder.if_else(mod_istrue, likely=True) as (if_nonzero_mod,
                                                      if_zero_mod):
        with if_nonzero_mod:
            # `mod` is non-zero or NaN
            # Ensure the remainder has the same sign as the denominator
            wx_ltz_ne_mod_ltz = builder.icmp(lc.ICMP_NE, wx_ltz, mod_ltz)
            with builder.if_then(wx_ltz_ne_mod_ltz):
                builder.store(builder.fsub(div, ONE), pdiv)
                builder.store(builder.fadd(mod, wx), pmod)

        with if_zero_mod:
            # `mod` is zero, select the proper sign depending on
            # the denominator's sign
            mod = builder.select(wx_ltz, NZERO, ZERO)
            builder.store(mod, pmod)

    del mod, div

    div = builder.load(pdiv)
    div_istrue = builder.fcmp(lc.FCMP_ONE, div, ZERO)

    with builder.if_then(div_istrue):
        realtypemap = {'float': types.float32,
                       'double': types.float64}
        realtype = realtypemap[str(wx.type)]
        floorfn = context.get_function(math.floor,
                                       typing.signature(realtype, realtype))
        floordiv = floorfn(builder, [div])
        floordivdiff = builder.fsub(div, floordiv)
        floordivincr = builder.fadd(floordiv, ONE)
        HALF = Constant.real(wx.type, 0.5)
        pred = builder.fcmp(lc.FCMP_OGT, floordivdiff, HALF)
        floordiv = builder.select(pred, floordivincr, floordiv)
        builder.store(floordiv, pfloordiv)

    with cgutils.ifnot(builder, div_istrue):
        div = builder.fmul(div, div)
        builder.store(div, pdiv)
        floordiv = builder.fdiv(builder.fmul(div, vx), wx)
        builder.store(floordiv, pfloordiv)

    return builder.load(pfloordiv), builder.load(pmod)
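# The C reference above compensates for the difference between fmod (C-style
# remainder, sign of the numerator) and Python's floor-mod.  A quick plain-
# Python reminder of the difference being corrected (illustration only):

import math

assert math.fmod(-7.0, 3.0) == -1.0        # sign follows the numerator
assert -7.0 % 3.0 == 2.0                   # fmod result shifted by the denominator
assert divmod(-7.0, 3.0) == (-3.0, 2.0)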
def generate_kernel_wrapper(self, library, fname, argtypes, debug):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped has the name ``fname`` and argument types
    ``argtypes``.  The wrapper function is returned.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    fnty = Type.function(Type.int(),
                         [self.call_conv.get_return_type(types.pyobject)]
                         + argtys)
    func = wrapper_module.add_function(fnty, name=fname)

    prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
    wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
    builder = Builder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(builder, func, types.void,
                                             argtypes, callargs)

    if debug:
        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded
            casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                                  old.type])
            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the exchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    wrapfn = library.get_function(wrapfn.name)
    return wrapfn
def _delete_variable(self, varname):
    """
    Zero-fill variable to avoid crashing due to extra ir.Del
    """
    storage = self.getvar(varname)
    self.builder.store(Constant.null(storage.type.pointee), storage)
def _get_ptr_by_index(self, index):
    geped = self._builder.gep(self._value,
                              [Constant.int(Type.int(), 0),
                               Constant.int(Type.int(), index)])
    return geped
def get_null_value(ltype):
    return Constant.null(ltype)
""" Generic helpers for LLVM code generation. """ from __future__ import print_function, division, absolute_import from contextlib import contextmanager import functools import re from llvmlite import ir from llvmlite.llvmpy.core import Constant, Type import llvmlite.llvmpy.core as lc from . import utils true_bit = Constant.int(Type.int(1), 1) false_bit = Constant.int(Type.int(1), 0) true_byte = Constant.int(Type.int(8), 1) false_byte = Constant.int(Type.int(8), 0) def as_bool_byte(builder, value): return builder.zext(value, Type.int(8)) def as_bool_bit(builder, value): return builder.icmp(lc.ICMP_NE, value, Constant.null(value.type)) def make_anonymous_struct(builder, values, struct_type=None): """
def to_native_value(self, obj, typ):
    if isinstance(typ, types.Object) or typ == types.pyobject:
        return obj

    elif typ == types.boolean:
        istrue = self.object_istrue(obj)
        zero = Constant.null(istrue.type)
        return self.builder.icmp(lc.ICMP_NE, istrue, zero)

    elif typ in types.unsigned_domain:
        longobj = self.number_long(obj)
        ullval = self.long_as_ulonglong(longobj)
        self.decref(longobj)
        return self.builder.trunc(ullval,
                                  self.context.get_argument_type(typ))

    elif typ in types.signed_domain:
        longobj = self.number_long(obj)
        llval = self.long_as_longlong(longobj)
        self.decref(longobj)
        return self.builder.trunc(llval,
                                  self.context.get_argument_type(typ))

    elif typ == types.float32:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return self.builder.fptrunc(fval,
                                    self.context.get_argument_type(typ))

    elif typ == types.float64:
        fobj = self.number_float(obj)
        fval = self.float_as_double(fobj)
        self.decref(fobj)
        return fval

    elif typ in (types.complex128, types.complex64):
        cplxcls = self.context.make_complex(types.complex128)
        cplx = cplxcls(self.context, self.builder)
        pcplx = cplx._getpointer()
        ok = self.complex_adaptor(obj, pcplx)
        failed = cgutils.is_false(self.builder, ok)

        with cgutils.if_unlikely(self.builder, failed):
            self.builder.ret(self.get_null_object())

        if typ == types.complex64:
            c64cls = self.context.make_complex(typ)
            c64 = c64cls(self.context, self.builder)
            freal = self.context.cast(self.builder, cplx.real,
                                      types.float64, types.float32)
            fimag = self.context.cast(self.builder, cplx.imag,
                                      types.float64, types.float32)
            c64.real = freal
            c64.imag = fimag
            return c64._getvalue()
        else:
            return cplx._getvalue()

    elif isinstance(typ, types.NPDatetime):
        val = self.extract_np_datetime(obj)
        return val

    elif isinstance(typ, types.NPTimedelta):
        val = self.extract_np_timedelta(obj)
        return val

    elif isinstance(typ, types.Array):
        return self.to_native_array(typ, obj)

    elif isinstance(typ, types.Optional):
        isnone = self.builder.icmp(lc.ICMP_EQ, obj, self.borrow_none())
        with cgutils.ifelse(self.builder, isnone) as (then, orelse):
            with then:
                noneval = self.context.make_optional_none(self.builder,
                                                          typ.type)
                ret = cgutils.alloca_once(self.builder, noneval.type)
                self.builder.store(noneval, ret)

            with orelse:
                val = self.to_native_value(obj, typ.type)
                just = self.context.make_optional_value(self.builder,
                                                        typ.type, val)
                self.builder.store(just, ret)
        return ret

    raise NotImplementedError(typ)
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args):
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr,
                                    ll_intp_ptr, ll_voidptr,
                                    ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                              type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(builder, func,
                                                     types.pyobject,
                                                     object_sig, object_args)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def get_dummy_value(self):
    return Constant.null(self.get_dummy_type())
def make_constant_array(vals):
    consts = [Constant.int(TIMEDELTA64, v) for v in vals]
    return Constant.array(TIMEDELTA64, consts)
def define_error_gv(postfix):
    gv = wrapper_module.add_global_variable(Type.int(),
                                            name=wrapfn.name + postfix)
    gv.initializer = Constant.null(gv.type.pointee)
    return gv
def get_null_object(self):
    return Constant.null(self.pyobj)
import llvmlite.llvmpy.core as lc
from llvmlite.llvmpy.core import Type, Constant

from numba import npdatetime, types, typing, cgutils, utils
from numba.targets.imputils import (builtin, builtin_attr, implement,
                                    impl_attribute, impl_attribute_generic,
                                    iterator_impl, iternext_impl,
                                    struct_factory, type_factory)
from numba.typing import signature


if not npdatetime.NPDATETIME_SUPPORTED:
    raise NotImplementedError(
        "numpy.datetime64 unsupported in this configuration")


# datetime64 and timedelta64 use the same internal representation
DATETIME64 = TIMEDELTA64 = Type.int(64)
NAT = Constant.int(TIMEDELTA64, npdatetime.NAT)

TIMEDELTA_BINOP_SIG = (types.Kind(types.NPTimedelta),) * 2


@type_factory(types.NPDatetime)
def llvm_datetime_type(context, tp):
    return DATETIME64


@type_factory(types.NPTimedelta)
def llvm_timedelta_type(context, tp):
    return TIMEDELTA64


def scale_by_constant(builder, val, factor):
def bool_invert_impl(context, builder, sig, args):
    [typ] = sig.args
    [val] = args
    return builder.sub(Constant.int(val.type, 1), val)
def scale_by_constant(builder, val, factor):
    """
    Multiply *val* by the constant *factor*.
    """
    return builder.mul(val, Constant.int(TIMEDELTA64, factor))
def setitem_array1d_slice(context, builder, sig, args):
    aryty, idxty, valty = sig.args
    ary, idx, val = args
    arystty = make_array(aryty)
    ary = arystty(context, builder, ary)
    shapes = cgutils.unpack_tuple(builder, ary.shape, aryty.ndim)
    slicestruct = Slice(context, builder, value=idx)

    # the logic here follows that of Python's Objects/sliceobject.c
    # in particular PySlice_GetIndicesEx function
    ZERO = Constant.int(slicestruct.step.type, 0)
    NEG_ONE = Constant.int(slicestruct.start.type, -1)

    b_step_eq_zero = builder.icmp(lc.ICMP_EQ, slicestruct.step, ZERO)
    # bail if step is 0
    with cgutils.ifthen(builder, b_step_eq_zero):
        context.call_conv.return_user_exc(builder, ValueError,
                                          ("slice step cannot be zero",))

    # adjust for negative indices for start
    start = cgutils.alloca_once_value(builder, slicestruct.start)
    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        add = builder.add(builder.load(start), shapes[0])
        builder.store(add, start)

    b_start_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(start), ZERO)
    with cgutils.ifthen(builder, b_start_lt_zero):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        builder.store(cond, start)

    b_start_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(start), shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_start_geq_len):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, start)

    # adjust stop for negative value
    stop = cgutils.alloca_once_value(builder, slicestruct.stop)
    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        add = builder.add(builder.load(stop), shapes[0])
        builder.store(add, stop)

    b_stop_lt_zero = builder.icmp(lc.ICMP_SLT, builder.load(stop), ZERO)
    with cgutils.ifthen(builder, b_stop_lt_zero):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero, NEG_ONE, ZERO)
        builder.store(cond, start)

    b_stop_geq_len = builder.icmp(lc.ICMP_SGE, builder.load(stop), shapes[0])
    ONE = Constant.int(shapes[0].type, 1)
    with cgutils.ifthen(builder, b_stop_geq_len):
        b_step_lt_zero = builder.icmp(lc.ICMP_SLT, slicestruct.step, ZERO)
        cond = builder.select(b_step_lt_zero,
                              builder.sub(shapes[0], ONE), shapes[0])
        builder.store(cond, stop)

    b_step_gt_zero = builder.icmp(lc.ICMP_SGT, slicestruct.step, ZERO)
    with cgutils.ifelse(builder, b_step_gt_zero) as (then0, otherwise0):
        with then0:
            with cgutils.for_range_slice(builder, builder.load(start),
                                         builder.load(stop), slicestruct.step,
                                         slicestruct.start.type) as loop_idx1:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx1], wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
        with otherwise0:
            with cgutils.for_range_slice(builder, builder.load(start),
                                         builder.load(stop), slicestruct.step,
                                         slicestruct.start.type,
                                         inc=False) as loop_idx2:
                ptr = cgutils.get_item_pointer(builder, aryty, ary,
                                               [loop_idx2], wraparound=True)
                context.pack_value(builder, aryty.dtype, val, ptr)
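# The start/stop clamping above follows CPython's PySlice_GetIndicesEx.  A
# hedged pure-Python sketch of the per-bound normalization (illustration only;
# the IR expresses the same tests with icmp/select):

def _clamp_slice_index_sketch(idx, length, step):
    # Negative indices count from the end; out-of-range values are clamped
    # to [-1, length - 1] for a negative step and [0, length] otherwise.
    if idx < 0:
        idx += length
    if idx < 0:
        return -1 if step < 0 else 0
    if idx >= length:
        return length - 1 if step < 0 else length
    return idx

assert _clamp_slice_index_sketch(-2, 5, 1) == 3
assert _clamp_slice_index_sketch(-9, 5, 1) == 0
assert _clamp_slice_index_sketch(99, 5, -1) == 4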
def add_constant(builder, val, const):
    """
    Add constant *const* to *val*.
    """
    return builder.add(val, Constant.int(TIMEDELTA64, const))
def int_abs_impl(context, builder, sig, args):
    [x] = args
    ZERO = Constant.null(x.type)
    ltz = builder.icmp(lc.ICMP_SLT, x, ZERO)
    negated = builder.neg(x)
    return builder.select(ltz, negated, x)
def unscale_by_constant(builder, val, factor):
    """
    Divide *val* by the constant *factor*.
    """
    return builder.sdiv(val, Constant.int(TIMEDELTA64, factor))
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args, env):
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr,
                                    ll_intp_ptr, ll_voidptr,
                                    ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        builder.store(Constant.null(ll_pyobj), ptr)   # initialize to NULL

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, value=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(builder, func,
                                                     types.pyobject,
                                                     object_sig,
                                                     ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def generate_kernel_wrapper(self, func, argtypes):
    module = func.module

    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    fnty = Type.function(Type.int(),
                         [self.call_conv.get_return_type(types.pyobject)]
                         + argtys)
    func = wrapper_module.add_function(fnty, name=func.name)
    wrapfn = wrapper_module.add_function(wrapfnty,
                                         name="cudaPy_" + func.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(builder, func, types.void,
                                             argtypes, callargs)

    # Check error status
    with cgutils.if_likely(builder, status.is_ok):
        builder.ret_void()

    with builder.if_then(builder.not_(status.is_python_exc)):
        # User exception raised
        old = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded
        casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                              old.type])
        casfn = wrapper_module.add_function(casfnty,
                                            name="___numba_cas_hack")
        xchg = builder.call(casfn, [gv_exc, old, status.code])
        changed = builder.icmp(ICMP_EQ, xchg, old)

        # If the exchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with builder.if_then(changed):
            for dim, ptr, in zip("xyz", gv_tid):
                val = sreg.tid(dim)
                builder.store(val, ptr)

            for dim, ptr, in zip("xyz", gv_ctaid):
                val = sreg.ctaid(dim)
                builder.store(val, ptr)

    builder.ret_void()

    # force inline
    # inline_function(status.code)

    nvvm.set_cuda_kernel(wrapfn)
    module.link_in(ll.parse_assembly(str(wrapper_module)))
    module.verify()

    wrapfn = module.get_function(wrapfn.name)
    return wrapfn
_plat_bits = struct_.calcsize("@P") * 8

_int8 = Type.int(8)
_int32 = Type.int(32)

_void_star = Type.pointer(_int8)
_int8_star = _void_star

_sizeof_py_ssize_t = ctypes.sizeof(getattr(ctypes, "c_size_t"))
_llvm_py_ssize_t = Type.int(_sizeof_py_ssize_t * 8)

if _trace_refs_:
    _pyobject_head = Type.struct([_void_star, _void_star,
                                  _llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
        Constant.null(_void_star),            # _ob_next
        Constant.null(_void_star),            # _ob_prev
        Constant.int(_llvm_py_ssize_t, 1),    # ob_refcnt
        Constant.null(_void_star),            # ob_type
        ])
else:
    _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
        Constant.int(_llvm_py_ssize_t, 1),    # ob_refcnt
        Constant.null(_void_star),            # ob_type
        ])

_pyobject_head_p = Type.pointer(_pyobject_head)
def create_np_datetime(self, val, unit_code):
    unit_code = Constant.int(Type.int(), unit_code)
    fnty = Type.function(self.pyobj, [Type.int(64), Type.int()])
    fn = self._get_function(fnty, name="numba_create_np_datetime")
    return self.builder.call(fn, [val, unit_code])
def from_native_value(self, val, typ):
    if typ == types.pyobject:
        return val

    elif typ == types.boolean:
        longval = self.builder.zext(val, self.long)
        return self.bool_from_long(longval)

    elif typ in types.unsigned_domain:
        ullval = self.builder.zext(val, self.ulonglong)
        return self.long_from_ulonglong(ullval)

    elif typ in types.signed_domain:
        ival = self.builder.sext(val, self.longlong)
        return self.long_from_longlong(ival)

    elif typ == types.float32:
        dbval = self.builder.fpext(val, self.double)
        return self.float_from_double(dbval)

    elif typ == types.float64:
        return self.float_from_double(val)

    elif typ == types.complex128:
        cmplxcls = self.context.make_complex(typ)
        cval = cmplxcls(self.context, self.builder, value=val)
        return self.complex_from_doubles(cval.real, cval.imag)

    elif typ == types.complex64:
        cmplxcls = self.context.make_complex(typ)
        cval = cmplxcls(self.context, self.builder, value=val)
        freal = self.context.cast(self.builder, cval.real,
                                  types.float32, types.float64)
        fimag = self.context.cast(self.builder, cval.imag,
                                  types.float32, types.float64)
        return self.complex_from_doubles(freal, fimag)

    elif isinstance(typ, types.NPDatetime):
        return self.create_np_datetime(val, typ.unit_code)

    elif isinstance(typ, types.NPTimedelta):
        return self.create_np_timedelta(val, typ.unit_code)

    elif typ == types.none:
        ret = self.make_none()
        return ret

    elif isinstance(typ, types.Optional):
        return self.from_native_return(val, typ.type)

    elif isinstance(typ, types.Array):
        return self.from_native_array(typ, val)

    elif isinstance(typ, types.Record):
        # Note we will create a copy of the record
        # This is the only safe way.
        pdata = cgutils.get_record_data(self.builder, val)
        size = Constant.int(Type.int(), pdata.type.pointee.count)
        ptr = self.builder.bitcast(pdata, Type.pointer(Type.int(8)))
        # Note: this will only work for CPU mode
        #       The following requires access to python object
        dtype_addr = Constant.int(self.py_ssize_t, id(typ.dtype))
        dtypeobj = dtype_addr.inttoptr(self.pyobj)
        return self.recreate_record(ptr, size, dtypeobj)

    elif isinstance(typ, (types.Tuple, types.UniTuple)):
        return self.from_tuple(typ, val)

    raise NotImplementedError(typ)
def as_bool_bit(builder, value):
    return builder.icmp(lc.ICMP_NE, value, Constant.null(value.type))
def build_wrapper(self, api, builder, closure, args, kws):
    nargs = len(self.fndesc.argtypes)

    objs = [api.alloca_obj() for _ in range(nargs)]
    parseok = api.unpack_tuple(args, self.fndesc.qualname, nargs, nargs,
                               *objs)

    pred = builder.icmp(lc.ICMP_EQ, parseok, Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, pred):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    endblk = builder.append_basic_block("arg.end")
    with builder.goto_block(endblk):
        builder.ret(api.get_null_object())

    # Get the Environment object
    env_manager = self.get_env(api, builder)

    cleanup_manager = _ArgManager(self.context, builder, api, env_manager,
                                  endblk, nargs)

    # Compute the arguments to the compiled Numba function.
    innerargs = []
    for obj, ty in zip(objs, self.fndesc.argtypes):
        if isinstance(ty, types.Omitted):
            # It's an omitted value => ignore dummy Python object
            innerargs.append(None)
        else:
            val = cleanup_manager.add_arg(builder.load(obj), ty)
            innerargs.append(val)

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # We elect to not inline the top level user function into the call
    # wrapper, this incurs an overhead of a function call, however, it
    # increases optimisation stability in that the optimised user function
    # is what will actually be run and it is this function that all the
    # inspection tools "see". Further, this makes optimisation "stable" in
    # that calling the user function from e.g. C or from this wrapper will
    # result in the same code executing, were inlining permitted this may
    # not be the case as the inline could trigger additional optimisation
    # as the function goes into the wrapper, this resulting in the executing
    # instruction stream being different from that of the instruction stream
    # present in the user function.
    status, retval = self.context.call_conv.call_function(
        builder, self.func, self.fndesc.restype, self.fndesc.argtypes,
        innerargs, attrs=('noinline',))

    # Do clean up
    self.debug_print(builder, "# callwrapper: emit_cleanup")
    cleanup_manager.emit_cleanup()
    self.debug_print(builder, "# callwrapper: emit_cleanup end")

    # Determine return status
    with builder.if_then(status.is_ok, likely=True):
        # Ok => return boxed Python value
        with builder.if_then(status.is_none):
            api.return_none()

        retty = self._simplified_return_type()
        obj = api.from_native_return(retty, retval, env_manager)
        builder.ret(obj)

    # Error out
    self.context.call_conv.raise_error(builder, api, status)
    builder.ret(api.get_null_object())