def getptr(self, *indices):
    """Emit IR computing a pointer to the element at ``indices``.

    Exactly ``self.nd`` indices are required; each one is passed through
    ``auto_const_intp`` before use.

    * Zero-dimensional arrays: a ``gep`` by ``zero_p`` on the data pointer.
    * ``'C'`` / ``'F'`` order: the element offset is derived from the shape
      alone and applied with ``gep``.
    * Any other order: the offset is accumulated from ``self.strides`` and
      added to the data address through ``ptrtoint`` / ``inttoptr``.

    Returns the LLVM pointer value produced by the builder.
    """
    assert len(indices) == self.nd
    indices = [auto_const_intp(x) for x in indices]
    order = self._order
    data = self._data_ptr
    builder = self.builder

    if self.nd == 0:
        return builder.gep(data, [zero_p])

    if order in 'CF':
        # optimize for C and F contiguous
        dims = list(self.shape)
        if order == 'F':
            # Column-major is row-major over the reversed axes, so the
            # indices must be reversed together with the extents.
            indices = indices[::-1]
            dims = dims[::-1]
        # Horner's scheme: ((i0 * d1 + i1) * d2 + i2) ...  Each index ends
        # up scaled by the product of *all* trailing extents, which is what
        # a contiguous layout requires for three or more dimensions.
        loc = indices[0]
        for idx, dim in zip(indices[1:], dims[1:]):
            loc = builder.add(builder.mul(loc, dim), idx)
        return builder.gep(data, [loc])

    # any order: sum(index * stride) added to the raw address
    loc = Constant.null(intp_type)
    for idx, stride in zip(indices, self.strides):
        loc = builder.add(loc, builder.mul(idx, stride))
    base = builder.ptrtoint(data, intp_type)
    target = builder.add(base, loc)
    return builder.inttoptr(target, data.type)
def getptr(self, *indices):
    """Emit IR computing a pointer to the element at ``indices``.

    Exactly ``self.nd`` indices are required; each one is passed through
    ``auto_const_intp`` before use.

    * Zero-dimensional arrays: a ``gep`` by ``zero_p`` on the data pointer.
    * ``'C'`` / ``'F'`` order: the element offset is derived from the shape
      alone and applied with ``gep``.
    * Any other order: the offset is accumulated from ``self.strides`` and
      added to the data address through ``ptrtoint`` / ``inttoptr``.

    Returns the LLVM pointer value produced by the builder.
    """
    assert len(indices) == self.nd
    indices = [auto_const_intp(x) for x in indices]
    order = self._order
    data = self._data_ptr
    builder = self.builder

    if self.nd == 0:
        return builder.gep(data, [zero_p])

    if order in 'CF':
        # optimize for C and F contiguous
        dims = list(self.shape)
        if order == 'F':
            # Column-major is row-major over the reversed axes, so the
            # indices must be reversed together with the extents.
            indices = indices[::-1]
            dims = dims[::-1]
        # Horner's scheme: ((i0 * d1 + i1) * d2 + i2) ...  Each index ends
        # up scaled by the product of *all* trailing extents, which is what
        # a contiguous layout requires for three or more dimensions.
        loc = indices[0]
        for idx, dim in zip(indices[1:], dims[1:]):
            loc = builder.add(builder.mul(loc, dim), idx)
        return builder.gep(data, [loc])

    # any order: sum(index * stride) added to the raw address
    loc = Constant.null(intp_type)
    for idx, stride in zip(indices, self.strides):
        loc = builder.add(loc, builder.mul(idx, stride))
    base = builder.ptrtoint(data, intp_type)
    target = builder.add(base, loc)
    return builder.inttoptr(target, data.type)
def body(self, index):
    """Emit the body of the function that selects the active mutant.

    Three external globals are added to the module: ``P86.mutant_id``
    (i32), ``P86.mutant_mod`` (i8*) and ``P86.mutant_list`` (pointer to
    ``mutant_t``).  When ``index`` is zero the id and module-name globals
    are reset to their initializers.  Otherwise the list is walked while
    ``index > 1`` and fields 0 and 1 of the node reached are copied into
    the id and module-name globals respectively.
    """
    mod = self.function.module
    # Shorthand for an i32 constant, used as GEP indices below.
    c_int32 = lambda val: Constant.int(Type.int(32), val)

    # global place holder for current mutant id
    id_var = mod.add_global_variable(Type.int(32), "P86.mutant_id")
    id_var.initializer = Constant.int(Type.int(32), 0)
    id_var.linkage = core.LINKAGE_EXTERNAL

    # global place holder module name containing the currently
    # selected mutant
    str_var = mod.add_global_variable(Type.pointer(Type.int(8)),
                                      "P86.mutant_mod")
    str_var.initializer = Constant.null(Type.pointer(Type.int(8)))
    str_var.linkage = core.LINKAGE_EXTERNAL

    # pointer to the tail of the mutant list (reverse order)
    lst_var = mod.add_global_variable(Type.pointer(mutant_t),
                                      "P86.mutant_list")
    lst_var.initializer = Constant.null(Type.pointer(mutant_t))
    lst_var.linkage = core.LINKAGE_EXTERNAL

    # Cursor variable, starting at the current value of the list global.
    ptr = self.var(Type.pointer(mutant_t), self.builder.load(lst_var))

    zero = self.constant(Type.int(32), 0)
    one = self.constant(Type.int(32), 1)

    # index zero disables all mutants
    with self.ifelse(index == zero) as ifelse:
        with ifelse.then():
            self.builder.store(id_var.initializer, id_var)
            self.builder.store(str_var.initializer, str_var)
            self.ret()

    # iterate the list until we get to the Nth element
    with self.loop() as loop:
        with loop.condition() as setcond:
            setcond(index > one)
        with loop.body():
            # Field 2 is presumably the link to the next node -- TODO
            # confirm against the definition of mutant_t.
            # NOTE(review): `nxt` is the address of that field, not a
            # loaded value; this relies on how CVar/assign treat it.
            nxt = self.builder.gep(ptr.value, [c_int32(0), c_int32(2)])
            ptr.assign(CVar(self, nxt))
            index -= one

    # assign mutant id
    handle = self.builder.gep(ptr.value, [c_int32(0), c_int32(0)])
    handle = self.builder.load(handle)
    self.builder.store(handle, id_var)

    # assign module name containing the mutant
    handle = self.builder.gep(ptr.value, [c_int32(0), c_int32(1)])
    handle = self.builder.load(handle)
    self.builder.store(handle, str_var)
    self.ret()
def is_scalar_zero(builder, value):
    """Emit a predicate that is true when *value* equals zero.

    ``float``/``double`` values are compared with an ordered ``fcmp``;
    every other type goes through ``icmp``.
    """
    ty = value.type
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OEQ, zero, value)
    return builder.icmp(lc.ICMP_EQ, zero, value)
def int_power_func_body(context, builder, x, y):
    """Emit a loop computing ``x ** y`` by repeated multiplication.

    A counter slot is initialised with ``y`` and a result slot with 1.  The
    ``.cond`` block decrements the counter and continues to ``.body`` while
    the value it loaded is greater than zero; ``.body`` multiplies the
    result by ``x``.  A non-positive ``y`` therefore yields 1.

    Returns the value loaded from the result slot in the ``.exit`` block,
    where the builder is left positioned.
    """
    # Allocate the slots through cgutils.alloca_once rather than at the
    # current insertion point: an alloca emitted wherever this body happens
    # to be expanded (e.g. inside a caller's loop) is re-executed on every
    # pass and grows the stack.
    pcounter = cgutils.alloca_once(builder, y.type)
    presult = cgutils.alloca_once(builder, x.type)
    builder.store(y, pcounter)
    builder.store(Constant.int(x.type, 1), presult)

    bbcond = cgutils.append_basic_block(builder, ".cond")
    bbbody = cgutils.append_basic_block(builder, ".body")
    bbexit = cgutils.append_basic_block(builder, ".exit")

    builder.branch(bbcond)

    with cgutils.goto_block(builder, bbcond):
        counter = builder.load(pcounter)
        one = Constant.int(counter.type, 1)
        zero = Constant.null(counter.type)
        # Store the decremented counter, but test the value loaded before
        # the decrement.
        builder.store(builder.sub(counter, one), pcounter)
        pred = builder.icmp(lc.ICMP_SGT, counter, zero)
        builder.cbranch(pred, bbbody, bbexit)

    with cgutils.goto_block(builder, bbbody):
        result = builder.load(presult)
        builder.store(builder.mul(result, x), presult)
        builder.branch(bbcond)

    builder.position_at_end(bbexit)
    return builder.load(presult)
def body(self, ptr):
    """Free the pointer stored in ``ptr`` and reset the slot to null.

    Loads the pointer held at ``ptr.value``, frees it, overwrites the slot
    with a null ``i8*`` and emits the return.
    """
    builder = self.builder
    slot = ptr.value
    builder.free(builder.load(slot))
    # Clear the slot so the freed address is not left behind.
    builder.store(Constant.null(Type.pointer(Type.int(8))), slot)
    self.ret()
def int_power_func_body(context, builder, x, y):
    """Emit a loop computing ``x ** y`` by repeated multiplication.

    The ``.cond`` block decrements a counter (initially ``y``) and enters
    ``.body`` while the value it loaded is greater than zero; ``.body``
    multiplies an accumulator (initially 1) by ``x``.  The builder is left
    in ``.exit`` and the accumulator loaded there is returned.
    """
    counter_slot = cgutils.alloca_once(builder, y.type)
    result_slot = cgutils.alloca_once(builder, x.type)
    initial_result = Constant.int(x.type, 1)
    builder.store(y, counter_slot)
    builder.store(initial_result, result_slot)

    cond_block = cgutils.append_basic_block(builder, ".cond")
    body_block = cgutils.append_basic_block(builder, ".body")
    exit_block = cgutils.append_basic_block(builder, ".exit")

    builder.branch(cond_block)

    with cgutils.goto_block(builder, cond_block):
        remaining = builder.load(counter_slot)
        one = Constant.int(remaining.type, 1)
        zero = Constant.null(remaining.type)
        decremented = builder.sub(remaining, one)
        builder.store(decremented, counter_slot)
        # The test uses the value loaded before the decrement.
        keep_going = builder.icmp(lc.ICMP_SGT, remaining, zero)
        builder.cbranch(keep_going, body_block, exit_block)

    with cgutils.goto_block(builder, body_block):
        acc = builder.load(result_slot)
        builder.store(builder.mul(acc, x), result_slot)
        builder.branch(cond_block)

    builder.position_at_end(exit_block)
    return builder.load(result_slot)
def numpy_ufunc_kernel(context, builder, sig, args, kernel_class,
                       explicit_output=True):
    """Emit the element-wise loop of a ufunc-style kernel.

    Parameters
    ----------
    context, builder
        Code-generation context and IR builder.
    sig
        Signature of the call; ``sig.args`` describes ``args``.
    args
        Argument values.  With ``explicit_output=True`` the last entry is
        the output; otherwise a null value of the return type is appended
        as the output placeholder.
    kernel_class
        Called as ``kernel_class(context, builder, outer_sig)``; its
        ``generate(*inputs)`` produces the value for one element.
    explicit_output
        Whether ``args`` already contains the output argument.

    Returns
    -------
    The ``return_val`` of the prepared output argument.
    """
    # Work on a copy: appending the placeholder to ``args`` itself would
    # modify the caller's list (and fail outright when given a tuple).
    all_args = list(args)
    if explicit_output:
        tyargs = sig.args
    else:
        all_args.append(Constant.null(context.get_value_type(sig.return_type)))
        tyargs = sig.args + (sig.return_type,)

    arguments = [_prepare_argument(context, builder, arg, tyarg)
                 for arg, tyarg in zip(all_args, tyargs)]
    inputs = arguments[:-1]
    output = arguments[-1]

    # signature expects return type first, while we have it last:
    base_types = [a.base_type for a in arguments]
    outer_sig = typing.signature(*(base_types[-1:] + base_types[:-1]))
    kernel = kernel_class(context, builder, outer_sig)
    intpty = context.get_value_type(types.intp)

    indices = [inp.create_iter_indices() for inp in inputs]

    # The loop nest iterates over the output's shape.
    loopshape = output.shape
    with cgutils.loop_nest(builder, loopshape, intp=intpty) as loop_indices:
        vals_in = []
        for i, (index, arg) in enumerate(zip(indices, inputs)):
            index.update_indices(loop_indices, i)
            vals_in.append(arg.load_data(index.as_values()))

        val_out = kernel.generate(*vals_in)
        output.store_data(loop_indices, val_out)
    return output.return_val
def is_scalar_zero(builder, value):
    """Emit a predicate that is true when *value* equals zero.

    ``float``/``double`` values are compared with an ordered ``fcmp``;
    every other type goes through ``icmp``.
    """
    ty = value.type
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OEQ, zero, value)
    return builder.icmp(lc.ICMP_EQ, zero, value)
def offsetof(struct_type, fieldnum, builder):
    """Emit IR yielding the offset of a struct field as an ``int_type`` value.

    The offset is obtained by indexing field ``fieldnum`` from a null
    pointer to ``struct_type`` and converting the resulting address to an
    integer.

    ``fieldnum`` may be anything supporting ``__index__`` (converted to an
    ``int_type`` constant); any other value is passed to ``gep`` as is.
    """
    nullval = Constant.null(Type.pointer(struct_type))
    if hasattr(fieldnum, '__index__'):
        fieldnum = Constant.int(int_type, fieldnum.__index__())
    field_addr = builder.gep(nullval, [zero_p, fieldnum])
    # A pointer cannot be bitcast to an integer in LLVM IR; ptrtoint is the
    # instruction that performs this conversion.
    return builder.ptrtoint(field_addr, int_type)
def is_scalar_neg(builder, value):
    """Emit a predicate that is true when *value* is below zero.

    Integers are compared as signed; ``float``/``double`` use an ordered
    less-than.
    """
    ty = value.type
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OLT, value, zero)
    return builder.icmp(lc.ICMP_SLT, value, zero)
def offsetof(struct_type, fieldnum, builder):
    """Emit IR yielding the offset of a struct field as an ``int_type`` value.

    The offset is obtained by indexing field ``fieldnum`` from a null
    pointer to ``struct_type`` and converting the resulting address to an
    integer.

    ``fieldnum`` may be anything supporting ``__index__`` (converted to an
    ``int_type`` constant); any other value is passed to ``gep`` as is.
    """
    nullval = Constant.null(Type.pointer(struct_type))
    if hasattr(fieldnum, '__index__'):
        fieldnum = Constant.int(int_type, fieldnum.__index__())
    field_addr = builder.gep(nullval, [zero_p, fieldnum])
    # A pointer cannot be bitcast to an integer in LLVM IR; ptrtoint is the
    # instruction that performs this conversion.
    return builder.ptrtoint(field_addr, int_type)
def is_scalar_neg(builder, value):
    """Emit a predicate that is true when *value* is below zero.

    Integers are compared as signed; ``float``/``double`` use an ordered
    less-than.
    """
    ty = value.type
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OLT, value, zero)
    return builder.icmp(lc.ICMP_SLT, value, zero)
def get_item_pointer2(builder, data, shape, strides, layout, inds,
                      wraparound=False):
    """Emit IR computing a pointer to the array element at ``inds``.

    Parameters
    ----------
    builder
        IR builder used to emit the arithmetic.
    data
        Pointer to the first element.
    shape, strides
        Per-dimension extents and strides (sequences of IR values).
    layout
        ``'C'`` or ``'F'`` selects shape-based indexing applied with
        ``gep``; anything else uses ``strides`` and raw address arithmetic.
    inds
        One index value per dimension.
    wraparound
        When true, a negative index has the dimension length added to it.

    Returns
    -------
    The element pointer.  With no indices at all, ``data`` itself.
    """
    if not inds:
        # Zero-dimensional access: there is nothing to offset by and no
        # index value to take the integer type from.
        return data

    if wraparound:
        # Wraparound
        indices = []
        for ind, dimlen in zip(inds, shape):
            zero = Constant.null(ind.type)
            negative = builder.icmp(lc.ICMP_SLT, ind, zero)
            wrapped = builder.add(dimlen, ind)
            indices.append(builder.select(negative, wrapped, ind))
    else:
        indices = inds
    del inds
    intp = indices[0].type

    # Indexing code
    if layout in 'CF':
        steps = []
        # Compute steps for each dimension
        if layout == 'C':
            # C contiguous: step is the product of the trailing extents
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[i + 1:]:
                    last = builder.mul(last, j)
                steps.append(last)
        elif layout == 'F':
            # F contiguous: step is the product of the leading extents
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[:i]:
                    last = builder.mul(last, j)
                steps.append(last)
        else:
            raise Exception("unreachable")

        # Compute index
        loc = Constant.int(intp, 0)
        for i, s in zip(indices, steps):
            loc = builder.add(loc, builder.mul(i, s))
        return builder.gep(data, [loc])

    # Any layout: sum(stride * index) added to the raw address
    dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
    offset = functools.reduce(builder.add, dimoffs)
    base = builder.ptrtoint(data, offset.type)
    where = builder.add(base, offset)
    return builder.inttoptr(where, data.type)
def make_keywords(self, kws):
    """Build a null-terminated constant array of keyword-name strings.

    Each name in ``kws`` becomes a constant string; a null ``i8*`` entry is
    appended, the array is stored as the module global ``.kwlist`` and the
    global is returned cast to ``i8**``.
    """
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(name) for name in kws]
    entries.append(Constant.null(stringtype))
    array_const = Constant.array(stringtype, entries)
    kwlist_gv = cgutils.global_constant(self.module, ".kwlist", array_const)
    return Constant.bitcast(kwlist_gv, Type.pointer(stringtype))
def make_keywords(self, kws):
    """Build a null-terminated constant array of keyword-name strings.

    Each name in ``kws`` becomes a constant string; a null ``i8*`` entry is
    appended, the array is stored as the module global ``.kwlist`` and the
    global is returned cast to ``i8**``.
    """
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(name) for name in kws]
    entries.append(Constant.null(stringtype))
    array_const = Constant.array(stringtype, entries)
    kwlist_gv = cgutils.global_constant(self.module, ".kwlist", array_const)
    return Constant.bitcast(kwlist_gv, Type.pointer(stringtype))
def is_scalar_zero(builder, value):
    """
    Emit a predicate telling whether *value* equals zero.

    Pointer and struct values are rejected by assertion.  ``float`` and
    ``double`` use an ordered comparison; other types use ``icmp``.
    """
    ty = value.type
    assert not is_pointer(ty)
    assert not is_struct(ty)
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OEQ, zero, value)
    return builder.icmp(lc.ICMP_EQ, zero, value)
def is_scalar_zero(builder, value):
    """
    Emit a predicate telling whether *value* equals zero.

    Pointer and struct values are rejected by assertion.  ``float`` and
    ``double`` use an ordered comparison; other types use ``icmp``.
    """
    ty = value.type
    assert not is_pointer(ty)
    assert not is_struct(ty)
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_OEQ, zero, value)
    return builder.icmp(lc.ICMP_EQ, zero, value)
def is_true(self, builder, typ, val):
    """Emit a predicate giving the truth value of ``val`` of type ``typ``.

    * integers: true when different from zero;
    * reals: true when not equal to zero, NaN included;
    * complex: true when the real or the imaginary part is true.

    Raises ``NotImplementedError`` for any other type.
    """
    if typ in types.integer_domain:
        return builder.icmp(lc.ICMP_NE, val, Constant.null(val.type))
    elif typ in types.real_domain:
        # Unordered not-equal: NaN is not equal to zero and is truthy in
        # Python (bool(float('nan')) is True).  The ordered predicate would
        # report NaN as false.
        return builder.fcmp(lc.FCMP_UNE, val, Constant.real(val.type, 0))
    elif typ in types.complex_domain:
        cmplx = self.make_complex(typ)(self, builder, val)
        # Component type matching the complex width.
        fty = types.float32 if typ == types.complex64 else types.float64
        real_istrue = self.is_true(builder, fty, cmplx.real)
        imag_istrue = self.is_true(builder, fty, cmplx.imag)
        return builder.or_(real_istrue, imag_istrue)
    raise NotImplementedError("is_true", val, typ)
def is_not_scalar_zero(builder, value):
    """
    Emit a predicate telling whether *value* differs from zero.

    This is not simply the negation of ``is_scalar_zero``: the floating
    point comparison is unordered, so a NaN counts as "not zero".
    Pointer and struct values are rejected by assertion.
    """
    ty = value.type
    assert not is_pointer(ty)
    assert not is_struct(ty)
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_UNE, zero, value)
    return builder.icmp(lc.ICMP_NE, zero, value)
def is_not_scalar_zero(builder, value):
    """
    Emit a predicate telling whether *value* differs from zero.

    This is not simply the negation of ``is_scalar_zero``: the floating
    point comparison is unordered, so a NaN counts as "not zero".
    Pointer and struct values are rejected by assertion.
    """
    ty = value.type
    assert not is_pointer(ty)
    assert not is_struct(ty)
    zero = Constant.null(ty)
    if ty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_UNE, zero, value)
    return builder.icmp(lc.ICMP_NE, zero, value)
def get_item_pointer2(builder, data, shape, strides, layout, inds,
                      wraparound=False):
    """Emit IR computing a pointer to the array element at ``inds``.

    Parameters
    ----------
    builder
        IR builder used to emit the arithmetic.
    data
        Pointer to the first element.
    shape, strides
        Per-dimension extents and strides (sequences of IR values).
    layout
        ``'C'`` or ``'F'`` selects shape-based indexing applied with
        ``gep``; anything else uses ``strides`` and raw address arithmetic.
    inds
        One index value per dimension.
    wraparound
        When true, a negative index has the dimension length added to it.

    Returns
    -------
    The element pointer.  With no indices at all, ``data`` itself.
    """
    if not inds:
        # Zero-dimensional access: there is nothing to offset by and no
        # index value to take the integer type from.
        return data

    if wraparound:
        # Wraparound
        indices = []
        for ind, dimlen in zip(inds, shape):
            zero = Constant.null(ind.type)
            negative = builder.icmp(lc.ICMP_SLT, ind, zero)
            wrapped = builder.add(dimlen, ind)
            indices.append(builder.select(negative, wrapped, ind))
    else:
        indices = inds
    del inds
    intp = indices[0].type

    # Indexing code
    if layout in 'CF':
        steps = []
        # Compute steps for each dimension
        if layout == 'C':
            # C contiguous: step is the product of the trailing extents
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[i + 1:]:
                    last = builder.mul(last, j)
                steps.append(last)
        elif layout == 'F':
            # F contiguous: step is the product of the leading extents
            for i in range(len(shape)):
                last = Constant.int(intp, 1)
                for j in shape[:i]:
                    last = builder.mul(last, j)
                steps.append(last)
        else:
            raise Exception("unreachable")

        # Compute index
        loc = Constant.int(intp, 0)
        for i, s in zip(indices, steps):
            loc = builder.add(loc, builder.mul(i, s))
        return builder.gep(data, [loc])

    # Any layout: sum(stride * index) added to the raw address
    dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
    offset = functools.reduce(builder.add, dimoffs)
    base = builder.ptrtoint(data, offset.type)
    where = builder.add(base, offset)
    return builder.inttoptr(where, data.type)
def generate(self,*args): assert len(args) == 2 # numerator and denominator builder=self.builder context=self.context tyinputs = self.outer_sig.args tyout = self.outer_sig.return_type tyout_llvm = context.get_data_type(tyout) inner_sig = typing.signature(self.loop_out_types[0], *self.loop_in_types) fn = context.get_function(operator, inner_sig) num, den = args iszero = cgutils.is_scalar_zero(builder, den) with cgutils.ifelse(builder, iszero, expect=False) as (then, orelse): with then: # Divide by zero if ((tyinputs[0] in types.real_domain or tyinputs[1] in types.real_domain) or not numpy_support.int_divbyzero_returns_zero) or \ operator=='/': # If num is float and is 0 also, return Nan; else # return Inf outltype = context.get_data_type(types.float64) shouldretnan = cgutils.is_scalar_zero(builder, num) nan = Constant.real(outltype, float("nan")) inf = Constant.real(outltype, float("inf")) tempres = builder.select(shouldretnan, nan, inf) res_then = context.cast(builder, tempres, types.float64, tyout) elif tyout in types.signed_domain and \ not numpy_support.int_divbyzero_returns_zero: res_then = Constant.int(tyout_llvm, 0x1 << (den.type.width-1)) else: res_then = Constant.null(tyout_llvm) bb_then = builder.basic_block with orelse: # Normal cast_args = [self.context.cast(self.builder, val, inty, outty) for val, inty, outty in zip(args, self.outer_sig.args, self.loop_in_types)] tempres = fn(builder, cast_args) res_else = context.cast(builder, tempres, self.loop_out_types[0], tyout) bb_else = builder.basic_block out = builder.phi(tyout_llvm) out.add_incoming(res_then, bb_then) out.add_incoming(res_else, bb_else) return out
def build_wrapper(self, api, builder, closure, args, kws):
    """Emit the body of the Python-callable wrapper around ``self.func``.

    Parses ``args``/``kws`` into one object slot per declared argument,
    converts each object to its native form, calls the compiled function
    with the environment taken from ``closure``, runs the per-argument
    cleanups and finally translates the call status into a return value:
    the boxed result (or None) on success, the API's null object on error.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)
    keywords = self.make_keywords(fndesc.args)
    fmt = self.make_const_string("O" * nargs)

    # One object slot per declared argument.
    objs = []
    for _ in range(nargs):
        objs.append(api.alloca_obj())

    parseok = api.parse_tuple_and_keywords(args, kws, fmt, keywords, *objs)
    parse_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    # Unbox every argument, remembering its destructor.
    innerargs = []
    cleanups = []
    for slot, argty in zip(objs, fndesc.argtypes):
        native, dtor = api.to_native_arg(builder.load(slot), argty)
        innerargs.append(native)
        cleanups.append(dtor)

    # The wrapped function doesn't take a full closure, only
    # the Environment object.
    env = self.context.get_env_from_closure(builder, closure)

    status, res = self.context.call_function(builder, self.func,
                                             fndesc.restype,
                                             fndesc.argtypes,
                                             innerargs, env)

    # Do clean up
    for dtor in cleanups:
        dtor()

    # Success: return None or the boxed result.
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()
        builder.ret(api.from_native_return(res, fndesc.restype))

    # !ok && !exc: user exception raised
    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def make_keywords(self, kws):
    """Build a null-terminated constant array of keyword-name strings.

    Each name in ``kws`` becomes a constant string, followed by a null
    ``i8*`` terminator.  The array initialises an internal, constant
    module global named ``.kwlist``; that global is returned cast to
    ``i8**``.
    """
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(name) for name in kws]
    entries.append(Constant.null(stringtype))
    array_const = Constant.array(stringtype, entries)

    kwlist_gv = self.module.add_global_variable(array_const.type,
                                                name=".kwlist")
    kwlist_gv.global_constant = True
    kwlist_gv.initializer = array_const
    kwlist_gv.linkage = lc.LINKAGE_INTERNAL

    return Constant.bitcast(kwlist_gv, Type.pointer(stringtype))
def make_keywords(self, kws):
    """Build a null-terminated constant array of keyword-name strings.

    Each name in ``kws`` becomes a constant string, followed by a null
    ``i8*`` terminator.  The array initialises an internal, constant
    module global named ``.kwlist``; that global is returned cast to
    ``i8**``.
    """
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(name) for name in kws]
    entries.append(Constant.null(stringtype))
    array_const = Constant.array(stringtype, entries)

    kwlist_gv = self.module.add_global_variable(array_const.type,
                                                name=".kwlist")
    kwlist_gv.global_constant = True
    kwlist_gv.initializer = array_const
    kwlist_gv.linkage = lc.LINKAGE_INTERNAL

    return Constant.bitcast(kwlist_gv, Type.pointer(stringtype))
def build_wrapper(self, api, builder, args, kws):
    """Emit the body of the Python-callable wrapper around ``self.func``.

    Parses ``args``/``kws`` into one object slot per declared argument,
    converts each object to its native form, calls the compiled function,
    runs the per-argument cleanups and translates the call status into a
    return value: the boxed result (or None) on success, the API's null
    object on error.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)
    keywords = self.make_keywords(fndesc.args)
    fmt = self.make_const_string("O" * nargs)

    # One object slot per declared argument.
    objs = []
    for _ in range(nargs):
        objs.append(api.alloca_obj())

    parseok = api.parse_tuple_and_keywords(args, kws, fmt, keywords, *objs)
    parse_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    # Unbox every argument, remembering its destructor.
    innerargs = []
    cleanups = []
    for slot, argty in zip(objs, fndesc.argtypes):
        native, dtor = api.to_native_arg(builder.load(slot), argty)
        innerargs.append(native)
        cleanups.append(dtor)

    status, res = self.context.call_function(builder, self.func,
                                             fndesc.restype,
                                             fndesc.argtypes,
                                             innerargs)

    # Do clean up
    for dtor in cleanups:
        dtor()

    # Success: return None or the boxed result.
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()
        builder.ret(api.from_native_return(res, fndesc.restype))

    # !ok && !exc: user exception raised
    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def build_wrapper(self, api, builder, closure, args, kws):
    """Emit the body of the Python-callable wrapper around ``self.func``.

    Parses ``args``/``kws`` into one object slot per declared argument,
    registers each with an ``_ArgManager`` to obtain its native value,
    calls the compiled function with the environment taken from
    ``closure``, emits the manager's cleanup and translates the call
    status into a return value: the boxed result (or None) on success,
    the API's null object on error.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)
    keywords = self.make_keywords(fndesc.args)
    fmt = self.make_const_string("O" * nargs)

    # One object slot per declared argument.
    objs = []
    for _ in range(nargs):
        objs.append(api.alloca_obj())

    parseok = api.parse_tuple_and_keywords(args, kws, fmt, keywords, *objs)
    parse_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    argman = _ArgManager(builder, api, nargs)
    innerargs = [argman.add_arg(slot, argty)
                 for slot, argty in zip(objs, fndesc.argtypes)]

    # The wrapped function doesn't take a full closure, only
    # the Environment object.
    env = self.context.get_env_from_closure(builder, closure)

    status, res = self.context.call_function(builder, self.func,
                                             fndesc.restype,
                                             fndesc.argtypes,
                                             innerargs, env)

    # Do clean up
    argman.emit_cleanup()

    # Success: return None or the boxed result.
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()
        builder.ret(api.from_native_return(res, fndesc.restype))

    # !ok && !exc: user exception raised
    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def int_divmod(context, builder, x, y):
    """
    Emit floor division and modulo of two signed integers of equal type.

    Follows Objects/intobject.c: start from the truncating quotient and
    remainder; when the remainder is non-zero and its sign differs from
    ``y``'s, add ``y`` to the remainder and subtract one from the quotient.

        if (xmody && ((y ^ xmody) < 0)) {
            xmody += y;
            --xdivy;
        }

    Returns the pair of phi nodes ``(quotient, remainder)``.
    """
    assert x.type == y.type

    quot = builder.sdiv(x, y)
    rem = builder.srem(x, y)    # Intel has divmod instruction

    zero = Constant.null(y.type)
    one = Constant.int(y.type, 1)

    # (y ^ rem) < 0 exactly when the sign bits of y and rem differ.
    signs_differ = builder.icmp(lc.ICMP_SLT, builder.xor(y, rem), zero)
    rem_nonzero = builder.icmp(lc.ICMP_NE, rem, zero)
    needs_fixup = builder.and_(rem_nonzero, signs_differ)

    plain_bb = builder.basic_block
    with cgutils.ifthen(builder, needs_fixup):
        rem_fixed = builder.add(rem, y)
        quot_fixed = builder.sub(quot, one)
        fixup_bb = builder.basic_block

    # Merge the unadjusted and adjusted values.
    resdiv = builder.phi(y.type)
    resdiv.add_incoming(quot, plain_bb)
    resdiv.add_incoming(quot_fixed, fixup_bb)

    resmod = builder.phi(x.type)
    resmod.add_incoming(rem, plain_bb)
    resmod.add_incoming(rem_fixed, fixup_bb)

    return resdiv, resmod
def int_divmod(context, builder, x, y):
    """
    Emit floor division and modulo of two signed integers of equal type.

    Follows Objects/intobject.c: start from the truncating quotient and
    remainder; when the remainder is non-zero and its sign differs from
    ``y``'s, add ``y`` to the remainder and subtract one from the quotient.

        if (xmody && ((y ^ xmody) < 0)) {
            xmody += y;
            --xdivy;
        }

    Returns the pair of phi nodes ``(quotient, remainder)``.
    """
    assert x.type == y.type

    quot = builder.sdiv(x, y)
    rem = builder.srem(x, y)    # Intel has divmod instruction

    zero = Constant.null(y.type)
    one = Constant.int(y.type, 1)

    # (y ^ rem) < 0 exactly when the sign bits of y and rem differ.
    signs_differ = builder.icmp(lc.ICMP_SLT, builder.xor(y, rem), zero)
    rem_nonzero = builder.icmp(lc.ICMP_NE, rem, zero)
    needs_fixup = builder.and_(rem_nonzero, signs_differ)

    plain_bb = builder.basic_block
    with cgutils.ifthen(builder, needs_fixup):
        rem_fixed = builder.add(rem, y)
        quot_fixed = builder.sub(quot, one)
        fixup_bb = builder.basic_block

    # Merge the unadjusted and adjusted values.
    resdiv = builder.phi(y.type)
    resdiv.add_incoming(quot, plain_bb)
    resdiv.add_incoming(quot_fixed, fixup_bb)

    resmod = builder.phi(x.type)
    resmod.add_incoming(rem, plain_bb)
    resmod.add_incoming(rem_fixed, fixup_bb)

    return resdiv, resmod
def get_item_pointer(builder, aryty, ary, inds, wraparound=False):
    """Emit IR computing a pointer to the element of ``ary`` at ``inds``.

    Parameters
    ----------
    builder
        IR builder used to emit the arithmetic.
    aryty
        Array type; ``ndim`` and ``layout`` are read from it.
    ary
        Array structure; ``shape``, ``strides`` and ``data`` are read.
    inds
        One index value per dimension.
    wraparound
        When true, a negative index has the dimension length added to it.

    Returns
    -------
    The element pointer.  With no indices at all, ``ary.data`` itself.
    """
    if not inds:
        # Zero-dimensional access: there is nothing to offset by and no
        # index value to take the integer type from.
        return ary.data

    if wraparound:
        # Wraparound
        shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
        indices = []
        for ind, dimlen in zip(inds, shapes):
            zero = Constant.null(ind.type)
            negative = builder.icmp(lc.ICMP_SLT, ind, zero)
            wrapped = builder.add(dimlen, ind)
            indices.append(builder.select(negative, wrapped, ind))
    else:
        indices = inds
    del inds
    intp = indices[0].type

    # Indexing code
    if aryty.layout == 'C':
        # C contiguous: step is the product of the trailing extents
        shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
        steps = []
        for i in range(len(shapes)):
            last = Constant.int(intp, 1)
            for j in shapes[i + 1:]:
                last = builder.mul(last, j)
            steps.append(last)

        loc = Constant.int(intp, 0)
        for i, s in zip(indices, steps):
            loc = builder.add(loc, builder.mul(i, s))
        return builder.gep(ary.data, [loc])

    # Any layout: sum(stride * index) added to the raw address
    strides = unpack_tuple(builder, ary.strides, count=aryty.ndim)
    dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
    offset = functools.reduce(builder.add, dimoffs)
    base = builder.ptrtoint(ary.data, offset.type)
    where = builder.add(base, offset)
    return builder.inttoptr(where, ary.data.type)
def get_item_pointer(builder, aryty, ary, inds, wraparound=False):
    """Emit IR computing a pointer to the element of ``ary`` at ``inds``.

    Parameters
    ----------
    builder
        IR builder used to emit the arithmetic.
    aryty
        Array type; ``ndim`` and ``layout`` are read from it.
    ary
        Array structure; ``shape``, ``strides`` and ``data`` are read.
    inds
        One index value per dimension.
    wraparound
        When true, a negative index has the dimension length added to it.

    Returns
    -------
    The element pointer.  With no indices at all, ``ary.data`` itself.
    """
    if not inds:
        # Zero-dimensional access: there is nothing to offset by and no
        # index value to take the integer type from.
        return ary.data

    if wraparound:
        # Wraparound
        shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
        indices = []
        for ind, dimlen in zip(inds, shapes):
            zero = Constant.null(ind.type)
            negative = builder.icmp(lc.ICMP_SLT, ind, zero)
            wrapped = builder.add(dimlen, ind)
            indices.append(builder.select(negative, wrapped, ind))
    else:
        indices = inds
    del inds
    intp = indices[0].type

    # Indexing code
    if aryty.layout == 'C':
        # C contiguous: step is the product of the trailing extents
        shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
        steps = []
        for i in range(len(shapes)):
            last = Constant.int(intp, 1)
            for j in shapes[i + 1:]:
                last = builder.mul(last, j)
            steps.append(last)

        loc = Constant.int(intp, 0)
        for i, s in zip(indices, steps):
            loc = builder.add(loc, builder.mul(i, s))
        return builder.gep(ary.data, [loc])

    # Any layout: sum(stride * index) added to the raw address
    strides = unpack_tuple(builder, ary.strides, count=aryty.ndim)
    dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
    offset = functools.reduce(builder.add, dimoffs)
    base = builder.ptrtoint(ary.data, offset.type)
    where = builder.add(base, offset)
    return builder.inttoptr(where, ary.data.type)
def build_wrapper(self, api, builder, args, kws):
    """Emit the body of the Python-callable wrapper around ``self.func``.

    Parses ``args``/``kws`` into one object slot per declared argument,
    converts each object to its native form, calls the compiled function
    and translates the call status into a return value: the boxed result
    (or None) on success; otherwise a native error is raised when no
    exception is flagged, and the API's null object is returned.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)
    keywords = self.make_keywords(fndesc.args)
    fmt = self.make_const_string("O" * nargs)

    # One object slot per declared argument.
    objs = []
    for _ in range(nargs):
        objs.append(api.alloca_obj())

    parseok = api.parse_tuple_and_keywords(args, kws, fmt, keywords, *objs)
    parse_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    innerargs = [api.to_native_arg(builder.load(slot), argty)
                 for slot, argty in zip(objs, fndesc.argtypes)]

    status, res = self.context.call_function(builder, self.func,
                                             fndesc.restype,
                                             fndesc.argtypes,
                                             innerargs)

    # Success: return None or the boxed result.
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()
        builder.ret(api.from_native_return(res, fndesc.restype))

    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        # User exception raised
        # TODO we will just raise a RuntimeError for now.
        api.raise_native_error("error in native function: %s" %
                               fndesc.mangled_name)

    builder.ret(api.get_null_object())
def define_error_gv(postfix):
    """Add a zero-initialised integer global named after the wrapper.

    The name is ``wrapfn.name + postfix``; ``module`` and ``wrapfn`` are
    read from the enclosing scope.  Returns the new global variable.
    """
    gv_name = wrapfn.name + postfix
    gv = module.add_global_variable(Type.int(), name=gv_name)
    zero = Constant.null(gv.type.pointee)
    gv.initializer = zero
    return gv
def generate_kernel_wrapper(self, func, argtypes):
    """Build the ``cudaPy_``-prefixed void wrapper around kernel ``func``.

    The wrapper converts its arguments, calls ``func`` and returns on
    success.  If the call fails without an exception flagged, the status
    code is written to the ``__errcode__`` global with an atomic
    compare-and-exchange against zero; when that exchange succeeds, the
    thread and block ids are stored in the ``__tid?__`` / ``__ctaid?__``
    globals.  The module is verified before the wrapper is returned.
    """
    module = func.module
    wrapper_argtys = self.get_arguments(func.type.pointee)
    wrapper_fnty = Type.function(Type.void(), wrapper_argtys)
    wrapfn = module.add_function(wrapper_fnty, name="cudaPy_" + func.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = module.add_global_variable(Type.int(),
                                        name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for axis in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % axis))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % axis))

    callargs = [self.get_argument_value(builder, argty, argval)
                for argty, argval in zip(argtypes, wrapfn.args)]

    status, _ = self.call_function(builder, func, types.void, argtypes,
                                   callargs)

    # Check error status
    with cgutils.if_likely(builder, status.ok):
        builder.ret_void()

    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        # User exception raised
        no_error = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded
        previous = builder.atomic_cmpxchg(gv_exc, no_error, status.code,
                                          "monotonic")
        recorded = builder.icmp(ICMP_EQ, previous, no_error)

        # If the xchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with cgutils.ifthen(builder, recorded):
            for dim, slot in zip("xyz", gv_tid):
                builder.store(sreg.tid(dim), slot)

            for dim, slot in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(dim), slot)

    builder.ret_void()
    # force inline
    inline_function(status.code)
    module.verify()
    return wrapfn
def define_error_gv(postfix):
    """Add a zero-initialised integer global named after the wrapper.

    The name is ``wrapfn.name + postfix``; ``module`` and ``wrapfn`` are
    read from the enclosing scope.  Returns the new global variable.
    """
    gv_name = wrapfn.name + postfix
    gv = module.add_global_variable(Type.int(), name=gv_name)
    zero = Constant.null(gv.type.pointee)
    gv.initializer = zero
    return gv
def generate_kernel_wrapper(self, func, argtypes):
    """Build the ``cudaPy_``-prefixed void wrapper around kernel ``func``.

    The wrapper converts its arguments, calls ``func`` and returns on
    success.  If the call fails without an exception flagged, the status
    code is written to the ``__errcode__`` global with an atomic
    compare-and-exchange against zero; when that exchange succeeds, the
    thread and block ids are stored in the ``__tid?__`` / ``__ctaid?__``
    globals.  The module is verified before the wrapper is returned.
    """
    module = func.module
    wrapper_argtys = self.get_arguments(func.type.pointee)
    wrapper_fnty = Type.function(Type.void(), wrapper_argtys)
    wrapfn = module.add_function(wrapper_fnty, name="cudaPy_" + func.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = module.add_global_variable(Type.int(),
                                        name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for axis in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % axis))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % axis))

    callargs = [self.get_argument_value(builder, argty, argval)
                for argty, argval in zip(argtypes, wrapfn.args)]

    status, _ = self.call_function(builder, func, types.void, argtypes,
                                   callargs)

    # Check error status
    with cgutils.if_likely(builder, status.ok):
        builder.ret_void()

    not_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_exc):
        # User exception raised
        no_error = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded
        previous = builder.atomic_cmpxchg(gv_exc, no_error, status.code,
                                          "monotonic")
        recorded = builder.icmp(ICMP_EQ, previous, no_error)

        # If the xchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with cgutils.ifthen(builder, recorded):
            for dim, slot in zip("xyz", gv_tid):
                builder.store(sreg.tid(dim), slot)

            for dim, slot in zip("xyz", gv_ctaid):
                builder.store(sreg.ctaid(dim), slot)

    builder.ret_void()
    # force inline
    inline_function(status.code)
    module.verify()
    return wrapfn
def impl(context, builder, sig, args):
    """Emit an element-wise binary operation over two arrays into a third.

    ``sig.args`` / ``args`` carry the two inputs followed by the output
    array.  A loop nest over the first input's shape loads one element
    from each input, computes the result with ``core`` and stores it in
    the output.  When ``divbyzero`` is set, a zero second operand gets a
    substitute result instead of calling ``core``.

    ``core`` and ``divbyzero`` are not parameters; they are read from the
    enclosing scope.  Returns ``out``, the output array argument.
    """
    [tyvx, tywy, tyout] = sig.args
    [vx, wy, out] = args
    assert tyvx.dtype == tywy.dtype
    ndim = tyvx.ndim

    xary = context.make_array(tyvx)(context, builder, vx)
    yary = context.make_array(tywy)(context, builder, wy)
    oary = context.make_array(tyout)(context, builder, out)

    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    loopshape = cgutils.unpack_tuple(builder, xary.shape, ndim)

    # Per-array shape, strides, data pointer and layout, in x, y, out order.
    xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                 for ary in (xary, yary, oary)]
    xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                   for ary in (xary, yary, oary)]
    xyo_data = [ary.data for ary in (xary, yary, oary)]
    xyo_layout = [ty.layout for ty in (tyvx, tywy, tyout)]

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
        # Element pointers for x, y and out at the current loop indices.
        [px, py, po] = [cgutils.get_item_pointer2(builder,
                                                  data=data, shape=shape,
                                                  strides=strides,
                                                  layout=layout,
                                                  inds=indices)
                        for data, shape, strides, layout
                        in zip(xyo_data, xyo_shape, xyo_strides,
                               xyo_layout)]

        x = builder.load(px)
        y = builder.load(py)
        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then,
                                                                   orelse):
                with then:
                    # Divide by zero
                    if tyout.dtype in types.real_domain:
                        # If x is float and is 0 also, return Nan; else
                        # return Inf
                        outltype = context.get_data_type(tyout.dtype)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        res = builder.select(shouldretnan, nan, inf)
                    elif (tyout.dtype in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        # Only the top bit set, at the operand's bit width.
                        res = Constant.int(y.type,
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(y.type)

                    # The stored value must match the output element type.
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    res = core(builder, (x, y))
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
        else:
            # Handle other operations
            res = core(builder, (x, y))
            assert res.type == po.type.pointee, (res.type,
                                                 po.type.pointee)
            builder.store(res, po)

    return out
def _prepare_call_to_object_mode(context, builder, func, signature, args,
                                 env):
    """Emit code that wraps native arrays as Python objects and calls *func*.

    For every ``(arg, argtype)`` pair a call to the external helper
    ``numba_ndarray_new`` is emitted to build a Python array object.  If a
    helper call yields NULL, control jumps straight to the common return
    block.  Otherwise *func* is called with the objects, its result is
    decref'ed, and the return block decrefs every argument object slot.

    Returns a pair ``(status.code, error_flag)`` where ``error_flag`` is the
    value loaded from the one-bit error slot at the return block.
    """
    module = cgutils.get_module(builder)

    current_fn = cgutils.get_function(builder)
    return_block = current_fn.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Helper prototype:
    #   PyObject* ndarray_new(int nd,
    #                         npy_intp *dims,    /* shape */
    #                         npy_intp *strides,
    #                         void* data,
    #                         int type_num,
    #                         int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    helper_argtys = [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr,
                     ll_int, ll_int]
    helper_ty = Type.function(ll_pyobj, helper_argtys)
    array_new = module.get_or_insert_function(helper_ty,
                                              name="numba_ndarray_new")

    # The error flag starts out set; it is overwritten as work progresses.
    error_slot = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_slot)

    object_slots = []
    objects = []
    for native_arr, arrtype in zip(args, signature.args):
        slot = cgutils.alloca_once(builder, ll_pyobj)
        object_slots.append(slot)

        # Start from NULL so the slot always holds a defined value.
        builder.store(Constant.null(ll_pyobj), slot)

        array = context.make_array(arrtype)(context, builder, ref=native_arr)

        zero = Constant.int(ll_int, 0)

        # Pull the pieces of the native array structure
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'),
                              [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        np_dtype = np.dtype(str(arrtype.dtype))

        # Element type information needed by the helper
        type_num = Constant.int(ll_int, np_dtype.num)
        itemsize = Constant.int(ll_int, np_dtype.itemsize)

        # Build the Python array object
        call_args = [nd, dims, strides, data, type_num, itemsize]
        pyobj = builder.call(array_new, call_args)
        builder.store(pyobj, slot)
        objects.append(pyobj)

        # Record failure and bail out to the return block on NULL
        is_null = cgutils.is_null(builder, pyobj)
        builder.store(is_null, error_slot)
        cgutils.cbranch_or_continue(builder, is_null, return_block)

    # Invoke the ufunc core with every argument typed as pyobject
    object_sig = [types.pyobject] * len(objects)
    status, retval = context.call_function(builder, func, ll_pyobj,
                                           object_sig, objects, env=env)
    builder.store(status.err, error_slot)

    # The returned object is not needed
    pyapi.decref(retval)

    builder.branch(return_block)

    # Common return block: drop every argument object
    builder.position_at_end(return_block)
    for slot in object_slots:
        pyapi.decref(builder.load(slot))

    return status.code, builder.load(error_slot)
def sizeof(builder, ty, intp):
    """Emit code computing the size of LLVM type *ty* as an *intp* integer.

    The size is obtained by indexing element 1 from a null ``ty*`` pointer
    and converting the resulting address to an integer.
    """
    one = Constant.int(Type.int(), 1)
    null_ptr = Constant.null(Type.pointer(ty))
    second_element = builder.gep(null_ptr, [one])
    return builder.ptrtoint(second_element, intp)
def int_abs_impl(context, builder, sig, args):
    """Emit the absolute value of a single integer argument.

    A signed "less than zero" comparison selects between the negated value
    and the value itself.
    """
    (operand,) = args
    zero = Constant.null(operand.type)
    is_negative = builder.icmp(lc.ICMP_SLT, operand, zero)
    flipped = builder.neg(operand)
    return builder.select(is_negative, flipped, operand)
def impl(context, builder, sig, args):
    """Emit an element-wise loop for a binary operation with broadcasting.

    ``sig.args`` / ``args`` hold, in order, the two inputs and the output
    array.  Each input is either an array (``types.Array``) or a scalar in
    ``types.number_domain``.  The loop nest runs over the output array's
    shape; array inputs get their own index slots so that dimensions of
    size 1 stay at index 0.

    Both operands are cast to a common ``promote_type`` before the
    implementation looked up with ``context.get_function(funckey, sig)`` is
    called, and the result is cast to the output dtype before being stored.

    ``asfloat``, ``divbyzero`` and ``funckey`` are free variables from an
    enclosing scope that is not part of this block.

    Returns ``out``.

    Raises ``TypeError`` if an input is neither an array nor a number, or if
    a floating-point result would be stored in a uint64 array on win32.
    """
    [tyinp1, tyinp2, tyout] = sig.args
    [inp1, inp2, out] = args

    # Classify the first operand.  For a scalar, inp1_ndim is only a
    # placeholder: every later use is guarded by `not scalar_inp1`.
    if isinstance(tyinp1, types.Array):
        scalar_inp1 = False
        scalar_tyinp1 = tyinp1.dtype
        inp1_ndim = tyinp1.ndim
    elif tyinp1 in types.number_domain:
        scalar_inp1 = True
        scalar_tyinp1 = tyinp1
        inp1_ndim = 1
    else:
        raise TypeError('unknown type for first input operand')

    # Same classification for the second operand.
    if isinstance(tyinp2, types.Array):
        scalar_inp2 = False
        scalar_tyinp2 = tyinp2.dtype
        inp2_ndim = tyinp2.ndim
    elif tyinp2 in types.number_domain:
        scalar_inp2 = True
        scalar_tyinp2 = tyinp2
        inp2_ndim = 1
    else:
        raise TypeError('unknown type for second input operand')

    out_ndim = tyout.ndim

    # Pick the common computation type: float64 if forced or if either
    # operand is real, else int64 if either is signed, else uint64.
    if asfloat:
        promote_type = types.float64
    elif scalar_tyinp1 in types.real_domain or \
            scalar_tyinp2 in types.real_domain:
        promote_type = types.float64
    elif scalar_tyinp1 in types.signed_domain or \
            scalar_tyinp2 in types.signed_domain:
        promote_type = types.int64
    else:
        promote_type = types.uint64

    result_type = promote_type

    # Temporary hack for __ftol2 llvm bug. Don't allow storing
    # float results in uint64 array on windows.
    if result_type in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    sig = typing.signature(result_type, promote_type, promote_type)

    if not scalar_inp1:
        i1ary = context.make_array(tyinp1)(context, builder, inp1)
    if not scalar_inp2:
        i2ary = context.make_array(tyinp2)(context, builder, inp2)
    oary = context.make_array(tyout)(context, builder, out)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # Unpack shape/strides/data/layout for every array operand.
    if not scalar_inp1:
        inp1_shape = cgutils.unpack_tuple(builder, i1ary.shape, inp1_ndim)
        inp1_strides = cgutils.unpack_tuple(builder, i1ary.strides, inp1_ndim)
        inp1_data = i1ary.data
        inp1_layout = tyinp1.layout

    if not scalar_inp2:
        inp2_shape = cgutils.unpack_tuple(builder, i2ary.shape, inp2_ndim)
        inp2_strides = cgutils.unpack_tuple(builder, i2ary.strides, inp2_ndim)
        inp2_data = i2ary.data
        inp2_layout = tyinp2.layout

    out_shape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
    out_strides = cgutils.unpack_tuple(builder, oary.strides, out_ndim)
    out_data = oary.data
    out_layout = tyout.layout

    ZERO = Constant.int(Type.int(intpty.width), 0)
    ONE = Constant.int(Type.int(intpty.width), 1)

    # One stack slot per input dimension, initialised to 0.  These hold the
    # per-input indices used inside the loop.
    inp1_indices = None
    if not scalar_inp1:
        inp1_indices = []
        for i in range(inp1_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp1_indices.append(x)

    inp2_indices = None
    if not scalar_inp2:
        inp2_indices = []
        for i in range(inp2_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp2_indices.append(x)

    loopshape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:

        # Increment input indices.
        # Since the output dimensions are already being incremented,
        # we'll use that to set the input indices. In order to
        # handle broadcasting, any input dimension of size 1 won't be
        # incremented.
        def build_increment_blocks(inp_indices, inp_shape, inp_ndim, inp_num):
            # One basic block per input dimension, chained in order, plus a
            # final block where code generation continues afterwards.
            bb_inc_inp_index = [cgutils.append_basic_block(builder,
                '.inc_inp{0}_index{1}'.format(inp_num, str(i))) for i in range(inp_ndim)]

            bb_end_inc_index = cgutils.append_basic_block(builder,
                '.end_inc{0}_index'.format(inp_num))

            builder.branch(bb_inc_inp_index[0])
            for i in range(inp_ndim):
                with cgutils.goto_block(builder, bb_inc_inp_index[i]):
                    # If the shape of this dimension is 1, then leave the
                    # index at 0 so that this dimension is broadcasted over
                    # the corresponding input and output dimensions.
                    cond = builder.icmp(ICMP_UGT, inp_shape[i], ONE)
                    with cgutils.ifthen(builder, cond):
                        # Input dimension i is paired with output dimension
                        # out_ndim - inp_ndim + i (trailing alignment).
                        builder.store(indices[out_ndim-inp_ndim+i], inp_indices[i])
                    if i + 1 == inp_ndim:
                        builder.branch(bb_end_inc_index)
                    else:
                        builder.branch(bb_inc_inp_index[i+1])

            builder.position_at_end(bb_end_inc_index)

        if not scalar_inp1:
            build_increment_blocks(inp1_indices, inp1_shape, inp1_ndim, '1')
        if not scalar_inp2:
            build_increment_blocks(inp2_indices, inp2_shape, inp2_ndim, '2')

        # Fetch the operands: scalars are used directly, array elements are
        # loaded through the per-input indices.
        if scalar_inp1:
            x = inp1
        else:
            inds = [builder.load(index) for index in inp1_indices]
            px = cgutils.get_item_pointer2(builder,
                                           data=inp1_data,
                                           shape=inp1_shape,
                                           strides=inp1_strides,
                                           layout=inp1_layout,
                                           inds=inds)
            x = builder.load(px)

        if scalar_inp2:
            y = inp2
        else:
            inds = [builder.load(index) for index in inp2_indices]
            py = cgutils.get_item_pointer2(builder,
                                           data=inp2_data,
                                           shape=inp2_shape,
                                           strides=inp2_strides,
                                           layout=inp2_layout,
                                           inds=inds)
            y = builder.load(py)

        # The output element is addressed with the loop indices themselves.
        po = cgutils.get_item_pointer2(builder,
                                       data=out_data,
                                       shape=out_shape,
                                       strides=out_strides,
                                       layout=out_layout,
                                       inds=indices)

        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then, orelse):
                with then:
                    # Divide by zero
                    if (scalar_tyinp1 in types.real_domain or
                            scalar_tyinp2 in types.real_domain) or \
                            not numpy_support.int_divbyzero_returns_zero:
                        # Produce NaN when x is zero as well, otherwise Inf
                        # (the select below keys on x == 0), then cast to
                        # the output dtype.
                        outltype = context.get_data_type(result_type)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        tempres = builder.select(shouldretnan, nan, inf)
                        res = context.cast(builder, tempres, result_type,
                                           tyout.dtype)
                    # NOTE(review): this branch requires
                    # `not int_divbyzero_returns_zero`, which already selects
                    # the branch above, so it looks unreachable -- confirm
                    # the intended condition.
                    elif tyout.dtype in types.signed_domain and \
                            not numpy_support.int_divbyzero_returns_zero:
                        res = Constant.int(context.get_data_type(tyout.dtype),
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(context.get_data_type(tyout.dtype))

                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
                    d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
                    tempres = fnwork(builder, [d_x, d_y])
                    res = context.cast(builder, tempres, result_type, tyout.dtype)
                    assert res.type == po.type.pointee, (res.type, po.type.pointee)
                    builder.store(res, po)
        else:
            # Handle non-division operations
            d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
            d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
            tempres = fnwork(builder, [d_x, d_y])
            res = context.cast(builder, tempres, result_type, tyout.dtype)
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def sizeof(llvm_type, builder):
    """Emit code computing the size of *llvm_type* as an ``int_type`` value.

    The size is taken as the address of element 1 of an array that starts
    at a null ``llvm_type*`` pointer.

    ``one_i`` and ``int_type`` are module-level names defined outside this
    block.
    """
    nullval = Constant.null(Type.pointer(llvm_type))
    size = builder.gep(nullval, [one_i])
    # LLVM's bitcast cannot convert a pointer into an integer; ptrtoint is
    # the instruction for that conversion.
    return builder.ptrtoint(size, int_type)
def impl(context, builder, sig, args):
    """Emit an element-wise loop for a binary operation with broadcasting.

    ``sig.args`` / ``args`` hold, in order, the two inputs and the output
    array.  Each input is either an array (``types.Array``) or a scalar in
    ``types.number_domain``.  The loop nest runs over the output array's
    shape; array inputs get their own index slots so that dimensions of
    size 1 stay at index 0.

    Both operands are cast to a common ``promote_type`` before the
    implementation looked up with ``context.get_function(funckey, sig)`` is
    called, and the result is cast to the output dtype before being stored.

    ``asfloat``, ``divbyzero``, ``true_divide`` and ``funckey`` are free
    variables from an enclosing scope that is not part of this block.

    Returns ``out``.

    Raises ``TypeError`` if an input is neither an array nor a number, or if
    a floating-point result would be stored in a uint64 array on win32.
    """
    [tyinp1, tyinp2, tyout] = sig.args
    [inp1, inp2, out] = args

    # Classify the first operand.  For a scalar, inp1_ndim is only a
    # placeholder: every later use is guarded by `not scalar_inp1`.
    if isinstance(tyinp1, types.Array):
        scalar_inp1 = False
        scalar_tyinp1 = tyinp1.dtype
        inp1_ndim = tyinp1.ndim
    elif tyinp1 in types.number_domain:
        scalar_inp1 = True
        scalar_tyinp1 = tyinp1
        inp1_ndim = 1
    else:
        raise TypeError('unknown type for first input operand')

    # Same classification for the second operand.
    if isinstance(tyinp2, types.Array):
        scalar_inp2 = False
        scalar_tyinp2 = tyinp2.dtype
        inp2_ndim = tyinp2.ndim
    elif tyinp2 in types.number_domain:
        scalar_inp2 = True
        scalar_tyinp2 = tyinp2
        inp2_ndim = 1
    else:
        raise TypeError('unknown type for second input operand')

    out_ndim = tyout.ndim

    # Pick the common computation type: float64 if forced or if either
    # operand is real, else int64 if either is signed, else uint64.
    if asfloat:
        promote_type = types.float64
    elif scalar_tyinp1 in types.real_domain or \
            scalar_tyinp2 in types.real_domain:
        promote_type = types.float64
    elif scalar_tyinp1 in types.signed_domain or \
            scalar_tyinp2 in types.signed_domain:
        promote_type = types.int64
    else:
        promote_type = types.uint64

    result_type = promote_type

    # Temporary hack for __ftol2 llvm bug. Don't allow storing
    # float results in uint64 array on windows.
    if result_type in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    sig = typing.signature(result_type, promote_type, promote_type)

    if not scalar_inp1:
        i1ary = context.make_array(tyinp1)(context, builder, inp1)
    if not scalar_inp2:
        i2ary = context.make_array(tyinp2)(context, builder, inp2)
    oary = context.make_array(tyout)(context, builder, out)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # Unpack shape/strides/data/layout for every array operand.
    if not scalar_inp1:
        inp1_shape = cgutils.unpack_tuple(builder, i1ary.shape, inp1_ndim)
        inp1_strides = cgutils.unpack_tuple(builder, i1ary.strides, inp1_ndim)
        inp1_data = i1ary.data
        inp1_layout = tyinp1.layout

    if not scalar_inp2:
        inp2_shape = cgutils.unpack_tuple(builder, i2ary.shape, inp2_ndim)
        inp2_strides = cgutils.unpack_tuple(builder, i2ary.strides, inp2_ndim)
        inp2_data = i2ary.data
        inp2_layout = tyinp2.layout

    out_shape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)
    out_strides = cgutils.unpack_tuple(builder, oary.strides, out_ndim)
    out_data = oary.data
    out_layout = tyout.layout

    ZERO = Constant.int(Type.int(intpty.width), 0)
    ONE = Constant.int(Type.int(intpty.width), 1)

    # One stack slot per input dimension, initialised to 0.  These hold the
    # per-input indices used inside the loop.
    inp1_indices = None
    if not scalar_inp1:
        inp1_indices = []
        for i in range(inp1_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp1_indices.append(x)

    inp2_indices = None
    if not scalar_inp2:
        inp2_indices = []
        for i in range(inp2_ndim):
            x = builder.alloca(Type.int(intpty.width))
            builder.store(ZERO, x)
            inp2_indices.append(x)

    loopshape = cgutils.unpack_tuple(builder, oary.shape, out_ndim)

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:

        # Increment input indices.
        # Since the output dimensions are already being incremented,
        # we'll use that to set the input indices. In order to
        # handle broadcasting, any input dimension of size 1 won't be
        # incremented.
        def build_increment_blocks(inp_indices, inp_shape, inp_ndim, inp_num):
            # One basic block per input dimension, chained in order, plus a
            # final block where code generation continues afterwards.
            bb_inc_inp_index = [cgutils.append_basic_block(builder,
                '.inc_inp{0}_index{1}'.format(inp_num, str(i))) for i in range(inp_ndim)]

            bb_end_inc_index = cgutils.append_basic_block(builder,
                '.end_inc{0}_index'.format(inp_num))

            builder.branch(bb_inc_inp_index[0])
            for i in range(inp_ndim):
                with cgutils.goto_block(builder, bb_inc_inp_index[i]):
                    # If the shape of this dimension is 1, then leave the
                    # index at 0 so that this dimension is broadcasted over
                    # the corresponding input and output dimensions.
                    cond = builder.icmp(ICMP_UGT, inp_shape[i], ONE)
                    with cgutils.ifthen(builder, cond):
                        # Input dimension i is paired with output dimension
                        # out_ndim - inp_ndim + i (trailing alignment).
                        builder.store(indices[out_ndim-inp_ndim+i], inp_indices[i])
                    if i + 1 == inp_ndim:
                        builder.branch(bb_end_inc_index)
                    else:
                        builder.branch(bb_inc_inp_index[i+1])

            builder.position_at_end(bb_end_inc_index)

        if not scalar_inp1:
            build_increment_blocks(inp1_indices, inp1_shape, inp1_ndim, '1')
        if not scalar_inp2:
            build_increment_blocks(inp2_indices, inp2_shape, inp2_ndim, '2')

        # Fetch the operands: scalars are used directly, array elements are
        # loaded through the per-input indices.
        if scalar_inp1:
            x = inp1
        else:
            inds = [builder.load(index) for index in inp1_indices]
            px = cgutils.get_item_pointer2(builder,
                                           data=inp1_data,
                                           shape=inp1_shape,
                                           strides=inp1_strides,
                                           layout=inp1_layout,
                                           inds=inds)
            x = builder.load(px)

        if scalar_inp2:
            y = inp2
        else:
            inds = [builder.load(index) for index in inp2_indices]
            py = cgutils.get_item_pointer2(builder,
                                           data=inp2_data,
                                           shape=inp2_shape,
                                           strides=inp2_strides,
                                           layout=inp2_layout,
                                           inds=inds)
            y = builder.load(py)

        # The output element is addressed with the loop indices themselves.
        po = cgutils.get_item_pointer2(builder,
                                       data=out_data,
                                       shape=out_shape,
                                       strides=out_strides,
                                       layout=out_layout,
                                       inds=indices)

        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then, orelse):
                with then:
                    # Divide by zero
                    if ((scalar_tyinp1 in types.real_domain or
                            scalar_tyinp2 in types.real_domain) or
                            not numpy_support.int_divbyzero_returns_zero) or \
                            true_divide:
                        # Produce NaN when x is zero as well, otherwise Inf
                        # (the select below keys on x == 0), then cast to
                        # the output dtype.
                        outltype = context.get_data_type(result_type)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        tempres = builder.select(shouldretnan, nan, inf)
                        res = context.cast(builder, tempres, result_type,
                                           tyout.dtype)
                    # NOTE(review): this branch requires
                    # `not int_divbyzero_returns_zero`, which already selects
                    # the branch above, so it looks unreachable -- confirm
                    # the intended condition.
                    elif tyout.dtype in types.signed_domain and \
                            not numpy_support.int_divbyzero_returns_zero:
                        res = Constant.int(context.get_data_type(tyout.dtype),
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(context.get_data_type(tyout.dtype))

                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
                    d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
                    tempres = fnwork(builder, [d_x, d_y])
                    res = context.cast(builder, tempres, result_type, tyout.dtype)
                    assert res.type == po.type.pointee, (res.type, po.type.pointee)
                    builder.store(res, po)
        else:
            # Handle non-division operations
            d_x = context.cast(builder, x, scalar_tyinp1, promote_type)
            d_y = context.cast(builder, y, scalar_tyinp2, promote_type)
            tempres = fnwork(builder, [d_x, d_y])
            res = context.cast(builder, tempres, result_type, tyout.dtype)
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def _prepare_call_to_object_mode(context, builder, func, signature, args,
                                 env):
    """Emit code that wraps native arrays as Python objects and calls *func*.

    For every ``(arg, argtype)`` pair a call to the external helper
    ``numba_ndarray_new`` is emitted to build a Python array object.  If a
    helper call yields NULL, control jumps straight to the common return
    block.  Otherwise *func* is called with the objects, its result is
    decref'ed, and the return block decrefs every argument object slot.

    Returns a pair ``(status.code, error_flag)`` where ``error_flag`` is the
    value loaded from the one-bit error slot at the return block.
    """
    module = cgutils.get_module(builder)

    current_fn = cgutils.get_function(builder)
    return_block = current_fn.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Helper prototype:
    #   PyObject* ndarray_new(int nd,
    #                         npy_intp *dims,    /* shape */
    #                         npy_intp *strides,
    #                         void* data,
    #                         int type_num,
    #                         int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    helper_argtys = [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr,
                     ll_int, ll_int]
    helper_ty = Type.function(ll_pyobj, helper_argtys)
    array_new = module.get_or_insert_function(helper_ty,
                                              name="numba_ndarray_new")

    # The error flag starts out set; it is overwritten as work progresses.
    error_slot = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_slot)

    object_slots = []
    objects = []
    for native_arr, arrtype in zip(args, signature.args):
        slot = cgutils.alloca_once(builder, ll_pyobj)
        object_slots.append(slot)

        # Start from NULL so the slot always holds a defined value.
        builder.store(Constant.null(ll_pyobj), slot)

        array = context.make_array(arrtype)(context, builder, ref=native_arr)

        zero = Constant.int(ll_int, 0)

        # Pull the pieces of the native array structure
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'),
                              [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        np_dtype = np.dtype(str(arrtype.dtype))

        # Element type information needed by the helper
        type_num = Constant.int(ll_int, np_dtype.num)
        itemsize = Constant.int(ll_int, np_dtype.itemsize)

        # Build the Python array object
        call_args = [nd, dims, strides, data, type_num, itemsize]
        pyobj = builder.call(array_new, call_args)
        builder.store(pyobj, slot)
        objects.append(pyobj)

        # Record failure and bail out to the return block on NULL
        is_null = cgutils.is_null(builder, pyobj)
        builder.store(is_null, error_slot)
        cgutils.cbranch_or_continue(builder, is_null, return_block)

    # Invoke the ufunc core with every argument typed as pyobject
    object_sig = [types.pyobject] * len(objects)
    status, retval = context.call_function(builder, func, ll_pyobj,
                                           object_sig, objects, env=env)
    builder.store(status.err, error_slot)

    # The returned object is not needed
    pyapi.decref(retval)

    builder.branch(return_block)

    # Common return block: drop every argument object
    builder.position_at_end(return_block)
    for slot in object_slots:
        pyapi.decref(builder.load(slot))

    return status.code, builder.load(error_slot)
def get_null_value(ltype):
    """Return the null constant of the LLVM type *ltype*."""
    null_constant = Constant.null(ltype)
    return null_constant
def to_native_value(self, obj, typ):
    """Emit code converting the Python object *obj* to a native *typ* value.

    Python objects are passed through untouched; booleans, integers, floats,
    complex numbers, NumPy datetimes/timedeltas and arrays each get their
    own conversion.  Raises ``NotImplementedError`` for any other type.
    """
    # Python objects need no conversion
    if isinstance(typ, types.Object) or typ == types.pyobject:
        return obj

    # Boolean: compare the truth value against zero
    if typ == types.boolean:
        truth = self.object_istrue(obj)
        falsy = Constant.null(truth.type)
        return self.builder.icmp(lc.ICMP_NE, truth, falsy)

    # Unsigned integers: go through unsigned long long, then truncate
    if typ in types.unsigned_domain:
        as_long = self.number_long(obj)
        wide = self.long_as_ulonglong(as_long)
        self.decref(as_long)
        target = self.context.get_argument_type(typ)
        return self.builder.trunc(wide, target)

    # Signed integers: go through long long, then truncate
    if typ in types.signed_domain:
        as_long = self.number_long(obj)
        wide = self.long_as_longlong(as_long)
        self.decref(as_long)
        target = self.context.get_argument_type(typ)
        return self.builder.trunc(wide, target)

    # float32: obtain a double, then narrow it
    if typ == types.float32:
        as_float = self.number_float(obj)
        double_val = self.float_as_double(as_float)
        self.decref(as_float)
        target = self.context.get_argument_type(typ)
        return self.builder.fptrunc(double_val, target)

    # float64: the double is the result
    if typ == types.float64:
        as_float = self.number_float(obj)
        double_val = self.float_as_double(as_float)
        self.decref(as_float)
        return double_val

    # Complex: always fill a complex128 first
    if typ in (types.complex128, types.complex64):
        c128_cls = self.context.make_complex(types.complex128)
        c128 = c128_cls(self.context, self.builder)
        c128_ptr = c128._getpointer()
        status = self.complex_adaptor(obj, c128_ptr)
        failed = cgutils.is_false(self.builder, status)

        # On failure, return a null object from the generated function
        with cgutils.if_unlikely(self.builder, failed):
            self.builder.ret(self.get_null_object())

        if typ == types.complex64:
            # Narrow both components to float32
            c64_cls = self.context.make_complex(typ)
            c64 = c64_cls(self.context, self.builder)
            real32 = self.context.cast(self.builder, c128.real,
                                       types.float64, types.float32)
            imag32 = self.context.cast(self.builder, c128.imag,
                                       types.float64, types.float32)
            c64.real = real32
            c64.imag = imag32
            return c64._getvalue()
        return c128._getvalue()

    if isinstance(typ, types.NPDatetime):
        return self.extract_np_datetime(obj)

    if isinstance(typ, types.NPTimedelta):
        return self.extract_np_timedelta(obj)

    if isinstance(typ, types.Array):
        return self.to_native_array(typ, obj)

    raise NotImplementedError(typ)
def get_null_object(self):
    """Return the null constant of this object's ``pyobj`` LLVM type."""
    pyobj_type = self.pyobj
    return Constant.null(pyobj_type)
def get_null_value(ltype):
    """Return the null constant of the LLVM type *ltype*."""
    null_constant = Constant.null(ltype)
    return null_constant
def int_abs_impl(context, builder, sig, args):
    """Emit the absolute value of a single integer argument.

    A signed "less than zero" comparison selects between the negated value
    and the value itself.
    """
    (operand,) = args
    zero = Constant.null(operand.type)
    is_negative = builder.icmp(lc.ICMP_SLT, operand, zero)
    flipped = builder.neg(operand)
    return builder.select(is_negative, flipped, operand)
def sizeof(builder, ty, intp):
    """Emit code computing the size of LLVM type *ty* as an *intp* integer.

    The size is obtained by indexing element 1 from a null ``ty*`` pointer
    and converting the resulting address to an integer.
    """
    one = Constant.int(Type.int(), 1)
    null_ptr = Constant.null(Type.pointer(ty))
    second_element = builder.gep(null_ptr, [one])
    return builder.ptrtoint(second_element, intp)
def sizeof(llvm_type, builder):
    """Emit code computing the size of *llvm_type* as an ``int_type`` value.

    The size is taken as the address of element 1 of an array that starts
    at a null ``llvm_type*`` pointer.

    ``one_i`` and ``int_type`` are module-level names defined outside this
    block.
    """
    nullval = Constant.null(Type.pointer(llvm_type))
    size = builder.gep(nullval, [one_i])
    # LLVM's bitcast cannot convert a pointer into an integer; ptrtoint is
    # the instruction for that conversion.
    return builder.ptrtoint(size, int_type)
def get_constant_null(self, ty):
    """Return the null constant of the LLVM value type mapped from *ty*."""
    return Constant.null(self.get_value_type(ty))
def impl(context, builder, sig, args):
    """Emit an element-wise loop for a binary operation.

    ``sig.args`` / ``args`` hold, in order, the two inputs and the output
    array.  When ``scalar_inputs`` is true both inputs are used directly as
    scalar values and the output is unpacked with ``ndim`` fixed at 1;
    otherwise both inputs are arrays indexed with the same loop indices as
    the output (no broadcasting).

    ``scalar_inputs``, ``asfloat``, ``divbyzero`` and ``funckey`` are free
    variables from an enclosing scope that is not part of this block.

    Returns ``out``.

    Raises ``TypeError`` when a real-typed first input would be stored in a
    uint64 array on win32.
    """
    [tyinp1, tyinp2, tyout] = sig.args
    [inp1, inp2, out] = args
    if scalar_inputs:
        ndim = 1
    else:
        ndim = tyinp1.ndim

    # Temporary hack for __ftol2 llvm bug. Don't allow storing
    # float results in uint64 array on windows.
    # The check is written twice because a scalar input is its own element
    # type, while an array input carries it in `.dtype`.
    if scalar_inputs and tyinp1 in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')
    if not scalar_inputs and tyinp1.dtype in types.real_domain and \
            tyout.dtype is types.uint64 and \
            sys.platform.startswith('win32'):
        raise TypeError('Cannot store result in uint64 array')

    if not scalar_inputs:
        i1ary = context.make_array(tyinp1)(context, builder, inp1)
        i2ary = context.make_array(tyinp2)(context, builder, inp2)
    oary = context.make_array(tyout)(context, builder, out)

    # Select the signature used to look up the implementation: all-float64
    # for float-forced non-division operations, otherwise the element types
    # of the operands with the output dtype as return type.
    if asfloat and not divbyzero:
        sig = typing.signature(types.float64, types.float64, types.float64)
    else:
        if scalar_inputs:
            sig = typing.signature(tyout.dtype, tyinp1, tyinp2)
        else:
            sig = typing.signature(tyout.dtype, tyinp1.dtype, tyinp2.dtype)

    fnwork = context.get_function(funckey, sig)
    intpty = context.get_value_type(types.intp)

    # TODO handle differing shape by mimicking broadcasting
    loopshape = cgutils.unpack_tuple(builder, oary.shape, ndim)

    # Per-array metadata lists; only the output array is present when the
    # inputs are scalars.
    if scalar_inputs:
        xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                     for ary in (oary,)]
        xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                       for ary in (oary,)]
        xyo_data = [ary.data for ary in (oary,)]
        xyo_layout = [ty.layout for ty in (tyout,)]
    else:
        xyo_shape = [cgutils.unpack_tuple(builder, ary.shape, ndim)
                     for ary in (i1ary, i2ary, oary)]
        xyo_strides = [cgutils.unpack_tuple(builder, ary.strides, ndim)
                       for ary in (i1ary, i2ary, oary)]
        xyo_data = [ary.data for ary in (i1ary, i2ary, oary)]
        xyo_layout = [ty.layout for ty in (tyinp1, tyinp2, tyout)]

    with cgutils.loop_nest(builder, loopshape, intp=intpty) as indices:
        # Element pointers for the current loop indices.
        if scalar_inputs:
            [po] = [cgutils.get_item_pointer2(builder,
                                              data=data, shape=shape,
                                              strides=strides,
                                              layout=layout,
                                              inds=indices)
                    for data, shape, strides, layout
                    in zip(xyo_data, xyo_shape, xyo_strides,
                           xyo_layout)]
        else:
            [px, py, po] = [cgutils.get_item_pointer2(builder,
                                                      data=data, shape=shape,
                                                      strides=strides,
                                                      layout=layout,
                                                      inds=indices)
                            for data, shape, strides, layout
                            in zip(xyo_data, xyo_shape, xyo_strides,
                                   xyo_layout)]

        if scalar_inputs:
            x = inp1
            y = inp2
        else:
            x = builder.load(px)
            y = builder.load(py)
        if divbyzero:
            # Handle division
            iszero = cgutils.is_scalar_zero(builder, y)
            with cgutils.ifelse(builder, iszero, expect=False) as (then, orelse):
                with then:
                    # Divide by zero
                    if ((scalar_inputs and tyinp2 in types.real_domain) or
                            (not scalar_inputs and
                             tyinp2.dtype in types.real_domain) or
                            not numpy_support.int_divbyzero_returns_zero):
                        # Produce NaN when x is zero as well, otherwise Inf
                        # (the select below keys on x == 0).
                        outltype = context.get_data_type(tyout.dtype)
                        shouldretnan = cgutils.is_scalar_zero(builder, x)
                        nan = Constant.real(outltype, float("nan"))
                        inf = Constant.real(outltype, float("inf"))
                        res = builder.select(shouldretnan, nan, inf)
                    # NOTE(review): both signed-integer branches below require
                    # `not int_divbyzero_returns_zero`, which already selects
                    # the branch above, so they look unreachable -- confirm
                    # the intended conditions.
                    elif (scalar_inputs and tyout in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        res = Constant.int(context.get_data_type(tyout),
                                           0x1 << (y.type.width-1))
                    elif (not scalar_inputs and
                            tyout.dtype in types.signed_domain and
                            not numpy_support.int_divbyzero_returns_zero):
                        res = Constant.int(context.get_data_type(tyout.dtype),
                                           0x1 << (y.type.width-1))
                    else:
                        res = Constant.null(context.get_data_type(tyout.dtype))

                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
                with orelse:
                    # Normal
                    tempres = fnwork(builder, (x, y))
                    # Cast the result to the output dtype, using the first
                    # input's element type as the source type when it is real.
                    if scalar_inputs and tyinp1 in types.real_domain:
                        res = context.cast(builder, tempres,
                                           tyinp1, tyout.dtype)
                    elif (not scalar_inputs and
                            tyinp1.dtype in types.real_domain):
                        res = context.cast(builder, tempres,
                                           tyinp1.dtype, tyout.dtype)
                    else:
                        # NOTE(review): the source type is hard-coded to
                        # float64 here although `sig` above declares
                        # tyout.dtype as the return type -- confirm.
                        res = context.cast(builder, tempres,
                                           types.float64, tyout.dtype)
                    assert res.type == po.type.pointee, \
                        (str(res.type), str(po.type.pointee))
                    builder.store(res, po)
        else:
            # Handle non-division operations
            if asfloat:
                # Compute in float64, then cast to the output dtype.
                if scalar_inputs:
                    d_x = context.cast(builder, x, tyinp1, types.float64)
                    d_y = context.cast(builder, y, tyinp2, types.float64)
                else:
                    d_x = context.cast(builder, x, tyinp1.dtype, types.float64)
                    d_y = context.cast(builder, y, tyinp2.dtype, types.float64)
                tempres = fnwork(builder, [d_x, d_y])
                res = context.cast(builder, tempres,
                                   types.float64, tyout.dtype)
            elif scalar_inputs:
                # Cast only when the first input's type differs from the
                # output dtype.
                if tyinp1 != tyout.dtype:
                    tempres = fnwork(builder, [x, y])
                    res = context.cast(builder, tempres, tyinp1, tyout.dtype)
                else:
                    res = fnwork(builder, (x, y))
            elif tyinp1.dtype != tyout.dtype:
                tempres = fnwork(builder, [x, y])
                res = context.cast(builder, tempres, tyinp1.dtype,
                                   tyout.dtype)
            else:
                res = fnwork(builder, (x, y))
            assert res.type == po.type.pointee, (res.type, po.type.pointee)
            builder.store(res, po)

    return out
def get_dummy_value(self):
    """Return the null constant of the type given by ``get_dummy_type()``."""
    dummy_type = self.get_dummy_type()
    return Constant.null(dummy_type)
# Size of a native pointer, in bits.
_plat_bits = 8 * struct_.calcsize('@P')

# Basic LLVM types shared by the definitions below.
_int8 = Type.int(8)
_int32 = Type.int(32)
_void_star = Type.pointer(_int8)
_int8_star = _void_star

# Integer type used for the reference count field; its width is taken from
# ctypes' c_size_t.
_sizeof_py_ssize_t = ctypes.sizeof(ctypes.c_size_t)
_llvm_py_ssize_t = Type.int(8 * _sizeof_py_ssize_t)

# Object header layout and its initializer.  When _trace_refs_ is set, the
# header is prefixed with two extra pointer fields.
if not _trace_refs_:
    _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
        Constant.int(_llvm_py_ssize_t, 1),    # ob_refcnt
        Constant.null(_void_star),            # ob_type
    ])
else:
    _pyobject_head = Type.struct(
        [_void_star, _void_star, _llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
        Constant.null(_void_star),            # _ob_next
        Constant.null(_void_star),            # _ob_prev
        Constant.int(_llvm_py_ssize_t, 1),    # ob_refcnt
        Constant.null(_void_star),            # ob_type
    ])

_pyobject_head_p = Type.pointer(_pyobject_head)