def populate_array(array, data, shape, strides, itemsize, meminfo,
                   parent=None):
    """
    Helper function for populating array structures.
    This avoids forgetting to set fields.

    *array* is the structure proxy to fill in; its ``_context``,
    ``_builder`` and ``_datamodel`` attributes are used.  *data*, *shape*,
    *strides* and *itemsize* are stored as given.  *meminfo* and *parent*
    may be None, in which case a null constant of the corresponding
    field type is stored instead.

    The ``nitems`` field is the product of the dimensions in *shape*.
    An empty shape denotes a 0-d array, which holds exactly one item
    (the empty product), so ``nitems`` is 1 in that case.

    Returns *array*.  Raises ValueError if the set of fields being
    assigned differs from the fields declared by the data model.
    """
    context = array._context
    builder = array._builder
    datamodel = array._datamodel
    required_fields = set(datamodel._fields)

    def null_field(name):
        # Null constant of the LLVM value type backing field *name*
        return Constant.null(
            context.get_value_type(datamodel.get_type(name)))

    if meminfo is None:
        meminfo = null_field('meminfo')
    if parent is None:
        parent = null_field('parent')

    # Calc num of items from shape
    # (note: empty shape => 0-d array, therefore nitems stays at 1)
    nitems = context.get_constant(types.intp, 1)
    for axlen in cgutils.unpack_tuple(builder, shape, shape.type.count):
        nitems = builder.mul(nitems, axlen)

    attrs = dict(shape=shape,
                 strides=strides,
                 data=data,
                 itemsize=itemsize,
                 meminfo=meminfo,
                 parent=parent,
                 nitems=nitems)

    # Make sure that we set exactly the fields the data model declares
    got_fields = set(attrs)
    if got_fields != required_fields:
        missing = required_fields - got_fields
        unexpected = got_fields - required_fields
        problems = []
        if missing:
            problems.append("missing {0}".format(sorted(missing)))
        if unexpected:
            problems.append("unexpected {0}".format(sorted(unexpected)))
        raise ValueError("; ".join(problems))

    # Set field value
    for name, value in attrs.items():
        setattr(array, name, value)

    return array
def init_generator_state(self, lower):
    """
    Store an all-null value into the generator state structure, so that
    cleanup code never decref's uninitialized state variables.
    """
    state_ptr = self.gen_state_ptr
    zeroed_state = Constant.null(state_ptr.type.pointee)
    lower.builder.store(zeroed_state, state_ptr)
def int_power_func_body(context, builder, x, y):
    """
    Emit a loop that multiplies a running product (starting at 1) by *x*
    once per iteration, and return the final product loaded from its
    stack slot.

    The loop counter starts at *y* and the body runs while the counter,
    compared as a signed integer, is greater than zero; a zero or
    negative *y* therefore yields 1.
    """
    # Stack slots for the loop counter and the running product
    counter_slot = cgutils.alloca_once(builder, y.type)
    product_slot = cgutils.alloca_once(builder, x.type)

    builder.store(y, counter_slot)
    builder.store(Constant.int(x.type, 1), product_slot)

    cond_block = cgutils.append_basic_block(builder, ".cond")
    body_block = cgutils.append_basic_block(builder, ".body")
    exit_block = cgutils.append_basic_block(builder, ".exit")

    builder.branch(cond_block)

    with cgutils.goto_block(builder, cond_block):
        remaining = builder.load(counter_slot)
        one = Constant.int(remaining.type, 1)
        zero = Constant.null(remaining.type)
        # The decremented counter is written back first; the test below
        # uses the value loaded *before* the decrement.
        builder.store(builder.sub(remaining, one), counter_slot)
        keep_going = builder.icmp(lc.ICMP_SGT, remaining, zero)
        builder.cbranch(keep_going, body_block, exit_block)

    with cgutils.goto_block(builder, body_block):
        product = builder.load(product_slot)
        builder.store(builder.mul(product, x), product_slot)
        builder.branch(cond_block)

    builder.position_at_end(exit_block)
    return builder.load(product_slot)
def int_abs_impl(context, builder, sig, args):
    """
    Emit the absolute value of a single signed integer argument: select
    the negation when the operand is below zero, the operand otherwise.
    """
    (operand,) = args
    zero = Constant.null(operand.type)
    is_negative = builder.icmp(lc.ICMP_SLT, operand, zero)
    flipped = builder.neg(operand)
    magnitude = builder.select(is_negative, flipped, operand)
    return impl_ret_untracked(context, builder, sig.return_type, magnitude)
def is_scalar_neg(builder, value):
    """
    Return a predicate telling whether *value* is negative.
    *value* is assumed to be signed.
    """
    zero = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        # Ordered compare: false when value is NaN
        return builder.fcmp(lc.FCMP_OLT, value, zero)
    return builder.icmp(lc.ICMP_SLT, value, zero)
def _scalar_pred_against_zero(builder, value, fcond, icond):
    """
    Compare the scalar *value* against the null constant of its type.

    *fcond* is the comparison used for float/double values, *icond* the
    one used for integer values.  Raises TypeError for any other type.
    """
    valty = value.type
    zero = Constant.null(valty)
    if isinstance(valty, (ir.FloatType, ir.DoubleType)):
        return builder.fcmp(fcond, value, zero)
    if isinstance(valty, ir.IntType):
        return builder.icmp(icond, value, zero)
    raise TypeError("unexpected value type %s" % (valty,))
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the CPython wrapper around the compiled function.

    The tuple *args* is unpacked into one stack slot per declared
    argument type; every non-omitted argument is converted through the
    argument manager; the compiled function is called with the
    environment extracted from *closure*; on an OK status the result is
    boxed and returned, otherwise the error is raised and a null object
    is returned.  *kws* is not used by this implementation.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.argtypes)

    # One object slot per expected positional argument
    objs = [api.alloca_obj() for _ in range(nargs)]
    parseok = api.unpack_tuple(args, fndesc.qualname, nargs, nargs, *objs)

    unpack_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                 Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, unpack_failed):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    endblk = builder.append_basic_block("arg.end")
    with builder.goto_block(endblk):
        builder.ret(api.get_null_object())

    # Extract the Environment object from the Closure
    envptr, env_manager = self.get_env(api, builder, closure)

    cleanup_manager = _ArgManager(self.context, builder, api,
                                  env_manager, endblk, nargs)

    # Compute the arguments to the compiled Numba function.
    # An omitted value has no native counterpart: its dummy Python
    # object is ignored and None is passed in its place.
    innerargs = [
        None if isinstance(ty, types.Omitted)
        else cleanup_manager.add_arg(builder.load(obj), ty)
        for obj, ty in zip(objs, fndesc.argtypes)
    ]

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    status, retval = self.context.call_conv.call_function(
        builder, self.func, fndesc.restype, fndesc.argtypes,
        innerargs, env=envptr)

    # Do clean up
    self.debug_print(builder, "# callwrapper: emit_cleanup")
    cleanup_manager.emit_cleanup()
    self.debug_print(builder, "# callwrapper: emit_cleanup end")

    # Determine return status
    with builder.if_then(status.is_ok, likely=True):
        # Ok => return boxed Python value
        with builder.if_then(status.is_none):
            api.return_none()

        retty = self._simplified_return_type()
        boxed = api.from_native_return(retty, retval, env_manager)
        builder.ret(boxed)

    # Error out
    self.context.call_conv.raise_error(builder, api, status)
    builder.ret(api.get_null_object())
def make_keywords(self, kws):
    """
    Build a global constant array holding one constant string per name
    in *kws*, followed by a null pointer, and return it bitcast to a
    pointer to i8*.
    """
    char_ptr = Type.pointer(Type.int(8))
    entries = [self.make_const_string(k) for k in kws]
    # Null-pointer entry closing the list
    entries.append(Constant.null(char_ptr))
    table = Constant.array(char_ptr, entries)
    table_gv = cgutils.global_constant(self.module, ".kwlist", table)
    return Constant.bitcast(table_gv, Type.pointer(char_ptr))
def print_varargs(context, builder, sig, args):
    """
    Generic 'print' wrapper for arbitrary argument types.

    Each argument is dispatched to the "print_item" implementation
    registered for its type in the signature.  A single space is printed
    between consecutive arguments and a newline after the last one.
    """
    vprint = nvvmutils.declare_vprint(builder.module)

    sep = context.insert_string_const_addrspace(builder, " ")
    eol = context.insert_string_const_addrspace(builder, "\n")

    for pos, (argtype, argval) in enumerate(zip(sig.args, args)):
        print_item = context.get_function(
            "print_item", typing.signature(types.none, argtype))
        print_item(builder, [argval])
        # Separator after every argument except the last one
        if pos < len(args) - 1:
            builder.call(vprint, (sep, Constant.null(voidptr)))

    builder.call(vprint, (eol, Constant.null(voidptr)))

    return context.get_dummy_value()
def is_true(self, builder, typ, val):
    """
    Return a predicate for the truth value of *val* of Numba type *typ*.

    Integers are compared not-equal to zero; reals use an unordered
    not-equal compare against 0; a complex value is true when either
    its real or its imaginary part is.  Raises NotImplementedError for
    any other type.
    """
    if typ in types.integer_domain:
        zero = Constant.null(val.type)
        return builder.icmp(lc.ICMP_NE, val, zero)

    if typ in types.real_domain:
        zero = Constant.real(val.type, 0)
        return builder.fcmp(lc.FCMP_UNE, val, zero)

    if typ in types.complex_domain:
        cmplx = self.make_complex(typ)(self, builder, val)
        part_type = typ.underlying_float
        real_istrue = self.is_true(builder, part_type, cmplx.real)
        imag_istrue = self.is_true(builder, part_type, cmplx.imag)
        return builder.or_(real_istrue, imag_istrue)

    raise NotImplementedError("is_true", val, typ)
def alloca_once(builder, ty, size=None, name='', zfill=False):
    """
    Allocate stack memory in the entry block of the function that
    ``builder`` is currently pointing at, and return the pointer.

    ``ty`` is the LLVM type to allocate.  The optional ``size`` argument
    sets the number of elements to allocate (the default is 1) and the
    optional ``name`` sets the symbol name inside the LLVM IR, for
    debugging.  If ``zfill`` is set, a null constant of type ``ty`` is
    also stored into the new memory.
    """
    with builder.goto_entry_block():
        slot = builder.alloca(ty, size=size, name=name)
        if zfill:
            builder.store(Constant.null(ty), slot)
    return slot
def is_scalar_zero(builder, value):
    """
    Return a predicate representing whether *value* is equal to zero.

    *value* must be neither a pointer nor a struct.
    """
    valty = value.type
    assert not is_pointer(valty)
    assert not is_struct(valty)
    zero = Constant.null(valty)
    if valty in (Type.float(), Type.double()):
        # Ordered compare: false when value is NaN
        return builder.fcmp(lc.FCMP_OEQ, zero, value)
    return builder.icmp(lc.ICMP_EQ, zero, value)
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the CPython wrapper around the compiled function.

    *args* and *kws* are parsed against the function's argument names
    into one object slot per argument; each slot is converted through
    the argument manager; the compiled function is called with the
    environment taken from *closure*; on success the result is boxed and
    returned.  On failure a null object is returned, after dispatching
    to the exception switch when the status is a user exception.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)
    keywords = self.make_keywords(fndesc.args)
    # One "O" (object) format unit per argument
    fmt = self.make_const_string("O" * nargs)

    objs = [api.alloca_obj() for _ in range(nargs)]
    parseok = api.parse_tuple_and_keywords(args, kws, fmt, keywords, *objs)

    parse_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, parse_failed):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    endblk = cgutils.append_basic_block(builder, "arg.end")
    with cgutils.goto_block(builder, endblk):
        builder.ret(api.get_null_object())

    cleanup_manager = _ArgManager(builder, api, endblk, nargs)

    innerargs = [cleanup_manager.add_arg(obj, ty)
                 for obj, ty in zip(objs, fndesc.argtypes)]

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # The wrapped function doesn't take a full closure, only
    # the Environment object.
    env = self.context.get_env_from_closure(builder, closure)

    status, res = self.context.call_function(
        builder, self.func, fndesc.restype, fndesc.argtypes,
        innerargs, env)

    # Do clean up
    cleanup_manager.emit_cleanup()

    # Determine return status
    with cgutils.if_likely(builder, status.ok):
        with cgutils.ifthen(builder, status.none):
            api.return_none()

        boxed = api.from_native_return(res, fndesc.restype)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.exc)
    with cgutils.ifthen(builder, not_python_exc):
        # !ok && !exc
        # User exception raised
        self.make_exception_switch(api, builder, status.code)

    # !ok && exc
    builder.ret(api.get_null_object())
def build_wrapper(self, api, builder, closure, args, kws):
    """
    Emit the body of the CPython wrapper around the compiled function.

    The tuple *args* is unpacked into one object slot per argument; each
    slot is converted through the argument manager; the compiled
    function is called with the environment extracted from *closure*;
    on an OK status the result is boxed and returned.  Otherwise a null
    object is returned, after dispatching to the exception switch when
    the status is not a Python exception.  *kws* is not used by this
    implementation.
    """
    fndesc = self.fndesc
    nargs = len(fndesc.args)

    objs = [api.alloca_obj() for _ in range(nargs)]
    parseok = api.unpack_tuple(args, fndesc.qualname, nargs, nargs, *objs)

    unpack_failed = builder.icmp(lc.ICMP_EQ, parseok,
                                 Constant.null(parseok.type))
    with cgutils.if_unlikely(builder, unpack_failed):
        builder.ret(api.get_null_object())

    # Block that returns after erroneous argument unboxing/cleanup
    endblk = builder.append_basic_block("arg.end")
    with builder.goto_block(endblk):
        builder.ret(api.get_null_object())

    cleanup_manager = _ArgManager(self.context, builder, api, endblk, nargs)

    innerargs = [cleanup_manager.add_arg(obj, ty)
                 for obj, ty in zip(objs, fndesc.argtypes)]

    if self.release_gil:
        cleanup_manager = _GilManager(builder, api, cleanup_manager)

    # Extract the Environment object from the Closure
    envptr, env_manager = self.get_env(api, builder, closure)

    status, retval = self.context.call_conv.call_function(
        builder, self.func, fndesc.restype, fndesc.argtypes,
        innerargs, envptr)

    # Do clean up
    self.debug_print(builder, "# callwrapper: emit_cleanup")
    cleanup_manager.emit_cleanup()
    self.debug_print(builder, "# callwrapper: emit_cleanup end")

    # Determine return status
    with cgutils.if_likely(builder, status.is_ok):
        # Ok => return boxed Python value
        with builder.if_then(status.is_none):
            api.return_none()

        retty = self._simplified_return_type()
        boxed = api.from_native_return(retval, retty, env_manager)
        builder.ret(boxed)

    not_python_exc = builder.not_(status.is_python_exc)
    with builder.if_then(not_python_exc):
        # User exception raised
        self.make_exception_switch(api, builder, status)

    # Error out
    builder.ret(api.get_null_object())
def is_not_scalar_zero(builder, value):
    """
    Return a predicate representing whether *value* is not equal to zero.

    This is not exactly "not is_scalar_zero" because of NaNs: the
    floating-point compare used here is unordered.
    *value* must be neither a pointer nor a struct.
    """
    valty = value.type
    assert not is_pointer(valty)
    assert not is_struct(valty)
    zero = Constant.null(valty)
    if valty in (Type.float(), Type.double()):
        return builder.fcmp(lc.FCMP_UNE, zero, value)
    return builder.icmp(lc.ICMP_NE, zero, value)
def get_item_pointer2(builder, data, shape, strides, layout, inds,
                      wraparound=False):
    """
    Emit the address computation for the item of *data* selected by the
    indices *inds*, and return the resulting pointer.

    With *wraparound*, each negative index has the matching dimension
    length from *shape* added to it.  For layouts 'C' and 'F' the item
    position is computed from *shape* and applied with a GEP; for any
    other layout the per-dimension products of *strides* and indices are
    summed and applied with ``pointer_add``.  An empty index sequence
    yields a GEP with a zero index.
    """
    if wraparound:
        # Wraparound
        indices = []
        for ind, dimlen in zip(inds, shape):
            zero = Constant.null(ind.type)
            is_negative = builder.icmp(lc.ICMP_SLT, ind, zero)
            from_end = builder.add(dimlen, ind)
            indices.append(builder.select(is_negative, from_end, ind))
    else:
        indices = inds

    if not indices:
        # Indexing with empty tuple
        return builder.gep(data, [get_null_value(Type.int(32))])

    intp = indices[0].type

    if layout not in 'CF':
        # Any layout
        dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)]
        offset = functools.reduce(builder.add, dimoffs)
        return pointer_add(builder, data, offset)

    def extent_product(dims):
        # Running product of the given dimension lengths, starting at 1
        acc = Constant.int(intp, 1)
        for dim in dims:
            acc = builder.mul(acc, dim)
        return acc

    # Compute steps for each dimension
    if layout == 'C':
        # C contiguous: step is the product of all trailing dimensions
        steps = [extent_product(shape[axis + 1:])
                 for axis in range(len(shape))]
    elif layout == 'F':
        # F contiguous: step is the product of all leading dimensions
        steps = [extent_product(shape[:axis])
                 for axis in range(len(shape))]
    else:
        raise Exception("unreachable")

    # Compute index
    loc = Constant.int(intp, 0)
    for idx, step in zip(indices, steps):
        scaled = builder.mul(idx, step)
        loc = builder.add(loc, scaled)
    return builder.gep(data, [loc])
def delvar(self, name):
    """
    Delete the given variable: decref its current value, then null out
    its storage.
    """
    fetype = self.typeof(name)

    # Define if not already (may happen if the variable is deleted
    # at the beginning of a loop, but only set later in the loop)
    self._alloca_var(name, fetype)

    slot = self.getvar(name)
    current = self.builder.load(slot)
    self.decref(fetype, current)

    # Zero-fill variable to avoid double frees on subsequent dels
    self.builder.store(Constant.null(slot.type.pointee), slot)
def lower_init_func(self, lower):
    """
    Lower the generator's initialization function (which will fill up
    the passed-by-reference generator structure).
    """
    context = self.context

    lower.setup_function(self.fndesc)
    builder = lower.builder

    # Insert the generator into the target context in order to allow
    # calling from other Numba-compiled functions.
    lower.context.insert_generator(self.gentype, self.gendesc,
                                   [self.library])

    # Init argument values
    lower.extract_function_arguments()
    lower.pre_lower()

    # Initialize the return structure (i.e. the generator structure).
    # Its members, in order, are: the resume index, the function
    # arguments, and the state variables.
    retty = context.get_return_type(self.gentype)
    # The initial resume index (0 == start of generator)
    resume_index = context.get_constant(types.int32, 0)
    statesty = retty.elements[2]

    lower.debug_print("# low_init_func incref")

    # Incref all NRT arguments before storing into generator states
    if context.enable_nrt:
        for argty, argval in zip(self.fndesc.argtypes, lower.fnargs):
            context.nrt_incref(builder, argty, argval)

    # Filter out omitted arguments
    argsval = self.arg_packer.as_data(builder, lower.fnargs)

    # Zero initialize states
    statesval = Constant.null(statesty)
    members = [resume_index, argsval, statesval]
    gen_struct = cgutils.make_anonymous_struct(builder, members, retty)

    retval = self.box_generator_struct(lower, gen_struct)

    lower.debug_print("# low_init_func before return")
    self.call_conv.return_value(builder, retval)
    lower.post_lower()
def int_divmod(context, builder, x, y):
    """
    Emit floor division and modulo of the signed integers *x* and *y*;
    return the pair ``(quotient, remainder)``.

    Reference Objects/intobject.c
        xdivy = x / y;
        xmody = (long)(x - (unsigned long)xdivy * y);
        /* If the signs of x and y differ, and the remainder is non-0,
         * C89 doesn't define whether xdivy is now the floor or the
         * ceiling of the infinitely precise quotient.  We want the floor,
         * and we have it iff the remainder's sign matches y's.
         */
        if (xmody && ((y ^ xmody) < 0) /* i.e. and signs differ */) {
            xmody += y;
            --xdivy;
            assert(xmody && ((y ^ xmody) >= 0));
        }
        *p_xdivy = xdivy;
        *p_xmody = xmody;
    """
    assert x.type == y.type

    quotient = builder.sdiv(x, y)
    remainder = builder.srem(x, y)  # Intel has divmod instruction

    zero = Constant.null(y.type)
    one = Constant.int(y.type, 1)

    # Sign bit of (y ^ remainder) is set iff their signs differ
    signs_differ = builder.icmp(lc.ICMP_SLT, builder.xor(y, remainder), zero)
    has_remainder = builder.icmp(lc.ICMP_NE, remainder, zero)
    needs_fixup = builder.and_(has_remainder, signs_differ)

    plain_block = builder.basic_block
    with builder.if_then(needs_fixup):
        fixed_remainder = builder.add(remainder, y)
        fixed_quotient = builder.sub(quotient, one)
        fixup_block = builder.basic_block

    # Merge the unadjusted and adjusted values
    resdiv = builder.phi(y.type)
    resdiv.add_incoming(quotient, plain_block)
    resdiv.add_incoming(fixed_quotient, fixup_block)

    resmod = builder.phi(x.type)
    resmod.add_incoming(remainder, plain_block)
    resmod.add_incoming(fixed_remainder, fixup_block)

    return resdiv, resmod
def from_native_generator(self, val, typ, env=None):
    """
    Make a Numba generator (a _dynfunc.Generator instance) from a
    generator structure pointer *val*.
    *env* is an optional _dynfunc.Environment instance to be wrapped
    in the generator.
    """
    pyobj = self.pyobj
    voidptr = self.voidptr

    llty = self.context.get_data_type(typ)
    assert not llty.is_pointer
    gen_struct_size = self.context.get_abi_sizeof(llty)

    gendesc = self.context.get_generator_desc(typ)

    # This is the PyCFunctionWithKeywords generated by PyCallWrapper
    genfnty = Type.function(pyobj, [pyobj, pyobj, pyobj])
    genfn = self._get_function(genfnty,
                               name=gendesc.llvm_cpython_wrapper_name)

    # This is the raw finalizer generated by
    # _lower_generator_finalize_func(); a null function pointer is
    # passed when the generator type has no finalizer.
    finalizerty = Type.function(Type.void(), [voidptr])
    if typ.has_finalizer:
        finalizer = self._get_function(finalizerty,
                                       name=gendesc.llvm_finalizer_name)
    else:
        finalizer = Constant.null(Type.pointer(finalizerty))

    # PyObject *numba_make_generator(state_size, initial_state, nextfunc,
    #                                finalizer, env)
    make_gen_params = [self.py_ssize_t,
                       voidptr,
                       Type.pointer(genfnty),
                       Type.pointer(finalizerty),
                       voidptr]
    make_gen = self._get_function(Type.function(pyobj, make_gen_params),
                                  name="numba_make_generator")

    state_size = ir.Constant(self.py_ssize_t, gen_struct_size)
    initial_state = self.builder.bitcast(val, voidptr)
    if env is None:
        env = self.get_null_object()
    env = self.builder.bitcast(env, voidptr)

    call_args = (state_size, initial_state, genfn, finalizer, env)
    return self.builder.call(make_gen, call_args)
def object_richcompare(self, lhs, rhs, opstr):
    """
    Emit the comparison ``lhs <opstr> rhs`` on Python objects and return
    the resulting object.

    For the six rich comparison operators, refer to Python source
    Include/object.h for the macro definitions of the opid passed to
    PyObject_RichCompare.  'is' and 'is not' compare the two pointers;
    'in' calls PySequence_Contains and yields a null object when that
    call reports an error (-1).  Any other operator raises
    NotImplementedError.
    """
    builder = self.builder
    pyobj = self.pyobj

    # Position in this list is the opid expected by PyObject_RichCompare
    ops = ['<', '<=', '==', '!=', '>', '>=']
    if opstr in ops:
        opid = ops.index(opstr)
        fnty = Type.function(pyobj, [pyobj, pyobj, Type.int()])
        fn = self._get_function(fnty, name="PyObject_RichCompare")
        lopid = self.context.get_constant(types.int32, opid)
        return builder.call(fn, (lhs, rhs, lopid))

    if opstr == 'is':
        same = builder.icmp(lc.ICMP_EQ, lhs, rhs)
        return self.from_native_value(same, types.boolean)

    if opstr == 'is not':
        different = builder.icmp(lc.ICMP_NE, lhs, rhs)
        return self.from_native_value(different, types.boolean)

    if opstr == 'in':
        fnty = Type.function(Type.int(), [pyobj, pyobj])
        fn = self._get_function(fnty, name="PySequence_Contains")
        # Note the swapped operands: the container is rhs
        status = builder.call(fn, (rhs, lhs))
        negone = self.context.get_constant(types.int32, -1)
        is_good = builder.icmp(lc.ICMP_NE, status, negone)
        # Stack allocate output and initialize to Null
        outptr = cgutils.alloca_once_value(builder, Constant.null(pyobj))
        # If PySequence_Contains returns non-error value
        with cgutils.if_likely(builder, is_good):
            # Store the status as a boolean object
            truncated = builder.trunc(status, Type.int(1))
            builder.store(self.bool_from_bool(truncated), outptr)

        return builder.load(outptr)

    raise NotImplementedError("Unknown operator {op!r}".format(
        op=opstr))
def define_error_gv(postfix):
    """
    Add an integer global variable named after the wrapper function plus
    *postfix* to the wrapper module, null-initialized, and return it.
    """
    gv_name = wrapfn.name + postfix
    gv = wrapper_module.add_global_variable(Type.int(), name=gv_name)
    gv.initializer = Constant.null(gv.type.pointee)
    return gv
def get_constant_null(self, ty):
    """Return the null constant of the LLVM value type for *ty*."""
    return Constant.null(self.get_value_type(ty))
def generate_kernel_wrapper(self, library, fname, argtypes):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped has the name ``fname`` and argument
    types ``argtypes``.  The wrapper function is returned.

    The wrapper calls the wrapped function and, when it reports a
    non-Python exception, records the error code in a global variable
    (first error only) together with the thread and block ids.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")

    # Declaration of the wrapped function: it takes a return-value slot
    # ahead of the real arguments and returns an integer status.
    retslot_ty = self.call_conv.get_return_type(types.pyobject)
    fnty = Type.function(Type.int(), [retslot_ty] + argtys)
    func = wrapper_module.add_function(fnty, name=fname)

    wrapfn = wrapper_module.add_function(wrapfnty,
                                         name="cudaPy_" + func.name)
    builder = Builder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for axis in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % axis))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % axis))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, func, types.void, argtypes, callargs)

    # Check error status
    with cgutils.if_likely(builder, status.is_ok):
        builder.ret_void()

    with builder.if_then(builder.not_(status.is_python_exc)):
        # User exception raised
        old = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded
        casfnty = lc.Type.function(old.type,
                                   [gv_exc.type, old.type, old.type])
        casfn = wrapper_module.add_function(casfnty,
                                            name="___numba_cas_hack")
        xchg = builder.call(casfn, [gv_exc, old, status.code])
        changed = builder.icmp(ICMP_EQ, xchg, old)

        # If the xchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with builder.if_then(changed):
            for read_sreg, targets in ((sreg.tid, gv_tid),
                                       (sreg.ctaid, gv_ctaid)):
                for dim, ptr in zip("xyz", targets):
                    builder.store(read_sreg(dim), ptr)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    # Return the function as looked up from the finalized library
    return library.get_function(wrapfn.name)
def as_bool_bit(builder, value):
    """Return a predicate that is true when *value* differs from zero."""
    zero = Constant.null(value.type)
    return builder.icmp(lc.ICMP_NE, value, zero)
def get_constant_null(self, ty):
    """Return the null constant of the LLVM value type for *ty*."""
    llvm_type = self.get_value_type(ty)
    null_const = Constant.null(llvm_type)
    return null_const
def get_dummy_value(self):
    """Return the null constant of the dummy type."""
    dummy_type = self.get_dummy_type()
    return Constant.null(dummy_type)
def to_native_value(self, obj, typ):
    """
    Emit the conversion of the Python object *obj* to a native value of
    Numba type *typ*, and return the converted value.

    Handled types: Python objects (returned unchanged), booleans,
    unsigned and signed integers, float32/float64, complex64/complex128,
    NPDatetime, NPTimedelta, arrays and Optional.  For Optional the
    returned value is the stack slot holding the optional, not a loaded
    value.  Raises NotImplementedError for any other type.
    """
    builder = self.builder
    context = self.context

    if isinstance(typ, types.Object) or typ == types.pyobject:
        return obj

    if typ == types.boolean:
        truth = self.object_istrue(obj)
        return builder.icmp(lc.ICMP_NE, truth, Constant.null(truth.type))

    if typ in types.unsigned_domain:
        pylong = self.number_long(obj)
        wide = self.long_as_ulonglong(pylong)
        self.decref(pylong)
        return builder.trunc(wide, context.get_argument_type(typ))

    if typ in types.signed_domain:
        pylong = self.number_long(obj)
        wide = self.long_as_longlong(pylong)
        self.decref(pylong)
        return builder.trunc(wide, context.get_argument_type(typ))

    if typ == types.float32:
        pyfloat = self.number_float(obj)
        dbl = self.float_as_double(pyfloat)
        self.decref(pyfloat)
        # Narrow the double down to the argument type
        return builder.fptrunc(dbl, context.get_argument_type(typ))

    if typ == types.float64:
        pyfloat = self.number_float(obj)
        dbl = self.float_as_double(pyfloat)
        self.decref(pyfloat)
        return dbl

    if typ in (types.complex128, types.complex64):
        # Always extract as complex128 first
        c128 = context.make_complex(types.complex128)(context, builder)
        ok = self.complex_adaptor(obj, c128._getpointer())
        failed = cgutils.is_false(builder, ok)

        with cgutils.if_unlikely(builder, failed):
            builder.ret(self.get_null_object())

        if typ == types.complex64:
            # Cast both parts down to float32
            c64 = context.make_complex(typ)(context, builder)
            freal = context.cast(builder, c128.real,
                                 types.float64, types.float32)
            fimag = context.cast(builder, c128.imag,
                                 types.float64, types.float32)
            c64.real = freal
            c64.imag = fimag
            return c64._getvalue()
        return c128._getvalue()

    if isinstance(typ, types.NPDatetime):
        return self.extract_np_datetime(obj)

    if isinstance(typ, types.NPTimedelta):
        return self.extract_np_timedelta(obj)

    if isinstance(typ, types.Array):
        return self.to_native_array(typ, obj)

    if isinstance(typ, types.Optional):
        isnone = builder.icmp(lc.ICMP_EQ, obj, self.borrow_none())
        with cgutils.ifelse(builder, isnone) as (then, orelse):
            with then:
                # obj is None: store the "none" optional
                noneval = context.make_optional_none(builder, typ.type)
                ret = cgutils.alloca_once(builder, noneval.type)
                builder.store(noneval, ret)
            with orelse:
                # Otherwise convert the payload and wrap it
                val = self.to_native_value(obj, typ.type)
                just = context.make_optional_value(builder, typ.type, val)
                builder.store(just, ret)
        return ret

    raise NotImplementedError(typ)
def get_null_value(ltype):
    """Return the null constant of the LLVM type *ltype*."""
    null_const = Constant.null(ltype)
    return null_const
def define_error_gv(postfix):
    """
    Add an integer global variable named after the wrapper function plus
    *postfix* to the wrapper module, null-initialized, and return it.
    """
    error_gv = wrapper_module.add_global_variable(
        Type.int(), name=wrapfn.name + postfix)
    pointee_type = error_gv.type.pointee
    error_gv.initializer = Constant.null(pointee_type)
    return error_gv
def get_dummy_value(self):
    """Return the null constant of the dummy type."""
    null_dummy = Constant.null(self.get_dummy_type())
    return null_dummy
def _delete_variable(self, varname):
    """
    Zero-fill the storage of variable *varname*, to avoid crashing due
    to an extra ir.Del.
    """
    slot = self.getvar(varname)
    zeroed = Constant.null(slot.type.pointee)
    self.builder.store(zeroed, slot)
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args, env):
    """
    Emit a call to the object-mode ufunc core function *func*.

    Each native array in *args* (typed by ``signature.args``) is turned
    into a Python ndarray through ``numba_ndarray_new``; *func* is then
    called with those objects and *env*; finally the returned object and
    all argument objects are decref'ed.

    If building any ndarray yields NULL, control jumps straight to the
    common return block without calling *func*.

    Returns ``(status.code, error)`` where *error* is the i1 error flag
    loaded in the return block: the "is NULL" result of the last ndarray
    construction attempted, or ``status.is_error`` once the core
    function has been called.
    """
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr,
                                    ll_intp_ptr, ll_voidptr,
                                    ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    arg_pairs = list(zip(args, signature.args))

    # Allocate and NULL-initialize *every* object slot before emitting
    # any conversion code.  The conversion loop below branches to the
    # return block as soon as one ndarray fails to build, and the return
    # block loads and decref's all slots; slots belonging to arguments
    # that were never reached must therefore already hold NULL.
    ndarray_pointers = []
    for _ in arg_pairs:
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        builder.store(Constant.null(ll_pyobj), ptr)   # initialize to NULL
        ndarray_pointers.append(ptr)

    zero = Constant.int(ll_int, 0)
    ndarray_objects = []
    for ptr, (arr, arrtype) in zip(ndarray_pointers, arg_pairs):
        arycls = context.make_array(arrtype)
        array = arycls(context, builder, value=arr)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'),
                              [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # Bail out to the return block if the ndarray could not be built
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(
        builder, func, types.pyobject, object_sig,
        ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    # NOTE(review): status.code is only computed on the path that
    # actually calls the core function - confirm callers do not rely on
    # it when the error flag was set by a failed ndarray construction.
    innercall = status.code
    return innercall, builder.load(error_pointer)
def get_null_object(self):
    """Return the null constant of the Python object pointer type."""
    pyobj_type = self.pyobj
    return Constant.null(pyobj_type)
def _prepare_call_to_object_mode(context, builder, func, signature, args,
                                 env):
    """
    Emit a call to the object-mode ufunc core function *func*.

    Each native array in *args* (typed by ``signature.args``) is turned
    into a Python ndarray through ``numba_ndarray_new``; *func* is then
    called with those objects and *env*; finally the returned object and
    all argument objects are decref'ed.

    If building any ndarray yields NULL, control jumps straight to the
    common return block without calling *func*.

    Returns ``(status.code, error)`` where *error* is the i1 error flag
    loaded in the return block: the "is NULL" result of the last ndarray
    construction attempted, or ``status.is_error`` once the core
    function has been called.
    """
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr,
                   ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    arg_pairs = list(zip(args, signature.args))

    # Allocate and NULL-initialize *every* object slot before emitting
    # any conversion code.  The conversion loop below branches to the
    # return block as soon as one ndarray fails to build, and the return
    # block loads and decref's all slots; slots belonging to arguments
    # that were never reached must therefore already hold NULL.
    ndarray_pointers = []
    for _ in arg_pairs:
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        builder.store(Constant.null(ll_pyobj), ptr)   # initialize to NULL
        ndarray_pointers.append(ptr)

    zero = Constant.int(ll_int, 0)
    ndarray_objects = []
    for ptr, (arr, arrtype) in zip(ndarray_pointers, arg_pairs):
        arycls = context.make_array(arrtype)
        array = arycls(context, builder, value=arr)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'),
                              [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # Bail out to the return block if the ndarray could not be built
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(
        builder, func, types.pyobject, object_sig,
        ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    # NOTE(review): status.code is only computed on the path that
    # actually calls the core function - confirm callers do not rely on
    # it when the error flag was set by a failed ndarray construction.
    innercall = status.code
    return innercall, builder.load(error_pointer)
def get_null_value(ltype):
    """Return the null constant of the LLVM type *ltype*."""
    zero_of_type = Constant.null(ltype)
    return zero_of_type
def generate_kernel_wrapper(self, library, fname, argtypes, debug):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped has the name ``fname`` and argument
    types ``argtypes``.  The wrapper function is returned.

    The wrapper calls the wrapped function.  Only when ``debug`` is
    true does it also check the returned status and, for a non-Python
    exception, record the error code in a global variable (first error
    only) together with the thread and block ids.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")

    # Declaration of the wrapped function: it takes a return-value slot
    # ahead of the real arguments and returns an integer status.
    retslot_ty = self.call_conv.get_return_type(types.pyobject)
    fnty = Type.function(Type.int(), [retslot_ty] + argtys)
    func = wrapper_module.add_function(fnty, name=fname)

    prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
    wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
    builder = Builder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for axis in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % axis))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % axis))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, func, types.void, argtypes, callargs)

    if debug:
        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded
            casfnty = lc.Type.function(old.type,
                                       [gv_exc.type, old.type, old.type])
            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for read_sreg, targets in ((sreg.tid, gv_tid),
                                           (sreg.ctaid, gv_ctaid)):
                    for dim, ptr in zip("xyz", targets):
                        builder.store(read_sreg(dim), ptr)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    # Return the function as looked up from the finalized library
    return library.get_function(wrapfn.name)
def as_bool_bit(builder, value):
    """Return a predicate that is true when *value* differs from zero."""
    null_of_type = Constant.null(value.type)
    is_nonzero = builder.icmp(lc.ICMP_NE, value, null_of_type)
    return is_nonzero