def set_branch_weight(builder, brinst, trueweight, falseweight):
    """
    Attach ``branch_weights`` profile metadata to the branch instruction
    *brinst*.

    *trueweight* and *falseweight* are lowered to integer constants and
    stored, together with the "branch_weights" tag, under the "prof"
    metadata kind of the instruction.
    """
    mod = get_module(builder)
    tag = lc.MetaDataString.get(mod, "branch_weights")
    weights = [lc.Constant.int(Type.int(), w)
               for w in (trueweight, falseweight)]
    node = lc.MetaData.get(mod, [tag] + weights)
    brinst.set_metadata("prof", node)
def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs):
    """
    Emit a call to the variadic function ``PyArg_ParseTupleAndKeywords``.

    *args*, *kws*, *fmt* and *keywords* are passed as the four fixed
    arguments; every value in *objs* is appended as a variadic argument.
    Returns the value produced by the call instruction.
    """
    char_p = Type.pointer(Type.int(8))
    char_pp = Type.pointer(char_p)
    signature = Type.function(Type.int(),
                              [self.pyobj, self.pyobj, char_p, char_pp],
                              var_arg=True)
    parser = self._get_function(signature,
                                name="PyArg_ParseTupleAndKeywords")
    call_args = [args, kws, fmt, keywords]
    call_args.extend(objs)
    return self.builder.call(parser, call_args)
def __init__(self, context, builder, value=None, ref=None, cast_ref=False):
    """
    Set up the structure proxy.

    Exactly one storage strategy is used:

    * *ref* given: the proxy wraps that pointer.  *value* must be None.
      When the pointee type differs from the struct type the pointer is
      bitcast if *cast_ref* is true, otherwise TypeError is raised.
    * *ref* omitted: a new slot is obtained from ``alloca_once`` and, when
      *value* is given, it is stored into that slot.

    Afterwards the per-field lookup tables are built from ``self._fields``.
    """
    self._type = context.get_struct_type(self)
    self._context = context
    self._builder = builder

    if ref is not None:
        # Wrap caller-provided storage.
        assert value is None
        assert is_pointer(ref.type)
        if self._type != ref.type.pointee:
            if not cast_ref:
                raise TypeError(
                    "mismatching pointer type: got %s, expected %s"
                    % (ref.type.pointee, self._type))
            ref = builder.bitcast(ref, Type.pointer(self._type))
        self._value = ref
    else:
        # Own a freshly allocated slot, optionally initialised from *value*.
        self._value = alloca_once(builder, self._type)
        if value is not None:
            assert not is_pointer(value.type)
            assert value.type == self._type, (value.type, self._type)
            builder.store(value, self._value)

    # Field name -> position, position -> GEP index pair, position -> type.
    self._namemap = {}
    self._fdmap = []
    self._typemap = []
    zero = Constant.int(Type.int(), 0)
    for position, (field_name, field_type) in enumerate(self._fields):
        self._namemap[field_name] = position
        self._fdmap.append((zero, Constant.int(Type.int(), position)))
        self._typemap.append(field_type)
def ptx_vote_sync(context, builder, sig, args):
    """
    Emit a call to the ``llvm.nvvm.vote.sync`` intrinsic with *args*.

    The intrinsic is declared as taking ``(i32, i32, i1)`` and returning a
    ``{i32, i1}`` struct; the call's result is returned unchanged.
    """
    i1 = Type.int(1)
    i32 = Type.int(32)
    intrinsic_ty = Type.function(Type.struct((i32, i1)), (i32, i32, i1))
    intrinsic = builder.module.get_or_insert_function(
        intrinsic_ty, name='llvm.nvvm.vote.sync')
    return builder.call(intrinsic, args)
def __init__(self, context, builder):
    """
    Bind the helper to *context* and *builder* and cache the LLVM types
    it uses.

    Note: Maybe called multiple times when lowering a function
    """
    from numba.targets import boxing
    self.context = context
    self.builder = builder
    self.module = builder.basic_block.function.module

    # A unique mapping of serialized objects in this module; created on
    # first use and left untouched when the module already carries one.
    try:
        self.module.__serialized
    except AttributeError:
        self.module.__serialized = {}

    # Integer widths derived from the C types of the host platform.
    c_long_bits = ctypes.sizeof(ctypes.c_long) * 8
    c_ulonglong_bits = ctypes.sizeof(ctypes.c_ulonglong) * 8

    # Initialize types
    self.pyobj = self.context.get_argument_type(types.pyobject)
    self.voidptr = Type.pointer(Type.int(8))
    self.long = Type.int(c_long_bits)
    self.ulonglong = Type.int(c_ulonglong_bits)
    # longlong shares the very same type object as ulonglong.
    self.longlong = self.ulonglong
    self.double = Type.double()
    self.py_ssize_t = self.context.get_value_type(types.intp)
    self.cstring = Type.pointer(Type.int(8))
    self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
    self.py_buffer_t = ir.ArrayType(ir.IntType(8),
                                    _helperlib.py_buffer_size)
def numba_array_adaptor(self, ary, ptr):
    """
    Emit a call to ``numba_adapt_ndarray(ary, ptr)`` and return its result.

    Both parameters of the declared function are marked with the
    no-capture attribute.
    """
    signature = Type.function(Type.int(),
                              [self.pyobj, Type.pointer(Type.int(8))])
    adaptor = self._get_function(signature, name="numba_adapt_ndarray")
    for position in (0, 1):
        adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
    return self.builder.call(adaptor, (ary, ptr))
def tuple_setitem(self, tuple_val, index, item):
    """
    Emit a call to ``PyTuple_SetItem(tuple_val, index, item)``.

    Steals a reference to `item`.

    *index* is a Python integer.  It is lowered to a constant of the
    ``Py_ssize_t`` type (``self.py_ssize_t``), which is the C type of the
    second parameter of ``PyTuple_SetItem``.  Declaring the parameter as a
    plain ``int`` mismatches the C prototype on platforms where
    ``Py_ssize_t`` is wider than ``int``.

    The status value produced by the call is not returned.
    """
    fnty = Type.function(Type.int(),
                         [self.pyobj, self.py_ssize_t, self.pyobj])
    setitem_fn = self._get_function(fnty, name='PyTuple_SetItem')
    # types.intp is the Numba type whose value type is self.py_ssize_t.
    index = self.context.get_constant(types.intp, index)
    self.builder.call(setitem_fn, [tuple_val, index, item])
def unpack_tuple(self, args, name, n_min, n_max, *objs):
    """
    Emit a call to the variadic function ``PyArg_UnpackTuple``.

    *n_min* and *n_max* are lowered to ``py_ssize_t`` constants.  When
    *name* is a Python ``str`` it is first inserted into the module as a
    constant string; any other value is passed through as is.  Values in
    *objs* become the variadic arguments.  Returns the call's result.
    """
    ssize_t = self.py_ssize_t
    signature = Type.function(
        Type.int(),
        [self.pyobj, Type.pointer(Type.int(8)), ssize_t, ssize_t],
        var_arg=True)
    unpack = self._get_function(signature, name="PyArg_UnpackTuple")
    lower_bound = Constant.int(ssize_t, n_min)
    upper_bound = Constant.int(ssize_t, n_max)
    if isinstance(name, str):
        name = self.context.insert_const_string(self.builder.module, name)
    call_args = [args, name, lower_bound, upper_bound]
    call_args.extend(objs)
    return self.builder.call(unpack, call_args)
def ptx_match_any_sync(context, builder, sig, args):
    """
    Emit a call to ``llvm.nvvm.match.any.sync.i<width>``.

    ``<width>`` is the bit width of the second signature argument.  A value
    whose type is in ``types.real_domain`` is bitcast to an integer of that
    width before the call.  Returns the i32 result of the intrinsic.
    """
    mask, value = args
    value_ty = sig.args[1]
    width = value_ty.bitwidth
    if value_ty in types.real_domain:
        value = builder.bitcast(value, Type.int(width))
    intrinsic_name = 'llvm.nvvm.match.any.sync.i{}'.format(width)
    intrinsic_ty = Type.function(Type.int(32),
                                 (Type.int(32), Type.int(width)))
    intrinsic = builder.module.get_or_insert_function(intrinsic_ty,
                                                      name=intrinsic_name)
    return builder.call(intrinsic, (mask, value))
def ptx_cmem_arylike(context, builder, sig, args):
    """
    Lower an array-like constant into a global in the constant address
    space and return an array structure that points at it.

    The single element of *args* is flattened (order 'A') and each element
    is turned into an LLVM constant; complex elements contribute their real
    and imaginary parts as two consecutive constants.  Raises TypeError for
    a dtype that is neither complex nor in ``types.number_domain``.
    """
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype
    positions = range(flat.size)

    if isinstance(dtype, types.Complex):
        if dtype == types.complex64:
            elemtype = types.float32
        else:
            elemtype = types.float64
        constvals = []
        for pos in positions:
            elem = flat[pos]
            constvals.append(context.get_constant(elemtype, elem.real))
            constvals.append(context.get_constant(elemtype, elem.imag))
    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[pos])
                     for pos in positions]
    else:
        raise TypeError("unsupport type: %s" % dtype)

    # Emit the data as an internal, constant global variable.
    constary = lc.Constant.array(constvals[0].type, constvals)
    space = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=space)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    byte_t = Type.int(8)
    conv = nvvmutils.insert_addrspace_conv(lmod, byte_t, space)
    space_ptr = gv.bitcast(Type.pointer(Type.int(8), space))
    generic_ptr = builder.call(conv, [space_ptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    shape_consts = [context.get_constant(types.intp, s) for s in arr.shape]
    stride_consts = [context.get_constant(types.intp, s)
                     for s in arr.strides]
    context.populate_array(
        ary,
        data=builder.bitcast(generic_ptr, ary.data.type),
        shape=cgutils.pack_array(builder, shape_consts),
        strides=cgutils.pack_array(builder, stride_consts),
        itemsize=ary.itemsize,
        parent=ary.parent,
        meminfo=None)
    return ary._getvalue()
def int_upower_impl(context, builder, sig, args):
    """
    Lower ``x ** y`` where the exponent *y* is an integer value.

    The exponent is first brought to 32 bits (truncated when wider,
    zero-extended when narrower).  Depending on
    ``context.implement_powi_as_math_call`` the power is then computed
    either through the registered ``math.pow`` implementation or through
    the ``powi`` intrinsic.
    """
    module = cgutils.get_module(builder)
    base, exponent = args

    exponent_width = exponent.type.width
    if exponent_width > 32:
        exponent = builder.trunc(exponent, Type.int(32))
    elif exponent_width < 32:
        exponent = builder.zext(exponent, Type.int(32))

    if not context.implement_powi_as_math_call:
        powi = lc.Function.intrinsic(module, lc.INTR_POWI, [base.type])
        return builder.call(powi, (base, exponent))

    undersig = typing.signature(sig.return_type, sig.args[0], types.int32)
    pow_impl = context.get_function(math.pow, undersig)
    return pow_impl(builder, (base, exponent))
def to_native_arg(self, obj, typ):
    """
    Convert the Python object *obj* into a native value of type *typ*.

    Returns ``(val, dtor)`` where *dtor* is a zero-argument callable.  For
    record types *dtor* releases the buffer obtained here; for every other
    type it does nothing.
    """
    if not isinstance(typ, types.Record):
        val = self.to_native_value(obj, typ)

        def dtor():
            pass

        return val, dtor

    # Generate a dummy integer type that has the size of Py_buffer
    buffer_ty = Type.int(_helperlib.py_buffer_size * 8)
    # Allocate the Py_buffer
    buffer_slot = cgutils.alloca_once(self.builder, buffer_ty)

    # Zero-fill the py_buffer. where the obj field in Py_buffer is NULL
    # PyBuffer_Release has no effect.
    self.builder.store(lc.Constant.null(buffer_ty), buffer_slot)

    buf_as_voidptr = self.builder.bitcast(buffer_slot, self.voidptr)
    ptr = self.extract_record_data(obj, buf_as_voidptr)

    # A NULL data pointer makes the generated function return that pointer.
    ptr_is_null = cgutils.is_null(self.builder, ptr)
    with cgutils.if_unlikely(self.builder, ptr_is_null):
        self.builder.ret(ptr)

    record_ty = self.context.get_value_type(typ)
    val = cgutils.init_record_by_ptr(self.builder, record_ty, ptr)

    def dtor():
        self.release_record_buffer(buf_as_voidptr)

    return val, dtor
def ptx_warp_sync(context, builder, sig, args):
    """
    Emit a call to ``llvm.nvvm.bar.warp.sync`` with *args*.

    The intrinsic is declared as ``void(i32)``; the context's dummy value
    is returned.
    """
    intrinsic_ty = Type.function(Type.void(), (Type.int(32),))
    intrinsic = builder.module.get_or_insert_function(
        intrinsic_ty, name='llvm.nvvm.bar.warp.sync')
    builder.call(intrinsic, args)
    return context.get_dummy_value()
def insert_string_const_addrspace(self, builder, string):
    """
    Insert a constant string in the constant addresspace and return a
    generic i8 pointer to the data.

    An existing global with the expected name and pointee type is reused;
    otherwise a new internal constant global is created.
    """
    lmod = builder.basic_block.function.module
    text = Constant.stringz(string)
    name = "__conststring__.%s" % string
    charty = Type.int(8)

    # Deduplicate: look for a global that already holds this string.
    matches = (g for g in lmod.global_variables
               if g.name == name and g.type.pointee == text.type)
    gv = next(matches, None)
    if gv is None:
        gv = lmod.add_global_variable(text.type, name=name,
                                      addrspace=nvvm.ADDRSPACE_CONSTANT)
        gv.linkage = LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = text

    const_char_ptr_ty = Type.pointer(charty, nvvm.ADDRSPACE_CONSTANT)
    const_char_ptr = builder.bitcast(gv, const_char_ptr_ty)

    to_generic = nvvmutils.insert_addrspace_conv(lmod, charty,
                                                 nvvm.ADDRSPACE_CONSTANT)
    return builder.call(to_generic, [const_char_ptr])
def nrt_adapt_ndarray_from_python(self, ary, ptr):
    """
    Emit a call to ``NRT_adapt_ndarray_from_python(ary, ptr)`` and return
    its result.

    Asserts that NRT is enabled on the context.  Both parameters of the
    declared function are marked with the no-capture attribute.
    """
    assert self.context.enable_nrt
    signature = Type.function(Type.int(), [self.pyobj, self.voidptr])
    adaptor = self._get_function(signature,
                                 name="NRT_adapt_ndarray_from_python")
    for position in (0, 1):
        adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
    return self.builder.call(adaptor, (ary, ptr))
def get_constant(self, ty, val):
    """
    Return an LLVM constant for the Python value *val* of Numba type *ty*.

    Handles ``none``, booleans, signed and unsigned integers, reals and
    uniform tuples (lowered element by element into a constant array).
    Raises NotImplementedError for any other type.  Struct types are
    rejected by assertion.
    """
    assert not self.is_struct_type(ty)

    lty = self.get_value_type(ty)

    if ty == types.none:
        assert val is None
        return self.get_dummy_value()

    if ty == types.boolean:
        return Constant.int(Type.int(1), int(val))

    if ty in types.signed_domain:
        return Constant.int_signextend(lty, val)

    if ty in types.unsigned_domain:
        return Constant.int(lty, val)

    if ty in types.real_domain:
        return Constant.real(lty, val)

    if isinstance(ty, types.UniTuple):
        elements = [self.get_constant(ty.dtype, item) for item in val]
        return Constant.array(elements[0].type, elements)

    raise NotImplementedError("cannot lower constant of type '%s'" % (ty,))
def list_setitem(self, seq, idx, val):
    """
    Emit a call to ``PyList_SetItem(seq, idx, val)`` and return its result.

    Warning: Steals reference to ``val``
    """
    signature = Type.function(Type.int(),
                              [self.pyobj, self.py_ssize_t, self.pyobj])
    setter = self._get_function(signature, name="PyList_SetItem")
    return self.builder.call(setter, [seq, idx, val])
def core(context, builder, sig, args):
    """
    Call the external function named by the enclosing ``nvname`` and cast
    its integer result to a boolean.

    ``nvname`` and ``ty`` are free variables supplied by the enclosing
    scope; ``ty`` determines the LLVM type of the single parameter.
    """
    assert sig.return_type == types.boolean, nvname
    param_ty = context.get_value_type(ty)
    lmod = builder.module
    callee_ty = Type.function(Type.int(), [param_ty])
    callee = lmod.get_or_insert_function(callee_ty, name=nvname)
    int_result = builder.call(callee, args)
    return context.cast(builder, int_result, types.int32, types.boolean)
def inbound_gep(builder, ptr, *inds):
    """
    Emit an in-bounds GEP on *ptr*.

    Python ``int`` indices are converted to 32-bit integer constants; any
    other index is used as given.
    """
    indices = [Constant.int(Type.int(32), ind) if isinstance(ind, int)
               else ind
               for ind in inds]
    return builder.gep(ptr, indices, inbounds=True)
def gep(builder, ptr, *inds):
    """
    Emit a GEP instruction on *ptr* and return the resulting pointer.

    Python ``int`` indices are converted to 32-bit integer constants; any
    other index is used as given.

    NOTE: 32-bit constants are used because LLVM only accepts i32 constants
    when indexing into a structure.  64-bit constants are rejected there,
    so they are valid only for pointer and array indices.
    """
    idx = [Constant.int(Type.int(32), i) if isinstance(i, int) else i
           for i in inds]
    return builder.gep(ptr, idx)
def printf(self, builder, format_string, *args):
    """
    Emit a call to the C ``printf`` function and return its result.

    A Python ``str`` *format_string* is inserted into the module as a
    constant string first; any other value is used directly as the format
    argument.  *args* are passed as the variadic arguments.
    """
    mod = builder.module
    if not isinstance(format_string, str):
        fmt_ptr = format_string
    else:
        fmt_ptr = self.insert_const_string(mod, format_string)
    printf_ty = Type.function(Type.int(), (GENERIC_POINTER,), var_arg=True)
    printf_fn = mod.get_or_insert_function(printf_ty, "printf")
    return builder.call(printf_fn, (fmt_ptr,) + args)
def __init__(self, context, builder):
    """
    Bind the helper to *context* and *builder* and cache the LLVM types
    it uses.  ``fix_python_api()`` is invoked first.

    Note: Maybe called multiple times when lowering a function
    """
    fix_python_api()
    self.context = context
    self.builder = builder
    self.module = builder.basic_block.function.module

    # Integer widths derived from the C types of the host platform.
    c_long_bits = ctypes.sizeof(ctypes.c_long) * 8
    c_ulonglong_bits = ctypes.sizeof(ctypes.c_ulonglong) * 8

    # Initialize types
    self.pyobj = self.context.get_argument_type(types.pyobject)
    self.voidptr = Type.pointer(Type.int(8))
    self.long = Type.int(c_long_bits)
    self.ulonglong = Type.int(c_ulonglong_bits)
    # longlong shares the very same type object as ulonglong.
    self.longlong = self.ulonglong
    self.double = Type.double()
    self.py_ssize_t = self.context.get_value_type(types.intp)
    self.cstring = Type.pointer(Type.int(8))
    self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
def make_keywords(self, kws):
    """
    Build a NULL-terminated constant array of C strings from *kws*.

    The array is stored in the module as the global constant ".kwlist" and
    a pointer to its first string pointer is returned.
    """
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(keyword) for keyword in kws]
    # Terminating NULL entry.
    entries.append(Constant.null(stringtype))
    table = Constant.array(stringtype, entries)
    table_global = cgutils.global_constant(self.module, ".kwlist", table)
    return Constant.bitcast(table_global, Type.pointer(stringtype))
def activelanepermute_wavewidth_impl(context, builder, sig, args):
    """
    Emit a call to ``__hsail_activelanepermute_wavewidth_b<bits>``.

    ``<bits>`` is the bit width of the first signature argument, which must
    equal the third.  *src* and *identity* are bitcast to an integer of
    that width for the call, and the result is bitcast back to the element
    value type.
    """
    [src, laneid, identity, use_ident] = args
    assert sig.args[0] == sig.args[2]
    elem_type = sig.args[0]
    bitwidth = elem_type.bitwidth
    int_ty = Type.int(bitwidth)

    callee_name = "__hsail_activelanepermute_wavewidth_b{0}".format(bitwidth)
    callee_ty = Type.function(int_ty,
                              [int_ty, Type.int(32), int_ty, Type.int(1)])
    callee = builder.module.get_or_insert_function(callee_ty,
                                                   name=callee_name)
    callee.calling_convention = target.CC_SPIR_FUNC

    src_bits = builder.bitcast(src, int_ty)
    identity_bits = builder.bitcast(identity, int_ty)
    raw = builder.call(callee, [src_bits, laneid, identity_bits, use_ident])
    return builder.bitcast(raw, context.get_value_type(elem_type))
def set_cuda_kernel(lfunc):
    """
    Add an ``nvvm.annotations`` entry ``(lfunc, "kernel", 1)`` to the
    module that owns *lfunc*.
    """
    from llvmlite.llvmpy.core import MetaData, MetaDataString, Constant, Type

    mod = lfunc.module
    kernel_tag = MetaDataString.get(mod, "kernel")
    one = Constant.int(Type.int(), 1)
    annotation = MetaData.get(mod, (lfunc, kernel_tag, one))

    annotations = mod.get_or_insert_named_metadata('nvvm.annotations')
    annotations.add(annotation)
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace,
                   can_dynsized=False):
    """
    Allocate storage for an array of *dtype* with the given *shape* in
    *addrspace* and return the array built by ``_make_array``.

    For ``nvvm.ADDRSPACE_LOCAL`` the storage comes from ``alloca_once``.
    For any other address space a global variable named *symbol_name* is
    added to the module and converted to a generic pointer.

    Raises (non-local address spaces only):
        ValueError: the element count is <= 0 and *can_dynsized* is false.
        TypeError: *dtype* is not in ``types.number_domain``.
    """
    # Start the product at 1 so that an empty shape gives one element
    # instead of making reduce() raise TypeError.
    elemcount = reduce(operator.mul, shape, 1)
    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local address space allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        # Validate before touching the module so that a rejected request
        # does not leave a stray global variable behind.
        dynamic = elemcount <= 0
        if dynamic and not can_dynsized:
            raise ValueError("array length <= 0")
        if dtype not in types.number_domain:
            raise TypeError("unsupported type: %s" % dtype)

        lmod = builder.module

        # Create global variable in the requested address-space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        gvmem.align = context.get_abi_sizeof(lldtype)

        if dynamic:
            # dynamic shared memory
            gvmem.linkage = lc.LINKAGE_EXTERNAL
        else:
            # The linkage is deliberately NOT set to internal here, to work
            # around a NVVM bug which generates an invalid symbol name when
            # the linkage is internal in some situations.
            # See _get_unique_smem_id()
            gvmem.initializer = lc.Constant.undef(laryty)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    return _make_array(context, builder, dataptr, dtype, shape)
def gep(builder, ptr, *inds, **kws):
    """
    Emit a GEP instruction on *ptr* and return the resulting pointer.

    Python ``int`` indices are converted to 32-bit integer constants; any
    other index is used as given.  The optional keyword ``name`` (default
    '') names the instruction.
    """
    name = kws.pop('name', '')
    # NOTE: llvm only accepts int32 inside structs, not int64
    indices = [Constant.int(Type.int(32), ind) if isinstance(ind, int)
               else ind
               for ind in inds]
    return builder.gep(ptr, indices, name=name)
def print_charseq(context, builder, sig, args):
    """
    Print a character-sequence value through the Python API.

    The value is spilled to a stack slot, wrapped in a bytes object of the
    sequence's element count, printed, and the bytes object is released.
    Returns the context's dummy value.
    """
    [value] = args
    pyapi = context.get_python_api(builder)

    # Spill the value so that its bytes can be addressed.
    slot = cgutils.alloca_once(builder, value.type)
    builder.store(value, slot)
    raw_bytes = builder.bitcast(slot, Type.pointer(Type.int(8)))

    length = context.get_constant(types.intp, value.type.elements[0].count)
    bytes_obj = pyapi.bytes_from_string_and_size(raw_bytes, length)
    pyapi.print_object(bytes_obj)
    pyapi.decref(bytes_obj)
    return context.get_dummy_value()
def get_value_type(self, ty):
    """
    Return the LLVM type used for values of the Numba type *ty*.

    Booleans are 1-bit integers.  Records are a struct holding a pointer
    to the first element type of their data representation.  Every other
    type uses its data type unchanged.
    """
    if ty == types.boolean:
        return Type.int(1)

    dataty = self.get_data_type(ty)
    if not isinstance(ty, types.Record):
        return dataty

    # Record data are passed by reference
    storage = dataty.elements[0]
    return Type.struct([Type.pointer(storage)])
def printf(builder, format_string, *values):
    """
    Emit a call to the C ``printf`` function.

    *format_string* is stored in an unnamed global variable.  Python ``int``
    values become integer constants, Python ``float`` values become double
    constants, and anything else is passed through unchanged.
    """
    def lower(value):
        # Turn Python scalars into LLVM constants.
        if isinstance(value, int):
            return Constant.int(Type.int(), value)
        if isinstance(value, float):
            return Constant.real(Type.double(), value)
        return value

    fmt_const = Constant.stringz(format_string)
    fmt_global = get_module(builder).add_global_variable(fmt_const.type, '')
    fmt_global.initializer = fmt_const

    # Address of the first character of the format string.
    first_char = [Constant.int(Type.int(32), 0), Constant.int(Type.int(32), 0)]
    fmt_addr = fmt_global.gep(first_char)

    call_args = [fmt_addr]
    for value in values:
        call_args.append(lower(value))

    printf_ty = Type.function(Type.int(32), [Type.pointer(Type.int(8))], True)
    printf_fn = get_module(builder).get_or_insert_function(printf_ty,
                                                           'printf')
    builder.call(printf_fn, call_args)
def int64_as_f64(builder, val):
    """
    Reinterpret the bits of a 64-bit integer value as a double.

    Asserts that *val* has the 64-bit integer type.
    """
    expected = Type.int(64)
    assert val.type == expected
    double_ty = Type.double()
    return builder.bitcast(val, double_ty)
def _get_ptr_by_index(self, index):
    """
    Return a pointer to the field at position *index* of the wrapped
    structure, computed with a GEP on ``self._value``.
    """
    zero = Constant.int(Type.int(), 0)
    field = Constant.int(Type.int(), index)
    return self._builder.gep(self._value, [zero, field])
def _prepare_call_to_object_mode(context, builder, pyapi, func,
                                 signature, args, env):
    """
    Emit the IR that wraps every native array in *args* into a Python
    object through ``numba_ndarray_new`` and then calls *func* with those
    objects.

    Returns a pair ``(innercall, error)``: ``status.code`` of the call to
    *func*, and the value loaded from the error flag slot at the return
    block.
    """
    mod = builder.module

    # Every exit path of the emitted code converges on this block.
    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    # The error flag starts out true; it is overwritten after each object
    # creation and again after the call to *func*.
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        builder.store(Constant.null(ll_pyobj), ptr)  # initialize to NULL

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, value=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # A NULL object sets the error flag and jumps to the return block.
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(builder, func, types.pyobject, object_sig, ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    # Slots were initialised to NULL above, so slots of arrays that were
    # never converted are still NULL when loaded here.
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace, can_dynsized=False): elemcount = reduce(operator.mul, shape, 1) # Check for valid shape for this type of allocation. # Only 1d arrays can be dynamic. dynamic_smem = elemcount <= 0 and can_dynsized and len(shape) == 1 if elemcount <= 0 and not dynamic_smem: raise ValueError("array length <= 0") # Check that we support the requested dtype other_supported_type = isinstance(dtype, (types.Record, types.Boolean)) if dtype not in types.number_domain and not other_supported_type: raise TypeError("unsupported type: %s" % dtype) lldtype = context.get_data_type(dtype) laryty = Type.array(lldtype, elemcount) if addrspace == nvvm.ADDRSPACE_LOCAL: # Special case local address space allocation to use alloca # NVVM is smart enough to only use local memory if no register is # available dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name) else: lmod = builder.module # Create global variable in the requested address space gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace) # Specify alignment to avoid misalignment bug align = context.get_abi_sizeof(lldtype) # Alignment is required to be a power of 2 for shared memory. If it is # not a power of 2 (e.g. for a Record array) then round up accordingly. gvmem.align = 1 << (align - 1).bit_length() if dynamic_smem: gvmem.linkage = lc.LINKAGE_EXTERNAL else: ## Comment out the following line to workaround a NVVM bug ## which generates a invalid symbol name when the linkage ## is internal and in some situation. 
## See _get_unique_smem_id() # gvmem.linkage = lc.LINKAGE_INTERNAL gvmem.initializer = lc.Constant.undef(laryty) # Convert to generic address-space conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace) addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace)) dataptr = builder.call(conv, [addrspaceptr]) targetdata = _get_target_data(context) lldtype = context.get_data_type(dtype) itemsize = lldtype.get_abi_size(targetdata) # Compute strides laststride = itemsize rstrides = [] for i, lastsize in enumerate(reversed(shape)): rstrides.append(laststride) laststride *= lastsize strides = [s for s in reversed(rstrides)] kstrides = [context.get_constant(types.intp, s) for s in strides] # Compute shape if dynamic_smem: # Compute the shape based on the dynamic shared memory configuration. # Unfortunately NVVM does not provide an intrinsic for the # %dynamic_smem_size register, so we must read it using inline # assembly. get_dynshared_size = InlineAsm.get(Type.function(Type.int(), []), "mov.u32 $0, %dynamic_smem_size;", '=r', side_effect=True) dynsmem_size = builder.zext(builder.call(get_dynshared_size, []), Type.int(width=64)) # Only 1-D dynamic shared memory is supported so the following is a # sufficient construction of the shape kitemsize = context.get_constant(types.intp, itemsize) kshape = [builder.udiv(dynsmem_size, kitemsize)] else: kshape = [context.get_constant(types.intp, s) for s in shape] # Create array object ndim = len(shape) aryty = types.Array(dtype=dtype, ndim=ndim, layout='C') ary = context.make_array(aryty)(context, builder) context.populate_array(ary, data=builder.bitcast(dataptr, ary.data.type), shape=kshape, strides=kstrides, itemsize=context.get_constant(types.intp, itemsize), meminfo=None) return ary._getvalue()
def get_data_type(self, ty):
    """
    Get a LLVM data representation of the Numba type *ty* that is safe
    for storage.  Record data are stored as byte array.

    Lookup order: the type registry, the built-in type categories handled
    below, ``STRUCT_TYPES``, the struct registry, pairs, and finally
    ``LTYPEMAP``.
    """
    try:
        fac = type_registry.match(ty)
    except KeyError:
        pass
    else:
        return fac(self, ty)

    # Categories that are all represented as a Python object pointer.
    pyobject_kinds = (types.Dummy, types.Module, types.Function,
                      types.Dispatcher, types.Object, types.Macro)
    if isinstance(ty, pyobject_kinds):
        return PYOBJECT

    if isinstance(ty, types.CPointer):
        pointee = self.get_data_type(ty.dtype)
        return Type.pointer(pointee)

    if isinstance(ty, types.Optional):
        return self.get_struct_type(self.make_optional(ty))

    if isinstance(ty, types.Array):
        return self.get_struct_type(self.make_array(ty))

    if isinstance(ty, types.UniTuple):
        element = self.get_value_type(ty.dtype)
        return Type.array(element, ty.count)

    if isinstance(ty, types.Tuple):
        members = [self.get_value_type(t) for t in ty]
        return Type.struct(members)

    if isinstance(ty, types.Record):
        # Record are represented as byte array
        return Type.struct([Type.array(Type.int(8), ty.size)])

    if isinstance(ty, types.UnicodeCharSeq):
        charty = Type.int(numpy_support.sizeof_unicode_char * 8)
        return Type.struct([Type.array(charty, ty.count)])

    if isinstance(ty, types.CharSeq):
        charty = Type.int(8)
        return Type.struct([Type.array(charty, ty.count)])

    if ty in STRUCT_TYPES:
        return self.get_struct_type(STRUCT_TYPES[ty])

    try:
        impl = struct_registry.match(ty)
    except KeyError:
        pass
    else:
        return self.get_struct_type(impl(ty))

    if isinstance(ty, types.Pair):
        pairty = self.make_pair(ty.first_type, ty.second_type)
        return self.get_struct_type(pairty)

    return LTYPEMAP[ty]
def print_string(self, builder, text):
    """
    Emit a call to the C ``puts`` function with *text* and return the
    call's result.
    """
    puts_ty = Type.function(Type.int(), [GENERIC_POINTER])
    puts_fn = builder.module.get_or_insert_function(puts_ty, "puts")
    return builder.call(puts_fn, [text])
def generate_kernel_wrapper(self, func, argtypes):
    """
    Generate a ``void`` kernel wrapper around *func* and return it.

    The wrapper is built in a separate module named "cuda.kernel.wrapper",
    marked as a CUDA kernel, and then linked into the module that owns
    *func*.  When the wrapped call reports a status that is neither OK nor
    a Python exception, the status code and the thread/block indices are
    recorded in global variables named after the wrapper.
    """
    module = func.module

    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    fnty = Type.function(Type.int(),
                         [self.call_conv.get_return_type(types.pyobject)] + argtys)
    # Declaration of the wrapped function inside the wrapper module.
    func = wrapper_module.add_function(fnty, name=func.name)
    wrapfn = wrapper_module.add_function(wrapfnty, name="cudaPy_" + func.name)
    builder = Builder.new(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        # Zero-initialised integer global named <wrapper name><postfix>.
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(builder, func, types.void,
                                             argtypes, callargs)

    # Check error status
    with cgutils.if_likely(builder, status.is_ok):
        builder.ret_void()

    with builder.if_then(builder.not_(status.is_python_exc)):
        # User exception raised
        old = Constant.null(gv_exc.type.pointee)

        # Use atomic cmpxchg to prevent rewriting the error status
        # Only the first error is recorded

        casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                              old.type])

        casfn = wrapper_module.add_function(casfnty,
                                            name="___numba_cas_hack")
        xchg = builder.call(casfn, [gv_exc, old, status.code])
        changed = builder.icmp(ICMP_EQ, xchg, old)

        # If the xchange is successful, save the thread ID.
        sreg = nvvmutils.SRegBuilder(builder)
        with builder.if_then(changed):
            for dim, ptr, in zip("xyz", gv_tid):
                val = sreg.tid(dim)
                builder.store(val, ptr)

            for dim, ptr, in zip("xyz", gv_ctaid):
                val = sreg.ctaid(dim)
                builder.store(val, ptr)

    builder.ret_void()
    # force inline
    # inline_function(status.code)
    nvvm.set_cuda_kernel(wrapfn)
    # Round-trip the wrapper module through its textual IR to link it in.
    module.link_in(ll.parse_assembly(str(wrapper_module)))
    module.verify()

    # Look the wrapper up again in the module it was linked into.
    wrapfn = module.get_function(wrapfn.name)
    return wrapfn
def boolean_to_any(context, builder, fromty, toty, val):
    """
    Cast a boolean value to *toty*.

    The value is zero-extended to an integer and then cast from int32 to
    the target type through ``context.cast``.
    """
    # Casting from boolean to anything first casts to int32
    widened = builder.zext(val, Type.int())
    return context.cast(builder, widened, types.int32, toty)
def ptx_shfl_sync_i32(context, builder, sig, args):
    """
    Lower a warp shuffle on top of the ``llvm.nvvm.shfl.sync.i32``
    intrinsic.

    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
    function supports both 32 and 64 bit ints and floats, so for feature
    parity, i64, f32, and f64 are implemented.  Floats are bitcast to an
    int, shuffled, then bitcast back.  64-bit values are split into two
    32-bit halves, each half is shuffled, and the halves are packed back
    together.
    """
    mask, mode, value, index, clamp = args
    value_type = sig.args[2]
    if value_type in types.real_domain:
        value = builder.bitcast(value, Type.int(value_type.bitwidth))

    lmod = builder.module
    result_ty = Type.struct((Type.int(32), Type.int(1)))
    param_tys = (Type.int(32), Type.int(32), Type.int(32),
                 Type.int(32), Type.int(32))
    shfl = lmod.get_or_insert_function(Type.function(result_ty, param_tys),
                                       name='llvm.nvvm.shfl.sync.i32')

    if value_type.bitwidth == 32:
        # Single shuffle; floats only need their payload cast back.
        ret = builder.call(shfl, (mask, mode, value, index, clamp))
        if value_type == types.float32:
            payload = builder.extract_value(ret, 0)
            pred = builder.extract_value(ret, 1)
            as_float = builder.bitcast(payload, Type.float())
            ret = cgutils.make_anonymous_struct(builder, (as_float, pred))
        return ret

    # Shuffle the low and the high 32 bits separately.
    low_in = builder.trunc(value, Type.int(32))
    shifted_in = builder.lshr(value, context.get_constant(types.i8, 32))
    high_in = builder.trunc(shifted_in, Type.int(32))
    low_ret = builder.call(shfl, (mask, mode, low_in, index, clamp))
    high_ret = builder.call(shfl, (mask, mode, high_in, index, clamp))
    low_out = builder.extract_value(low_ret, 0)
    high_out = builder.extract_value(high_ret, 0)
    # The predicate is taken from the shuffle of the low half.
    pred = builder.extract_value(low_ret, 1)
    low_64 = builder.zext(low_out, Type.int(64))
    high_64 = builder.zext(high_out, Type.int(64))
    high_shifted = builder.shl(high_64, context.get_constant(types.i8, 32))
    combined = builder.or_(high_shifted, low_64)
    if value_type == types.float64:
        combined = builder.bitcast(combined, Type.double())
    return cgutils.make_anonymous_struct(builder, (combined, pred))
def ptx_syncthreads_or(context, builder, sig, args):
    """
    Emit a call to the ``llvm.nvvm.barrier0.or`` intrinsic with *args* and
    return its i32 result.
    """
    i32 = Type.int(32)
    intrinsic_ty = Type.function(i32, (i32,))
    intrinsic = builder.module.get_or_insert_function(
        intrinsic_ty, name='llvm.nvvm.barrier0.or')
    return builder.call(intrinsic, args)
def generate_kernel_wrapper(self, library, fname, argtypes, debug):
    """
    Generate the kernel wrapper in the given ``library``.
    The function being wrapped have the name ``fname`` and argument types
    ``argtypes``.  The wrapper function is returned.

    The error-recording code (status check, compare-and-swap of the error
    code, saving of thread/block indices) is emitted only when *debug* is
    true.
    """
    arginfo = self.get_arg_packer(argtypes)
    argtys = list(arginfo.argument_types)
    wrapfnty = Type.function(Type.void(), argtys)
    wrapper_module = self.create_module("cuda.kernel.wrapper")
    fnty = Type.function(Type.int(),
                         [self.call_conv.get_return_type(types.pyobject)] + argtys)
    # Declaration of the wrapped function inside the wrapper module.
    func = wrapper_module.add_function(fnty, name=fname)

    # The wrapper's name is the wrapped name placed in the 'cudapy' namespace.
    prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
    wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
    builder = Builder(wrapfn.append_basic_block(''))

    # Define error handling variables
    def define_error_gv(postfix):
        # Zero-initialised integer global named <wrapper name><postfix>.
        gv = wrapper_module.add_global_variable(Type.int(),
                                                name=wrapfn.name + postfix)
        gv.initializer = Constant.null(gv.type.pointee)
        return gv

    gv_exc = define_error_gv("__errcode__")
    gv_tid = []
    gv_ctaid = []
    for i in 'xyz':
        gv_tid.append(define_error_gv("__tid%s__" % i))
        gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

    callargs = arginfo.from_arguments(builder, wrapfn.args)
    status, _ = self.call_conv.call_function(
        builder, func, types.void, argtypes, callargs)

    if debug:
        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded

            casfnty = lc.Type.function(old.type, [gv_exc.type, old.type,
                                                  old.type])

            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

    builder.ret_void()

    nvvm.set_cuda_kernel(wrapfn)
    library.add_ir_module(wrapper_module)
    library.finalize()
    # Look the wrapper up again through the library.
    wrapfn = library.get_function(wrapfn.name)
    return wrapfn
"""

from __future__ import print_function, division, absolute_import

import collections
from contextlib import contextmanager
import functools
import re

from llvmlite import ir
from llvmlite.llvmpy.core import Constant, Type
import llvmlite.llvmpy.core as lc

from . import utils


# 1-bit integer constants for true and false.
true_bit = Constant.int(Type.int(1), 1)
false_bit = Constant.int(Type.int(1), 0)
# 8-bit integer constants for true and false.
true_byte = Constant.int(Type.int(8), 1)
false_byte = Constant.int(Type.int(8), 0)

# Integer type that is utils.MACHINE_BITS wide.
intp_t = Type.int(utils.MACHINE_BITS)


def as_bool_byte(builder, value):
    """
    Zero-extend *value* to an 8-bit integer and return the result.
    """
    return builder.zext(value, Type.int(8))


def as_bool_bit(builder, value):
    """
    Return the result of comparing *value* for inequality against the null
    constant of its own type.
    """
    return builder.icmp(lc.ICMP_NE, value, Constant.null(value.type))
def build(self):
    """
    Emit the gufunc wrapper around ``self.func`` and return it.

    The wrapper is a void function with the parameters
    ``(i8** args, intp* dims, intp* steps, i8* data)`` named
    ``"__gufunc__." + self.func.name``.  It loads the loop count from the
    first element of ``dims``, loads one dimension per distinct symbol of
    ``self.sin`` from the following elements, and calls the core function
    once per loop iteration through ``self.gen_loop_body``.

    Returns a tuple ``(wrapper, self.env)`` where ``wrapper`` is the
    function looked up by name in ``self.library`` after the wrapper module
    has been added to it.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = self.library.create_ir_module('')
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    func = wrapper_module.add_function(func_type, name=self.func.name)
    func.attributes.add("alwaysinline")
    wrapper = wrapper_module.add_function(fnty,
                                          "__gufunc__." + self.func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance.  Only symbols of the inputs receive an index.
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # dims[0] is the loop count, so symbol number i lives at dims[i + 1].
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    # Each array takes len(sym) step slots, starting after the first
    # len(sin) + len(sout) entries.
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += len(sym)
        arrays.append(ary)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        args = [a.get_array_at_offset(loop.index) for a in arrays]
        # Only the error flag of the loop body is needed here.
        _, error = self.gen_loop_body(builder, pyapi, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)

    builder.ret_void()

    self.library.add_ir_module(wrapper_module)
    wrapper = self.library.get_function(wrapper.name)

    # Set core function to internal so that it is not generated
    self.func.linkage = LINKAGE_INTERNAL

    return wrapper, self.env
def int32_as_f32(builder, val):
    """
    Reinterpret a 32-bit integer IR value as a float via a bitcast.

    *val* must have the 32-bit integer type; this is checked with an
    assertion.  The bitcast instruction built on *builder* is returned.
    """
    assert val.type == Type.int(32)
    float_type = Type.float()
    return builder.bitcast(val, float_type)
def define_error_gv(postfix):
    """
    Add a null-initialized integer global to ``wrapper_module``.

    The global is named ``wrapfn.name + postfix``; both ``wrapper_module``
    and ``wrapfn`` come from the enclosing scope.  The new global variable
    is returned.
    """
    gv_name = wrapfn.name + postfix
    gv = wrapper_module.add_global_variable(Type.int(), name=gv_name)
    null_value = Constant.null(gv.type.pointee)
    gv.initializer = null_value
    return gv
def lower_inst(self, inst):
    """
    Lower a single IR instruction *inst* using Python C-API calls.

    Dispatches on the instruction class.  Item, attribute and dict-store
    instructions call the matching ``self.pyapi`` helper and pass its
    result to ``self.check_int_status``.

    Raises NotImplementedError for an unrecognized instruction class.
    """
    if isinstance(inst, ir.Assign):
        value = self.lower_assign(inst)
        self.storevar(value, inst.target.name)

    elif isinstance(inst, ir.SetItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)
        value = self.loadvar(inst.value.name)
        ok = self.pyapi.object_setitem(target, index, value)
        self.check_int_status(ok)

    elif isinstance(inst, ir.DelItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)
        ok = self.pyapi.object_delitem(target, index)
        self.check_int_status(ok)

    elif isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        ok = self.pyapi.object_setattr(target,
                                       self._freeze_string(inst.attr),
                                       value)
        self.check_int_status(ok)

    elif isinstance(inst, ir.DelAttr):
        target = self.loadvar(inst.target.name)
        ok = self.pyapi.object_delattr(target,
                                       self._freeze_string(inst.attr))
        self.check_int_status(ok)

    elif isinstance(inst, ir.StoreMap):
        dct = self.loadvar(inst.dct.name)
        key = self.loadvar(inst.key.name)
        value = self.loadvar(inst.value.name)
        ok = self.pyapi.dict_setitem(dct, key, value)
        self.check_int_status(ok)

    elif isinstance(inst, ir.Return):
        retval = self.loadvar(inst.value.name)
        if self.generator_info:
            # StopIteration
            # We own a reference to the "return value", but we
            # don't return it.
            self.pyapi.decref(retval)
            self.genlower.return_from_generator(self)
            return
        # No need to incref() as the reference is already owned.
        self.call_conv.return_value(self.builder, retval)

    elif isinstance(inst, ir.Branch):
        cond = self.loadvar(inst.cond.name)
        # A 1-bit condition is used directly; anything else goes through
        # pyapi.object_istrue() first.
        if cond.type == Type.int(1):
            istrue = cond
        else:
            istrue = self.pyapi.object_istrue(cond)
        # Branch on "istrue != 0".
        zero = lc.Constant.null(istrue.type)
        pred = self.builder.icmp(lc.ICMP_NE, istrue, zero)
        tr = self.blkmap[inst.truebr]
        fl = self.blkmap[inst.falsebr]
        self.builder.cbranch(pred, tr, fl)

    elif isinstance(inst, ir.Jump):
        target = self.blkmap[inst.target]
        self.builder.branch(target)

    elif isinstance(inst, ir.Del):
        self.delvar(inst.value)

    elif isinstance(inst, ir.Raise):
        if inst.exception is not None:
            exc = self.loadvar(inst.exception.name)
            # A reference will be stolen by raise_object() and another
            # by return_exception_raised().
            self.incref(exc)
        else:
            # No exception object on the instruction: pass None through.
            exc = None
        self.pyapi.raise_object(exc)
        self.return_exception_raised()

    else:
        raise NotImplementedError(type(inst), inst)
from llvmlite.llvmpy.core import Type, Constant, LLVMException import llvmlite.binding as ll from numba.core import types, utils, typing, datamodel, debuginfo, funcdesc, config, cgutils, imputils from numba.core import event from numba import _dynfunc, _helperlib from numba.core.compiler_lock import global_compiler_lock from numba.core.pythonapi import PythonAPI from numba.np import arrayobj from numba.core.imputils import (user_function, user_generator, builtin_registry, impl_ret_borrowed, RegistryLoader) from numba.cpython import builtins GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER void_ptr = GENERIC_POINTER class OverloadSelector(object): """ An object matching an actual signature against a registry of formal signatures and choosing the best candidate, if any. In the current implementation: - a "signature" is a tuple of type classes or type instances - the "best candidate" is the most specific match """ def __init__(self):
from llvmlite.llvmpy.core import Type, Constant
import llvmlite.llvmpy.core as lc

from numba import npdatetime, types, typing, cgutils, utils
from numba.targets.imputils import (builtin, builtin_attr, implement,
                                    impl_attribute, impl_attribute_generic,
                                    iterator_impl, iternext_impl,
                                    struct_factory, type_factory)
from numba.typing import signature


# Importing this module fails outright when the support flag is false.
if not npdatetime.NPDATETIME_SUPPORTED:
    raise NotImplementedError(
        "numpy.datetime64 unsupported in this configuration")


# datetime64 and timedelta64 use the same internal representation
DATETIME64 = TIMEDELTA64 = Type.int(64)
# The NaT value from npdatetime as a 64-bit integer constant.
NAT = Constant.int(TIMEDELTA64, npdatetime.NAT)

# Two-element tuple of the NPTimedelta kind.
# NOTE(review): presumably the argument signature of timedelta binary
# operators, judging by the name -- its uses are not visible here.
TIMEDELTA_BINOP_SIG = (types.Kind(types.NPTimedelta), ) * 2


@type_factory(types.NPDatetime)
def llvm_datetime_type(context, tp):
    """Return the LLVM type for NPDatetime: a 64-bit integer."""
    return DATETIME64


@type_factory(types.NPTimedelta)
def llvm_timedelta_type(context, tp):
    """Return the LLVM type for NPTimedelta: a 64-bit integer."""
    return TIMEDELTA64
from llvmlite.llvmpy.core import Type, Constant, LLVMException import llvmlite.binding as ll import numba from numba import types, utils, cgutils, typing, numpy_support, _helperlib from numba.pythonapi import PythonAPI from numba.targets.imputils import (user_function, python_attr_impl, builtin_registry, impl_attribute, struct_registry, type_registry) from . import arrayobj, builtins, iterators, rangeobj, optional try: from . import npdatetime except NotImplementedError: pass GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER LTYPEMAP = { types.pyobject: PYOBJECT, types.boolean: Type.int(8), types.uint8: Type.int(8), types.uint16: Type.int(16), types.uint32: Type.int(32), types.uint64: Type.int(64), types.int8: Type.int(8), types.int16: Type.int(16), types.int32: Type.int(32), types.int64: Type.int(64), types.float32: Type.float(), types.float64: Type.double(),
def alloc_boolean_result(builder, name='ret'):
    """
    Reserve a 1-bit stack slot called *name* through
    ``cgutils.alloca_once`` and return it.  Nothing is stored into the
    slot here.
    """
    bit_type = Type.int(1)
    return cgutils.alloca_once(builder, bit_type, name=name)
def cast(self, builder, val, fromty, toty):
    """
    Emit the conversion of *val* from type *fromty* to type *toty*.

    Returns the converted value, or *val* itself when the types are equal,
    *toty* is ``types.Any`` or *toty* is a ``types.Kind``.

    Raises NotImplementedError when no branch produces a value for the
    given pair of types.
    """
    if fromty == toty or toty == types.Any or isinstance(toty, types.Kind):
        return val

    elif ((fromty in types.unsigned_domain and
           toty in types.signed_domain) or
          (fromty in types.integer_domain and
           toty in types.unsigned_domain)):
        # unsigned -> signed, or any integer -> unsigned:
        # zero-extend when widening, truncate when narrowing.
        lfrom = self.get_value_type(fromty)
        lto = self.get_value_type(toty)
        if lfrom.width <= lto.width:
            return builder.zext(val, lto)
        elif lfrom.width > lto.width:
            return builder.trunc(val, lto)

    elif fromty in types.signed_domain and toty in types.signed_domain:
        # signed -> signed: sign-extend when widening, else truncate.
        lfrom = self.get_value_type(fromty)
        lto = self.get_value_type(toty)
        if lfrom.width <= lto.width:
            return builder.sext(val, lto)
        elif lfrom.width > lto.width:
            return builder.trunc(val, lto)

    elif fromty in types.real_domain and toty in types.real_domain:
        # Only float32 <-> float64 is handled; any other pair falls out of
        # the chain and reaches the NotImplementedError at the bottom.
        lty = self.get_value_type(toty)
        if fromty == types.float32 and toty == types.float64:
            return builder.fpext(val, lty)
        elif fromty == types.float64 and toty == types.float32:
            return builder.fptrunc(val, lty)

    elif fromty in types.real_domain and toty in types.complex_domain:
        # The real part is converted to the component float type of the
        # target complex; the imaginary part is a zero constant.
        # NOTE(review): `real` stays unbound if fromty is neither float32
        # nor float64 -- confirm real_domain contains only those two.
        if fromty == types.float32:
            if toty == types.complex128:
                real = self.cast(builder, val, fromty, types.float64)
            else:
                real = val

        elif fromty == types.float64:
            if toty == types.complex64:
                real = self.cast(builder, val, fromty, types.float32)
            else:
                real = val

        if toty == types.complex128:
            imag = self.get_constant(types.float64, 0)
        elif toty == types.complex64:
            imag = self.get_constant(types.float32, 0)
        else:
            raise Exception("unreachable")

        cmplx = self.make_complex(toty)(self, builder)
        cmplx.real = real
        cmplx.imag = imag
        return cmplx._getvalue()

    elif fromty in types.integer_domain and toty in types.real_domain:
        # The signedness of the source selects the int -> float instruction.
        lty = self.get_value_type(toty)
        if fromty in types.signed_domain:
            return builder.sitofp(val, lty)
        else:
            return builder.uitofp(val, lty)

    elif toty in types.integer_domain and fromty in types.real_domain:
        # The signedness of the target selects the float -> int instruction.
        lty = self.get_value_type(toty)
        if toty in types.signed_domain:
            return builder.fptosi(val, lty)
        else:
            return builder.fptoui(val, lty)

    elif fromty in types.integer_domain and toty in types.complex_domain:
        # Integer becomes the real part; the imaginary part is zero.
        cmplxcls, flty = builtins.get_complex_info(toty)
        cmpl = cmplxcls(self, builder)
        cmpl.real = self.cast(builder, val, fromty, flty)
        cmpl.imag = self.get_constant(flty, 0)
        return cmpl._getvalue()

    elif fromty in types.complex_domain and toty in types.complex_domain:
        # Cast the real and imaginary components separately.
        srccls, srcty = builtins.get_complex_info(fromty)
        dstcls, dstty = builtins.get_complex_info(toty)
        src = srccls(self, builder, value=val)
        dst = dstcls(self, builder)
        dst.real = self.cast(builder, src.real, srcty, dstty)
        dst.imag = self.cast(builder, src.imag, srcty, dstty)
        return dst._getvalue()

    elif (isinstance(toty, types.UniTuple) and
          isinstance(fromty, types.UniTuple) and
          len(fromty) == len(toty)):
        # Homogeneous tuples of equal length: cast element by element
        # using the two dtypes.
        olditems = cgutils.unpack_tuple(builder, val, len(fromty))
        items = [self.cast(builder, i, fromty.dtype, toty.dtype)
                 for i in olditems]
        tup = self.get_constant_undef(toty)
        # NOTE(review): the loop variable rebinds the `val` parameter;
        # harmless here because the branch returns right after the loop.
        for idx, val in enumerate(items):
            tup = builder.insert_value(tup, val, idx)
        return tup

    elif (isinstance(fromty, (types.UniTuple, types.Tuple)) and
          isinstance(toty, (types.UniTuple, types.Tuple)) and
          len(toty) == len(fromty)):
        # Any tuple pair of equal length: cast each element with its own
        # source and target type.
        olditems = cgutils.unpack_tuple(builder, val, len(fromty))
        items = [self.cast(builder, i, f, t)
                 for i, f, t in zip(olditems, fromty, toty)]
        tup = self.get_constant_undef(toty)
        for idx, val in enumerate(items):
            tup = builder.insert_value(tup, val, idx)
        return tup

    elif toty == types.boolean:
        return self.is_true(builder, fromty, val)

    elif fromty == types.boolean:
        # first promote to int32
        asint = builder.zext(val, Type.int())
        # then promote to number
        return self.cast(builder, asint, types.int32, toty)

    elif fromty == types.none and isinstance(toty, types.Optional):
        return self.make_optional_none(builder, toty.type)

    elif isinstance(toty, types.Optional):
        # Cast to the wrapped type, then wrap the result as an optional.
        casted = self.cast(builder, val, fromty, toty.type)
        return self.make_optional_value(builder, toty.type, casted)

    elif isinstance(fromty, types.Optional):
        # Unwrap an optional: emit a TypeError return for the invalid
        # case, then yield the payload.
        optty = self.make_optional(fromty)
        optval = optty(self, builder, value=val)
        validbit = cgutils.as_bool_bit(builder, optval.valid)
        with cgutils.if_unlikely(builder, builder.not_(validbit)):
            msg = "expected %s, got None" % (fromty.type, )
            self.call_conv.return_user_exc(builder, TypeError, (msg, ))

        return optval.data

    elif (isinstance(fromty, types.Array) and
          isinstance(toty, types.Array)):
        # Type inference should have prevented illegal array casting.
        assert toty.layout == 'A'
        return val

    # Reached when no branch matched, or when a matched branch had no
    # applicable sub-case.
    raise NotImplementedError("cast", val, fromty, toty)
""" Generic helpers for LLVM code generation. """ from __future__ import print_function, division, absolute_import from contextlib import contextmanager import functools import re from llvmlite import ir from llvmlite.llvmpy.core import Constant, Type import llvmlite.llvmpy.core as lc from . import utils true_bit = Constant.int(Type.int(1), 1) false_bit = Constant.int(Type.int(1), 0) true_byte = Constant.int(Type.int(8), 1) false_byte = Constant.int(Type.int(8), 0) def as_bool_byte(builder, value): return builder.zext(value, Type.int(8)) def as_bool_bit(builder, value): return builder.icmp(lc.ICMP_NE, value, Constant.null(value.type)) def make_anonymous_struct(builder, values, struct_type=None): """
import sys import ctypes import struct as struct_ from llvmlite.llvmpy.core import Type, Constant _trace_refs_ = hasattr(sys, "getobjects") _plat_bits = struct_.calcsize("@P") * 8 _int8 = Type.int(8) _int32 = Type.int(32) _void_star = Type.pointer(_int8) _int8_star = _void_star _sizeof_py_ssize_t = ctypes.sizeof(getattr(ctypes, "c_size_t")) _llvm_py_ssize_t = Type.int(_sizeof_py_ssize_t * 8) if _trace_refs_: _pyobject_head = Type.struct( [_void_star, _void_star, _llvm_py_ssize_t, _void_star]) _pyobject_head_init = Constant.struct([ Constant.null(_void_star), # _ob_next Constant.null(_void_star), # _ob_prev Constant.int(_llvm_py_ssize_t, 1), # ob_refcnt Constant.null(_void_star), # ob_type ]) else: _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star]) _pyobject_head_init = Constant.struct([
def lower_inst(self, inst):
    """
    Lower a single typed IR instruction *inst*.

    Dispatches on the instruction class.  The SetItem and SetAttr branches
    return whatever the looked-up implementation returns; the other
    branches return None.

    Raises NotImplementedError for an unrecognized instruction class.
    """
    if config.DEBUG_JIT:
        self.context.debug_print(self.builder, str(inst))
    if isinstance(inst, ir.Assign):
        ty = self.typeof(inst.target.name)
        val = self.lower_assign(ty, inst)
        self.storevar(val, inst.target.name)

    elif isinstance(inst, ir.Branch):
        cond = self.loadvar(inst.cond.name)
        tr = self.blkmap[inst.truebr]
        fl = self.blkmap[inst.falsebr]

        # Convert the condition to a boolean; the result must be 1 bit wide.
        condty = self.typeof(inst.cond.name)
        pred = self.context.cast(self.builder, cond, condty, types.boolean)
        assert pred.type == Type.int(1), ("cond is not i1: %s" % pred.type)
        self.builder.cbranch(pred, tr, fl)

    elif isinstance(inst, ir.Jump):
        target = self.blkmap[inst.target]
        self.builder.branch(target)

    elif isinstance(inst, ir.Return):
        val = self.loadvar(inst.value.name)
        oty = self.typeof(inst.value.name)
        ty = self.fndesc.restype
        if isinstance(ty, types.Optional):
            # If returning an optional type
            self.call_conv.return_optional_value(self.builder, ty, oty, val)
            return
        # Cast only when the value's type differs from the declared
        # return type.
        if ty != oty:
            val = self.context.cast(self.builder, val, oty, ty)
        retval = self.context.get_return_value(self.builder, ty, val)
        self.call_conv.return_value(self.builder, retval)

    elif isinstance(inst, ir.SetItem):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        index = self.loadvar(inst.index.name)

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        indexty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        impl = self.context.get_function('setitem', signature)

        # Convert argument to match
        if isinstance(targetty, types.Optional):
            target = self.context.cast(self.builder, target, targetty,
                                       targetty.type)
        else:
            assert targetty == signature.args[0]

        index = self.context.cast(self.builder, index, indexty,
                                  signature.args[1])
        value = self.context.cast(self.builder, value, valuety,
                                  signature.args[2])

        return impl(self.builder, (target, index, value))

    elif isinstance(inst, ir.Del):
        # Nothing is emitted for Del in this lowering.
        pass

    elif isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == targetty
        impl = self.context.get_setattr(inst.attr, signature)

        # Convert argument to match
        value = self.context.cast(self.builder, value, valuety,
                                  signature.args[1])

        return impl(self.builder, (target, value))

    elif isinstance(inst, ir.Raise):
        self.lower_raise(inst)

    else:
        raise NotImplementedError(type(inst))
def build_ufunc_wrapper(library, context, fname, signature, objmode, cres):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    The wrapper is a void function with the parameters
    ``(i8** args, intp* dims, intp* steps, i8* data)`` named
    ``"__ufunc__." + fname``, built in a fresh ``'ufunc_wrapper'`` code
    library that is linked against *library*.

    *objmode* selects the loop body: a single loop bracketed by
    ``gil_ensure`` / ``gil_release`` when true; otherwise a fast loop when
    every input is unit-strided and a general loop when not.

    Returns ``wrapperlib.get_pointer_to_function`` for the wrapper's name.
    """
    assert isinstance(fname, str)
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')
    # In object mode every argument and the return value are pyobjects.
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)

    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))

    # Prepare Environment
    envname = context.get_env_name(cres.fndesc)
    env = cres.environment
    envptr = builder.load(context.declare_env_global(builder.module, envname))

    # Emit loop
    # The loop count is the first element of dims.
    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ))

    # Prepare output
    # The output uses the argument slot right after the inputs.
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices
    # One zero-initialized intp stack slot per input, plus one for the
    # output.
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the unit-stride flag of every input.
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            # NOTE(review): `slowloop` is never read in this function.
            slowloop = build_obj_loop_body(context, func, builder,
                                           arrays, out, offsets,
                                           store_offset, signature,
                                           pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    # NOTE(review): `fastloop` is never read in this
                    # function.
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    loop.index, pyapi,
                                                    env=envptr)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    pyapi, env=envptr)

        builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
def as_bool_byte(builder, value):
    """
    Zero-extend *value* to an 8-bit integer and return the resulting
    instruction built on *builder*.
    """
    byte_type = Type.int(8)
    return builder.zext(value, byte_type)
def _build_wrapper(self, library, name):
    """
    The LLVM IRBuilder code to create the gufunc wrapper.

    The *library* arg is the CodeLibrary to which the wrapper should be
    added.  The *name* arg is the name of the wrapper function being
    created.

    The wrapper is a void function with the parameters
    ``(i8** args, intp* dims, intp* steps, i8* data)``.  It loads the loop
    count from the first element of ``dims``, loads one dimension per
    distinct symbol of ``self.sin`` from the following elements, and calls
    the core function once per loop iteration through
    ``self.gen_loop_body``.

    Nothing is returned; the wrapper module is added to *library*, which is
    also linked against ``self.library``.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('_gufunc_wrapper')
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    fname = self.fndesc.llvm_func_name
    func = wrapper_module.add_function(func_type, name=fname)

    func.attributes.add("alwaysinline")
    wrapper = wrapper_module.add_function(fnty, name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))
    loopcount = builder.load(arg_dims, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes: number each distinct input symbol in order of first
    # appearance.  Only symbols of the inputs receive an index.
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # dims[0] is the loop count, so symbol number i lives at dims[i + 1].
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    # Each array takes len(sym) step slots, starting after the first
    # len(sin) + len(sout) entries.
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += len(sym)
        arrays.append(ary)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        args = [a.get_array_at_offset(loop.index) for a in arrays]
        # Only the error flag of the loop body is needed here.
        _, error = self.gen_loop_body(builder, pyapi, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)

    builder.ret_void()

    # Link
    library.add_ir_module(wrapper_module)
    library.add_linking_library(self.library)
def _prepare_call_to_object_mode(context, builder, pyapi, func,
                                 signature, args):
    """
    Emit a call to the object-mode core function *func*.

    Each native argument in *args* is converted to a PyObject*: arrays
    through the ``numba_ndarray_new`` helper, everything else through
    ``pyapi.from_native_value``.  A NULL conversion result branches
    straight to the common return block.  The object returned by the call
    and all argument objects are decref'ed.

    Returns a tuple ``(status.code, error)`` where ``error`` is the value
    loaded from a 1-bit error slot.
    """
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr,
                                    ll_intp_ptr, ll_voidptr,
                                    ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    # The error slot starts out true; it is overwritten after every
    # conversion and again after the call.
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            # Pointers to the first element of the shape / strides members.
            dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                              type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        # Record whether the conversion produced NULL and, if so, skip to
        # the return block.
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(
        builder, func, types.pyobject, object_sig,
        object_args)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    # Slots are NULL-initialized, so arguments that were never converted
    # load as NULL here.
    # NOTE(review): assumes pyapi.decref tolerates NULL -- confirm.
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def lower_inst(self, inst):
    """
    Lower a single typed IR instruction *inst*.

    Dispatches on the instruction class.  The StaticSetItem, SetItem,
    DelItem and SetAttr branches return whatever the looked-up
    implementation (or ``self.lower_setitem``) returns; the other branches
    return None.

    Raises NotImplementedError for an unrecognized instruction class.
    """
    self.debug_print(str(inst))
    if isinstance(inst, ir.Assign):
        ty = self.typeof(inst.target.name)
        val = self.lower_assign(ty, inst)
        self.storevar(val, inst.target.name)

    elif isinstance(inst, ir.Branch):
        cond = self.loadvar(inst.cond.name)
        tr = self.blkmap[inst.truebr]
        fl = self.blkmap[inst.falsebr]

        # Convert the condition to a boolean; the result must be 1 bit wide.
        condty = self.typeof(inst.cond.name)
        pred = self.context.cast(self.builder, cond, condty, types.boolean)
        assert pred.type == Type.int(1), ("cond is not i1: %s" % pred.type)
        self.builder.cbranch(pred, tr, fl)

    elif isinstance(inst, ir.Jump):
        target = self.blkmap[inst.target]
        self.builder.branch(target)

    elif isinstance(inst, ir.Return):
        if self.generator_info:
            # StopIteration
            self.genlower.return_from_generator(self)
            return
        val = self.loadvar(inst.value.name)
        oty = self.typeof(inst.value.name)
        ty = self.fndesc.restype
        if isinstance(ty, types.Optional):
            # If returning an optional type
            self.call_conv.return_optional_value(self.builder, ty, oty, val)
            return
        # Cast only when the value's type differs from the declared
        # return type.
        if ty != oty:
            val = self.context.cast(self.builder, val, oty, ty)
        retval = self.context.get_return_value(self.builder, ty, val)
        self.call_conv.return_value(self.builder, retval)

    elif isinstance(inst, ir.StaticSetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        try:
            impl = self.context.get_function('static_setitem', signature)
        except NotImplementedError:
            # No static implementation: fall back to the generic setitem
            # path using the index variable.
            return self.lower_setitem(inst.target, inst.index_var,
                                      inst.value, signature)
        else:
            target = self.loadvar(inst.target.name)
            value = self.loadvar(inst.value.name)
            valuety = self.typeof(inst.value.name)
            value = self.context.cast(self.builder, value, valuety,
                                      signature.args[2])
            # The static index is passed as-is, not as a loaded variable.
            return impl(self.builder, (target, inst.index, value))

    elif isinstance(inst, ir.SetItem):
        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        return self.lower_setitem(inst.target, inst.index, inst.value,
                                  signature)

    elif isinstance(inst, ir.DelItem):
        target = self.loadvar(inst.target.name)
        index = self.loadvar(inst.index.name)

        targetty = self.typeof(inst.target.name)
        indexty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        impl = self.context.get_function('delitem', signature)

        assert targetty == signature.args[0]
        index = self.context.cast(self.builder, index, indexty,
                                  signature.args[1])

        return impl(self.builder, (target, index))

    elif isinstance(inst, ir.Del):
        try:
            # XXX: incorrect Del injection?
            val = self.loadvar(inst.value)
        except KeyError:
            # Variable not known: nothing to release.
            pass
        else:
            self.decref(self.typeof(inst.value), val)
            self._delete_variable(inst.value)

    elif isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == targetty
        impl = self.context.get_setattr(inst.attr, signature)

        # Convert argument to match
        value = self.context.cast(self.builder, value, valuety,
                                  signature.args[1])

        return impl(self.builder, (target, value))

    elif isinstance(inst, ir.StaticRaise):
        self.lower_static_raise(inst)

    else:
        raise NotImplementedError(type(inst))
def lower_inst(self, inst):
    """
    Lower a single typed IR instruction *inst*.

    Dispatches on the instruction class.  The SetItem and SetAttr branches
    return whatever the looked-up implementation returns; the other
    branches return None.

    Raises NotImplementedError for an unrecognized instruction class.
    """
    if config.DEBUG_JIT:
        self.context.debug_print(self.builder, str(inst))
    if isinstance(inst, ir.Assign):
        ty = self.typeof(inst.target.name)
        val = self.lower_assign(ty, inst)
        self.storevar(val, inst.target.name)
        # TODO: emit incref/decref in the numba IR properly.
        # Workaround due to lack of proper incref/decref info.
        if self.context.enable_nrt:
            if isinstance(inst.value, ir.Expr) and inst.value.op == 'call':
                callexpr = inst.value
                # NPM function returns new reference
                # Only calls whose callee is typed as a Dispatcher get the
                # extra decref.
                if isinstance(self.typeof(callexpr.func.name),
                              types.Dispatcher):
                    self.decref(ty, val)

    elif isinstance(inst, ir.Branch):
        cond = self.loadvar(inst.cond.name)
        tr = self.blkmap[inst.truebr]
        fl = self.blkmap[inst.falsebr]

        # Convert the condition to a boolean; the result must be 1 bit wide.
        condty = self.typeof(inst.cond.name)
        pred = self.context.cast(self.builder, cond, condty, types.boolean)
        assert pred.type == Type.int(1), ("cond is not i1: %s" % pred.type)
        self.builder.cbranch(pred, tr, fl)

    elif isinstance(inst, ir.Jump):
        target = self.blkmap[inst.target]
        self.builder.branch(target)

    elif isinstance(inst, ir.Return):
        if self.generator_info:
            # StopIteration
            self.genlower.return_from_generator(self)
            return
        val = self.loadvar(inst.value.name)
        oty = self.typeof(inst.value.name)
        ty = self.fndesc.restype
        if isinstance(ty, types.Optional):
            # If returning an optional type
            self.call_conv.return_optional_value(self.builder, ty, oty, val)
            return
        # Cast only when the value's type differs from the declared
        # return type.
        if ty != oty:
            val = self.context.cast(self.builder, val, oty, ty)
        retval = self.context.get_return_value(self.builder, ty, val)
        self.call_conv.return_value(self.builder, retval)

    elif isinstance(inst, ir.SetItem):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        index = self.loadvar(inst.index.name)

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        indexty = self.typeof(inst.index.name)

        signature = self.fndesc.calltypes[inst]
        assert signature is not None
        impl = self.context.get_function('setitem', signature)

        # Convert argument to match
        if isinstance(targetty, types.Optional):
            target = self.context.cast(self.builder, target, targetty,
                                       targetty.type)
        else:
            assert targetty == signature.args[0]

        index = self.context.cast(self.builder, index, indexty,
                                  signature.args[1])
        value = self.context.cast(self.builder, value, valuety,
                                  signature.args[2])

        return impl(self.builder, (target, index, value))

    elif isinstance(inst, ir.Del):
        try:
            # XXX: incorrect Del injection?
            val = self.loadvar(inst.value)
        except KeyError:
            # Variable not known: nothing to release.
            pass
        else:
            self.decref(self.typeof(inst.value), val)

    elif isinstance(inst, ir.SetAttr):
        target = self.loadvar(inst.target.name)
        value = self.loadvar(inst.value.name)
        signature = self.fndesc.calltypes[inst]

        targetty = self.typeof(inst.target.name)
        valuety = self.typeof(inst.value.name)
        assert signature is not None
        assert signature.args[0] == targetty
        impl = self.context.get_setattr(inst.attr, signature)

        # Convert argument to match
        value = self.context.cast(self.builder, value, valuety,
                                  signature.args[1])

        return impl(self.builder, (target, value))

    elif isinstance(inst, ir.Raise):
        self.lower_raise(inst)

    else:
        raise NotImplementedError(type(inst))