def ptx_atomic_add_tuple(context, builder, sig, args):
    """Lower an atomic add on an array element addressed by a tuple index.

    ``sig.args`` is ``(array type, index tuple type, value type)`` and
    ``args`` carries the matching lowered values.  Returns the value
    produced by the emitted atomic call / ``atomic_rmw`` instruction.

    Raises
    ------
    TypeError
        If the value type differs from the array dtype, or the number of
        index components differs from the array's ``ndim``.
    """
    array_type, index_type, value_type = sig.args
    array_val, index_val, addend = args
    elem_type = array_type.dtype

    # Split the index tuple and cast every component to intp.
    components = cgutils.unpack_tuple(builder, index_val,
                                      count=len(index_type))
    indices = [
        context.cast(builder, component, component_type, types.intp)
        for component_type, component in zip(index_type, components)
    ]

    if elem_type != value_type:
        raise TypeError("expect %s but got %s" % (elem_type, value_type))

    if array_type.ndim != len(index_type):
        raise TypeError("indexing %d-D array with %d-D index" %
                        (array_type.ndim, len(index_type)))

    array_struct = context.make_array(array_type)(context, builder, array_val)
    elem_ptr = cgutils.get_item_pointer(builder, array_type, array_struct,
                                        indices)

    # Floats go through the declared helper functions; everything else
    # uses the builder's atomic read-modify-write.
    if elem_type == types.float32:
        declare = nvvmutils.declare_atomic_add_float32
    elif elem_type == types.float64:
        declare = nvvmutils.declare_atomic_add_float64
    else:
        return builder.atomic_rmw('add', elem_ptr, addend, 'monotonic')

    lmod = cgutils.get_module(builder)
    return builder.call(declare(lmod), (elem_ptr, addend))
def string_split_2(context, builder, sig, args):
    """Lower a two-argument split via the precompiled ``StringSplitImpl``.

    ``args`` is ``(s, sep)``.  The max-split argument passed to the
    precompiled function is the constant ``IntVal`` built from ``-1``.
    Returns the value of an array struct of type ``sig.return_type``; only
    its ``data`` field is assigned here.
    """
    module = cgutils.get_module(builder)
    precompiled = context._get_precompiled_function("StringSplitImpl")
    callee = module.get_or_insert_function(precompiled.type.pointee,
                                           precompiled.name)

    # The callee's first parameter is fed from the first argument of the
    # function currently being built, bitcast to the expected type.
    enclosing_args = context.get_arguments(cgutils.get_function(builder))
    fn_ctx = builder.bitcast(enclosing_args[0], callee.args[0].type)

    string_val, separator = args
    max_split = context.get_constant_struct(builder, IntVal, -1)

    c_string = _conv_numba_struct_to_clang(builder, string_val,
                                           callee.args[1].type)
    c_separator = _conv_numba_struct_to_clang(builder, separator,
                                              callee.args[2].type)
    c_max_split = _conv_numba_struct_to_clang(builder, max_split,
                                              callee.args[3].type)

    # The result is a StringVal whose buffer holds an array of StringVals.
    lowered_result = builder.call(
        callee, [fn_ctx, c_string, c_separator, c_max_split])
    raised_result = raise_return_type(context, builder, StringVal,
                                      lowered_result)
    result_stringval = StringValStruct(context, builder, value=raised_result)

    # Reinterpret the StringVal buffer pointer as the array's data pointer.
    out_array = context.make_array(sig.return_type)(context, builder)
    out_array.data = builder.bitcast(result_stringval.ptr,
                                     out_array.data.type)
    return out_array._getvalue()
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace,
                   can_dynsized=False):
    """Allocate storage for an array in *addrspace* and wrap it.

    The element count is the product of *shape*.  Local address space
    uses an ``alloca``; any other address space creates a module-level
    global variable and converts its address to the generic address space.
    The data pointer is then handed to ``_make_array``.

    Raises
    ------
    ValueError
        Non-local address space with element count <= 0 and
        ``can_dynsized`` false.
    TypeError
        Non-local address space with *dtype* outside
        ``types.number_domain``.
    """
    n_items = reduce(operator.mul, shape)
    elem_lltype = context.get_data_type(dtype)
    storage_type = Type.array(elem_lltype, n_items)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case: local address space allocation uses alloca.
        # NVVM is smart enough to only use local memory if no register is
        # available.
        dataptr = builder.alloca(storage_type, name=symbol_name)
        return _make_array(context, builder, dataptr, dtype, shape)

    lmod = cgutils.get_module(builder)

    # Create the global variable in the requested address space.
    gvmem = lmod.add_global_variable(storage_type, symbol_name, addrspace)

    if n_items > 0:
        gvmem.linkage = lc.LINKAGE_INTERNAL
        gvmem.initializer = lc.Constant.undef(storage_type)
    elif can_dynsized:
        # Dynamic shared memory.
        gvmem.linkage = lc.LINKAGE_EXTERNAL
    else:
        raise ValueError("array length <= 0")

    if dtype not in types.number_domain:
        raise TypeError("unsupported type: %s" % dtype)

    # Convert to the generic address space.
    byte_type = Type.int(8)
    conv = nvvmutils.insert_addrspace_conv(lmod, byte_type, addrspace)
    addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
    dataptr = builder.call(conv, [addrspaceptr])

    return _make_array(context, builder, dataptr, dtype, shape)
def imp(context, builder, sig, args):
    """Emit a call to the function described by ``fndesc``.

    ``fndesc`` is a free variable of this block (not defined here).  If
    the callee's status reports an error, the status code is propagated
    through ``context.return_errcode_propagate``.  Returns the callee's
    return value.
    """
    module = cgutils.get_module(builder)
    callee = context.declare_function(module, fndesc)
    status, result = context.call_function(
        builder, callee, fndesc.restype, fndesc.argtypes, args)

    # Error path is marked unlikely.
    with cgutils.if_unlikely(builder, status.err):
        context.return_errcode_propagate(builder, status.code)

    return result
def complex128_power_impl(context, builder, sig, args):
    """Lower ``a ** b`` for complex128 operands.

    When the exponent compares equal to ``2 + 0j`` the power is lowered
    as ``a * a`` through ``complex_mul_impl``; otherwise the external
    function ``numba.math.cpow`` is called with pointers to ``a``, ``b``
    and the result slot.  Returns the loaded result struct.
    """
    base_val, exp_val = args

    base = Complex128(context, builder, value=base_val)
    exponent = Complex128(context, builder, value=exp_val)
    result = Complex128(context, builder)
    module = cgutils.get_module(builder)

    base_ptr = base._getpointer()
    exp_ptr = exponent._getpointer()
    result_ptr = result._getpointer()

    # Optimize for squaring because cpow loses a lot of precision.
    two = context.get_constant(types.float64, 2)
    zero = context.get_constant(types.float64, 0)

    real_is_two = builder.fcmp(lc.FCMP_OEQ, exponent.real, two)
    imag_is_zero = builder.fcmp(lc.FCMP_OEQ, exponent.imag, zero)
    is_square = builder.and_(real_is_two, imag_is_zero)

    with cgutils.ifelse(builder, is_square) as (then, otherwise):
        with then:
            # Lower as a multiplication of the base with itself.
            product = complex_mul_impl(context, builder, sig,
                                       (base_val, base_val))
            squared = Complex128(context, builder, value=product)
            result.real = squared.real
            result.imag = squared.imag

        with otherwise:
            # Lower as a call to the external function; all three
            # parameters share the pointer type of ``base_ptr``.
            fnty = Type.function(Type.void(), [base_ptr.type] * 3)
            cpow = module.get_or_insert_function(fnty,
                                                 name="numba.math.cpow")
            builder.call(cpow, (base_ptr, exp_ptr, result_ptr))

    return builder.load(result_ptr)
def get_constant_struct(self, builder, ty, val):
    """Build an LLVM constant for the struct-typed Python value *val*.

    Supported types:

    * complex64 / complex128 -- a two-field struct ``(real, imag)``;
    * ``types.Tuple`` -- a struct of the per-element generic constants;
    * ``types.Record`` -- an array of int8 constants, one per byte of the
      record.

    Raises
    ------
    NotImplementedError
        For any other struct type.
    """
    assert self.is_struct_type(ty)

    if ty in types.complex_domain:
        if ty == types.complex64:
            innertype = types.float32
        elif ty == types.complex128:
            innertype = types.float64
        else:
            raise Exception("unreachable")

        real = self.get_constant(innertype, val.real)
        imag = self.get_constant(innertype, val.imag)
        return Constant.struct([real, imag])

    elif isinstance(ty, types.Tuple):
        consts = [self.get_constant_generic(builder, ty.types[i], v)
                  for i, v in enumerate(val)]
        return Constant.struct(consts)

    elif isinstance(ty, types.Record):
        # ``tostring`` is a deprecated alias of ``tobytes`` and is absent
        # from recent NumPy releases; prefer ``tobytes`` and fall back
        # only when the installed NumPy predates it.
        if hasattr(val, "tobytes"):
            raw = val.tobytes()
        else:
            raw = val.tostring()
        consts = [self.get_constant(types.int8, b) for b in bytearray(raw)]
        return Constant.array(consts[0].type, consts)

    else:
        raise NotImplementedError("%s as constant unsupported" % ty)
def compile_internal(self, builder, impl, sig, args, locals=None):
    """Invoke compiler to implement a function for a nopython function

    *impl* is compiled once per ``(impl.__code__, sig)`` key (closure cell
    values are not part of the key) and the resulting function descriptor
    is cached in ``self.cached_internal_func``.  A call to the compiled
    function is then emitted with *args*, and its return value is
    returned.

    *locals* is forwarded to the compiler; ``None`` (the default) means an
    empty mapping.
    """
    # Avoid sharing one mutable default dict across all calls.
    if locals is None:
        locals = {}

    cache_key = (impl.__code__, sig)
    fndesc = self.cached_internal_func.get(cache_key)

    if fndesc is None:
        # Compile
        from numba import compiler

        codegen = self.jit_codegen()
        library = codegen.create_library(impl.__name__)
        flags = compiler.Flags()
        flags.set("no_compile")
        flags.set("no_cpython_wrapper")
        cres = compiler.compile_internal(
            self.typing_context, self, library,
            impl, sig.args, sig.return_type, flags,
            locals=locals,
        )

        # Allow inlining the function inside callers.
        codegen.add_linking_library(cres.library)

        fndesc = cres.fndesc
        self.cached_internal_func[cache_key] = fndesc

    # Add call to the generated function
    llvm_mod = cgutils.get_module(builder)
    fn = self.declare_function(llvm_mod, fndesc)
    status, res = self.call_function(builder, fn, sig.return_type,
                                     sig.args, args)

    # Do not drop a failure reported by the callee: propagate its error
    # code to our own caller.
    with cgutils.if_unlikely(builder, status.err):
        self.return_errcode_propagate(builder, status.code)

    return res
def string_split_2(context, builder, sig, args):
    """Lower ``split(s, sep)`` by calling precompiled ``StringSplitImpl``.

    The callee receives, in order: the first argument of the function
    being built (bitcast to the callee's first parameter type), the
    string, the separator, and a constant ``IntVal`` built from ``-1``.
    The returned array struct (type ``sig.return_type``) has only its
    ``data`` field assigned by this function.
    """
    module = cgutils.get_module(builder)
    impl_fn = context._get_precompiled_function("StringSplitImpl")
    split_fn = module.get_or_insert_function(impl_fn.type.pointee,
                                             impl_fn.name)
    param_types = [param.type for param in split_fn.args]

    outer_fn = cgutils.get_function(builder)
    first_outer_arg = context.get_arguments(outer_fn)[0]
    ctx_arg = builder.bitcast(first_outer_arg, split_fn.args[0].type)

    [text, delimiter] = args
    limit = context.get_constant_struct(builder, IntVal, -1)

    call_args = [ctx_arg]
    for value, param_type in zip((text, delimiter, limit), param_types[1:4]):
        call_args.append(_conv_numba_struct_to_clang(builder, value,
                                                     param_type))

    # Result is a StringVal with an array of StringVals in the buffer.
    lowered = builder.call(split_fn, call_args)
    as_struct = raise_return_type(context, builder, StringVal, lowered)
    as_stringval = StringValStruct(context, builder, value=as_struct)

    result_array = context.make_array(sig.return_type)(context, builder)
    data_ptr = builder.bitcast(as_stringval.ptr, result_array.data.type)
    result_array.data = data_ptr
    return result_array._getvalue()
def get_constant_struct(self, builder, ty, val):
    """Return an LLVM constant representing *val* for struct type *ty*.

    * Complex types produce ``Constant.struct([real, imag])`` using
      float32 parts for complex64 and float64 parts for complex128.
    * ``types.Tuple`` produces a struct of the element constants obtained
      from ``get_constant_generic``.
    * ``types.Record`` produces an int8 array with one constant per byte
      of the record.

    Raises
    ------
    NotImplementedError
        If *ty* is none of the above.
    """
    assert self.is_struct_type(ty)

    if ty in types.complex_domain:
        if ty == types.complex64:
            innertype = types.float32
        elif ty == types.complex128:
            innertype = types.float64
        else:
            raise Exception("unreachable")

        real = self.get_constant(innertype, val.real)
        imag = self.get_constant(innertype, val.imag)
        return Constant.struct([real, imag])

    elif isinstance(ty, types.Tuple):
        consts = [
            self.get_constant_generic(builder, ty.types[i], v)
            for i, v in enumerate(val)
        ]
        return Constant.struct(consts)

    elif isinstance(ty, types.Record):
        # Prefer ``tobytes``; ``tostring`` is its deprecated alias and is
        # missing from recent NumPy releases.  Keep the fallback for
        # NumPy versions that predate ``tobytes``.
        to_bytes = getattr(val, "tobytes", None)
        raw = to_bytes() if to_bytes is not None else val.tostring()
        consts = [
            self.get_constant(types.int8, b)
            for b in bytearray(raw)
        ]
        return Constant.array(consts[0].type, consts)

    else:
        raise NotImplementedError("%s as constant unsupported" % ty)
def compile_internal(self, builder, impl, sig, args, locals=None):
    """Invoke compiler to implement a function for a nopython function

    The compiled function descriptor is cached in
    ``self.cached_internal_func`` under ``(impl.__code__, sig)``.  After
    compilation (or a cache hit) a call to the function is emitted with
    *args* and its return value is returned.

    *locals* is forwarded to the compiler; ``None`` (the default) means an
    empty mapping.
    """
    # Avoid sharing one mutable default dict across all calls.
    if locals is None:
        locals = {}

    cache_key = (impl.__code__, sig)
    fndesc = self.cached_internal_func.get(cache_key)

    if fndesc is None:
        # Compile
        cres = numba.compiler.compile_internal(self.typing_context, self,
                                               impl, sig.args,
                                               sig.return_type,
                                               locals=locals)
        llvm_func = cres.llvm_func

        # Set to linkonce one-definition-rule so that the function
        # is removed once it is linked.
        llvm_func.linkage = lc.LINKAGE_LINKONCE_ODR

        self.add_libs([cres.llvm_module])

        fndesc = cres.fndesc
        self.cached_internal_func[cache_key] = fndesc

    # Add call to the generated function
    llvm_mod = cgutils.get_module(builder)
    fn = self.declare_function(llvm_mod, fndesc)
    status, res = self.call_function(builder, fn, sig.return_type,
                                     sig.args, args)

    # Do not drop a failure reported by the callee: propagate its error
    # code to our own caller.
    with cgutils.if_unlikely(builder, status.err):
        self.return_errcode_propagate(builder, status.code)

    return res
def core(context, builder, sig, args):
    """Call the external function ``nvname`` and cast its result to boolean.

    ``nvname`` and ``ty`` are free variables of this block.  The callee is
    declared as taking one argument of the lowered ``ty`` and returning
    ``Type.int()``; that result is cast from int32 to boolean.
    """
    assert sig.return_type == types.boolean, nvname

    arg_lltype = context.get_value_type(ty)
    module = cgutils.get_module(builder)
    callee_type = Type.function(Type.int(), [arg_lltype])
    callee = module.get_or_insert_function(callee_type, name=nvname)

    raw_result = builder.call(callee, args)
    return context.cast(builder, raw_result, types.int32, types.boolean)
def atan2_f64_impl(context, builder, sig, args):
    """Lower a two-argument double-precision atan2 as an external call.

    The callee is named ``atan2_fixed`` when ``sys.platform`` is
    ``"win32"`` and ``atan2`` otherwise.
    """
    assert len(args) == 2

    module = cgutils.get_module(builder)
    signature = Type.function(Type.double(), [Type.double(), Type.double()])

    # Workaround atan2() issues under Windows
    if sys.platform == "win32":
        symbol = "atan2_fixed"
    else:
        symbol = "atan2"

    callee = module.get_or_insert_function(signature, name=symbol)
    return builder.call(callee, args)
def ptx_syncthreads(context, builder, sig, args):
    """Emit a call to the ``llvm.nvvm.barrier0`` intrinsic.

    Takes no arguments and returns the context's dummy value.
    """
    assert not args

    module = cgutils.get_module(builder)
    barrier_type = Type.function(Type.void(), ())
    barrier = module.get_or_insert_function(barrier_type,
                                            name='llvm.nvvm.barrier0')
    builder.call(barrier, ())

    return context.get_dummy_value()
def getitem_stringval(context, builder, sig, args):
    """Lower ``s[i]`` by calling precompiled ``GetItemStringValImpl``.

    ``args`` is ``(s, i)``.  The string struct is converted to the
    callee's first parameter type; the index is passed through unchanged.
    The call result is passed through ``raise_return_type`` as a
    ``StringVal``.
    """
    module = cgutils.get_module(builder)
    precompiled = context._get_precompiled_function("GetItemStringValImpl")
    callee = module.get_or_insert_function(precompiled.type.pointee,
                                           precompiled.name)

    string_val, index = args
    c_string = _conv_numba_struct_to_clang(builder, string_val,
                                           callee.args[0].type)

    lowered = builder.call(callee, [c_string, index])
    return raise_return_type(context, builder, StringVal, lowered)
def nrt_meminfo_data(self, builder, meminfo):
    """Emit a call to ``NRT_MemInfo_data`` on *meminfo*.

    The callee is declared as ``i8* (i8*)``.  Returns the call result.

    Raises
    ------
    Exception
        If ``self.enable_nrt`` is false.
    """
    if not self.enable_nrt:
        raise Exception("Require NRT")

    module = cgutils.get_module(builder)
    byte_ptr = llvmir.IntType(8).as_pointer()
    signature = llvmir.FunctionType(byte_ptr, [byte_ptr])
    callee = module.get_or_insert_function(signature,
                                           name="NRT_MemInfo_data")
    return builder.call(callee, [meminfo])
def nrt_meminfo_alloc(self, builder, size):
    """Emit a call to ``NRT_MemInfo_alloc_safe`` with *size*.

    The callee is declared as taking one intp-typed argument and
    returning ``i8*``.  Returns the call result.

    Raises
    ------
    Exception
        If ``self.enable_nrt`` is false.
    """
    if not self.enable_nrt:
        raise Exception("Require NRT")

    module = cgutils.get_module(builder)
    return_type = llvmir.IntType(8).as_pointer()
    size_type = self.get_value_type(types.intp)
    signature = llvmir.FunctionType(return_type, [size_type])
    callee = module.get_or_insert_function(signature,
                                           name="NRT_MemInfo_alloc_safe")
    return builder.call(callee, [size])
def getitem_stringval(context, builder, sig, args):
    """Lower ``s[i]`` through the precompiled ``GetItemStringValImpl``.

    The precompiled function is looked up in ``context.precompiled_fns``.
    ``args`` is ``(s, i)``; only the string is converted to the callee's
    parameter type.  The result goes through ``_raise_return_type`` as a
    ``StringVal``.
    """
    module = cgutils.get_module(builder)
    impl_fn = context.precompiled_fns["GetItemStringValImpl"]
    getitem_fn = module.get_or_insert_function(impl_fn.type.pointee,
                                               impl_fn.name)

    text, position = args
    c_text = _conv_numba_struct_to_clang(builder, text,
                                         getitem_fn.args[0].type)

    lowered = builder.call(getitem_fn, [c_text, position])
    return _raise_return_type(context, builder, StringVal, lowered)
def real_power_impl(context, builder, sig, args):
    """Lower ``x ** y`` for real operands.

    When ``context.implement_powi_as_math_call`` is set, the
    implementation registered for ``math.pow`` with *sig* is used;
    otherwise the ``lc.INTR_POW`` intrinsic, specialised on the type of
    ``y``, is called.
    """
    base, exponent = args
    module = cgutils.get_module(builder)

    if context.implement_powi_as_math_call:
        pow_impl = context.get_function(math.pow, sig)
        return pow_impl(builder, args)

    intrinsic = lc.Function.intrinsic(module, lc.INTR_POW, [exponent.type])
    return builder.call(intrinsic, (base, exponent))
def get_constant_string(self, builder, ty, val):
    """Store *val* as a private global constant and return a pointer to it.

    The literal is created with ``lc.Constant.stringz`` and placed in a
    global variable named ``str_literal``.  The returned value is a GEP
    to the first element of that constant byte array.
    """
    assert ty == ntypes.string

    literal = lc.Constant.stringz(val)
    module = cgutils.get_module(builder)
    global_var = module.add_global_variable(literal.type, 'str_literal')
    global_var.linkage = lc.LINKAGE_PRIVATE
    global_var.initializer = literal
    global_var.global_constant = True

    # Two zero indices: the gep yields a pointer to the first element of
    # the constant byte array.
    zero = lc.Constant.int(lc.Type.int(32), 0)
    return global_var.gep([zero] * 2)
def eq_stringval(context, builder, sig, args):
    """Lower ``s1 == s2`` by calling precompiled ``EqStringValImpl``.

    Both operands are converted to the callee's parameter types.  The
    call result is returned as-is.
    """
    module = cgutils.get_module(builder)
    precompiled = context._get_precompiled_function("EqStringValImpl")
    callee = module.get_or_insert_function(precompiled.type.pointee,
                                           precompiled.name)

    left, right = args
    c_left = _conv_numba_struct_to_clang(builder, left, callee.args[0].type)
    c_right = _conv_numba_struct_to_clang(builder, right,
                                          callee.args[1].type)

    # ret bool so no need to raise type
    return builder.call(callee, [c_left, c_right])
def core(context, builder, sig, args):
    """Call the external function ``nvname`` with ``(base, pow)``.

    ``nvname`` is a free variable of this block.  The callee is declared
    as taking the lowered base type and an int32, and returning the
    lowered base type.
    """
    base, exponent = args
    base_type, _exponent_type = sig.args

    module = cgutils.get_module(builder)
    base_lltype = context.get_value_type(base_type)
    int_lltype = context.get_value_type(types.int32)

    callee_type = Type.function(base_lltype, [base_lltype, int_lltype])
    callee = module.get_or_insert_function(callee_type, name=nvname)
    return builder.call(callee, [base, exponent])
def int_upower_impl(context, builder, sig, args):
    """Lower ``x ** y`` through the ``lc.INTR_POWI`` intrinsic.

    The exponent is first brought to 32 bits: truncated when wider,
    zero-extended when narrower.  The intrinsic is specialised on the
    type of ``x``.
    """
    module = cgutils.get_module(builder)
    base, exponent = args

    exponent_width = exponent.type.width
    if exponent_width > 32:
        exponent = builder.trunc(exponent, Type.int(32))
    elif exponent_width < 32:
        exponent = builder.zext(exponent, Type.int(32))

    powi = lc.Function.intrinsic(module, lc.INTR_POWI, [base.type])
    return builder.call(powi, (base, exponent))
def round_impl_f64(context, builder, sig, args):
    """Lower a double-precision round as an external call.

    The callee is ``numba.round`` when ``utils.IS_PY3`` is true and
    ``round`` otherwise; it must be a declaration only.
    """
    module = cgutils.get_module(builder)
    signature = Type.function(Type.double(), [Type.double()])

    symbol = "numba.round" if utils.IS_PY3 else "round"
    callee = module.get_or_insert_function(signature, name=symbol)

    assert callee.is_declaration
    return builder.call(callee, args)
def eq_stringval(context, builder, sig, args):
    """Lower ``s1 == s2`` through the precompiled ``EqStringValImpl``.

    The precompiled function is looked up in ``context.precompiled_fns``.
    Both operands are converted to the callee's parameter types and the
    call result is returned unchanged.
    """
    module = cgutils.get_module(builder)
    impl_fn = context.precompiled_fns["EqStringValImpl"]
    eq_fn = module.get_or_insert_function(impl_fn.type.pointee, impl_fn.name)

    [lhs, rhs] = args
    call_args = [
        _conv_numba_struct_to_clang(builder, lhs, eq_fn.args[0].type),
        _conv_numba_struct_to_clang(builder, rhs, eq_fn.args[1].type),
    ]

    # ret bool so no need to raise type
    return builder.call(eq_fn, call_args)
def ldexp_impl(context, builder, sig, args):
    """Lower ``ldexp(val, exp)`` as a call to ``numba_ldexpf``/``numba_ldexp``.

    The helper is selected from the string form of the lowered float
    type (``"float"`` or ``"double"``); any other type raises KeyError.
    """
    value, exponent = args
    float_lltype, int_lltype = [context.get_data_type(t) for t in sig.args]

    signature = Type.function(float_lltype, (float_lltype, int_lltype))
    helper_names = {
        "float": "numba_ldexpf",
        "double": "numba_ldexp",
    }
    symbol = helper_names[str(float_lltype)]

    module = cgutils.get_module(builder)
    callee = module.get_or_insert_function(signature, name=symbol)
    return builder.call(callee, (value, exponent))
def string_capitalize(context, builder, sig, args):
    """Lower ``capitalize(s)`` via the precompiled ``StringCapitalizeImpl``.

    The callee's first argument is the first argument of the function
    being built, bitcast to the callee's first parameter type; the second
    is the converted string.  The result goes through
    ``raise_return_type`` as a ``StringVal``.
    """
    module = cgutils.get_module(builder)
    precompiled = context._get_precompiled_function("StringCapitalizeImpl")
    callee = module.get_or_insert_function(precompiled.type.pointee,
                                           precompiled.name)

    enclosing_args = context.get_arguments(cgutils.get_function(builder))
    fn_ctx = builder.bitcast(enclosing_args[0], callee.args[0].type)

    (string_val,) = args
    c_string = _conv_numba_struct_to_clang(builder, string_val,
                                           callee.args[1].type)

    lowered = builder.call(callee, [fn_ctx, c_string])
    return raise_return_type(context, builder, StringVal, lowered)
def get_constant_string(self, builder, ty, val):
    """Emit *val* as a private constant global and return its address.

    The global is named ``str_literal`` and initialised with
    ``lc.Constant.stringz(val)``.  The result is a GEP with two zero
    indices, i.e. a pointer to the first byte of the array.
    """
    assert ty == types.string

    string_const = lc.Constant.stringz(val)
    gv = cgutils.get_module(builder).add_global_variable(string_const.type,
                                                         'str_literal')
    gv.linkage = lc.LINKAGE_PRIVATE
    gv.initializer = string_const
    gv.global_constant = True

    # gep gets pointer to first element of the constant byte array
    index_zero = lc.Constant.int(lc.Type.int(32), 0)
    return gv.gep([index_zero, index_zero])
def implementer(context, builder, sig, args):
    """Call the unary external function ``extern_func`` on the argument.

    ``extern_func``, ``input_type`` and ``restype`` are free variables of
    this block.  The callee maps the lowered ``input_type`` to itself.
    When ``restype`` is not None the result is cast from ``input_type``
    to ``restype``.
    """
    (operand,) = args

    module = cgutils.get_module(builder)
    lltype = context.get_value_type(input_type)
    callee_type = Type.function(lltype, [lltype])
    callee = module.get_or_insert_function(callee_type, name=extern_func)
    result = builder.call(callee, (operand,))

    if restype is not None:
        return context.cast(builder, result, input_type, restype)
    return result
def add_stringval(context, builder, sig, args):
    """Lower ``s1 + s2`` by calling precompiled ``AddStringValImpl``.

    The callee's first argument is the first argument of the function
    being built, bitcast to the callee's first parameter type, followed
    by both converted operands.  The result goes through
    ``raise_return_type`` as a ``StringVal``.
    """
    module = cgutils.get_module(builder)
    precompiled = context._get_precompiled_function("AddStringValImpl")
    callee = module.get_or_insert_function(precompiled.type.pointee,
                                           precompiled.name)

    enclosing_args = context.get_arguments(cgutils.get_function(builder))
    fn_ctx = builder.bitcast(enclosing_args[0], callee.args[0].type)

    left, right = args
    c_left = _conv_numba_struct_to_clang(builder, left, callee.args[1].type)
    c_right = _conv_numba_struct_to_clang(builder, right,
                                          callee.args[2].type)

    lowered = builder.call(callee, [fn_ctx, c_left, c_right])
    return raise_return_type(context, builder, StringVal, lowered)
def ptx_atomic_add_intp(context, builder, sig, args):
    """Lower an atomic add on a 1-D array element addressed by one index.

    ``sig.args`` is ``(array type, index type, value type)`` and ``args``
    carries the matching lowered values.  Returns the value produced by
    the emitted atomic call / ``atomic_rmw`` instruction.

    Raises
    ------
    TypeError
        If the value type differs from the array dtype, or the array is
        not 1-D.
    """
    array_type, _index_type, value_type = sig.args
    array_val, index, addend = args
    elem_type = array_type.dtype

    if elem_type != value_type:
        raise TypeError("expect %s but got %s" % (elem_type, value_type))

    if array_type.ndim != 1:
        raise TypeError("indexing %d-D array with 1-D index" %
                        (array_type.ndim,))

    array_struct = context.make_array(array_type)(context, builder, array_val)
    elem_ptr = cgutils.get_item_pointer(builder, array_type, array_struct,
                                        [index])

    # Floats go through the declared helper functions; everything else
    # uses the builder's atomic read-modify-write.
    if elem_type == types.float32:
        declare = nvvmutils.declare_atomic_add_float32
    elif elem_type == types.float64:
        declare = nvvmutils.declare_atomic_add_float64
    else:
        return builder.atomic_rmw('add', elem_ptr, addend, 'monotonic')

    lmod = cgutils.get_module(builder)
    return builder.call(declare(lmod), (elem_ptr, addend))
def add_stringval(context, builder, sig, args):
    """Lower ``s1 + s2`` through the precompiled ``AddStringValImpl``.

    The precompiled function is looked up in ``context.precompiled_fns``.
    It is called with the enclosing function's first argument (bitcast to
    the callee's first parameter type) and both converted operands; the
    result goes through ``_raise_return_type`` as a ``StringVal``.
    """
    module = cgutils.get_module(builder)
    impl_fn = context.precompiled_fns["AddStringValImpl"]
    add_fn = module.get_or_insert_function(impl_fn.type.pointee, impl_fn.name)

    outer_fn = cgutils.get_function(builder)
    first_outer_arg = context.get_arguments(outer_fn)[0]
    ctx_arg = builder.bitcast(first_outer_arg, add_fn.args[0].type)

    [lhs, rhs] = args
    c_lhs = _conv_numba_struct_to_clang(builder, lhs, add_fn.args[1].type)
    c_rhs = _conv_numba_struct_to_clang(builder, rhs, add_fn.args[2].type)

    lowered = builder.call(add_fn, [ctx_arg, c_lhs, c_rhs])
    return _raise_return_type(context, builder, StringVal, lowered)
def implementer(context, builder, sig, args):
    """Lower a unary call to the external function ``extern_func``.

    ``extern_func``, ``input_type`` and ``restype`` are free variables of
    this block.  The callee takes and returns the lowered ``input_type``.
    The raw result is returned when ``restype`` is None, otherwise it is
    cast from ``input_type`` to ``restype``.
    """
    [arg] = args

    llvm_type = context.get_value_type(input_type)
    signature = Type.function(llvm_type, [llvm_type])
    extern = cgutils.get_module(builder).get_or_insert_function(
        signature, name=extern_func)
    raw = builder.call(extern, (arg,))

    return raw if restype is None else context.cast(builder, raw,
                                                    input_type, restype)
def ptx_cmem_arylike(context, builder, sig, args):
    """Embed a host array as a constant-address-space global and wrap it.

    ``args`` holds the single array ``arr``.  Its flattened values become
    a constant array stored in an internal global named ``_cudapy_cmem``;
    complex elements are stored as consecutive ``(real, imag)`` floats.
    The global's address is converted to the generic address space and
    used as the data pointer of an array struct of type
    ``sig.return_type``, which is returned by value.

    Raises
    ------
    TypeError
        If the dtype is neither complex nor in ``types.number_domain``.
    """
    lmod = cgutils.get_module(builder)
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        if dtype == types.complex64:
            part_type = types.float32
        else:
            part_type = types.float64

        constvals = []
        for position in range(flat.size):
            element = flat[position]
            constvals.append(context.get_constant(part_type, element.real))
            constvals.append(context.get_constant(part_type, element.imag))

    elif dtype in types.number_domain:
        constvals = []
        for position in range(flat.size):
            constvals.append(context.get_constant(dtype, flat[position]))

    else:
        raise TypeError("unsupport type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object.
    # NOTE(review): shape and strides come from ``arr`` while the data is
    # the flattened copy -- presumably ``arr`` is contiguous; confirm.
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, dim) for dim in arr.shape]
    kstrides = [context.get_constant(types.intp, step)
                for step in arr.strides]
    context.populate_array(
        ary,
        data=builder.bitcast(genptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=ary.itemsize,
        parent=ary.parent,
        meminfo=None,
    )

    return ary._getvalue()
def StringVal_ctor(context, builder, sig, args):
    """StringVal(ntypes.string)

    Builds a ``StringValStruct`` whose null flag is cleared, whose
    ``ptr`` is the argument, and whose ``len`` is the result of the
    external ``strlen`` truncated to 32 bits.
    """
    (c_string,) = args

    result = StringValStruct(context, builder)
    _set_is_null(builder, result, cgutils.false_bit)

    strlen_desc = lowering.ExternalFunctionDescriptor(
        'strlen', ntypes.uintp, [ntypes.CPointer(ntypes.char)])
    strlen_fn = context.declare_external_function(
        cgutils.get_module(builder), strlen_desc)
    length = context.call_external_function(
        builder, strlen_fn, strlen_desc.argtypes, [c_string])

    result.len = builder.trunc(length, lc.Type.int(32))
    result.ptr = c_string
    return result._getvalue()
def frexp_impl(context, builder, sig, args):
    """Lower ``frexp(val)`` as a call to ``numba_frexpf``/``numba_frexp``.

    The helper receives the value and a pointer to a stack slot for the
    exponent.  Returns an anonymous struct ``(result, exponent)``.  The
    helper is selected from the string form of the lowered float type
    (``"float"`` or ``"double"``); any other type raises KeyError.
    """
    (value,) = args

    float_lltype = context.get_data_type(sig.args[0])
    int_lltype = context.get_data_type(sig.return_type[1])
    exponent_slot = cgutils.alloca_once(builder, int_lltype, name='exp')

    signature = Type.function(float_lltype,
                              (float_lltype, Type.pointer(int_lltype)))
    helper_names = {
        "float": "numba_frexpf",
        "double": "numba_frexp",
    }
    symbol = helper_names[str(float_lltype)]

    module = cgutils.get_module(builder)
    callee = module.get_or_insert_function(signature, name=symbol)
    mantissa = builder.call(callee, (value, exponent_slot))

    exponent = builder.load(exponent_slot)
    return cgutils.make_anonymous_struct(builder, (mantissa, exponent))
def call_internal(self, builder, fndesc, sig, args):
    """Emit a call to an internally compiled function.

    *fndesc* is the function descriptor of the callee.  The function is
    declared in the builder's module and called with *args* through the
    calling convention.  If the returned status is an error it is
    propagated; otherwise the callee's return value is returned.
    """
    module = cgutils.get_module(builder)
    callee = self.declare_function(module, fndesc)

    status, result = self.call_conv.call_function(
        builder, callee, sig.return_type, sig.args, args)

    # Error path is marked unlikely.
    with cgutils.if_unlikely(builder, status.is_error):
        self.call_conv.return_status_propagate(builder, status)

    return result
def StringVal_ctor(context, builder, sig, args):
    """StringVal(types.string)

    Returns a ``StringValStruct`` value with the null flag cleared,
    ``ptr`` set to the argument and ``len`` set to the external
    ``strlen`` of the argument truncated to 32 bits.
    """
    [raw_ptr] = args

    string_val = StringValStruct(context, builder)
    _set_is_null(builder, string_val, cgutils.false_bit)

    fndesc = lowering.ExternalFunctionDescriptor(
        'strlen', types.uintp, [types.CPointer(types.char)])
    module = cgutils.get_module(builder)
    strlen = context.declare_external_function(module, fndesc)
    full_length = context.call_external_function(builder, strlen,
                                                 fndesc.argtypes, [raw_ptr])

    string_val.len = builder.trunc(full_length, lc.Type.int(32))
    string_val.ptr = raw_ptr
    return string_val._getvalue()
def nrt_decref(self, builder, typ, value):
    """Emit ``NRT_decref`` calls for *value* and its traversed members.

    Members yielded by the data model's ``traverse`` are handled first,
    recursively.  Then, if ``get_nrt_meminfo`` returns a truthy meminfo
    for *value*, a call to ``NRT_decref`` (declared ``void (i8*)``) is
    emitted on it.

    Raises
    ------
    Exception
        If ``self.enable_nrt`` is false.
    """
    if not self.enable_nrt:
        raise Exception("Require NRT")

    data_model = self.data_model_manager[typ]
    for member_type, member_value in data_model.traverse(builder, value):
        self.nrt_decref(builder, member_type, member_value)

    meminfo = self.get_nrt_meminfo(builder, typ, value)
    if not meminfo:
        return

    module = cgutils.get_module(builder)
    byte_ptr = llvmir.IntType(8).as_pointer()
    signature = llvmir.FunctionType(llvmir.VoidType(), [byte_ptr])
    decref = module.get_or_insert_function(signature, name="NRT_decref")
    builder.call(decref, [meminfo])
def compile_internal(self, builder, impl, sig, args, locals=None):
    """Invoke compiler to implement a function for a nopython function

    The compiled function descriptor is cached in
    ``self.cached_internal_func``.  The cache key is
    ``(impl.__code__, sig)`` extended with the closure cell contents of
    *impl*, if any.  When that key is unhashable the function is compiled
    without caching instead of failing.

    After compilation (or a cache hit) a call to the function is emitted
    with *args*; an error status is propagated through the calling
    convention, otherwise the callee's return value is returned.

    *locals* is forwarded to the compiler; ``None`` (the default) means an
    empty mapping.
    """
    # Avoid sharing one mutable default dict across all calls.
    if locals is None:
        locals = {}

    cache_key = (impl.__code__, sig)
    if impl.__closure__:
        cache_key += tuple(c.cell_contents for c in impl.__closure__)

    try:
        fndesc = self.cached_internal_func.get(cache_key)
    except TypeError:
        # A closure cell holds an unhashable value, so the key cannot be
        # looked up or stored: compile without caching.
        cache_key = None
        fndesc = None

    if fndesc is None:
        # Compile
        from numba import compiler

        codegen = self.jit_codegen()
        library = codegen.create_library(impl.__name__)
        flags = compiler.Flags()
        flags.set('no_compile')
        flags.set('no_cpython_wrapper')
        cres = compiler.compile_internal(self.typing_context, self,
                                         library,
                                         impl, sig.args,
                                         sig.return_type, flags,
                                         locals=locals)

        # Allow inlining the function inside callers.
        codegen.add_linking_library(cres.library)

        fndesc = cres.fndesc
        if cache_key is not None:
            self.cached_internal_func[cache_key] = fndesc

    # Add call to the generated function
    llvm_mod = cgutils.get_module(builder)
    fn = self.declare_function(llvm_mod, fndesc)
    status, res = self.call_conv.call_function(builder, fn, sig.return_type,
                                               sig.args, args)

    with cgutils.if_unlikely(builder, status.is_error):
        self.call_conv.return_status_propagate(builder, status)

    return res
def make_constant_array(self, builder, typ, ary):
    """Embed *ary* as an internal constant global and return an array struct.

    The array is made contiguous and flattened; its elements become a
    constant array stored in a global named ``.const.array.data``.  The
    returned struct (by value) has its ``data``, ``shape`` and
    ``strides`` fields set from the contiguous array.

    Raises
    ------
    TypeError
        If the dtype is a struct type.
    """
    assert typ.layout == 'C'  # assumed in typeinfer.py
    ary = numpy.ascontiguousarray(ary)
    flat = ary.flatten()

    # Handle data
    if self.is_struct_type(typ.dtype):
        # FIXME
        raise TypeError("Do not support structure dtype as constant array, "
                        "yet.")

    element_consts = []
    for position in range(flat.size):
        element_consts.append(self.get_constant(typ.dtype, flat[position]))

    element_lltype = element_consts[0].type
    data_const = Constant.array(element_lltype, element_consts)

    module = cgutils.get_module(builder)
    data_global = module.add_global_variable(data_const.type,
                                             name=".const.array.data")
    data_global.linkage = lc.LINKAGE_INTERNAL
    data_global.global_constant = True
    data_global.initializer = data_const

    # Handle shape
    llintp = self.get_value_type(types.intp)
    shape_consts = [self.get_constant(types.intp, dim) for dim in ary.shape]
    shape_array = Constant.array(llintp, shape_consts)

    # Handle strides
    stride_consts = [self.get_constant(types.intp, step)
                     for step in ary.strides]
    strides_array = Constant.array(llintp, stride_consts)

    # Create array structure
    result = self.make_array(typ)(self, builder)
    result.data = builder.bitcast(data_global, result.data.type)
    result.shape = shape_array
    result.strides = strides_array

    return result._getvalue()
def compile_internal(self, builder, impl, sig, args, locals=None,
                     cache_key=None):
    """Invoke compiler to implement a function for a nopython function

    Args
    ----
    locals : dict or None
        Forwarded to the compiler; ``None`` (the default) means an empty
        mapping.
    cache_key : hashable
        A hashable object to use as the key for caching.
        If it is `None`, no caching is performed.

    After compilation (or a cache hit) a call to the function is emitted
    with *args*.  An error status from the callee is propagated;
    otherwise the callee's return value is returned.
    """
    # Avoid sharing one mutable default dict across all calls.
    if locals is None:
        locals = {}

    if cache_key is not None:
        # Caching is enabled
        fndesc = self.cached_internal_func.get(cache_key)
    else:
        # Caching is disabled
        fndesc = None

    if fndesc is None:
        # Compile
        cres = numba.compiler.compile_internal(self.typing_context, self,
                                               impl, sig.args,
                                               sig.return_type,
                                               locals=locals)
        llvm_func = cres.llvm_func

        # Set to linkonce one-definition-rule so that the function
        # is removed once it is linked.
        llvm_func.linkage = lc.LINKAGE_LINKONCE_ODR

        self.add_libs([cres.llvm_module])

        fndesc = cres.fndesc

        # Do cache if caching is enabled
        if cache_key is not None:
            self.cached_internal_func[cache_key] = fndesc

    # Add call to the generated function
    llvm_mod = cgutils.get_module(builder)
    fn = self.declare_function(llvm_mod, fndesc)
    status, res = self.call_function(builder, fn, sig.return_type,
                                     sig.args, args)

    # Do not drop a failure reported by the callee: propagate its error
    # code to our own caller.
    with cgutils.if_unlikely(builder, status.err):
        self.return_errcode_propagate(builder, status.code)

    return res
def compile_internal(self, builder, impl, sig, args, locals=None):
    """Invoke compiler to implement a function for a nopython function

    The compiled function descriptor is cached in
    ``self.cached_internal_func`` under ``(impl.__code__, sig)``; closure
    cell values are not part of the key.  After compilation (or a cache
    hit) a call to the function is emitted with *args*.  An error status
    from the callee is propagated; otherwise its return value is
    returned.

    *locals* is forwarded to the compiler; ``None`` (the default) means an
    empty mapping.
    """
    # Avoid sharing one mutable default dict across all calls.
    if locals is None:
        locals = {}

    cache_key = (impl.__code__, sig)
    fndesc = self.cached_internal_func.get(cache_key)

    if fndesc is None:
        # Compile
        from numba import compiler

        codegen = self.jit_codegen()
        library = codegen.create_library(impl.__name__)
        flags = compiler.Flags()
        flags.set('no_compile')
        flags.set('no_cpython_wrapper')
        cres = compiler.compile_internal(self.typing_context, self,
                                         library,
                                         impl, sig.args,
                                         sig.return_type, flags,
                                         locals=locals)

        # Allow inlining the function inside callers.
        codegen.add_linking_library(cres.library)

        fndesc = cres.fndesc
        self.cached_internal_func[cache_key] = fndesc

    # Add call to the generated function
    llvm_mod = cgutils.get_module(builder)
    fn = self.declare_function(llvm_mod, fndesc)
    status, res = self.call_function(builder, fn, sig.return_type,
                                     sig.args, args)

    # Do not drop a failure reported by the callee: propagate its error
    # code to our own caller.
    with cgutils.if_unlikely(builder, status.err):
        self.return_errcode_propagate(builder, status.code)

    return res
def get_constant_struct(self, builder, ty, val):
    """Materialise a complex constant and return the loaded value.

    The ``(real, imag)`` struct is stored in an internal constant global
    named ``.const`` and a load from that global is returned.

    Raises
    ------
    NotImplementedError
        If *ty* is not in ``types.complex_domain``.
    """
    assert self.is_struct_type(ty)
    module = cgutils.get_module(builder)

    if ty not in types.complex_domain:
        raise NotImplementedError(ty)

    if ty == types.complex64:
        part_type = types.float32
    elif ty == types.complex128:
        part_type = types.float64
    else:
        raise Exception("unreachable")

    real_part = self.get_constant(part_type, val.real)
    imag_part = self.get_constant(part_type, val.imag)
    struct_const = Constant.struct([real_part, imag_part])

    global_var = module.add_global_variable(struct_const.type, name=".const")
    global_var.linkage = lc.LINKAGE_INTERNAL
    global_var.initializer = struct_const
    global_var.global_constant = True
    return builder.load(global_var)
def debug_print(self, builder, text):
    """Emit code that prints ``str(text)``.

    The text is inserted into the builder's module as a constant string
    and handed to ``self.print_string``.
    """
    module = cgutils.get_module(builder)
    message = self.insert_const_string(module, str(text))
    self.print_string(builder, message)
def _prepare_call_to_object_mode(context, builder, func, signature, args, env):
    """Wrap native arrays as Python objects and call *func* with them.

    For every ``(arr, arrtype)`` pair of ``args`` / ``signature.args`` an
    object is built by calling the external ``numba_ndarray_new`` helper
    with the array's ndim, shape pointer, strides pointer, data pointer,
    NumPy type number and item size.  If the helper returns NULL the code
    branches straight to the return block.  Otherwise *func* is called
    through the calling convention with the created objects and *env*,
    and its returned object is decref'ed.

    In the return block every argument slot is loaded and decref'ed.

    Returns
    -------
    (innercall, error)
        ``innercall`` is ``status.code`` of the call to *func*; ``error``
        is the value loaded from a one-bit flag that starts out true, is
        overwritten with each object's "is NULL" test, and finally with
        ``status.is_error``.
    """
    mod = cgutils.get_module(builder)

    thisfunc = cgutils.get_function(builder)
    # Single exit block shared by the early-exit and normal paths.
    bb_core_return = thisfunc.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Call to
    # PyObject* ndarray_new(int nd,
    #                       npy_intp *dims,   /* shape */
    #                       npy_intp *strides,
    #                       void* data,
    #                       int type_num,
    #                       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    # The error flag starts out set.
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        # Initialize to NULL so the slot always holds a defined value when
        # it is loaded in the return block.
        # NOTE(review): presumably pyapi.decref tolerates NULL -- confirm.
        builder.store(Constant.null(ll_pyobj), ptr)

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, ref=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        # [zero, zero] addresses the first element of the shape/strides
        # members.
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # A NULL object sets the error flag and jumps to the return block.
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(builder, func, ll_pyobj,
                                                     object_sig,
                                                     ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    # NOTE(review): ``status.code`` is produced only on the path that
    # reaches the core call, while the return block is also reachable from
    # the early exits above -- confirm how callers use ``innercall``.
    innercall = status.code
    return innercall, builder.load(error_pointer)