Esempio n. 1
0
    def _long_from_native_int(self, ival, func_name, native_int_type,
                              signed):
        """
        Emit code that builds a Python integer object from the native
        integer *ival*.

        *func_name* is the name of the C function taking one
        *native_int_type* argument and returning an object pointer.
        *signed* selects signed or unsigned comparisons for the Python 2
        range check.  The resulting object pointer is returned.
        """
        fnty = Type.function(self.pyobj, [native_int_type])
        fn = self._get_function(fnty, name=func_name)
        resptr = cgutils.alloca_once(self.builder, self.pyobj)

        if PYVERSION < (3, 0):
            # Under Python 2, we try to return a PyInt object whenever
            # the given number fits in a C long.
            pyint_fnty = Type.function(self.pyobj, [self.long])
            pyint_fn = self._get_function(pyint_fnty, name="PyInt_FromLong")
            long_max = Constant.int(native_int_type, _helperlib.long_max)
            if signed:
                long_min = Constant.int(native_int_type, _helperlib.long_min)
                use_pyint = self.builder.and_(
                    self.builder.icmp(lc.ICMP_SGE, ival, long_min),
                    self.builder.icmp(lc.ICMP_SLE, ival, long_max),
                    )
            else:
                use_pyint = self.builder.icmp(lc.ICMP_ULE, ival, long_max)

            with self.builder.if_else(use_pyint) as (then, otherwise):
                with then:
                    # Value is within the C long range: narrow it and
                    # build a PyInt.
                    downcast_ival = self.builder.trunc(ival, self.long)
                    res = self.builder.call(pyint_fn, [downcast_ival])
                    self.builder.store(res, resptr)
                with otherwise:
                    res = self.builder.call(fn, [ival])
                    self.builder.store(res, resptr)
        else:
            # ``fn`` was already resolved above; reuse it instead of
            # looking the same function up a second time.
            self.builder.store(self.builder.call(fn, [ival]), resptr)

        return self.builder.load(resptr)
Esempio n. 2
0
    def string_as_string_and_size(self, strobj):
        """
        Emit code extracting the character buffer and length of *strobj*.

        Returns a tuple of ``(ok, buffer, length)``:
        ``ok`` is an i1 value that is set on success,
        ``buffer`` is the i8* output buffer and
        ``length`` is the py_ssize_t length loaded from a stack slot.
        """
        builder = self.builder
        p_length = cgutils.alloca_once(builder, self.py_ssize_t)

        if PYVERSION < (3, 0):
            # Python 2: both outputs are written through pointers and the
            # call reports failure with a -1 status.
            out_params = [self.pyobj,
                          self.cstring.as_pointer(),
                          self.py_ssize_t.as_pointer()]
            fnty = Type.function(lc.Type.int(), out_params)
            fn = self._get_function(fnty, name="PyString_AsStringAndSize")
            p_buffer = cgutils.alloca_once(builder, self.cstring)

            status = builder.call(fn, [strobj, p_buffer, p_length])
            ok = builder.icmp_signed("!=", status,
                                     ir.Constant(status.type, -1))
            buffer = builder.load(p_buffer)
        else:
            # Python 3: the buffer is the return value; NULL means failure.
            params = [self.pyobj, self.py_ssize_t.as_pointer()]
            fnty = Type.function(self.cstring, params)
            fn = self._get_function(fnty, name="PyUnicode_AsUTF8AndSize")

            buffer = builder.call(fn, [strobj, p_length])
            null = ir.Constant(buffer.type, None)
            ok = builder.icmp_unsigned('!=', null, buffer)

        length = builder.load(p_length)
        return (ok, buffer, length)
Esempio n. 3
0
 def dict_new(self, presize=0):
     """
     Emit a call that creates a new dict object.

     With the default *presize* of 0 ``PyDict_New`` is called; otherwise
     ``_PyDict_NewPresized`` is called with *presize* as a py_ssize_t
     constant.
     """
     if presize != 0:
         sized_fnty = Type.function(self.pyobj, [self.py_ssize_t])
         sized_fn = self._get_function(sized_fnty,
                                       name="_PyDict_NewPresized")
         size_hint = Constant.int(self.py_ssize_t, presize)
         return self.builder.call(sized_fn, [size_hint])

     plain_fnty = Type.function(self.pyobj, ())
     plain_fn = self._get_function(plain_fnty, name="PyDict_New")
     return self.builder.call(plain_fn, ())
Esempio n. 4
0
 def numba_array_adaptor(self, ary, ptr):
     """
     Emit a call to ``numba_adapt_ndarray(ary, ptr)`` and return its int
     result.  Both arguments of the declaration are marked no-capture.
     """
     i8_ptr = Type.pointer(Type.int(8))
     signature = Type.function(Type.int(), [self.pyobj, i8_ptr])
     adaptor = self._get_function(signature, name="numba_adapt_ndarray")
     for position in (0, 1):
         adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(adaptor, (ary, ptr))
Esempio n. 5
0
 def dict_getitem_string(self, dic, name):
     """
     Emit ``PyDict_GetItemString(dic, name)``, with *name* inserted into
     the module as a constant string.

     Returns a borrowed reference.
     """
     lookup_ty = Type.function(self.pyobj, [self.pyobj, self.cstring])
     lookup = self._get_function(lookup_ty, name="PyDict_GetItemString")
     key = self.context.insert_const_string(self.module, name)
     return self.builder.call(lookup, [dic, key])
Esempio n. 6
0
File: base.py Progetto: yuguen/numba
 def get_external_function_type(self, fndesc):
     """
     Build the LLVM function type for *fndesc*, mapping every argument
     type and the return type through ``get_argument_type``.
     """
     llvm_args = []
     for aty in fndesc.argtypes:
         llvm_args.append(self.get_argument_type(aty))
     # The return type is used as-is (not wrapped in a pointer).
     llvm_ret = self.get_argument_type(fndesc.restype)
     return Type.function(llvm_ret, llvm_args)
Esempio n. 7
0
def wavebarrier_impl(context, builder, sig, args):
    """
    Lower a wave barrier: emit a call to the argument-less
    ``__hsail_wavebarrier`` function (SPIR function calling convention)
    and return the module's void placeholder value.
    """
    assert not args
    barrier_ty = Type.function(Type.void(), [])
    barrier = builder.module.get_or_insert_function(
        barrier_ty, name="__hsail_wavebarrier")
    barrier.calling_convention = target.CC_SPIR_FUNC
    builder.call(barrier, [])
    return _void_value
Esempio n. 8
0
def ptx_vote_sync(context, builder, sig, args):
    """
    Lower to a call of ``llvm.nvvm.vote.sync``, declared as taking
    (i32, i32, i1) and returning an {i32, i1} struct.  *args* are
    forwarded unchanged and the call result is returned.
    """
    i32 = Type.int(32)
    i1 = Type.int(1)
    vote_ty = Type.function(Type.struct((i32, i1)), (i32, i32, i1))
    vote = builder.module.get_or_insert_function(
        vote_ty, name='llvm.nvvm.vote.sync')
    return builder.call(vote, args)
Esempio n. 9
0
 def numba_buffer_adaptor(self, buf, ptr):
     """
     Emit a call to the void function ``numba_adapt_buffer(buf, ptr)``.
     Both arguments of the declaration are marked no-capture.
     """
     buffer_ptr_ty = ir.PointerType(self.py_buffer_t)
     signature = Type.function(Type.void(), [buffer_ptr_ty, self.voidptr])
     adaptor = self._get_function(signature, name="numba_adapt_buffer")
     for position in (0, 1):
         adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(adaptor, (buf, ptr))
Esempio n. 10
0
def atan2_f32_impl(context, builder, sig, args):
    """
    Lower a two-argument single-precision atan2 to a call of ``atan2f``,
    declared through ``cgutils.insert_pure_function``; the result goes
    through ``impl_ret_untracked``.
    """
    assert len(args) == 2
    f32 = Type.float()
    atan2f_ty = Type.function(f32, [f32, f32])
    atan2f = cgutils.insert_pure_function(builder.module, atan2f_ty,
                                          name="atan2f")
    result = builder.call(atan2f, args)
    return impl_ret_untracked(context, builder, sig.return_type, result)
Esempio n. 11
0
def complex128_power_impl(context, builder, sig, args):
    """
    Lower ``a ** b`` for complex128 operands.

    When the exponent compares equal to 2+0j the result is computed as
    ``a * a``; otherwise the external ``numba.math.cpow`` function is
    called with pointers to both operands and to the result.
    """
    base_val, exp_val = args
    base = Complex128(context, builder, value=base_val)
    exponent = Complex128(context, builder, value=exp_val)
    result = Complex128(context, builder)
    module = builder.module
    base_ptr = base._getpointer()
    exp_ptr = exponent._getpointer()
    result_ptr = result._getpointer()

    # Squaring is special-cased because cpow loses a lot of precision.
    two = context.get_constant(types.float64, 2)
    zero = context.get_constant(types.float64, 0)
    exp_is_two = builder.and_(
        builder.fcmp(lc.FCMP_OEQ, exponent.real, two),
        builder.fcmp(lc.FCMP_OEQ, exponent.imag, zero))

    with builder.if_else(exp_is_two) as (then, otherwise):
        with then:
            # Square: lower as a plain complex multiplication.
            squared = complex_mul_impl(context, builder, sig,
                                       (base_val, base_val))
            squared_view = Complex128(context, builder, value=squared)
            result.real = squared_view.real
            result.imag = squared_view.imag

        with otherwise:
            # General case: call the external helper.
            cpow_ty = Type.function(Type.void(), [base_ptr.type] * 3)
            cpow = module.get_or_insert_function(cpow_ty,
                                                 name="numba.math.cpow")
            builder.call(cpow, (base_ptr, exp_ptr, result_ptr))

    loaded = builder.load(result_ptr)
    return impl_ret_untracked(context, builder, sig.return_type, loaded)
Esempio n. 12
0
 def object_dump(self, obj):
     """
     Emit a call to ``_PyObject_Dump(obj)``, which dumps a Python object
     on C stderr.  For debugging purposes.
     """
     dump_ty = Type.function(Type.void(), [self.pyobj])
     dump = self._get_function(dump_ty, name="_PyObject_Dump")
     return self.builder.call(dump, (obj,))
Esempio n. 13
0
 def nrt_adapt_ndarray_from_python(self, ary, ptr):
     """
     Emit a call to ``NRT_adapt_ndarray_from_python(ary, ptr)`` and
     return its int result.  Requires ``context.enable_nrt``; both
     arguments of the declaration are marked no-capture.
     """
     assert self.context.enable_nrt
     signature = Type.function(Type.int(), [self.pyobj, self.voidptr])
     adaptor = self._get_function(signature,
                                  name="NRT_adapt_ndarray_from_python")
     for position in (0, 1):
         adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(adaptor, (ary, ptr))
Esempio n. 14
0
File: hsaimpl.py Progetto: esc/numba
def _declare_function(context, builder, name, sig, cargs,
                      mangler=mangle_c):
    """Insert a declaration for a builtin function into the module.

    The symbol name is produced by calling ``mangler(name, cargs)`` and
    the declaration is given the SPIR function calling convention.

    Args
    ----
    context: target context

    builder: llvm builder

    name: str
        symbol name

    sig: signature
        function signature of the symbol being declared

    cargs: sequence of str
        C type names for the arguments

    mangler: a mangler function
        function to use to mangle the symbol

    """
    module = builder.module
    # A void return has no value type, so it is mapped explicitly.
    returns_void = sig.return_type == types.void
    ret_ty = (lc.Type.void() if returns_void
              else context.get_value_type(sig.return_type))
    arg_tys = []
    for argty in sig.args:
        arg_tys.append(context.get_value_type(argty))
    decl_ty = Type.function(ret_ty, arg_tys)
    symbol = mangler(name, cargs)
    decl = module.get_or_insert_function(decl_ty, symbol)
    decl.calling_convention = target.CC_SPIR_FUNC
    return decl
Esempio n. 15
0
 def restore_thread(self, thread_state):
     """
     Emit ``PyEval_RestoreThread(thread_state)``: restore the given
     thread state by reacquiring the GIL.
     """
     restore_ty = Type.function(Type.void(), [self.voidptr])
     restore = self._get_function(restore_ty, name="PyEval_RestoreThread")
     self.builder.call(restore, [thread_state])
Esempio n. 16
0
 def call_function_pointer(self, builder, funcptr, signature, args, cconv=None):
     """
     Emit a call through the integer address *funcptr*.

     The callee type is built from the value type of
     ``signature.return_type`` and the LLVM types of *args*; the address
     is materialised as an intp constant and cast to a pointer to that
     function type.  *cconv* is forwarded to ``builder.call``.
     """
     ret_ty = self.get_value_type(signature.return_type)
     arg_tys = [arg.type for arg in args]
     callee_ptr_ty = Type.pointer(Type.function(ret_ty, arg_tys))
     raw_addr = self.get_constant(types.intp, funcptr)
     callee = builder.inttoptr(raw_addr, callee_ptr_ty)
     return builder.call(callee, args, cconv=cconv)
Esempio n. 17
0
def ptx_warp_sync(context, builder, sig, args):
    """
    Lower to a call of ``llvm.nvvm.bar.warp.sync`` (void, one i32
    argument) with *args* forwarded, and return the context's dummy
    value.
    """
    sync_ty = Type.function(Type.void(), (Type.int(32),))
    sync = builder.module.get_or_insert_function(
        sync_ty, name='llvm.nvvm.bar.warp.sync')
    builder.call(sync, args)
    return context.get_dummy_value()
Esempio n. 18
0
 def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs):
     """
     Emit a call to the variadic ``PyArg_ParseTupleAndKeywords`` with
     the four fixed arguments followed by *objs*, and return its int
     result.
     """
     char_ptr = Type.pointer(Type.int(8))
     char_ptr_array = Type.pointer(char_ptr)
     fixed_params = [self.pyobj, self.pyobj, char_ptr, char_ptr_array]
     parse_ty = Type.function(Type.int(), fixed_params, var_arg=True)
     parse = self._get_function(parse_ty,
                                name="PyArg_ParseTupleAndKeywords")
     call_args = [args, kws, fmt, keywords]
     call_args.extend(objs)
     return self.builder.call(parse, call_args)
Esempio n. 19
0
 def tuple_pack(self, items):
     """
     Emit a call to the variadic ``PyTuple_Pack`` with the number of
     *items* (as an intp constant) followed by the items themselves.
     """
     pack_ty = Type.function(self.pyobj, [self.py_ssize_t], var_arg=True)
     pack = self._get_function(pack_ty, name="PyTuple_Pack")
     count = self.context.get_constant(types.intp, len(items))
     return self.builder.call(pack, [count] + list(items))
Esempio n. 20
0
 def tuple_setitem(self, tuple_val, index, item):
     """
     Emit ``PyTuple_SetItem(tuple_val, index, item)``.

     *index* is a Python integer that is turned into a constant.
     Steals a reference to `item`.
     """
     # PyTuple_SetItem takes its position as a Py_ssize_t, so the
     # declaration and the constant must be pointer-sized rather than a
     # 32-bit int (same typing as tuple_getitem).
     fnty = Type.function(Type.int(),
                          [self.pyobj, self.py_ssize_t, self.pyobj])
     setitem_fn = self._get_function(fnty, name='PyTuple_SetItem')
     index = self.context.get_constant(types.intp, index)
     self.builder.call(setitem_fn, [tuple_val, index, item])
Esempio n. 21
0
 def tuple_getitem(self, tup, idx):
     """
     Emit ``PyTuple_GetItem(tup, idx)`` with *idx* turned into an intp
     constant.

     Borrow reference
     """
     getitem_ty = Type.function(self.pyobj, [self.pyobj, self.py_ssize_t])
     getitem = self._get_function(getitem_ty, name="PyTuple_GetItem")
     position = self.context.get_constant(types.intp, idx)
     return self.builder.call(getitem, [tup, position])
Esempio n. 22
0
 def list_setitem(self, seq, idx, val):
     """
     Emit ``PyList_SetItem(seq, idx, val)`` and return its int result.

     Warning: Steals reference to ``val``
     """
     param_types = [self.pyobj, self.py_ssize_t, self.pyobj]
     setitem_ty = Type.function(Type.int(), param_types)
     setitem = self._get_function(setitem_ty, name="PyList_SetItem")
     return self.builder.call(setitem, [seq, idx, val])
Esempio n. 23
0
def atan2_f64_impl(context, builder, sig, args):
    """
    Lower a two-argument double-precision atan2 to an external call:
    ``atan2_fixed`` on win32, ``atan2`` everywhere else.
    """
    assert len(args) == 2
    module = cgutils.get_module(builder)
    f64 = Type.double()
    atan2_ty = Type.function(f64, [f64, f64])
    # Workaround atan2() issues under Windows
    if sys.platform == "win32":
        symbol = "atan2_fixed"
    else:
        symbol = "atan2"
    atan2 = module.get_or_insert_function(atan2_ty, name=symbol)
    return builder.call(atan2, args)
Esempio n. 24
0
 def save_thread(self):
     """
     Emit ``PyEval_SaveThread()``: release the GIL and return the former
     thread state (an opaque non-NULL pointer).
     """
     save_ty = Type.function(self.voidptr, [])
     save = self._get_function(save_ty, name="PyEval_SaveThread")
     return self.builder.call(save, [])
Esempio n. 25
0
def ptx_syncthreads(context, builder, sig, args):
    """
    Lower to a call of the argument-less void intrinsic
    ``llvm.nvvm.barrier0`` and return the context's dummy value.
    """
    assert not args
    module = cgutils.get_module(builder)
    barrier_ty = Type.function(Type.void(), ())
    barrier = module.get_or_insert_function(barrier_ty,
                                            name='llvm.nvvm.barrier0')
    builder.call(barrier, ())
    return context.get_dummy_value()
Esempio n. 26
0
 def string_from_string(self, string):
     """
     Emit a call building a Python string object from the C string
     *string*: ``PyUnicode_FromString`` on Python 3 and
     ``PyString_FromString`` on Python 2.
     """
     if PYVERSION < (3, 0):
         symbol = "PyString_FromString"
     else:
         symbol = "PyUnicode_FromString"
     ctor_ty = Type.function(self.pyobj, [self.cstring])
     ctor = self._get_function(ctor_ty, name=symbol)
     return self.builder.call(ctor, [string])
Esempio n. 27
0
 def bytes_from_string_and_size(self, string, size):
     """
     Emit a call building a bytes object from the buffer *string* of
     length *size*: ``PyBytes_FromStringAndSize`` on Python 3 and
     ``PyString_FromStringAndSize`` on Python 2.
     """
     if PYVERSION < (3, 0):
         symbol = "PyString_FromStringAndSize"
     else:
         symbol = "PyBytes_FromStringAndSize"
     ctor_ty = Type.function(self.pyobj, [self.cstring, self.py_ssize_t])
     ctor = self._get_function(ctor_ty, name=symbol)
     return self.builder.call(ctor, [string, size])
Esempio n. 28
0
 def nrt_adapt_buffer_from_python(self, buf, ptr):
     """
     Emit a call to the void function
     ``NRT_adapt_buffer_from_python(buf, ptr)``.  Requires
     ``context.enable_nrt``; both arguments of the declaration are
     marked no-capture.
     """
     assert self.context.enable_nrt
     buffer_ptr_ty = Type.pointer(self.py_buffer_t)
     signature = Type.function(Type.void(), [buffer_ptr_ty, self.voidptr])
     adaptor = self._get_function(signature,
                                  name="NRT_adapt_buffer_from_python")
     for position in (0, 1):
         adaptor.args[position].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(adaptor, (buf, ptr))
Esempio n. 29
0
 def string_as_string(self, strobj):
     """
     Emit a call returning the C string held by *strobj*:
     ``PyUnicode_AsUTF8`` on Python 3 and ``PyString_AsString`` on
     Python 2.
     """
     if PYVERSION < (3, 0):
         symbol = "PyString_AsString"
     else:
         symbol = "PyUnicode_AsUTF8"
     getter_ty = Type.function(self.cstring, [self.pyobj])
     getter = self._get_function(getter_ty, name=symbol)
     return self.builder.call(getter, [strobj])
Esempio n. 30
0
 def call(self, callee, args=None, kws=None):
     """
     Emit ``PyObject_Call(callee, args, kws)``.

     An *args* or *kws* left as ``None`` is replaced by the value of
     ``get_null_object()``.
     """
     positional = self.get_null_object() if args is None else args
     keywords = self.get_null_object() if kws is None else kws
     call_ty = Type.function(self.pyobj, [self.pyobj] * 3)
     call_fn = self._get_function(call_ty, name="PyObject_Call")
     return self.builder.call(call_fn, (callee, positional, keywords))
Esempio n. 31
0
 def _impl(context, builder, sig, args):
     """
     Lower a call to the intrinsic named by ``intrinsic_name``.

     args are (index, src); the return type must equal the type of src.
     """
     assert sig.return_type == sig.args[1]
     lane, payload = args
     int32 = Type.int(32)
     intrinsic_ty = Type.function(int32, [int32, int32])
     intrinsic = builder.module.declare_intrinsic(intrinsic_name,
                                                  fnty=intrinsic_ty)
     # The args are byte addressable and VGPRs are 4 wide, so the index
     # is multiplied by 4.  The index might be an int64; truncating it
     # to int32 is fine as wavefront_size is never likely to overflow
     # an int32.
     lane32 = builder.trunc(lane, int32)
     byte_offset = builder.mul(lane32, lc.Constant.int(int32, 4))
     # Bitcast so that float32 travels as a packed i32; the return value
     # is cast back to the signature's return type.
     packed = builder.bitcast(payload, int32)
     raw = builder.call(intrinsic, (byte_offset, packed))
     return builder.bitcast(raw, context.get_value_type(sig.return_type))
Esempio n. 32
0
def activelanepermute_wavewidth_impl(context, builder, sig, args):
    """
    Lower to a call of ``__hsail_activelanepermute_wavewidth_b<N>``,
    where N is the bit width of the element type.

    The source and identity values are bitcast to an N-bit integer for
    the call and the result is bitcast back to the element's value type.
    """
    src, laneid, identity, use_ident = args
    assert sig.args[0] == sig.args[2]
    value_ty = sig.args[0]
    nbits = value_ty.bitwidth
    packed_ty = Type.int(nbits)
    int32 = Type.int(32)
    int1 = Type.int(1)
    symbol = "__hsail_activelanepermute_wavewidth_b{0}".format(nbits)

    permute_ty = Type.function(packed_ty,
                               [packed_ty, int32, packed_ty, int1])
    permute = builder.module.get_or_insert_function(permute_ty, name=symbol)
    permute.calling_convention = target.CC_SPIR_FUNC

    src_bits = builder.bitcast(src, packed_ty)
    identity_bits = builder.bitcast(identity, packed_ty)
    raw = builder.call(permute,
                       [src_bits, laneid, identity_bits, use_ident])
    return builder.bitcast(raw, context.get_value_type(value_ty))
Esempio n. 33
0
def ptx_shfl_sync_i32(context, builder, sig, args):
    """
    Lower a shuffle through ``llvm.nvvm.shfl.sync.i32``.

    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
    function supports both 32 and 64 bit ints and floats, so for feature
    parity i64, f32 and f64 are implemented here.  Floats are bitcast to
    an int, shuffled, then bitcast back.  64-bit values are split into
    two 32-bit halves that are shuffled separately and recombined.
    """
    mask, mode, value, index, clamp = args
    value_type = sig.args[2]
    if value_type in types.real_domain:
        value = builder.bitcast(value, Type.int(value_type.bitwidth))

    i32 = Type.int(32)
    shfl_ty = Type.function(Type.struct((i32, Type.int(1))),
                            (i32, i32, i32, i32, i32))
    shfl = builder.module.get_or_insert_function(
        shfl_ty, name='llvm.nvvm.shfl.sync.i32')

    if value_type.bitwidth == 32:
        ret = builder.call(shfl, (mask, mode, value, index, clamp))
        if value_type == types.float32:
            # Rebuild the result struct with the payload cast to float.
            shuffled = builder.extract_value(ret, 0)
            pred = builder.extract_value(ret, 1)
            as_float = builder.bitcast(shuffled, Type.float())
            ret = cgutils.make_anonymous_struct(builder, (as_float, pred))
        return ret

    # Any other width: shuffle the low and high 32-bit halves separately.
    low_in = builder.trunc(value, i32)
    shifted = builder.lshr(value, context.get_constant(types.i8, 32))
    high_in = builder.trunc(shifted, i32)
    low_ret = builder.call(shfl, (mask, mode, low_in, index, clamp))
    high_ret = builder.call(shfl, (mask, mode, high_in, index, clamp))
    low_out = builder.extract_value(low_ret, 0)
    high_out = builder.extract_value(high_ret, 0)
    # The predicate is taken from the first (low half) call.
    pred = builder.extract_value(low_ret, 1)
    low64 = builder.zext(low_out, Type.int(64))
    high64 = builder.zext(high_out, Type.int(64))
    high_shifted = builder.shl(high64, context.get_constant(types.i8, 32))
    combined = builder.or_(high_shifted, low64)
    if value_type == types.float64:
        combined = builder.bitcast(combined, Type.double())
    return cgutils.make_anonymous_struct(builder, (combined, pred))
Esempio n. 34
0
def printf(builder, format_string, *values):
    """
    Emit a call to the C ``printf`` function.

    *format_string* is stored in a new global variable.  Python ints in
    *values* become int constants, Python floats become double
    constants, and anything else is passed through unchanged.
    """
    fmt_const = Constant.stringz(format_string)
    fmt_global = get_module(builder).add_global_variable(fmt_const.type, '')
    fmt_global.initializer = fmt_const

    # Address of the first character of the global string.
    first_char = [Constant.int(Type.int(32), 0) for _ in range(2)]
    fmt_ptr = fmt_global.gep(first_char)

    def as_llvm(val):
        if isinstance(val, int):
            return Constant.int(Type.int(), val)
        if isinstance(val, float):
            return Constant.real(Type.double(), val)
        return val

    call_args = [fmt_ptr] + [as_llvm(val) for val in values]
    printf_ty = Type.function(Type.int(32), [Type.pointer(Type.int(8))],
                              True)
    printf_fn = get_module(builder).get_or_insert_function(printf_ty,
                                                           'printf')
    builder.call(printf_fn, call_args)
Esempio n. 35
0
def real_divmod(context, builder, x, y):
    """
    Emit a call to a helper function computing the quotient and
    remainder of *x* and *y*, which must have the same type.

    The helper's name comes from ``context.mangler``; its body is
    generated with ``real_divmod_func_body`` the first time it is found
    to be a mere declaration.  Returns ``(quotient, remainder)``.
    """
    assert x.type == y.type
    real_ty = x.type

    symbol = context.mangler(".numba.python.rem", [x.type])
    helper_ty = Type.function(real_ty,
                              (real_ty, real_ty, Type.pointer(real_ty)))
    helper = builder.module.get_or_insert_function(helper_ty, symbol)

    if helper.is_declaration:
        # No body yet: define the helper now.
        helper.linkage = lc.LINKAGE_LINKONCE_ODR
        body = lc.Builder(helper.append_basic_block('entry'))
        lhs, rhs, rem_out = helper.args
        quot, rem = real_divmod_func_body(context, body, lhs, rhs)
        body.store(rem, rem_out)
        body.ret(quot)

    # The remainder is returned through a stack slot.
    rem_slot = cgutils.alloca_once(builder, real_ty)
    quotient = builder.call(helper, (x, y, rem_slot))
    return quotient, builder.load(rem_slot)
Esempio n. 36
0
    def get_function_type(self, fndesc):
        """
        Get the implemented Function type for the high-level *fndesc*.
        Some parameters can be added or shuffled around.
        This is kept in sync with call_function() and get_arguments().

        Calling Convention
        ------------------
        Returns: -2 for return none in native function;
                 -1 for failure with python exception set;
                  0 for success;
                 >0 for user error code.
        Return value is passed by reference as the first argument.

        Actual arguments start at the 2nd argument position.
        Caller is responsible for allocating space for the return value.
        """
        llvm_args = []
        for aty in fndesc.argtypes:
            llvm_args.append(self.get_argument_type(aty))
        result_slot_ty = self.get_return_type(fndesc.restype)
        return Type.function(Type.int(), [result_slot_ty] + llvm_args)
Esempio n. 37
0
 def object_richcompare(self, lhs, rhs, opstr):
     """
     Emit a comparison of *lhs* and *rhs* for the operator *opstr*.

     The six ordering/equality operators go through
     ``PyObject_RichCompare``; refer to Python source Include/object.h
     for the macros defining the opid.  ``is`` and ``is not`` compare
     the operands directly and box the resulting flag as a boolean.
     Any other operator raises NotImplementedError.
     """
     rich_ops = ('<', '<=', '==', '!=', '>', '>=')
     if opstr in rich_ops:
         # The position in ``rich_ops`` is used as the opid.
         opid = rich_ops.index(opstr)
         compare_ty = Type.function(self.pyobj,
                                    [self.pyobj, self.pyobj, Type.int()])
         compare = self._get_function(compare_ty,
                                      name="PyObject_RichCompare")
         opid_const = self.context.get_constant(types.int32, opid)
         return self.builder.call(compare, (lhs, rhs, opid_const))

     if opstr == 'is':
         same = self.builder.icmp(lc.ICMP_EQ, lhs, rhs)
         return self.from_native_value(same, types.boolean)

     if opstr == 'is not':
         differ = self.builder.icmp(lc.ICMP_NE, lhs, rhs)
         return self.from_native_value(differ, types.boolean)

     raise NotImplementedError("Unknown operator {op!r}".format(
         op=opstr))
Esempio n. 38
0
def complex_power_impl(context, builder, sig, args):
    """
    Lower ``a ** b`` for complex operands of the type ``sig.args[0]``.

    When the exponent compares equal to 2+0j the result is computed as
    ``a * a``; otherwise ``numba_cpowf`` (complex64) or ``numba_cpow``
    (complex128) is called with pointers to both operands and to the
    result.
    """
    base_val, exp_val = args
    ty = sig.args[0]
    fty = ty.underlying_float
    base = context.make_helper(builder, ty, value=base_val)
    exponent = context.make_helper(builder, ty, value=exp_val)
    result = context.make_helper(builder, ty)
    module = builder.module
    base_ptr = base._getpointer()
    exp_ptr = exponent._getpointer()
    result_ptr = result._getpointer()

    # Squaring is special-cased because cpow loses a lot of precision.
    two = context.get_constant(fty, 2)
    zero = context.get_constant(fty, 0)
    exp_is_two = builder.and_(
        builder.fcmp_ordered('==', exponent.real, two),
        builder.fcmp_ordered('==', exponent.imag, zero))

    with builder.if_else(exp_is_two) as (then, otherwise):
        with then:
            # Square: lower as a plain complex multiplication.
            squared = complex_mul_impl(context, builder, sig,
                                       (base_val, base_val))
            squared_view = context.make_helper(builder, ty, value=squared)
            result.real = squared_view.real
            result.imag = squared_view.imag

        with otherwise:
            # General case: call the external helper for this width.
            helper_names = {
                types.complex64: "numba_cpowf",
                types.complex128: "numba_cpow",
            }
            func_name = helper_names[ty]
            cpow_ty = Type.function(Type.void(), [base_ptr.type] * 3)
            cpow = module.get_or_insert_function(cpow_ty, name=func_name)
            builder.call(cpow, (base_ptr, exp_ptr, result_ptr))

    loaded = builder.load(result_ptr)
    return impl_ret_untracked(context, builder, sig.return_type, loaded)
Esempio n. 39
0
    def build(self):
        """
        Create the ``wrapper.<func name>`` function in ``self.module``,
        generate its body with ``build_wrapper`` and return
        ``(wrapper, api)``.
        """
        wrapname = "wrapper.%s" % self.func.name

        # The wrapper has the signature of PyCFunctionWithKeywords
        # (see CPython's methodobject.h): three object pointers in, one out.
        pyobj = self.context.get_argument_type(types.pyobject)
        wrapty = Type.function(pyobj, [pyobj] * 3)
        wrapper = self.module.add_function(wrapty, name=wrapname)

        builder = Builder.new(wrapper.append_basic_block('entry'))

        # - `closure` will receive the `self` pointer stored in the
        #   PyCFunction object (see _dynfunc.c)
        # - `args` and `kws` will receive the tuple and dict objects
        #   of positional and keyword arguments, respectively.
        closure, args, kws = wrapper.args
        labelled = ((closure, 'py_closure'),
                    (args, 'py_args'),
                    (kws, 'py_kws'))
        for argument, label in labelled:
            argument.name = label

        api = self.context.get_python_api(builder)
        self.build_wrapper(api, builder, closure, args, kws)

        return wrapper, api
Esempio n. 40
0
 def create_np_datetime(self, val, unit_code):
     """
     Emit ``numba_create_np_datetime(val, unit_code)``, declared as
     taking an i64 and an int; *unit_code* is a Python integer turned
     into an int constant.
     """
     unit_const = Constant.int(Type.int(), unit_code)
     create_ty = Type.function(self.pyobj, [Type.int(64), Type.int()])
     create = self._get_function(create_ty,
                                 name="numba_create_np_datetime")
     return self.builder.call(create, [val, unit_const])
Esempio n. 41
0
def atan2_f32_impl(context, builder, sig, args):
    """
    Lower a two-argument single-precision atan2 to a call of ``atan2f``,
    declared through ``cgutils.insert_pure_function``.
    """
    assert len(args) == 2
    f32 = Type.float()
    atan2f_ty = Type.function(f32, [f32, f32])
    atan2f = cgutils.insert_pure_function(builder.module, atan2f_ty,
                                          name="atan2f")
    return builder.call(atan2f, args)
Esempio n. 42
0
 def core(context, builder, sig, args):
     """
     Lower to a call of the external function ``nvname``, declared as
     taking two values of the value type of ``ty`` and returning one.
     """
     value_ty = context.get_value_type(ty)
     module = builder.module
     binary_ty = Type.function(value_ty, [value_ty, value_ty])
     external = module.get_or_insert_function(binary_ty, name=nvname)
     return builder.call(external, args)
Esempio n. 43
0
    def generate_kernel_wrapper(self, library, fname, argtypes, debug):
        """
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped has the name ``fname`` and argument types
        ``argtypes``.  The wrapper function is returned.

        The wrapper is a void function that unpacks its arguments, calls the
        wrapped function through ``self.call_conv`` and returns.  When
        ``debug`` is true, a non-OK status that is not a Python exception is
        recorded in module-level globals, together with the values of
        ``sreg.tid`` and ``sreg.ctaid`` for the x, y and z dimensions.
        """
        arginfo = self.get_arg_packer(argtypes)
        argtys = list(arginfo.argument_types)
        wrapfnty = Type.function(Type.void(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Declaration of the wrapped function: int status return, with a
        # return-value slot prepended to the packed arguments.
        fnty = Type.function(Type.int(),
                             [self.call_conv.get_return_type(types.pyobject)] +
                             argtys)
        func = wrapper_module.add_function(fnty, name=fname)

        # The wrapper takes the wrapped function's name, prefixed with the
        # 'cudapy' namespace.
        prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
        wrapfn = wrapper_module.add_function(wrapfnty, name=prefixed)
        builder = Builder(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            # One int global named after the wrapper plus *postfix*,
            # initialised to null.
            gv = wrapper_module.add_global_variable(Type.int(),
                                                    name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = arginfo.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)

        if debug:
            # Check error status
            with cgutils.if_likely(builder, status.is_ok):
                builder.ret_void()

            with builder.if_then(builder.not_(status.is_python_exc)):
                # User exception raised
                old = Constant.null(gv_exc.type.pointee)

                # Use atomic cmpxchg to prevent rewriting the error status
                # Only the first error is recorded

                casfnty = lc.Type.function(old.type,
                                           [gv_exc.type, old.type, old.type])

                # NOTE(review): "___numba_cas_hack" is only declared here;
                # presumably it is replaced by a real cmpxchg elsewhere --
                # confirm.
                casfn = wrapper_module.add_function(casfnty,
                                                    name="___numba_cas_hack")
                xchg = builder.call(casfn, [gv_exc, old, status.code])
                changed = builder.icmp(ICMP_EQ, xchg, old)

                # If the xchange is successful, save the thread ID.
                sreg = nvvmutils.SRegBuilder(builder)
                with builder.if_then(changed):
                    for dim, ptr, in zip("xyz", gv_tid):
                        val = sreg.tid(dim)
                        builder.store(val, ptr)

                    for dim, ptr, in zip("xyz", gv_ctaid):
                        val = sreg.ctaid(dim)
                        builder.store(val, ptr)

        builder.ret_void()

        # Mark the wrapper as a CUDA kernel, hand the module to the library
        # and return the wrapper as looked up from the finalized library.
        nvvm.set_cuda_kernel(wrapfn)
        library.add_ir_module(wrapper_module)
        library.finalize()
        wrapfn = library.get_function(wrapfn.name)
        return wrapfn
Esempio n. 44
0
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args, env):
    """
    Emit code that converts each native argument in *args* into a Python
    object, calls *func* with those objects and releases them afterwards.

    Arguments typed as ``types.Array`` are rebuilt through the
    ``numba_ndarray_new`` helper; all other arguments go through
    ``pyapi.from_native_value``.

    Returns ``(status.code, error)`` where ``error`` is the i1 flag loaded
    from the error slot in the final cleanup block.
    """
    mod = builder.module

    # Common exit block where the argument objects are released.
    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj,
        [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    # The error flag starts out set; it is overwritten after each
    # conversion and again after the call.
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new,
                               [nd, dims, strides, data, type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        # Record whether this conversion produced NULL.
        # NOTE(review): presumably cbranch_or_continue jumps to
        # bb_core_return when obj is NULL and continues otherwise --
        # confirm against cgutils.
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(builder,
                                                     func,
                                                     types.pyobject,
                                                     object_sig,
                                                     object_args,
                                                     env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    # NOTE(review): status.code is only produced on the path that performs
    # the call; confirm callers ignore it when the error flag is set by a
    # failed argument conversion.
    innercall = status.code
    return innercall, builder.load(error_pointer)
Esempio n. 45
0
def build_ufunc_wrapper(library, context, fname, signature, objmode, envptr,
                        env):
    """
    Generate the ``__ufunc__.<name>`` loop wrapper around the scalar kernel
    called *fname*.

    The wrapper is emitted into a fresh ``ufunc_wrapper`` library with the
    LLVM signature ``void(i8** args, intp* dims, intp* steps, i8* data)``.
    When *objmode* is true the kernel is declared with an all-``pyobject``
    signature and every iteration goes through ``build_obj_loop_body``,
    bracketed by ``gil_ensure()`` / ``gil_release()``.  Otherwise the
    wrapper branches on whether all inputs are unit-strided and runs
    ``build_fast_loop_body`` or ``build_slow_loop_body`` accordingly.

    *fname* must be a ``str``.  Returns the result of
    ``get_pointer_to_function`` for the wrapper, after *library* has been
    added to the wrapper library as a linking library.
    """
    assert isinstance(fname, str)

    # LLVM types that make up the wrapper signature
    i8 = Type.int(8)
    i8_ptr = Type.pointer(i8)
    i8_ptr_ptr = Type.pointer(i8_ptr)
    intp_t = context.get_value_type(types.intp)
    intp_ptr = Type.pointer(intp_t)
    wrapper_fnty = Type.function(Type.void(),
                                 [i8_ptr_ptr, intp_ptr, intp_ptr, i8_ptr])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')

    # Signature under which the scalar kernel is declared in this module
    if objmode:
        kernel_ret = types.pyobject
        kernel_args = [types.pyobject] * len(signature.args)
    else:
        kernel_ret = signature.return_type
        kernel_args = signature.args
    kernel_fnty = context.call_conv.get_function_type(kernel_ret, kernel_args)

    kernel = wrapper_module.add_function(kernel_fnty, name=fname)
    kernel.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(wrapper_fnty,
                                          "__ufunc__." + kernel.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    for llarg, label in ((arg_args, "args"), (arg_dims, "dims"),
                         (arg_steps, "steps"), (arg_data, "data")):
        llarg.name = label

    builder = Builder(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    # One UArrayArg per input, followed by one for the output
    arrays = [UArrayArg(context, builder, arg_args, arg_steps, pos, argty)
              for pos, argty in enumerate(signature.args)]
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Zero-initialised offset slot per input, plus one for the store side
    zero = context.get_constant(types.intp, 0)
    offsets = []
    for _ in arrays:
        slot = cgutils.alloca_once(builder, intp_t)
        offsets.append(slot)
        builder.store(zero, slot)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # True only if every input reports a unit stride
    unit_strided = cgutils.true_bit
    for arr in arrays:
        unit_strided = builder.and_(unit_strided, arr.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # Object mode: single general loop, run while holding the GIL
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(context, kernel, builder, arrays, out,
                                offsets, store_offset, signature,
                                pyapi, envptr, env)
        pyapi.gil_release(gil)
    else:
        with builder.if_else(unit_strided) as (fast_path, strided_path):
            with fast_path:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    build_fast_loop_body(context, kernel, builder,
                                         arrays, out, offsets,
                                         store_offset, signature,
                                         loop.index, pyapi)

            with strided_path:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(context, kernel, builder,
                                         arrays, out, offsets,
                                         store_offset, signature,
                                         pyapi)

    # Both modes end the wrapper with a plain void return
    builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
Esempio n. 46
0
 def release_record_buffer(self, pbuf):
     """
     Emit a call to the ``numba_release_record_buffer`` helper, declared as
     ``void(voidptr)``, passing *pbuf*; returns what ``builder.call`` returns.
     """
     helper_fnty = Type.function(Type.void(), [self.voidptr])
     helper = self._get_function(helper_fnty,
                                 name="numba_release_record_buffer")
     return self.builder.call(helper, [pbuf])
Esempio n. 47
0
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    A new IR module is created from *library*.  A function carrying
    *func*'s name is added to it (marked ``alwaysinline``) together with a
    ``__ufunc__.<name>`` wrapper of type
    ``void(i8** args, intp* dims, intp* steps, i8* data)``.

    When *objmode* is true, the loop body is produced by
    ``build_obj_loop_body`` between ``gil_ensure()`` and ``gil_release()``.
    Otherwise a run-time test on whether every input is unit-strided selects
    between ``build_fast_loop_body`` and ``build_slow_loop_body``.

    Returns the wrapper as looked up in *library* after the module has been
    added to it.  As a side effect the *func* object passed in is given
    ``LINKAGE_INTERNAL``.
    """
    # LLVM types used in the wrapper signature
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    # In object mode every argument and the return value are pyobjects
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    # Keep the caller's function object; ``func`` is rebound to the function
    # of the same name added to the wrapper module.
    oldfunc = func
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # The first entry of ``dims`` is used as the loop trip count
    loopcount = builder.load(arg_dims, name="loopcount")

    # Only the number of these arguments is used, to index the output slot
    actual_args = context.call_conv.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(
            UArrayArg(context, builder, arg_args, arg_steps, i,
                      context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices: one zero-initialised offset slot per input
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    # Separate zero-initialised offset slot for the output
    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together the per-input unit-stride flags
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder, arrays, out,
                                           offsets, store_offset, signature,
                                           pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:

        with cgutils.ifelse(builder,
                            unit_strided) as (is_unit_strided, is_strided):

            with is_unit_strided:
                with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    ind)
                # This branch emits its own return
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature)

                # This branch emits its own return as well
                builder.ret_void()

        # Emitted after both branches have already returned; presumably
        # only terminates the block following the if/else -- TODO confirm
        # against cgutils.ifelse.
        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)
    # The function object originally passed in gets internal linkage
    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
Esempio n. 48
0
    def generate_kernel_wrapper(self, func, argtypes):
        """
        Build the ``cudaPy_<name>`` kernel entry point that wraps *func*.

        The wrapper is generated in a separate ``cuda.kernel.wrapper``
        module.  It unpacks its arguments with the argument packer for
        *argtypes*, calls the wrapped function through the calling
        convention and returns immediately when the status is OK.  For a
        non-OK status that is not a Python exception, the status code is
        written to the ``__errcode__`` global through ``___numba_cas_hack``;
        if that exchange reports the previous value was still zero, the
        thread and block indices are stored in the ``__tid{x,y,z}__`` and
        ``__ctaid{x,y,z}__`` globals.

        The wrapper module is then parsed from its textual form and linked
        into *func*'s module, which is verified.  Returns the wrapper
        function looked up in that module.
        """
        module = func.module

        arginfo = self.get_arg_packer(argtypes)
        argtys = list(arginfo.argument_types)
        wrapfnty = Type.function(Type.void(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Type of the wrapped function: an int status return, with the
        # return-value slot type prepended to the packed argument types.
        fnty = Type.function(Type.int(),
                             [self.call_conv.get_return_type(types.pyobject)] +
                             argtys)
        # ``func`` now refers to the same-named function added to the
        # wrapper module.
        func = wrapper_module.add_function(fnty, name=func.name)
        wrapfn = wrapper_module.add_function(wrapfnty,
                                             name="cudaPy_" + func.name)
        builder = Builder.new(wrapfn.append_basic_block(''))

        # Define error handling variables
        def define_error_gv(postfix):
            # Null-initialised int global named after the wrapper + postfix
            gv = wrapper_module.add_global_variable(Type.int(),
                                                    name=wrapfn.name + postfix)
            gv.initializer = Constant.null(gv.type.pointee)
            return gv

        gv_exc = define_error_gv("__errcode__")
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        callargs = arginfo.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)

        # Check error status
        with cgutils.if_likely(builder, status.is_ok):
            builder.ret_void()

        with builder.if_then(builder.not_(status.is_python_exc)):
            # User exception raised
            old = Constant.null(gv_exc.type.pointee)

            # Use atomic cmpxchg to prevent rewriting the error status
            # Only the first error is recorded

            casfnty = lc.Type.function(old.type,
                                       [gv_exc.type, old.type, old.type])

            # The compare-and-swap is expressed as a call to a function
            # that is only added by name here; its body is not defined in
            # this method.
            casfn = wrapper_module.add_function(casfnty,
                                                name="___numba_cas_hack")
            xchg = builder.call(casfn, [gv_exc, old, status.code])
            changed = builder.icmp(ICMP_EQ, xchg, old)

            # If the xchange is successful, save the thread ID.
            sreg = nvvmutils.SRegBuilder(builder)
            with builder.if_then(changed):
                for dim, ptr, in zip("xyz", gv_tid):
                    val = sreg.tid(dim)
                    builder.store(val, ptr)

                for dim, ptr, in zip("xyz", gv_ctaid):
                    val = sreg.ctaid(dim)
                    builder.store(val, ptr)

        builder.ret_void()
        # force inline
        # inline_function(status.code)
        nvvm.set_cuda_kernel(wrapfn)
        # Round-trip the wrapper module through its textual IR before
        # linking it into the module that owns the wrapped function.
        module.link_in(ll.parse_assembly(str(wrapper_module)))
        module.verify()

        # Return the wrapper as it exists in the linked module
        wrapfn = module.get_function(wrapfn.name)
        return wrapfn
Esempio n. 49
0
def atan2_f32_impl(context, builder, sig, args):
    """
    Lower a two-argument call to the external ``atan2f`` function, declared
    as ``float(float, float)``, and return the call's value.
    """
    assert len(args) == 2
    module = cgutils.get_module(builder)
    f32 = Type.float
    atan2f_fnty = Type.function(f32(), [f32(), f32()])
    atan2f = module.get_or_insert_function(atan2f_fnty, name="atan2f")
    return builder.call(atan2f, args)
Esempio n. 50
0
def _generic_array(context,
                   builder,
                   shape,
                   dtype,
                   symbol_name,
                   addrspace,
                   can_dynsized=False):
    """
    Allocate storage for an array of *dtype* with the given *shape* in
    *addrspace* and return it as a C-layout native array value.

    For ``nvvm.ADDRSPACE_LOCAL`` the storage is an ``alloca``; for any other
    address space it is a module-level global named *symbol_name* whose
    address is converted to the generic address space.  A 1-D shape with a
    non-positive element count is accepted only when *can_dynsized* is true;
    its length is then computed at run time from the ``%dynamic_smem_size``
    register divided by the item size.

    Raises ``ValueError`` for any other non-positive element count, and
    ``TypeError`` when *dtype* is neither in ``types.number_domain`` nor a
    ``types.Record`` / ``types.Boolean``.
    """
    elemcount = reduce(operator.mul, shape, 1)

    # A non-positive element count is only legal for a 1-D array that the
    # caller allows to be dynamically sized.
    nonpositive = elemcount <= 0
    dynamic_smem = nonpositive and can_dynsized and len(shape) == 1
    if nonpositive and not dynamic_smem:
        raise ValueError("array length <= 0")

    # Numbers, records and booleans are the only supported element types
    is_record_or_bool = isinstance(dtype, (types.Record, types.Boolean))
    if not (is_record_or_bool or dtype in types.number_domain):
        raise TypeError("unsupported type: %s" % dtype)

    elem_llty = context.get_data_type(dtype)
    storage_llty = Type.array(elem_llty, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Local address space is special-cased to a plain alloca; NVVM is
        # smart enough to use local memory only if no register is available.
        dataptr = cgutils.alloca_once(builder, storage_llty, name=symbol_name)
    else:
        module = builder.module

        # Global variable living in the requested address space
        gvmem = module.add_global_variable(storage_llty, symbol_name,
                                           addrspace)
        # Explicit alignment avoids a misalignment bug.  Shared memory needs
        # a power of two, so sizes that are not one (e.g. a Record) are
        # rounded up to the next power of two.
        elem_size = context.get_abi_sizeof(elem_llty)
        gvmem.align = 1 << (elem_size - 1).bit_length()

        if dynamic_smem:
            gvmem.linkage = lc.LINKAGE_EXTERNAL
        else:
            # Internal linkage is deliberately NOT set here: it works
            # around an NVVM bug that can generate an invalid symbol name
            # for internal symbols.  See _get_unique_smem_id().
            gvmem.initializer = lc.Constant.undef(storage_llty)

        # Convert the pointer to the generic address space
        to_generic = nvvmutils.insert_addrspace_conv(module, Type.int(8),
                                                     addrspace)
        raw_ptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(to_generic, [raw_ptr])

    targetdata = _get_target_data(context)
    itemsize = context.get_data_type(dtype).get_abi_size(targetdata)

    # C-order strides: walk the shape from the innermost dimension outwards
    strides = []
    running = itemsize
    for extent in reversed(shape):
        strides.insert(0, running)
        running *= extent
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    if dynamic_smem:
        # NVVM has no intrinsic for %dynamic_smem_size, so the register is
        # read through inline assembly.
        read_dynsmem = InlineAsm.get(Type.function(Type.int(), []),
                                     "mov.u32 $0, %dynamic_smem_size;",
                                     '=r',
                                     side_effect=True)
        dynsmem_size = builder.zext(builder.call(read_dynsmem, []),
                                    Type.int(width=64))
        # Only 1-D dynamic shared memory is supported, so a single extent
        # fully describes the shape.
        kitemsize = context.get_constant(types.intp, itemsize)
        kshape = [builder.udiv(dynsmem_size, kitemsize)]
    else:
        kshape = [context.get_constant(types.intp, extent)
                  for extent in shape]

    # Assemble the native array structure
    aryty = types.Array(dtype=dtype, ndim=len(shape), layout='C')
    ary = context.make_array(aryty)(context, builder)
    typed_data = builder.bitcast(dataptr, ary.data.type)
    context.populate_array(ary,
                           data=typed_data,
                           shape=kshape,
                           strides=kstrides,
                           itemsize=context.get_constant(types.intp, itemsize),
                           meminfo=None)
    return ary._getvalue()
Esempio n. 51
0
def ptx_syncthreads_or(context, builder, sig, args):
    """
    Lower to a call of the ``llvm.nvvm.barrier0.or`` function, declared as
    ``i32(i32)`` in the builder's module, forwarding *args* unchanged.
    """
    module = builder.module
    i32 = Type.int
    barrier_fnty = Type.function(i32(32), (i32(32),))
    barrier = module.get_or_insert_function(barrier_fnty,
                                            name='llvm.nvvm.barrier0.or')
    return builder.call(barrier, args)
Esempio n. 52
0
 def print_string(self, builder, text):
     """
     Emit a call to ``puts`` with *text* as its single argument and return
     the value of that call.
     """
     module = builder.module
     # puts is declared as int(GENERIC_POINTER)
     puts_fnty = Type.function(Type.int(), [GENERIC_POINTER])
     puts_fn = module.get_or_insert_function(puts_fnty, "puts")
     return builder.call(puts_fn, [text])
Esempio n. 53
0
 def extract_np_timedelta(self, obj):
     """
     Emit a call to the ``numba_extract_np_timedelta`` helper, declared as
     taking one ``self.pyobj`` and returning a 64-bit integer, and return
     the value of that call.
     """
     helper_fnty = Type.function(Type.int(64), [self.pyobj])
     helper = self._get_function(helper_fnty,
                                 name="numba_extract_np_timedelta")
     return self.builder.call(helper, [obj])
Esempio n. 54
0
    def build(self):
        """
        Generate the ``__gufunc__.<name>`` wrapper around ``self.func``.

        The wrapper has type
        ``void(i8** args, intp* dims, intp* steps, i8* data)``.  It loads
        the loop count from ``dims[0]``, loads one dimension per input
        symbol from ``dims[1:]``, builds a ``GUArrayArg`` for every
        argument, and then runs ``gen_loop_body`` once per iteration between
        ``gen_prologue`` and ``gen_epilogue``.  If the loop body reports an
        error the loop is left early via the ``.return`` block.

        Returns ``(wrapper, self.env)`` where ``wrapper`` is looked up in
        ``self.library`` after the module has been added.  As a side effect
        ``self.func`` is given ``LINKAGE_INTERNAL``.
        """
        # LLVM types used in the wrapper signature
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper_module = self.library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        # Add a function with the core function's name to the wrapper module
        func = wrapper_module.add_function(func_type, name=self.func.name)
        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty,
                                              "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        # dims[0] is used as the outer loop trip count
        loopcount = builder.load(arg_dims, name="loopcount")

        # Unpack shapes
        # NOTE: unique_syms is collected here but not read again in this
        # method.
        unique_syms = set()
        for grp in (self.sin, self.sout):
            for syms in grp:
                unique_syms |= set(syms)

        # Number the input symbols in order of first appearance
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        # Symbol number i is loaded from dims[i + 1]
        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        # step_offset starts past one entry per argument and is advanced by
        # each non-scalar argument's ndim.
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            if not ary.as_scalar:
                step_offset += ary.ndim
            arrays.append(ary)

        # Common exit block, reached normally or on error
        bbreturn = cgutils.get_function(builder).append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
            args = [a.array_value for a in arrays]
            innercall, error = self.gen_loop_body(builder, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

            # Advance every argument for the next iteration
            for a in arrays:
                a.next(ind)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder)

        builder.ret_void()

        self.library.add_ir_module(wrapper_module)
        wrapper = self.library.get_function(wrapper.name)

        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL

        return wrapper, self.env
Esempio n. 55
0
 def extract_record_data(self, obj, pbuf):
     """
     Emit a call to the ``numba_extract_record_data`` helper, declared as
     ``voidptr(pyobj, voidptr)``, with *obj* and *pbuf*; returns the value
     of that call.
     """
     voidptr = self.voidptr
     helper_fnty = Type.function(voidptr, [self.pyobj, voidptr])
     helper = self._get_function(helper_fnty,
                                 name="numba_extract_record_data")
     return self.builder.call(helper, [obj, pbuf])
Esempio n. 56
0
def _prepare_call_to_object_mode(context, builder, func, signature, args, env):
    """
    Box every native array in *args* and call the object-mode *func*.

    Each argument is rebuilt as a Python object through the
    ``numba_ndarray_new`` helper.  If any of these calls returns NULL, the
    error flag is set and control jumps straight to the common return block.
    Otherwise *func* is called with the boxed arguments, the error flag is
    set from ``status.is_error`` and the returned object is decref'ed.  The
    return block decrefs every argument slot.

    Returns ``(status.code, error)`` where ``error`` is the error flag
    loaded in the return block.
    """
    mod = cgutils.get_module(builder)

    thisfunc = cgutils.get_function(builder)
    # Common exit block, reached both on success and on early failure
    bb_core_return = thisfunc.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj,
        [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    # The error flag starts out true and is overwritten after each boxing
    # attempt and again after the core call.
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        builder.store(Constant.null(ll_pyobj), ptr)  # initialize to NULL

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, ref=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        # Pointers to the first element of the shape and strides members
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new,
                           [nd, dims, strides, data, type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # A NULL result marks an error and skips to the return block
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(builder,
                                                     func,
                                                     ll_pyobj,
                                                     object_sig,
                                                     ndarray_objects,
                                                     env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    # Slots of arguments that were never boxed still hold the NULL stored
    # above.  NOTE(review): this relies on pyapi.decref tolerating NULL --
    # confirm.
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
Esempio n. 57
0
    def _build_wrapper(self, library, name):
        """
        Emit the gufunc wrapper function called *name* using the LLVM IR
        builder.

        *library* is the CodeLibrary that receives the wrapper's IR module;
        ``self.library`` is added to it as a linking library.  The wrapper
        has type ``void(i8** args, intp* dims, intp* steps, i8* data)`` and
        runs ``gen_loop_body`` once per outer-loop iteration, between
        ``gen_prologue`` and ``gen_epilogue``, leaving the loop early when
        the loop body reports an error.
        """
        context = self.context

        # LLVM types making up the wrapper signature
        i8_ptr = Type.pointer(Type.int(8))
        i8_ptr_ptr = Type.pointer(i8_ptr)
        intp_t = context.get_value_type(types.intp)
        intp_ptr = Type.pointer(intp_t)
        wrapper_fnty = Type.function(
            Type.void(), [i8_ptr_ptr, intp_ptr, intp_ptr, i8_ptr])

        wrapper_module = library.create_ir_module('')
        kernel_fnty = self.call_conv.get_function_type(self.fndesc.restype,
                                                       self.fndesc.argtypes)
        kernel = wrapper_module.add_function(
            kernel_fnty, name=self.fndesc.llvm_func_name)
        kernel.attributes.add("alwaysinline")

        wrapper = wrapper_module.add_function(wrapper_fnty, name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        for llarg, label in ((arg_args, "args"), (arg_dims, "dims"),
                             (arg_steps, "steps"), (arg_data, "data")):
            llarg.name = label

        builder = Builder(wrapper.append_basic_block("entry"))
        loopcount = builder.load(arg_dims, name="loopcount")
        pyapi = context.get_python_api(builder)

        # Every dimension symbol mentioned by an input or an output
        unique_syms = {s
                       for grp in (self.sin, self.sout)
                       for syms in grp
                       for s in syms}

        # Number the input symbols in order of first appearance
        sym_map = {}
        for syms in self.sin:
            for s in syms:
                sym_map.setdefault(s, len(sym_map))

        # dims[0] holds the loop count; symbol number k is read from
        # dims[k + 1]
        sym_dim = {
            s: builder.load(
                builder.gep(arg_dims,
                            [context.get_constant(types.intp, k + 1)]))
            for s, k in sym_map.items()
        }

        # One GUArrayArg per argument; the step offset starts after one
        # entry per argument and advances by each argument's symbol count.
        arrays = []
        step_offset = len(self.sin) + len(self.sout)
        typed_syms = zip(self.signature.args, self.sin + self.sout)
        for pos, (argty, syms) in enumerate(typed_syms):
            arrays.append(GUArrayArg(context, builder, arg_args, arg_steps,
                                     pos, step_offset, argty, syms, sym_dim))
            step_offset += len(syms)

        exit_block = builder.append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder, pyapi)

        # Outer loop: call the kernel on the arrays at the current index
        with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
            call_args = [a.get_array_at_offset(loop.index) for a in arrays]
            _, failed = self.gen_loop_body(builder, pyapi, kernel, call_args)
            # Leave the loop as soon as an iteration fails
            cgutils.cbranch_or_continue(builder, failed, exit_block)

        builder.branch(exit_block)
        builder.position_at_end(exit_block)

        # Epilogue
        self.gen_epilogue(builder, pyapi)

        builder.ret_void()

        # Link
        library.add_ir_module(wrapper_module)
        library.add_linking_library(self.library)
Esempio n. 58
0
 def get_external_function_type(self, fndesc):
     """
     Return the LLVM function type for *fndesc*: both the argument types
     and the return type are mapped through ``get_argument_type``.
     """
     llargs = list(map(self.get_argument_type, fndesc.argtypes))
     # don't wrap in pointer
     llret = self.get_argument_type(fndesc.restype)
     return Type.function(llret, llargs)
Esempio n. 59
0
def _python_array_obj_to_native_list(typ, obj, c, size, listptr, errorptr):
    """
    Construct a new native list from a Python array of objects.

    Copied from ``_python_list_to_native``, with the list getitem replaced
    by a call to ``array_getptr1``, which yields a pointer to each element.

    A native list of *size* items is allocated.  Every element of *obj* is
    unboxed as ``typ.dtype`` and stored in it.  On success the list's
    meminfo is stored as private data on *obj* and the list value is written
    to *listptr*.  On allocation or unboxing failure *errorptr* is set to
    true, and the native list is decref'ed at the end.
    """
    # NOTE: this helper is currently unused; its only call site below is
    # commented out.  It refers to ``loop`` from the enclosing scope.
    def check_element_type(nth, itemobj, expected_typobj):
        typobj = nth.typeof(itemobj)
        # Check if *typobj* is NULL
        with c.builder.if_then(
                cgutils.is_null(c.builder, typobj),
                likely=False,
        ):
            c.builder.store(cgutils.true_bit, errorptr)
            loop.do_break()
        # Mandate that objects all have the same exact type
        type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj)

        with c.builder.if_then(type_mismatch, likely=False):
            c.builder.store(cgutils.true_bit, errorptr)
            c.pyapi.err_format(
                "PyExc_TypeError",
                "can't unbox heterogeneous list: %S != %S",
                expected_typobj, typobj,
            )
            c.pyapi.decref(typobj)
            loop.do_break()
        c.pyapi.decref(typobj)

    # Allocate a new native list
    ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size)
    # Array getitem call
    # array_getptr1 is declared as returning a pointer to a pyobject
    arr_get_fnty = LLType.function(LLType.pointer(c.pyapi.pyobj), [c.pyapi.pyobj, c.pyapi.py_ssize_t])
    arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            list.size = size
            zero = lir.Constant(size.type, 0)
            # Only traverse the array when it has at least one element
            with c.builder.if_then(c.builder.icmp_signed('>', size, zero),
                                   likely=True):
                # Traverse Python list and unbox objects into native list
                with _NumbaTypeHelper(c) as nth:
                    # Note: *expected_typobj* can't be NULL
                    # TODO: enable type checking when the empty list item
                    # in the list(list(str)) case can be handled
                    # expected_typobj = nth.typeof(c.builder.load(
                    #                 c.builder.call(arr_get_fn, [obj, zero])))
                    with cgutils.for_range(c.builder, size) as loop:
                        itemobj = c.builder.call(arr_get_fn, [obj, loop.index])
                        # extra load since we have ptr to object
                        itemobj = c.builder.load(itemobj)
                        # c.pyapi.print_object(itemobj)
                        # check_element_type(nth, itemobj, expected_typobj)
                        # XXX we don't call native cleanup for each
                        # list element, since that would require keeping
                        # track of which unboxings have been successful.
                        native = c.unbox(typ.dtype, itemobj)
                        with c.builder.if_then(native.is_error, likely=False):
                            c.builder.store(cgutils.true_bit, errorptr)
                            loop.do_break()
                        # The object (e.g. string) is stored so incref=True
                        list.setitem(loop.index, native.value, incref=True)
                    # c.pyapi.decref(expected_typobj)
            if typ.reflected:
                list.parent = obj
            # Stuff meminfo pointer into the Python object for
            # later reuse.
            with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)),
                                   likely=False):
                c.pyapi.object_set_private_data(obj, list.meminfo)
            list.set_dirty(False)
            c.builder.store(list.value, listptr)

        with if_not_ok:
            # Allocation failed
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, typ, list.value)
Esempio n. 60
0
def slice_new(self, start, stop, step):
    """
    Emit a call to ``PySlice_New``, declared as taking three ``self.pyobj``
    arguments and returning ``self.pyobj``, with *start*, *stop* and *step*;
    returns the value of that call.
    """
    pyobj = self.pyobj
    slice_fnty = llvm_Type.function(pyobj, [pyobj, pyobj, pyobj])
    slice_fn = self._get_function(slice_fnty, name="PySlice_New")
    return self.builder.call(slice_fn, [start, stop, step])