Ejemplo n.º 1
0
 def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs):
     charptr = Type.pointer(Type.int(8))
     charptrary = Type.pointer(charptr)
     argtypes = [self.pyobj, self.pyobj, charptr, charptrary]
     fnty = Type.function(Type.int(), argtypes, var_arg=True)
     fn = self._get_function(fnty, name="PyArg_ParseTupleAndKeywords")
     return self.builder.call(fn, [args, kws, fmt, keywords] + list(objs))
Ejemplo n.º 2
0
    def __init__(self, context, builder):
        """
        Note: Maybe called multiple times when lowering a function
        """
        from numba.targets import boxing
        self.context = context
        self.builder = builder

        self.module = builder.basic_block.function.module
        # A unique mapping of serialized objects in this module
        try:
            self.module.__serialized
        except AttributeError:
            self.module.__serialized = {}

        # Initialize types
        self.pyobj = self.context.get_argument_type(types.pyobject)
        self.voidptr = Type.pointer(Type.int(8))
        self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8)
        self.ulonglong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8)
        self.longlong = self.ulonglong
        self.double = Type.double()
        self.py_ssize_t = self.context.get_value_type(types.intp)
        self.cstring = Type.pointer(Type.int(8))
        self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
        self.py_buffer_t = ir.ArrayType(ir.IntType(8), _helperlib.py_buffer_size)
Ejemplo n.º 3
0
    def make_keywords(self, kws):
        strings = []
        stringtype = Type.pointer(Type.int(8))
        for k in kws:
            strings.append(self.make_const_string(k))

        strings.append(Constant.null(stringtype))
        kwlist = Constant.array(stringtype, strings)
        kwlist = cgutils.global_constant(self.module, ".kwlist", kwlist)
        return Constant.bitcast(kwlist, Type.pointer(stringtype))
Ejemplo n.º 4
0
 def numba_array_adaptor(self, ary, ptr):
     voidptr = Type.pointer(Type.int(8))
     fnty = Type.function(Type.int(), [self.pyobj, voidptr])
     fn = self._get_function(fnty, name="numba_adapt_ndarray")
     fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE)
     fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(fn, (ary, ptr))
Ejemplo n.º 5
0
 def get_argument_type(self, ty):
     if ty == types.boolean:
         return self.get_data_type(ty)
     elif self.is_struct_type(ty):
         return Type.pointer(self.get_value_type(ty))
     else:
         return self.get_value_type(ty)
Ejemplo n.º 6
0
    def insert_string_const_addrspace(self, builder, string):
        """
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        """
        lmod = builder.basic_block.function.module
        text = Constant.stringz(string)
        name = "__conststring__.%s" % string
        charty = Type.int(8)

        for gv in lmod.global_variables:
            if gv.name == name and gv.type.pointee == text.type:
                break
        else:
            gv = lmod.add_global_variable(text.type, name=name,
                                          addrspace=nvvm.ADDRSPACE_CONSTANT)
            gv.linkage = LINKAGE_INTERNAL
            gv.global_constant = True
            gv.initializer = text

        constcharptrty = Type.pointer(charty, nvvm.ADDRSPACE_CONSTANT)
        charptr = builder.bitcast(gv, constcharptrty)

        conv = nvvmutils.insert_addrspace_conv(lmod, charty,
                                               nvvm.ADDRSPACE_CONSTANT)
        return builder.call(conv, [charptr])
Ejemplo n.º 7
0
 def call_function_pointer(self, builder, funcptr, signature, args, cconv=None):
     retty = self.get_value_type(signature.return_type)
     fnty = Type.function(retty, [a.type for a in args])
     fnptrty = Type.pointer(fnty)
     addr = self.get_constant(types.intp, funcptr)
     ptr = builder.inttoptr(addr, fnptrty)
     return builder.call(ptr, args, cconv=cconv)
Ejemplo n.º 8
0
    def __init__(self, context, builder, value=None, ref=None, cast_ref=False):
        self._type = context.get_struct_type(self)
        self._context = context
        self._builder = builder
        if ref is None:
            self._value = alloca_once(builder, self._type)
            if value is not None:
                assert not is_pointer(value.type)
                assert value.type == self._type, (value.type, self._type)
                builder.store(value, self._value)
        else:
            assert value is None
            assert is_pointer(ref.type)
            if self._type != ref.type.pointee:
                if cast_ref:
                    ref = builder.bitcast(ref, Type.pointer(self._type))
                else:
                    raise TypeError(
                        "mismatching pointer type: got %s, expected %s"
                        % (ref.type.pointee, self._type))
            self._value = ref

        self._namemap = {}
        self._fdmap = []
        self._typemap = []
        base = Constant.int(Type.int(), 0)
        for i, (k, tp) in enumerate(self._fields):
            self._namemap[k] = i
            self._fdmap.append((base, Constant.int(Type.int(), i)))
            self._typemap.append(tp)
Ejemplo n.º 9
0
 def to_native_generator(self, obj, typ):
     """
     Extract the generator structure pointer from a generator *obj*
     (a _dynfunc.Generator instance).
     """
     gen_ptr_ty = Type.pointer(self.context.get_data_type(typ))
     value = self.context.get_generator_state(self.builder, obj, gen_ptr_ty)
     return NativeValue(value)
Ejemplo n.º 10
0
 def nrt_adapt_buffer_from_python(self, buf, ptr):
     assert self.context.enable_nrt
     fnty = Type.function(Type.void(), [Type.pointer(self.py_buffer_t),
                                        self.voidptr])
     fn = self._get_function(fnty, name="NRT_adapt_buffer_from_python")
     fn.args[0].add_attribute(lc.ATTR_NO_CAPTURE)
     fn.args[1].add_attribute(lc.ATTR_NO_CAPTURE)
     return self.builder.call(fn, (buf, ptr))
Ejemplo n.º 11
0
 def get_return_type(self, ty):
     if isinstance(ty, types.Optional):
         return self.get_return_type(ty.type)
     elif self.is_struct_type(ty):
         return self.get_argument_type(ty)
     else:
         argty = self.get_argument_type(ty)
         return Type.pointer(argty)
Ejemplo n.º 12
0
 def gil_release(self, gil):
     """
     Release the acquired GIL by gil_ensure().
     Must be paired with a gil_ensure().
     """
     gilptrty = Type.pointer(self.gil_state)
     fnty = Type.function(Type.void(), [gilptrty])
     fn = self._get_function(fnty, "numba_gil_release")
     return self.builder.call(fn, [gil])
Ejemplo n.º 13
0
 def unpack_tuple(self, args, name, n_min, n_max, *objs):
     charptr = Type.pointer(Type.int(8))
     argtypes = [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t]
     fnty = Type.function(Type.int(), argtypes, var_arg=True)
     fn = self._get_function(fnty, name="PyArg_UnpackTuple")
     n_min = Constant.int(self.py_ssize_t, n_min)
     n_max = Constant.int(self.py_ssize_t, n_max)
     if isinstance(name, str):
         name = self.context.insert_const_string(self.builder.module, name)
     return self.builder.call(fn, [args, name, n_min, n_max] + list(objs))
Ejemplo n.º 14
0
    def __init__(self, context, builder):
        """
        Note: Maybe called multiple times when lowering a function
        """
        fix_python_api()
        self.context = context
        self.builder = builder

        self.module = builder.basic_block.function.module
        # Initialize types
        self.pyobj = self.context.get_argument_type(types.pyobject)
        self.voidptr = Type.pointer(Type.int(8))
        self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8)
        self.ulonglong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8)
        self.longlong = self.ulonglong
        self.double = Type.double()
        self.py_ssize_t = self.context.get_value_type(types.intp)
        self.cstring = Type.pointer(Type.int(8))
        self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
Ejemplo n.º 15
0
def frexp_impl(context, builder, sig, args):
    val, = args
    fltty = context.get_data_type(sig.args[0])
    intty = context.get_data_type(sig.return_type[1])
    expptr = cgutils.alloca_once(builder, intty, name="exp")
    fnty = Type.function(fltty, (fltty, Type.pointer(intty)))
    fname = {"float": "numba_frexpf", "double": "numba_frexp"}[str(fltty)]
    fn = builder.module.get_or_insert_function(fnty, name=fname)
    res = builder.call(fn, (val, expptr))
    res = cgutils.make_anonymous_struct(builder, (res, builder.load(expptr)))
    return impl_ret_untracked(context, builder, sig.return_type, res)
Ejemplo n.º 16
0
def print_charseq(context, builder, sig, args):
    [x] = args
    py = context.get_python_api(builder)
    xp = cgutils.alloca_once(builder, x.type)
    builder.store(x, xp)
    byteptr = builder.bitcast(xp, Type.pointer(Type.int(8)))
    size = context.get_constant(types.intp, x.type.elements[0].count)
    cstr = py.bytes_from_string_and_size(byteptr, size)
    py.print_object(cstr)
    py.decref(cstr)
    return context.get_dummy_value()
Ejemplo n.º 17
0
 def gil_ensure(self):
     """
     Ensure the GIL is acquired.
     The returned value must be consumed by gil_release().
     """
     gilptrty = Type.pointer(self.gil_state)
     fnty = Type.function(Type.void(), [gilptrty])
     fn = self._get_function(fnty, "numba_gil_ensure")
     gilptr = cgutils.alloca_once(self.builder, self.gil_state)
     self.builder.call(fn, [gilptr])
     return gilptr
Ejemplo n.º 18
0
    def get_value_type(self, ty):
        if ty == types.boolean:
            return Type.int(1)
        dataty = self.get_data_type(ty)

        if isinstance(ty, types.Record):
            # Record data are passed by refrence
            memory = dataty.elements[0]
            return Type.struct([Type.pointer(memory)])

        return dataty
Ejemplo n.º 19
0
    def from_native_generator(self, val, typ, env=None):
        """
        Make a Numba generator (a _dynfunc.Generator instance) from a
        generator structure pointer *val*.
        *env* is an optional _dynfunc.Environment instance to be wrapped
        in the generator.
        """
        llty = self.context.get_data_type(typ)
        assert not llty.is_pointer
        gen_struct_size = self.context.get_abi_sizeof(llty)

        gendesc = self.context.get_generator_desc(typ)

        # This is the PyCFunctionWithKeywords generated by PyCallWrapper
        genfnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, self.pyobj])
        genfn = self._get_function(genfnty, name=gendesc.llvm_cpython_wrapper_name)

        # This is the raw finalizer generated by _lower_generator_finalize_func()
        finalizerty = Type.function(Type.void(), [self.voidptr])
        if typ.has_finalizer:
            finalizer = self._get_function(finalizerty, name=gendesc.llvm_finalizer_name)
        else:
            finalizer = Constant.null(Type.pointer(finalizerty))

        # PyObject *numba_make_generator(state_size, initial_state, nextfunc, finalizer, env)
        fnty = Type.function(self.pyobj, [self.py_ssize_t,
                                          self.voidptr,
                                          Type.pointer(genfnty),
                                          Type.pointer(finalizerty),
                                          self.voidptr])
        fn = self._get_function(fnty, name="numba_make_generator")

        state_size = ir.Constant(self.py_ssize_t, gen_struct_size)
        initial_state = self.builder.bitcast(val, self.voidptr)
        if env is None:
            env = self.get_null_object()
        env = self.builder.bitcast(env, self.voidptr)

        return self.builder.call(fn,
                                 (state_size, initial_state, genfn, finalizer, env))
Ejemplo n.º 20
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        elemtype = (types.float32
                    if dtype == types.complex64
                    else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])

    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[i])
                     for i in range(flat.size)]

    else:
        raise TypeError("unsupport type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
Ejemplo n.º 21
0
    def pack_value(self, builder, ty, value, ptr):
        """Pack data for array storage
        """
        if isinstance(ty, types.Record):
            pdata = cgutils.get_record_data(builder, value)
            databuf = builder.load(pdata)
            casted = builder.bitcast(ptr, Type.pointer(databuf.type))
            builder.store(databuf, casted)
            return

        if ty == types.boolean:
            value = cgutils.as_bool_byte(builder, value)
        assert value.type == ptr.type.pointee
        builder.store(value, ptr)
Ejemplo n.º 22
0
 def to_native_array(self, typ, ary):
     # TODO check matching dtype.
     #      currently, mismatching dtype will still work and causes
     #      potential memory corruption
     voidptr = Type.pointer(Type.int(8))
     nativearycls = self.context.make_array(typ)
     nativeary = nativearycls(self.context, self.builder)
     aryptr = nativeary._getpointer()
     ptr = self.builder.bitcast(aryptr, voidptr)
     errcode = self.numba_array_adaptor(ary, ptr)
     failed = cgutils.is_not_null(self.builder, errcode)
     with cgutils.if_unlikely(self.builder, failed):
         # TODO
         self.builder.unreachable()
     return self.builder.load(aryptr)
Ejemplo n.º 23
0
 def __init__(self, context, builder, args, steps, i, argtype):
     # Get data
     p = builder.gep(args, [context.get_constant(types.intp, i)])
     if cgutils.is_struct_ptr(argtype):
         self.byref = True
         self.data = builder.bitcast(builder.load(p), argtype)
     else:
         self.byref = False
         self.data = builder.bitcast(builder.load(p), Type.pointer(argtype))
         # Get step
     p = builder.gep(steps, [context.get_constant(types.intp, i)])
     abisize = context.get_constant(types.intp,
                                    context.get_abi_sizeof(argtype))
     self.step = builder.load(p)
     self.is_unit_strided = builder.icmp(ICMP_EQ, abisize, self.step)
     self.builder = builder
Ejemplo n.º 24
0
def _generic_array(context,
                   builder,
                   shape,
                   dtype,
                   symbol_name,
                   addrspace,
                   can_dynsized=False):
    elemcount = reduce(operator.mul, shape)
    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local addrespace allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address-space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        gvmem.align = context.get_abi_sizeof(lldtype)

        if elemcount <= 0:
            if can_dynsized:  # dynamic shared memory
                gvmem.linkage = lc.LINKAGE_EXTERNAL
            else:
                raise ValueError("array length <= 0")
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL

            gvmem.initializer = lc.Constant.undef(laryty)

        if dtype not in types.number_domain:
            raise TypeError("unsupported type: %s" % dtype)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    return _make_array(context, builder, dataptr, dtype, shape)
Ejemplo n.º 25
0
    def make_constant_array(self, builder, aryty, arr):
        """
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        """

        lmod = builder.module

        constvals = [
            self.get_constant(types.byte, i)
            for i in iter(arr.tobytes(order='A'))
        ]
        constary = lc.Constant.array(Type.int(8), constvals)

        addrspace = nvvm.ADDRSPACE_CONSTANT
        gv = lmod.add_global_variable(constary.type,
                                      name="_cudapy_cmem",
                                      addrspace=addrspace)
        gv.linkage = lc.LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = constary

        # Preserve the underlying alignment
        lldtype = self.get_data_type(aryty.dtype)
        align = self.get_abi_sizeof(lldtype)
        gv.align = 2**(align - 1).bit_length()

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
        genptr = builder.call(conv, [addrspaceptr])

        # Create array object
        ary = self.make_array(aryty)(self, builder)
        kshape = [self.get_constant(types.intp, s) for s in arr.shape]
        kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
        self.populate_array(ary,
                            data=builder.bitcast(genptr, ary.data.type),
                            shape=kshape,
                            strides=kstrides,
                            itemsize=ary.itemsize,
                            parent=ary.parent,
                            meminfo=None)

        return ary._getvalue()
Ejemplo n.º 26
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    constvals = [
        context.get_constant(types.byte, i)
        for i in iter(arr.tobytes(order="A"))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type,
                                  name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    gv.align = 2**(align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(
        ary,
        data=builder.bitcast(genptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=ary.itemsize,
        parent=ary.parent,
        meminfo=None,
    )

    return ary._getvalue()
Ejemplo n.º 27
0
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace,
                   can_dynsized=False):
    elemcount = reduce(operator.mul, shape)
    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local addrespace allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address-space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        gvmem.align = context.get_abi_sizeof(lldtype)

        if elemcount <= 0:
            if can_dynsized:    # dynamic shared memory
                gvmem.linkage = lc.LINKAGE_EXTERNAL
            else:
                raise ValueError("array length <= 0")
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL

            gvmem.initializer = lc.Constant.undef(laryty)

        if dtype not in types.number_domain:
            raise TypeError("unsupported type: %s" % dtype)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    return _make_array(context, builder, dataptr, dtype, shape)
Ejemplo n.º 28
0
    def test_inline_rsqrt(self):
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, "cu_rsqrt")
        bldr = Builder.new(fn.append_basic_block("entry"))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty, "rsqrt.approx.f32 $0, $1;", "=f,f", side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        self.assertTrue("rsqrt.approx.f32" in str(ptx))
Ejemplo n.º 29
0
def real_divmod(context, builder, x, y):
    assert x.type == y.type
    floatty = x.type

    module = builder.module
    fname = ".numba.python.rem.%s" % x.type
    fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty)))
    fn = module.get_or_insert_function(fnty, fname)

    if fn.is_declaration:
        fn.linkage = lc.LINKAGE_LINKONCE_ODR
        fnbuilder = lc.Builder.new(fn.append_basic_block('entry'))
        fx, fy, pmod = fn.args
        div, mod = real_divmod_func_body(context, fnbuilder, fx, fy)
        fnbuilder.store(mod, pmod)
        fnbuilder.ret(div)

    pmod = cgutils.alloca_once(builder, floatty)
    quotient = builder.call(fn, (x, y, pmod))
    return quotient, builder.load(pmod)
Ejemplo n.º 30
0
def printf(builder, format_string, *values):
    str_const = Constant.stringz(format_string)
    global_str_const = get_module(builder).add_global_variable(str_const.type,
                                                               '')
    global_str_const.initializer = str_const

    idx = [Constant.int(Type.int(32), 0), Constant.int(Type.int(32), 0)]
    str_addr = global_str_const.gep(idx)

    args = []
    for v in values:
        if isinstance(v, int):
            args.append(Constant.int(Type.int(), v))
        elif isinstance(v, float):
            args.append(Constant.real(Type.double(), v))
        else:
            args.append(v)
    functype = Type.function(Type.int(32), [Type.pointer(Type.int(8))], True)
    fn = get_module(builder).get_or_insert_function(functype, 'printf')
    builder.call(fn, [str_addr] + args)
Ejemplo n.º 31
0
def real_divmod(context, builder, x, y):
    assert x.type == y.type
    floatty = x.type

    module = builder.module
    fname = context.mangler(".numba.python.rem", [x.type])
    fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty)))
    fn = module.get_or_insert_function(fnty, fname)

    if fn.is_declaration:
        fn.linkage = lc.LINKAGE_LINKONCE_ODR
        fnbuilder = lc.Builder(fn.append_basic_block('entry'))
        fx, fy, pmod = fn.args
        div, mod = real_divmod_func_body(context, fnbuilder, fx, fy)
        fnbuilder.store(mod, pmod)
        fnbuilder.ret(div)

    pmod = cgutils.alloca_once(builder, floatty)
    quotient = builder.call(fn, (x, y, pmod))
    return quotient, builder.load(pmod)
Ejemplo n.º 32
0
def printf(builder, format_string, *values):
    str_const = Constant.stringz(format_string)
    global_str_const = get_module(builder).add_global_variable(
        str_const.type, '')
    global_str_const.initializer = str_const

    idx = [Constant.int(Type.int(32), 0), Constant.int(Type.int(32), 0)]
    str_addr = global_str_const.gep(idx)

    args = []
    for v in values:
        if isinstance(v, int):
            args.append(Constant.int(Type.int(), v))
        elif isinstance(v, float):
            args.append(Constant.real(Type.double(), v))
        else:
            args.append(v)
    functype = Type.function(Type.int(32), [Type.pointer(Type.int(8))], True)
    fn = get_module(builder).get_or_insert_function(functype, 'printf')
    builder.call(fn, [str_addr] + args)
Ejemplo n.º 33
0
def ptx_cmem_arylike(context, builder, sig, args):
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    constvals = [
        context.get_constant(types.byte, i)
        for i in six.iterbytes(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)

    return ary._getvalue()
Ejemplo n.º 34
0
    def test_inline_rsqrt(self):
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, 'cu_rsqrt')
        bldr = Builder.new(fn.append_basic_block('entry'))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                                  'rsqrt.approx.f32 $0, $1;',
                                  '=f,f', side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        self.assertTrue('rsqrt.approx.f32' in str(ptx))
Ejemplo n.º 35
0
def _python_array_obj_to_native_list(typ, obj, c, size, listptr, errorptr):
    """
    Construct a new native list from a Python array of objects.
    copied from _python_list_to_native but list_getitem is converted to array
    getitem.
    """
    def check_element_type(nth, itemobj, expected_typobj):
        typobj = nth.typeof(itemobj)
        # Check if *typobj* is NULL
        with c.builder.if_then(
                cgutils.is_null(c.builder, typobj),
                likely=False,
        ):
            c.builder.store(cgutils.true_bit, errorptr)
            loop.do_break()
        # Mandate that objects all have the same exact type
        type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj)

        with c.builder.if_then(type_mismatch, likely=False):
            c.builder.store(cgutils.true_bit, errorptr)
            c.pyapi.err_format(
                "PyExc_TypeError",
                "can't unbox heterogeneous list: %S != %S",
                expected_typobj, typobj,
            )
            c.pyapi.decref(typobj)
            loop.do_break()
        c.pyapi.decref(typobj)

    # Allocate a new native list
    ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size)
    # Array getitem call
    arr_get_fnty = LLType.function(LLType.pointer(c.pyapi.pyobj), [c.pyapi.pyobj, c.pyapi.py_ssize_t])
    arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            list.size = size
            zero = lir.Constant(size.type, 0)
            with c.builder.if_then(c.builder.icmp_signed('>', size, zero),
                                   likely=True):
                # Traverse Python list and unbox objects into native list
                with _NumbaTypeHelper(c) as nth:
                    # Note: *expected_typobj* can't be NULL
                    # TODO: enable type checking when emty list item in
                    # list(list(str)) case can be handled
                    # expected_typobj = nth.typeof(c.builder.load(
                    #                 c.builder.call(arr_get_fn, [obj, zero])))
                    with cgutils.for_range(c.builder, size) as loop:
                        itemobj = c.builder.call(arr_get_fn, [obj, loop.index])
                        # extra load since we have ptr to object
                        itemobj = c.builder.load(itemobj)
                        # c.pyapi.print_object(itemobj)
                        # check_element_type(nth, itemobj, expected_typobj)
                        # XXX we don't call native cleanup for each
                        # list element, since that would require keeping
                        # of which unboxings have been successful.
                        native = c.unbox(typ.dtype, itemobj)
                        with c.builder.if_then(native.is_error, likely=False):
                            c.builder.store(cgutils.true_bit, errorptr)
                            loop.do_break()
                        # The object (e.g. string) is stored so incref=True
                        list.setitem(loop.index, native.value, incref=True)
                    # c.pyapi.decref(expected_typobj)
            if typ.reflected:
                list.parent = obj
            # Stuff meminfo pointer into the Python object for
            # later reuse.
            with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)),
                                   likely=False):
                c.pyapi.object_set_private_data(obj, list.meminfo)
            list.set_dirty(False)
            c.builder.store(list.value, listptr)

        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, typ, list.value)
Ejemplo n.º 36
0
def _prepare_call_to_object_mode(context, builder, func, signature, args, env):
    mod = cgutils.get_module(builder)

    thisfunc = cgutils.get_function(builder)
    bb_core_return = thisfunc.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj,
        [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        builder.store(Constant.null(ll_pyobj), ptr)  # initialize to NULL

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, ref=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new,
                           [nd, dims, strides, data, type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(builder,
                                                     func,
                                                     ll_pyobj,
                                                     object_sig,
                                                     ndarray_objects,
                                                     env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
Ejemplo n.º 37
0
    def build(self):
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper_module = self.library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        func = wrapper_module.add_function(func_type, name=self.func.name)
        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty,
                                              "__gufunc__." + self.func.name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder.new(wrapper.append_basic_block("entry"))
        loopcount = builder.load(arg_dims, name="loopcount")

        # Unpack shapes
        unique_syms = set()
        for grp in (self.sin, self.sout):
            for syms in grp:
                unique_syms |= set(syms)

        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                             arg_steps, i, step_offset, typ, sym, sym_dim)
            if not ary.as_scalar:
                step_offset += ary.ndim
            arrays.append(ary)

        bbreturn = cgutils.get_function(builder).append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
            args = [a.array_value for a in arrays]
            innercall, error = self.gen_loop_body(builder, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

            for a in arrays:
                a.next(ind)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder)

        builder.ret_void()

        self.library.add_ir_module(wrapper_module)
        wrapper = self.library.get_function(wrapper.name)

        # Set core function to internal so that it is not generated
        self.func.linkage = LINKAGE_INTERNAL

        return wrapper, self.env
Ejemplo n.º 38
0
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the arguments
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    oldfunc = func
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.call_conv.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(
            UArrayArg(context, builder, arg_args, arg_steps, i,
                      context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps, len(actual_args),
                    valty)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder, arrays, out,
                                           offsets, store_offset, signature,
                                           pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:

        with cgutils.ifelse(builder,
                            unit_strided) as (is_unit_strided, is_strided):

            with is_unit_strided:
                with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature)

                builder.ret_void()

        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)
    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
Ejemplo n.º 39
0
 def recreate_record(self, pdata, size, dtypeaddr):
     fnty = Type.function(
         self.pyobj, [Type.pointer(Type.int(8)),
                      Type.int(), self.pyobj])
     fn = self._get_function(fnty, name="numba_recreate_record")
     return self.builder.call(fn, [pdata, size, dtypeaddr])
Ejemplo n.º 40
0
from llvmlite import ir as llvmir
import llvmlite.llvmpy.core as lc
from llvmlite.llvmpy.core import Type, Constant, LLVMException
import llvmlite.binding as ll

from numba import types, utils, cgutils, typing, funcdesc, debuginfo
from numba import _dynfunc, _helperlib
from numba.compiler_lock import global_compiler_lock
from numba.pythonapi import PythonAPI
from . import arrayobj, builtins, imputils
from .imputils import (user_function, user_generator,
                       builtin_registry, impl_ret_borrowed,
                       RegistryLoader)
from numba import datamodel

GENERIC_POINTER = Type.pointer(Type.int(8))
PYOBJECT = GENERIC_POINTER
void_ptr = GENERIC_POINTER


class OverloadSelector(object):
    """
    An object matching an actual signature against a registry of formal
    signatures and choosing the best candidate, if any.

    In the current implementation:
    - a "signature" is a tuple of type classes or type instances
    - the "best candidate" is the most specific match
    """

    def __init__(self):
Ejemplo n.º 41
0
    def get_data_type(self, ty):
        """
        Get a LLVM data representation of the Numba type *ty* that is safe
        for storage.  Record data are stored as byte array.

        The return value is a llvmlite.ir.Type object, or None if the type
        is an opaque pointer (???).
        """
        try:
            fac = type_registry.match(ty)
        except KeyError:
            pass
        else:
            return fac(self, ty)

        if (isinstance(ty, types.Dummy) or isinstance(ty, types.Module)
                or isinstance(ty, types.Function)
                or isinstance(ty, types.Dispatcher)
                or isinstance(ty, types.Object)
                or isinstance(ty, types.Macro)):
            return PYOBJECT

        elif isinstance(ty, types.CPointer):
            dty = self.get_data_type(ty.dtype)
            return Type.pointer(dty)

        elif isinstance(ty, types.Optional):
            return self.get_struct_type(self.make_optional(ty))

        elif isinstance(ty, types.Array):
            return self.get_struct_type(self.make_array(ty))

        elif isinstance(ty, types.UniTuple):
            dty = self.get_value_type(ty.dtype)
            return Type.array(dty, ty.count)

        elif isinstance(ty, types.Tuple):
            dtys = [self.get_value_type(t) for t in ty]
            return Type.struct(dtys)

        elif isinstance(ty, types.Record):
            # Record are represented as byte array
            return Type.struct([Type.array(Type.int(8), ty.size)])

        elif isinstance(ty, types.UnicodeCharSeq):
            charty = Type.int(numpy_support.sizeof_unicode_char * 8)
            return Type.struct([Type.array(charty, ty.count)])

        elif isinstance(ty, types.CharSeq):
            charty = Type.int(8)
            return Type.struct([Type.array(charty, ty.count)])

        elif ty in STRUCT_TYPES:
            return self.get_struct_type(STRUCT_TYPES[ty])

        else:
            try:
                impl = struct_registry.match(ty)
            except KeyError:
                pass
            else:
                return self.get_struct_type(impl(ty))

        if isinstance(ty, types.Pair):
            pairty = self.make_pair(ty.first_type, ty.second_type)
            return self.get_struct_type(pairty)

        else:
            return LTYPEMAP[ty]
Ejemplo n.º 42
0
from functools import singledispatch
from llvmlite.llvmpy.core import Type
from numba.core import types, cgutils
from numba.core.imputils import Registry
from numba.cuda import nvvmutils

registry = Registry()
lower = registry.lower

voidptr = Type.pointer(Type.int(8))

# NOTE: we don't use @lower here since print_item() doesn't return a LLVM value


@singledispatch
def print_item(ty, context, builder, val):
    """
    Handle printing of a single value of the given Numba type.
    A (format string, [list of arguments]) is returned that will allow
    forming the final printf()-like call.
    """
    raise NotImplementedError("printing unimplemented for values of type %s" %
                              (ty, ))


@print_item.register(types.Integer)
@print_item.register(types.IntegerLiteral)
def int_print_impl(ty, context, builder, val):
    if ty in types.unsigned_domain:
        rawfmt = "%llu"
        dsttype = types.uint64
Ejemplo n.º 43
0
 def recreate_record(self, pdata, size, dtype, env_manager):
     fnty = Type.function(self.pyobj, [Type.pointer(Type.int(8)),
                                       Type.int(), self.pyobj])
     fn = self._get_function(fnty, name="numba_recreate_record")
     dtypeaddr = env_manager.read_const(env_manager.add_const(dtype))
     return self.builder.call(fn, [pdata, size, dtypeaddr])
Ejemplo n.º 44
0
 def parse_tuple(self, args, fmt, *objs):
     charptr = Type.pointer(Type.int(8))
     argtypes = [self.pyobj, charptr]
     fnty = Type.function(Type.int(), argtypes, var_arg=True)
     fn = self._get_function(fnty, name="PyArg_ParseTuple")
     return self.builder.call(fn, [args, fmt] + list(objs))
Ejemplo n.º 45
0
Archivo: base.py Proyecto: yuguen/numba
from llvmlite import ir as llvmir
import llvmlite.llvmpy.core as lc
from llvmlite.llvmpy.core import Type, Constant, LLVMException
import llvmlite.binding as ll

from numba import types, utils, cgutils, typing, funcdesc, debuginfo
from numba import _dynfunc, _helperlib
from numba.pythonapi import PythonAPI
from . import arrayobj, builtins, imputils
from .imputils import (user_function, user_generator,
                       builtin_registry, impl_ret_borrowed,
                       RegistryLoader)
from numba import datamodel

GENERIC_POINTER = Type.pointer(Type.int(8))
PYOBJECT = GENERIC_POINTER
void_ptr = GENERIC_POINTER


class OverloadSelector(object):
    """
    An object matching an actual signature against a registry of formal
    signatures and choosing the best candidate, if any.

    In the current implementation:
    - a "signature" is a tuple of type classes or type instances
    - the "best candidate" is the most specific match
    """

    def __init__(self):
Ejemplo n.º 46
0
def build_ufunc_wrapper(library, context, fname, signature, objmode, envptr,
                        env):
    """
    Wrap the scalar function with a loop that iterates over the arguments
    """
    assert isinstance(fname, str)
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)

    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))

    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ))

    # Prepare output
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder, arrays, out,
                                           offsets, store_offset, signature,
                                           pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    loop.index, pyapi)

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    pyapi)

        builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
Ejemplo n.º 47
0
def get_record_member(builder, record, offset, typ):
    pval = gep_inbounds(builder, record, 0, offset)
    assert not is_pointer(pval.type.pointee)
    return builder.bitcast(pval, Type.pointer(typ))
Ejemplo n.º 48
0
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args, env):
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj,
        [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new,
                               [nd, dims, strides, data, type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(builder,
                                                     func,
                                                     types.pyobject,
                                                     object_sig,
                                                     object_args,
                                                     env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
Ejemplo n.º 49
0
def _generic_array(context,
                   builder,
                   shape,
                   dtype,
                   symbol_name,
                   addrspace,
                   can_dynsized=False):
    elemcount = reduce(operator.mul, shape, 1)

    # Check for valid shape for this type of allocation.
    # Only 1d arrays can be dynamic.
    dynamic_smem = elemcount <= 0 and can_dynsized and len(shape) == 1
    if elemcount <= 0 and not dynamic_smem:
        raise ValueError("array length <= 0")

    # Check that we support the requested dtype
    other_supported_type = isinstance(dtype, (types.Record, types.Boolean))
    if dtype not in types.number_domain and not other_supported_type:
        raise TypeError("unsupported type: %s" % dtype)

    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local address space allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        align = context.get_abi_sizeof(lldtype)
        # Alignment is required to be a power of 2 for shared memory. If it is
        # not a power of 2 (e.g. for a Record array) then round up accordingly.
        gvmem.align = 1 << (align - 1).bit_length()

        if dynamic_smem:
            gvmem.linkage = lc.LINKAGE_EXTERNAL
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL

            gvmem.initializer = lc.Constant.undef(laryty)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)

    # Compute strides
    laststride = itemsize
    rstrides = []
    for i, lastsize in enumerate(reversed(shape)):
        rstrides.append(laststride)
        laststride *= lastsize
    strides = [s for s in reversed(rstrides)]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    # Compute shape
    if dynamic_smem:
        # Compute the shape based on the dynamic shared memory configuration.
        # Unfortunately NVVM does not provide an intrinsic for the
        # %dynamic_smem_size register, so we must read it using inline
        # assembly.
        get_dynshared_size = InlineAsm.get(Type.function(Type.int(), []),
                                           "mov.u32 $0, %dynamic_smem_size;",
                                           '=r',
                                           side_effect=True)
        dynsmem_size = builder.zext(builder.call(get_dynshared_size, []),
                                    Type.int(width=64))
        # Only 1-D dynamic shared memory is supported so the following is a
        # sufficient construction of the shape
        kitemsize = context.get_constant(types.intp, itemsize)
        kshape = [builder.udiv(dynsmem_size, kitemsize)]
    else:
        kshape = [context.get_constant(types.intp, s) for s in shape]

    # Create array object
    ndim = len(shape)
    aryty = types.Array(dtype=dtype, ndim=ndim, layout='C')
    ary = context.make_array(aryty)(context, builder)

    context.populate_array(ary,
                           data=builder.bitcast(dataptr, ary.data.type),
                           shape=kshape,
                           strides=kstrides,
                           itemsize=context.get_constant(types.intp, itemsize),
                           meminfo=None)
    return ary._getvalue()
Ejemplo n.º 50
0
def get_record_member(builder, record, offset, typ):
    pdata = get_record_data(builder, record)
    pval = inbound_gep(builder, pdata, 0, offset)
    assert not is_pointer(pval.type.pointee)
    return builder.bitcast(pval, Type.pointer(typ))
Ejemplo n.º 51
0
    def _build_wrapper(self, library, name):
        """
        The LLVM IRBuilder code to create the gufunc wrapper.
        The *library* arg is the CodeLibrary for which the wrapper should
        be added to.  The *name* arg is the name of the wrapper function being
        created.
        """
        byte_t = Type.int(8)
        byte_ptr_t = Type.pointer(byte_t)
        byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
        intp_t = self.context.get_value_type(types.intp)
        intp_ptr_t = Type.pointer(intp_t)

        fnty = Type.function(
            Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

        wrapper_module = library.create_ir_module('')
        func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                     self.fndesc.argtypes)
        fname = self.fndesc.llvm_func_name
        func = wrapper_module.add_function(func_type, name=fname)

        func.attributes.add("alwaysinline")
        wrapper = wrapper_module.add_function(fnty, name)
        arg_args, arg_dims, arg_steps, arg_data = wrapper.args
        arg_args.name = "args"
        arg_dims.name = "dims"
        arg_steps.name = "steps"
        arg_data.name = "data"

        builder = Builder(wrapper.append_basic_block("entry"))
        loopcount = builder.load(arg_dims, name="loopcount")
        pyapi = self.context.get_python_api(builder)

        # Unpack shapes
        unique_syms = set()
        for grp in (self.sin, self.sout):
            for syms in grp:
                unique_syms |= set(syms)

        sym_map = {}
        for syms in self.sin:
            for s in syms:
                if s not in sym_map:
                    sym_map[s] = len(sym_map)

        sym_dim = {}
        for s, i in sym_map.items():
            sym_dim[s] = builder.load(
                builder.gep(arg_dims,
                            [self.context.get_constant(types.intp, i + 1)]))

        # Prepare inputs
        arrays = []
        step_offset = len(self.sin) + len(self.sout)
        for i, (typ, sym) in enumerate(
                zip(self.signature.args, self.sin + self.sout)):
            ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                             step_offset, typ, sym, sym_dim)
            step_offset += len(sym)
            arrays.append(ary)

        bbreturn = builder.append_basic_block('.return')

        # Prologue
        self.gen_prologue(builder, pyapi)

        # Loop
        with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
            args = [a.get_array_at_offset(loop.index) for a in arrays]
            innercall, error = self.gen_loop_body(builder, pyapi, func, args)
            # If error, escape
            cgutils.cbranch_or_continue(builder, error, bbreturn)

        builder.branch(bbreturn)
        builder.position_at_end(bbreturn)

        # Epilogue
        self.gen_epilogue(builder, pyapi)

        builder.ret_void()

        # Link
        library.add_ir_module(wrapper_module)
        library.add_linking_library(self.library)
Ejemplo n.º 52
0
def get_record_member(builder, record, offset, typ):
    pval = inbound_gep(builder, record, 0, offset)
    assert not is_pointer(pval.type.pointee)
    return builder.bitcast(pval, Type.pointer(typ))
Ejemplo n.º 53
0
import sys
import ctypes
import struct as struct_
from llvmlite.llvmpy.core import Type, Constant

_trace_refs_ = hasattr(sys, "getobjects")
_plat_bits = struct_.calcsize("@P") * 8

_int8 = Type.int(8)
_int32 = Type.int(32)

_void_star = Type.pointer(_int8)

_int8_star = _void_star

_sizeof_py_ssize_t = ctypes.sizeof(getattr(ctypes, "c_size_t"))
_llvm_py_ssize_t = Type.int(_sizeof_py_ssize_t * 8)

if _trace_refs_:
    _pyobject_head = Type.struct(
        [_void_star, _void_star, _llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
        Constant.null(_void_star),  # _ob_next
        Constant.null(_void_star),  # _ob_prev
        Constant.int(_llvm_py_ssize_t, 1),  # ob_refcnt
        Constant.null(_void_star),  # ob_type
    ])

else:
    _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star])
    _pyobject_head_init = Constant.struct([
Ejemplo n.º 54
0
    def from_native_value(self, val, typ):
        if typ == types.pyobject:
            return val

        elif typ == types.boolean:
            longval = self.builder.zext(val, self.long)
            return self.bool_from_long(longval)

        elif typ in types.unsigned_domain:
            ullval = self.builder.zext(val, self.ulonglong)
            return self.long_from_ulonglong(ullval)

        elif typ in types.signed_domain:
            ival = self.builder.sext(val, self.longlong)
            return self.long_from_longlong(ival)

        elif typ == types.float32:
            dbval = self.builder.fpext(val, self.double)
            return self.float_from_double(dbval)

        elif typ == types.float64:
            return self.float_from_double(val)

        elif typ == types.complex128:
            cmplxcls = self.context.make_complex(typ)
            cval = cmplxcls(self.context, self.builder, value=val)
            return self.complex_from_doubles(cval.real, cval.imag)

        elif typ == types.complex64:
            cmplxcls = self.context.make_complex(typ)
            cval = cmplxcls(self.context, self.builder, value=val)
            freal = self.context.cast(self.builder, cval.real, types.float32,
                                      types.float64)
            fimag = self.context.cast(self.builder, cval.imag, types.float32,
                                      types.float64)
            return self.complex_from_doubles(freal, fimag)

        elif isinstance(typ, types.NPDatetime):
            return self.create_np_datetime(val, typ.unit_code)

        elif isinstance(typ, types.NPTimedelta):
            return self.create_np_timedelta(val, typ.unit_code)

        elif typ == types.none:
            ret = self.make_none()
            return ret

        elif isinstance(typ, types.Optional):
            return self.from_native_return(val, typ.type)

        elif isinstance(typ, types.Array):
            return self.from_native_array(typ, val)

        elif isinstance(typ, types.Record):
            # Note we will create a copy of the record
            # This is the only safe way.
            pdata = cgutils.get_record_data(self.builder, val)
            size = Constant.int(Type.int(), pdata.type.pointee.count)
            ptr = self.builder.bitcast(pdata, Type.pointer(Type.int(8)))
            # Note: this will only work for CPU mode
            #       The following requires access to python object
            dtype_addr = Constant.int(self.py_ssize_t, id(typ.dtype))
            dtypeobj = dtype_addr.inttoptr(self.pyobj)
            return self.recreate_record(ptr, size, dtypeobj)

        elif isinstance(typ, (types.Tuple, types.UniTuple)):
            return self.from_tuple(typ, val)

        raise NotImplementedError(typ)