Esempio n. 1
0
def int_sign_impl(context, builder, sig, args):
    """
    np.sign(int)
    """
    [x] = args
    POS = Constant.int(x.type, 1)
    NEG = Constant.int(x.type, -1)
    ZERO = Constant.int(x.type, 0)

    cmp_zero = builder.icmp(lc.ICMP_EQ, x, ZERO)
    cmp_pos = builder.icmp(lc.ICMP_SGT, x, ZERO)

    presult = cgutils.alloca_once(builder, x.type)

    bb_zero = builder.append_basic_block(".zero")
    bb_postest = builder.append_basic_block(".postest")
    bb_pos = builder.append_basic_block(".pos")
    bb_neg = builder.append_basic_block(".neg")
    bb_exit = builder.append_basic_block(".exit")

    builder.cbranch(cmp_zero, bb_zero, bb_postest)

    with builder.goto_block(bb_zero):
        builder.store(ZERO, presult)
        builder.branch(bb_exit)

    with builder.goto_block(bb_postest):
        builder.cbranch(cmp_pos, bb_pos, bb_neg)

    with builder.goto_block(bb_pos):
        builder.store(POS, presult)
        builder.branch(bb_exit)

    with builder.goto_block(bb_neg):
        builder.store(NEG, presult)
        builder.branch(bb_exit)

    builder.position_at_end(bb_exit)
    res = builder.load(presult)
    return impl_ret_untracked(context, builder, sig.return_type, res)
Esempio n. 2
0
    def codegen(context, builder, sig, args):
        fnty = ir.FunctionType(
            ll_status,
            [ll_list_type, ll_ssize_t, ll_bytes],
        )
        [tl, tindex] = sig.args
        [l, index] = args
        fn = builder.module.get_or_insert_function(
            fnty, name='numba_list_{}'.format(op))

        dm_item = context.data_model_manager[tl.item_type]
        ll_item = context.get_data_type(tl.item_type)
        ptr_item = cgutils.alloca_once(builder, ll_item)

        lp = _container_get_data(context, builder, tl, l)
        status = builder.call(
            fn,
            [
                lp,
                index,
                _as_bytes(builder, ptr_item),
            ],
        )
        # Load item if output is available
        found = builder.icmp_signed('>=', status,
                                    status.type(int(ListStatus.LIST_OK)))
        out = context.make_optional_none(
            builder, tl.item_type if IS_NOT_NONE else types.int64)
        pout = cgutils.alloca_once_value(builder, out)

        with builder.if_then(found):
            if IS_NOT_NONE:
                item = dm_item.load_from_data_pointer(builder, ptr_item)
                context.nrt.incref(builder, tl.item_type, item)
                loaded = context.make_optional_value(builder, tl.item_type,
                                                     item)
                builder.store(loaded, pout)

        out = builder.load(pout)
        return context.make_tuple(builder, resty, [status, out])
Esempio n. 3
0
def impl_list_getiter(context, builder, sig, args):
    """Implement iter(List)."""
    [tl] = sig.args
    [l] = args
    iterablety = types.ListTypeIterableType(tl)
    it = context.make_helper(builder, iterablety.iterator_type)

    fnty = ir.FunctionType(ir.VoidType(), [ll_listiter_type, ll_list_type],)

    fn = builder.module.get_or_insert_function(fnty, name="numba_list_iter")

    proto = ctypes.CFUNCTYPE(ctypes.c_size_t)
    listiter_sizeof = proto(_helperlib.c_helpers["list_iter_sizeof"])
    state_type = ir.ArrayType(ir.IntType(8), listiter_sizeof())

    pstate = cgutils.alloca_once(builder, state_type, zfill=True)
    it.state = _as_bytes(builder, pstate)
    it.parent = l

    dp = _container_get_data(context, builder, iterablety.parent, args[0])
    builder.call(fn, [it.state, dp])
    return impl_ret_borrowed(context, builder, sig.return_type, it._getvalue(),)
Esempio n. 4
0
def string_split_impl(context, builder, sig, args):
    nitems = cgutils.alloca_once(builder, lir.IntType(64))
    # input str, sep, size pointer
    fnty = lir.FunctionType(
        lir.IntType(8).as_pointer().as_pointer(), [
            lir.IntType(8).as_pointer(),
            lir.IntType(8).as_pointer(),
            lir.IntType(64).as_pointer()
        ])
    fn = builder.module.get_or_insert_function(fnty, name="str_split")
    ptr = builder.call(fn, args + [nitems])
    size = builder.load(nitems)
    # TODO: use ptr instead of allocating and copying, use NRT_MemInfo_new
    # TODO: deallocate ptr
    _list = numba.cpython.listobj.ListInstance.allocate(
        context, builder, sig.return_type, size)
    _list.size = size
    with cgutils.for_range(builder, size) as loop:
        value = builder.load(cgutils.gep_inbounds(builder, ptr, loop.index))
        # TODO: refcounted str
        _list.setitem(loop.index, value, incref=False)
    return impl_ret_new_ref(context, builder, sig.return_type, _list.value)
Esempio n. 5
0
def bytes_to_charseq(context, builder, fromty, toty, val):
    barr = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    src = builder.bitcast(barr.data, ir.IntType(8).as_pointer())
    src_length = barr.nitems

    lty = context.get_value_type(toty)
    dstint_t = ir.IntType(8)
    dst_ptr = cgutils.alloca_once(builder, lty)
    dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())

    dst_length = ir.Constant(src_length.type, toty.count)
    is_shorter_value = builder.icmp_unsigned('<', src_length, dst_length)
    count = builder.select(is_shorter_value, src_length, dst_length)
    with builder.if_then(is_shorter_value):
        cgutils.memset(builder, dst, ir.Constant(src_length.type, toty.count),
                       0)
    with cgutils.for_range(builder, count) as loop:
        in_ptr = builder.gep(src, [loop.index])
        in_val = builder.zext(builder.load(in_ptr), dstint_t)
        builder.store(in_val, builder.gep(dst, [loop.index]))

    return builder.load(dst_ptr)
Esempio n. 6
0
def unbox_COO(typ: COOType, obj: COO, c) -> NativeValue:
    ret_ptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ))
    is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)
    fail_obj = c.context.get_constant_null(typ)

    with local_return(c.builder) as ret:
        fail_blk = c.builder.append_basic_block("fail")
        with c.builder.goto_block(fail_blk):
            c.builder.store(cgutils.true_bit, is_error_ptr)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        data = _unbox_native_field(typ.data_type, obj, "data", c)
        with cgutils.if_unlikely(c.builder, data.is_error):
            c.builder.branch(fail_blk)

        coords = _unbox_native_field(typ.coords_type, obj, "coords", c)
        with cgutils.if_unlikely(c.builder, coords.is_error):
            c.builder.branch(fail_blk)

        shape = _unbox_native_field(typ.shape_type, obj, "shape", c)
        with cgutils.if_unlikely(c.builder, shape.is_error):
            c.builder.branch(fail_blk)

        fill_value = _unbox_native_field(typ.fill_value_type, obj,
                                         "fill_value", c)
        with cgutils.if_unlikely(c.builder, fill_value.is_error):
            c.builder.branch(fail_blk)

        coo = cgutils.create_struct_proxy(typ)(c.context, c.builder)
        coo.coords = coords.value
        coo.data = data.value
        coo.shape = shape.value
        coo.fill_value = fill_value.value
        c.builder.store(cgutils.false_bit, is_error_ptr)
        c.builder.store(coo._getvalue(), ret_ptr)

    return NativeValue(c.builder.load(ret_ptr),
                       is_error=c.builder.load(is_error_ptr))
Esempio n. 7
0
def _list_new_codegen(context, builder, itemty, new_size, error_handler):
    fnty = ir.FunctionType(
        ll_status,
        [ll_list_type.as_pointer(), ll_ssize_t, ll_ssize_t],
    )
    fn = builder.module.get_or_insert_function(fnty, name='numba_list_new')
    # Determine sizeof item types
    ll_item = context.get_data_type(itemty)
    sz_item = context.get_abi_sizeof(ll_item)
    reflp = cgutils.alloca_once(builder, ll_list_type, zfill=True)
    status = builder.call(
        fn,
        [reflp, ll_ssize_t(sz_item), new_size],
    )
    msg = "Failed to allocate list"
    error_handler(
        builder,
        status,
        msg,
    )
    lp = builder.load(reflp)
    return lp
Esempio n. 8
0
def masked_scalar_is_null_impl(context, builder, sig, args):
    """
    Implement `MaskedType` is `NA`
    """
    if isinstance(sig.args[1], NAType):
        masked_type, na = sig.args
        value = args[0]
    else:
        na, masked_type = sig.args
        value = args[1]

    indata = cgutils.create_struct_proxy(masked_type)(context,
                                                      builder,
                                                      value=value)
    result = cgutils.alloca_once(builder, ir.IntType(1))
    with builder.if_else(indata.valid) as (then, otherwise):
        with then:
            builder.store(context.get_constant(types.boolean, 0), result)
        with otherwise:
            builder.store(context.get_constant(types.boolean, 1), result)

    return builder.load(result)
Esempio n. 9
0
def unbox_list(typ, obj, c):
    """
    Convert list *obj* to a native list.

    If list was previously unboxed, we reuse the existing native list
    to ensure consistency.
    """
    size = c.pyapi.list_size(obj)

    errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit)
    listptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ))

    # See if the list was previously unboxed, if so, re-use the meminfo.
    ptr = c.pyapi.object_get_private_data(obj)

    with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) as (
        has_meminfo,
        otherwise,
    ):

        with has_meminfo:
            # List was previously unboxed => reuse meminfo
            list = listobj.ListInstance.from_meminfo(c.context, c.builder, typ, ptr)
            list.size = size
            if typ.reflected:
                list.parent = obj
            c.builder.store(list.value, listptr)

        with otherwise:
            _python_list_to_native(typ, obj, c, size, listptr, errorptr)

    def cleanup():
        # Clean up the associated pointer, as the meminfo is now invalid.
        c.pyapi.object_reset_private_data(obj)

    return NativeValue(
        c.builder.load(listptr), is_error=c.builder.load(errorptr), cleanup=cleanup
    )
Esempio n. 10
0
def lower_dist_arr_reduce(context, builder, sig, args):

    op_typ = args[1].type

    # store an int to specify data type
    typ_enum = _numba_to_c_type_map[sig.args[0].dtype]
    typ_arg = cgutils.alloca_once_value(
        builder, lir.Constant(lir.IntType(32), typ_enum))
    ndims = sig.args[0].ndim

    out = make_array(sig.args[0])(context, builder, args[0])
    # store size vars array struct to pointer
    size_ptr = cgutils.alloca_once(builder, out.shape.type)
    builder.store(out.shape, size_ptr)
    size_arg = builder.bitcast(size_ptr, lir.IntType(64).as_pointer())

    ndim_arg = cgutils.alloca_once_value(
        builder, lir.Constant(lir.IntType(32), sig.args[0].ndim))
    call_args = [
        builder.bitcast(out.data,
                        lir.IntType(8).as_pointer()), size_arg,
        builder.load(ndim_arg), args[1],
        builder.load(typ_arg)
    ]

    # array, shape, ndim, extra last arg type for type enum
    arg_typs = [
        lir.IntType(8).as_pointer(),
        lir.IntType(64).as_pointer(),
        lir.IntType(32), op_typ,
        lir.IntType(32)
    ]
    fnty = lir.FunctionType(lir.IntType(32), arg_typs)
    fn = builder.module.get_or_insert_function(fnty,
                                               name="hpat_dist_arr_reduce")
    builder.call(fn, call_args)
    res = out._getvalue()
    return impl_ret_borrowed(context, builder, sig.return_type, res)
Esempio n. 11
0
 def codegen(context, builder, sig, args):
     fnty = ir.FunctionType(
         ll_status,
         [ll_dict_type.as_pointer(), ll_ssize_t, ll_ssize_t],
     )
     fn = builder.module.get_or_insert_function(
         fnty, name='numba_dict_new_minsize')
     # Determine sizeof key and value types
     ll_key = context.get_data_type(keyty.instance_type)
     ll_val = context.get_data_type(valty.instance_type)
     sz_key = context.get_abi_sizeof(ll_key)
     sz_val = context.get_abi_sizeof(ll_val)
     refdp = cgutils.alloca_once(builder, ll_dict_type, zfill=True)
     status = builder.call(
         fn,
         [refdp, ll_ssize_t(sz_key), ll_ssize_t(sz_val)],
     )
     _raise_if_error(
         context, builder, status,
         msg="Failed to allocate dictionary",
     )
     dp = builder.load(refdp)
     return dp
Esempio n. 12
0
def make_enumerate_object(context, builder, sig, args):
    assert len(args) == 1 or len(args) == 2 # enumerate(it) or enumerate(it, start)
    srcty = sig.args[0]

    if len(args) == 1:
        src = args[0]
        start_val = context.get_constant(types.intp, 0)
    elif len(args) == 2:
        src = args[0]
        start_val = context.cast(builder, args[1], sig.args[1], types.intp)

    iterobj = call_getiter(context, builder, srcty, src)

    enum = context.make_helper(builder, sig.return_type)

    countptr = cgutils.alloca_once(builder, start_val.type)
    builder.store(start_val, countptr)

    enum.count = countptr
    enum.iter = iterobj

    res = enum._getvalue()
    return impl_ret_new_ref(context, builder, sig.return_type, res)
Esempio n. 13
0
def atomic_sub_wrapper(context, builder, sig, args):
    # dpcpp yet does not support ``__spirv_AtomicFSubEXT``. To support atomic.sub we
    # reuse atomic.add and negate the value. For example, atomic.add(A, index, -val) is
    # equivalent to atomic.sub(A, index, val).
    val = args[2]
    new_val = cgutils.alloca_once(
        builder,
        context.get_value_type(sig.args[2]),
        size=context.get_constant(types.uintp, 1),
        name="new_val_0",
    )
    val_dtype = sig.args[2]
    if val_dtype == types.float32 or val_dtype == types.float64:
        builder.store(builder.fmul(val, context.get_constant(sig.args[2], -1)),
                      new_val)
    elif val_dtype == types.int32 or val_dtype == types.int64:
        builder.store(builder.mul(val, context.get_constant(sig.args[2], -1)),
                      new_val)
    else:
        raise TypeError("Unsupported type %s" % val_dtype)

    args[2] = builder.load(new_val)

    return native_atomic_add(context, builder, sig, args)
Esempio n. 14
0
def create_null_ptr(builder, context):
    """
    Allocates a new LLVM Value storing a ``void*`` and returns the Value to
    caller.

    Args:
        builder: The LLVM IR builder to be used for code generation.
        context: The LLVM IR builder context.

    Returns: An LLVM value storing a null pointer
    """
    null_ptr = cgutils.alloca_once(
        builder=builder,
        ty=context.get_value_type(types.voidptr),
        size=context.get_constant(types.uintp, 1),
    )
    builder.store(
        builder.inttoptr(
            context.get_constant(types.uintp, 0),
            get_llvm_type(context=context, type=types.voidptr),
        ),
        null_ptr,
    )
    return null_ptr
    def details(context, builder, signature, args):
        ll_void = context.get_value_type(types.void)
        ll_Py_UCS4 = context.get_value_type(_Py_UCS4)
        ll_intc = context.get_value_type(types.intc)
        ll_intc_ptr = ll_intc.as_pointer()
        ll_uchar = context.get_value_type(types.uchar)
        ll_uchar_ptr = ll_uchar.as_pointer()
        ll_ushort = context.get_value_type(types.ushort)
        ll_ushort_ptr = ll_ushort.as_pointer()
        fnty = lc.Type.function(
            ll_void,
            [
                ll_Py_UCS4,  # code
                ll_intc_ptr,  # upper
                ll_intc_ptr,  # lower
                ll_intc_ptr,  # title
                ll_uchar_ptr,  # decimal
                ll_uchar_ptr,  # digit
                ll_ushort_ptr,  # flags
            ],
        )
        fn = builder.module.get_or_insert_function(fnty,
                                                   name="numba_gettyperecord")
        upper = cgutils.alloca_once(builder, ll_intc, name="upper")
        lower = cgutils.alloca_once(builder, ll_intc, name="lower")
        title = cgutils.alloca_once(builder, ll_intc, name="title")
        decimal = cgutils.alloca_once(builder, ll_uchar, name="decimal")
        digit = cgutils.alloca_once(builder, ll_uchar, name="digit")
        flags = cgutils.alloca_once(builder, ll_ushort, name="flags")

        byref = [upper, lower, title, decimal, digit, flags]
        builder.call(fn, [args[0]] + byref)
        buf = []
        for x in byref:
            buf.append(builder.load(x))

        res = context.make_tuple(builder, signature.return_type, tuple(buf))
        return impl_ret_untracked(context, builder, signature.return_type, res)
Esempio n. 16
0
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args):
    mod = builder.module

    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)

    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(
        ll_pyobj,
        [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new,
                               [nd, dims, strides, data, type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(builder, func,
                                                     types.pyobject,
                                                     object_sig, object_args)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
Esempio n. 17
0
def box_COO(typ: COOType, val: "some LLVM thing", c) -> COO:
    ret_ptr = cgutils.alloca_once(c.builder, c.pyapi.pyobj)
    fail_obj = c.pyapi.get_null_object()

    coo = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val)

    with local_return(c.builder) as ret:
        data_obj = c.box(typ.data_type, coo.data)
        with cgutils.if_unlikely(c.builder,
                                 cgutils.is_null(c.builder, data_obj)):
            c.builder.store(fail_obj, ret_ptr)
            ret()

        coords_obj = c.box(typ.coords_type, coo.coords)
        with cgutils.if_unlikely(c.builder,
                                 cgutils.is_null(c.builder, coords_obj)):
            c.pyapi.decref(data_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        shape_obj = c.box(typ.shape_type, coo.shape)
        with cgutils.if_unlikely(c.builder,
                                 cgutils.is_null(c.builder, shape_obj)):
            c.pyapi.decref(coords_obj)
            c.pyapi.decref(data_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        fill_value_obj = c.box(typ.fill_value_type, coo.fill_value)
        with cgutils.if_unlikely(c.builder,
                                 cgutils.is_null(c.builder, fill_value_obj)):
            c.pyapi.decref(shape_obj)
            c.pyapi.decref(coords_obj)
            c.pyapi.decref(data_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(COO))
        with cgutils.if_unlikely(c.builder,
                                 cgutils.is_null(c.builder, class_obj)):
            c.pyapi.decref(shape_obj)
            c.pyapi.decref(coords_obj)
            c.pyapi.decref(data_obj)
            c.pyapi.decref(fill_value_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        args = c.pyapi.tuple_pack([coords_obj, data_obj, shape_obj])
        c.pyapi.decref(shape_obj)
        c.pyapi.decref(coords_obj)
        c.pyapi.decref(data_obj)
        with cgutils.if_unlikely(c.builder, cgutils.is_null(c.builder, args)):
            c.pyapi.decref(fill_value_obj)
            c.pyapi.decref(class_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        kwargs = c.pyapi.dict_pack([("fill_value", fill_value_obj)])
        c.pyapi.decref(fill_value_obj)
        with cgutils.if_unlikely(c.builder, cgutils.is_null(c.builder,
                                                            kwargs)):
            c.pyapi.decref(class_obj)
            c.builder.store(fail_obj, ret_ptr)
            ret()

        c.builder.store(c.pyapi.call(class_obj, args, kwargs), ret_ptr)
        c.pyapi.decref(class_obj)
        c.pyapi.decref(args)
        c.pyapi.decref(kwargs)

    return c.builder.load(ret_ptr)
Esempio n. 18
0
def _build_array(context, builder, array_ty, input_types, inputs):
    """Utility function to handle allocation of an implicit output array
    given the target context, builder, output array type, and a list of
    _ArrayHelper instances.
    """
    # First, strip optional types, ufunc loops are typed on concrete types
    input_types = [
        x.type if isinstance(x, types.Optional) else x for x in input_types
    ]

    intp_ty = context.get_value_type(types.intp)

    def make_intp_const(val):
        return context.get_constant(types.intp, val)

    ZERO = make_intp_const(0)
    ONE = make_intp_const(1)

    src_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim,
                                    "src_shape")
    dest_ndim = make_intp_const(array_ty.ndim)
    dest_shape = cgutils.alloca_once(builder, intp_ty, array_ty.ndim,
                                     "dest_shape")
    dest_shape_addrs = tuple(
        cgutils.gep_inbounds(builder, dest_shape, index)
        for index in range(array_ty.ndim))

    # Initialize the destination shape with all ones.
    for dest_shape_addr in dest_shape_addrs:
        builder.store(ONE, dest_shape_addr)

    # For each argument, try to broadcast onto the destination shape,
    # mutating along any axis where the argument shape is not one and
    # the destination shape is one.
    for arg_number, arg in enumerate(inputs):
        if not hasattr(arg, "ndim"):  # Skip scalar arguments
            continue
        arg_ndim = make_intp_const(arg.ndim)
        for index in range(arg.ndim):
            builder.store(arg.shape[index],
                          cgutils.gep_inbounds(builder, src_shape, index))
        arg_result = context.compile_internal(
            builder, _broadcast_onto, _broadcast_onto_sig,
            [arg_ndim, src_shape, dest_ndim, dest_shape])
        with cgutils.if_unlikely(builder,
                                 builder.icmp(lc.ICMP_SLT, arg_result, ONE)):
            msg = "unable to broadcast argument %d to output array" % (
                arg_number, )

            loc = errors.loc_info.get('loc', None)
            if loc is not None:
                msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line)

            context.call_conv.return_user_exc(builder, ValueError, (msg, ))

    real_array_ty = array_ty.as_array

    dest_shape_tup = tuple(
        builder.load(dest_shape_addr) for dest_shape_addr in dest_shape_addrs)
    array_val = arrayobj._empty_nd_impl(context, builder, real_array_ty,
                                        dest_shape_tup)

    # Get the best argument to call __array_wrap__ on
    array_wrapper_index = select_array_wrapper(input_types)
    array_wrapper_ty = input_types[array_wrapper_index]
    try:
        # __array_wrap__(source wrapped array, out array) -> out wrapped array
        array_wrap = context.get_function(
            '__array_wrap__', array_ty(array_wrapper_ty, real_array_ty))
    except NotImplementedError:
        # If it's the same priority as a regular array, assume we
        # should use the allocated array unchanged.
        if array_wrapper_ty.array_priority != types.Array.array_priority:
            raise
        out_val = array_val._getvalue()
    else:
        wrap_args = (inputs[array_wrapper_index].return_val,
                     array_val._getvalue())
        out_val = array_wrap(builder, wrap_args)

    ndim = array_ty.ndim
    shape = cgutils.unpack_tuple(builder, array_val.shape, ndim)
    strides = cgutils.unpack_tuple(builder, array_val.strides, ndim)
    return _ArrayHelper(context, builder, shape, strides, array_val.data,
                        array_ty.layout, array_ty.dtype, ndim, out_val)
Esempio n. 19
0
def build_ufunc_wrapper(library, context, fname, signature, objmode, cres):
    """
    Wrap the scalar function with a loop that iterates over the arguments

    Returns
    -------
    (library, env, name)
    """
    assert isinstance(fname, str)
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(),
                         [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t])

    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)

    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))

    # Prepare Environment
    envname = context.get_env_name(cres.fndesc)
    env = cres.environment
    envptr = builder.load(context.declare_env_global(builder.module, envname))

    # Emit loop
    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i, typ))

    # Prepare output
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            build_obj_loop_body(
                context,
                func,
                builder,
                arrays,
                out,
                offsets,
                store_offset,
                signature,
                pyapi,
                envptr,
                env,
            )
        pyapi.gil_release(gil)
        builder.ret_void()

    else:
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    build_fast_loop_body(
                        context,
                        func,
                        builder,
                        arrays,
                        out,
                        offsets,
                        store_offset,
                        signature,
                        loop.index,
                        pyapi,
                        env=envptr,
                    )

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    build_slow_loop_body(
                        context,
                        func,
                        builder,
                        arrays,
                        out,
                        offsets,
                        store_offset,
                        signature,
                        pyapi,
                        env=envptr,
                    )

        builder.ret_void()
    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return _wrapper_info(library=wrapperlib, env=env, name=wrapper.name)
Esempio n. 20
0
def init_gdb_codegen(cgctx,
                     builder,
                     signature,
                     args,
                     const_args,
                     do_break=False):

    int8_t = ir.IntType(8)
    int32_t = ir.IntType(32)
    intp_t = ir.IntType(utils.MACHINE_BITS)
    char_ptr = ir.PointerType(ir.IntType(8))
    zero_i32t = int32_t(0)

    mod = builder.module
    pid = cgutils.alloca_once(builder, int32_t, size=1)

    # 32bit pid, 11 char max + terminator
    pidstr = cgutils.alloca_once(builder, int8_t, size=12)

    # str consts
    intfmt = cgctx.insert_const_string(mod, '%d')
    gdb_str = cgctx.insert_const_string(mod, config.GDB_BINARY)
    attach_str = cgctx.insert_const_string(mod, 'attach')

    new_args = []
    # add break point command to known location
    # this command file thing is due to commands attached to a breakpoint
    # requiring an interactive prompt
    # https://sourceware.org/bugzilla/show_bug.cgi?id=10079
    new_args.extend(['-x', os.path.join(_path, 'cmdlang.gdb')])
    # issue command to continue execution from sleep function
    new_args.extend(['-ex', 'c'])
    # then run the user defined args if any
    new_args.extend([x.literal_value for x in const_args])
    cmdlang = [cgctx.insert_const_string(mod, x) for x in new_args]

    # insert getpid, getpid is always successful, call without concern!
    fnty = ir.FunctionType(int32_t, tuple())
    getpid = mod.get_or_insert_function(fnty, "getpid")

    # insert snprintf
    # int snprintf(char *str, size_t size, const char *format, ...);
    fnty = ir.FunctionType(int32_t, (char_ptr, intp_t, char_ptr), var_arg=True)
    snprintf = mod.get_or_insert_function(fnty, "snprintf")

    # insert fork
    fnty = ir.FunctionType(int32_t, tuple())
    fork = mod.get_or_insert_function(fnty, "fork")

    # insert execl
    fnty = ir.FunctionType(int32_t, (char_ptr, char_ptr), var_arg=True)
    execl = mod.get_or_insert_function(fnty, "execl")

    # insert sleep
    fnty = ir.FunctionType(int32_t, (int32_t, ))
    sleep = mod.get_or_insert_function(fnty, "sleep")

    # insert break point
    fnty = ir.FunctionType(ir.VoidType(), tuple())
    breakpoint = mod.get_or_insert_function(fnty, "numba_gdb_breakpoint")

    # do the work
    parent_pid = builder.call(getpid, tuple())
    builder.store(parent_pid, pid)
    pidstr_ptr = builder.gep(pidstr, [zero_i32t], inbounds=True)
    pid_val = builder.load(pid)

    # call snprintf to write the pid into a char *
    stat = builder.call(snprintf, (pidstr_ptr, intp_t(12), intfmt, pid_val))
    invalid_write = builder.icmp_signed('>', stat, int32_t(12))
    with builder.if_then(invalid_write, likely=False):
        msg = "Internal error: `snprintf` buffer would have overflowed."
        cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg, ))

    # fork, check pids etc
    child_pid = builder.call(fork, tuple())
    fork_failed = builder.icmp_signed('==', child_pid, int32_t(-1))
    with builder.if_then(fork_failed, likely=False):
        msg = "Internal error: `fork` failed."
        cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg, ))

    is_child = builder.icmp_signed('==', child_pid, zero_i32t)
    with builder.if_else(is_child) as (then, orelse):
        with then:
            # is child
            nullptr = ir.Constant(char_ptr, None)
            gdb_str_ptr = builder.gep(gdb_str, [zero_i32t], inbounds=True)
            attach_str_ptr = builder.gep(attach_str, [zero_i32t],
                                         inbounds=True)
            cgutils.printf(builder, "Attaching to PID: %s\n", pidstr)
            buf = (gdb_str_ptr, gdb_str_ptr, attach_str_ptr, pidstr_ptr)
            buf = buf + tuple(cmdlang) + (nullptr, )
            builder.call(execl, buf)
        with orelse:
            # is parent
            builder.call(sleep, (int32_t(10), ))
            # if breaking is desired, break now
            if do_break is True:
                builder.call(breakpoint, tuple())
Esempio n. 21
0
def alloc_boolean_result(builder, name='ret'):
    """
    Allocate an uninitialized boolean result slot.
    """
    ret = cgutils.alloca_once(builder, llvmlite.ir.IntType(1), name=name)
    return ret
Esempio n. 22
0
def real_divmod_func_body(context, builder, vx, wx):
    # Reference Objects/floatobject.c
    #
    # float_divmod(PyObject *v, PyObject *w)
    # {
    #     double vx, wx;
    #     double div, mod, floordiv;
    #     CONVERT_TO_DOUBLE(v, vx);
    #     CONVERT_TO_DOUBLE(w, wx);
    #     mod = fmod(vx, wx);
    #     /* fmod is typically exact, so vx-mod is *mathematically* an
    #        exact multiple of wx.  But this is fp arithmetic, and fp
    #        vx - mod is an approximation; the result is that div may
    #        not be an exact integral value after the division, although
    #        it will always be very close to one.
    #     */
    #     div = (vx - mod) / wx;
    #     if (mod) {
    #         /* ensure the remainder has the same sign as the denominator */
    #         if ((wx < 0) != (mod < 0)) {
    #             mod += wx;
    #             div -= 1.0;
    #         }
    #     }
    #     else {
    #         /* the remainder is zero, and in the presence of signed zeroes
    #            fmod returns different results across platforms; ensure
    #            it has the same sign as the denominator; we'd like to do
    #            "mod = wx * 0.0", but that may get optimized away */
    #         mod *= mod;  /* hide "mod = +0" from optimizer */
    #         if (wx < 0.0)
    #             mod = -mod;
    #     }
    #     /* snap quotient to nearest integral value */
    #     if (div) {
    #         floordiv = floor(div);
    #         if (div - floordiv > 0.5)
    #             floordiv += 1.0;
    #     }
    #     else {
    #         /* div is zero - get the same sign as the true quotient */
    #         div *= div;             /* hide "div = +0" from optimizers */
    #         floordiv = div * vx / wx; /* zero w/ sign of vx/wx */
    #     }
    #     return Py_BuildValue("(dd)", floordiv, mod);
    # }
    pmod = cgutils.alloca_once(builder, vx.type)
    pdiv = cgutils.alloca_once(builder, vx.type)
    pfloordiv = cgutils.alloca_once(builder, vx.type)

    mod = builder.frem(vx, wx)
    div = builder.fdiv(builder.fsub(vx, mod), wx)

    builder.store(mod, pmod)
    builder.store(div, pdiv)

    # Note the use of negative zero for proper negating with `ZERO - x`
    ZERO = vx.type(0.0)
    NZERO = vx.type(-0.0)
    ONE = vx.type(1.0)
    mod_istrue = builder.fcmp_unordered('!=', mod, ZERO)
    wx_ltz = builder.fcmp_ordered('<', wx, ZERO)
    mod_ltz = builder.fcmp_ordered('<', mod, ZERO)

    with builder.if_else(mod_istrue,
                         likely=True) as (if_nonzero_mod, if_zero_mod):
        with if_nonzero_mod:
            # `mod` is non-zero or NaN
            # Ensure the remainder has the same sign as the denominator
            wx_ltz_ne_mod_ltz = builder.icmp(lc.ICMP_NE, wx_ltz, mod_ltz)

            with builder.if_then(wx_ltz_ne_mod_ltz):
                builder.store(builder.fsub(div, ONE), pdiv)
                builder.store(builder.fadd(mod, wx), pmod)

        with if_zero_mod:
            # `mod` is zero, select the proper sign depending on
            # the denominator's sign
            mod = builder.select(wx_ltz, NZERO, ZERO)
            builder.store(mod, pmod)

    del mod, div

    div = builder.load(pdiv)
    div_istrue = builder.fcmp(lc.FCMP_ONE, div, ZERO)

    with builder.if_then(div_istrue):
        realtypemap = {'float': types.float32, 'double': types.float64}
        realtype = realtypemap[str(wx.type)]
        floorfn = context.get_function(math.floor,
                                       typing.signature(realtype, realtype))
        floordiv = floorfn(builder, [div])
        floordivdiff = builder.fsub(div, floordiv)
        floordivincr = builder.fadd(floordiv, ONE)
        HALF = Constant.real(wx.type, 0.5)
        pred = builder.fcmp(lc.FCMP_OGT, floordivdiff, HALF)
        floordiv = builder.select(pred, floordivincr, floordiv)
        builder.store(floordiv, pfloordiv)

    with cgutils.ifnot(builder, div_istrue):
        div = builder.fmul(div, div)
        builder.store(div, pdiv)
        floordiv = builder.fdiv(builder.fmul(div, vx), wx)
        builder.store(floordiv, pfloordiv)

    return builder.load(pfloordiv), builder.load(pmod)
Esempio n. 23
0
    def codegen(context, builder, signature, args):
        array_type, idx_type, axis_type, extent_type = signature.args
        array, idx, axis, extent = args
        array = context.make_array(array_type)(context, builder, array)

        zero = context.get_constant(types.intp, 0)
        llvm_intp_t = context.get_value_type(types.intp)
        ndim = array_type.ndim

        view_shape = cgutils.alloca_once(builder, llvm_intp_t)
        view_stride = cgutils.alloca_once(builder, llvm_intp_t)

        # Final array indexes. We only know the slicing index at runtime
        # so we need to recreate idx but with zero at the slicing axis
        indices = cgutils.alloca_once(builder,
                                      llvm_intp_t,
                                      size=array_type.ndim)

        for ax in range(array_type.ndim):
            llvm_ax = context.get_constant(types.intp, ax)
            predicate = builder.icmp_unsigned("!=", llvm_ax, axis)

            with builder.if_else(predicate) as (not_equal, equal):
                with not_equal:
                    # If this is not the slicing axis,
                    # use the appropriate tuple index
                    value = builder.extract_value(idx, ax)
                    builder.store(value, builder.gep(indices, [llvm_ax]))

                with equal:
                    # If this is the slicing axis,
                    # store zero as the index.
                    # Also record the stride and shape
                    builder.store(zero, builder.gep(indices, [llvm_ax]))
                    size = builder.extract_value(array.shape, ax)
                    stride = builder.extract_value(array.strides, ax)

                    if have_extent:
                        ext_predicate = builder.icmp_signed(">=", extent, size)
                        size = builder.select(ext_predicate, size, extent)

                    builder.store(size, view_shape)
                    builder.store(stride, view_stride)

        # Build a python list from indices
        tmp_indices = []

        for i in range(ndim):
            i = context.get_constant(types.intp, i)
            tmp_indices.append(builder.load(builder.gep(indices, [i])))

        # Get the data pointer obtained from indexing the array
        dataptr = cgutils.get_item_pointer(context,
                                           builder,
                                           array_type,
                                           array,
                                           tmp_indices,
                                           wraparound=True,
                                           boundscheck=True)

        # Set up the shape and stride. There'll only be one
        # dimension, corresponding to the axis along which we slice
        view_shapes = [builder.load(view_shape)]
        view_strides = [builder.load(view_stride)]

        # Make a view with the data pointer, shapes and strides
        retary = make_view(context, builder, array_type, array, return_type,
                           dataptr, view_shapes, view_strides)

        result = retary._getvalue()
        return impl_ret_borrowed(context, builder, return_type, result)
Esempio n. 24
0
def _generic_array(context,
                   builder,
                   shape,
                   dtype,
                   symbol_name,
                   addrspace,
                   can_dynsized=False):
    elemcount = reduce(operator.mul, shape, 1)

    # Check for valid shape for this type of allocation.
    # Only 1d arrays can be dynamic.
    dynamic_smem = elemcount <= 0 and can_dynsized and len(shape) == 1
    if elemcount <= 0 and not dynamic_smem:
        raise ValueError("array length <= 0")

    # Check that we support the requested dtype
    other_supported_type = isinstance(dtype, (types.Record, types.Boolean))
    if dtype not in types.number_domain and not other_supported_type:
        raise TypeError("unsupported type: %s" % dtype)

    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local address space allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        align = context.get_abi_sizeof(lldtype)
        # Alignment is required to be a power of 2 for shared memory. If it is
        # not a power of 2 (e.g. for a Record array) then round up accordingly.
        gvmem.align = 1 << (align - 1).bit_length()

        if dynamic_smem:
            gvmem.linkage = lc.LINKAGE_EXTERNAL
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL

            gvmem.initializer = lc.Constant.undef(laryty)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)

    # Compute strides
    laststride = itemsize
    rstrides = []
    for i, lastsize in enumerate(reversed(shape)):
        rstrides.append(laststride)
        laststride *= lastsize
    strides = [s for s in reversed(rstrides)]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    # Compute shape
    if dynamic_smem:
        # Compute the shape based on the dynamic shared memory configuration.
        # Unfortunately NVVM does not provide an intrinsic for the
        # %dynamic_smem_size register, so we must read it using inline
        # assembly.
        get_dynshared_size = InlineAsm.get(Type.function(Type.int(), []),
                                           "mov.u32 $0, %dynamic_smem_size;",
                                           '=r',
                                           side_effect=True)
        dynsmem_size = builder.zext(builder.call(get_dynshared_size, []),
                                    Type.int(width=64))
        # Only 1-D dynamic shared memory is supported so the following is a
        # sufficient construction of the shape
        kitemsize = context.get_constant(types.intp, itemsize)
        kshape = [builder.udiv(dynsmem_size, kitemsize)]
    else:
        kshape = [context.get_constant(types.intp, s) for s in shape]

    # Create array object
    ndim = len(shape)
    aryty = types.Array(dtype=dtype, ndim=ndim, layout='C')
    ary = context.make_array(aryty)(context, builder)

    context.populate_array(ary,
                           data=builder.bitcast(dataptr, ary.data.type),
                           shape=kshape,
                           strides=kstrides,
                           itemsize=context.get_constant(types.intp, itemsize),
                           meminfo=None)
    return ary._getvalue()
Esempio n. 25
0
    def codegen(context, builder, sig, args):
        vtablety = ir.LiteralStructType([
            ll_voidptr_type,  # equal
            ll_voidptr_type,  # key incref
            ll_voidptr_type,  # key decref
            ll_voidptr_type,  # val incref
            ll_voidptr_type,  # val decref
        ])
        setmethod_fnty = ir.FunctionType(
            ir.VoidType(),
            [ll_dict_type, vtablety.as_pointer()]
        )
        setmethod_fn = ir.Function(
            builder.module,
            setmethod_fnty,
            name='numba_dict_set_method_table',
        )
        dp = args[0]
        vtable = cgutils.alloca_once(builder, vtablety, zfill=True)

        # install key incref/decref
        key_equal_ptr = cgutils.gep_inbounds(builder, vtable, 0, 0)
        key_incref_ptr = cgutils.gep_inbounds(builder, vtable, 0, 1)
        key_decref_ptr = cgutils.gep_inbounds(builder, vtable, 0, 2)
        val_incref_ptr = cgutils.gep_inbounds(builder, vtable, 0, 3)
        val_decref_ptr = cgutils.gep_inbounds(builder, vtable, 0, 4)

        dm_key = context.data_model_manager[keyty.instance_type]
        if dm_key.contains_nrt_meminfo():
            equal = _get_equal(context, builder.module, dm_key, 'dict')
            key_incref, key_decref = _get_incref_decref(
                context, builder.module, dm_key, 'dict'
            )
            builder.store(
                builder.bitcast(equal, key_equal_ptr.type.pointee),
                key_equal_ptr,
            )
            builder.store(
                builder.bitcast(key_incref, key_incref_ptr.type.pointee),
                key_incref_ptr,
            )
            builder.store(
                builder.bitcast(key_decref, key_decref_ptr.type.pointee),
                key_decref_ptr,
            )

        dm_val = context.data_model_manager[valty.instance_type]
        if dm_val.contains_nrt_meminfo():
            val_incref, val_decref = _get_incref_decref(
                context, builder.module, dm_val, 'dict'
            )
            builder.store(
                builder.bitcast(val_incref, val_incref_ptr.type.pointee),
                val_incref_ptr,
            )
            builder.store(
                builder.bitcast(val_decref, val_decref_ptr.type.pointee),
                val_decref_ptr,
            )

        builder.call(setmethod_fn, [dp, vtable])
Esempio n. 26
0
    def enqueue_kernel_and_copy_back(self, dim_bounds, sycl_queue_val):
        """
        enqueue_kernel_and_copy_back(dim_bounds, sycl_queue_val)
        Submits the kernel to the specified queue, waits and then copies
        back any results to the host.

        Args:
            dim_bounds : An array of three tuple representing the starting
                         offset, end offset and the stride (step) for each
                         dimension of the input arrays. Every array in a parfor
                         is of the same dimensionality and shape, thus ensuring
                         the bounds are the same.
            sycl_queue_val : The SYCL queue on which the kernel is
                             submitted.
        """
        submit_fn = DpctlCAPIFnBuilder.get_dpctl_queue_submit_range(
            builder=self.builder, context=self.context)
        queue_wait_fn = DpctlCAPIFnBuilder.get_dpctl_queue_wait(
            builder=self.builder, context=self.context)
        event_del_fn = DpctlCAPIFnBuilder.get_dpctl_event_delete(
            builder=self.builder, context=self.context)
        memcpy_fn = DpctlCAPIFnBuilder.get_dpctl_queue_memcpy(
            builder=self.builder, context=self.context)
        free_fn = DpctlCAPIFnBuilder.get_dpctl_free_with_queue(
            builder=self.builder, context=self.context)
        event_wait_fn = DpctlCAPIFnBuilder.get_dpctl_event_wait(
            builder=self.builder, context=self.context)

        # the assumption is loop_ranges will always be less than or equal to 3
        # dimensions
        num_dim = len(dim_bounds) if len(dim_bounds) < 4 else 3

        # form the global range
        global_range = cgutils.alloca_once(
            self.builder,
            utils.get_llvm_type(context=self.context, type=types.uintp),
            size=self.context.get_constant(types.uintp, num_dim),
            name="global_range",
        )

        intp_t = utils.get_llvm_type(context=self.context, type=types.intp)
        intp_ptr_t = utils.get_llvm_ptr_type(intp_t)

        for i in range(num_dim):
            start, stop, step = dim_bounds[i]
            if stop.type != utils.LLVMTypes.int64_t:
                stop = self.builder.sext(stop, utils.LLVMTypes.int64_t)

            # we reverse the global range to account for how sycl and opencl
            # range differs
            self.builder.store(
                stop,
                self.builder.gep(
                    global_range,
                    [
                        self.context.get_constant(types.uintp,
                                                  (num_dim - 1) - i)
                    ],
                ),
            )

        args = [
            self.builder.inttoptr(
                self.context.get_constant(types.uintp, self.kernel_addr),
                utils.get_llvm_type(context=self.context, type=types.voidptr),
            ),
            self.builder.load(sycl_queue_val),
            self.kernel_arg_array,
            self.kernel_arg_ty_array,
            self.context.get_constant(types.uintp, self.total_kernel_args),
            self.builder.bitcast(global_range, intp_ptr_t),
            self.context.get_constant(types.uintp, num_dim),
            self.builder.bitcast(
                utils.create_null_ptr(builder=self.builder,
                                      context=self.context),
                utils.get_llvm_type(context=self.context, type=types.voidptr),
            ),
            self.context.get_constant(types.uintp, 0),
        ]

        # Submit the kernel
        event_ref = self.builder.call(submit_fn, args)

        # Add a wait on the queue
        self.builder.call(queue_wait_fn, [self.builder.load(sycl_queue_val)])

        # Note that the dpctl_queue_wait call waits on the event and then
        # decrements the ref count of the sycl::event C++ object. However, the
        # event object returned by the get_dpctl_queue_submit_range call still
        # needs to be explicitly deleted to free up the event object properly.
        self.builder.call(event_del_fn, [event_ref])

        # read buffers back to host
        for write_buff in self.write_buffs:
            buffer_ptr, total_size, data_member = write_buff
            args = [
                self.builder.load(sycl_queue_val),
                self.builder.bitcast(
                    self.builder.load(data_member),
                    utils.get_llvm_type(context=self.context,
                                        type=types.voidptr),
                ),
                self.builder.load(buffer_ptr),
                self.builder.load(total_size),
            ]
            # FIXME: In future, when the DctlQueue_Memcpy is made non-blocking
            # the returned event should be explicitly freed by calling
            # get_dpctl_event_delete.
            event_ref = self.builder.call(memcpy_fn, args)
            self.builder.call(event_wait_fn, [event_ref])
            self.builder.call(event_del_fn, [event_ref])

            self.builder.call(
                free_fn,
                [
                    self.builder.load(buffer_ptr),
                    self.builder.load(sycl_queue_val),
                ],
            )

        for read_buff in self.read_only_buffs:
            buffer_ptr, total_size, data_member = read_buff
            self.builder.call(
                free_fn,
                [
                    self.builder.load(buffer_ptr),
                    self.builder.load(sycl_queue_val),
                ],
            )
Esempio n. 27
0
def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
    uni_str = cgutils.create_struct_proxy(fromty)(context, builder, value=val)
    src1 = builder.bitcast(uni_str.data, ir.IntType(8).as_pointer())
    src2 = builder.bitcast(uni_str.data, ir.IntType(16).as_pointer())
    src4 = builder.bitcast(uni_str.data, ir.IntType(32).as_pointer())
    kind1 = builder.icmp_unsigned("==", uni_str.kind, ir.Constant(uni_str.kind.type, 1))
    kind2 = builder.icmp_unsigned("==", uni_str.kind, ir.Constant(uni_str.kind.type, 2))
    kind4 = builder.icmp_unsigned("==", uni_str.kind, ir.Constant(uni_str.kind.type, 4))
    src_length = uni_str.length

    lty = context.get_value_type(toty)
    dstint_t = ir.IntType(8 * unicode_byte_width)
    dst_ptr = cgutils.alloca_once(builder, lty)
    dst = builder.bitcast(dst_ptr, dstint_t.as_pointer())

    dst_length = ir.Constant(src_length.type, toty.count)
    is_shorter_value = builder.icmp_unsigned("<", src_length, dst_length)
    count = builder.select(is_shorter_value, src_length, dst_length)
    with builder.if_then(is_shorter_value):
        cgutils.memset(
            builder,
            dst,
            ir.Constant(src_length.type, toty.count * unicode_byte_width),
            0,
        )

    with builder.if_then(kind1):
        with cgutils.for_range(builder, count) as loop:
            in_ptr = builder.gep(src1, [loop.index])
            in_val = builder.zext(builder.load(in_ptr), dstint_t)
            builder.store(in_val, builder.gep(dst, [loop.index]))

    with builder.if_then(kind2):
        if unicode_byte_width >= 2:
            with cgutils.for_range(builder, count) as loop:
                in_ptr = builder.gep(src2, [loop.index])
                in_val = builder.zext(builder.load(in_ptr), dstint_t)
                builder.store(in_val, builder.gep(dst, [loop.index]))
        else:
            context.call_conv.return_user_exc(
                builder,
                ValueError,
                "cannot cast 16-bit unicode_type to %s-bit %s"
                % (unicode_byte_width * 8, toty),
            )

    with builder.if_then(kind4):
        if unicode_byte_width >= 4:
            with cgutils.for_range(builder, count) as loop:
                in_ptr = builder.gep(src4, [loop.index])
                in_val = builder.zext(builder.load(in_ptr), dstint_t)
                builder.store(in_val, builder.gep(dst, [loop.index]))
        else:
            context.call_conv.return_user_exc(
                builder,
                ValueError,
                "cannot cast 32-bit unicode_type to %s-bit %s"
                % (unicode_byte_width * 8, toty),
            )

    return builder.load(dst_ptr)
Esempio n. 28
0
 def codegen(context, builder, sig, args):
     lir_res_type = context.get_value_type(number_dtype)
     res_ptr = cgutils.alloca_once(builder, lir_res_type)
     return builder.bitcast(res_ptr, cgutils.voidptr_t)
Esempio n. 29
0
 def from_data(self, builder, value):
     stack = cgutils.alloca_once(builder, value.type)
     builder.store(value, stack)
     return stack
Esempio n. 30
0
def _randrange_impl(context, builder, start, stop, step, state):
    state_ptr = get_state_ptr(context, builder, state)
    ty = stop.type
    zero = ir.Constant(ty, 0)
    one = ir.Constant(ty, 1)
    nptr = cgutils.alloca_once(builder, ty, name="n")
    # n = stop - start
    builder.store(builder.sub(stop, start), nptr)

    with builder.if_then(builder.icmp_signed("<", step, zero)):
        # n = (n + step + 1) // step
        w = builder.add(builder.add(builder.load(nptr), step), one)
        n = builder.sdiv(w, step)
        builder.store(n, nptr)
    with builder.if_then(builder.icmp_signed(">", step, one)):
        # n = (n + step - 1) // step
        w = builder.sub(builder.add(builder.load(nptr), step), one)
        n = builder.sdiv(w, step)
        builder.store(n, nptr)

    n = builder.load(nptr)
    with cgutils.if_unlikely(builder, builder.icmp_signed("<=", n, zero)):
        # n <= 0
        msg = "empty range for randrange()"
        context.call_conv.return_user_exc(builder, ValueError, (msg, ))

    fnty = ir.FunctionType(ty, [ty, cgutils.true_bit.type])
    fn = builder.function.module.get_or_insert_function(
        fnty, "llvm.ctlz.%s" % ty)
    # Since the upper bound is exclusive, we need to subtract one before
    # calculating the number of bits. This leads to a special case when
    # n == 1; there's only one possible result, so we don't need bits from
    # the PRNG. This case is handled separately towards the end of this
    # function. CPython's implementation is simpler and just runs another
    # iteration of the while loop when the resulting number is too large
    # instead of subtracting one, to avoid needing to handle a special
    # case. Thus, we only perform this subtraction for the NumPy case.
    nm1 = builder.sub(n, one) if state == "np" else n
    nbits = builder.trunc(builder.call(fn, [nm1, cgutils.true_bit]), int32_t)
    nbits = builder.sub(ir.Constant(int32_t, ty.width), nbits)

    rptr = cgutils.alloca_once(builder, ty, name="r")

    def get_num():
        bbwhile = builder.append_basic_block("while")
        bbend = builder.append_basic_block("while.end")
        builder.branch(bbwhile)

        builder.position_at_end(bbwhile)
        r = get_next_int(context, builder, state_ptr, nbits, state == "np")
        r = builder.trunc(r, ty)
        too_large = builder.icmp_signed(">=", r, n)
        builder.cbranch(too_large, bbwhile, bbend)

        builder.position_at_end(bbend)
        builder.store(r, rptr)

    if state == "np":
        # Handle n == 1 case, per previous comment.
        with builder.if_else(builder.icmp_signed("==", n,
                                                 one)) as (is_one, is_not_one):
            with is_one:
                builder.store(zero, rptr)
            with is_not_one:
                get_num()
    else:
        get_num()

    return builder.add(start, builder.mul(builder.load(rptr), step))