Ejemplo n.º 1
0
class VectorArithDriver2(CDefinition):
    '''Driver that feeds VectorArith from four float arrays, gathering
    each vector one lane at a time.
    '''
    _name_ = 'vector_arith_driver_2'
    _argtys_ = [
        ('A', C.pointer(C.float)),
        ('B', C.pointer(C.float)),
        ('C', C.pointer(C.float)),
        ('D', C.pointer(C.float)),
        ('n', C.int),
    ]

    def body(self, Aary, Bary, Cary, Dary, n):
        '''
        Fill three floatv4 temporaries lane by lane, run VectorArith on
        them, and vector-store the result into D.

        Loading the lanes individually yields scalar ld/st rather than
        vector ld/st for the inputs.
        '''
        kernel = self.depends(VectorArith())
        vec_a = self.var(floatv4)
        vec_b = self.var(floatv4)
        vec_c = self.var(floatv4)
        lanes = self.constant(C.int, floatv4.count)
        # Outer loop steps through the arrays one vector width at a time.
        with self.for_range(0, n, lanes) as (outer, base):
            # Inner loop copies a single lane from each input array.
            with self.for_range(lanes) as (inner, lane):
                vec_a[lane] = Aary[base + lane]
                vec_b[lane] = Bary[base + lane]
                vec_c[lane] = Cary[base + lane]
            result = kernel(vec_a, vec_b, vec_c)
            # Unaligned vector store of the result at offset `base`.
            Dary[base:].vector_store(result, align=1)
        self.ret()
Ejemplo n.º 2
0
Archivo: basic.py Proyecto: tpn/numba
    def body(self, args, dimensions, steps, data):
        '''Emit the 1-D ufunc loop around the wrapped function.

        One data pointer and one step are copied per function argument,
        plus one extra pair for the output. Each iteration loads the
        inputs, calls the function, stores the result and advances
        every pointer by its step.
        '''
        kernel = self.depends(self.FuncDef)
        signature = kernel.type.pointee

        cursors = []
        strides = []
        n_slots = len(signature.args) + 1  # inputs + the output slot
        for slot in range(n_slots):
            cursors.append(self.var_copy(args[slot]))
            stride = self.var_copy(steps[slot])
            stride.invariant = True
            strides.append(stride)

        with self.for_range(dimensions[0]) as (loop, item):
            operands = []
            for slot, argty in enumerate(signature.args):
                typed_ptr = cursors[slot].cast(C.pointer(argty))
                operands.append(typed_ptr.load())
                # advance this input pointer by its step
                cursors[slot].assign(cursors[slot][strides[slot]:])

            result = kernel(*operands, inline=True)
            out_ptr = cursors[-1].cast(C.pointer(signature.return_type))
            out_ptr.store(result, nontemporal=True)
            cursors[-1].assign(cursors[-1][strides[-1]:])

        self.ret()
Ejemplo n.º 3
0
class VectorArithDriver1(CDefinition):
    '''Driver that feeds VectorArith from four float arrays using
    vector loads.
    '''
    _name_ = 'vector_arith_driver_1'
    _argtys_ = [
        ('A', C.pointer(C.float)),
        ('B', C.pointer(C.float)),
        ('C', C.pointer(C.float)),
        ('D', C.pointer(C.float)),
        ('n', C.int),
    ]

    def body(self, Aary, Bary, Cary, Dary, n):
        '''
        Fetch four-element vectors from A, B and C with unaligned vector
        loads, run VectorArith on them, and vector-store the result
        into D.
        '''
        kernel = self.depends(VectorArith())
        stride = self.constant(C.int, floatv4.count)
        with self.for_range(0, n, stride) as (loop, base):
            # ary[base:] is the array offset to element `base`;
            # align=1 requests an unaligned access.
            vec_a = Aary[base:].vector_load(4, align=1)
            vec_b = Bary[base:].vector_load(4, align=1)
            vec_c = Cary[base:].vector_load(4, align=1)
            result = kernel(vec_a, vec_b, vec_c)
            Dary[base:].vector_store(result, align=1)
        self.ret()
Ejemplo n.º 4
0
 def _generate_argtys(self):
     """Build the (name, type, attributes) argument list.

     Inputs are passed as no-alias pointers to their type. Outputs are
     either a single no-alias 'out' pointer (when a value is returned)
     or the declared out_args with their own types. A trailing
     'n_elements' py_ssize_t entry is always appended.
     """
     signature = [(inp.name, C.pointer(inp.type), [ATTR_NO_ALIAS])
                  for inp in self.in_args]
     if not self.returns_value:
         signature.extend((outp.name, outp.type, [ATTR_NO_ALIAS])
                          for outp in self.out_args)
     else:
         signature.append(
             ('out', C.pointer(self.return_type), [ATTR_NO_ALIAS]))
     signature.append(('n_elements', C.py_ssize_t))
     return signature
Ejemplo n.º 5
0
class NewAxis(NumbaCDefinition):
    """Write the `one`/`zero` constants into slot `dst_dim` of the
    output shape and strides, respectively."""

    _name_ = "newaxis"
    _argtys_ = [
        ('out_shape', C.pointer(C.npy_intp)),
        ('out_strides', C.pointer(C.npy_intp)),
        ('dst_dim', C.int),
    ]

    def body(self, out_shape, out_strides, dst_dim):
        """Store the shape and stride entries for the new axis, then return."""
        extent_const, stride_const = get_constants(self)
        out_shape[dst_dim] = extent_const
        out_strides[dst_dim] = stride_const
        self.ret()
Ejemplo n.º 6
0
    def fakeit(self, dtype, data, dimensions, steps):
        '''Populate this struct's fields so it can stand in for an ndarray.

        dtype      -- Python object whose id() is stored in `descr`
        data       -- value assigned to the `data` field
        dimensions -- sequence of values stored as the shape entries
        steps      -- sequence of values stored as the stride entries;
                      must have the same length as `dimensions`
        '''
        assert len(dimensions) == len(steps)
        constant = self.parent.constant

        self.ob_refcnt.assign(constant(C.intp, 1))
        # id(np.ndarray) is embedded as an integer constant and cast to a
        # pointer. NOTE(review): this relies on id() being the object's
        # address (CPython) -- confirm for other interpreters.
        type_p = constant(C.py_ssize_t, id(np.ndarray))
        self.ob_type.assign(type_p.cast(C.pointer(C.int)))

        self.base.assign(self.parent.constant_null(C.void_p))
        # Same id()-as-pointer trick for the dtype descriptor.
        dtype_p = constant(C.py_ssize_t, id(dtype))
        self.descr.assign(dtype_p.cast(C.void_p))
        self.flags.assign(constant(C.int, _internal.NPY_WRITEABLE))

        self.data.assign(data)
        self.nd.assign(constant(C.int, len(dimensions)))

        # A single intp array holds both tables: the first half is the
        # shape, the second half (starting at len(dimensions)) the strides.
        ary_dims = self.parent.array(C.intp, len(dimensions) * 2)
        ary_steps = ary_dims[len(dimensions):]
        for i, dim in enumerate(dimensions):
            ary_dims[i] = dim

        self.dimensions.assign(ary_dims)

        # The strides go into the tail of the shared array rather than
        # into a separately allocated one.
        for i, step in enumerate(steps):
            ary_steps[i] = step
        self.strides.assign(ary_steps)
Ejemplo n.º 7
0
class WinThreadAPI(CExternal):
    '''External declaration of the Windows thread API
    (CreateThread, WaitForSingleObject, CloseHandle).
    '''
    # Every function declared below uses the x86 stdcall convention.
    _calling_convention_ = CC_X86_STDCALL

    # Thread handles are represented as void pointers.
    handle_t = C.void_p

    # lpStartAddress is an LPTHREAD_START_ROUTINE, with the form
    # DWORD ThreadProc (LPVOID lpdwThreadParam )
    CreateThread = Type.function(
        handle_t,
        [
            C.void_p,  # lpThreadAttributes (NULL for default)
            C.intp,  # dwStackSize (0 for default)
            C.void_p,  # lpStartAddress
            C.void_p,  # lpParameter
            C.int32,  # dwCreationFlags (0 for default)
            C.pointer(C.int32)  # lpThreadId (NULL if not required)
        ])

    # Return is WAIT_OBJECT_0 (0x00000000) to indicate the thread exited,
    # or WAIT_ABANDONED, WAIT_TIMEOUT, WAIT_FAILED for other conditions.
    WaitForSingleObject = Type.function(
        C.int32,
        [
            handle_t,  # hHandle
            C.int32  # dwMilliseconds (INFINITE == 0xFFFFFFFF means wait forever)
        ])

    # Takes the handle to close and returns an int32 status.
    CloseHandle = Type.function(C.int32, [handle_t])
Ejemplo n.º 8
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod`.

    The generated function adds one to the pointed-to int using a plain
    load followed by a plain store.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    target = builder.args[0]
    current = target.load()
    step = builder.constant(current.type, 1)
    target.store(current + step)
    builder.ret()
    builder.close()
Ejemplo n.º 9
0
    def body(self, args, dimensions, steps, data):
        '''Emit the 1-D ufunc loop around the wrapped function.

        args, dimensions, steps, data are the ufunc loop arguments:
        per-operand data pointers, the loop count in dimensions[0], and
        per-operand steps. `data` is not used here.

        A wrapped function with a void return type is taken to return
        by reference through its last (pointer) argument. Otherwise the
        returned value is stored directly. Pointer-typed parameters
        receive the casted data pointer itself instead of a loaded
        value.
        '''
        ufunc_ptr = self.depends(self.FuncDef)
        fnty = ufunc_ptr.type.pointee

        # void return type implies return by reference in last argument
        is_void_ret = fnty.return_type.kind == lc.TYPE_VOID

        argtys = fnty.args[:-1] if is_void_ret else fnty.args

        # One pointer/step pair per input, plus one for the output slot.
        arg_ptrs = []
        arg_steps = []
        for i in range(len(argtys) + 1):
            arg_ptrs.append(self.var_copy(args[i]))
            const_steps = self.var_copy(steps[i])
            const_steps.invariant = True
            arg_steps.append(const_steps)

        with self.for_range(dimensions[0]) as (loop, item):
            callargs = []
            for i, argty in enumerate(argtys):
                if argty.kind == lc.TYPE_POINTER:
                    casted = arg_ptrs[i].cast(argty)
                    callargs.append(casted)
                else:
                    casted = arg_ptrs[i].cast(C.pointer(argty))
                    callargs.append(casted.load())
                # increment pointer
                arg_ptrs[i].assign(arg_ptrs[i][arg_steps[i]:])

            if is_void_ret:
                retty = fnty.args[-1]
                assert retty.kind == lc.TYPE_POINTER
                # Temporary that receives the by-reference result.
                retval = self.var(retty.pointee)
                callargs.append(retval.ref)
                # (A leftover debug loop that printed every call argument
                # during code generation was removed here.)
                ufunc_ptr(*callargs, **dict(inline=True))
                res = retval
                retval_ptr = arg_ptrs[-1].cast(retty)
            else:
                res = ufunc_ptr(*callargs, **dict(inline=True))
                retval_ptr = arg_ptrs[-1].cast(C.pointer(fnty.return_type))
            retval_ptr.store(res, nontemporal=True)
            arg_ptrs[-1].assign(arg_ptrs[-1][arg_steps[-1]:])

        self.ret()
Ejemplo n.º 10
0
class IndexAxis(NumbaCDefinition):
    """Return `data` advanced by ``in_strides[src_dim] * index``."""

    _name_ = "index"
    _retty_ = C.char_p
    _argtys_ = [
        ('data', C.char_p),
        ('in_shape', C.pointer(C.npy_intp)),
        ('in_strides', C.pointer(C.npy_intp)),
        ('src_dim', C.npy_intp),
        ('index', C.npy_intp),
    ]

    def body(self, data, in_shape, in_strides, src_dim, index):
        """Offset the data pointer along `src_dim` and return it.

        `in_shape` is accepted but not read.
        """
        result = self.var(data.type, name='result')
        offset = in_strides[src_dim] * index
        shifted = data[offset:]
        result.assign(shifted)
        self.ret(result)
Ejemplo n.º 11
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod`.

    The generated function adds one to the pointed-to int using a plain
    load followed by a plain store.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    target = builder.args[0]
    current = target.load()
    step = builder.constant(current.type, 1)
    target.store(current + step)
    builder.ret()
    builder.close()
Ejemplo n.º 12
0
    def body(self, args, dimensions, steps, data):
        '''Emit the 1-D ufunc loop around the wrapped function.

        args, dimensions, steps, data are the ufunc loop arguments:
        per-operand data pointers, the loop count in dimensions[0], and
        per-operand steps. `data` is not used here.

        A wrapped function with a void return type is taken to return
        by reference through its last (pointer) argument. Otherwise the
        returned value is stored directly. Pointer-typed parameters
        receive the casted data pointer itself instead of a loaded
        value.
        '''
        ufunc_ptr = self.depends(self.FuncDef)
        fnty = ufunc_ptr.type.pointee

        # void return type implies return by reference in last argument
        is_void_ret = fnty.return_type.kind == lc.TYPE_VOID

        argtys = fnty.args[:-1] if is_void_ret else fnty.args

        # One pointer/step pair per input, plus one for the output slot.
        arg_ptrs = []
        arg_steps = []
        for i in range(len(argtys) + 1):
            arg_ptrs.append(self.var_copy(args[i]))
            const_steps = self.var_copy(steps[i])
            const_steps.invariant = True
            arg_steps.append(const_steps)

        with self.for_range(dimensions[0]) as (loop, item):
            callargs = []
            for i, argty in enumerate(argtys):
                if argty.kind == lc.TYPE_POINTER:
                    casted = arg_ptrs[i].cast(argty)
                    callargs.append(casted)
                else:
                    casted = arg_ptrs[i].cast(C.pointer(argty))
                    callargs.append(casted.load())
                # increment pointer
                arg_ptrs[i].assign(arg_ptrs[i][arg_steps[i]:])

            if is_void_ret:
                retty = fnty.args[-1]
                assert retty.kind == lc.TYPE_POINTER
                # Temporary that receives the by-reference result.
                retval = self.var(retty.pointee)
                callargs.append(retval.ref)
                # (A leftover debug loop that printed every call argument
                # during code generation was removed here.)
                ufunc_ptr(*callargs, **dict(inline=True))
                res = retval
                retval_ptr = arg_ptrs[-1].cast(retty)
            else:
                res = ufunc_ptr(*callargs, **dict(inline=True))
                retval_ptr = arg_ptrs[-1].cast(C.pointer(fnty.return_type))
            retval_ptr.store(res, nontemporal=True)
            arg_ptrs[-1].assign(arg_ptrs[-1][arg_steps[-1]:])

        self.ret()
Ejemplo n.º 13
0
    def _outer_loop(self, dargs, dimensions, pyarys, steps, data):
        '''Emit the outer loop that calls the inner function
        dimensions[0] times.

        For each parameter of the inner function: if its type is not a
        PyArray pointer, the first element of the matching array's data
        is loaded and passed by value; otherwise a reference to the
        array struct is passed. After each call every array's data
        pointer is advanced by steps[i].
        '''
        kernel = self.depends(self.FuncDef)
        with self.for_range(dimensions[0]) as (loop, idx):
            call_args = []

            for ary, formal in zip(pyarys, kernel.handle.args):
                if C.pointer(PyArray.llvm_type()) != formal.type: # scalar
                    elem_ptr = ary.data[0:].cast(C.pointer(formal.type))
                    call_args.append(elem_ptr.load())
                else:
                    call_args.append(ary.reference().cast(formal.type))

            kernel(*call_args)

            for pos, ary in enumerate(pyarys):
                ary.data.assign(ary.data[steps[pos]:])
Ejemplo n.º 14
0
def gen_vector2d_dist(mod):
    '''Add ``float vector2d_dist(Vector2D *)`` to `mod` and return it.

    The generated function returns ``x*x + y*y`` of the pointed-to
    vector (no square root is taken).
    '''
    signature = Type.function(C.float, [C.pointer(Vector2D.llvm_type())])
    func = mod.add_function(signature, 'vector2d_dist')

    builder = CBuilder(func)
    vec = builder.var(Vector2D, builder.args[0].load())
    x_squared = vec.x * vec.x
    y_squared = vec.y * vec.y

    builder.ret(x_squared + y_squared)
    builder.close()
    return func
Ejemplo n.º 15
0
def gen_vector2d_dist(mod):
    """Add ``float vector2d_dist(Vector2D *)`` to `mod` and return it.

    The generated function returns ``x*x + y*y`` of the pointed-to
    vector (no square root is taken).
    """
    signature = Type.function(C.float, [C.pointer(Vector2D.llvm_type())])
    func = mod.add_function(signature, "vector2d_dist")

    builder = CBuilder(func)
    vec = builder.var(Vector2D, builder.args[0].load())
    x_squared = vec.x * vec.x
    y_squared = vec.y * vec.y

    builder.ret(x_squared + y_squared)
    builder.close()
    return func
Ejemplo n.º 16
0
Archivo: basic.py Proyecto: tpn/numba
class BasicUFunc(CDefinition):
    '''a generic ufunc that wraps the workload
    '''
    _argtys_ = [
        ('args',       C.pointer(C.char_p), [ATTR_NO_ALIAS]),
        ('dimensions', C.pointer(C.intp), [ATTR_NO_ALIAS]),
        ('steps',      C.pointer(C.intp), [ATTR_NO_ALIAS]),
        ('data',       C.void_p, [ATTR_NO_ALIAS]),
    ]

    def body(self, args, dimensions, steps, data):
        '''Emit the 1-D ufunc loop around the wrapped function.

        One data pointer and one step are copied per function argument,
        plus one extra pair for the output. Each iteration loads the
        inputs, calls the function, stores the result and advances
        every pointer by its step. `data` is not used.
        '''
        ufunc_ptr = self.depends(self.FuncDef)
        fnty = ufunc_ptr.type.pointee

        arg_ptrs = []
        arg_steps = []
        for i in range(len(fnty.args)+1):
            arg_ptrs.append(self.var_copy(args[i]))
            const_steps = self.var_copy(steps[i])
            const_steps.invariant = True
            arg_steps.append(const_steps)

        with self.for_range(dimensions[0]) as (loop, item):
            callargs = []
            for i, argty in enumerate(fnty.args):
                casted = arg_ptrs[i].cast(C.pointer(argty))
                callargs.append(casted.load())
                arg_ptrs[i].assign(arg_ptrs[i][arg_steps[i]:]) # increment pointer

            res = ufunc_ptr(*callargs, **dict(inline=True))
            # The last pointer/step pair belongs to the output operand.
            retval_ptr = arg_ptrs[-1].cast(C.pointer(fnty.return_type))
            retval_ptr.store(res, nontemporal=True)
            arg_ptrs[-1].assign(arg_ptrs[-1][arg_steps[-1]:])

        self.ret()

    # specialize() takes `cls` and assigns class attributes, so it has to
    # be a classmethod; without the decorator, calling it on the class
    # binds `func_def` to `cls` and fails.
    @classmethod
    def specialize(cls, func_def):
        '''specialize to a workload

        Records `func_def` as the wrapped function and derives the
        generated function's name from it.
        '''
        cls._name_ = 'basicufunc_%s'% (func_def)
        cls.FuncDef = func_def
Ejemplo n.º 17
0
class PyObjectHead(CStruct):
    '''Struct layout of the fields at the start of a Python object.'''
    _fields_ = [
        ('ob_refcnt', C.intp),
        # NOTE: ob_type does not really point to an integer; the type
        # only needs to match the definition used in numba.
        ('ob_type', C.pointer(C.int)),
    ]

    if llvm_types._trace_refs_:
        # Account for _PyObject_HEAD_EXTRA: when reference tracing is
        # enabled, two extra link pointers precede the regular fields.
        _fields_ = [
            ('ob_next', _intp_ptr),
            ('ob_prev', _intp_ptr),
        ] + _fields_
Ejemplo n.º 18
0
class PThreadAPI(CExternal):
    '''External declaration of the pthread API
    (pthread_create and pthread_join).
    '''
    # pthread_t is represented as a void pointer.
    pthread_t = C.void_p

    pthread_create = Type.function(
        C.int,
        [
            C.pointer(pthread_t),  # pointer to the pthread_t to fill in
            C.void_p,  # thread attr
            C.void_p,  # function
            C.void_p  # arg
        ])

    # Parameters are the thread and a second void pointer.
    # NOTE(review): presumably the return-value slot -- confirm.
    pthread_join = Type.function(C.int, [C.void_p, C.void_p])
Ejemplo n.º 19
0
    def body(self, args, dimensions, steps, data):
        '''Emit the ufunc loop for a function with `nin` inputs and
        `nout` outputs.

        Inputs are loaded and passed by value; out_args are passed as
        casted pointers. When the function returns a value, it is stored
        through the pointer in slot `nin`. Every pointer is advanced by
        its step at the end of each iteration. `data` is not used.
        '''
        kernel = self.depends(self.FuncDef)

        cursors = []
        strides = []
        for slot in range(self.nin + self.nout):
            cursors.append(self.var_copy(args[slot]))
            stride = self.var_copy(steps[slot])
            stride.invariant = True
            strides.append(stride)

        count = self.var_copy(dimensions[0])
        count.invariant = True
        with self.for_range(count) as (loop, item):
            # by-value inputs first ...
            operands = [cursors[pos].cast(C.pointer(arg.type)).load()
                        for pos, arg in enumerate(self.in_args)]
            # ... then by-pointer outputs, which follow the inputs
            for pos, arg in enumerate(self.out_args):
                operands.append(cursors[self.nin + pos].cast(arg.type))

            if not self.returns_value:
                kernel(*operands, inline=True)
            else:
                result = kernel(*operands, inline=True)
                out_ptr = cursors[self.nin].cast(C.pointer(self.return_type))
                out_ptr.store(result, nontemporal=True)

            # increment pointers
            for slot in range(self.nin + self.nout):
                cursors[slot].assign(cursors[slot][strides[slot]:])

        self.ret()
Ejemplo n.º 20
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod` and return the function.

    The generated function performs REPEAT atomic additions of one
    (ordering 'acq_rel') on the pointed-to int.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    shared = builder.args[0]
    step = builder.constant(shared.type.pointee, 1)

    done = builder.var(C.int, 0)
    target = builder.constant(C.int, REPEAT)
    with builder.loop() as loop:
        with loop.condition() as setcond:
            setcond(done < target)

        with loop.body():
            builder.atomic_add(shared, step, 'acq_rel')
            done += step

    builder.ret()
    builder.close()
    return builder.function
Ejemplo n.º 21
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod` and return the function.

    The generated function performs REPEAT atomic additions of one
    (ordering 'acq_rel') on the pointed-to int.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    shared = builder.args[0]
    step = builder.constant(shared.type.pointee, 1)

    done = builder.var(C.int, 0)
    target = builder.constant(C.int, REPEAT)
    with builder.loop() as loop:
        with loop.condition() as setcond:
            setcond(done < target)

        with loop.body():
            builder.atomic_add(shared, step, 'acq_rel')
            done += step

    builder.ret()
    builder.close()
    return builder.function
Ejemplo n.º 22
0
    def _dispatch_worker(self, worker, contexts, num_thread):
        '''Emit code that starts `num_thread` Windows threads running
        `worker`, then waits for each one and closes its handle.

        Thread i receives a void pointer to contexts[i] as its
        parameter.
        '''
        winapi = WinThreadAPI(self)
        null_ptr = self.constant_null(C.void_p)
        null_thread_id = self.constant_null(C.pointer(C.int32))
        no_flags = self.constant(C.int32, 0)
        default_stack = self.constant(C.intp, 0)
        wait_forever = self.constant(C.int32, 0xFFFFFFFF)

        handles = self.array(winapi.handle_t, num_thread, name='threads')

        # TODO error handling

        # launch
        with self.for_range(num_thread) as (loop, tid):
            context_ptr = contexts[tid].reference().cast(C.void_p)
            handles[tid] = winapi.CreateThread(
                null_ptr, default_stack, worker, context_ptr,
                no_flags, null_thread_id)

        # join and release
        with self.for_range(num_thread) as (loop, tid):
            winapi.WaitForSingleObject(handles[tid], wait_forever)
            winapi.CloseHandle(handles[tid])
Ejemplo n.º 23
0
    def _dispatch_worker(self, worker, contexts, num_thread):
        '''Emit code that starts `num_thread` Windows threads running
        `worker`, then waits for each one and closes its handle.

        Thread i receives a void pointer to contexts[i] as its
        parameter.
        '''
        winapi = WinThreadAPI(self)
        null_ptr = self.constant_null(C.void_p)
        null_thread_id = self.constant_null(C.pointer(C.int32))
        no_flags = self.constant(C.int32, 0)
        default_stack = self.constant(C.intp, 0)
        wait_forever = self.constant(C.int32, 0xFFFFFFFF)

        handles = self.array(winapi.handle_t, num_thread, name='threads')

        # TODO error handling

        # launch
        with self.for_range(num_thread) as (loop, tid):
            context_ptr = contexts[tid].reference().cast(C.void_p)
            handles[tid] = winapi.CreateThread(
                null_ptr, default_stack, worker, context_ptr,
                no_flags, null_thread_id)

        # join and release
        with self.for_range(num_thread) as (loop, tid):
            winapi.WaitForSingleObject(handles[tid], wait_forever)
            winapi.CloseHandle(handles[tid])
Ejemplo n.º 24
0
    def _fold_loop(self, f, init, cb):
        '''Emit a read of the whole int array followed by a fold over it.

        A cls_read of ``product(self.dims) * sizeof(int)`` bytes at
        offset zero is issued on the context in cb.args[0]. The loop
        then runs over ``dltmp / sizeof(int)`` elements, where dltmp is
        the int passed by reference as cls_read's last argument
        (presumably the byte count read -- confirm), updating `init`
        with ``f(init, element)`` each time.
        '''
        hctx = cb.args[0]

        total_elems = 1
        for extent in self.dims:
            total_elems = total_elems * extent
        n_elems = cb.constant(C.int, total_elems)

        elem_bytes = cb.sizeof(C.int).cast(C.int)
        request_bytes = n_elems * elem_bytes

        data = cb.var(C.pointer(C.int))
        data_ref = data.reference().cast(C.char_pp)

        got = cb.var(C.int)
        start = cb.var(C.int).assign(cb.zero)
        cb.cls_read(hctx, start, request_bytes, data_ref, got.reference())

        pos = cb.var(C.int).assign(cb.zero)
        with cb.loop() as loop:
            with loop.condition() as setcond:
                setcond(pos < (got / elem_bytes))
            with loop.body():
                init.assign(f(init, data[pos]))
                pos += cb.one
Ejemplo n.º 25
0
    def _fold_loop(self, f, init, cb):
        '''Emit a read of the whole int array followed by a fold over it.

        A cls_read of ``product(self.dims) * sizeof(int)`` bytes at
        offset zero is issued on the context in cb.args[0]. The loop
        then runs over ``dltmp / sizeof(int)`` elements, where dltmp is
        the int passed by reference as cls_read's last argument
        (presumably the byte count read -- confirm), updating `init`
        with ``f(init, element)`` each time.
        '''
        hctx = cb.args[0]

        total_elems = 1
        for extent in self.dims:
            total_elems = total_elems * extent
        n_elems = cb.constant(C.int, total_elems)

        elem_bytes = cb.sizeof(C.int).cast(C.int)
        request_bytes = n_elems * elem_bytes

        data = cb.var(C.pointer(C.int))
        data_ref = data.reference().cast(C.char_pp)

        got = cb.var(C.int)
        start = cb.var(C.int).assign(cb.zero)
        cb.cls_read(hctx, start, request_bytes, data_ref, got.reference())

        pos = cb.var(C.int).assign(cb.zero)
        with cb.loop() as loop:
            with loop.condition() as setcond:
                setcond(pos < (got / elem_bytes))
            with loop.body():
                init.assign(f(init, data[pos]))
                pos += cb.one
Ejemplo n.º 26
0
class LibOSD(CExternal):
    # External declarations of the `cls_*` functions called from
    # generated code. Each attribute gives a function's return type
    # followed by its parameter types; every function returns an int.
    # NOTE(review): presumably the Ceph object-class (objclass) API --
    # confirm the signatures against the header.

    # (context, int, int, char** buffer out, int* out)
    cls_read = Type.function(
        C.int, [C.void_p, C.int, C.int, C.char_pp,
                C.pointer(C.int)])
    cls_write = Type.function(C.int, [C.void_p, C.int, C.int, C.char_p])
    cls_write_bl = Type.function(C.int, [C.void_p, C.int, C.int, C.void_p])
    cls_write_bl_full = Type.function(C.int, [C.void_p, C.int, C.void_p])
    cls_write_full = Type.function(C.int, [C.void_p, C.char_p])
    cls_map_get_val = Type.function(
        C.int, [C.void_p, C.char_p, C.char_pp,
                C.pointer(C.int)])
    cls_setxattr = Type.function(C.int, [C.void_p, C.char_p, C.char_p, C.int])
    cls_map_get_keys = Type.function(C.int, [
        C.void_p, C.char_p, C.int64,
        C.pointer(C.char_pp),
        C.pointer(C.pointer(C.int))
    ])
    cls_stat = Type.function(
        C.int, [C.void_p, C.pointer(C.int),
                C.pointer(C.int64)])
    # NOTE(review): the trailing True presumably marks the function as
    # variadic (printf-style) -- confirm against Type.function.
    cls_log = Type.function(C.int, [C.int, C.char_p], True)
Ejemplo n.º 27
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod` and return the function.

    The generated function increments the pointed-to int with a
    load / compare-and-exchange retry loop. Only iterations whose
    cmpxchg result equals the previously loaded value count towards
    the REPEAT limit.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    shared = builder.args[0]
    step = builder.constant(shared.type.pointee, 1)

    done = builder.var(C.int, 0)
    target = builder.constant(C.int, REPEAT)
    with builder.loop() as loop:
        with loop.condition() as setcond:
            setcond(done < target)

        with loop.body():
            seen = shared.atomic_load('acquire')
            wanted = seen + step
            witnessed = shared.atomic_cmpxchg(seen, wanted, 'release')

            # count the iteration only when the exchange went through
            with builder.ifelse(witnessed == seen) as branch:
                with branch.then():
                    done += step

    builder.ret()
    builder.close()
    return builder.function
Ejemplo n.º 28
0
def gen_test_worker(mod):
    '''Emit ``void worker(int *)`` into `mod` and return the function.

    The generated function increments the pointed-to int with a
    load / compare-and-exchange retry loop. Only iterations whose
    cmpxchg result equals the previously loaded value count towards
    the REPEAT limit.
    '''
    builder = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
    shared = builder.args[0]
    step = builder.constant(shared.type.pointee, 1)

    done = builder.var(C.int, 0)
    target = builder.constant(C.int, REPEAT)
    with builder.loop() as loop:
        with loop.condition() as setcond:
            setcond(done < target)

        with loop.body():
            seen = shared.atomic_load('acquire')
            wanted = seen + step
            witnessed = shared.atomic_cmpxchg(seen, wanted, 'release')

            # count the iteration only when the exchange went through
            with builder.ifelse(witnessed == seen) as branch:
                with branch.then():
                    done += step

    builder.ret()
    builder.close()
    return builder.function
Ejemplo n.º 29
0
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from numba import *
from numba import llvm_types
from numba import typedefs
from numba.utility.cbuilder.library import register
from numba.utility.cbuilder.numbacdef import NumbaCDefinition, from_numba

from llvm_cbuilder import shortnames

#------------------------------------------------------------------------
# Utilities
#------------------------------------------------------------------------

# Pointer-to-py_ssize_t type; p_refcnt() casts object pointers to it.
p_py_ssize_t = shortnames.pointer(shortnames.py_ssize_t)


def ob_refcnt(obj_p):
    """Return element 0 of `obj_p` viewed as a py_ssize_t pointer."""
    refcnt_ptr = p_refcnt(obj_p)
    return deref(refcnt_ptr)


def p_refcnt(obj_p):
    """Cast `obj_p` to a pointer to py_ssize_t."""
    as_ssize_ptr = obj_p.cast(p_py_ssize_t)
    return as_ssize_ptr


def deref(obj_p):
    """Return the item at index 0 of `obj_p`."""
    pointee = obj_p[0]
    return pointee


def const(ctemp, val):
    """Create a py_ssize_t constant `val` through ``ctemp.parent``."""
    builder = ctemp.parent
    return builder.constant(shortnames.py_ssize_t, val)
Ejemplo n.º 30
0
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from numba import *
from numba import llvm_types
from numba import typedefs
from numba.utility.cbuilder.library import register
from numba.utility.cbuilder.numbacdef import NumbaCDefinition, from_numba

from llvm_cbuilder import shortnames

#------------------------------------------------------------------------
# Utilities
#------------------------------------------------------------------------

# Pointer-to-py_ssize_t type; p_refcnt() casts object pointers to it.
p_py_ssize_t = shortnames.pointer(shortnames.py_ssize_t)

def ob_refcnt(obj_p):
    """Return element 0 of `obj_p` viewed as a py_ssize_t pointer."""
    refcnt_ptr = p_refcnt(obj_p)
    return deref(refcnt_ptr)

def p_refcnt(obj_p):
    """Cast `obj_p` to a pointer to py_ssize_t."""
    as_ssize_ptr = obj_p.cast(p_py_ssize_t)
    return as_ssize_ptr

def deref(obj_p):
    """Return the item at index 0 of `obj_p`."""
    pointee = obj_p[0]
    return pointee

def const(ctemp, val):
    """Create a py_ssize_t constant `val` through ``ctemp.parent``."""
    builder = ctemp.parent
    return builder.constant(shortnames.py_ssize_t, val)

def add_refcnt(obj_p, refcnt):
    refcnt = const(obj_p, refcnt)
    refs = ob_refcnt(obj_p)
Ejemplo n.º 31
0
class Broadcast(NumbaCDefinition):
    """
    Transliteration of

        @cname('__pyx_memoryview_broadcast')
        cdef bint __pyx_broadcast(Py_ssize_t *dst_shape,
                                  Py_ssize_t *input_shape,
                                  Py_ssize_t *strides,
                                  int max_ndim, int ndim,
                                  bint *p_broadcast) nogil except -1:
            cdef Py_ssize_t i
            cdef int dim_offset = max_ndim - ndim

            for i in range(ndim):
                src_extent = input_shape[i]
                dst_extent = dst_shape[i + dim_offset]

                if src_extent == 1:
                    p_broadcast[0] = True
                    strides[i] = 0
                elif dst_extent == 1:
                    dst_shape[i + dim_offset] = src_extent
                elif src_extent != dst_extent:
                    __pyx_err_extents(i, dst_shape[i], input_shape[i])

    Differences from the Cython original, as implemented below: there is
    no `p_broadcast` output argument, and a shape mismatch makes the
    function return 0 instead of raising; it returns 1 otherwise.
    """

    _name_ = "__numba_util_broadcast"
    _argtys_ = [
        ('dst_shape', C.pointer(C.npy_intp)),
        ('src_shape', C.pointer(C.npy_intp)),
        ('src_strides', C.pointer(C.npy_intp)),
        ('max_ndim', C.int),
        ('ndim', C.int),
    ]
    _retty_ = C.int

    def body(self, dst_shape, src_shape, src_strides, max_ndim, ndim):
        # Source dimensions are right-aligned against the destination:
        # source axis i corresponds to destination axis i + dim_offset.
        dim_offset = max_ndim - ndim

        # Helper returning the (0, 1) constants of the given type.
        def constants(type):
            return self.constant(type, 0), self.constant(type, 1)

        zero, one = constants(C.npy_intp)
        zero_int, one_int = constants(C.int)

        with self.for_range(ndim) as (loop, i):
            src_extent = src_shape[i]
            dst_extent = dst_shape[i + dim_offset]

            # Each nested `ifelse` name rebinds the previous one; that is
            # safe because the outer object is not used after the rebind.
            with self.ifelse(src_extent == one) as ifelse:
                with ifelse.then():
                    # Extent-1 source axis: broadcast it via a zero stride.
                    src_strides[i] = zero
                with ifelse.otherwise():
                    with self.ifelse(dst_extent == one) as ifelse:
                        with ifelse.then():
                            # Destination axis still has extent 1: adopt
                            # the source extent.
                            dst_shape[i + dim_offset] = src_extent

                        with ifelse.otherwise():
                            with self.ifelse(
                                    src_extent != dst_extent) as ifelse:
                                with ifelse.then():
                                    # Shape mismatch
                                    self.ret(zero_int)

        # All axes were compatible.
        self.ret(one_int)
Ejemplo n.º 32
0
 def _make_array_type(self, ndim, cb):
     """Return the type of an `ndim`-dimensional int array.

     One dimension maps to C.int; every further dimension wraps the
     type in one more pointer level. `cb` is accepted but not used.

     Raises ValueError when `ndim` is smaller than 1. The previous
     recursive form never reached its base case for such values and
     recursed until the interpreter's stack limit.
     """
     if ndim < 1:
         raise ValueError("ndim must be >= 1, got %r" % (ndim,))
     array_ty = C.int
     for _ in range(ndim - 1):
         array_ty = C.pointer(array_ty)
     return array_ty
Ejemplo n.º 33
0
class GUFuncEntry(CDefinition):
    '''Generalized-ufunc entry point that wraps a numba jit'ed function.

    NOTE: Currently, this only works for array return type.
    The return value must also be the last argument of the numba jit'ed
    function.
    '''
    _argtys_ = [
        ('args', C.pointer(C.char_p)),
        ('dimensions', C.pointer(C.intp)),
        ('steps', C.pointer(C.intp)),
        ('data', C.void_p),
    ]

    def _outer_loop(self, dargs, dimensions, pyarys, steps, data):
        '''Emit the outer loop that calls the inner function
        dimensions[0] times.

        For each parameter of the inner function: if its type is not a
        PyArray pointer, the first element of the matching array's data
        is loaded and passed by value; otherwise a reference to the
        array struct is passed. After each call every array's data
        pointer is advanced by steps[i].
        '''
        kernel = self.depends(self.FuncDef)
        with self.for_range(dimensions[0]) as (loop, idx):
            call_args = []

            for ary, formal in zip(pyarys, kernel.handle.args):
                if C.pointer(PyArray.llvm_type()) != formal.type:  # scalar
                    elem_ptr = ary.data[0:].cast(C.pointer(formal.type))
                    call_args.append(elem_ptr.load())
                else:
                    call_args.append(ary.reference().cast(formal.type))

            kernel(*call_args)

            for pos, ary in enumerate(pyarys):
                ary.data.assign(ary.data[steps[pos]:])

    def body(self, args, dimensions, steps, data):
        '''Build one fake PyArray per operand from the parsed signature,
        then emit the outer loop over them.
        '''
        groups = list(_parse_signature(self.Signature))

        n_arrays = len(groups)
        assert n_arrays == len(self.dtypes)

        # unique, non-empty dimension names in first-seen order
        dim_names = []
        for group in groups:
            for name in group:
                if name and name not in dim_names:
                    dim_names.append(name)

        # array structs handed to the inner function
        pyarys = [self.var(PyArray) for _ in range(n_arrays)]

        # Core-dimension steps are read starting at index len(pyarys).
        next_step = len(pyarys)
        for pos, (dtype, ary) in enumerate(zip(self.dtypes, pyarys)):
            names = groups[pos]
            ndim = sum(1 for name in names if name)

            shape = []
            for name in names:
                if name:
                    # dimensions[0] is the outer loop count; named core
                    # dimensions follow in `dim_names` order.
                    shape.append(dimensions[1 + dim_names.index(name)])
                else:
                    shape.append(self.constant(C.intp, 0))

            strides = []
            if ndim == 0:
                strides.append(self.constant(C.intp, 0))
            for _ in range(ndim):
                strides.append(steps[next_step])
                next_step += 1

            ary.fakeit(dtype, args[pos], shape, strides)

        self._outer_loop(args, dimensions, pyarys, steps, data)
        self.ret()

    @classmethod
    def specialize(cls, dtypes, signature, func_def):
        '''Specialize to a workload.

        Spaces are stripped from `signature` before it is stored and
        used in the generated function's name.
        '''
        compact = signature.replace(' ', '')  # remove all spaces
        cls.dtypes = dtypes
        cls._name_ = 'gufunc_%s_%s' % (compact, func_def)
        cls.FuncDef = func_def
        cls.Signature = compact
Ejemplo n.º 34
0
    def _get_tys_list(self):
        """Return one list of dtypes per translated function.

        For every function in ``self.translates`` the argument types are
        taken from ``self.get_argtypes``; array types contribute the
        dtype of their element type, other types their own dtype.
        """
        per_function = []
        for func in self.translates:
            row = []
            for ty in self.get_argtypes(func):
                element = ty.dtype if ty.is_array else ty
                row.append(element.get_dtype())
            per_function.append(row)

        return per_function


# Public alias: GUFuncVectorize is the AST-based implementation.
GUFuncVectorize = GUFuncASTVectorize

# Pointer-to-intp type, used for the ob_next / ob_prev object-header fields.
_intp_ptr = C.pointer(C.intp)


class PyObjectHead(CStruct):
    '''Struct layout of the fields at the start of a Python object.'''
    _fields_ = [
        ('ob_refcnt', C.intp),
        # NOTE: declared as an opaque pointer; the field only needs to
        # match the definition used in numba.
        ('ob_type', C.void_p),
    ]

    if llvm_types._trace_refs_:
        # Account for _PyObject_HEAD_EXTRA: when reference tracing is
        # enabled, two extra link pointers precede the regular fields.
        _fields_ = [
            ('ob_next', _intp_ptr),
            ('ob_prev', _intp_ptr),
        ] + _fields_
Ejemplo n.º 35
0
 def _make_array_type(self, ndim, cb):
     """Return the type of an `ndim`-dimensional int array.

     One dimension maps to C.int; every further dimension wraps the
     type in one more pointer level. `cb` is accepted but not used.

     Raises ValueError when `ndim` is smaller than 1. The previous
     recursive form never reached its base case for such values and
     recursed until the interpreter's stack limit.
     """
     if ndim < 1:
         raise ValueError("ndim must be >= 1, got %r" % (ndim,))
     array_ty = C.int
     for _ in range(ndim - 1):
         array_ty = C.pointer(array_ty)
     return array_ty
Ejemplo n.º 36
0
class BasicUFunc(CDefinition):
    '''A generic ufunc loop that wraps the workload function.

    The generated function takes the four arguments listed in ``_argtys_``
    (``args``, ``dimensions``, ``steps``, ``data``) and calls the workload
    stored in ``FuncDef`` once per element of the first dimension.
    '''
    _argtys_ = [
        ('args', C.pointer(C.char_p), [ATTR_NO_ALIAS]),
        ('dimensions', C.pointer(C.intp), [ATTR_NO_ALIAS]),
        ('steps', C.pointer(C.intp), [ATTR_NO_ALIAS]),
        ('data', C.void_p, [ATTR_NO_ALIAS]),
    ]

    def body(self, args, dimensions, steps, data):
        '''Emit the element loop around the workload.

        For each of ``dimensions[0]`` iterations the inputs are read through
        the per-argument pointers (pointer-typed parameters receive the
        pointer itself, all others the loaded value), the workload is
        called, its result is stored through the last pointer, and every
        pointer is advanced by its matching step.
        '''
        ufunc_ptr = self.depends(self.FuncDef)
        fnty = ufunc_ptr.type.pointee

        # void return type implies return by reference in last argument
        is_void_ret = fnty.return_type.kind == lc.TYPE_VOID

        argtys = fnty.args[:-1] if is_void_ret else fnty.args

        # One pointer/step pair per input plus one trailing pair for the
        # output, hence the "+ 1".
        arg_ptrs = []
        arg_steps = []
        for i in range(len(argtys) + 1):
            arg_ptrs.append(self.var_copy(args[i]))
            const_steps = self.var_copy(steps[i])
            const_steps.invariant = True
            arg_steps.append(const_steps)

        with self.for_range(dimensions[0]) as (loop, item):
            callargs = []
            for i, argty in enumerate(argtys):
                if argty.kind == lc.TYPE_POINTER:
                    # Pointer parameters get the (cast) pointer itself.
                    casted = arg_ptrs[i].cast(argty)
                    callargs.append(casted)
                else:
                    # Value parameters get the element loaded from memory.
                    casted = arg_ptrs[i].cast(C.pointer(argty))
                    callargs.append(casted.load())
                # increment pointer
                arg_ptrs[i].assign(arg_ptrs[i][arg_steps[i]:])

            if is_void_ret:
                # The workload writes its result through its last parameter:
                # hand it a reference to a temporary and copy that out below.
                retty = fnty.args[-1]
                assert retty.kind == lc.TYPE_POINTER
                retval = self.var(retty.pointee)
                callargs.append(retval.ref)
                ufunc_ptr(*callargs, inline=True)
                res = retval
                retval_ptr = arg_ptrs[-1].cast(retty)
            else:
                res = ufunc_ptr(*callargs, inline=True)
                retval_ptr = arg_ptrs[-1].cast(C.pointer(fnty.return_type))
            retval_ptr.store(res, nontemporal=True)
            # advance the output pointer
            arg_ptrs[-1].assign(arg_ptrs[-1][arg_steps[-1]:])

        self.ret()

    def specialize(cls, func_def):
        '''Specialize to a workload.

        Stores ``func_def`` as ``FuncDef`` and derives the generated
        function name from it.
        '''
        cls._name_ = 'basicufunc_%s' % (func_def)
        cls.FuncDef = func_def
Ejemplo n.º 37
0
class SliceArray(CDefinition):
    '''Code generator for slicing one dimension of a strided array.

    The generated function reads the extent and stride of ``src_dim`` from
    ``in_shape`` / ``in_strides``, normalizes ``start``, ``stop`` and
    ``step``, writes the resulting extent and stride into ``out_shape`` /
    ``out_strides`` at ``dst_dim`` and returns ``data`` offset by
    ``start * stride``.

    ``specialize`` records which of start/stop/step were actually supplied;
    the missing ones are replaced by defaults when the body is built.
    '''

    _name_ = "slice"
    _retty_ = C.char_p
    _argtys_ = [
        ('data', C.char_p),
        ('in_shape', C.pointer(C.npy_intp)),
        ('in_strides', C.pointer(C.npy_intp)),
        ('out_shape', C.pointer(C.npy_intp)),
        ('out_strides', C.pointer(C.npy_intp)),
        ('start', C.npy_intp),
        ('stop', C.npy_intp),
        ('step', C.npy_intp),
        ('src_dim', C.int),
        ('dst_dim', C.int),
    ]

    def _adjust_given_index(self, extent, negative_step, index, is_start):
        '''Emit code that clamps an explicitly given index, in place.

        A negative ``index`` has ``extent`` added and is set to zero if it
        is still negative.  Otherwise an ``index`` that is ``>= extent`` is
        set to ``extent``, except for a start index with a negative step,
        which is set to ``extent - 1``.

        ``is_start`` is tested with a plain Python ``if``, so it selects
        which code gets emitted; ``negative_step`` is tested through
        ``self.ifelse`` in the emitted code.
        '''
        # Transliterate the code below to llvm cbuilder

        # TODO: write in numba

        # For the start index in start:stop:step, do:
        # if have_start:
        #     if start < 0:
        #         start += shape
        #         if start < 0:
        #             start = 0
        #     elif start >= shape:
        #         if negative_step:
        #             start = shape - 1
        #         else:
        #             start = shape
        # else:
        #     if negative_step:
        #         start = shape - 1
        #     else:
        #         start = 0

        # For the stop index, do:
        # if stop is not None:
        #     if stop < 0:
        #         stop += extent
        #         if stop < 0:
        #             stop = 0
        #     elif stop > extent:
        #         stop = extent
        # else:
        #     if negative_step:
        #         stop = -1
        #     else:
        #         stop = extent

        # NOTE(review): CPython's own slice adjustment clamps a still-negative
        # index to -1 (not 0) when the step is negative; here it is clamped
        # to zero regardless of the step sign -- confirm this is intended.

        one, zero = get_constants(self)

        with self.ifelse(index < zero) as ifelse:
            with ifelse.then():
                # Negative index: count from the end, then clamp at zero.
                index += extent
                with self.ifelse(index < zero) as ifelse_inner:
                    with ifelse_inner.then():
                        index.assign(zero)

            with ifelse.otherwise():
                # The name `ifelse` is rebound by the nested blocks below.
                with self.ifelse(index >= extent) as ifelse:
                    with ifelse.then():
                        if is_start:
                            # index is 'start' index
                            with self.ifelse(negative_step) as ifelse:
                                with ifelse.then():
                                    index.assign(extent - one)
                                with ifelse.otherwise():
                                    index.assign(extent)
                        else:
                            # index is 'stop' index. Stop is exclusive, so
                            # we don't care about the sign of the step
                            index.assign(extent)

    def _set_default_index(self, default1, default2, negative_step, index):
        '''Emit code assigning a default to ``index``.

        ``default1`` is assigned when ``negative_step`` holds, ``default2``
        otherwise.
        '''
        with self.ifelse(negative_step) as ifelse:
            with ifelse.then():
                index.assign(default1)
            with ifelse.otherwise():
                index.assign(default2)

    def adjust_index(self,
                     extent,
                     negative_step,
                     index,
                     default1,
                     default2,
                     is_start=False,
                     have_index=True):
        '''Normalize ``index`` in place.

        When ``have_index`` is true the given index is clamped via
        ``_adjust_given_index``; otherwise it is replaced by ``default1``
        (negative step) or ``default2`` via ``_set_default_index``.
        ``have_index`` is a plain Python condition, so only one of the two
        code paths is emitted.
        '''
        if have_index:
            self._adjust_given_index(extent, negative_step, index, is_start)
        else:
            self._set_default_index(default1, default2, negative_step, index)

    def body(self, data, in_shape, in_strides, out_shape, out_strides, start,
             stop, step, src_dim, dst_dim):
        '''Emit the body of the slice function.

        Normalizes start/stop/step against the extent of ``src_dim``,
        computes the number of selected elements, stores that extent and
        the new stride (``stride * step``) at ``dst_dim`` of the output
        arrays, and returns ``data`` offset by ``start * stride``.
        '''

        stride = in_strides[src_dim]
        extent = in_shape[src_dim]

        one, zero = get_constants(self)
        if not self.have_step:
            # No step supplied: use `one` in place of the step argument.
            step = one

        negative_step = step < zero

        # Defaults when start is missing: extent - 1 for a negative step,
        # zero otherwise.
        self.adjust_index(extent,
                          negative_step,
                          start,
                          default1=extent - one,
                          default2=zero,
                          is_start=True,
                          have_index=self.have_start)
        # Defaults when stop is missing: -1 for a negative step, extent
        # otherwise.
        self.adjust_index(extent,
                          negative_step,
                          stop,
                          default1=-one,
                          default2=extent,
                          have_index=self.have_stop)

        # self.debug("extent", extent)
        # self.debug("negative_step", negative_step.cast(C.npy_intp))
        # self.debug("start/stop/step", start, stop, step)

        # Element count: (stop - start) / step, plus one when the division
        # leaves a remainder, and never less than zero.
        new_extent = self.var(C.npy_intp)
        new_extent.assign((stop - start) / step)
        with self.ifelse((stop - start) % step != zero) as ifelse:
            with ifelse.then():
                new_extent += one

        with self.ifelse(new_extent < zero) as ifelse:
            with ifelse.then():
                new_extent.assign(zero)

        # The result starts `start * stride` past the incoming data pointer.
        result = self.var(data.type, name='result')
        result.assign(data[start * stride:])
        out_shape[dst_dim] = new_extent
        # self.debug("new_extent", new_extent)
        # self.debug("out stride:", dst_dim, stride * step)
        out_strides[dst_dim] = stride * step

        self.ret(result)

    def specialize(self, context, have_start, have_stop, have_step):
        '''Record the context and which slice parts were supplied.

        Stores ``context`` and the three ``have_*`` flags on ``self`` and
        derives the generated function name from the flags.
        '''
        self.context = context

        self.have_start = have_start
        self.have_stop = have_stop
        self.have_step = have_step

        self._name_ = "slice_%s_%s_%s" % (have_start, have_stop, have_step)
Ejemplo n.º 38
0
    def _get_tys_list(self):
        '''Return, per translated function, the dtypes of its arguments.

        The outer list follows the order of ``self.translates``.  For each
        argument type from ``self.get_argtypes`` the result holds its
        ``get_dtype()``; an array argument type is first replaced by its
        element type (``.dtype``).
        '''
        per_function = []
        for translated in self.translates:
            row = [
                (ty.dtype if ty.is_array else ty).get_dtype()
                for ty in self.get_argtypes(translated)
            ]
            per_function.append(row)
        return per_function

# GUFuncVectorize is an alias for GUFuncASTVectorize.
GUFuncVectorize = GUFuncASTVectorize

# Pointer-to-intp type; PyObjectHead uses it for its ob_next / ob_prev fields.
_intp_ptr = C.pointer(C.intp)

class PyObjectHead(CStruct):
    # Field layout of the object header, built front to back.
    _fields_ = []

    if llvm_types._trace_refs_:
        # Account for _PyObject_HEAD_EXTRA: the two extra links precede the
        # regular header fields.
        _fields_.extend([
            ('ob_next', _intp_ptr),
            ('ob_prev', _intp_ptr),
        ])

    _fields_.extend([
        ('ob_refcnt', C.intp),
        # NOTE: not an integer; it only has to match the definition in numba
        ('ob_type', C.pointer(C.int)),
    ])