예제 #1
0
    def build(self, cres, signature):
        """Build the ufunc loop wrapper for a compilation result.

        Returns a ``(dtype numbers, function pointer, environment)`` tuple;
        the environment is ``None`` unless ``cres.objectmode`` is set.
        """
        target = cres.target_context
        codelib = cres.library
        inner_fn = codelib.get_function(cres.fndesc.llvm_func_name)

        # Only object-mode results carry an environment here; id(env) is
        # turned into a constant of the pyobject pointer type.
        env = envptr = None
        if cres.objectmode:
            env = cres.environment
            assert env is not None
            intp_ty = target.get_value_type(types.intp)
            pyobj_ty = target.get_value_type(types.pyobject)
            envptr = lc.Constant.int(intp_ty, id(env)).inttoptr(pyobj_ty)

        wrapper = build_ufunc_wrapper(codelib, target, inner_fn, signature,
                                      cres.objectmode, envptr)
        ptr = codelib.get_pointer_to_function(wrapper.name)

        # NumPy type numbers: the arguments first, then the return type.
        sig_types = list(signature.args) + [signature.return_type]
        dtypenums = [as_dtype(ty).num for ty in sig_types]
        return dtypenums, ptr, env
예제 #2
0
    def build(self, cres, signature):
        """Build the ufunc loop wrapper for a compilation result.

        Returns a ``(dtype numbers, function pointer, environment)`` tuple;
        the environment is ``None`` unless ``cres.objectmode`` is set.
        """
        target = cres.target_context
        codelib = cres.library
        inner_fn = codelib.get_function(cres.fndesc.llvm_func_name)

        # Only object-mode results carry an environment here; id(env) is
        # turned into a constant of the pyobject pointer type.
        env = envptr = None
        if cres.objectmode:
            env = cres.environment
            assert env is not None
            intp_ty = target.get_value_type(types.intp)
            pyobj_ty = target.get_value_type(types.pyobject)
            envptr = lc.Constant.int(intp_ty, id(env)).inttoptr(pyobj_ty)

        wrapper = build_ufunc_wrapper(codelib, target, inner_fn, signature,
                                      cres.objectmode, envptr)
        ptr = codelib.get_pointer_to_function(wrapper.name)

        # NumPy type numbers: the arguments first, then the return type.
        sig_types = list(signature.args) + [signature.return_type]
        dtypenums = [as_dtype(ty).num for ty in sig_types]
        return dtypenums, ptr, env
예제 #3
0
def _build_element_wise_ufunc_wrapper(cres, signature):
    '''Create the ufunc loop wrapper for a compilation result.

    The wrapper is built for the element-wise *signature*.  The result is a
    ``(dtype numbers, ptr, environment)`` tuple, where ``ptr`` is the value
    returned by ``build_ufunc_wrapper`` and the environment is ``None``
    unless ``cres.objectmode`` is set.
    '''
    target = cres.target_context
    codelib = cres.library
    func_name = cres.fndesc.llvm_func_name

    env = envptr = None
    if cres.objectmode:
        env = cres.environment
        assert env is not None
        intp_ty = target.get_value_type(types.intp)
        pyobj_ty = target.get_value_type(types.pyobject)
        # Constant of the pyobject pointer type holding id(env).
        envptr = lc.Constant.int(intp_ty, id(env)).inttoptr(pyobj_ty)

    ptr = build_ufunc_wrapper(codelib, target, func_name, signature,
                              cres.objectmode, envptr, env)

    # NumPy type numbers: the arguments first, then the return type.
    sig_types = list(signature.args) + [signature.return_type]
    dtypenums = [as_dtype(ty).num for ty in sig_types]
    return dtypenums, ptr, env
예제 #4
0
def _build_element_wise_ufunc_wrapper(cres, signature):
    '''Create the ufunc loop wrapper for a compilation result.

    The wrapper is built for the element-wise *signature*.  The result is a
    ``(dtype numbers, wrapper function pointer, environment)`` tuple whose
    environment is ``None`` unless ``cres.objectmode`` is set.
    '''
    target = cres.target_context
    codelib = cres.library
    inner_fn = codelib.get_function(cres.fndesc.llvm_func_name)

    env = envptr = None
    if cres.objectmode:
        env = cres.environment
        assert env is not None
        intp_ty = target.get_value_type(types.intp)
        pyobj_ty = target.get_value_type(types.pyobject)
        # Constant of the pyobject pointer type holding id(env).
        envptr = lc.Constant.int(intp_ty, id(env)).inttoptr(pyobj_ty)

    wrapper = build_ufunc_wrapper(codelib, target, inner_fn, signature,
                                  cres.objectmode, envptr, env)
    ptr = codelib.get_pointer_to_function(wrapper.name)

    # NumPy type numbers: the arguments first, then the return type.
    sig_types = list(signature.args) + [signature.return_type]
    dtypenums = [as_dtype(ty).num for ty in sig_types]
    return dtypenums, ptr, env
예제 #5
0
def find_common_dtype_from_numpy_dtypes(array_types, scalar_types):
    """Return the common Numba dtype of the given array and scalar Numba dtypes.

    Both sequences are converted with ``numpy_support.as_dtype``, combined by
    ``numpy.find_common_type`` and the outcome is converted back with
    ``numpy_support.from_dtype``.
    """
    array_dtypes = list(map(numpy_support.as_dtype, array_types))
    scalar_dtypes = list(map(numpy_support.as_dtype, scalar_types))
    common = numpy.find_common_type(array_dtypes, scalar_dtypes)
    return numpy_support.from_dtype(common)
예제 #6
0
 def kernel_wrapper(values):
     """Launch ``kernel`` on arrays built from ``values``; return a list.

     Each item of ``values`` supplies one value per entry of ``argtypes``.
     """
     count = len(values)

     def column(numba_type):
         return np.empty(count, dtype=numpy_support.as_dtype(numba_type))

     in_cols = [column(tp) for tp in argtypes]
     out_col = column(restype)
     # Scatter the row-wise values into one array per argument.
     for row, row_values in enumerate(values):
         for value, col in zip(row_values, in_cols):
             col[row] = value
     blocks = int(math.ceil(count / 256))
     kernel[blocks, 256](out_col, *in_cols)
     return list(out_col)
예제 #7
0
 def check_round(cfunc, values, inty, outty, decimals):
     """Compare ``cfunc`` with ``_fixed_np_round`` and check its shape error."""
     # Input and output arrays of the requested types
     source = values.astype(as_dtype(inty))
     got = np.zeros_like(source).astype(as_dtype(outty))
     expected = got.copy()

     # Fill both output buffers, reference implementation first.
     _fixed_np_round(source, decimals, expected)
     cfunc(source, decimals, got)
     np.testing.assert_allclose(got, expected)

     # An output array of a different length must be rejected.
     with self.assertRaises(ValueError) as raises:
         cfunc(source, decimals, got[1:])
     message = str(raises.exception)
     self.assertEqual(message, "invalid output shape")
예제 #8
0
 def kernel_wrapper(values):
     """Launch ``kernel`` on arrays built from ``values``; return a list.

     Each item of ``values`` supplies one value per entry of ``argtypes``.
     """
     count = len(values)

     def column(numba_type):
         return np.empty(count, dtype=numpy_support.as_dtype(numba_type))

     in_cols = [column(tp) for tp in argtypes]
     out_col = column(restype)
     # Scatter the row-wise values into one array per argument.
     for row, row_values in enumerate(values):
         for value, col in zip(row_values, in_cols):
             col[row] = value
     blocks = int(math.ceil(count / 256))
     kernel[blocks, 256](out_col, *in_cols)
     return list(out_col)
예제 #9
0
 def check_round(cfunc, values, inty, outty, decimals):
     """Compare ``cfunc`` with ``_fixed_np_round`` and check its shape error."""
     # Input and output arrays of the requested types
     source = values.astype(as_dtype(inty))
     got = np.zeros_like(source).astype(as_dtype(outty))
     expected = got.copy()

     # Fill both output buffers, reference implementation first.
     _fixed_np_round(source, decimals, expected)
     cfunc(source, decimals, got)
     np.testing.assert_allclose(got, expected)

     # An output array of a different length must be rejected.
     with self.assertRaises(ValueError) as raises:
         cfunc(source, decimals, got[1:])
     message = str(raises.exception)
     self.assertEqual(message, "invalid output shape")
예제 #10
0
파일: ufuncbuilder.py 프로젝트: numba/numba
def _build_element_wise_ufunc_wrapper(cres, signature):
    '''Create the ufunc loop wrapper for a compilation result.

    The wrapper is built for the element-wise *signature* while holding
    ``global_compiler_lock``.  Returns ``(dtype numbers, ptr,
    cres.environment)`` where ``ptr`` is what ``build_ufunc_wrapper`` returns.
    '''
    target = cres.target_context
    codelib = cres.library
    func_name = cres.fndesc.llvm_func_name

    with global_compiler_lock:
        ptr = build_ufunc_wrapper(
            codelib, target, func_name, signature, cres.objectmode, cres)

    # NumPy type numbers: the arguments first, then the return type.
    sig_types = list(signature.args) + [signature.return_type]
    dtypenums = [as_dtype(ty).num for ty in sig_types]
    return dtypenums, ptr, cres.environment
예제 #11
0
    def build(self, cres):
        """
        Build the gufunc wrapper for *cres*.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        sig = cres.signature
        wrapper_info = build_gufunc_wrapper(
            self.py_func, cres, self.sin, self.sout,
            cache=self.cache, is_parfors=False,
        )
        env = wrapper_info.env
        ptr = wrapper_info.library.get_pointer_to_function(wrapper_info.name)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #12
0
def _build_element_wise_ufunc_wrapper(cres, signature):
    '''Create the ufunc loop wrapper for a compilation result.

    The wrapper is built for the element-wise *signature* and its function
    pointer is looked up, both while holding ``global_compiler_lock``.
    Returns ``(dtype numbers, function pointer, cres.environment)``.
    '''
    target = cres.target_context
    codelib = cres.library
    func_name = cres.fndesc.llvm_func_name

    with global_compiler_lock:
        wrapper_info = build_ufunc_wrapper(
            codelib, target, func_name, signature, cres.objectmode, cres)
        ptr = wrapper_info.library.get_pointer_to_function(wrapper_info.name)

    # NumPy type numbers: the arguments first, then the return type.
    sig_types = list(signature.args) + [signature.return_type]
    dtypenums = [as_dtype(ty).num for ty in sig_types]
    return dtypenums, ptr, cres.environment
예제 #13
0
    def build(self, cres):
        """
        Build the gufunc wrapper for *cres* after calling
        ``_launch_threads()`` and ``_init()``.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        _launch_threads()
        _init()

        target = cres.target_context
        codelib = cres.library
        sig = cres.signature
        ptr, env = build_gufunc_wrapper(
            codelib, target, sig, self.sin, self.sout,
            fndesc=cres.fndesc, env=cres.environment)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #14
0
    def build(self, cres):
        """
        Build the gufunc wrapper around the compiled LLVM function of *cres*.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        target = cres.target_context
        codelib = cres.library
        sig = cres.signature
        inner_fn = codelib.get_function(cres.fndesc.llvm_func_name)
        wrapper, env = build_gufunc_wrapper(
            codelib, target, inner_fn, sig, self.sin, self.sout,
            fndesc=cres.fndesc, env=cres.environment)
        ptr = codelib.get_pointer_to_function(wrapper.name)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #15
0
    def build(self, cres):
        """
        Build the gufunc wrapper around the compiled LLVM function of *cres*.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        target = cres.target_context
        codelib = cres.library
        sig = cres.signature
        inner_fn = codelib.get_function(cres.fndesc.llvm_func_name)
        wrapper, env = build_gufunc_wrapper(
            codelib, target, inner_fn, sig, self.sin, self.sout,
            fndesc=cres.fndesc, env=cres.environment)
        ptr = codelib.get_pointer_to_function(wrapper.name)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #16
0
    def test_hypot(self, flags=enable_pyobj_flags):
        """Exercise hypot() on several types and near the overflow limit."""
        pyfunc = hypot
        x_values = [1, 2, 3, 4, 5, 6, .21, .34]
        y_values = [x + 2 for x in x_values]
        x_types = [types.int64, types.uint64, types.float32, types.float64]
        # Issue #563: precision issues with math.hypot() under Windows.
        self.run_binary(pyfunc, x_types, x_values, y_values, flags, 'single')

        # Reference that squares its operands directly; the loop below checks
        # that it overflows for inputs where the numba result stays finite.
        def naive_hypot(x, y):
            return math.sqrt(x * x + y * y)

        for fltty in (types.float32, types.float64):
            compiled = self.ccache.compile(pyfunc, (fltty, fltty), flags=flags)
            cfunc = compiled.entry_point
            scalar_type = numpy_support.as_dtype(fltty).type
            big = scalar_type(np.finfo(scalar_type).max / 30.)
            nb_ans = cfunc(big, big)
            self.assertPreciseEqual(nb_ans, pyfunc(big, big), prec='single')
            self.assertTrue(np.isfinite(nb_ans))

            with warnings.catch_warnings():
                warnings.simplefilter("error", RuntimeWarning)
                self.assertRaisesRegexp(
                    RuntimeWarning, 'overflow encountered in .*_scalars',
                    naive_hypot, big, big)
예제 #17
0
def compile_udf(udf, type_signature):
    """Compile ``udf`` to CUDA PTX with `numba`.

    The python callable ``udf`` is wrapped with
    ``cuda.jit(udf, device=True)`` and compiled for ``type_signature``; the
    PTX is returned together with the generated output type.

    The output is expected to be passed to the PTX parser in `libcudf`
    to generate a CUDA device function to be inlined into CUDA kernels,
    compiled at runtime and launched.

    Parameters
    ----------
    udf:
      a python callable function

    type_signature:
      a tuple that specifies types of each of the input parameters of ``udf``.
      The types should be one in `numba.types` and could be converted from
      numpy types with `numba.numpy_support.from_dtype(...)`.

    Returns
    -------
    ptx_code:
      The compiled CUDA PTX, decoded as UTF-8

    output_type:
      A numpy type (the ``.type`` of the return dtype)

    """
    device_fn = cuda.jit(udf, device=True)
    specialization = device_fn.compile(type_signature)
    ptx_code = device_fn.inspect_ptx(type_signature).decode("utf-8")
    return_dtype = numpy_support.as_dtype(specialization.signature.return_type)
    return ptx_code, return_dtype.type
예제 #18
0
    def build(self, cres):
        """
        Build the gufunc wrapper for *cres* after calling
        ``_launch_threads()`` and ``_init()``.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        _launch_threads()
        _init()

        target = cres.target_context
        codelib = cres.library
        sig = cres.signature
        ptr, env = build_gufunc_wrapper(
            codelib, target, sig, self.sin, self.sout,
            fndesc=cres.fndesc, env=cres.environment)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #19
0
    def test_hypot(self, flags=enable_pyobj_flags):
        """Exercise hypot() on several types and near the overflow limit."""
        pyfunc = hypot
        x_values = [1, 2, 3, 4, 5, 6, .21, .34]
        y_values = [x + 2 for x in x_values]
        x_types = [types.int64, types.uint64, types.float32, types.float64]
        # Issue #563: precision issues with math.hypot() under Windows.
        if sys.platform == 'win32':
            prec = 'single'
        else:
            prec = 'exact'
        self.run_binary(pyfunc, x_types, x_values, y_values, flags, prec)

        # Reference that squares its operands directly; the loop below checks
        # that it overflows for inputs where the numba result stays finite.
        def naive_hypot(x, y):
            return math.sqrt(x * x + y * y)

        for fltty in (types.float32, types.float64):
            compiled = self.ccache.compile(pyfunc, (fltty, fltty), flags=flags)
            cfunc = compiled.entry_point
            scalar_type = numpy_support.as_dtype(fltty).type
            big = scalar_type(np.finfo(scalar_type).max / 30.)
            nb_ans = cfunc(big, big)
            self.assertPreciseEqual(nb_ans, pyfunc(big, big), prec='single')
            self.assertTrue(np.isfinite(nb_ans))

            with warnings.catch_warnings():
                warnings.simplefilter("error", RuntimeWarning)
                self.assertRaisesRegexp(
                    RuntimeWarning, 'overflow encountered in .*_scalars',
                    naive_hypot, big, big)
예제 #20
0
def map_struct_to_record_dtype(cffi_type):
    """Map a cffi struct type to a record type.

    A NumPy record dtype is assembled from the struct's fields (names,
    member dtypes, offsets and total size) and then converted with
    ``numpy_support.from_dtype``.  A field whose ``bitshift``, ``flags`` or
    ``bitsize`` differs from -1, 0 and -1 respectively raises ``ValueError``.
    """
    layout = {
        'names': [],
        'formats': [],
        'offsets': [],
        'itemsize': ffi.sizeof(cffi_type),
    }
    all_aligned = True
    for name, field in cffi_type.fields:
        # guard unsupported values
        if field.bitshift != -1:
            raise ValueError(
                "field {!r} has bitshift, this is not supported".format(name))
        if field.flags != 0:
            raise ValueError(
                "field {!r} has flags, this is not supported".format(name))
        if field.bitsize != -1:
            raise ValueError(
                "field {!r} has bitsize, this is not supported".format(name))

        member_type = map_type(field.type, use_record_dtype=True)
        member_dtype = numpy_support.as_dtype(member_type)
        layout['names'].append(name)
        layout['formats'].append(member_dtype)
        layout['offsets'].append(field.offset)
        # The dtype is built as aligned only if every member offset is a
        # multiple of that member's alignment.
        all_aligned &= (field.offset % member_dtype.alignment == 0)

    record_dtype = np.dtype(layout, align=all_aligned)
    return numpy_support.from_dtype(record_dtype)
예제 #21
0
 def test_record_dtype_with_titles_roundtrip(self):
     """A record dtype with a titled field survives the Numba round trip."""
     # The builtin ``float`` is used because the ``np.float`` alias (which
     # meant the same thing) is not available in current NumPy releases.
     recdtype = np.dtype([(("title a", 'a'), float), ('b', float)])
     nbtype = numpy_support.from_dtype(recdtype)
     # Only the title itself is reported as a title, not the field names.
     self.assertTrue(nbtype.is_title('title a'))
     self.assertFalse(nbtype.is_title('a'))
     self.assertFalse(nbtype.is_title('b'))
     got = numpy_support.as_dtype(nbtype)
     # assertEqual actually compares the dtypes; assertTrue would treat the
     # second argument as a failure message and pass for any truthy dtype.
     self.assertEqual(got, recdtype)
예제 #22
0
 def test_record_dtype_with_titles_roundtrip(self):
     """A record dtype with a titled field survives the Numba round trip."""
     # The builtin ``float`` is used because the ``np.float`` alias (which
     # meant the same thing) is not available in current NumPy releases.
     recdtype = np.dtype([(("title a", 'a'), float), ('b', float)])
     nbtype = numpy_support.from_dtype(recdtype)
     # Only the title itself is reported as a title, not the field names.
     self.assertTrue(nbtype.is_title('title a'))
     self.assertFalse(nbtype.is_title('a'))
     self.assertFalse(nbtype.is_title('b'))
     got = numpy_support.as_dtype(nbtype)
     # assertEqual actually compares the dtypes; assertTrue would treat the
     # second argument as a failure message and pass for any truthy dtype.
     self.assertEqual(got, recdtype)
예제 #23
0
    def xinfo_impl(arg):
        """Return an implementation that builds ``container`` from ``np_func``.

        The attributes named in ``attr`` are read once, here, from
        ``np_func(as_dtype(...))`` and captured by the returned function.
        """
        # Use the argument's ``dtype`` attribute when it has one, otherwise
        # the argument itself.
        scalar_ty = getattr(arg, 'dtype', arg)
        info = np_func(as_dtype(scalar_ty))
        data = tuple(getattr(info, name) for name in attr)

        def impl(arg):
            return container(*data)

        return impl
예제 #24
0
def _build_element_wise_ufunc_wrapper(cres, signature):
    '''Create the ufunc loop wrapper for a compilation result.

    The wrapper is built for the element-wise *signature* while holding
    ``compiler.lock_compiler``.  Returns ``(dtype numbers, ptr, environment)``
    where ``ptr`` is what ``build_ufunc_wrapper`` returns.
    '''
    target = cres.target_context
    codelib = cres.library
    func_name = cres.fndesc.llvm_func_name

    environment = cres.environment
    env_pointer = environment.as_pointer(target)

    with compiler.lock_compiler:
        ptr = build_ufunc_wrapper(
            codelib, target, func_name, signature,
            cres.objectmode, env_pointer, environment)

    # NumPy type numbers: the arguments first, then the return type.
    sig_types = list(signature.args) + [signature.return_type]
    dtypenums = [as_dtype(ty).num for ty in sig_types]
    return dtypenums, ptr, environment
예제 #25
0
def array_cumprod(context, builder, sig, args):
    """Lower a cumulative product that fills a new one-dimensional array.

    The running product is typed as the element type of the return type.
    """
    elem_ty = sig.return_type.dtype
    out_dtype = as_dtype(elem_ty)

    def array_cumprod_impl(arr):
        # The output length is the product of the extents of ``arr``.
        total = 1
        for extent in arr.shape:
            total = total * extent
        out = numpy.empty(total, out_dtype)
        running = 1
        for pos, item in enumerate(arr.flat):
            running *= item
            out[pos] = running
        return out

    res = context.compile_internal(builder, array_cumprod_impl, sig, args,
                                   locals=dict(running=elem_ty))
    return impl_ret_new_ref(context, builder, sig.return_type, res)
예제 #26
0
def roots_impl(p):
    """Return a roots implementation chosen from the type of ``p``.

    The element type is ``p.dtype`` when ``p`` has that attribute, otherwise
    ``p`` itself.  Integer element types use ``np.float64`` for the arrays
    created by the implementation; any other type uses its NumPy dtype.
    """

    # cast int vectors to float cf. numpy, this is a bit dicey as
    # the roots could be complex which will fail anyway
    ty = getattr(p, 'dtype', p)
    if isinstance(ty, types.Integer):
        cast_t = np.float64
    else:
        cast_t = np_support.as_dtype(ty)

    def roots_impl(p):
        # impl based on numpy:
        # https://github.com/numpy/numpy/blob/master/numpy/lib/polynomial.py

        if len(p.shape) != 1:
            raise ValueError("Input must be a 1d array.")

        # indices of the non-zero coefficients
        non_zero = np.nonzero(p)[0]

        # all coefficients are zero: there are no roots to report
        if len(non_zero) == 0:
            return np.zeros(0, dtype=cast_t)

        # number of zero coefficients after the last non-zero one
        tz = len(p) - non_zero[-1] - 1

        # pull out the coeffs selecting between possible zero pads
        p = p[int(non_zero[0]):int(non_zero[-1]) + 1]

        n = len(p)
        if n > 1:
            # construct companion matrix, ensure fortran order
            # to give to eigvals, write to upper diag and then
            # transpose.
            A = np.diag(np.ones((n - 2,), cast_t), 1).T
            A[0, :] = -p[1:] / p[0]  # normalize
            roots = np.linalg.eigvals(A)
        else:
            # a single coefficient is left, so no eigenvalues are computed
            roots = np.zeros(0, dtype=cast_t)

        # add in additional zeros on the end if needed
        if tz > 0:
            return np.hstack((roots, np.zeros(tz, dtype=cast_t)))
        else:
            return roots

    return roots_impl
예제 #27
0
def roots_impl(p):
    """Return a roots implementation chosen from the type of ``p``.

    The element type is ``p.dtype`` when ``p`` has that attribute, otherwise
    ``p`` itself.  Integer element types use ``np.float64`` for the arrays
    created by the implementation; any other type uses its NumPy dtype.
    """

    # cast int vectors to float cf. numpy, this is a bit dicey as
    # the roots could be complex which will fail anyway
    ty = getattr(p, 'dtype', p)
    if isinstance(ty, types.Integer):
        cast_t = np.float64
    else:
        cast_t = np_support.as_dtype(ty)

    def roots_impl(p):
        # impl based on numpy:
        # https://github.com/numpy/numpy/blob/master/numpy/lib/polynomial.py

        if len(p.shape) != 1:
            raise ValueError("Input must be a 1d array.")

        # indices of the non-zero coefficients
        non_zero = np.nonzero(p)[0]

        # all coefficients are zero: there are no roots to report
        if len(non_zero) == 0:
            return np.zeros(0, dtype=cast_t)

        # number of zero coefficients after the last non-zero one
        tz = len(p) - non_zero[-1] - 1

        # pull out the coeffs selecting between possible zero pads
        p = p[int(non_zero[0]):int(non_zero[-1]) + 1]

        n = len(p)
        if n > 1:
            # construct companion matrix, ensure fortran order
            # to give to eigvals, write to upper diag and then
            # transpose.
            A = np.diag(np.ones((n - 2,), cast_t), 1).T
            A[0, :] = -p[1:] / p[0]  # normalize
            roots = np.linalg.eigvals(A)
        else:
            # a single coefficient is left, so no eigenvalues are computed
            roots = np.zeros(0, dtype=cast_t)

        # add in additional zeros on the end if needed
        if tz > 0:
            return np.hstack((roots, np.zeros(tz, dtype=cast_t)))
        else:
            return roots

    return roots_impl
예제 #28
0
    def build(self, cres):
        """
        Build the gufunc wrapper for *cres*.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        sig = cres.signature
        ptr, env = build_gufunc_wrapper(
            self.py_func, cres, self.sin, self.sout, cache=self.cache)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #29
0
    def build(self, cres):
        """
        Build the gufunc wrapper for *cres*.

        Returns (dtype numbers, function ptr, EnvironmentObject)
        """
        sig = cres.signature
        # The third item returned by the builder is not needed here.
        ptr, env, _wrapper_name = build_gufunc_wrapper(
            self.py_func, cres, self.sin, self.sout, cache=self.cache)

        # Array arguments contribute their element dtype; any other type
        # is converted as-is.
        dtypenums = [
            as_dtype(arg.dtype if isinstance(arg, types.Array) else arg).num
            for arg in sig.args
        ]
        return dtypenums, ptr, env
예제 #30
0
    def array(self, shape, dtype):
        """Return an array of ``shape`` and ``dtype``, reused per call site.

        A ``shape`` of 0 yields a view over the dynamic shared buffer; any
        other shape yields one array per calling source line, created on
        first use and returned again on later calls.
        """
        np_dtype = numpy_support.as_dtype(dtype)

        # Dynamic shared memory is requested with size 0 - this all shares the
        # same underlying memory
        if shape == 0:
            # Only whole elements may be exposed (Numpy complains if the
            # buffer is not a multiple of the element size)
            n_items = self._dynshared_size // np_dtype.itemsize
            return np.frombuffer(
                self._dynshared.data, dtype=np_dtype, count=n_items)

        # Otherwise, allocations are keyed by the caller's source file and
        # line number
        call_site = traceback.extract_stack()[-2][0:2]
        allocation = self._allocations.get(call_site)
        if allocation is not None:
            return allocation
        allocation = np.empty(shape, np_dtype)
        self._allocations[call_site] = allocation
        return allocation
예제 #31
0
    def test_cfunc_callback(self):
        """Check a cfunc over ``big_struct`` records against the jitted
        function it wraps, called on a NumPy view of the same cffi data.
        """
        ffi = self.get_ffi()
        big_struct = ffi.typeof('big_struct')
        # Map the cffi struct and function types with record dtypes enabled
        nb_big_struct = cffi_support.map_type(big_struct, use_record_dtype=True)
        sig = cffi_support.map_type(ffi.typeof('myfunc'), use_record_dtype=True)

        @njit
        def calc(base):
            # Accumulate a value from every record's fields
            tmp = 0
            for i in range(base.size):
                elem = base[i]
                tmp += elem.i1 * elem.f2 / elem.d3
                tmp += base[i].af4.sum()
            return tmp

        @cfunc(sig)
        def foo(ptr, n):
            # Wrap the raw pointer with carray and defer to calc
            base = carray(ptr, n)
            return calc(base)

        # Make data
        mydata = ffi.new('big_struct[3]')
        ptr = ffi.cast('big_struct*', mydata)
        for i in range(3):
            ptr[i].i1 = i * 123
            ptr[i].f2 = i * 213
            ptr[i].d3 = (1 + i) * 213
            for j in range(9):
                ptr[i].af4[j] = i * 10 + j

        # Address of my data
        addr = int(ffi.cast('size_t', ptr))
        # Call the callback through its ctypes interface with that address
        got = foo.ctypes(addr, 3)

        # Make numpy array from the cffi buffer
        array = np.ndarray(
            buffer=ffi.buffer(mydata),
            dtype=numpy_support.as_dtype(nb_big_struct),
            shape=3,
            )
        # The jitted function on the array view gives the expected value
        expect = calc(array)
        self.assertEqual(got, expect)
예제 #32
0
    def test_pickling_vectorize(self):
        """Pickle a CUDA vectorize function twice and compare results."""
        @vectorize(['intp(intp)', 'float64(float64)'], target='cuda')
        def cuda_vect(x):
            return x * 2

        # accommodate int representations in np.arange
        ary = np.arange(10, dtype=numpy_support.as_dtype(types.intp))
        # result of the original function, used as the reference
        expected = cuda_vect(ary)

        # First round trip; the original is deleted before the copy runs.
        once = pickle.loads(pickle.dumps(cuda_vect))
        del cuda_vect
        result_once = once(ary)
        np.testing.assert_equal(expected, result_once)

        # Second round trip, starting from the first copy.
        twice = pickle.loads(pickle.dumps(once))
        del once
        result_twice = twice(ary)
        np.testing.assert_equal(expected, result_twice)
예제 #33
0
    def array(self, shape, dtype):
        """Return an array of ``shape`` and ``dtype``, reused per call site.

        A ``shape`` of 0 yields a view over the dynamic shared buffer; any
        other shape yields one array per calling source line, created on
        first use and returned again on later calls.
        """
        np_dtype = numpy_support.as_dtype(dtype)

        # Dynamic shared memory is requested with size 0 - this all shares the
        # same underlying memory
        if shape == 0:
            # Only whole elements may be exposed (Numpy complains if the
            # buffer is not a multiple of the element size)
            n_items = self._dynshared_size // np_dtype.itemsize
            return np.frombuffer(
                self._dynshared.data, dtype=np_dtype, count=n_items)

        # Otherwise, allocations are keyed by the caller's source file and
        # line number
        call_site = traceback.extract_stack()[-2][0:2]
        allocation = self._allocations.get(call_site)
        if allocation is not None:
            return allocation
        allocation = np.empty(shape, np_dtype)
        self._allocations[call_site] = allocation
        return allocation
예제 #34
0
    def test_pickling_vectorize(self):
        """Pickle a CUDA vectorize function twice and compare results."""
        @vectorize(['intp(intp)', 'float64(float64)'], target='cuda')
        def cuda_vect(x):
            return x * 2

        # accommodate int representations in np.arange
        ary = np.arange(10, dtype=numpy_support.as_dtype(types.intp))
        # result of the original function, used as the reference
        expected = cuda_vect(ary)

        # First round trip; the original is deleted before the copy runs.
        once = pickle.loads(pickle.dumps(cuda_vect))
        del cuda_vect
        result_once = once(ary)
        np.testing.assert_equal(expected, result_once)

        # Second round trip, starting from the first copy.
        twice = pickle.loads(pickle.dumps(once))
        del once
        result_twice = twice(ary)
        np.testing.assert_equal(expected, result_twice)
예제 #35
0
def array_cumprod(context, builder, sig, args):
    """Lower a cumulative product that fills a new one-dimensional array.

    The running product is typed as the element type of the return type.
    """
    elem_ty = sig.return_type.dtype
    out_dtype = as_dtype(elem_ty)

    def array_cumprod_impl(arr):
        # The output length is the product of the extents of ``arr``.
        total = 1
        for extent in arr.shape:
            total = total * extent
        out = np.empty(total, out_dtype)
        running = 1
        for pos, item in enumerate(arr.flat):
            running *= item
            out[pos] = running
        return out

    res = context.compile_internal(builder, array_cumprod_impl, sig, args,
                                   locals=dict(running=elem_ty))
    return impl_ret_new_ref(context, builder, sig.return_type, res)
예제 #36
0
def hpat_arrays_append_overload(A, B):
    '''Select an implementation that appends B to the array A.

    B may be a single array or a tuple/list of arrays.  Numeric arrays
    (``types.Array``) and string arrays (``string_array_type``) are handled;
    for any other combination no implementation is returned (``None``).
    '''

    sequence_types = (types.UniTuple, types.List)

    if isinstance(A, types.Array):
        if isinstance(B, types.Array):

            def _append_single_numeric_impl(A, B):
                return numpy.concatenate((A, B))

            return _append_single_numeric_impl

        if isinstance(B, sequence_types):
            # TODO: this heavily relies on B being a homogeneous tuple/list - find a better way
            # to resolve common dtype of heterogeneous sequence of arrays
            np_common_dtype = numpy.find_common_type(
                [],
                [numpy_support.as_dtype(A.dtype),
                 numpy_support.as_dtype(B.dtype.dtype)])
            numba_common_dtype = numpy_support.from_dtype(np_common_dtype)

            # TODO: refactor to use numpy.concatenate when Numba supports building a tuple at runtime
            def _append_list_numeric_impl(A, B):

                lengths = numpy.array([len(arr) for arr in B])
                total_length = len(A) + lengths.sum()
                new_data = numpy.empty(total_length, numba_common_dtype)

                # ``cursor`` always points just past the last element written
                cursor = len(A)
                new_data[:cursor] = A
                for arr in B:
                    end = cursor + len(arr)
                    new_data[cursor:end] = arr
                    cursor = end
                return new_data

            return _append_list_numeric_impl

        # numeric A with an unsupported B: no implementation
        return None

    if A == string_array_type:
        if B == string_array_type:

            def _append_single_string_array_impl(A, B):
                total_size = len(A) + len(B)
                total_chars = num_total_chars(A) + num_total_chars(B)
                new_data = sdc.str_arr_ext.pre_alloc_string_array(
                    total_size, total_chars)

                # each call's return value advances the write position
                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                pos += append_string_array_to(new_data, pos, B)

                return new_data

            return _append_single_string_array_impl

        if isinstance(B, sequence_types) and B.dtype == string_array_type:

            def _append_list_string_array_impl(A, B):
                array_list = [A] + list(B)
                sizes = numpy.array([len(arr) for arr in array_list])
                char_counts = numpy.array(
                    [num_total_chars(arr) for arr in array_list])

                new_data = sdc.str_arr_ext.pre_alloc_string_array(
                    sizes.sum(), char_counts.sum())

                # each call's return value advances the write position
                pos = 0
                pos += append_string_array_to(new_data, pos, A)
                for arr in B:
                    pos += append_string_array_to(new_data, pos, arr)

                return new_data

            return _append_list_string_array_impl

    return None
예제 #37
0
    def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes,
                         *args):
        """Generate, assemble and compile the function that runs this stencil.

        Args:
            result: when not None, the generated function gets an extra
                keyword parameter for the output array and no output array
                is allocated inside the generated function.
            sigret: when not None, its ``pysig`` attribute is set to the
                Python signature of the generated function.
            return_type: type whose ``dtype`` selects the element type of the
                allocated output array and is compared with the type of the
                ``cval`` option.
            typemap: passed to copy propagation over the kernel copy.
            calltypes: copied together with the kernel IR by
                ``copy_ir_with_calltypes``.
            *args: types of the stencil arguments.  ``args[0]`` is asserted
                to be a ``types.Type`` and supplies ``ndim``; all of them are
                used as the parameter types for compilation.

        Returns:
            Whatever ``compiler.compile_ir`` returns for the combined IR.

        Raises:
            ValueError: if the kernel uses the name ``out``, if the first
                argument is listed under ``standard_indexing``, if
                ``standard_indexing`` names an argument the kernel does not
                have, or if the type of ``cval`` differs from
                ``return_type.dtype``.

        Side effects:
            Sets ``self.neighborhood`` to the computed kernel size when it
            was None.
        """
        # Overall approach:
        # 1) Construct a string containing a function definition for the stencil function
        #    that will execute the stencil kernel.  This function definition includes a
        #    unique stencil function name, the parameters to the stencil kernel, loop
        #    nests across the dimensions of the input array.  Those loop nests use the
        #    computed stencil kernel size so as not to try to compute elements where
        #    elements outside the bounds of the input array would be needed.
        # 2) The innermost body of the loop nest in this new function is a special
        #    sentinel assignment.
        # 3) Get the IR of this new function.
        # 4) Split the block containing the sentinel assignment and remove the sentinel
        #    assignment.  Insert the stencil kernel IR into the stencil function IR
        #    after label and variable renaming of the stencil kernel IR to prevent
        #    conflicts with the stencil function IR.
        # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

        # Copy the kernel so that our changes for this callsite
        # won't affect other callsites.
        (kernel_copy,
         copy_calltypes) = self.copy_ir_with_calltypes(self.kernel_ir,
                                                       calltypes)
        # The stencil kernel body becomes the body of a loop, for which args aren't needed.
        ir_utils.remove_args(kernel_copy.blocks)
        first_arg = kernel_copy.arg_names[0]

        in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
        name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
        ir_utils.apply_copy_propagate(kernel_copy.blocks, in_cps,
                                      name_var_table, typemap, copy_calltypes)

        # "out" is used below as the base name of the output array variable.
        if "out" in name_var_table:
            raise ValueError(
                "Cannot use the reserved word 'out' in stencil kernels.")

        sentinel_name = ir_utils.get_unused_var_name("__sentinel__",
                                                     name_var_table)
        if config.DEBUG_ARRAY_OPT == 1:
            print("name_var_table", name_var_table, sentinel_name)

        # The first argument type drives the dimensionality of the loop nest.
        the_array = args[0]

        if config.DEBUG_ARRAY_OPT == 1:
            print("_stencil_wrapper", return_type, return_type.dtype,
                  type(return_type.dtype), args)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # We generate a Numba function to execute this stencil and here
        # create the unique name of this function.
        stencil_func_name = "__numba_stencil_%s_%s" % (hex(
            id(the_array)).replace("-", "_"), self.id)

        # We will put a loop nest in the generated function for each
        # dimension in the input array.  Here we create the name for
        # the index variable for each dimension.  index0, index1, ...
        index_vars = []
        for i in range(the_array.ndim):
            index_var_name = ir_utils.get_unused_var_name(
                "index" + str(i), name_var_table)
            index_vars += [index_var_name]

        # Create extra signature for out and neighborhood.
        out_name = ir_utils.get_unused_var_name("out", name_var_table)
        neighborhood_name = ir_utils.get_unused_var_name(
            "neighborhood", name_var_table)
        sig_extra = ""
        if result is not None:
            sig_extra += ", {}=None".format(out_name)
        if "neighborhood" in dict(self.kws):
            sig_extra += ", {}=None".format(neighborhood_name)

        # Get a list of the standard indexed array names.
        standard_indexed = self.options.get("standard_indexing", [])

        if first_arg in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must "
                             "use relative indexing, not standard indexing.")

        if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
            raise ValueError("Standard indexing requested for an array name "
                             "not present in the stencil kernel definition.")

        # Add index variables to getitems in the IR to transition the accesses
        # in the kernel from relative to regular Python indexing.  Returns the
        # computed size of the stencil kernel and a list of the relatively indexed
        # arrays.
        kernel_size, relatively_indexed = self.add_indices_to_kernel(
            kernel_copy, index_vars, the_array.ndim, self.neighborhood,
            standard_indexed)
        # Remember the computed kernel size when no neighborhood was set.
        if self.neighborhood is None:
            self.neighborhood = kernel_size

        if config.DEBUG_ARRAY_OPT == 1:
            print("After add_indices_to_kernel")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # The return in the stencil kernel becomes a setitem for that
        # particular point in the iteration space.
        ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                      index_vars, out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("After replace_return_with_setitem", ret_blocks)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Start to form the new function to execute the stencil kernel.
        func_text = "def {}({}{}):\n".format(stencil_func_name,
                                             ",".join(kernel_copy.arg_names),
                                             sig_extra)

        # Get loop ranges for each dimension, which could be either int
        # or variable. In the latter case we'll use the extra neighborhood
        # argument to the function.
        ranges = []
        for i in range(the_array.ndim):
            if isinstance(kernel_size[i][0], int):
                lo = kernel_size[i][0]
                hi = kernel_size[i][1]
            else:
                lo = "{}[{}][0]".format(neighborhood_name, i)
                hi = "{}[{}][1]".format(neighborhood_name, i)
            ranges.append((lo, hi))

        # If there is more than one relatively indexed array, add a call to
        # a function that will raise an error if any of the relatively indexed
        # arrays are of different size than the first input array.
        if len(relatively_indexed) > 1:
            func_text += "    raise_if_incompatible_array_sizes(" + first_arg
            for other_array in relatively_indexed:
                if other_array != first_arg:
                    func_text += "," + other_array
            func_text += ")\n"

        # Get the shape of the first input array.
        shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
        func_text += "    {} = {}.shape\n".format(shape_name, first_arg)

        # If we have to allocate the output array (the out argument was not used)
        # then use np.full if the user specified a cval stencil decorator option
        # or np.zeros if they didn't to allocate the array.
        if result is None:
            return_type_name = numpy_support.as_dtype(
                return_type.dtype).type.__name__
            if "cval" in self.options:
                cval = self.options["cval"]
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")
                out_init = "{} = np.full({}, {}, dtype=np.{})\n".format(
                    out_name, shape_name, cval, return_type_name)
            else:
                out_init = "{} = np.zeros({}, dtype=np.{})\n".format(
                    out_name, shape_name, return_type_name)
            func_text += "    " + out_init

        # ``offset`` is the current nesting depth, in units of four spaces.
        offset = 1
        # Add the loop nests to the new function.
        for i in range(the_array.ndim):
            for j in range(offset):
                func_text += "    "
            # ranges[i][0] is the minimum index used in the i'th dimension
            # but minimums greater than 0 don't preclude any entry in the array.
            # So, take the minimum of 0 and the minimum index found in the kernel
            # and this will be a negative number (potentially -0).  Then, we do
            # unary - on that to get the positive offset in this dimension whose
            # use is precluded.
            # ranges[i][1] is the maximum of 0 and the observed maximum index
            # in this dimension because negative maximums would not cause us to
            # preclude any entry in the array from being used.
            func_text += ("for {} in range(-min(0,{}),"
                          "{}[{}]-max(0,{})):\n").format(
                              index_vars[i], ranges[i][0], shape_name, i,
                              ranges[i][1])
            offset += 1

        # Indent to the innermost loop body.
        for j in range(offset):
            func_text += "    "
        # Put a sentinel in the code so we can locate it in the IR.  We will
        # remove this sentinel assignment and replace it with the IR for the
        # stencil kernel body.
        func_text += "{} = 0\n".format(sentinel_name)
        func_text += "    return {}\n".format(out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new stencil func text")
            print(func_text)

        # Force the new stencil function into existence.
        # NOTE(review): the trailing ``in globals(), locals()`` looks like a
        # leftover of the Python 2 ``exec ... in`` statement form; as written
        # it only builds and discards a tuple around the ``exec_`` call.
        # Presumably ``exec_`` defines the function in a namespace that the
        # ``eval`` below can see -- confirm against ``exec_``'s defaults.
        exec_(func_text) in globals(), locals()
        stencil_func = eval(stencil_func_name)
        if sigret is not None:
            pysig = utils.pysignature(stencil_func)
            sigret.pysig = pysig
        # Get the IR for the newly created stencil function.
        stencil_ir = compiler.run_frontend(stencil_func)
        ir_utils.remove_dels(stencil_ir.blocks)

        # rename all variables in stencil_ir afresh, except the names that
        # the kernel IR also refers to
        var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
        new_var_dict = {}
        reserved_names = (
            [sentinel_name, out_name, neighborhood_name, shape_name] +
            kernel_copy.arg_names + index_vars)
        for name, var in var_table.items():
            if not name in reserved_names:
                new_var_dict[name] = ir_utils.mk_unique_var(name)
        ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

        stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

        # Shift labels in the kernel copy so they are guaranteed unique
        # and don't conflict with any labels in the stencil_ir.
        kernel_copy.blocks = ir_utils.add_offset_to_labels(
            kernel_copy.blocks, stencil_stub_last_label)
        # Label reserved for the block holding the statements after the sentinel.
        new_label = max(kernel_copy.blocks.keys()) + 1
        # Adjust ret_blocks to account for addition of the offset.
        ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

        if config.DEBUG_ARRAY_OPT == 1:
            print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
            print("before replace sentinel stencil_ir")
            ir_utils.dump_blocks(stencil_ir.blocks)
            print("before replace sentinel kernel_copy")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Search all the blocks in the stencil outline for the sentinel.
        for label, block in stencil_ir.blocks.items():
            for i, inst in enumerate(block.body):
                if (isinstance(inst, ir.Assign)
                        and inst.target.name == sentinel_name):
                    # We found the sentinel assignment.
                    loc = inst.loc
                    scope = block.scope
                    # split block across __sentinel__
                    # A new block is allocated for the statements prior to the
                    # sentinel but the new block maintains the current block
                    # label.
                    prev_block = ir.Block(scope, loc)
                    prev_block.body = block.body[:i]
                    # The current block is used for statements after sentinel.
                    block.body = block.body[i + 1:]
                    # But the current block gets a new label.
                    body_first_label = min(kernel_copy.blocks.keys())

                    # The previous block jumps to the minimum labelled block of
                    # the kernel body.
                    prev_block.append(ir.Jump(body_first_label, loc))
                    # Add all the kernel body blocks to the stencil
                    # function's IR.
                    for (l, b) in kernel_copy.blocks.items():
                        stencil_ir.blocks[l] = b

                    stencil_ir.blocks[new_label] = block
                    stencil_ir.blocks[label] = prev_block
                    # Add a jump from all the blocks that previously contained
                    # a return in the stencil kernel to the block
                    # containing statements after the sentinel.
                    for ret_block in ret_blocks:
                        stencil_ir.blocks[ret_block].append(
                            ir.Jump(new_label, loc))
                    break
            else:
                # No sentinel in this block: keep scanning the next one.
                continue
            # The sentinel was handled; the block dict has been modified, so
            # stop iterating over it.
            break

        stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
        ir_utils.remove_dels(stencil_ir.blocks)

        assert (isinstance(the_array, types.Type))
        array_types = args

        new_stencil_param_types = list(array_types)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new_stencil_param_types", new_stencil_param_types)
            ir_utils.dump_blocks(stencil_ir.blocks)

        # Compile the combined stencil function with the replaced loop
        # body in it.
        new_func = compiler.compile_ir(self._typingctx, self._targetctx,
                                       stencil_ir, new_stencil_param_types,
                                       None, compiler.DEFAULT_FLAGS, {})
        return new_func
예제 #38
0
def lstsq_impl(a, b, rcond=-1.0):
    """Build the ``lstsq`` implementation specialised for the types *a* and *b*.

    At this level ``a`` and ``b`` are inspected for ``layout`` and ``dtype``;
    ``rcond`` is not used here.  The dtype obtained from
    ``np.promote_types`` fixes the signature of the external
    ``numba_ez_gelsd`` routine and selects the helper specialisations.

    Returns:
        The inner function taking ``(a, b, rcond=-1.0)``, which itself
        returns the tuple ``(x, res, rank, s[:minmn])``.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "lstsq")

    # B can be 1D or 2D.
    _check_linalg_1_or_2d_matrix(b, "lstsq")

    a_F_layout = a.layout == 'F'
    # NOTE(review): b_F_layout is not read anywhere in this function.
    b_F_layout = b.layout == 'F'

    # the typing context is not easily accessible in `@overload` mode
    # so type unification etc. is done manually below
    a_np_dt = np_support.as_dtype(a.dtype)
    b_np_dt = np_support.as_dtype(b.dtype)

    np_shared_dt = np.promote_types(a_np_dt, b_np_dt)
    nb_shared_dt = np_support.from_dtype(np_shared_dt)

    # convert typing floats to np floats for use in the impl
    # (types without an "underlying_float" attribute are used as-is)
    r_type = getattr(nb_shared_dt, "underlying_float", nb_shared_dt)
    if r_type.bitwidth == 32:
        real_dtype = np.float32
    else:
        real_dtype = np.float64

    # the lapack wrapper signature
    numba_ez_gelsd_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.intp,  # nrhs
        types.CPointer(nb_shared_dt),  # a
        types.intp,  # lda
        types.CPointer(nb_shared_dt),  # b
        types.intp,  # ldb
        types.CPointer(r_type),  # S
        types.float64,  # rcond
        types.CPointer(types.intc)  # rank
    )

    # the lapack wrapper function
    numba_ez_gelsd = types.ExternalFunction("numba_ez_gelsd",
                                            numba_ez_gelsd_sig)

    kind = ord(get_blas_kind(nb_shared_dt, "lstsq"))

    # The following functions select specialisations based on
    # information around 'b', a lot of this effort is required
    # as 'b' can be either 1D or 2D, and then there are
    # some optimisations available depending on real or complex
    # space.

    # get a specialisation for computing the number of RHS
    b_nrhs = _get_compute_nrhs(b)

    # get a specialised residual computation based on the dtype
    compute_res = _get_res_impl(nb_shared_dt, real_dtype, b)

    # b copy function
    b_copy_in = _get_copy_in_b_impl(b)

    # return blob function
    b_ret = _get_compute_return_impl(b)

    # check system is dimensionally valid function
    check_dimensionally_valid = _get_check_lstsq_dimensionally_valid_impl(a, b)

    def lstsq_impl(a, b, rcond=-1.0):
        # Runtime implementation: a has m rows and n columns (its last two
        # dimensions); nrhs comes from the specialisation chosen for b.
        n = a.shape[-1]
        m = a.shape[-2]
        nrhs = b_nrhs(b)

        # check the systems have no inf or NaN
        _check_finite_matrix(a)
        _check_finite_matrix(b)

        # check the system is dimensionally valid
        check_dimensionally_valid(a, b)

        minmn = min(m, n)
        maxmn = max(m, n)

        # a is destroyed on exit, copy it
        acpy = a.astype(np_shared_dt)
        if a_F_layout:
            acpy = np.copy(acpy)
        else:
            acpy = np.asfortranarray(acpy)

        # b is overwritten on exit with the solution, copy allocate
        # (allocated as (nrhs, maxmn) and transposed to get shape (maxmn, nrhs))
        bcpy = np.empty((nrhs, maxmn), dtype=np_shared_dt).T
        # specialised copy in due to b being 1 or 2D
        b_copy_in(bcpy, b, nrhs)

        # Allocate returns
        s = np.empty(minmn, dtype=real_dtype)
        # one-element array used as an output slot for the computed rank
        rank_ptr = np.empty(1, dtype=np.int32)

        r = numba_ez_gelsd(
            kind,  # kind
            m,  # m
            n,  # n
            nrhs,  # nrhs
            acpy.ctypes,  # a
            m,  # lda
            bcpy.ctypes,  # b
            maxmn,  # ldb
            s.ctypes,  # s
            rcond,  # rcond
            rank_ptr.ctypes  # rank
        )

        # a negative return code is treated as a fatal error
        if r < 0:
            fatal_error_func()
            assert 0  # unreachable

        # set rank to that which was computed
        rank = rank_ptr[0]

        # compute residuals
        # (an empty array is returned when rank < n or m <= n)
        if rank < n or m <= n:
            res = np.empty((0), dtype=real_dtype)
        else:
            # this requires additional dispatch as there's a faster
            # impl if the result is in the real domain (no abs() required)
            res = compute_res(bcpy, n, nrhs)

        # extract 'x', the solution
        x = b_ret(bcpy, n)

        # help liveness analysis: reference each buffer handed to the
        # external routine once more after the call (presumably so none of
        # them is released while only its raw pointer is in use)
        acpy.size
        bcpy.size
        s.size
        rank_ptr.size

        return (x, res, rank, s[:minmn])

    return lstsq_impl
예제 #39
0
파일: utils.py 프로젝트: TanB1xuan/sdc
def min_dtype_int_val(dtype):
    """Return the minimum value ``np.iinfo`` reports for *dtype*.

    *dtype* is first converted to a NumPy dtype with
    ``numpy_support.as_dtype``.
    """
    return np.iinfo(numpy_support.as_dtype(dtype)).min
예제 #40
0
파일: linalg.py 프로젝트: MPOWER4RU/numba
def lstsq_impl(a, b, rcond=-1.0):
    """Build the ``lstsq`` implementation specialised for the types *a* and *b*.

    At this level ``a`` and ``b`` are inspected for ``layout`` and ``dtype``;
    ``rcond`` is not used here.  The dtype obtained from
    ``np.promote_types`` fixes the signature of the external
    ``numba_ez_gelsd`` routine and selects the helper specialisations.

    Returns:
        The inner function taking ``(a, b, rcond=-1.0)``, which itself
        returns the tuple ``(x, res, rank, s[:minmn])``.
    """
    ensure_lapack()

    _check_linalg_matrix(a, "lstsq")

    # B can be 1D or 2D.
    _check_linalg_1_or_2d_matrix(b, "lstsq")

    a_F_layout = a.layout == 'F'
    # NOTE(review): b_F_layout is not read anywhere in this function.
    b_F_layout = b.layout == 'F'

    # the typing context is not easily accessible in `@overload` mode
    # so type unification etc. is done manually below
    a_np_dt = np_support.as_dtype(a.dtype)
    b_np_dt = np_support.as_dtype(b.dtype)

    np_shared_dt = np.promote_types(a_np_dt, b_np_dt)
    nb_shared_dt = np_support.from_dtype(np_shared_dt)

    # convert typing floats to np floats for use in the impl
    # (types without an "underlying_float" attribute are used as-is)
    r_type = getattr(nb_shared_dt, "underlying_float", nb_shared_dt)
    if r_type.bitwidth == 32:
        real_dtype = np.float32
    else:
        real_dtype = np.float64

    # the lapack wrapper signature
    numba_ez_gelsd_sig = types.intc(
        types.char,  # kind
        types.intp,  # m
        types.intp,  # n
        types.intp,  # nrhs
        types.CPointer(nb_shared_dt),  # a
        types.intp,  # lda
        types.CPointer(nb_shared_dt),  # b
        types.intp,  # ldb
        types.CPointer(r_type),  # S
        types.float64,  # rcond
        types.CPointer(types.intc)  # rank
    )

    # the lapack wrapper function
    numba_ez_gelsd = types.ExternalFunction("numba_ez_gelsd",
                                            numba_ez_gelsd_sig)

    kind = ord(get_blas_kind(nb_shared_dt, "lstsq"))

    # The following functions select specialisations based on
    # information around 'b', a lot of this effort is required
    # as 'b' can be either 1D or 2D, and then there are
    # some optimisations available depending on real or complex
    # space.

    # get a specialisation for computing the number of RHS
    b_nrhs = _get_compute_nrhs(b)

    # get a specialised residual computation based on the dtype
    compute_res = _get_res_impl(nb_shared_dt, real_dtype, b)

    # b copy function
    b_copy_in = _get_copy_in_b_impl(b)

    # return blob function
    b_ret = _get_compute_return_impl(b)

    # check system is dimensionally valid function
    check_dimensionally_valid = _get_check_lstsq_dimensionally_valid_impl(a, b)

    def lstsq_impl(a, b, rcond=-1.0):
        # Runtime implementation: a has m rows and n columns (its last two
        # dimensions); nrhs comes from the specialisation chosen for b.
        n = a.shape[-1]
        m = a.shape[-2]
        nrhs = b_nrhs(b)

        # check the systems have no inf or NaN
        _check_finite_matrix(a)
        _check_finite_matrix(b)

        # check the system is dimensionally valid
        check_dimensionally_valid(a, b)

        minmn = min(m, n)
        maxmn = max(m, n)

        # a is destroyed on exit, copy it
        acpy = a.astype(np_shared_dt)
        if a_F_layout:
            acpy = np.copy(acpy)
        else:
            acpy = np.asfortranarray(acpy)

        # b is overwritten on exit with the solution, copy allocate
        # (allocated as (nrhs, maxmn) and transposed to get shape (maxmn, nrhs))
        bcpy = np.empty((nrhs, maxmn), dtype=np_shared_dt).T
        # specialised copy in due to b being 1 or 2D
        b_copy_in(bcpy, b, nrhs)

        # Allocate returns
        s = np.empty(minmn, dtype=real_dtype)
        # one-element array used as an output slot for the computed rank
        rank_ptr = np.empty(1, dtype=np.int32)

        r = numba_ez_gelsd(
            kind,  # kind
            m,  # m
            n,  # n
            nrhs,  # nrhs
            acpy.ctypes,  # a
            m,  # lda
            bcpy.ctypes,  # b
            maxmn,  # ldb
            s.ctypes,  # s
            rcond,  # rcond
            rank_ptr.ctypes  # rank
        )

        # a negative return code is treated as a fatal error
        if r < 0:
            fatal_error_func()
            assert 0   # unreachable

        # set rank to that which was computed
        rank = rank_ptr[0]

        # compute residuals
        # (an empty array is returned when rank < n or m <= n)
        if rank < n or m <= n:
            res = np.empty((0), dtype=real_dtype)
        else:
            # this requires additional dispatch as there's a faster
            # impl if the result is in the real domain (no abs() required)
            res = compute_res(bcpy, n, nrhs)

        # extract 'x', the solution
        x = b_ret(bcpy, n)

        # help liveness analysis: reference each buffer handed to the
        # external routine once more after the call (presumably so none of
        # them is released while only its raw pointer is in use)
        acpy.size
        bcpy.size
        s.size
        rank_ptr.size

        return (x, res, rank, s[:minmn])

    return lstsq_impl
예제 #41
0
 def check(typestring, numba_type):
     """Assert *typestring* and *numba_type* convert to each other."""
     # Only native ordering and alignment is supported
     np_dtype = np.dtype(typestring)
     converted = numpy_support.from_dtype(np_dtype)
     self.assertEqual(converted, numba_type)
     recovered = numpy_support.as_dtype(numba_type)
     self.assertEqual(np_dtype, recovered)
예제 #42
0
 def check(typestring, numba_type):
     """Verify the dtype <-> Numba type mapping in both directions."""
     # Only native ordering and alignment is supported
     expected_dtype = np.dtype(typestring)
     # NumPy dtype -> Numba type
     self.assertEqual(numpy_support.from_dtype(expected_dtype), numba_type)
     # Numba type -> NumPy dtype
     self.assertEqual(expected_dtype, numpy_support.as_dtype(numba_type))
예제 #43
0
 def check(typechar, numba_type):
     """Assert ``f`` maps the dtype for *typechar* to exactly *numba_type*.

     Both the plain type character and its '='-prefixed spelling are
     checked, followed by the reverse conversion through ``as_dtype``.
     """
     # Only native ordering and alignment is supported
     plain_dtype = np.dtype(typechar)
     self.assertIs(f(plain_dtype), numba_type)
     native_dtype = np.dtype('=' + typechar)
     self.assertIs(f(native_dtype), numba_type)
     self.assertEqual(plain_dtype, numpy_support.as_dtype(numba_type))
예제 #44
0
 def check(base_inst, enum_def, type_class):
     """Assert ``as_dtype`` recovers the base dtype of an enum-wrapping type.

     *type_class* is instantiated with *enum_def* and the Numba type that
     corresponds to ``np.dtype(base_inst)``.
     """
     expected_dtype = np.dtype(base_inst)
     wrapped = type_class(enum_def, numpy_support.from_dtype(expected_dtype))
     self.assertEqual(expected_dtype, numpy_support.as_dtype(wrapped))
예제 #45
0
파일: kernelapi.py 프로젝트: yuguen/numba
 def array(self, shape, dtype):
     """Return an uninitialised NumPy array of *shape* for the given *dtype*.

     *dtype* is converted with ``numpy_support.as_dtype`` first.
     """
     return np.empty(shape, numpy_support.as_dtype(dtype))
예제 #46
0
 def array(self, shape, dtype):
     """Allocate an ``np.empty`` array of *shape*; *dtype* goes through ``as_dtype``."""
     np_dtype = numpy_support.as_dtype(dtype)
     allocated = np.empty(shape, np_dtype)
     return allocated
예제 #47
0
 def check(dtype, numba_type, code):
     """Assert *dtype* converts to *numba_type* with unit code *code*, and back."""
     converted = numpy_support.from_dtype(dtype)
     self.assertEqual(converted, numba_type)
     self.assertEqual(converted.unit_code, code)
     # Both the expected type and the converted one must map back to dtype.
     for nb_type in (numba_type, converted):
         self.assertEqual(numpy_support.as_dtype(nb_type), dtype)
예제 #48
0
파일: stencil.py 프로젝트: numba/numba
    def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
        # Overall approach:
        # 1) Construct a string containing a function definition for the stencil function
        #    that will execute the stencil kernel.  This function definition includes a
        #    unique stencil function name, the parameters to the stencil kernel, loop
        #    nests across the dimenions of the input array.  Those loop nests use the
        #    computed stencil kernel size so as not to try to compute elements where
        #    elements outside the bounds of the input array would be needed.
        # 2) The but of the loop nest in this new function is a special sentinel
        #    assignment.
        # 3) Get the IR of this new function.
        # 4) Split the block containing the sentinel assignment and remove the sentinel
        #    assignment.  Insert the stencil kernel IR into the stencil function IR
        #    after label and variable renaming of the stencil kernel IR to prevent
        #    conflicts with the stencil function IR.
        # 5) Compile the combined stencil function IR + stencil kernel IR into existence.

        # Copy the kernel so that our changes for this callsite
        # won't effect other callsites.
        (kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
                                            self.kernel_ir, calltypes)
        # The stencil kernel body becomes the body of a loop, for which args aren't needed.
        ir_utils.remove_args(kernel_copy.blocks)
        first_arg = kernel_copy.arg_names[0]

        in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
        name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
        ir_utils.apply_copy_propagate(
            kernel_copy.blocks,
            in_cps,
            name_var_table,
            typemap,
            copy_calltypes)

        if "out" in name_var_table:
            raise ValueError("Cannot use the reserved word 'out' in stencil kernels.")

        sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
        if config.DEBUG_ARRAY_OPT == 1:
            print("name_var_table", name_var_table, sentinel_name)

        the_array = args[0]

        if config.DEBUG_ARRAY_OPT == 1:
            print("_stencil_wrapper", return_type, return_type.dtype,
                                      type(return_type.dtype), args)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # We generate a Numba function to execute this stencil and here
        # create the unique name of this function.
        stencil_func_name = "__numba_stencil_%s_%s" % (
                                        hex(id(the_array)).replace("-", "_"),
                                        self.id)

        # We will put a loop nest in the generated function for each
        # dimension in the input array.  Here we create the name for
        # the index variable for each dimension.  index0, index1, ...
        index_vars = []
        for i in range(the_array.ndim):
            index_var_name = ir_utils.get_unused_var_name("index" + str(i),
                                                          name_var_table)
            index_vars += [index_var_name]

        # Create extra signature for out and neighborhood.
        out_name = ir_utils.get_unused_var_name("out", name_var_table)
        neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
                                                         name_var_table)
        sig_extra = ""
        if result is not None:
            sig_extra += ", {}=None".format(out_name)
        if "neighborhood" in dict(self.kws):
            sig_extra += ", {}=None".format(neighborhood_name)

        # Get a list of the standard indexed array names.
        standard_indexed = self.options.get("standard_indexing", [])

        if first_arg in standard_indexed:
            raise ValueError("The first argument to a stencil kernel must "
                             "use relative indexing, not standard indexing.")

        if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
            raise ValueError("Standard indexing requested for an array name "
                             "not present in the stencil kernel definition.")

        # Add index variables to getitems in the IR to transition the accesses
        # in the kernel from relative to regular Python indexing.  Returns the
        # computed size of the stencil kernel and a list of the relatively indexed
        # arrays.
        kernel_size, relatively_indexed = self.add_indices_to_kernel(
                kernel_copy, index_vars, the_array.ndim,
                self.neighborhood, standard_indexed, typemap, copy_calltypes)
        if self.neighborhood is None:
            self.neighborhood = kernel_size

        if config.DEBUG_ARRAY_OPT == 1:
            print("After add_indices_to_kernel")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # The return in the stencil kernel becomes a setitem for that
        # particular point in the iteration space.
        ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
                                                      index_vars, out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("After replace_return_with_setitem", ret_blocks)
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Start to form the new function to execute the stencil kernel.
        func_text = "def {}({}{}):\n".format(stencil_func_name,
                        ",".join(kernel_copy.arg_names), sig_extra)

        # Get loop ranges for each dimension, which could be either int
        # or variable. In the latter case we'll use the extra neighborhood
        # argument to the function.
        ranges = []
        for i in range(the_array.ndim):
            if isinstance(kernel_size[i][0], int):
                lo = kernel_size[i][0]
                hi = kernel_size[i][1]
            else:
                lo = "{}[{}][0]".format(neighborhood_name, i)
                hi = "{}[{}][1]".format(neighborhood_name, i)
            ranges.append((lo, hi))

        # If there are more than one relatively indexed arrays, add a call to
        # a function that will raise an error if any of the relatively indexed
        # arrays are of different size than the first input array.
        if len(relatively_indexed) > 1:
            func_text += "    raise_if_incompatible_array_sizes(" + first_arg
            for other_array in relatively_indexed:
                if other_array != first_arg:
                    func_text += "," + other_array
            func_text += ")\n"

        # Get the shape of the first input array.
        shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
        func_text += "    {} = {}.shape\n".format(shape_name, first_arg)


        # If we have to allocate the output array (the out argument was not used)
        # then use np.full if the user specified a cval stencil decorator option,
        # or np.zeros if they didn't.
        if result is None:
            return_type_name = numpy_support.as_dtype(
                               return_type.dtype).type.__name__
            if "cval" in self.options:
                cval = self.options["cval"]
                if return_type.dtype != typing.typeof.typeof(cval):
                    raise ValueError(
                        "cval type does not match stencil return type.")
                out_init ="{} = np.full({}, {}, dtype=np.{})\n".format(
                            out_name, shape_name, cval, return_type_name)
            else:
                out_init ="{} = np.zeros({}, dtype=np.{})\n".format(
                            out_name, shape_name, return_type_name)
            func_text += "    " + out_init
        else: # result is present, if cval is set then use it
            if "cval" in self.options:
                cval = self.options["cval"]
                cval_ty = typing.typeof.typeof(cval)
                if not self._typingctx.can_convert(cval_ty, return_type.dtype):
                    msg = "cval type does not match stencil return type."
                    raise ValueError(msg)
                out_init = "{}[:] = {}\n".format(out_name, cval)
                func_text += "    " + out_init

        offset = 1
        # Add the loop nests to the new function.
        for i in range(the_array.ndim):
            for j in range(offset):
                func_text += "    "
            # ranges[i][0] is the minimum index used in the i'th dimension
            # but minimum's greater than 0 don't preclude any entry in the array.
            # So, take the minimum of 0 and the minimum index found in the kernel
            # and this will be a negative number (potentially -0).  Then, we do
            # unary - on that to get the positive offset in this dimension whose
            # use is precluded.
            # ranges[i][1] is the maximum of 0 and the observed maximum index
            # in this dimension because negative maximums would not cause us to
            # preclude any entry in the array from being used.
            func_text += ("for {} in range(-min(0,{}),"
                          "{}[{}]-max(0,{})):\n").format(
                            index_vars[i],
                            ranges[i][0],
                            shape_name,
                            i,
                            ranges[i][1])
            offset += 1

        for j in range(offset):
            func_text += "    "
        # Put a sentinel in the code so we can locate it in the IR.  We will
        # remove this sentinel assignment and replace it with the IR for the
        # stencil kernel body.
        func_text += "{} = 0\n".format(sentinel_name)
        func_text += "    return {}\n".format(out_name)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new stencil func text")
            print(func_text)

        # Force the new stencil function into existence.
        exec_(func_text) in globals(), locals()
        stencil_func = eval(stencil_func_name)
        if sigret is not None:
            pysig = utils.pysignature(stencil_func)
            sigret.pysig = pysig
        # Get the IR for the newly created stencil function.
        stencil_ir = compiler.run_frontend(stencil_func)
        ir_utils.remove_dels(stencil_ir.blocks)

        # rename all variables in stencil_ir afresh
        var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
        new_var_dict = {}
        reserved_names = ([sentinel_name, out_name, neighborhood_name,
                           shape_name] + kernel_copy.arg_names + index_vars)
        for name, var in var_table.items():
            if not name in reserved_names:
                new_var_dict[name] = ir_utils.mk_unique_var(name)
        ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)

        stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1

        # Shift labels in the kernel copy so they are guaranteed unique
        # and don't conflict with any labels in the stencil_ir.
        kernel_copy.blocks = ir_utils.add_offset_to_labels(
                                kernel_copy.blocks, stencil_stub_last_label)
        new_label = max(kernel_copy.blocks.keys()) + 1
        # Adjust ret_blocks to account for addition of the offset.
        ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]

        if config.DEBUG_ARRAY_OPT == 1:
            print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
            print("before replace sentinel stencil_ir")
            ir_utils.dump_blocks(stencil_ir.blocks)
            print("before replace sentinel kernel_copy")
            ir_utils.dump_blocks(kernel_copy.blocks)

        # Search all the blocks in the stencil outline for the sentinel.
        for label, block in stencil_ir.blocks.items():
            for i, inst in enumerate(block.body):
                if (isinstance( inst, ir.Assign) and
                    inst.target.name == sentinel_name):
                    # We found the sentinel assignment.
                    loc = inst.loc
                    scope = block.scope
                    # split block across __sentinel__
                    # A new block is allocated for the statements prior to the
                    # sentinel but the new block maintains the current block
                    # label.
                    prev_block = ir.Block(scope, loc)
                    prev_block.body = block.body[:i]
                    # The current block is used for statements after sentinel.
                    block.body = block.body[i + 1:]
                    # But the current block gets a new label.
                    body_first_label = min(kernel_copy.blocks.keys())

                    # The previous block jumps to the minimum labelled block of
                    # the stencil kernel body.
                    prev_block.append(ir.Jump(body_first_label, loc))
                    # Add all the stencil kernel body blocks to the stencil
                    # function's IR.
                    for (l, b) in kernel_copy.blocks.items():
                        stencil_ir.blocks[l] = b

                    stencil_ir.blocks[new_label] = block
                    stencil_ir.blocks[label] = prev_block
                    # Add a jump from all the blocks that previously contained
                    # a return in the stencil kernel to the block
                    # containing statements after the sentinel.
                    for ret_block in ret_blocks:
                        stencil_ir.blocks[ret_block].append(
                            ir.Jump(new_label, loc))
                    break
            else:
                continue
            break

        stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
        ir_utils.remove_dels(stencil_ir.blocks)

        assert(isinstance(the_array, types.Type))
        array_types = args

        new_stencil_param_types = list(array_types)

        if config.DEBUG_ARRAY_OPT == 1:
            print("new_stencil_param_types", new_stencil_param_types)
            ir_utils.dump_blocks(stencil_ir.blocks)

        # Compile the combined stencil function with the replaced loop
        # body in it.
        new_func = compiler.compile_ir(
            self._typingctx,
            self._targetctx,
            stencil_ir,
            new_stencil_param_types,
            None,
            compiler.DEFAULT_FLAGS,
            {})
        return new_func
예제 #49
0
 def check(dtype, numba_type, code):
     """Assert that *dtype* and *numba_type* convert to each other.

     The type obtained from ``numpy_support.from_dtype(dtype)`` must equal
     *numba_type* and expose ``unit_code == code``; converting either type
     back with ``numpy_support.as_dtype`` must give *dtype* again.
     """
     converted = numpy_support.from_dtype(dtype)
     self.assertEqual(converted, numba_type)
     self.assertEqual(converted.unit_code, code)
     # Reverse direction: first the expected type, then the converted one.
     for nb_type in (numba_type, converted):
         self.assertEqual(numpy_support.as_dtype(nb_type), dtype)
예제 #50
0
 def check(typechar, numba_type):
     """Assert that the dtype for *typechar* maps to *numba_type* and back.

     ``f`` is the conversion callable taken from the enclosing scope; its
     result must be the very same object as *numba_type* (identity check).
     """
     # Only native ordering and alignment is supported, so the dtype is
     # built both from the bare type character and with the explicit '='
     # prefix.
     plain = np.dtype(typechar)
     self.assertIs(f(plain), numba_type)
     native_spec = '=' + typechar
     self.assertIs(f(np.dtype(native_spec)), numba_type)
     # Converting the expected type back must reproduce the plain dtype.
     round_tripped = numpy_support.as_dtype(numba_type)
     self.assertEqual(plain, round_tripped)
예제 #51
0
 def check(base_inst, enum_def, type_class):
     """Assert that ``as_dtype`` recovers the dtype built from *base_inst*.

     The dtype is converted with ``numpy_support.from_dtype``, wrapped as
     ``type_class(enum_def, <converted type>)``, and the wrapper is then
     passed to ``numpy_support.as_dtype``; the result must equal the
     starting dtype.
     """
     expected = np.dtype(base_inst)
     wrapped = type_class(enum_def, numpy_support.from_dtype(expected))
     self.assertEqual(expected, numpy_support.as_dtype(wrapped))