Example #1
def convert_types(restype, argtypes):
    # If restype is actually a full signature (e.g. 'int32(int32)'),
    # split it into its return type and argument types.
    if sigutils.is_signature(restype):
        assert argtypes is None
        argtypes, restype = sigutils.normalize_signature(restype)

    return restype, argtypes
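
A quick sketch of what the ``normalize_signature`` call above does with a type
string (the import path is an assumption: recent Numba releases expose it as
``numba.core.sigutils``, older ones as ``numba.sigutils``):

# Illustrative only: splitting a signature string into (argtypes, restype).
from numba.core import sigutils  # older releases: from numba import sigutils

argtypes, restype = sigutils.normalize_signature('int32(int32, float64)')
print(restype)   # int32
print(argtypes)  # (int32, float64)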
Example #2
def jit(signature=None, device=False):
    """JIT compile a python function conforming to
    the HSA-Python
    """
    if signature is None:
        return autojit(device=device)
    elif not sigutils.is_signature(signature):
        func = signature
        return autojit(device=device)(func)
    else:
        if device:
            return _device_jit(signature)
        else:
            return _kernel_jit(signature)
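
For context, a hypothetical use of this decorator under the old ``numba.hsa``
target (later renamed ``numba.roc`` and since removed; the import path and the
``get_global_id`` helper are assumptions based on that API):

# Hypothetical usage sketch of the HSA target (old numba.hsa API; assumed).
import numpy as np
from numba import hsa

@hsa.jit('void(int32[:])')
def double_in_place(arr):
    i = hsa.get_global_id(0)  # global work-item index
    if i < arr.size:
        arr[i] *= 2

data = np.arange(16, dtype=np.int32)
double_in_place[1, 16](data)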
Example #3
def jit(func_or_sig=None,
        argtypes=None,
        device=False,
        inline=False,
        bind=True,
        link=[],
        debug=None,
        **kws):
    """
    JIT compile a python function conforming to the CUDA Python specification.
    If a signature is supplied, then a function is returned that takes a
    function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a function
       to compile. If a function is supplied, then an :class:`AutoJitCUDAKernel`
       is returned. If a signature is supplied, then a function which takes a
       function to compile and returns an :class:`AutoJitCUDAKernel` is
       returned.

       .. note:: A kernel cannot have any return value.
    :type func_or_sig: function or numba.typing.Signature
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param bind: Force binding to CUDA context immediately
    :type bind: bool
    :param link: A list of files containing PTX source to link with the function
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes.  Defaults to False.  (The default value can be
       overridden by setting environment variable ``NUMBA_CUDA_DEBUGINFO=1``.)
    :param fastmath: If True, enables flush-to-zero and fused-multiply-add and
       disables precise division and square root. This parameter has no effect
       on device functions, whose fastmath setting depends on the kernel
       function from which they are called.
    """
    debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    fastmath = kws.get('fastmath', False)
    if argtypes is None and not sigutils.is_signature(func_or_sig):
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:

                def autojitwrapper(func):
                    return FakeCUDAKernel(func,
                                          device=device,
                                          fastmath=fastmath,
                                          debug=debug)
            else:

                def autojitwrapper(func):
                    return jit(func,
                               device=device,
                               bind=bind,
                               debug=debug,
                               **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig,
                                      device=device,
                                      fastmath=fastmath,
                                      debug=debug)
            elif device:
                return jitdevice(func_or_sig, debug=debug, **kws)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                return AutoJitCUDAKernel(func_or_sig,
                                         bind=bind,
                                         targetoptions=targetoptions)

    else:
        if config.ENABLE_CUDASIM:

            def jitwrapper(func):
                return FakeCUDAKernel(func,
                                      device=device,
                                      fastmath=fastmath,
                                      debug=debug)

            return jitwrapper

        restype, argtypes = convert_types(func_or_sig, argtypes)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            kernel = compile_kernel(func,
                                    argtypes,
                                    link=link,
                                    debug=debug,
                                    inline=inline,
                                    fastmath=fastmath)

            # Force compilation for the current context
            if bind:
                kernel.bind()

            return kernel

        def device_jit(func):
            return compile_device(func,
                                  restype,
                                  argtypes,
                                  inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
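
A minimal end-to-end sketch of the eager (signature-supplied) path, using the
standard ``numba.cuda`` API (array sizes and launch configuration are
illustrative):

import numpy as np
from numba import cuda

@cuda.jit('void(int32[:], int32[:])')
def add_one(src, dst):
    i = cuda.grid(1)          # absolute thread index in a 1D grid
    if i < src.size:
        dst[i] = src[i] + 1

src = np.arange(256, dtype=np.int32)
dst = np.zeros_like(src)
add_one[4, 64](src, dst)      # launch 4 blocks of 64 threads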
Example #4
def jit(signature_or_function=None, argtypes=None, restype=None, locals={},
        target='cpu', **targetoptions):
    """jit([signature_or_function, [locals={}, [target='cpu',
            [**targetoptions]]]])

    The function can be used in the following ways:

    1) jit(signature, [target='cpu', [**targetoptions]]) -> jit(function)

        Equivalent to:

            d = dispatcher(function, targetoptions)
            d.compile(signature)

        Create a dispatcher object for a python function and default
        target-options.  Then, compile the function with the given signature.

        Example:

            @jit("void(int32, float32)")
            def foo(x, y):
                return x + y

    2) jit(function) -> dispatcher

        Same as old autojit.  Create a dispatcher function object that
        specializes at the call site.

        Example:

            @jit
            def foo(x, y):
                return x + y

    3) jit([target='cpu', [**targetoptions]]) -> configured_jit(function)

        Same as old autojit and 2), but configured with a target and default
        target-options.


        Example:

            @jit(target='cpu', nopython=True)
            def foo(x, y):
                return x + y

    Target Options
    ---------------
    The CPU (default target) defines the following:

        - nopython: [bool]

            Set to True to disable the use of PyObjects and Python API
            calls.  The default behavior is to allow the use of PyObjects and
            Python API.  Default value is False.

        - forceobj: [bool]

            Set to True to force the use of PyObjects for every value.  Default
            value is False.

    """

    # Handle deprecated argtypes and restype keyword arguments
    if argtypes is not None:

        assert signature_or_function is None, "argtypes used but " \
                                              "signature is provided"
        warnings.warn("Keyword argument 'argtypes' is deprecated",
                      DeprecationWarning)
        if restype is None:
            signature_or_function = tuple(argtypes)
        else:
            signature_or_function = restype(*argtypes)

    # Handle signature
    if signature_or_function is None:
        # Used as autojit
        def configured_jit(arg):
            return jit(arg, locals=locals, target=target, **targetoptions)
        return configured_jit
    elif sigutils.is_signature(signature_or_function):
        # Function signature is provided
        sig = signature_or_function
        return _jit(sig, locals=locals, target=target,
                    targetoptions=targetoptions)
    else:
        # No signature is provided
        pyfunc = signature_or_function
        dispatcher = registry.target_registry[target]
        dispatcher = dispatcher(py_func=pyfunc, locals=locals,
                                targetoptions=targetoptions)
        # NOTE This affects import time for large function
        # # Compile a pure object mode
        # if target == 'cpu' and not targetoptions.get('nopython', False):
        #     dispatcher.compile((), locals=locals, forceobj=True)
        return dispatcher
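
A short sketch exercising the three call styles the docstring describes
(standard Numba ``jit`` API; the arithmetic is illustrative):

from numba import jit

@jit("int64(int64, int64)")   # 1) signature supplied: compiled eagerly
def add_eager(x, y):
    return x + y

@jit                          # 2) bare decorator: specializes at the call site
def add_lazy(x, y):
    return x + y

@jit(nopython=True)           # 3) configured first, then applied
def add_nopython(x, y):
    return x + y

assert add_eager(2, 3) == 5
assert add_lazy(2.5, 3.5) == 6.0
assert add_nopython(4, 5) == 9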
Example #5
def jit(func_or_sig=None, argtypes=None, device=False, inline=False, bind=True,
        link=[], debug=False, **kws):
    """
    JIT compile a python function conforming to the CUDA Python specification.
    If a signature is supplied, then a function is returned that takes a
    function to compile.

    :param func_or_sig: A function to JIT compile, or a signature of a function
       to compile. If a function is supplied, then an :class:`AutoJitCUDAKernel`
       is returned. If a signature is supplied, then a function which takes a
       function to compile and returns an :class:`AutoJitCUDAKernel` is
       returned.

       .. note:: A kernel cannot have any return value.
    :type func_or_sig: function or numba.typing.Signature
    :param device: Indicates whether this is a device function.
    :type device: bool
    :param bind: Force binding to CUDA context immediately
    :type bind: bool
    :param link: A list of files containing PTX source to link with the function
    :type link: list
    :param debug: If True, check for exceptions thrown when executing the
       kernel. Since this degrades performance, this should only be used for
       debugging purposes.
    :param fastmath: If True, enables flush-to-zero and fused-multiply-add and
       disables precise division and square root. This parameter has no effect
       on device functions, whose fastmath setting depends on the kernel
       function from which they are called.
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    # Resolve fastmath up front; it is referenced in both branches below.
    fastmath = kws.get('fastmath', False)
    if argtypes is None and not sigutils.is_signature(func_or_sig):
        if func_or_sig is None:
            if config.ENABLE_CUDASIM:
                def autojitwrapper(func):
                    return FakeCUDAKernel(func, device=device, fastmath=fastmath,
                                          debug=debug)
            else:
                def autojitwrapper(func):
                    return jit(func, device=device, bind=bind, **kws)

            return autojitwrapper
        # func_or_sig is a function
        else:
            if config.ENABLE_CUDASIM:
                return FakeCUDAKernel(func_or_sig, device=device,
                                      fastmath=fastmath, debug=debug)
            elif device:
                return jitdevice(func_or_sig, **kws)
            else:
                targetoptions = kws.copy()
                targetoptions['debug'] = debug
                return AutoJitCUDAKernel(func_or_sig, bind=bind, targetoptions=targetoptions)

    else:
        if config.ENABLE_CUDASIM:
            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath,
                                      debug=debug)
            return jitwrapper

        restype, argtypes = convert_types(func_or_sig, argtypes)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            kernel = compile_kernel(func, argtypes, link=link, debug=debug,
                                    inline=inline, fastmath=fastmath)

            # Force compilation for the current context
            if bind:
                kernel.bind()

            return kernel

        def device_jit(func):
            return compile_device(func, restype, argtypes, inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
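
The ``FakeCUDAKernel`` branches above can be exercised without a GPU by
enabling the simulator before Numba is imported (a sketch; the kernel and
printed result are illustrative):

import os
os.environ['NUMBA_ENABLE_CUDASIM'] = '1'  # must be set before importing numba

import numpy as np
from numba import cuda

@cuda.jit
def scale(arr, factor):
    i = cuda.grid(1)
    if i < arr.size:
        arr[i] *= factor

data = np.ones(8)
scale[1, 8](data, 3.0)
print(data)  # array of 3.0s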
Example #6
def jit(restype=None, argtypes=None, device=False, inline=False, bind=True,
        link=[], debug=False, **kws):
    """JIT compile a python function conforming to
    the CUDA-Python specification.

    To define a CUDA kernel that takes two int 1D-arrays::

        @cuda.jit('void(int32[:], int32[:])')
        def foo(aryA, aryB):
            ...

    .. note:: A kernel cannot have any return value.

    To launch the cuda kernel::

        griddim = 1, 2
        blockdim = 3, 4
        foo[griddim, blockdim](aryA, aryB)


    ``griddim`` is the number of thread blocks per grid.
    It can be:

    * an int;
    * tuple-1 of ints;
    * tuple-2 of ints.

    ``blockdim`` is the number of threads per block.
    It can be:

    * an int;
    * tuple-1 of ints;
    * tuple-2 of ints;
    * tuple-3 of ints.

    The above code is equivalent to the following CUDA-C.

    .. code-block:: c

        dim3 griddim(1, 2);
        dim3 blockdim(3, 4);
        foo<<<griddim, blockdim>>>(aryA, aryB);


    To access the compiled PTX code::

        print(foo.ptx)


    To define a CUDA device function that takes two ints and returns an int::

        @cuda.jit('int32(int32, int32)', device=True)
        def bar(a, b):
            ...

    To force inline the device function::

        @cuda.jit('int32(int32, int32)', device=True, inline=True)
        def bar_forced_inline(a, b):
            ...

    A device function can only be used inside another kernel.
    It cannot be called from the host.

    Using ``bar`` in a CUDA kernel::

        @cuda.jit('void(int32[:], int32[:], int32[:])')
        def use_bar(aryA, aryB, aryOut):
            i = cuda.grid(1) # global position of the thread for a 1D grid.
            aryOut[i] = bar(aryA[i], aryB[i])

    When the function signature is not given, this decorator behaves like
    autojit.
    """

    if argtypes is None and not sigutils.is_signature(restype):
        if restype is None:
            return autojit(device=device, bind=bind, link=link, debug=debug,
                           inline=inline, **kws)

        # restype is a function
        else:
            decor = autojit(device=device, bind=bind, link=link, debug=debug,
                            inline=inline, **kws)
            return decor(restype)

    else:
        restype, argtypes = convert_types(restype, argtypes)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            kernel = compile_kernel(func, argtypes, link=link, debug=debug,
                                    inline=inline)

            # Force compilation for the current context
            if bind:
                kernel.bind()

            return kernel

        def device_jit(func):
            return compile_device(func, restype, argtypes, inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
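
A runnable sketch of the device-function pattern this docstring describes
(standard ``numba.cuda`` API; the data is illustrative):

import numpy as np
from numba import cuda

@cuda.jit('int32(int32, int32)', device=True)
def bar(a, b):
    return a + b

@cuda.jit('void(int32[:], int32[:], int32[:])')
def use_bar(aryA, aryB, aryOut):
    i = cuda.grid(1)  # global position of the thread for a 1D grid
    if i < aryOut.size:
        aryOut[i] = bar(aryA[i], aryB[i])

a = np.arange(64, dtype=np.int32)
b = np.ones(64, dtype=np.int32)
out = np.zeros_like(a)
use_bar[1, 64](a, b, out)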
Example #7
def jit(restype=None,
        argtypes=None,
        device=False,
        inline=False,
        bind=True,
        link=[],
        debug=False,
        **kws):
    """JIT compile a python function conforming to
    the CUDA-Python specification.

    To define a CUDA kernel that takes two int 1D-arrays::

        @cuda.jit('void(int32[:], int32[:])')
        def foo(aryA, aryB):
            ...

    .. note:: A kernel cannot have any return value.

    To launch the cuda kernel::

        griddim = 1, 2
        blockdim = 3, 4
        foo[griddim, blockdim](aryA, aryB)


    ``griddim`` is the number of thread blocks per grid.
    It can be:

    * an int;
    * tuple-1 of ints;
    * tuple-2 of ints.

    ``blockdim`` is the number of threads per block.
    It can be:

    * an int;
    * tuple-1 of ints;
    * tuple-2 of ints;
    * tuple-3 of ints.

    The above code is equivalent to the following CUDA-C.

    .. code-block:: c

        dim3 griddim(1, 2);
        dim3 blockdim(3, 4);
        foo<<<griddim, blockdim>>>(aryA, aryB);


    To access the compiled PTX code::

        print(foo.ptx)


    To define a CUDA device function that takes two ints and returns an int::

        @cuda.jit('int32(int32, int32)', device=True)
        def bar(a, b):
            ...

    To force inline the device function::

        @cuda.jit('int32(int32, int32)', device=True, inline=True)
        def bar_forced_inline(a, b):
            ...

    A device function can only be used inside another kernel.
    It cannot be called from the host.

    Using ``bar`` in a CUDA kernel::

        @cuda.jit('void(int32[:], int32[:], int32[:])')
        def use_bar(aryA, aryB, aryOut):
            i = cuda.grid(1) # global position of the thread for a 1D grid.
            aryOut[i] = bar(aryA[i], aryB[i])

    When the function signature is not given, this decorator behaves like
    autojit.


    The following additional options are available for kernel functions only.
    They are ignored in device functions.

    - fastmath: bool
        Enables flush-to-zero for denormal float;
        Enables fused-multiply-add;
        Disables precise division;
        Disables precise square root.
    """

    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if argtypes is None and not sigutils.is_signature(restype):
        if restype is None:
            return autojit(device=device,
                           bind=bind,
                           link=link,
                           debug=debug,
                           inline=inline,
                           **kws)

        # restype is a function
        else:
            decor = autojit(device=device,
                            bind=bind,
                            link=link,
                            debug=debug,
                            inline=inline,
                            **kws)
            return decor(restype)

    else:
        fastmath = kws.get('fastmath', False)
        if config.ENABLE_CUDASIM:

            def jitwrapper(func):
                return FakeCUDAKernel(func,
                                      device=device,
                                      fastmath=fastmath,
                                      debug=debug)

            return jitwrapper

        restype, argtypes = convert_types(restype, argtypes)

        if restype and not device and restype != types.void:
            raise TypeError("CUDA kernel must have void return type.")

        def kernel_jit(func):
            kernel = compile_kernel(func,
                                    argtypes,
                                    link=link,
                                    debug=debug,
                                    inline=inline,
                                    fastmath=fastmath)

            # Force compilation for the current context
            if bind:
                kernel.bind()

            return kernel

        def device_jit(func):
            return compile_device(func,
                                  restype,
                                  argtypes,
                                  inline=inline,
                                  debug=debug)

        if device:
            return device_jit
        else:
            return kernel_jit
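
A sketch of the ``fastmath`` kernel option described above (standard
``numba.cuda`` API; the kernel body is illustrative):

import math
import numpy as np
from numba import cuda

@cuda.jit('void(float32[:], float32[:])', fastmath=True)
def fast_rsqrt(src, dst):
    i = cuda.grid(1)
    if i < src.size:
        # Compiled with flush-to-zero and imprecise division/sqrt.
        dst[i] = 1.0 / math.sqrt(src[i])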
Example #8
def jit(signature_or_function=None,
        argtypes=None,
        restype=None,
        locals={},
        target='cpu',
        **targetoptions):
    """jit([signature_or_function, [locals={}, [target='cpu',
            [**targetoptions]]]])

    The function can be used in the following ways:

    1) jit(signature, [target='cpu', [**targetoptions]]) -> jit(function)

        Equivalent to:

            d = dispatcher(function, targetoptions)
            d.compile(signature)

        Create a dispatcher object for a python function and default
        target-options.  Then, compile the function with the given signature.

        Example:

            @jit("void(int32, float32)")
            def foo(x, y):
                return x + y

    2) jit(function) -> dispatcher

        Same as old autojit.  Create a dispatcher function object that
        specializes at the call site.

        Example:

            @jit
            def foo(x, y):
                return x + y

    3) jit([target='cpu', [**targetoptions]]) -> configured_jit(function)

        Same as old autojit and 2), but configured with a target and default
        target-options.


        Example:

            @jit(target='cpu', nopython=True)
            def foo(x, y):
                return x + y

    Target Options
    ---------------
    The CPU (default target) defines the following:

        - nopython: [bool]

            Set to True to disable the use of PyObjects and Python API
            calls.  The default behavior is to allow the use of PyObjects and
            Python API.  Default value is False.

        - forceobj: [bool]

            Set to True to force the use of PyObjects for every value.  Default
            value is False.

    """

    # Handle deprecated argtypes and restype keyword arguments
    if argtypes is not None:

        assert signature_or_function is None, "argtypes used but " \
                                              "signature is provided"
        warnings.warn("Keyword argument 'argtypes' is deprecated",
                      DeprecationWarning)
        if restype is None:
            signature_or_function = tuple(argtypes)
        else:
            signature_or_function = restype(*argtypes)

    # Handle signature
    if signature_or_function is None:
        # Used as autojit
        def configured_jit(arg):
            return jit(arg, locals=locals, target=target, **targetoptions)

        return configured_jit
    elif sigutils.is_signature(signature_or_function):
        # Function signature is provided
        sig = signature_or_function
        return _jit(sig,
                    locals=locals,
                    target=target,
                    targetoptions=targetoptions)
    else:
        # No signature is provided
        pyfunc = signature_or_function
        dispatcher = registry.target_registry[target]
        dispatcher = dispatcher(py_func=pyfunc,
                                locals=locals,
                                targetoptions=targetoptions)
        # NOTE This affects import time for large function
        # # Compile a pure object mode
        # if target == 'cpu' and not targetoptions.get('nopython', False):
        #     dispatcher.compile((), locals=locals, forceobj=True)
        return dispatcher
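
For reference, the deprecated ``restype(*argtypes)`` branch above works because
Numba type objects are callable and return a ``Signature`` (a sketch with
illustrative types):

from numba import int64, float64

sig = int64(int64, float64)  # same as the string form 'int64(int64, float64)'
print(sig.return_type)       # int64
print(sig.args)              # (int64, float64)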