예제 #1
0
 def llvm_cpython_wrapper_name(self):
     """
     Name under which the CPython-compatible wrapper of the raw function
     (i.e. a PyCFunctionWithKeywords) is registered with LLVM.

     The name is ``self.mangled_name`` with the ``cpython`` namespace
     prepended by ``itanium_mangler.prepend_namespace``.
     """
     raw_name = self.mangled_name
     return itanium_mangler.prepend_namespace(raw_name, ns="cpython")
예제 #2
0
    def prepare_cuda_kernel(self, codelib, func_name, argtypes, debug,
                            nvvm_options, max_registers=None):
        """
        Build a kernel library around an already-compiled device function.

        A new code library is created, ``codelib`` is added to it as a
        linking library, and a wrapper function usable as the kernel entry
        point is generated inside it for the device function ``func_name``
        taking arguments of types ``argtypes``.

        Returns a ``(library, wrapper)`` tuple: the new code library and the
        wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        func_name:     The mangled name of the device function.
        argtypes:      An iterable of the types of arguments to the kernel.
        debug:         Whether to compile with debug.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        max_registers: The max_registers argument for the code library.
        """
        # The kernel entry point is the device function's name placed in
        # the 'cudapy' namespace.
        entry_point = itanium_mangler.prepend_namespace(func_name, ns='cudapy')

        codegen = self.codegen()
        kernel_lib = codegen.create_library(
            f'{codelib.name}_kernel_',
            entry_name=entry_point,
            nvvm_options=nvvm_options,
            max_registers=max_registers,
        )
        # Register the original code library as a linking library of the
        # new one.
        kernel_lib.add_linking_library(codelib)

        kernel_wrapper = self.generate_kernel_wrapper(
            kernel_lib, entry_point, func_name, argtypes, debug)
        return kernel_lib, kernel_wrapper
예제 #3
0
    def generate_kernel_wrapper(self, library, fname, argtypes, debug):
        """
        Generate the kernel wrapper in the given ``library``.

        The function being wrapped has the name ``fname`` and argument types
        ``argtypes``.  A new module is created holding a declaration of the
        wrapped function, a void wrapper function whose name is ``fname``
        with the ``cudapy`` namespace prepended, and a set of 32-bit global
        variables (error code plus x/y/z thread and block indices) named
        after the wrapper.  The module is added to ``library``, which is
        then finalized.

        Parameters:

        library:   The code library that receives the wrapper module.  It is
                   finalized by this method.
        fname:     Name of the function to wrap.
        argtypes:  Types of the arguments of the wrapped function.
        debug:     If true, the wrapper checks the status returned by the
                   wrapped function and records the first user exception
                   code and the raising thread's indices in the globals.

        Returns the wrapper function as looked up from ``library`` after
        finalization.
        """
        arginfo = self.get_arg_packer(argtypes)
        argtys = list(arginfo.argument_types)
        # Signature of the wrapper: takes the packed argument types, returns
        # nothing.
        wrapfnty = ir.FunctionType(ir.VoidType(), argtys)
        wrapper_module = self.create_module("cuda.kernel.wrapper")
        # Signature of the wrapped function: a 32-bit integer result, with
        # the calling convention's return-value slot as the first parameter
        # followed by the packed arguments.
        fnty = ir.FunctionType(
            ir.IntType(32),
            [self.call_conv.get_return_type(types.pyobject)] + argtys)
        # No body is added to ``func`` in this method; it is only called.
        func = ir.Function(wrapper_module, fnty, fname)

        prefixed = itanium_mangler.prepend_namespace(func.name, ns='cudapy')
        wrapfn = ir.Function(wrapper_module, wrapfnty, prefixed)
        builder = ir.IRBuilder(wrapfn.append_basic_block(''))

        # Define error handling variables
        # Each one is a 32-bit integer global in the wrapper module, named
        # ``<wrapper name><postfix>`` and given a ``None`` constant as its
        # initializer (presumably zero-initialisation -- confirm against
        # llvmlite).  They are defined whether or not ``debug`` is set.
        def define_error_gv(postfix):
            name = wrapfn.name + postfix
            gv = cgutils.add_global_variable(wrapper_module, ir.IntType(32),
                                             name)
            gv.initializer = ir.Constant(gv.type.pointee, None)
            return gv

        gv_exc = define_error_gv("__errcode__")
        # One global per dimension, in x, y, z order.
        gv_tid = []
        gv_ctaid = []
        for i in 'xyz':
            gv_tid.append(define_error_gv("__tid%s__" % i))
            gv_ctaid.append(define_error_gv("__ctaid%s__" % i))

        # Unpack the wrapper's arguments and call the wrapped function.  The
        # returned value is discarded; only the status is used.
        callargs = arginfo.from_arguments(builder, wrapfn.args)
        status, _ = self.call_conv.call_function(builder, func, types.void,
                                                 argtypes, callargs)

        if debug:
            # Check error status
            # Return immediately when the call succeeded.
            with cgutils.if_likely(builder, status.is_ok):
                builder.ret_void()

            with builder.if_then(builder.not_(status.is_python_exc)):
                # User exception raised
                # Expected "no error recorded yet" value of the error global.
                old = ir.Constant(gv_exc.type.pointee, None)

                # Use atomic cmpxchg to prevent rewriting the error status
                # Only the first error is recorded

                if nvvm.NVVM().is_nvvm70:
                    xchg = builder.cmpxchg(gv_exc, old, status.code,
                                           'monotonic', 'monotonic')
                    # Element 1 of the cmpxchg result is used as the
                    # "exchange happened" flag.
                    changed = builder.extract_value(xchg, 1)
                else:
                    # Otherwise call a separately provided compare-and-swap
                    # function, declared here by name only.
                    casfnty = ir.FunctionType(
                        old.type, [gv_exc.type, old.type, old.type])

                    cas_hack = "___numba_atomic_i32_cas_hack"
                    casfn = ir.Function(wrapper_module, casfnty, name=cas_hack)
                    xchg = builder.call(casfn, [gv_exc, old, status.code])
                    # The swap took place if the value returned equals the
                    # expected old value.
                    changed = builder.icmp_unsigned('==', xchg, old)

                # If the exchange is successful, save the thread's tid and
                # ctaid values for each dimension into the globals.
                sreg = nvvmutils.SRegBuilder(builder)
                with builder.if_then(changed):
                    for dim, ptr, in zip("xyz", gv_tid):
                        val = sreg.tid(dim)
                        builder.store(val, ptr)

                    for dim, ptr, in zip("xyz", gv_ctaid):
                        val = sreg.ctaid(dim)
                        builder.store(val, ptr)

        builder.ret_void()

        # Mark the wrapper as a CUDA kernel, hand the module to the library
        # and finalize it, then return the wrapper as looked up by name from
        # the finalized library.
        nvvm.set_cuda_kernel(wrapfn)
        library.add_ir_module(wrapper_module)
        library.finalize()
        wrapfn = library.get_function(wrapfn.name)
        return wrapfn
# Presumably a shell step rather than Python: file axpy.cubin

# SASS

# TODO: come back to this once the toolkit has been downloaded
# (presumably the CUDA toolkit, which provides cuobjdump).
# Presumably a shell step rather than Python: cuobjdump -sass axpy.cubin

# Load module
# NOTE: `cuda`, `cubin` and `fname` are not defined in this part of the
# script; they are expected to come from earlier code.

from numba.cuda.cudadrv.driver import load_module_image  # noqa
from numba.core import itanium_mangler  # noqa

ctx = cuda.get_current_device().get_primary_context()
module = load_module_image(ctx, cubin)

# The function is looked up in the module under `fname` with the 'cudapy'
# namespace prepended.
mangled_name = itanium_mangler.prepend_namespace(fname, ns='cudapy')

cufunc = module.get_function(mangled_name)

# Bare expression: the result is discarded when this runs as a script
# (presumably left over from an interactive session).
type(cufunc)

# Launch kernel

# Copy our arrays to the device - normally Numba does this for us.
# NOTE: `r`, `x` and `y` are expected to be defined earlier in the script.
d_r = cuda.to_device(r)
d_x = cuda.to_device(x)
d_y = cuda.to_device(y)

# A couple of helpers from Numba's CUDA driver implementation
from numba.cuda.cudadrv.driver import (device_pointer, is_device_memory,
                                       device_ctypes_pointer)  # noqa