Example #1
    def compile_device(self, args):
        """Compile the device function for the given argument types.

        Each signature is compiled once by caching the compiled function inside
        this object.

        Returns the `CompileResult`.
        """
        if args not in self.overloads:

            debug = self.targetoptions.get('debug')
            inline = self.targetoptions.get('inline')

            nvvm_options = {
                'debug': debug,
                'opt': 3 if self.targetoptions.get('opt') else 0
            }

            cres = compile_cuda(self.py_func,
                                None,
                                args,
                                debug=debug,
                                inline=inline,
                                nvvm_options=nvvm_options)
            self.overloads[args] = cres

            # The inserted function uses the id of the CompileResult as a key,
            # consistent with get_overload() above.
            cres.target_context.insert_user_function(id(cres), cres.fndesc,
                                                     [cres.library])
        else:
            cres = self.overloads[args]

        return cres
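
The caching described in the docstring ("each signature is compiled once") can be illustrated with a plain-Python analogue. This is only a sketch of the pattern; DeviceFunctionCache and fake_compile are made-up names, not part of Numba.

# Stand-alone sketch of the signature-keyed cache used by compile_device.
# fake_compile stands in for compile_cuda and is not a real Numba API.
def fake_compile(py_func, args):
    print(f'compiling {py_func.__name__} for {args}')
    return object()  # placeholder for a CompileResult

class DeviceFunctionCache:
    def __init__(self, py_func):
        self.py_func = py_func
        self.overloads = {}  # argument-type tuple -> compile result

    def compile_device(self, args):
        if args not in self.overloads:
            # Compile only on the first request for this signature.
            self.overloads[args] = fake_compile(self.py_func, args)
        return self.overloads[args]

def add_one(x):
    return x + 1

cache = DeviceFunctionCache(add_one)
r1 = cache.compile_device(('float32',))
r2 = cache.compile_device(('float32',))  # cache hit: no second compilation
assert r1 is r2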
Example #2
File: dispatcher.py Project: braniii/numba
    def compile_device(self, args):
        """Compile the device function for the given argument types.

        Each signature is compiled once by caching the compiled function inside
        this object.

        Returns the `CompileResult`.
        """
        if args not in self.overloads:

            debug = self.targetoptions.get('debug')
            inline = self.targetoptions.get('inline')
            fastmath = self.targetoptions.get('fastmath')

            nvvm_options = {
                'debug': debug,
                'opt': 3 if self.targetoptions.get('opt') else 0,
                'fastmath': fastmath
            }

            cres = compile_cuda(self.py_func,
                                None,
                                args,
                                debug=debug,
                                inline=inline,
                                fastmath=fastmath,
                                nvvm_options=nvvm_options)
            self.overloads[args] = cres

            cres.target_context.insert_user_function(cres.entry_point,
                                                     cres.fndesc,
                                                     [cres.library])
        else:
            cres = self.overloads[args]

        return cres
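
Compared with Example #1, this version also threads fastmath through both compile_cuda and the NVVM options, and registers the compiled function under cres.entry_point instead of id(cres). Below is a small sketch of how the target options translate into NVVM options; the option values are made up for illustration, roughly what @cuda.jit(device=True, fastmath=True, opt=False) might produce.

# Hypothetical targetoptions; values are illustrative only.
targetoptions = {'debug': False, 'inline': False, 'fastmath': True, 'opt': False}

nvvm_options = {
    'debug': targetoptions.get('debug'),
    'opt': 3 if targetoptions.get('opt') else 0,
    'fastmath': targetoptions.get('fastmath'),
}
print(nvvm_options)  # {'debug': False, 'opt': 0, 'fastmath': True}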
Example #3
    def __init__(self,
                 py_func,
                 argtypes,
                 link=None,
                 debug=False,
                 lineinfo=False,
                 inline=False,
                 fastmath=False,
                 extensions=None,
                 max_registers=None,
                 opt=True,
                 device=False):

        if device:
            raise RuntimeError('Cannot compile a device function as a kernel')

        super().__init__()

        self.py_func = py_func
        self.argtypes = argtypes
        self.debug = debug
        self.lineinfo = lineinfo
        self.extensions = extensions or []

        nvvm_options = {
            'debug': self.debug,
            'lineinfo': self.lineinfo,
            'fastmath': fastmath,
            'opt': 3 if opt else 0
        }

        cres = compile_cuda(self.py_func,
                            types.void,
                            self.argtypes,
                            debug=self.debug,
                            lineinfo=self.lineinfo,
                            inline=inline,
                            fastmath=fastmath,
                            nvvm_options=nvvm_options)
        tgt_ctx = cres.target_context
        code = self.py_func.__code__
        filename = code.co_filename
        linenum = code.co_firstlineno
        lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                                  debug, nvvm_options,
                                                  filename, linenum,
                                                  max_registers)

        if not link:
            link = []

        # A kernel needs cooperative launch if grid_sync is being used.
        self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
        # We need to link against cudadevrt if grid sync is being used.
        if self.cooperative:
            link.append(get_cudalib('cudadevrt', static=True))

        for filepath in link:
            lib.add_linking_file(filepath)

        # populate members
        self.entry_name = kernel.name
        self.signature = cres.signature
        self._type_annotation = cres.type_annotation
        self._codelibrary = lib
        self.call_helper = cres.call_helper
Example #4
    print(f'Compute capability {cc} / argtypes {types}:\n')
    print(llir)
    print()

# LLVM to PTX

from numba.core.compiler_lock import global_compiler_lock  # noqa
from numba.cuda.cudadrv import nvvm  # noqa
from numba.cuda.compiler import compile_cuda  # noqa
from numba import float32, int32, void  # noqa

# Have to cheat a bit here to get everything needed to give to NVVM
with global_compiler_lock:
    argtys = (float32[:], int32, float32[:], float32[:])
    returnty = void
    cres = compile_cuda(axpy.py_func, returnty, argtys, debug=False,
                        inline=False)
    fname = cres.fndesc.llvm_func_name
    lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library,
                                                          fname,
                                                          cres.signature.args,
                                                          debug=False)
    llvm_module = lib._final_module

    cc = (5, 2)
    arch = nvvm.get_arch_option(*cc)
    llvmir = str(llvm_module)
    ptx = nvvm.llvm_to_ptx(llvmir, opt=3, arch=arch)

print(ptx.decode('utf-8'))

# PTX to module
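
The "# PTX to module" step is left empty above. A minimal sketch of how it could continue, assuming this Numba version exposes create_module_ptx on the current context and get_function on the resulting module, and that kernel.name holds the mangled kernel name; a CUDA-capable GPU is required.

from numba import cuda  # noqa

# Load the generated PTX into a module on the current CUDA context and look
# up the kernel by its mangled name. API availability is assumed here, not
# guaranteed across Numba versions.
ctx = cuda.current_context()
module = ctx.create_module_ptx(ptx)
cufunc = module.get_function(kernel.name)
print(cufunc)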
Example #5
File: dispatcher.py Project: braniii/numba
    def __init__(self,
                 py_func,
                 argtypes,
                 link=None,
                 debug=False,
                 lineinfo=False,
                 inline=False,
                 fastmath=False,
                 extensions=None,
                 max_registers=None,
                 opt=True,
                 device=False):

        if device:
            raise RuntimeError('Cannot compile a device function as a kernel')

        super().__init__()

        # _DispatcherBase.nopython_signatures() expects this attribute to be
        # present, because it assumes an overload is a CompileResult. In the
        # CUDA target, _Kernel instances are stored instead, so we provide this
        # attribute here to avoid duplicating nopython_signatures() in the CUDA
        # target with slight modifications.
        self.objectmode = False

        # The finalizer constructed by _DispatcherBase._make_finalizer also
        # expects overloads to be a CompileResult. It uses the entry_point to
        # remove a CompileResult from a target context. However, since we never
        # insert kernels into a target context (there is no need because they
        # cannot be called by other functions, only through the dispatcher) it
        # suffices to pretend we have an entry point of None.
        self.entry_point = None

        self.py_func = py_func
        self.argtypes = argtypes
        self.debug = debug
        self.lineinfo = lineinfo
        self.extensions = extensions or []

        nvvm_options = {
            'debug': self.debug,
            'lineinfo': self.lineinfo,
            'fastmath': fastmath,
            'opt': 3 if opt else 0
        }

        cres = compile_cuda(self.py_func,
                            types.void,
                            self.argtypes,
                            debug=self.debug,
                            lineinfo=self.lineinfo,
                            inline=inline,
                            fastmath=fastmath,
                            nvvm_options=nvvm_options)
        tgt_ctx = cres.target_context
        code = self.py_func.__code__
        filename = code.co_filename
        linenum = code.co_firstlineno
        lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                                  debug, nvvm_options,
                                                  filename, linenum,
                                                  max_registers)

        if not link:
            link = []

        # A kernel needs cooperative launch if grid_sync is being used.
        self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
        # We need to link against cudadevrt if grid sync is being used.
        if self.cooperative:
            link.append(get_cudalib('cudadevrt', static=True))

        for filepath in link:
            lib.add_linking_file(filepath)

        # populate members
        self.entry_name = kernel.name
        self.signature = cres.signature
        self._type_annotation = cres.type_annotation
        self._codelibrary = lib
        self.call_helper = cres.call_helper
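
The cooperative-launch handling in this constructor reduces to a substring test on the generated assembly, followed by one extra static library on the link line. Here is an illustrative stand-alone sketch of that decision; the assembly fragment and library path are fabricated, and the real code obtains the path via get_cudalib('cudadevrt', static=True).

# Illustrative only: detect grid sync usage and extend the link list.
def needs_cooperative_launch(asm):
    return 'cudaCGGetIntrinsicHandle' in asm

sample_asm = '// fabricated PTX fragment\ncall cudaCGGetIntrinsicHandle;\n'
link = []
if needs_cooperative_launch(sample_asm):
    link.append('/path/to/libcudadevrt.a')  # placeholder for get_cudalib(...)
print(link)  # ['/path/to/libcudadevrt.a']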