def run_test():
    # on windows only nvvm is available to numba
    if sys.platform.startswith('win'):
        nvvm = NVVM()
        print("NVVM version", nvvm.get_version())
        return nvvm.get_version() is not None
    if not test():
        return False
    nvvm = NVVM()
    print("NVVM version", nvvm.get_version())
    # check pkg version matches lib pulled in
    cublas = get_cudalib('cublas')
    # also check cufft b/c cublas has an incorrect version in 10.1 update 1
    cufft = get_cudalib('cufft')
    return bool(cublas) and bool(cufft)
def run_test():
    if not test():
        return False
    nvvm = NVVM()
    print("NVVM version", nvvm.get_version())
    # check pkg version matches lib pulled in
    gotlib = get_cudalib('cublas')
    lookfor = '9.2'
    if sys.platform.startswith('win'):
        # windows libs have no dot
        lookfor = lookfor.replace('.', '')
    return lookfor in gotlib
def run_test():
    if not test():
        return False
    nvvm = NVVM()
    print("NVVM version", nvvm.get_version())
    # check pkg version matches lib pulled in
    gotlib = get_cudalib('cublas')
    lookfor = os.environ['PKG_VERSION']
    if sys.platform.startswith('win'):
        # windows libs have no dot
        lookfor = lookfor.replace('.', '')
    return lookfor in gotlib
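# A hedged illustration of the substring check above: Windows CUDA libraries
# carry the version in the file name with the dot removed, hence the
# replace('.', '') before the containment test. The lib name shown is
# illustrative, not an actual get_cudalib() result.
#
#     >>> gotlib = 'cublas64_92.dll'   # hypothetical resolved lib name
#     >>> lookfor = '9.2'.replace('.', '')
#     >>> lookfor in gotlib
#     True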
def run_test():
    # on windows only nvvm is available to numba
    if sys.platform.startswith('win'):
        nvvm = NVVM()
        print("NVVM version", nvvm.get_version())
        return nvvm.get_version() is not None
    if not test():
        return False
    nvvm = NVVM()
    print("NVVM version", nvvm.get_version())
    # check pkg version matches lib pulled in
    gotlib = get_cudalib('cublas')
    lookfor = os.environ['PKG_VERSION']
    return lookfor in gotlib
def run_test():
    # on windows only nvvm is available to numba
    if sys.platform.startswith("win"):
        nvvm = NVVM()
        print("NVVM version", nvvm.get_version())
        return nvvm.get_version() is not None
    if not test():
        return False
    nvvm = NVVM()
    print("NVVM version", nvvm.get_version())
    extra_lib_tests = (
        "cublas",  # check pkg version matches lib pulled in
        "cufft",   # check cufft b/c cublas has an incorrect version in
                   # 10.1 update 1
        "cupti",   # check this is getting included
    )
    found_paths = [get_cudalib(lib) for lib in extra_lib_tests]
    print(*zip(extra_lib_tests, found_paths), sep="\n")
    # test the resolved paths, not the (always-truthy) tuple of lib names
    return all(found_paths)
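# A minimal harness for the run_test variants above: a sketch, assuming the
# helpers live in numba's CUDA support modules (exact module paths may differ
# across numba versions).
import os
import sys

from numba.cuda.cudadrv.libs import get_cudalib, test
from numba.cuda.cudadrv.nvvm import NVVM

if __name__ == '__main__':
    # exit non-zero on failure so CI picks up the result
    sys.exit(0 if run_test() else 1)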
def __init__(self, py_func, argtypes, link=None, debug=False,
             lineinfo=False, inline=False, fastmath=False, extensions=None,
             max_registers=None, opt=True, device=False):

    if device:
        raise RuntimeError('Cannot compile a device function as a kernel')

    super().__init__()

    self.py_func = py_func
    self.argtypes = argtypes
    self.debug = debug
    self.lineinfo = lineinfo
    self.extensions = extensions or []

    nvvm_options = {
        'debug': self.debug,
        'lineinfo': self.lineinfo,
        'fastmath': fastmath,
        'opt': 3 if opt else 0
    }

    cres = compile_cuda(self.py_func, types.void, self.argtypes,
                        debug=self.debug,
                        lineinfo=self.lineinfo,
                        inline=inline,
                        fastmath=fastmath,
                        nvvm_options=nvvm_options)
    tgt_ctx = cres.target_context
    code = self.py_func.__code__
    filename = code.co_filename
    linenum = code.co_firstlineno
    lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                              debug, nvvm_options,
                                              filename, linenum,
                                              max_registers)

    if not link:
        link = []

    # A kernel needs cooperative launch if grid_sync is being used.
    self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
    # We need to link against cudadevrt if grid sync is being used.
    if self.cooperative:
        link.append(get_cudalib('cudadevrt', static=True))

    for filepath in link:
        lib.add_linking_file(filepath)

    # populate members
    self.entry_name = kernel.name
    self.signature = cres.signature
    self._type_annotation = cres.type_annotation
    self._codelibrary = lib
    self.call_helper = cres.call_helper
def __init__(self, py_func, argtypes, link=None, debug=False,
             lineinfo=False, inline=False, fastmath=False, extensions=None,
             max_registers=None, opt=True, device=False):

    if device:
        raise RuntimeError('Cannot compile a device function as a kernel')

    super().__init__()

    # _DispatcherBase.nopython_signatures() expects this attribute to be
    # present, because it assumes an overload is a CompileResult. In the
    # CUDA target, _Kernel instances are stored instead, so we provide this
    # attribute here to avoid duplicating nopython_signatures() in the CUDA
    # target with slight modifications.
    self.objectmode = False

    # The finalizer constructed by _DispatcherBase._make_finalizer also
    # expects overloads to be a CompileResult. It uses the entry_point to
    # remove a CompileResult from a target context. However, since we never
    # insert kernels into a target context (there is no need because they
    # cannot be called by other functions, only through the dispatcher) it
    # suffices to pretend we have an entry point of None.
    self.entry_point = None

    self.py_func = py_func
    self.argtypes = argtypes
    self.debug = debug
    self.lineinfo = lineinfo
    self.extensions = extensions or []

    nvvm_options = {
        'debug': self.debug,
        'lineinfo': self.lineinfo,
        'fastmath': fastmath,
        'opt': 3 if opt else 0
    }

    cres = compile_cuda(self.py_func, types.void, self.argtypes,
                        debug=self.debug,
                        lineinfo=self.lineinfo,
                        inline=inline,
                        fastmath=fastmath,
                        nvvm_options=nvvm_options)
    tgt_ctx = cres.target_context
    code = self.py_func.__code__
    filename = code.co_filename
    linenum = code.co_firstlineno
    lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                              debug, nvvm_options,
                                              filename, linenum,
                                              max_registers)

    if not link:
        link = []

    # A kernel needs cooperative launch if grid_sync is being used.
    self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
    # We need to link against cudadevrt if grid sync is being used.
    if self.cooperative:
        link.append(get_cudalib('cudadevrt', static=True))

    for filepath in link:
        lib.add_linking_file(filepath)

    # populate members
    self.entry_name = kernel.name
    self.signature = cres.signature
    self._type_annotation = cres.type_annotation
    self._codelibrary = lib
    self.call_helper = cres.call_helper
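# A hedged sketch of the cooperative-launch path detected above: a kernel
# that calls grid.sync() pulls cudaCGGetIntrinsicHandle into the generated
# PTX, which is exactly what the 'cooperative' substring check looks for.
# Kernel and variable names are illustrative; running this requires a device
# and toolkit with cooperative-groups support.
import numpy as np
from numba import cuda

@cuda.jit
def inc_then_sync(x):
    i = cuda.grid(1)
    g = cuda.cg.this_grid()
    if i < x.size:
        x[i] += 1
    g.sync()  # grid-wide sync => kernel requires a cooperative launch

arr = cuda.to_device(np.zeros(64, dtype=np.float32))
inc_then_sync[1, 64](arr)  # dispatcher launches cooperatively automatically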