def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # (major, minor) compute capability pair.

    # Test returning the nearest lowest arch: 3.0, 3.3 and 3.4 are all
    # expected to resolve to compute_30.
    for minor in (0, 3, 4):
        self.assertEqual(get_arch_option(3, minor), "compute_30")

    # Test known arch: every entry of SUPPORTED_CC maps to its own name.
    for cc in SUPPORTED_CC:
        expected = "compute_%d%d" % cc
        self.assertEqual(get_arch_option(*cc), expected)

    # A capability far above anything listed is expected to resolve to the
    # last entry of SUPPORTED_CC.
    last_cc = SUPPORTED_CC[-1]
    self.assertEqual(get_arch_option(1000, 0), "compute_%d%d" % last_cc)
def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # (major, minor) compute capability pair.

    # Test returning the nearest lowest arch.
    fallback_cases = (
        ((3, 0), 'compute_30'),
        ((3, 3), 'compute_30'),
        ((3, 4), 'compute_30'),
    )
    for cc, expected in fallback_cases:
        self.assertEqual(get_arch_option(*cc), expected)

    # Test known arch: each supported capability maps to its own name.
    for cc in SUPPORTED_CC:
        self.assertEqual(get_arch_option(*cc), 'compute_%d%d' % cc)

    # An out-of-range capability is expected to resolve to the last entry
    # of SUPPORTED_CC.
    self.assertEqual(
        get_arch_option(1000, 0),
        'compute_%d%d' % SUPPORTED_CC[-1],
    )
def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # (major, minor) compute capability pair.

    # Test returning the nearest lowest arch. Keys are (major, minor)
    # inputs, values are the expected arch strings; dict order is the
    # order the assertions run in.
    nearest_lower = {
        (5, 0): 'compute_50',
        (5, 1): 'compute_50',
        (3, 7): 'compute_35',
    }
    for (major, minor), expected in nearest_lower.items():
        self.assertEqual(get_arch_option(major, minor), expected)

    # Test known arch.
    for known in SUPPORTED_CC:
        self.assertEqual(get_arch_option(*known), 'compute_%d%d' % known)

    # A capability far above anything listed is expected to resolve to the
    # last entry of SUPPORTED_CC.
    highest_name = 'compute_%d%d' % SUPPORTED_CC[-1]
    self.assertEqual(get_arch_option(1000, 0), highest_name)
def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # (major, minor) compute capability pair, using the capability list
    # reported by get_supported_ccs().

    # Test returning the nearest lowest arch.
    for cc, expected in (
        ((5, 0), 'compute_50'),
        ((5, 1), 'compute_50'),
        ((3, 7), 'compute_35'),
    ):
        self.assertEqual(get_arch_option(*cc), expected)

    # Test known arch. The list is fetched once and reused for the final
    # out-of-range check.
    supported_cc = get_supported_ccs()
    for cc in supported_cc:
        self.assertEqual(get_arch_option(*cc), 'compute_%d%d' % cc)

    # An out-of-range capability is expected to resolve to the last
    # reported entry.
    last_cc = supported_cc[-1]
    self.assertEqual(get_arch_option(1000, 0), 'compute_%d%d' % last_cc)
def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # range of (major, minor) compute capability pairs.

    # (major, minor, expected arch string), checked in this order.
    cases = [
        (2, 0, 'compute_20'),
        (2, 1, 'compute_21'),
        (3, 0, 'compute_30'),
        (3, 3, 'compute_30'),
        (3, 4, 'compute_30'),
        (3, 5, 'compute_35'),
        (3, 6, 'compute_35'),
        (5, 0, 'compute_50'),
        (5, 1, 'compute_50'),
    ]
    for major, minor, expected in cases:
        self.assertEqual(get_arch_option(major, minor), expected)

    # A capability far above anything listed is expected to resolve to the
    # last entry of SUPPORTED_CC.
    last_cc = SUPPORTED_CC[-1]
    self.assertEqual(get_arch_option(1000, 0), 'compute_%d%d' % last_cc)
def test_get_arch_option(self):
    # Checks the arch option string that get_arch_option returns for a
    # range of (major, minor) compute capability pairs.
    #
    # assertEqual is used instead of assertTrue(a == b) so that a failure
    # reports both the returned and the expected arch string rather than
    # just "False is not true".
    self.assertEqual(get_arch_option(2, 0), 'compute_20')
    self.assertEqual(get_arch_option(2, 1), 'compute_20')
    self.assertEqual(get_arch_option(3, 0), 'compute_30')
    self.assertEqual(get_arch_option(3, 3), 'compute_30')
    self.assertEqual(get_arch_option(3, 4), 'compute_30')
    self.assertEqual(get_arch_option(3, 5), 'compute_35')
    self.assertEqual(get_arch_option(3, 6), 'compute_35')
# Compiles the `axpy` kernel, converts the resulting LLVM module to PTX with
# NVVM, prints the PTX, and links it through the CUDA driver Linker.
from numba import float32, int32, void  # noqa

# Have to cheat a bit here to get everything needed to give to NVVM
# NOTE(review): the original indentation was lost, so the extent of this
# `with` block is an assumption (compilation steps only) -- confirm against
# the upstream file.
with global_compiler_lock:
    argtys = (float32[:], int32, float32[:], float32[:])
    returnty = void
    # `returnty` is bound to `void` on the line above, so this passes the
    # same return type as before.
    cres = compile_cuda(
        axpy.py_func,
        returnty,
        argtys,
        debug=False,
        inline=False,
    )
    fname = cres.fndesc.llvm_func_name
    lib, kernel = cres.target_context.prepare_cuda_kernel(
        cres.library,
        fname,
        cres.signature.args,
        debug=False,
    )
    llvm_module = lib._final_module

# LLVM IR text -> PTX for a fixed compute capability.
cc = (5, 2)
arch = nvvm.get_arch_option(*cc)
llvmir = str(llvm_module)
ptx = nvvm.llvm_to_ptx(llvmir, opt=3, arch=arch)
print(ptx.decode('utf-8'))

# PTX to module
from numba.cuda.cudadrv.driver import Linker  # noqa

linker = Linker()
linker.add_ptx(ptx)
cubin, size = linker.complete()
# Keep the linker's info log available for inspection.
compile_info = linker.info_log