예제 #1
0
 def test_get_arch_option(self):
     """Check the option strings produced by ``get_arch_option``."""
     # Test returning the nearest lowest arch: minors 0, 3 and 4 of
     # major 3 are all expected to give the same option.
     for minor in (0, 3, 4):
         self.assertEqual(get_arch_option(3, minor), "compute_30")
     # Test known arch: every SUPPORTED_CC entry maps to its own option.
     for cc in SUPPORTED_CC:
         actual = get_arch_option(*cc)
         self.assertEqual(actual, "compute_%d%d" % cc)
     # A (1000, 0) request is expected to give the option of the last
     # SUPPORTED_CC entry.
     oversized = get_arch_option(1000, 0)
     self.assertEqual(oversized, "compute_%d%d" % SUPPORTED_CC[-1])
예제 #2
0
 def test_get_arch_option(self):
     """Verify ``get_arch_option`` for in-between, known and huge CCs."""
     # Test returning the nearest lowest arch.
     nearest_lowest = 'compute_30'
     for cc in ((3, 0), (3, 3), (3, 4)):
         self.assertEqual(get_arch_option(*cc), nearest_lowest)
     # Test known arch.
     for known in SUPPORTED_CC:
         option = get_arch_option(*known)
         self.assertEqual(option, 'compute_%d%d' % known)
     # (1000, 0) is compared against the last SUPPORTED_CC entry.
     option = get_arch_option(1000, 0)
     self.assertEqual(option, 'compute_%d%d' % SUPPORTED_CC[-1])
예제 #3
0
 def test_get_arch_option(self):
     """Verify the arch option chosen by ``get_arch_option``."""
     # Test returning the nearest lowest arch.
     nearest_cases = (
         ((5, 0), 'compute_50'),
         ((5, 1), 'compute_50'),
         ((3, 7), 'compute_35'),
     )
     for cc, expected in nearest_cases:
         self.assertEqual(get_arch_option(*cc), expected)
     # Test known arch.
     for cc in SUPPORTED_CC:
         result = get_arch_option(*cc)
         self.assertEqual(result, 'compute_%d%d' % cc)
     # (1000, 0) is compared against the last SUPPORTED_CC entry.
     result = get_arch_option(1000, 0)
     self.assertEqual(result, 'compute_%d%d' % SUPPORTED_CC[-1])
예제 #4
0
 def test_get_arch_option(self):
     """Verify ``get_arch_option`` against ``get_supported_ccs()``."""
     # Test returning the nearest lowest arch.
     nearest_cases = (
         ((5, 0), 'compute_50'),
         ((5, 1), 'compute_50'),
         ((3, 7), 'compute_35'),
     )
     for cc, expected in nearest_cases:
         self.assertEqual(get_arch_option(*cc), expected)
     # Test known arch: the supported list is fetched once and reused.
     known_ccs = get_supported_ccs()
     for cc in known_ccs:
         result = get_arch_option(*cc)
         self.assertEqual(result, 'compute_%d%d' % cc)
     # (1000, 0) is compared against the last entry of the fetched list.
     result = get_arch_option(1000, 0)
     self.assertEqual(result, 'compute_%d%d' % known_ccs[-1])
 def test_get_arch_option(self):
     """Check ``get_arch_option`` against a table of expected options."""
     # (major, minor, expected option), asserted in this order.
     table = (
         (2, 0, 'compute_20'),
         (2, 1, 'compute_21'),
         (3, 0, 'compute_30'),
         (3, 3, 'compute_30'),
         (3, 4, 'compute_30'),
         (3, 5, 'compute_35'),
         (3, 6, 'compute_35'),
         (5, 0, 'compute_50'),
         (5, 1, 'compute_50'),
     )
     for major, minor, expected in table:
         self.assertEqual(get_arch_option(major, minor), expected)
     # (1000, 0) is compared against the last SUPPORTED_CC entry.
     last_supported = 'compute_%d%d' % SUPPORTED_CC[-1]
     self.assertEqual(get_arch_option(1000, 0), last_supported)
예제 #6
0
 def test_get_arch_option(self):
     """Compare ``get_arch_option`` results with fixed expectations."""
     # Expected option keyed by the (major, minor) pair passed in; dicts
     # keep insertion order, so the assertions run in the listed order.
     expectations = {
         (2, 0): 'compute_20',
         (2, 1): 'compute_21',
         (3, 0): 'compute_30',
         (3, 3): 'compute_30',
         (3, 4): 'compute_30',
         (3, 5): 'compute_35',
         (3, 6): 'compute_35',
         (5, 0): 'compute_50',
         (5, 1): 'compute_50',
     }
     for cc, option in expectations.items():
         self.assertEqual(get_arch_option(*cc), option)
     # (1000, 0) is compared against the last SUPPORTED_CC entry.
     beyond_all = get_arch_option(1000, 0)
     self.assertEqual(beyond_all, 'compute_%d%d' % SUPPORTED_CC[-1])
예제 #7
0
 def test_get_arch_option(self):
     """Check the option string ``get_arch_option`` returns for several CCs.

     ``assertEqual`` is used instead of ``assertTrue(a == b)`` so that a
     failing case reports both the returned and the expected string
     rather than just "False is not true".
     """
     self.assertEqual(get_arch_option(2, 0), 'compute_20')
     self.assertEqual(get_arch_option(2, 1), 'compute_20')
     self.assertEqual(get_arch_option(3, 0), 'compute_30')
     self.assertEqual(get_arch_option(3, 3), 'compute_30')
     self.assertEqual(get_arch_option(3, 4), 'compute_30')
     self.assertEqual(get_arch_option(3, 5), 'compute_35')
     self.assertEqual(get_arch_option(3, 6), 'compute_35')
from numba import float32, int32, void  # noqa

# Have to cheat a bit here to get everything needed to give to NVVM
# Flow of this section: compile ``axpy.py_func`` with ``compile_cuda``, turn
# the result into a CUDA kernel library, render its LLVM module as text and
# pass that text to ``nvvm.llvm_to_ptx``.
with global_compiler_lock:
    # Argument types handed to compile_cuda for axpy.py_func.
    argtys = (float32[:], int32, float32[:], float32[:])
    # NOTE(review): returnty is not read in the lines shown here; the
    # compile_cuda call below passes ``void`` directly.
    returnty = void
    cres = compile_cuda(axpy.py_func, void, argtys, debug=False, inline=False)
    # Name of the compiled function, needed by prepare_cuda_kernel.
    fname = cres.fndesc.llvm_func_name
    # NOTE(review): only ``lib`` is used below; ``kernel`` is not read in
    # the lines shown here.
    lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library,
                                                          fname,
                                                          cres.signature.args,
                                                          debug=False)
    # Reaches into a private attribute of the library object.
    llvm_module = lib._final_module

    # presumably compute capability 5.2 — TODO confirm against target GPU
    cc = (5, 2)
    arch = nvvm.get_arch_option(*cc)
    # llvm_to_ptx is given the module's string form, not the module object.
    llvmir = str(llvm_module)
    ptx = nvvm.llvm_to_ptx(llvmir, opt=3, arch=arch)

# ptx is decoded as UTF-8 before printing.
print(ptx.decode('utf-8'))

# PTX to module
# Feeds the ``ptx`` value bound earlier in this script to a driver Linker.

from numba.cuda.cudadrv.driver import Linker  # noqa

linker = Linker()
linker.add_ptx(ptx)
# complete() returns a pair, unpacked here as (cubin, size).
cubin, size = linker.complete()

# Keep the linker's info log.
# NOTE(review): presumably consumed by code past the end of this view.
compile_info = linker.info_log