def test_linking(self):
    """Link a PTX file into a kernel and call its external device function.

    ``bar`` is declared as an external ``int32(int32)`` device function whose
    implementation is supplied by ``data/jitlink.ptx`` through ``link=``.
    The kernel adds ``bar(y[i])`` to ``x[i]``; the final assertion expects
    ``bar`` to return twice its argument.
    """
    global bar  # must be a global; otherwise it is recognized as a freevar
    bar = cuda.declare_device('bar', 'int32(int32)')

    link = os.path.join(os.path.dirname(__file__), 'data', 'jitlink.ptx')

    @cuda.jit('void(int32[:], int32[:])', link=[link])
    def foo(x, y):
        i = cuda.grid(1)
        x[i] += bar(y[i])

    # Pin the dtype so the host arrays match the kernel's int32[:] signature
    # on every platform; NumPy's default integer width is platform-dependent.
    A = np.array([123], dtype=np.int32)
    B = np.array([321], dtype=np.int32)

    # One block of one thread: a single element is processed.
    foo[1, 1](A, B)

    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(A[0], 123 + 2 * 321)
def test_linking_cu_log_warning(self):
    """Linking ``data/warn.cu`` must surface exactly one NVRTC log warning.

    The warning text is checked for both the NVRTC log banner and the
    unused-variable diagnostic.
    """
    bar = cuda.declare_device('bar', 'int32(int32)')
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    cu_path = os.path.join(data_dir, 'warn.cu')

    with warnings.catch_warnings(record=True) as caught:
        ignore_internal_warnings()

        @cuda.jit('void(int32)', link=[cu_path])
        def kernel(x):
            bar(x)

    self.assertEqual(len(caught), 1, 'Expected warnings from NVRTC')

    warning_text = str(caught[0].message)
    # The warning must refer to the NVRTC log messages ...
    self.assertIn('NVRTC log messages', warning_text)
    # ... and must include the diagnostic about the unused variable.
    self.assertIn('declared but never referenced', warning_text)
def test_linking(self):
    """Link a PTX file into a kernel and call its external device function.

    ``bar`` is declared as an external ``int32(int32)`` device function whose
    implementation is supplied by ``data/jitlink.ptx`` through ``link=``.
    The kernel adds ``bar(y[i])`` to ``x[i]``; the final assertion expects
    ``bar`` to return twice its argument.
    """
    global bar  # must be a global; otherwise it is recognized as a freevar
    bar = cuda.declare_device('bar', 'int32(int32)')

    link = os.path.join(os.path.dirname(__file__), 'data', 'jitlink.ptx')

    @cuda.jit('void(int32[:], int32[:])', link=[link])
    def foo(x, y):
        i = cuda.grid(1)
        x[i] += bar(y[i])

    # Pin the dtype so the host arrays match the kernel's int32[:] signature
    # on every platform; NumPy's default integer width is platform-dependent.
    A = np.array([123], dtype=np.int32)
    B = np.array([321], dtype=np.int32)

    # Give the launch configuration explicitly (one block, one thread)
    # instead of calling the kernel without one.
    foo[1, 1](A, B)

    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(A[0], 123 + 2 * 321)
def test_linking_cu_error(self):
    """Linking ``data/error.cu`` must raise ``NvrtcError`` with a useful message.

    The message has to mention the NVRTC compilation failure, the undefined
    identifier found in the CUDA source, and the name of the offending file.
    """
    bar = cuda.declare_device('bar', 'int32(int32)')
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    cu_path = os.path.join(data_dir, 'error.cu')

    with self.assertRaises(NvrtcError) as raised:
        @cuda.jit('void(int32)', link=[cu_path])
        def kernel(x):
            bar(x)

    msg = raised.exception.args[0]
    expected_fragments = (
        # The error message refers to the NVRTC compile
        'NVRTC Compilation failure',
        # The expected error in the CUDA source is reported
        'identifier "SYNTAX" is undefined',
        # The filename is reported correctly
        'in the compilation of "error.cu"',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, msg)
def test_linking_cu(self):
    """Link CUDA C source (``data/jitlink.cu``) and check ``bar`` per element.

    The kernel stores ``bar(x[i])`` into ``r[i]`` for every in-range thread;
    the result is compared against twice the input.
    """
    bar = cuda.declare_device('bar', 'int32(int32)')
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    cu_path = os.path.join(data_dir, 'jitlink.cu')

    @cuda.jit(link=[cu_path])
    def kernel(r, x):
        i = cuda.grid(1)
        # Guard: more threads are launched than there are elements.
        if i < len(r):
            r[i] = bar(x[i])

    values = np.arange(10, dtype=np.int32)
    result = np.zeros_like(values)

    kernel[1, 32](result, values)

    # Matches the operation of bar() in jitlink.cu
    np.testing.assert_array_equal(result, values * 2)
def test_linking(self):
    """Link a precompiled object file into a kernel and call ``bar`` from it.

    ``bar`` is declared as an external ``int32(int32)`` device function whose
    implementation is supplied by ``data/jitlink.o`` through ``link=``. The
    test is skipped when that file is not present. The kernel adds
    ``bar(y[i])`` to ``x[i]``; the final assertion expects ``bar`` to return
    twice its argument.
    """
    global bar  # must be a global; otherwise it is recognized as a freevar
    bar = cuda.declare_device('bar', 'int32(int32)')

    link = os.path.join(os.path.dirname(__file__), 'data', 'jitlink.o')
    print('link to:', link)
    if not os.path.isfile(link):
        # Report a real skip; a bare ``return`` would count as a pass.
        self.skipTest('test skipped due to missing file: %s' % link)

    @cuda.jit('void(int32[:], int32[:])', link=[link])
    def foo(x, y):
        i = cuda.grid(1)
        x[i] += bar(y[i])

    # Pin the dtype so the host arrays match the kernel's int32[:] signature
    # on every platform; NumPy's default integer width is platform-dependent.
    A = np.array([123], dtype=np.int32)
    B = np.array([321], dtype=np.int32)

    # Give the launch configuration explicitly (one block, one thread)
    # instead of calling the kernel without one.
    foo[1, 1](A, B)

    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(A[0], 123 + 2 * 321)
""" Demonstrating CUDA JIT integration """ from __future__ import print_function from numba import cuda import numpy import os # Declare function to link to bar = cuda.declare_device('bar', 'int32(int32, int32)') # Get path to precompiled library curdir = os.path.join(os.path.dirname(__file__)) link = os.path.join(curdir, 'jitlink.o') print("Linking: %s", link) # Code that uses CUDA JIT @cuda.jit('void(int32[:], int32[:])', link=[link]) def foo(inp, out): i = cuda.grid(1) out[i] = bar(inp[i], 2) print(foo.ptx) n = 5 inp = numpy.arange(n, dtype='int32') out = numpy.zeros_like(inp) foo[1, out.size](inp, out)
""" Demonstrating CUDA JIT integration """ from __future__ import print_function from numba import cuda import numpy import os # Declare function to link to bar = cuda.declare_device('bar', 'int32(int32, int32)') # Get path to precompiled library curdir = os.path.join(os.path.dirname(__file__)) link = os.path.join(curdir, 'jitlink.o') print("Linking: %s", link) # Code that uses CUDA JIT @cuda.jit('void(int32[:], int32[:])', link=[link]) def foo(inp, out): i = cuda.grid(1) out[i] = bar(inp[i], 2) print(foo.ptx) n = 5 inp = numpy.arange(n, dtype='int32') out = numpy.zeros_like(inp) foo[1, out.size](inp, out) print("inp =", inp)
def test_bad_declare_device_string(self):
    """A signature string without a return type is rejected with TypeError."""
    signature_without_return = '(float32[:],)'
    expect_failure = self.assertRaisesRegex(TypeError, 'Return type')
    with expect_failure:
        cuda.declare_device('f1', signature_without_return)
def test_declare_device_string(self):
    """Declare ``f1`` from a string signature and run the shared check on it."""
    declared = cuda.declare_device('f1', 'int32(float32[:])')
    self._test_declare_device(declared)
def test_declare_device_signature(self):
    """Declare ``f1`` from a signature object and run the shared check on it."""
    signature = int32(float32[:])
    declared = cuda.declare_device('f1', signature)
    self._test_declare_device(declared)