def test_nvvm_accepts_encoding(self):
    """Check that NVVM accepts a constant holding every 8-bit character.

    Taken from the test case added in llvmlite PR #53:

    https://github.com/numba/llvmlite/pull/53

    The test is kept in Numba as well to make sure that the encoding used
    by llvmlite (e.g. utf-8, latin1, etc.) does not produce an input that
    NVVM cannot parse correctly.
    """
    # A global constant array of 256 i8 elements holding the values 0..255
    byte_values = range(256)
    array_type = ir.ArrayType(ir.IntType(8), 256)
    all_bytes = ir.Constant(array_type, bytearray(byte_values))

    module = ir.Module()
    global_var = ir.GlobalVariable(module, all_bytes.type, "myconstant")
    global_var.global_constant = True
    global_var.initializer = all_bytes
    nvvm.fix_data_layout(module)

    # Parse the textual IR with LLVM first, then hand the re-printed
    # module to NVVM
    reparsed_ir = str(llvm.parse_assembly(str(module)))
    ptx = nvvm.llvm_to_ptx(reparsed_ir)

    # Every value has to appear in the generated constant array
    expected_values = ", ".join(map(str, byte_values))
    expected_decl = f"myconstant[256] = {{{expected_values}}}".encode('utf-8')
    self.assertIn(expected_decl, ptx)
def _test_nvvm_support(self, arch):
    """Compile the test IR for ``arch`` and check the resulting PTX.

    ``arch`` is unpacked positionally: its first two items fill in both
    the ``compute_XY`` option passed to ``llvm_to_ptx`` and the
    ``.target sm_XY`` directive expected in the output.
    """
    nvvm_ir = self.get_ptx()
    arch_option = 'compute_{0}{1}'.format(*arch)
    ptx_bytes = llvm_to_ptx(nvvm_ir, arch=arch_option, ftz=1, prec_sqrt=0,
                            prec_div=0)
    ptx_text = ptx_bytes.decode('utf8')

    expected_fragments = (
        ".target sm_{0}{1}".format(*arch),
        'simple',
        'ave',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, ptx_text)
def test_nvvm_llvm_to_ptx(self):
    """
    Smoke test that pushes a compiled kernel's LLVM IR through
    nvvm.llvm_to_ptx(), so that a mismatched NVVM API shows up as a
    failure here.
    """
    def foo(x):
        x[0] = 123

    signature = (types.int32[::1],)
    kernel = compile_kernel(foo, args=signature, link=())
    kernel_ir = kernel._func.ptx.llvmir

    ptx_text = nvvm.llvm_to_ptx(kernel_ir).decode('ascii')
    self.assertIn("foo", ptx_text)
def test_nvvm_llvm_to_ptx(self):
    """
    Exercise nvvm.llvm_to_ptx() on the IR of a tiny kernel; the call is
    expected to fail if the NVVM API does not match.
    """
    def foo(x):
        x[0] = 123

    compiled = compile_kernel(foo, args=(types.int32[::1],), link=())
    ptx_bytes = nvvm.llvm_to_ptx(compiled._func.ptx.llvmir)

    # The kernel's name has to show up in the generated PTX
    self.assertIn("foo", ptx_bytes.decode('ascii'))
def test_const_string(self):
    """Check that a const string is defined once in the IR and the PTX."""
    # These imports are incompatible with CUDASIM
    from numba.cuda.descriptor import cuda_target
    from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

    # Definition of the const string global as a [19 x i8] array in the IR
    definition_re = (r"@\"__conststring__.*internal.*constant.*\["
                     r"19\s+x\s+i8\]")

    def assert_single_definition(module):
        found = re.findall(definition_re, str(module))
        self.assertEqual(len(found), 1)

    ctx = cuda_target.target_context
    module = ctx.create_module("")
    text = 'A Little Brown Fox'
    first_gv = ctx.insert_const_string(module, text)
    # Insert the same const string a second time - the first should be
    # reused, so only one definition may exist afterwards.
    ctx.insert_const_string(module, text)
    assert_single_definition(module)

    char_ptr_fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

    # Using insert_const_string
    func = ir.Function(module, char_ptr_fnty, "test_insert_const_string")
    builder = ir.IRBuilder(func.append_basic_block())
    converted = ctx.insert_addrspace_conv(builder, first_gv,
                                          addrspace=ADDRSPACE_CONSTANT)
    builder.ret(converted)
    assert_single_definition(module)

    # Using insert_string_const_addrspace
    func = ir.Function(module, char_ptr_fnty,
                       "test_insert_string_const_addrspace")
    builder = ir.IRBuilder(func.append_basic_block())
    converted = ctx.insert_string_const_addrspace(builder, text)
    builder.ret(converted)
    assert_single_definition(module)

    # The string must also appear exactly once as a .const in the PTX
    ptx = llvm_to_ptx(str(module)).decode('ascii')
    const_decls = re.findall(r"\.const.*__conststring__", ptx)
    self.assertEqual(len(const_decls), 1)
def test_nvvm_from_llvm(self):
    """Build a trivial kernel with the LLVM API and compile it to PTX.

    The PTX must mention the kernel by name and carry the
    ``.address_size`` directive selected by ``is64bit``.
    """
    m = Module("test_nvvm_from_llvm")
    fty = Type.function(Type.void(), [Type.int()])
    kernel = m.add_function(fty, name='mycudakernel')
    bldr = Builder(kernel.append_basic_block('entry'))
    bldr.ret_void()
    set_cuda_kernel(kernel)

    fix_data_layout(m)
    ptx = llvm_to_ptx(str(m)).decode('utf8')

    # assertIn reports the missing text and the PTX searched on failure,
    # where assertTrue(x in y) would only say "False is not true".
    self.assertIn('mycudakernel', ptx)
    expected_size = '.address_size 64' if is64bit else '.address_size 32'
    self.assertIn(expected_size, ptx)
def test_nvvm_from_llvm(self):
    """Build a trivial kernel with llvmlite IR and compile it to PTX.

    The PTX must mention the kernel by name and carry the
    ``.address_size`` directive selected by ``is64bit``.
    """
    m = ir.Module("test_nvvm_from_llvm")
    fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
    kernel = ir.Function(m, fty, name='mycudakernel')
    bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
    bldr.ret_void()
    set_cuda_kernel(kernel)

    fix_data_layout(m)
    ptx = llvm_to_ptx(str(m)).decode('utf8')

    # assertIn reports the missing text and the PTX searched on failure,
    # where assertTrue(x in y) would only say "False is not true".
    self.assertIn('mycudakernel', ptx)
    expected_size = '.address_size 64' if is64bit else '.address_size 32'
    self.assertIn(expected_size, ptx)
def test_const_string(self):
    """Check that a const string is defined once in the IR and the PTX.

    The same text is inserted twice via ``insert_const_string`` and once
    more via ``insert_string_const_addrspace``; after each step the module
    must still contain a single ``__conststring__`` definition.
    """
    # These imports are incompatible with CUDASIM
    from numba.cuda.descriptor import CUDATargetDesc
    from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

    # Definition of the const string global as a [19 x i8] array in the IR
    conststring_re = (r"@\"__conststring__.*internal.*constant.*\["
                      r"19\s+x\s+i8\]")

    targetctx = CUDATargetDesc.targetctx
    mod = targetctx.create_module("")
    textstring = 'A Little Brown Fox'
    gv0 = targetctx.insert_const_string(mod, textstring)
    # Second insertion of the same text: the returned value is not needed,
    # only the effect on the module is checked below.
    targetctx.insert_const_string(mod, textstring)

    res = re.findall(conststring_re, str(mod))
    self.assertEqual(len(res), 1)

    fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

    # Using insert_const_string
    fn = mod.add_function(fnty, name="test_insert_const_string")
    builder = ir.IRBuilder(fn.append_basic_block())
    res = targetctx.insert_addrspace_conv(builder, gv0,
                                          addrspace=ADDRSPACE_CONSTANT)
    builder.ret(res)

    matches = re.findall(conststring_re, str(mod))
    self.assertEqual(len(matches), 1)

    # Using insert_string_const_addrspace
    fn = mod.add_function(fnty, name="test_insert_string_const_addrspace")
    builder = ir.IRBuilder(fn.append_basic_block())
    res = targetctx.insert_string_const_addrspace(builder, textstring)
    builder.ret(res)

    matches = re.findall(conststring_re, str(mod))
    self.assertEqual(len(matches), 1)

    # The string must also appear exactly once as a .const in the PTX
    ptx = llvm_to_ptx(str(mod)).decode('ascii')
    matches = re.findall(r"\.const.*__conststring__", ptx)
    self.assertEqual(len(matches), 1)
def test_const_string(self):
    """Check that inserting a const string repeatedly defines it once.

    Covers both ``insert_const_string`` (followed by an address space
    conversion) and ``insert_string_const_addrspace``, then checks the
    generated PTX for a single ``.const`` declaration of the string.
    """
    # These imports are incompatible with CUDASIM
    from numba.cuda.descriptor import CUDATargetDesc
    from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

    # Definition of the const string global as a [19 x i8] array in the IR
    conststring_re = (r"@\"__conststring__.*internal.*constant.*\["
                      r"19\s+x\s+i8\]")

    def count_definitions():
        # Number of const string definitions currently in the module IR
        return len(re.findall(conststring_re, str(mod)))

    targetctx = CUDATargetDesc.targetctx
    mod = targetctx.create_module("")
    textstring = 'A Little Brown Fox'
    gv0 = targetctx.insert_const_string(mod, textstring)
    # Second insertion of the same text: the returned value is not needed,
    # only the effect on the module is checked below.
    targetctx.insert_const_string(mod, textstring)
    self.assertEqual(count_definitions(), 1)

    fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

    # Using insert_const_string
    fn = mod.add_function(fnty, name="test_insert_const_string")
    builder = ir.IRBuilder(fn.append_basic_block())
    res = targetctx.insert_addrspace_conv(builder, gv0,
                                          addrspace=ADDRSPACE_CONSTANT)
    builder.ret(res)
    self.assertEqual(count_definitions(), 1)

    # Using insert_string_const_addrspace
    fn = mod.add_function(fnty, name="test_insert_string_const_addrspace")
    builder = ir.IRBuilder(fn.append_basic_block())
    res = targetctx.insert_string_const_addrspace(builder, textstring)
    builder.ret(res)
    self.assertEqual(count_definitions(), 1)

    # The string must also appear exactly once as a .const in the PTX
    ptx = llvm_to_ptx(str(mod)).decode('ascii')
    matches = re.findall(r"\.const.*__conststring__", ptx)
    self.assertEqual(len(matches), 1)
def test_inline_rsqrt(self):
    """Check that inline ``rsqrt.approx.f32`` assembly reaches the PTX.

    Builds a kernel that loads a float through its pointer argument,
    passes it to the inline assembly and stores the result back, then
    compiles the module with NVVM.
    """
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, "cu_rsqrt")
    bldr = Builder.new(fn.append_basic_block("entry"))

    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              "rsqrt.approx.f32 $0, $1;",
                              "=f,f", side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure, unlike assertTrue
    self.assertIn("rsqrt.approx.f32", str(ptx))
def test_inline_rsqrt(self):
    """Check that inline ``rsqrt.approx.f32`` assembly reaches the PTX.

    Builds a kernel that loads a float through its pointer argument,
    passes it to the inline assembly and stores the result back, then
    compiles the module with NVVM.
    """
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, 'cu_rsqrt')
    bldr = Builder.new(fn.append_basic_block('entry'))

    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              'rsqrt.approx.f32 $0, $1;',
                              '=f,f', side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure, unlike assertTrue
    self.assertIn('rsqrt.approx.f32', str(ptx))
def test_inline_rsqrt(self):
    """Check that inline ``rsqrt.approx.f32`` assembly reaches the PTX.

    Builds a kernel that loads a float through its pointer argument,
    passes it to the inline assembly and stores the result back, then
    compiles the module with NVVM.
    """
    mod = ir.Module(__name__)
    fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
    fn = ir.Function(mod, fnty, 'cu_rsqrt')
    bldr = ir.IRBuilder(fn.append_basic_block('entry'))

    rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
    inlineasm = ir.InlineAsm(rsqrt_approx_fnty,
                             'rsqrt.approx.f32 $0, $1;',
                             '=f,f', side_effect=True)
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)

    # assertIn shows the searched text on failure, unlike assertTrue
    self.assertIn('rsqrt.approx.f32', str(ptx))
def test_nvvm_compile_simple(self):
    """Compile the IR from ``self.get_ptx()`` with default options.

    The decoded PTX must contain both ``simple`` and ``ave``.
    """
    nvvmir = self.get_ptx()
    ptx = llvm_to_ptx(nvvmir).decode('utf8')
    # No debug print of the PTX here: assertIn already includes the
    # searched text in its failure message.
    self.assertIn('simple', ptx)
    self.assertIn('ave', ptx)
def test_nvvm_compile_simple(self):
    """Compile the IR from ``self.get_ptx()`` with default options.

    The decoded PTX must contain both ``simple`` and ``ave``.
    """
    nvvmir = self.get_ptx()
    ptx = llvm_to_ptx(nvvmir).decode('utf8')
    # assertIn shows the searched text on failure, unlike assertTrue
    self.assertIn('simple', ptx)
    self.assertIn('ave', ptx)
# Have to cheat a bit here to get everything needed to give to NVVM with global_compiler_lock: argtys = (float32[:], int32, float32[:], float32[:]) returnty = void cres = compile_cuda(axpy.py_func, void, argtys, debug=False, inline=False) fname = cres.fndesc.llvm_func_name lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library, fname, cres.signature.args, debug=False) llvm_module = lib._final_module cc = (5, 2) arch = nvvm.get_arch_option(*cc) llvmir = str(llvm_module) ptx = nvvm.llvm_to_ptx(llvmir, opt=3, arch=arch) print(ptx.decode('utf-8')) # PTX to module from numba.cuda.cudadrv.driver import Linker # noqa linker = Linker() linker.add_ptx(ptx) cubin, size = linker.complete() compile_info = linker.info_log print(size) print(compile_info)
def test_nvvm_compile_simple(self):
    """Compile the IR from ``self.get_ptx()`` with default options.

    The decoded PTX must contain both ``simple`` and ``ave``.
    """
    nvvmir = self.get_ptx()
    ptx = llvm_to_ptx(nvvmir).decode("utf8")
    # assertIn shows the searched text on failure, unlike assertTrue
    self.assertIn("simple", ptx)
    self.assertIn("ave", ptx)
def test_nvvm_ir_verify_fail(self):
    """Compiling a module with an unknown target triple must raise."""
    bad_module = ir.Module("test_bad_ir")
    bad_module.triple = "unknown-unknown-unknown"
    fix_data_layout(bad_module)
    bad_ir = str(bad_module)

    # llvm_to_ptx is expected to reject the triple with an NvvmError
    with self.assertRaisesRegex(NvvmError, 'Invalid target triple'):
        llvm_to_ptx(bad_ir)