コード例 #1
0
    def test_nvvm_accepts_encoding(self):
        # Verify that NVVM accepts a constant holding every possible 8-bit
        # value. The case is adapted from the one added in llvmlite PR #53:
        #
        #     https://github.com/numba/llvmlite/pull/53
        #
        # It lives in Numba as well so that whichever encoding llvmlite uses
        # (e.g. utf-8, latin1, etc.) never yields input that NVVM cannot
        # parse correctly.

        # Build a module whose single global is a constant array containing
        # all 256 byte values.
        byte_array_ty = ir.ArrayType(ir.IntType(8), 256)
        all_bytes = ir.Constant(byte_array_ty, bytearray(range(256)))
        module = ir.Module()
        global_var = ir.GlobalVariable(module, all_bytes.type, "myconstant")
        global_var.global_constant = True
        global_var.initializer = all_bytes
        nvvm.fix_data_layout(module)

        # Parse the textual IR with LLVM first, then pass the text of the
        # parsed module on to NVVM.
        reparsed = llvm.parse_assembly(str(module))
        ptx = nvvm.llvm_to_ptx(str(reparsed))

        # The generated constant array must list every value from 0 to 255.
        elements = ", ".join(map(str, range(256)))
        myconstant = f"myconstant[256] = {{{elements}}}".encode('utf-8')
        self.assertIn(myconstant, ptx)
コード例 #2
0
 def _test_nvvm_support(self, arch):
     # Helper: compile the input returned by self.get_ptx() for the
     # architecture described by ``arch`` (its first two items are formatted
     # into ``compute_XY`` / ``sm_XY``) and check the resulting PTX text.
     ir_text = self.get_ptx()
     arch_option = 'compute_{0}{1}'.format(*arch)
     ptx_bytes = llvm_to_ptx(ir_text, arch=arch_option, ftz=1, prec_sqrt=0,
                             prec_div=0)
     ptx = ptx_bytes.decode('utf8')
     expected_target = ".target sm_{0}{1}".format(*arch)
     # The target directive and both names must be present in the output.
     for needle in (expected_target, 'simple', 'ave'):
         self.assertIn(needle, ptx)
コード例 #3
0
ファイル: test_nvvm_driver.py プロジェクト: cpcloud/numba
 def _test_nvvm_support(self, arch):
     # Compile the input from self.get_ptx() for the compute architecture
     # named by ``arch`` and verify that the PTX mentions the matching
     # ``sm_XY`` target together with the names 'simple' and 'ave'.
     source = self.get_ptx()
     compute_name = 'compute_{0}{1}'.format(*arch)
     options = dict(arch=compute_name, ftz=1, prec_sqrt=0, prec_div=0)
     ptx = llvm_to_ptx(source, **options).decode('utf8')
     target_line = ".target sm_{0}{1}".format(*arch)
     self.assertIn(target_line, ptx)
     self.assertIn('simple', ptx)
     self.assertIn('ave', ptx)
コード例 #4
0
    def test_nvvm_llvm_to_ptx(self):
        """
        Smoke test that runs nvvm.llvm_to_ptx() on the LLVM IR of a
        compiled kernel, so that a mismatched NVVM API shows up as a
        failure here.
        """
        def foo(x):
            x[0] = 123

        kernel = compile_kernel(foo, args=(types.int32[::1], ), link=())
        ptx_bytes = nvvm.llvm_to_ptx(kernel._func.ptx.llvmir)
        # The kernel's function name is expected to appear in the PTX text.
        self.assertIn("foo", ptx_bytes.decode('ascii'))
コード例 #5
0
ファイル: test_nvvm.py プロジェクト: Alexhuszagh/numba
    def test_nvvm_llvm_to_ptx(self):
        """
        Exercise nvvm.llvm_to_ptx() end to end on a trivial kernel; the
        call is what would expose a mismatch with the NVVM API.
        """

        def foo(x):
            x[0] = 123

        compiled = compile_kernel(foo, args=(types.int32[::1],), link=())
        ir_text = compiled._func.ptx.llvmir
        ptx_text = nvvm.llvm_to_ptx(ir_text).decode('ascii')
        # The generated PTX has to mention the kernel function by name.
        self.assertIn("foo", ptx_text)
コード例 #6
0
    def test_const_string(self):
        # Imported inside the test: these modules are incompatible with
        # CUDASIM
        from numba.cuda.descriptor import cuda_target
        from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

        # Matches the IR definition of the internal constant string global,
        # which is a [19 x i8] array.
        conststring_def = (r"@\"__conststring__.*internal.*constant.*\["
                           r"19\s+x\s+i8\]")

        def count_definitions(module):
            # Number of const string definitions in the module's IR text
            return len(re.findall(conststring_def, str(module)))

        ctx = cuda_target.target_context
        module = ctx.create_module("")
        text = 'A Little Brown Fox'
        first_gv = ctx.insert_const_string(module, text)
        # Inserting the same text again should reuse the first global, so
        # exactly one definition is expected.
        ctx.insert_const_string(module, text)
        self.assertEqual(count_definitions(module), 1)

        ret_ptr_fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

        # Path 1: insert_const_string followed by an explicit address space
        # conversion of the global inserted above
        fn = ir.Function(module, ret_ptr_fnty, "test_insert_const_string")
        builder = ir.IRBuilder(fn.append_basic_block())
        converted = ctx.insert_addrspace_conv(builder,
                                              first_gv,
                                              addrspace=ADDRSPACE_CONSTANT)
        builder.ret(converted)
        self.assertEqual(count_definitions(module), 1)

        # Path 2: insert_string_const_addrspace with the same text
        fn = ir.Function(module, ret_ptr_fnty,
                         "test_insert_string_const_addrspace")
        builder = ir.IRBuilder(fn.append_basic_block())
        converted = ctx.insert_string_const_addrspace(builder, text)
        builder.ret(converted)
        self.assertEqual(count_definitions(module), 1)

        # The PTX must also contain a single matching .const line
        ptx = llvm_to_ptx(str(module)).decode('ascii')
        const_decls = re.findall(r"\.const.*__conststring__", ptx)
        self.assertEqual(len(const_decls), 1)
コード例 #7
0
ファイル: test_nvvm_driver.py プロジェクト: cpcloud/numba
    def test_nvvm_from_llvm(self):
        # Build a module containing one empty void kernel that takes a single
        # int argument, compile it with NVVM and inspect the PTX text.
        m = Module("test_nvvm_from_llvm")
        fty = Type.function(Type.void(), [Type.int()])
        kernel = m.add_function(fty, name='mycudakernel')
        bldr = Builder(kernel.append_basic_block('entry'))
        bldr.ret_void()
        set_cuda_kernel(kernel)

        fix_data_layout(m)
        ptx = llvm_to_ptx(str(m)).decode('utf8')
        # assertIn reports both operands on failure, which a bare
        # assertTrue(a in b) does not.
        self.assertIn('mycudakernel', ptx)
        # `is64bit` (defined outside this block) selects which address size
        # directive is expected in the output.
        address_size = '.address_size 64' if is64bit else '.address_size 32'
        self.assertIn(address_size, ptx)
コード例 #8
0
    def test_nvvm_from_llvm(self):
        # Compile a minimal module (one void kernel with a single int
        # parameter and an empty body) and check what NVVM emits for it.
        m = Module("test_nvvm_from_llvm")
        fty = Type.function(Type.void(), [Type.int()])
        kernel = m.add_function(fty, name='mycudakernel')
        bldr = Builder(kernel.append_basic_block('entry'))
        bldr.ret_void()
        set_cuda_kernel(kernel)

        fix_data_layout(m)
        ptx = llvm_to_ptx(str(m)).decode('utf8')
        # Use assertIn so a failure shows the searched text and the PTX
        # instead of just "False is not true".
        self.assertIn('mycudakernel', ptx)
        # The expected address size directive depends on `is64bit`, which is
        # defined outside this block.
        if is64bit:
            self.assertIn('.address_size 64', ptx)
        else:
            self.assertIn('.address_size 32', ptx)
コード例 #9
0
ファイル: test_nvvm_driver.py プロジェクト: zhaijf1992/numba
    def test_nvvm_from_llvm(self):
        # Build a module with one empty void kernel taking a single 32-bit
        # integer, compile it with NVVM and inspect the PTX text.
        m = ir.Module("test_nvvm_from_llvm")
        fty = ir.FunctionType(ir.VoidType(), [ir.IntType(32)])
        kernel = ir.Function(m, fty, name='mycudakernel')
        bldr = ir.IRBuilder(kernel.append_basic_block('entry'))
        bldr.ret_void()
        set_cuda_kernel(kernel)

        fix_data_layout(m)
        ptx = llvm_to_ptx(str(m)).decode('utf8')
        # assertIn gives a failure message showing both operands, unlike
        # assertTrue(a in b).
        self.assertIn('mycudakernel', ptx)
        # `is64bit` (defined outside this block) selects which address size
        # directive is expected in the output.
        address_size = '.address_size 64' if is64bit else '.address_size 32'
        self.assertIn(address_size, ptx)
コード例 #10
0
    def test_const_string(self):
        # These imports are incompatible with CUDASIM, so they are performed
        # inside the test body.
        from numba.cuda.descriptor import CUDATargetDesc
        from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

        targetctx = CUDATargetDesc.targetctx
        mod = targetctx.create_module("")
        textstring = 'A Little Brown Fox'
        gv0 = targetctx.insert_const_string(mod, textstring)
        # Insert the same string a second time. The return value is not
        # needed; only the effect on the module is checked below.
        targetctx.insert_const_string(mod, textstring)

        res = re.findall(
            r"@\"__conststring__.*internal.*constant.*\["
            r"19\s+x\s+i8\]", str(mod))
        # Despite two insertions, only one definition may be present
        self.assertEqual(len(res), 1)

        fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

        # Using insert_const_string
        fn = mod.add_function(fnty, name="test_insert_const_string")
        builder = ir.IRBuilder(fn.append_basic_block())
        res = targetctx.insert_addrspace_conv(builder,
                                              gv0,
                                              addrspace=ADDRSPACE_CONSTANT)
        builder.ret(res)

        matches = re.findall(
            r"@\"__conststring__.*internal.*constant.*\["
            r"19\s+x\s+i8\]", str(mod))
        self.assertEqual(len(matches), 1)

        # Using insert_string_const_addrspace
        fn = mod.add_function(fnty, name="test_insert_string_const_addrspace")
        builder = ir.IRBuilder(fn.append_basic_block())
        res = targetctx.insert_string_const_addrspace(builder, textstring)
        builder.ret(res)

        matches = re.findall(
            r"@\"__conststring__.*internal.*constant.*\["
            r"19\s+x\s+i8\]", str(mod))
        self.assertEqual(len(matches), 1)

        # The generated PTX must contain exactly one matching .const line.
        # re.findall already returns a list, so no list() wrapper is needed.
        ptx = llvm_to_ptx(str(mod)).decode('ascii')
        matches = re.findall(r"\.const.*__conststring__", ptx)

        self.assertEqual(len(matches), 1)
コード例 #11
0
    def test_const_string(self):
        # These imports are incompatible with CUDASIM, so they are performed
        # inside the test body.
        from numba.cuda.descriptor import CUDATargetDesc
        from numba.cuda.cudadrv.nvvm import llvm_to_ptx, ADDRSPACE_CONSTANT

        targetctx = CUDATargetDesc.targetctx
        mod = targetctx.create_module("")
        textstring = 'A Little Brown Fox'
        gv0 = targetctx.insert_const_string(mod, textstring)
        # Second insertion of the same string: its return value is unused,
        # the checks below only look at the resulting module.
        targetctx.insert_const_string(mod, textstring)

        res = re.findall(r"@\"__conststring__.*internal.*constant.*\["
                         r"19\s+x\s+i8\]", str(mod))
        # Despite two insertions, only one definition may be present
        self.assertEqual(len(res), 1)

        fnty = ir.FunctionType(ir.IntType(8).as_pointer(), [])

        # Using insert_const_string
        fn = mod.add_function(fnty, name="test_insert_const_string")
        builder = ir.IRBuilder(fn.append_basic_block())
        res = targetctx.insert_addrspace_conv(builder, gv0,
                                              addrspace=ADDRSPACE_CONSTANT)
        builder.ret(res)

        matches = re.findall(r"@\"__conststring__.*internal.*constant.*\["
                             r"19\s+x\s+i8\]", str(mod))
        self.assertEqual(len(matches), 1)

        # Using insert_string_const_addrspace
        fn = mod.add_function(fnty, name="test_insert_string_const_addrspace")
        builder = ir.IRBuilder(fn.append_basic_block())
        res = targetctx.insert_string_const_addrspace(builder, textstring)
        builder.ret(res)

        matches = re.findall(r"@\"__conststring__.*internal.*constant.*\["
                             r"19\s+x\s+i8\]", str(mod))
        self.assertEqual(len(matches), 1)

        # The generated PTX must contain exactly one matching .const line.
        # re.findall already returns a list, so no list() wrapper is needed.
        ptx = llvm_to_ptx(str(mod)).decode('ascii')
        matches = re.findall(r"\.const.*__conststring__", ptx)

        self.assertEqual(len(matches), 1)
コード例 #12
0
ファイル: test_inline_ptx.py プロジェクト: neeck/numba
    def test_inline_rsqrt(self):
        # Emit a kernel that loads the float behind its pointer argument,
        # applies the PTX instruction rsqrt.approx.f32 to it through inline
        # assembly and stores the result back through the same pointer.
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, "cu_rsqrt")
        bldr = Builder.new(fn.append_basic_block("entry"))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                                  "rsqrt.approx.f32 $0, $1;",
                                  "=f,f", side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        # The inline instruction must survive into the generated PTX.
        # assertIn shows both operands if the check fails.
        self.assertIn("rsqrt.approx.f32", str(ptx))
コード例 #13
0
    def test_inline_rsqrt(self):
        # Build a kernel whose body is an inline-assembly call to the PTX
        # instruction rsqrt.approx.f32: the float behind the pointer argument
        # is loaded, passed to the instruction and the result stored back.
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, 'cu_rsqrt')
        bldr = Builder.new(fn.append_basic_block('entry'))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                                  'rsqrt.approx.f32 $0, $1;',
                                  '=f,f', side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        # The inline instruction must survive into the generated PTX.
        # assertIn shows both operands if the check fails.
        self.assertIn('rsqrt.approx.f32', str(ptx))
コード例 #14
0
ファイル: test_inline_ptx.py プロジェクト: zhaijf1992/numba
    def test_inline_rsqrt(self):
        # Emit a kernel that loads the float behind its pointer argument,
        # applies the PTX instruction rsqrt.approx.f32 to it through inline
        # assembly and stores the result back through the same pointer.
        mod = ir.Module(__name__)
        fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(ir.FloatType())])
        fn = ir.Function(mod, fnty, 'cu_rsqrt')
        bldr = ir.IRBuilder(fn.append_basic_block('entry'))

        rsqrt_approx_fnty = ir.FunctionType(ir.FloatType(), [ir.FloatType()])
        inlineasm = ir.InlineAsm(rsqrt_approx_fnty,
                                 'rsqrt.approx.f32 $0, $1;',
                                 '=f,f',
                                 side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        # The inline instruction must survive into the generated PTX.
        # assertIn shows both operands if the check fails.
        self.assertIn('rsqrt.approx.f32', str(ptx))
コード例 #15
0
 def test_nvvm_compile_simple(self):
     # Compile the input returned by self.get_ptx() with default options and
     # check that the names 'simple' and 'ave' appear in the PTX text.
     nvvmir = self.get_ptx()
     ptx = llvm_to_ptx(nvvmir).decode('utf8')
     # No debug print of the PTX here: assertIn already includes the searched
     # text and the container in its failure message.
     self.assertIn('simple', ptx)
     self.assertIn('ave', ptx)
コード例 #16
0
 def test_nvvm_compile_simple(self):
     # Compile the input returned by self.get_ptx() with default options and
     # check that the names 'simple' and 'ave' appear in the PTX text.
     nvvmir = self.get_ptx()
     ptx = llvm_to_ptx(nvvmir).decode('utf8')
     # assertIn reports both operands on failure, unlike assertTrue(a in b)
     self.assertIn('simple', ptx)
     self.assertIn('ave', ptx)
コード例 #17
0
# Script fragment: compile `axpy` (defined outside this fragment) down to
# PTX via NVVM, then link that PTX with the driver's Linker and print the
# results.

# Have to cheat a bit here to get everything needed to give to NVVM
with global_compiler_lock:
    argtys = (float32[:], int32, float32[:], float32[:])
    # NOTE(review): `returnty` is assigned but not used below; the call to
    # compile_cuda passes `void` directly.
    returnty = void
    cres = compile_cuda(axpy.py_func, void, argtys, debug=False, inline=False)
    fname = cres.fndesc.llvm_func_name
    lib, kernel = cres.target_context.prepare_cuda_kernel(cres.library,
                                                          fname,
                                                          cres.signature.args,
                                                          debug=False)
    # Reaches into a private attribute of the library to get the module
    llvm_module = lib._final_module

    # presumably a compute capability of 5.2 -- TODO confirm
    cc = (5, 2)
    arch = nvvm.get_arch_option(*cc)
    llvmir = str(llvm_module)
    ptx = nvvm.llvm_to_ptx(llvmir, opt=3, arch=arch)

# `ptx` is decoded only for display; the undecoded value is passed to the
# linker below.
print(ptx.decode('utf-8'))

# PTX to module

from numba.cuda.cudadrv.driver import Linker  # noqa

linker = Linker()
linker.add_ptx(ptx)
# complete() yields two values, unpacked here as the cubin and its size
cubin, size = linker.complete()

compile_info = linker.info_log

print(size)
print(compile_info)
コード例 #18
0
 def test_nvvm_compile_simple(self):
     # Compile the input returned by self.get_ptx() with default options and
     # check that the names "simple" and "ave" appear in the PTX text.
     nvvmir = self.get_ptx()
     ptx = llvm_to_ptx(nvvmir).decode("utf8")
     # assertIn reports both operands on failure, unlike assertTrue(a in b)
     self.assertIn("simple", ptx)
     self.assertIn("ave", ptx)
コード例 #19
0
 def test_nvvm_ir_verify_fail(self):
     # A module whose triple is set to "unknown-unknown-unknown" must make
     # llvm_to_ptx raise NvvmError with a message about the invalid triple.
     bad_module = ir.Module("test_bad_ir")
     bad_module.triple = "unknown-unknown-unknown"
     fix_data_layout(bad_module)
     expect_error = self.assertRaisesRegex(NvvmError, 'Invalid target triple')
     with expect_error:
         llvm_to_ptx(str(bad_module))