# The string literal below is C++-style statement text that extracts a
# CUdeviceptr from the "gpudata" attribute of gpu_array. Its trailing comma,
# and the 'host_statements' name mentioned further along the line, suggest it
# is one element of a list opened above this point -- TODO confirm, the
# opening is not visible here.
# NOTE(review): the first '#' on this physical line comes immediately after
# that trailing comma, so Python treats everything after it as comment text:
# the 'my_sort(ptr, length)' and 'return gpu_array' strings, the closing ']',
# the host_mod.add_function(...) call, the print() calls, the codepy imports
# and the nvcc_mod.compile(...) call. Presumably the original line breaks
# were lost; confirm against the upstream script before relying on any of it
# executing.
# NOTE(review): the following line of this file is a near-duplicate that
# reads the "ptr" attribute instead of "gpudata"; confirm which variant is
# intended for extract<CUdeviceptr>.
'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))', #Call Thrust routine, compiled into the CudaModule 'my_sort(ptr, length)', #Return result 'return gpu_array', ] host_mod.add_function( FunctionBody( FunctionDeclaration(Value('object', 'host_entry'), [Value('object', 'gpu_array')]), Block([Statement(x) for x in host_statements]))) #Print out generated code, to see what we're actually compiling print("---------------------- Host code ----------------------") print(host_mod.generate()) print("--------------------- Device code ---------------------") print(nvcc_mod.generate()) print("-------------------------------------------------------") #Compile modules import codepy.jit, codepy.toolchain gcc_toolchain = codepy.toolchain.guess_toolchain() nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain() module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)
# The string literal below is C++-style statement text that extracts a
# CUdeviceptr from the "ptr" attribute of gpu_array. Its trailing comma, and
# the 'host_statements' name mentioned further along the line, suggest it is
# one element of a list opened above this point -- TODO confirm, the opening
# is not visible here.
# NOTE(review): the first '#' on this physical line comes immediately after
# that trailing comma, so Python treats everything after it as comment text:
# the 'my_sort(ptr, length)' and 'return gpu_array' strings, the closing ']',
# the host_mod.add_function(...) call, the print((...)) calls, the codepy
# imports, the nvcc_mod.compile(...) call, and the trailing
# 'length = 100' / 'a = np.array(...)' / print statements. Presumably the
# original line breaks were lost; confirm against the upstream script before
# relying on any of it executing.
# NOTE(review): the doubled parentheses in print((host_mod.generate())) and
# print((nvcc_mod.generate())) are redundant grouping only; they would print
# the same value as a single pair if this text were live code.
'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("ptr"))', #Call Thrust routine, compiled into the CudaModule 'my_sort(ptr, length)', #Return result 'return gpu_array', ] host_mod.add_function( FunctionBody( FunctionDeclaration(Value('object', 'host_entry'), [Value('object', 'gpu_array')]), Block([Statement(x) for x in host_statements]))) #Print out generated code, to see what we're actually compiling print("---------------------- Host code ----------------------") print((host_mod.generate())) print("--------------------- Device code ---------------------") print((nvcc_mod.generate())) print("-------------------------------------------------------") #Compile modules import codepy.jit, codepy.toolchain gcc_toolchain = codepy.toolchain.guess_toolchain() nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain() module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True) length = 100 a = np.array(np.random.rand(length), dtype=np.float32) print("---------------------- Unsorted -----------------------") print(a)