def test_compile(drv, *args):
    """compile() should build the kernel via source_mod and hand back its function."""
    drv = setup_mock(drv)
    with cuda.CudaFunctions(0) as dev:
        dev.source_mod = Mock()
        dev.source_mod.return_value.get_function.return_value = 'func'

        # wrap a trivial kernel string in the kernel-instance plumbing
        kernel_name = "vector_add"
        kernel_string = "__global__ void vector_add()"
        sources = KernelSource(kernel_name, kernel_string, "cuda")
        instance = KernelInstance(kernel_name, sources, kernel_string, [], None, None, dict(), [])

        result = dev.compile(instance)

        # source_mod is invoked exactly once with our kernel string
        assert dev.source_mod.call_count == 1
        assert dev.current_module is dev.source_mod.return_value
        assert result == 'func'
        first_call = list(dev.source_mod.mock_calls[0])
        assert first_call[1][0] == kernel_string
        # compiler options target the mocked compute capability 5.5
        kwargs = first_call[2]
        assert kwargs['code'] == 'sm_55'
        assert kwargs['arch'] == 'compute_55'
def test_copy_texture_memory_args(drv, *args):
    """Texture args should be bound via matrix_to_texref with the requested modes."""
    drv = setup_mock(drv)
    fake_array = np.zeros(10).astype(np.float32)
    texref = Mock()

    dev = cuda.CudaFunctions(0)
    dev.current_module = Mock()
    dev.current_module.get_texref.return_value = texref

    # plain ndarray value: a single C-order copy into the named texref
    dev.copy_texture_memory_args({'fake_tex': fake_array})
    drv.matrix_to_texref.assert_called_once_with(fake_array, texref, order="C")
    dev.current_module.get_texref.assert_called_once_with('fake_tex')

    # dict value: filter mode and per-dimension address modes must be applied
    dev.copy_texture_memory_args({
        'fake_tex2': {
            'array': fake_array,
            'filter_mode': 'linear',
            'address_mode': ['border', 'clamp']
        }
    })
    drv.matrix_to_texref.assert_called_with(fake_array, texref, order="C")
    dev.current_module.get_texref.assert_called_with('fake_tex2')
    texref.set_filter_mode.assert_called_once_with(drv.filter_mode.LINEAR)
    texref.set_address_mode.assert_any_call(0, drv.address_mode.BORDER)
    texref.set_address_mode.assert_any_call(1, drv.address_mode.CLAMP)
def test_benchmark(drv, *args):
    """benchmark() should record start/stop events per iteration and report time > 0."""
    drv = setup_mock(drv)
    event = drv.Event.return_value
    event.time_since.return_value = 0.1

    dev = cuda.CudaFunctions(0)
    res = dev.benchmark(dummy_func, [1, 2], (1, 2), (1, 2))

    assert res["time"] > 0
    # one context-wide synchronize overall, one event synchronize per iteration
    assert dev.context.synchronize.call_count == 1
    assert event.synchronize.call_count == dev.iterations
    # both a start and a stop event are recorded every iteration
    assert event.record.call_count == 2 * dev.iterations
    assert event.time_since.call_count == dev.iterations
def test_benchmark(drv):
    """benchmark() should synchronize and record twice per iteration, returning time > 0."""
    drv = setup_mock(drv)
    event = drv.Event.return_value
    event.time_since.return_value = 0.1

    dev = cuda.CudaFunctions(0)
    args = [1, 2]
    elapsed = dev.benchmark(test_func, args, (1, 2), (1, 2))

    assert elapsed > 0
    # this backend variant synchronizes the context around every measurement
    assert dev.context.synchronize.call_count == 2 * dev.iterations
    assert event.record.call_count == 2 * dev.iterations
    assert event.time_since.call_count == dev.iterations
def test_copy_constant_memory_args(drv, *args):
    """Constant-memory args should be copied htod through the module's global symbol."""
    drv = setup_mock(drv)
    fake_array = np.zeros(10).astype(np.float32)

    dev = cuda.CudaFunctions(0)
    dev.current_module = Mock()
    dev.current_module.get_global.return_value = ['get_global']

    dev.copy_constant_memory_args({'fake_array': fake_array})

    # exactly one symbol lookup followed by exactly one host-to-device copy
    drv.memcpy_htod.assert_called_once_with('get_global', fake_array)
    dev.current_module.get_global.assert_called_once_with('fake_array')
def test_ready_argument_list():
    """ndarray arguments become device allocations; numpy scalars pass through."""
    n = 1000
    scalar = np.int32(75)
    src = np.random.randn(n).astype(np.float32)
    dst = np.zeros_like(src)

    dev = cuda.CudaFunctions(0)
    gpu_args = dev.ready_argument_list([dst, scalar, src])

    # both arrays get device buffers, the scalar is returned unchanged
    assert isinstance(gpu_args[0], pycuda.driver.DeviceAllocation)
    assert isinstance(gpu_args[1], np.int32)
    assert isinstance(gpu_args[2], pycuda.driver.DeviceAllocation)
def test_compile(drv, src_mod):
    """compile() should call the mocked SourceModule once with string and arch flags."""
    drv = setup_mock(drv)
    src_mod.return_value.get_function.return_value = 'func'

    dev = cuda.CudaFunctions(0)
    kernel_string = "__global__ void vector_add()"
    result = dev.compile("vector_add", kernel_string)

    assert src_mod.call_count == 1
    assert dev.current_module is src_mod.return_value
    assert result == 'func'
    first_call = list(src_mod.mock_calls[0])
    assert first_call[1][0] == kernel_string
    # compiler options target the mocked compute capability 5.5
    options = first_call[2]
    assert options['code'] == 'sm_55'
    assert options['arch'] == 'compute_55'
def test_ready_argument_list(drv, *args):
    """ready_argument_list() should allocate+copy ndarrays and pass scalars through.

    With the mocked driver, a float32 array of 5 elements must trigger a single
    20-byte mem_alloc followed by one htod copy; the int32 scalar is returned
    as-is (not wrapped in a device allocation).
    """
    drv = setup_mock(drv)
    size = 5
    scalar = np.int32(75)
    array = np.random.randn(size).astype(np.float32)

    dev = cuda.CudaFunctions(0)
    gpu_args = dev.ready_argument_list([scalar, array])

    # removed leftover debug print() calls that only cluttered test output
    # 5 float32 elements -> 20 bytes allocated, then copied host-to-device
    drv.mem_alloc.assert_called_once_with(20)
    drv.memcpy_htod.assert_called_once_with('mem_alloc', array)
    # the scalar argument is forwarded untouched
    assert isinstance(gpu_args[0], np.int32)
def test_compile():
    """Compiling a valid kernel against a real device should not raise.

    Fixed to pass the kernel name to KernelSource explicitly, matching the
    (name, source, lang) signature used by the other compile tests in this
    file — the original call dropped the name argument.
    """
    kernel_string = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i<n) {
            c[i] = a[i] + b[i];
        }
    }
    """
    kernel_name = "vector_add"
    kernel_sources = KernelSource(kernel_name, kernel_string, "cuda")
    kernel_instance = KernelInstance(kernel_name, kernel_sources, kernel_string, [], None, None, dict(), [])
    dev = cuda.CudaFunctions(0)
    try:
        dev.compile(kernel_instance)
    except Exception as e:
        pytest.fail("Did not expect any exception:" + str(e))
def test_compile(drv, _):
    """compile() should invoke the mocked source_mod once and return its function."""
    drv = setup_mock(drv)
    dev = cuda.CudaFunctions(0)
    dev.source_mod = Mock()
    dev.source_mod.return_value.get_function.return_value = 'func'

    kernel_string = "__global__ void vector_add()"
    result = dev.compile("vector_add", kernel_string)

    # a single compile call carrying our kernel string
    assert dev.source_mod.call_count == 1
    assert dev.current_module is dev.source_mod.return_value
    assert result == 'func'
    first_call = list(dev.source_mod.mock_calls[0])
    assert first_call[1][0] == kernel_string
    # compiler options target the mocked compute capability 5.5
    options = first_call[2]
    assert options['code'] == 'sm_55'
    assert options['arch'] == 'compute_55'
def test_compile():
    """Compilation should fail for an absurd shared-memory size and succeed for a sane one.

    Rewritten to use pytest.fail instead of the print()+assert False/assert True
    anti-pattern (pytest outcomes derive from BaseException, so fail() inside the
    try is not swallowed by `except Exception`); the isinstance success-check was
    moved out of the branch that is expected to raise.
    """
    original_kernel = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        __shared__ float test[shared_size];
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i<n) {
            test[0] = a[i];
            c[i] = test[0] + b[i];
        }
    }
    """
    dev = cuda.CudaFunctions(0)

    # 100 MiB of shared memory vastly exceeds any device limit: compile must raise
    kernel_string = original_kernel.replace("shared_size", str(100 * 1024 * 1024))
    try:
        dev.compile("vector_add", kernel_string)
        pytest.fail("Expected an exception because too much shared memory is requested")
    except Exception as e:
        if "uses too much shared data" not in str(e):
            pytest.fail("Expected a different exception:" + str(e))

    # a tiny shared array must compile cleanly into a driver Function
    kernel_string = original_kernel.replace("shared_size", str(100))
    try:
        func = dev.compile("vector_add", kernel_string)
        assert isinstance(func, pycuda.driver.Function)
    except Exception as e:
        pytest.fail("Did not expect any exception:" + str(e))
def test_benchmark():
    """benchmark() should return a positive time plus one measurement per iteration."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    res = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2))
    assert res["time"] > 0
    assert len(res["times"]) == dev.iterations
def test_benchmark():
    """On a real CUDA device, benchmark() should report a positive runtime."""
    skip_if_no_cuda_device()
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    elapsed = dev.benchmark(test_func, kernel_args, (1, 2), (1, 2))
    assert elapsed > 0
def test_benchmark_times():
    """With the times flag set, benchmark() should return the per-iteration list (7 entries)."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    measurements = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2), True)
    assert len(measurements) == 7
def test_benchmark():
    """With the times flag unset, benchmark() should return a single positive time."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    elapsed = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2), False)
    assert elapsed > 0