def test_interface_handles_compile_error(dev_interface):
    """Check that a compile failure does not abort tuning or reach benchmarking.

    ``dev_interface`` is presumably the patched device-interface class injected
    by a ``mock.patch`` decorator outside this view -- TODO confirm. Its
    ``compile`` method is made to raise, after which ``tune_kernel`` must return
    normally, having attempted one compilation and no benchmark.
    """
    dev = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    tune_params = {"block_size_x": [256]}
    # Every call to compile raises this exception.
    dev.compile.side_effect = Exception("uses too much shared data")

    tune_kernel("fake_kernel", "fake_kernel", (1, 1), [numpy.int32(0)],
                tune_params, lang="CUDA")

    assert dev.compile.call_count == 1
    # Plain truthiness check instead of comparing a boolean with ``== False``.
    assert not dev.benchmark.called
def test_interface_handles_compile_error(dev_interface):
    """Tuning must survive a failing compile step and skip benchmarking.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm. ``compile`` is configured to
    raise on every call; the test then expects exactly two compile calls and no
    benchmark call.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    device.compile.side_effect = Exception("uses too much shared data")

    source = "__global__ void fake_kernel(int number)"
    problem_size = (1, 1)
    arguments = [np.int32(0)]
    params = {"block_size_x": [256]}

    tune_kernel("fake_kernel", source, problem_size, arguments, params,
                lang="CUDA")

    assert device.compile.call_count == 2
    assert not device.benchmark.called
def test_interface_handles_max_threads(dev_interface):
    """Tune over block sizes 256 and 512 on a device limited to 256 threads.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)
    # Set after configure_mock so this value is the one in effect.
    device.max_threads = 256

    source = "__global__ void fake_kernel(int number)"
    problem_size = (1, 1)
    arguments = [np.int32(0)]
    params = {"block_size_x": [256, 512]}

    tune_kernel("fake_kernel", source, problem_size, arguments, params,
                lang="CUDA")

    # NOTE: assert_called() only establishes that compile ran at least once;
    # it does not pin how many kernel instances were compiled.
    device.compile.assert_called()
def test_interface_calls_functions(dev_interface):
    """Check that tune_kernel drives both compile and benchmark on the device.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    name, source, problem_size, arguments, params = get_fake_kernel()
    tune_kernel(name, source, problem_size, arguments, params, verbose=True)

    device.compile.assert_called()

    # Expected positional arguments of the most recent benchmark call.
    threads = (128, 1, 1)
    grid = (10, 1, 1)
    device.benchmark.assert_called_with('compile', 'ready_argument_list',
                                        threads, grid)
def test_interface_noodles_checks_version(sysmock):
    """Expect ValueError when use_noodles=True and version_info is [2, 7].

    ``sysmock`` is presumably a patched ``sys`` module supplied by a decorator
    outside this view -- TODO confirm.
    """
    sysmock.version_info = [2, 7]
    kernel_name, kernel_string, size, args, tune_params = get_fake_kernel()

    try:
        tune_kernel(kernel_name, kernel_string, size, args, tune_params,
                    use_noodles=True, num_threads=4)
    except ValueError:
        pass  # the expected outcome
    else:
        # Raise explicitly instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would make the test pass vacuously.
        raise AssertionError("tune_kernel did not raise ValueError")
def test_interface_calls_functions(dev_interface):
    """Check the exact compile and benchmark calls issued for the fake kernel.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm. The source string handed to
    ``compile`` is expected to be a block of ``#define`` lines followed by the
    kernel signature.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    name, source, problem_size, arguments, params = get_fake_kernel()
    tune_kernel(name, source, problem_size, arguments, params, verbose=True)

    # Adjacent literals are joined at compile time into one expected string.
    expected_source = (
        "#define block_size_z 1\n"
        "#define block_size_y 1\n"
        "#define block_size_x 128\n"
        "#define grid_size_z 1\n"
        "#define grid_size_y 1\n"
        "#define grid_size_x 10\n"
        "__global__ void fake_kernel(int number)"
    )
    device.compile.assert_called_once_with("fake_kernel", expected_source)
    device.benchmark.assert_called_once_with(
        'compile', 'ready_argument_list', (128, 1, 1), (10, 1, 1)
    )
def test_interface_noodles_checks_noodles(importlibmock):
    """Expect ValueError when use_noodles=True and find_spec returns None.

    ``importlibmock`` is presumably a patched ``importlib`` module supplied by a
    decorator outside this view -- TODO confirm.
    """
    importlibmock.util.find_spec.return_value = None
    kernel_name, kernel_string, size, args, tune_params = get_fake_kernel()

    try:
        tune_kernel(kernel_name, kernel_string, size, args, tune_params,
                    use_noodles=True, num_threads=4)
    except ValueError:
        pass  # the expected outcome
    else:
        # Raise explicitly instead of ``assert False``: assert statements are
        # removed under ``python -O``, which would make the test pass vacuously.
        raise AssertionError("tune_kernel did not raise ValueError")
def test_interface_handles_max_threads(dev_interface):
    """Only the 256-thread configuration may be compiled when max_threads is 256.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm. Block sizes 256 and 512 are
    offered; the test expects exactly one compile call, for the 256 instance.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)
    # Set after configure_mock so this value is the one in effect.
    device.max_threads = 256

    problem_size = (1, 1)
    arguments = [numpy.int32(0)]
    params = {"block_size_x": [256, 512]}

    tune_kernel("fake_kernel", "fake_kernel", problem_size, arguments, params,
                lang="CUDA")

    expected_name = "fake_kernel_256"
    expected_source = (
        "#define block_size_x 256\n"
        "#define grid_size_z 1\n"
        "#define grid_size_y 1\n"
        "#define grid_size_x 1\n"
        "fake_kernel_256"
    )
    device.compile.assert_called_once_with(expected_name, expected_source)
def test_interface_handles_restriction(dev_interface):
    """Restrictions must leave exactly one configuration to compile and run.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm. Of the offered block sizes
    128, 256 and 512, only 256 satisfies both restriction strings.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    params = {"block_size_x": [128, 256, 512]}
    constraints = ["block_size_x > 128", "block_size_x < 512"]
    problem_size = (1, 1)
    arguments = [numpy.int32(0)]

    tune_kernel("fake_kernel", "fake_kernel", problem_size, arguments, params,
                restrictions=constraints, lang="CUDA", verbose=True)

    assert device.compile.call_count == 1
    device.benchmark.assert_called_once_with(
        'compile', 'ready_argument_list', (256, 1, 1), (1, 1, 1)
    )
def test_interface_handles_runtime_error(dev_interface):
    """A failing benchmark must not abort tuning and must yield no results.

    ``dev_interface`` is presumably a patched device-interface class supplied by
    a decorator outside this view -- TODO confirm. ``benchmark`` is configured
    to raise; the test expects one compile call, one benchmark call, and an
    empty first element in the value returned by ``tune_kernel``.
    """
    device = dev_interface.return_value
    dev_interface.configure_mock(**mock_config)

    launch_failure = Exception("too many resources requested for launch")
    device.benchmark.side_effect = launch_failure

    params = {"block_size_x": [256]}
    problem_size = (1, 1)
    arguments = [numpy.int32(0)]

    # The second element of the returned pair is not inspected here.
    results, _ = tune_kernel("fake_kernel", "fake_kernel", problem_size,
                             arguments, params, lang="CUDA")

    assert device.compile.call_count == 1
    device.benchmark.assert_called_once_with(
        'compile', 'ready_argument_list', (256, 1, 1), (1, 1, 1)
    )
    assert len(results) == 0