def test_compile(drv, *args):
    """compile() should build the kernel via source_mod and hand back its function."""
    drv = setup_mock(drv)
    with cuda.CudaFunctions(0) as dev:
        dev.source_mod = Mock()
        dev.source_mod.return_value.get_function.return_value = 'func'

        # wrap a trivial kernel string in the kernel-instance plumbing
        kernel_name = "vector_add"
        kernel_string = "__global__ void vector_add()"
        sources = KernelSource(kernel_name, kernel_string, "cuda")
        instance = KernelInstance(kernel_name, sources, kernel_string, [], None, None, dict(), [])

        result = dev.compile(instance)

        # source_mod is invoked exactly once with our kernel string
        assert dev.source_mod.call_count == 1
        assert dev.current_module is dev.source_mod.return_value
        assert result == 'func'
        first_call = list(dev.source_mod.mock_calls[0])
        assert first_call[1][0] == kernel_string
        # compiler options target the mocked compute capability 5.5
        kwargs = first_call[2]
        assert kwargs['code'] == 'sm_55'
        assert kwargs['arch'] == 'compute_55'
def test_copy_texture_memory_args(drv, *args):
    """Texture args should be bound via matrix_to_texref with the requested modes."""
    drv = setup_mock(drv)
    fake_array = np.zeros(10).astype(np.float32)
    texref = Mock()

    dev = cuda.CudaFunctions(0)
    dev.current_module = Mock()
    dev.current_module.get_texref.return_value = texref

    # plain ndarray value: a single C-order copy into the named texref
    dev.copy_texture_memory_args({'fake_tex': fake_array})
    drv.matrix_to_texref.assert_called_once_with(fake_array, texref, order="C")
    dev.current_module.get_texref.assert_called_once_with('fake_tex')

    # dict value: filter mode and per-dimension address modes must be applied
    dev.copy_texture_memory_args({
        'fake_tex2': {
            'array': fake_array,
            'filter_mode': 'linear',
            'address_mode': ['border', 'clamp']
        }
    })
    drv.matrix_to_texref.assert_called_with(fake_array, texref, order="C")
    dev.current_module.get_texref.assert_called_with('fake_tex2')
    texref.set_filter_mode.assert_called_once_with(drv.filter_mode.LINEAR)
    texref.set_address_mode.assert_any_call(0, drv.address_mode.BORDER)
    texref.set_address_mode.assert_any_call(1, drv.address_mode.CLAMP)
def test_benchmark(drv, *args):
    """benchmark() should record start/stop events per iteration and report time > 0."""
    drv = setup_mock(drv)
    event = drv.Event.return_value
    event.time_since.return_value = 0.1

    dev = cuda.CudaFunctions(0)
    res = dev.benchmark(dummy_func, [1, 2], (1, 2), (1, 2))

    assert res["time"] > 0
    # one context-wide synchronize overall, one event synchronize per iteration
    assert dev.context.synchronize.call_count == 1
    assert event.synchronize.call_count == dev.iterations
    # both a start and a stop event are recorded every iteration
    assert event.record.call_count == 2 * dev.iterations
    assert event.time_since.call_count == dev.iterations
def test_benchmark(drv):
    """benchmark() should synchronize and record twice per iteration, returning time > 0."""
    drv = setup_mock(drv)
    event = drv.Event.return_value
    event.time_since.return_value = 0.1

    dev = cuda.CudaFunctions(0)
    args = [1, 2]
    elapsed = dev.benchmark(test_func, args, (1, 2), (1, 2))

    assert elapsed > 0
    # this backend variant synchronizes the context around every measurement
    assert dev.context.synchronize.call_count == 2 * dev.iterations
    assert event.record.call_count == 2 * dev.iterations
    assert event.time_since.call_count == dev.iterations
def test_copy_constant_memory_args(drv, *args):
    """Constant-memory args should be copied htod through the module's global symbol."""
    drv = setup_mock(drv)
    fake_array = np.zeros(10).astype(np.float32)

    dev = cuda.CudaFunctions(0)
    dev.current_module = Mock()
    dev.current_module.get_global.return_value = ['get_global']

    dev.copy_constant_memory_args({'fake_array': fake_array})

    # exactly one symbol lookup followed by exactly one host-to-device copy
    drv.memcpy_htod.assert_called_once_with('get_global', fake_array)
    dev.current_module.get_global.assert_called_once_with('fake_array')
def test_ready_argument_list():
    """ndarray arguments become device allocations; numpy scalars pass through."""
    n = 1000
    scalar = np.int32(75)
    src = np.random.randn(n).astype(np.float32)
    dst = np.zeros_like(src)

    dev = cuda.CudaFunctions(0)
    gpu_args = dev.ready_argument_list([dst, scalar, src])

    # both arrays get device buffers, the scalar is returned unchanged
    assert isinstance(gpu_args[0], pycuda.driver.DeviceAllocation)
    assert isinstance(gpu_args[1], np.int32)
    assert isinstance(gpu_args[2], pycuda.driver.DeviceAllocation)
def test_compile(drv, src_mod):
    """compile() should call the mocked SourceModule once with string and arch flags."""
    drv = setup_mock(drv)
    src_mod.return_value.get_function.return_value = 'func'

    dev = cuda.CudaFunctions(0)
    kernel_string = "__global__ void vector_add()"
    result = dev.compile("vector_add", kernel_string)

    assert src_mod.call_count == 1
    assert dev.current_module is src_mod.return_value
    assert result == 'func'
    first_call = list(src_mod.mock_calls[0])
    assert first_call[1][0] == kernel_string
    # compiler options target the mocked compute capability 5.5
    options = first_call[2]
    assert options['code'] == 'sm_55'
    assert options['arch'] == 'compute_55'
def test_ready_argument_list(drv, *args):
    """ready_argument_list() should allocate+copy ndarrays and pass scalars through.

    With the mocked driver, a float32 array of 5 elements must trigger a single
    20-byte mem_alloc followed by one htod copy; the int32 scalar is returned
    as-is (not wrapped in a device allocation).
    """
    drv = setup_mock(drv)
    size = 5
    scalar = np.int32(75)
    array = np.random.randn(size).astype(np.float32)

    dev = cuda.CudaFunctions(0)
    gpu_args = dev.ready_argument_list([scalar, array])

    # removed leftover debug print() calls that only cluttered test output
    # 5 float32 elements -> 20 bytes allocated, then copied host-to-device
    drv.mem_alloc.assert_called_once_with(20)
    drv.memcpy_htod.assert_called_once_with('mem_alloc', array)
    # the scalar argument is forwarded untouched
    assert isinstance(gpu_args[0], np.int32)
def test_compile():
    """Compiling a valid kernel against a real device should not raise.

    Fixed to pass the kernel name to KernelSource explicitly, matching the
    (name, source, lang) signature used by the other compile tests in this
    file — the original call dropped the name argument.
    """
    kernel_string = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i<n) {
            c[i] = a[i] + b[i];
        }
    }
    """
    kernel_name = "vector_add"
    kernel_sources = KernelSource(kernel_name, kernel_string, "cuda")
    kernel_instance = KernelInstance(kernel_name, kernel_sources, kernel_string, [], None, None, dict(), [])
    dev = cuda.CudaFunctions(0)
    try:
        dev.compile(kernel_instance)
    except Exception as e:
        pytest.fail("Did not expect any exception:" + str(e))
def test_compile(drv, _):
    """compile() should invoke the mocked source_mod once and return its function."""
    drv = setup_mock(drv)
    dev = cuda.CudaFunctions(0)
    dev.source_mod = Mock()
    dev.source_mod.return_value.get_function.return_value = 'func'

    kernel_string = "__global__ void vector_add()"
    result = dev.compile("vector_add", kernel_string)

    # a single compile call carrying our kernel string
    assert dev.source_mod.call_count == 1
    assert dev.current_module is dev.source_mod.return_value
    assert result == 'func'
    first_call = list(dev.source_mod.mock_calls[0])
    assert first_call[1][0] == kernel_string
    # compiler options target the mocked compute capability 5.5
    options = first_call[2]
    assert options['code'] == 'sm_55'
    assert options['arch'] == 'compute_55'
def test_compile():
    """Compilation should fail for an absurd shared-memory size and succeed for a sane one.

    Rewritten to use pytest.fail instead of the print()+assert False/assert True
    anti-pattern (pytest outcomes derive from BaseException, so fail() inside the
    try is not swallowed by `except Exception`); the isinstance success-check was
    moved out of the branch that is expected to raise.
    """
    original_kernel = """
    __global__ void vector_add(float *c, float *a, float *b, int n) {
        __shared__ float test[shared_size];
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i<n) {
            test[0] = a[i];
            c[i] = test[0] + b[i];
        }
    }
    """
    dev = cuda.CudaFunctions(0)

    # 100 MiB of shared memory vastly exceeds any device limit: compile must raise
    kernel_string = original_kernel.replace("shared_size", str(100 * 1024 * 1024))
    try:
        dev.compile("vector_add", kernel_string)
        pytest.fail("Expected an exception because too much shared memory is requested")
    except Exception as e:
        if "uses too much shared data" not in str(e):
            pytest.fail("Expected a different exception:" + str(e))

    # a tiny shared array must compile cleanly into a driver Function
    kernel_string = original_kernel.replace("shared_size", str(100))
    try:
        func = dev.compile("vector_add", kernel_string)
        assert isinstance(func, pycuda.driver.Function)
    except Exception as e:
        pytest.fail("Did not expect any exception:" + str(e))
def test_benchmark():
    """benchmark() should return a positive time plus one measurement per iteration."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    res = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2))
    assert res["time"] > 0
    assert len(res["times"]) == dev.iterations
def test_benchmark():
    """On a real CUDA device, benchmark() should report a positive runtime."""
    skip_if_no_cuda_device()
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    elapsed = dev.benchmark(test_func, kernel_args, (1, 2), (1, 2))
    assert elapsed > 0
def test_benchmark_times():
    """With the times flag set, benchmark() should return the per-iteration list (7 entries)."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    measurements = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2), True)
    assert len(measurements) == 7
def test_benchmark():
    """With the times flag unset, benchmark() should return a single positive time."""
    dev = cuda.CudaFunctions(0)
    kernel_args = [1, 2]
    elapsed = dev.benchmark(dummy_func, kernel_args, (1, 2), (1, 2), False)
    assert elapsed > 0