Example #1
def test_atomic_fp_native(filter_str, return_list_of_op, fdtype, addrspace):
    LLVM_SPIRV_ROOT = os.environ.get("NUMBA_DPPY_LLVM_SPIRV_ROOT")
    if not LLVM_SPIRV_ROOT:
        pytest.skip(
            "Please set the environment variable NUMBA_DPPY_LLVM_SPIRV_ROOT "
            "to run this test"
        )

    if atomic_skip_test(filter_str):
        pytest.skip()

    a = np.array([0], fdtype)

    op_type, expected = return_list_of_op

    if addrspace == "global":
        op = getattr(dppy.atomic, op_type)

        def f(a):
            op(a, 0, 1)

    elif addrspace == "local":
        f = get_func_local(op_type, fdtype)

    kernel = dppy.kernel(f)

    NATIVE_FP_ATOMICS_old_val = config.NATIVE_FP_ATOMICS
    config.NATIVE_FP_ATOMICS = 1

    LLVM_SPIRV_ROOT_old_val = config.LLVM_SPIRV_ROOT
    config.LLVM_SPIRV_ROOT = LLVM_SPIRV_ROOT

    with dppy.offload_to_sycl_device(filter_str) as sycl_queue:
        kern = kernel[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            kernel._get_argtypes(a), sycl_queue
        )
        if filter_str != "opencl:cpu:0":
            assert "__spirv_AtomicFAddEXT" in kern.assembly
        else:
            assert "__spirv_AtomicFAddEXT" not in kern.assembly

    config.NATIVE_FP_ATOMICS = 0

    # Re-create the kernel to bypass the kernel cache
    kernel = dppy.kernel(f)
    with dppy.offload_to_sycl_device(filter_str) as sycl_queue:
        kern = kernel[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            kernel._get_argtypes(a), sycl_queue
        )
        assert "__spirv_AtomicFAddEXT" not in kern.assembly

    config.NATIVE_FP_ATOMICS = NATIVE_FP_ATOMICS_old_val
    config.LLVM_SPIRV_ROOT = LLVM_SPIRV_ROOT_old_val
Example #2
def kernel_result_pair(request):
    op = getattr(dppy.atomic, request.param[0])

    def f(a):
        op(a, 0, 1)

    return dppy.kernel(f), request.param[1]
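The fixture above pairs a compiled atomic kernel with the result the test expects. A minimal sketch of how it might be parametrized and consumed, assuming dppy is numba_dppy; the list_of_op pairs, the filter string, and the test body are illustrative assumptions, not taken from the source:

# Hypothetical parametrization and usage of the fixture above.
import dpctl
import numpy as np
import pytest
import numba_dppy as dppy

global_size = 10

# Assumed (operation name, expected result) pairs: global_size work-items
# each atomically add or subtract 1 at a[0].
list_of_op = [("add", 10), ("sub", -10)]


@pytest.fixture(params=list_of_op)
def kernel_result_pair(request):
    op = getattr(dppy.atomic, request.param[0])

    def f(a):
        op(a, 0, 1)

    return dppy.kernel(f), request.param[1]


def test_kernel_atomic_simple(kernel_result_pair):
    kernel, expected = kernel_result_pair
    a = np.array([0], np.float32)
    with dpctl.device_context("opencl:gpu"):  # assumed SYCL filter string
        kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
    assert a[0] == expected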
Example #3
def test_caching_kernel_using_same_context(filter_str):
    """Test kernel caching for the scenario where different SYCL queues that
    share a SYCL context are used to submit a kernel.

    Args:
        filter_str: SYCL filter selector string
    """
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    # Set the global queue to the device specified by filter_str so that
    # cached_kernel gets created for that device
    dpctl.set_global_queue(filter_str)
    func = dppy.kernel(data_parallel_sum)
    default_queue = dpctl.get_current_queue()
    cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
        func._get_argtypes(a, b, c), default_queue)
    for i in range(10):
        # Each iteration creates a fresh queue that shares the same context.
        with dpctl.device_context(filter_str) as gpu_queue:
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Example #4
def test_caching_kernel_using_same_queue(filter_str):
    """Test kernel caching when the same queue is used to submit a kernel
    multiple times.

    Args:
        filter_str: SYCL filter selector string
    """
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    with dpctl.device_context(filter_str) as gpu_queue:
        func = dppy.kernel(data_parallel_sum)
        cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            func._get_argtypes(a, b, c), gpu_queue)

        for i in range(10):
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Example #5
def test_atomic_fp_native(
    filter_str,
    NATIVE_FP_ATOMICS,
    expected_native_atomic_for_device,
    function_generator,
    operator_name,
    expected_spirv_function,
    dtype,
):
    function = function_generator(operator_name, dtype)
    kernel = dppy.kernel(function)
    argtypes = kernel._get_argtypes(np.array([0], dtype))

    with override_config("NATIVE_FP_ATOMICS", NATIVE_FP_ATOMICS):

        with dpctl.device_context(filter_str) as sycl_queue:

            specialized_kernel = kernel[global_size,
                                        dppy.DEFAULT_LOCAL_SIZE].specialize(
                                            argtypes, sycl_queue)

            is_native_atomic = (expected_spirv_function
                                in specialized_kernel.assembly)
            assert is_native_atomic == expected_native_atomic_for_device(
                filter_str)
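override_config temporarily flips a numba-dppy configuration flag and restores the old value afterwards. A minimal sketch of such a helper, assuming config is the numba_dppy.config module seen in Example #1; the helper actually used by the test suite is not shown in the source:

# Hypothetical helper: temporarily override a numba_dppy config attribute.
from contextlib import contextmanager

from numba_dppy import config


@contextmanager
def override_config(name, value):
    saved = getattr(config, name)
    setattr(config, name, value)
    try:
        yield
    finally:
        setattr(config, name, saved)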
Example #6
def test_kernel_arg_types(filter_str, input_arrays):
    kernel = dppy.kernel(mul_kernel)
    a, actual, c = input_arrays
    expected = a * c
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, local_size](a, actual, c)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
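mul_kernel itself is not part of this snippet. A plausible body, inferred from the expected = a * c check above; treating the third argument as a scalar is an assumption:

# Hypothetical kernel body consistent with the assertion actual == a * c.
import numba_dppy as dppy


def mul_kernel(a, actual, c):
    i = dppy.get_global_id(0)
    actual[i] = a[i] * c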
Example #7
def test_return(filter_str, sig):
    a = np.array(np.random.random(122), np.int32)

    with pytest.raises(TypeError):
        kernel = dppy.kernel(sig)(f)

        device = dpctl.SyclDevice(filter_str)
        with dpctl.device_context(device):
            kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a)
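The kernel body f is not shown. The TypeError expectation suggests it tries to return a value, which dppy.kernel functions do not support; a plausible, purely illustrative body:

# Hypothetical kernel body: returning a value from a kernel is expected
# to fail with TypeError.
def f(a):
    return a[0]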
Example #8
def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
    a, dtype = input_arrays
    op_type, expected = return_list_of_op
    f = get_func_local(op_type, dtype)
    kernel = dppy.kernel(f)
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, global_size](a)
    assert a[0] == expected
Example #9
def test_bool_type(filter_str):
    kernel = dppy.kernel(check_bool_kernel)
    a = np.array([2], np.int64)

    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
        assert a[0] == 111
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)
        assert a[0] == 222
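check_bool_kernel is not shown. A body consistent with the two assertions above, inferred from the expected values 111 and 222 (hypothetical):

# Hypothetical kernel body: write 111 when the flag is truthy, 222 otherwise.
def check_bool_kernel(a, test):
    if test:
        a[0] = 111
    else:
        a[0] = 222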
Example #10
def test_kernel_arg_types(filter_str, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    kernel = dppy.kernel(mul_kernel)
    a, actual, c = input_arrays
    expected = a * c
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, local_size](a, actual, c)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
Example #11
def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
    if atomic_skip_test(filter_str):
        pytest.skip()

    a, dtype = input_arrays
    op_type, expected = return_list_of_op
    f = get_func_local(op_type, dtype)
    kernel = dppy.kernel(f)
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, global_size](a)
    assert a[0] == expected
Example #12
def test_return(offload_device, sig):
    if skip_test(offload_device):
        pytest.skip()

    a = np.array(np.random.random(122), np.int32)

    with pytest.raises(TypeError):
        kernel = dppy.kernel(sig)(f)

        device = dpctl.SyclDevice(offload_device)
        with dppy.offload_to_sycl_device(device):
            kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a)
Example #13
def get_kernel_multi_dim(op_type, size):
    op = getattr(dppy.atomic, op_type)
    # Index the first element: a plain 0 for 1-D arrays, a tuple of size zeros otherwise.
    idx = 0 if size == 1 else (0,) * size

    def f(a):
        op(a, idx, 1)

    return dppy.kernel(f)
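A hypothetical use of the helper above on a 2-D array; the filter string, array shape, and work size are assumptions, not taken from the source:

# Hypothetical usage: 16 work-items each atomically add 1 at index (0, 0).
import dpctl
import numpy as np
import numba_dppy as dppy

kernel = get_kernel_multi_dim("add", 2)
a = np.zeros((4, 4), dtype=np.float32)
with dpctl.device_context("opencl:gpu"):  # assumed SYCL filter string
    kernel[16, dppy.DEFAULT_LOCAL_SIZE](a)
assert a[0, 0] == 16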
Example #14
def test_bool_type(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    kernel = dppy.kernel(check_bool_kernel)
    a = np.array([2], np.int64)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
        assert a[0] == 111
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)
        assert a[0] == 222
Example #15
def kernel(request):
    return dppy.kernel(access_types=request.param)(sum_kernel)
Example #16
def _compile_kernel(self, fnobj, sig):
    return dppy.kernel(sig)(fnobj)
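This method is shown without its enclosing class. A minimal, hypothetical context in which it might live; only _compile_kernel comes from the source, the class name is invented for illustration:

# Hypothetical enclosing class for the extracted method above.
import numba_dppy as dppy


class KernelBuilder:
    def _compile_kernel(self, fnobj, sig):
        # Compile fnobj into a dppy kernel specialized to the given signature.
        return dppy.kernel(sig)(fnobj)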