예제 #1
0
def create_subdevice_queue():
    """
    Partition a CPU sycl device into sub-devices.
    Create a multi-device sycl context.

    """
    cpu_d = dpctl.SyclDevice("cpu")
    cpu_count = cpu_d.max_compute_units
    sub_devs = cpu_d.create_sub_devices(partition=cpu_count // 2)
    multidevice_ctx = dpctl.SyclContext(sub_devs)
    # create a SyclQueue for each sub-device, using commont
    # multi-device context
    q0, q1 = [dpctl.SyclQueue(multidevice_ctx, d) for d in sub_devs]
    # for each sub-device allocate 26 bytes
    m0 = dpctl.memory.MemoryUSMDevice(26, queue=q0)
    m1 = dpctl.memory.MemoryUSMDevice(26, queue=q1)
    # populate m0 with host data of spaces
    hostmem = bytearray(b" " * 26)
    # copy spaces into m1
    m1.copy_from_host(hostmem)
    for i in range(26):
        hostmem[i] = ord("a") + i
    # copy character sequence into m0
    m0.copy_from_host(hostmem)
    # from from m0 to m1. Due to using multi-device context,
    # copying can be done directly
    m1.copy_from_device(m0)
    return bytes(m1.copy_to_host())
예제 #2
0
    def test_with_dppy_context_cpu(self):
        @njit
        def nested_func(a, b):
            np.sin(a, b)

        @njit
        def func(b):
            a = np.ones((64), dtype=np.float64)
            nested_func(a, b)

        config.DEBUG = 1
        expected = np.ones((64), dtype=np.float64)
        got_cpu = np.ones((64), dtype=np.float64)

        with captured_stdout() as got_cpu_message:
            device = dpctl.SyclDevice("opencl:cpu")
            with dppy.offload_to_sycl_device(device):
                func(got_cpu)

        config.DEBUG = 0
        func(expected)

        np.testing.assert_array_equal(expected, got_cpu)
        self.assertTrue(
            "Parfor offloaded to opencl:cpu" in got_cpu_message.getvalue())
def test_mix_argtype(offload_device, input_arrays):
    usm_type = "device"

    a, b, expected = input_arrays
    got = np.ones_like(a)

    device = dpctl.SyclDevice(offload_device)
    queue = dpctl.SyclQueue(device)

    da = dpt.usm_ndarray(
        a.shape,
        dtype=a.dtype,
        buffer=usm_type,
        buffer_ctor_kwargs={"queue": queue},
    )
    da.usm_data.copy_from_host(a.reshape((-1)).view("|u1"))

    dc = dpt.usm_ndarray(
        got.shape,
        dtype=got.dtype,
        buffer=usm_type,
        buffer_ctor_kwargs={"queue": queue},
    )

    with pytest.raises(TypeError) as error_msg:
        sum_kernel[global_size, local_size](da, b, dc)

        assert mix_datatype_err_msg in error_msg
예제 #4
0
def select_device_SUAI(N):
    usm_type = "device"

    a = np.array(np.random.random(N), np.float32)
    b = np.array(np.random.random(N), np.float32)
    got = np.ones_like(a)

    device = dpctl.SyclDevice("opencl:gpu")
    queue = dpctl.SyclQueue(device)

    # We are allocating the data in Opencl GPU and this device
    # will be selected for compute.
    da, db, dc = allocate_SUAI_data(a, b, got, usm_type, queue)

    # Users don't need to specify where the computation will
    # take place. It will be inferred from data.
    sum_kernel[N, 1](da, db, dc)

    dc.usm_data.copy_to_host(got.reshape((-1)).view("|u1"))

    expected = a + b

    assert np.array_equal(got, expected)
    print(
        "Correct result when array with __sycl_usm_array_interface__ is passed!"
    )
예제 #5
0
def test_eig(filter_str, eig_input, capfd):
    if skip_test(filter_str):
        pytest.skip()

    a = eig_input
    fn = get_fn("linalg.eig", 1)
    f = njit(fn)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), dpnp_debug():
        actual_val, actual_vec = f(a)
        captured = capfd.readouterr()
        assert "dpnp implementation" in captured.out

        expected_val, expected_vec = fn(a)

        # sort val/vec by abs value
        vvsort(actual_val, actual_vec)
        vvsort(expected_val, expected_vec)

        # NP change sign of vectors
        for i in range(expected_vec.shape[1]):
            if expected_vec[0, i] * actual_vec[0, i] < 0:
                expected_vec[:, i] = -expected_vec[:, i]

        assert np.allclose(actual_val, expected_val)
        assert np.allclose(actual_vec, expected_vec)
예제 #6
0
def test_kernel_arg_accessor(filter_str, input_arrays, kernel):
    a, b, actual = input_arrays
    expected = a + b
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        call_kernel(global_size, local_size, a, b, actual, kernel)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
def test_unary_ops(filter_str, unary_op, input_arrays, get_shape, capfd):
    a = input_arrays[0]
    op, name = unary_op
    if name != "argsort" and name != "copy":
        a = np.reshape(a, get_shape)
    if name == "cumprod" and (filter_str == "opencl:cpu:0"
                              or a.dtype == np.int32 or is_gen12(filter_str)):
        pytest.skip()
    if name == "cumsum" and (filter_str == "opencl:cpu:0"
                             or a.dtype == np.int32 or is_gen12(filter_str)):
        pytest.skip()
    if name == "mean" and is_gen12(filter_str):
        pytest.skip()
    if name == "argmax" and is_gen12(filter_str):
        pytest.skip()

    actual = np.empty(shape=a.shape, dtype=a.dtype)
    expected = np.empty(shape=a.shape, dtype=a.dtype)

    f = njit(op)
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device), dpnp_debug():
        actual = f(a)
        captured = capfd.readouterr()
        assert "dpnp implementation" in captured.out

    expected = op(a)
    np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=0)
예제 #8
0
def test_asarray_change_usm_type(src_usm_type, dst_usm_type):
    d = dpctl.SyclDevice()
    if d.is_host:
        pytest.skip(
            "Skip test of host device, which only "
            "supports host USM allocations"
        )
    X = dpt.empty(10, dtype="u1", usm_type=src_usm_type)
    Y = dpt.asarray(X, usm_type=dst_usm_type)
    assert X.shape == Y.shape
    assert X.usm_type == src_usm_type
    assert Y.usm_type == dst_usm_type

    with pytest.raises(ValueError):
        # zero copy is not possible
        dpt.asarray(X, usm_type=dst_usm_type, copy=False)

    Y = dpt.asarray(X, usm_type=dst_usm_type, sycl_queue=X.sycl_queue)
    assert X.shape == Y.shape
    assert Y.usm_type == dst_usm_type

    Y = dpt.asarray(
        X,
        usm_type=dst_usm_type,
        sycl_queue=X.sycl_queue,
        device=d.get_filter_string(),
    )
    assert X.shape == Y.shape
    assert Y.usm_type == dst_usm_type
    def test_dppy_fallback_false(self):
        @numba.jit
        def fill_value(i):
            return i

        def inner_call_fallback():
            x = 10
            a = np.empty(shape=x, dtype=np.float32)

            for i in numba.prange(x):
                a[i] = fill_value(i)

            return a

        try:
            config.DEBUG = 1
            config.FALLBACK_ON_CPU = 0
            with warnings.catch_warnings(record=True) as w:
                device = dpctl.SyclDevice("opencl:gpu")
                with numba_dppy.offload_to_sycl_device(device):
                    dppy = numba.njit(parallel=True)(inner_call_fallback)
                    dppy_fallback_false = dppy()

        finally:
            ref_result = inner_call_fallback()
            config.FALLBACK_ON_CPU = 1
            config.DEBUG = 0

            not np.testing.assert_array_equal(dppy_fallback_false, ref_result)
            self.assertNotIn("Failed to offload parfor", str(w[-1].message))
예제 #10
0
    def test_multiple_prange(self):
        @njit
        def f(a, b):
            # dimensions must be provided as scalar
            m, n = a.shape
            for i in prange(m):
                val = 10
                for j in prange(n):
                    b[i, j] = a[i, j] * val

            for i in prange(m):
                for j in prange(n):
                    a[i, j] = a[i, j] * 10

        m = 8
        n = 8
        a = np.ones((m, n))
        b = np.ones((m, n))

        device = dpctl.SyclDevice("opencl:gpu")
        with assert_auto_offloading(
                parfor_offloaded=2), dpctl.device_context(device):
            f(a, b)

        assert np.all(b == 10)
        assert np.all(a == 10)
예제 #11
0
def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):
    a, dtype = input_arrays
    kernel, expected = kernel_result_pair
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
    assert a[0] == expected
예제 #12
0
def test_njit(filter_str):
    if _helper.platform_not_supported(filter_str):
        pytest.skip()

    if _helper.skip_test(filter_str):
        pytest.skip()

    @vectorize(nopython=True)
    def axy(a, x, y):
        return a * x + y

    def f(a0, a1):
        return np.cos(axy(a0, np.sin(a1) - 1.0, 1.0))

    A = np.random.random(10)
    B = np.random.random(10)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), assert_auto_offloading():
        f_njit = njit(f)
        expected = f_njit(A, B)
        actual = f(A, B)

        max_abs_err = expected.sum() - actual.sum()
        assert max_abs_err < 1e-5
예제 #13
0
def test_context_multi_device():
    try:
        d = dpctl.SyclDevice("cpu")
    except ValueError:
        pytest.skip()
    if d.default_selector_score < 0:
        pytest.skip()
    n = d.max_compute_units
    n1 = n // 2
    n2 = n - n1
    if n1 == 0 or n2 == 0:
        pytest.skip()
    d1, d2 = d.create_sub_devices(partition=(n1, n2))
    ctx = dpctl.SyclContext((d1, d2))
    assert ctx.device_count == 2
    assert type(repr(ctx)) is str
    q1 = dpctl.SyclQueue(ctx, d1)
    q2 = dpctl.SyclQueue(ctx, d2)
    import dpctl.memory as dpmem

    shmem_1 = dpmem.MemoryUSMShared(256, queue=q1)
    shmem_2 = dpmem.MemoryUSMDevice(256, queue=q2)
    shmem_2.copy_from_device(shmem_1)
    # create context for single sub-device
    ctx1 = dpctl.SyclContext(d1)
    q1 = dpctl.SyclQueue(ctx1, d1)
    shmem_1 = dpmem.MemoryUSMShared(256, queue=q1)
    cap = ctx1._get_capsule()
    cap2 = ctx1._get_capsule()
    del ctx1
    del cap2  # exercise deleter of non-renamed capsule
    ctx2 = dpctl.SyclContext(cap)
    q2 = dpctl.SyclQueue(ctx2, d1)
    shmem_2 = dpmem.MemoryUSMDevice(256, queue=q2)
    shmem_2.copy_from_device(shmem_1)
예제 #14
0
    def test_sum_reduction(self):
        # This test will only work for even case
        N = 1024
        assert N % 2 == 0

        A = np.array(np.random.random(N), dtype=np.float32)
        A_copy = A.copy()
        # at max we will require half the size of A to store sum
        R = np.array(np.random.random(math.ceil(N / 2)), dtype=np.float32)

        device = dpctl.SyclDevice("opencl:gpu")
        with dpctl.device_context(device):
            total = N

            while total > 1:
                # call kernel
                global_size = total // 2
                reduction_kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](
                    A, R, global_size
                )
                total = total // 2

            result = A_copy.sum()
            max_abs_err = result - R[0]
            assert max_abs_err < 1e-4
예제 #15
0
def test_dpctl_api(filter_str):
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        dpctl.lsplatform()
        dpctl.get_current_queue()
        dpctl.get_num_activated_queues()
        dpctl.is_in_device_context()
예제 #16
0
    def test_dppy_func_ndarray(self):
        @dppy.func
        def g(a):
            return a + 1

        @dppy.kernel
        def f(a, b):
            i = dppy.get_global_id(0)
            b[i] = g(a[i])

        @dppy.kernel
        def h(a, b):
            i = dppy.get_global_id(0)
            b[i] = g(a[i]) + 1

        a = np.ones(self.N)
        b = np.ones(self.N)

        device = dpctl.SyclDevice("opencl:gpu")
        with dpctl.device_context(device):
            f[self.N, dppy.DEFAULT_LOCAL_SIZE](a, b)

            assert np.all(b == 2)

            h[self.N, dppy.DEFAULT_LOCAL_SIZE](a, b)

            assert np.all(b == 3)
예제 #17
0
def test_kernel_arg_types(filter_str, input_arrays):
    kernel = dppy.kernel(mul_kernel)
    a, actual, c = input_arrays
    expected = a * c
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, local_size](a, actual, c)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
예제 #18
0
def test_valid_filter_selectors(valid_filter, check):
    """Tests if we can create a SyclDevice using a supported filter selector string."""
    device = None
    try:
        device = dpctl.SyclDevice(valid_filter)
    except ValueError:
        pytest.skip("Failed to create device with supported filter")
    check(device)
예제 #19
0
def test_hashing_of_device():
    """
    Test that a :class:`dpctl.SyclDevice` object can be used as
    a dictionary key.

    """
    device_dict = {dpctl.SyclDevice(): "default_device"}
    assert device_dict
예제 #20
0
def test_kernel_atomic_local(filter_str, input_arrays, return_list_of_op):
    a, dtype = input_arrays
    op_type, expected = return_list_of_op
    f = get_func_local(op_type, dtype)
    kernel = dppy.kernel(f)
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, global_size](a)
    assert a[0] == expected
예제 #21
0
def test_return(filter_str, sig):
    a = np.array(np.random.random(122), np.int32)

    with pytest.raises(TypeError):
        kernel = dppy.kernel(sig)(f)

        device = dpctl.SyclDevice(filter_str)
        with dpctl.device_context(device):
            kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a)
예제 #22
0
def test_context_can_be_used_in_queue2(valid_filter):
    try:
        d = dpctl.SyclDevice(valid_filter)
    except ValueError:
        pytest.skip()
    if d.default_selector_score < 0:
        # skip test for devices rejected by default selector
        pytest.skip()
    ctx = dpctl.SyclContext(d)
    dpctl.SyclQueue(ctx, d)
예제 #23
0
def test_filter_string_property():
    """
    Test that filter_string reconstructs the same device.
    """
    devices = dpctl.get_devices()
    for d in devices:
        if d.default_selector_score >= 0:
            dev_id = d.filter_string
            d_r = dpctl.SyclDevice(dev_id)
            assert d == d_r
예제 #24
0
def test_dpctl_api(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        dpctl.lsplatform()
        dpctl.get_current_queue()
        dpctl.get_num_activated_queues()
        dpctl.is_in_device_context()
예제 #25
0
def test_kernel_arg_accessor(filter_str, input_arrays, kernel):
    if skip_test(filter_str):
        pytest.skip()

    a, b, actual = input_arrays
    expected = a + b
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        call_kernel(global_size, local_size, a, b, actual, kernel)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
예제 #26
0
def test_bool_type(filter_str):
    kernel = dppy.kernel(check_bool_kernel)
    a = np.array([2], np.int64)

    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, True)
        assert a[0] == 111
        kernel[a.size, dppy.DEFAULT_LOCAL_SIZE](a, False)
        assert a[0] == 222
예제 #27
0
def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):
    if atomic_skip_test(filter_str):
        pytest.skip()

    a, dtype = input_arrays
    kernel, expected = kernel_result_pair
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
    assert a[0] == expected
예제 #28
0
def test_kernel_atomic_multi_dim(filter_str, return_list_of_op,
                                 return_list_of_dim, return_dtype):
    op_type, expected = return_list_of_op
    dim = return_list_of_dim
    kernel = get_kernel_multi_dim(op_type, len(dim))
    a = np.zeros(dim, return_dtype)
    device = dpctl.SyclDevice(filter_str)
    with dpctl.device_context(device):
        kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
    assert a[0] == expected
    def test_parfor_message(self):
        device = dpctl.SyclDevice("opencl:gpu")
        with dpctl.device_context(device):
            config.DEBUG = 1
            jitted = njit(prange_example)

            with captured_stdout() as got:
                jitted()

            config.DEBUG = 0
            assert "Parfor offloaded " in got.getvalue()
예제 #30
0
def test_kernel_arg_types(filter_str, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    kernel = dppy.kernel(mul_kernel)
    a, actual, c = input_arrays
    expected = a * c
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, local_size](a, actual, c)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)