Example #1
def test_trigonometric_fn(filter_str, trig_op, input_arrays):
    if skip_test(filter_str):
        pytest.skip()

    # FIXME: Why does arccosh fail on the Gen12 discrete graphics card?
    if trig_op == "arccosh" and is_gen12(filter_str):
        pytest.skip()

    a, b = input_arrays
    trig_fn = getattr(np, trig_op)
    actual = np.empty(shape=a.shape, dtype=a.dtype)
    expected = np.empty(shape=a.shape, dtype=a.dtype)

    if trig_op == "arctan2":

        @njit
        def f(a, b):
            return trig_fn(a, b)

        device = dpctl.SyclDevice(filter_str)
        with dppy.offload_to_sycl_device(device), assert_auto_offloading():
            actual = f(a, b)
        expected = trig_fn(a, b)
    else:

        @njit
        def f(a):
            return trig_fn(a)

        device = dpctl.SyclDevice(filter_str)
        with dppy.offload_to_sycl_device(device), assert_auto_offloading():
            actual = f(a)
        expected = trig_fn(a)

    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
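
The `filter_str`, `trig_op`, and `input_arrays` arguments are pytest fixtures
defined elsewhere in the test module. For context, a minimal sketch of what an
`input_arrays` fixture could look like (the size, dtype, and value range are
assumptions, not the project's actual fixture):

import numpy as np
import pytest


@pytest.fixture
def input_arrays():
    # Hypothetical fixture: random positive inputs. The real fixture
    # presumably picks ranges that keep each trig op within its domain.
    N = 64
    a = np.random.uniform(1.0, 2.0, N).astype(np.float32)
    b = np.random.uniform(1.0, 2.0, N).astype(np.float32)
    return a, b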
Example #2
def test_atomic_fp_native(filter_str, return_list_of_op, fdtype, addrspace):
    LLVM_SPIRV_ROOT = os.environ.get("NUMBA_DPPY_LLVM_SPIRV_ROOT")
    if not LLVM_SPIRV_ROOT:
        pytest.skip(
            "Please set the environment variable NUMBA_DPPY_LLVM_SPIRV_ROOT "
            "to run this test"
        )

    if atomic_skip_test(filter_str):
        pytest.skip()

    a = np.array([0], fdtype)

    op_type, expected = return_list_of_op

    if addrspace == "global":
        op = getattr(dppy.atomic, op_type)

        def f(a):
            op(a, 0, 1)

    elif addrspace == "local":
        f = get_func_local(op_type, fdtype)

    kernel = dppy.kernel(f)

    NATIVE_FP_ATOMICS_old_val = config.NATIVE_FP_ATOMICS
    config.NATIVE_FP_ATOMICS = 1

    LLVM_SPIRV_ROOT_old_val = config.LLVM_SPIRV_ROOT
    config.LLVM_SPIRV_ROOT = LLVM_SPIRV_ROOT

    with dppy.offload_to_sycl_device(filter_str) as sycl_queue:
        kern = kernel[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            kernel._get_argtypes(a), sycl_queue
        )
        if filter_str != "opencl:cpu:0":
            assert "__spirv_AtomicFAddEXT" in kern.assembly
        else:
            assert "__spirv_AtomicFAddEXT" not in kern.assembly

    config.NATIVE_FP_ATOMICS = 0

    # To bypass caching
    kernel = dppy.kernel(f)
    with dppy.offload_to_sycl_device(filter_str) as sycl_queue:
        kern = kernel[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            kernel._get_argtypes(a), sycl_queue
        )
        assert "__spirv_AtomicFAddEXT" not in kern.assembly

    config.NATIVE_FP_ATOMICS = NATIVE_FP_ATOMICS_old_val
    config.LLVM_SPIRV_ROOT = LLVM_SPIRV_ROOT_old_val
Example #3
    def test_multiple_prange(self):
        @njit
        def f(a, b):
            # dimensions must be provided as scalars
            m, n = a.shape
            for i in prange(m):
                val = 10
                for j in prange(n):
                    b[i, j] = a[i, j] * val

            for i in prange(m):
                for j in prange(n):
                    a[i, j] = a[i, j] * 10

        m = 8
        n = 8
        a = np.ones((m, n))
        b = np.ones((m, n))

        device = dpctl.SyclDevice("opencl:gpu")
        with assert_auto_offloading(
                parfor_offloaded=2), dppy.offload_to_sycl_device(device):
            f(a, b)

        self.assertTrue(np.all(b == 10))
        self.assertTrue(np.all(a == 10))
Example #4
def test_local_memory(filter_str):
    if skip_test(filter_str):
        pytest.skip()
    blocksize = 10

    if skip_if_win():
        pytest.skip()

    @dppy.kernel("void(float32[::1])")
    def reverse_array(A):
        lm = dppy.local.array(shape=10, dtype=np.float32)
        i = dppy.get_global_id(0)

        # preload
        lm[i] = A[i]
        # a local or a global barrier both work here since there is only one work-group
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        # write
        A[i] += lm[blocksize - 1 - i]

    arr = np.arange(blocksize).astype(np.float32)
    orig = arr.copy()

    with dppy.offload_to_sycl_device(filter_str):
        reverse_array[blocksize, blocksize](arr)

    expected = orig[::-1] + orig
    np.testing.assert_allclose(expected, arr)
Example #5
def test_proper_lowering(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    # We perform eager compilation at the site of
    # @dppy.kernel. This takes the default dpctl
    # queue, which is Level Zero backed. Level Zero
    # is not yet supported on Windows, so we skip
    # these tests on Windows regardless of which
    # backend filter_str specifies.
    if skip_if_win():
        pytest.skip()

    # This will trigger eager compilation
    @dppy.kernel("void(float32[::1])")
    def twice(A):
        i = dppy.get_global_id(0)
        d = A[i]
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        A[i] = d * 2

    N = 256
    arr = np.random.random(N).astype(np.float32)
    orig = arr.copy()

    with dppy.offload_to_sycl_device(filter_str):
        twice[N, N // 2](arr)

    # Check that the computation is correct.
    np.testing.assert_allclose(orig * 2, arr)
Example #6
    def test_dppy_fallback_false(self):
        @numba.jit
        def fill_value(i):
            return i

        def inner_call_fallback():
            x = 10
            a = np.empty(shape=x, dtype=np.float32)

            for i in numba.prange(x):
                a[i] = fill_value(i)

            return a

        try:
            config.DEBUG = 1
            config.FALLBACK_ON_CPU = 0
            with warnings.catch_warnings(record=True) as w:
                device = dpctl.SyclDevice("opencl:gpu")
                with numba_dppy.offload_to_sycl_device(device):
                    dppy = numba.njit(parallel=True)(inner_call_fallback)
                    dppy_fallback_false = dppy()

        finally:
            ref_result = inner_call_fallback()
            config.FALLBACK_ON_CPU = 1
            config.DEBUG = 0

            # With CPU fallback disabled, the offloaded result must differ
            # from the reference result.
            self.assertFalse(np.array_equal(dppy_fallback_false, ref_result))
            self.assertNotIn("Failed to offload parfor", str(w[-1].message))
Example #7
def test_vectorize(filter_str, shape, dtypes, input_type):
    if _helper.platform_not_supported(filter_str):
        pytest.skip()

    if _helper.skip_test(filter_str):
        pytest.skip()

    def vector_add(a, b):
        return a + b

    dtype, sig_dtype = dtypes
    sig = [sig_dtype(sig_dtype, sig_dtype)]
    size, shape = shape

    if input_type == "array":
        A = np.arange(size, dtype=dtype).reshape(shape)
        B = np.arange(size, dtype=dtype).reshape(shape)
    elif input_type == "scalar":
        A = dtype(1.2)
        B = dtype(2.3)

    with dppy.offload_to_sycl_device(filter_str):
        f = vectorize(sig, target="dppy")(vector_add)
        expected = f(A, B)
        actual = vector_add(A, B)

        max_abs_err = np.abs(np.sum(expected) - np.sum(actual))
        assert max_abs_err < 1e-5
Example #8
    def test_device_array_args_gpu(self):
        c = np.ones_like(a)

        with dppy.offload_to_sycl_device("opencl:gpu"):
            data_parallel_sum[global_size, dppy.DEFAULT_LOCAL_SIZE](a, b, c)

        self.assertTrue(np.all(c == d))
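
`data_parallel_sum` and the module-level arrays `a`, `b`, and `d` are defined
in the surrounding test module; `d` presumably holds the host-computed
reference `a + b`. Assuming the vector-addition pattern used throughout these
examples (see Examples #12 and #13), the kernel would be:

import numba_dppy as dppy


@dppy.kernel
def data_parallel_sum(a, b, c):
    # Each work-item adds one pair of elements.
    i = dppy.get_global_id(0)
    c[i] = a[i] + b[i]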
Example #9
def main():
    size = 9
    scale = 3.0

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        result = rand()
        # Random values in a given shape (3, 2)
        print(result)

        result = random_sample(size)
        # Array of shape (9,) with random floats in the
        # half-open interval [0.0, 1.0)
        print(result)

        result = random_exponential(scale, size)
        # Array of shape (9,) with samples from an exponential distribution
        print(result)

        result = random_normal(0.0, 0.1, size)
        # Array of shape (9,) with samples from a normal distribution
        print(result)

    print("Done...")
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--api",
        required=False,
        default="numba",
        choices=["numba", "numba-dppy"],
        help="Start the version of functions using numba or numba-dppy API",
    )

    args = parser.parse_args()

    print("Using API:", args.api)

    global_size = 10
    N = global_size

    a = np.arange(N, dtype=np.float32)
    b = np.arange(N, dtype=np.float32)
    c = np.empty_like(a)

    if args.api == "numba-dppy":
        device = dpctl.select_default_device()
        with dppy.offload_to_sycl_device(device):
            dppy_kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a, b, c)
    else:
        numba_func_driver(a, b, c)

    print("Done...")
Example #11
def test_eig(filter_str, eig_input, capfd):
    if skip_test(filter_str):
        pytest.skip()

    a = eig_input
    fn = get_fn("linalg.eig", 1)
    f = njit(fn)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), dpnp_debug():
        actual_val, actual_vec = f(a)
        captured = capfd.readouterr()
        assert "dpnp implementation" in captured.out

        expected_val, expected_vec = fn(a)

        # sort val/vec by abs value
        vvsort(actual_val, actual_vec)
        vvsort(expected_val, expected_vec)

        # NumPy may flip the sign of eigenvectors; normalize signs before comparing
        for i in range(expected_vec.shape[1]):
            if expected_vec[0, i] * actual_vec[0, i] < 0:
                expected_vec[:, i] = -expected_vec[:, i]

        assert np.allclose(actual_val, expected_val)
        assert np.allclose(actual_vec, expected_vec)
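
`vvsort` reorders the eigenvalues and the matching eigenvector columns so the
two results can be compared element-wise. A simplified in-place sketch
(assumed; the test suite's actual helper may take extra arguments):

import numpy as np


def vvsort(val, vec):
    # Sort eigenvalues by absolute value and apply the same permutation
    # to the eigenvector columns, in place.
    order = np.argsort(np.abs(val))
    val[:] = val[order]
    vec[:, :] = vec[:, order]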
Example #12
def test_caching_kernel_using_same_context(filter_str):
    """Test kernel caching for the scenario where different SYCL queues that
    share a SYCL context are used to submit a kernel.

    Args:
        filter_str: SYCL filter selector string
    """
    if skip_test(filter_str):
        pytest.skip()
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    # Set the global queue to the default device so that the cached_kernel gets
    # created for that device
    dpctl.set_global_queue(filter_str)
    func = dppy.kernel(data_parallel_sum)
    default_queue = dpctl.get_current_queue()
    cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
        func._get_argtypes(a, b, c), default_queue)
    for i in range(0, 10):
        # Each iteration create a fresh queue that will share the same context
        with dppy.offload_to_sycl_device(filter_str) as gpu_queue:
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Example #13
def test_caching_kernel_using_same_queue(filter_str):
    """Test kernel caching when the same queue is used to submit a kernel
    multiple times.

    Args:
        filter_str: SYCL filter selector string
    """
    if skip_test(filter_str):
        pytest.skip()
    global_size = 10
    N = global_size

    def data_parallel_sum(a, b, c):
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    with dppy.offload_to_sycl_device(filter_str) as gpu_queue:
        func = dppy.kernel(data_parallel_sum)
        cached_kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
            func._get_argtypes(a, b, c), gpu_queue)

        for i in range(10):
            _kernel = func[global_size, dppy.DEFAULT_LOCAL_SIZE].specialize(
                func._get_argtypes(a, b, c), gpu_queue)
            assert _kernel == cached_kernel
Example #14
def main():
    global_size = 10
    N = global_size
    print("N", N)

    a = np.array(np.random.random(N), dtype=np.float32)
    b = np.array(np.random.random(N), dtype=np.float32)
    c = np.ones_like(a)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        da = dpt.usm_ndarray(a.shape, dtype=a.dtype, buffer="shared")
        da.usm_data.copy_from_host(a.reshape((-1)).view("|u1"))

        db = dpt.usm_ndarray(b.shape, dtype=b.dtype, buffer="shared")
        db.usm_data.copy_from_host(b.reshape((-1)).view("|u1"))

        dc = dpt.usm_ndarray(c.shape, dtype=c.dtype, buffer="shared")

        driver(da, db, dc, global_size)

    print("Done...")
Example #15
def test_all(dtype, shape, filter_str):
    if skip_test(filter_str):
        pytest.skip()

    size = 1
    for i in range(len(shape)):
        size *= shape[i]

    # Enumerate every zero/non-zero pattern: bit j of i decides whether
    # a[j] is zero or non-zero.
    for i in range(2**size):
        t = i

        a = np.empty(size, dtype=dtype)

        for j in range(size):
            a[j] = 0 if t % 2 == 0 else j + 1
            t = t >> 1

        a = a.reshape(shape)

        def fn(a):
            return np.all(a)

        f = njit(fn)
        with dppy.offload_to_sycl_device(filter_str), dpnp_debug():
            actual = f(a)

        expected = fn(a)
        np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=0)
Example #16
def no_arg_barrier_support():
    """
    This example demonstrates the usage of numba_dppy's ``barrier``
    intrinsic function. The ``barrier`` function is usable only inside
    a ``kernel`` and is equivalent to OpenCL's ``barrier`` function.
    """
    @dppy.kernel
    def twice(A):
        i = dppy.get_global_id(0)
        d = A[i]
        # no argument defaults to global mem fence
        dppy.barrier()
        A[i] = d * 2

    N = 10
    arr = np.arange(N).astype(np.float32)
    print(arr)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        twice[N, dppy.DEFAULT_LOCAL_SIZE](arr)

    # the output should be `arr * 2`, i.e. [0, 2, 4, 6, ...]
    print(arr)
Example #17
def test_consuming_array_from_dpnp(offload_device, dtype):
    if not ensure_dpnp():
        pytest.skip("No DPNP")

    import dpnp

    if skip_test(offload_device):
        pytest.skip("No device for " + offload_device)

    if ("opencl" not in dpctl.get_current_queue().sycl_device.filter_string
            and "opencl" in offload_device):
        pytest.skip("Bug in DPNP. See: IntelPython/dpnp#723")

    @dppy.kernel
    def data_parallel_sum(a, b, c):
        """
        Vector addition using the ``kernel`` decorator.
        """
        i = dppy.get_global_id(0)
        c[i] = a[i] + b[i]

    global_size = 1021

    with dppy.offload_to_sycl_device(offload_device):
        a = dppy.asarray(dpnp.arange(global_size, dtype=dtype))
        b = dppy.asarray(dpnp.arange(global_size, dtype=dtype))
        c = dppy.asarray(dpnp.ones_like(a))

        data_parallel_sum[global_size, dppy.DEFAULT_LOCAL_SIZE](a, b, c)
Example #18
def local_memory():
    """
    This example demonstrates the usage of numba-dppy's `local.array`
    intrinsic function. The function is used to create a static array
    allocated in the device's local address space.
    """
    blocksize = 10

    @dppy.kernel
    def reverse_array(A):
        lm = dppy.local.array(shape=10, dtype=float32)
        i = dppy.get_global_id(0)

        # preload
        lm[i] = A[i]
        # a local or a global barrier both work here since there is only one work-group
        dppy.barrier(dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence
        # write
        A[i] += lm[blocksize - 1 - i]

    arr = np.arange(blocksize).astype(np.float32)
    print(arr)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        reverse_array[blocksize, dppy.DEFAULT_LOCAL_SIZE](arr)

    # the output should be `orig[::-1] + orig`, i.e. [9, 9, 9, ...]
    print(arr)
Example #19
def test_njit(filter_str):
    if _helper.platform_not_supported(filter_str):
        pytest.skip()

    if _helper.skip_test(filter_str):
        pytest.skip()

    @vectorize(nopython=True)
    def axy(a, x, y):
        return a * x + y

    def f(a0, a1):
        return np.cos(axy(a0, np.sin(a1) - 1.0, 1.0))

    A = np.random.random(10)
    B = np.random.random(10)

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device), assert_auto_offloading():
        f_njit = njit(f)
        expected = f_njit(A, B)
        actual = f(A, B)

        max_abs_err = np.abs(expected.sum() - actual.sum())
        assert max_abs_err < 1e-5
Example #20
    def test_dppy_func_ndarray(self):
        @dppy.func
        def g(a):
            return a + 1

        @dppy.kernel
        def f(a, b):
            i = dppy.get_global_id(0)
            b[i] = g(a[i])

        @dppy.kernel
        def h(a, b):
            i = dppy.get_global_id(0)
            b[i] = g(a[i]) + 1

        a = np.ones(self.N)
        b = np.ones(self.N)

        device = dpctl.SyclDevice("opencl:gpu")
        with dppy.offload_to_sycl_device(device):
            f[self.N, dppy.DEFAULT_LOCAL_SIZE](a, b)

            self.assertTrue(np.all(b == 2))

            h[self.N, dppy.DEFAULT_LOCAL_SIZE](a, b)

            self.assertTrue(np.all(b == 3))
Example #21
def private_memory():
    """
    This example demonstrates the usage of numba-dppy's `private.array`
    intrinsic function. The function is used to create a static array
    allocated in the device's private address space.
    """
    @numba_dppy.kernel
    def private_memory_kernel(A):
        memory = numba_dppy.private.array(shape=1, dtype=np.float32)
        i = numba_dppy.get_global_id(0)

        # preload
        memory[0] = i
        numba_dppy.barrier(numba_dppy.CLK_LOCAL_MEM_FENCE)  # local mem fence

        # memory will not hold a correct, deterministic result if it is
        # not private to each work-item.
        A[i] = memory[0] * 2

    N = 4
    arr = np.zeros(N).astype(np.float32)
    orig = np.arange(N).astype(np.float32)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with numba_dppy.offload_to_sycl_device(device):
        private_memory_kernel[N, N](arr)

    np.testing.assert_allclose(orig * 2, arr)
    # the output should be `orig[i] * 2`, i.e. [0, 2, 4, ...]
    print(arr)
Example #22
def sum_reduce(A):
    global_size = len(A)
    work_group_size = 64
    nb_work_groups = global_size // work_group_size
    if (global_size % work_group_size) != 0:
        nb_work_groups += 1

    partial_sums = np.zeros(nb_work_groups).astype(A.dtype)

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        inp_buf = dpctl_mem.MemoryUSMShared(A.size * A.dtype.itemsize)
        inp_ndarray = np.ndarray(A.shape, buffer=inp_buf, dtype=A.dtype)
        np.copyto(inp_ndarray, A)

        partial_sums_buf = dpctl_mem.MemoryUSMShared(
            partial_sums.size * partial_sums.dtype.itemsize
        )
        partial_sums_ndarray = np.ndarray(
            partial_sums.shape,
            buffer=partial_sums_buf,
            dtype=partial_sums.dtype,
        )
        np.copyto(partial_sums_ndarray, partial_sums)

        result = sum_recursive_reduction(
            global_size, work_group_size, inp_ndarray, partial_sums_ndarray
        )

    return result
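
`sum_recursive_reduction` is not shown here; it repeatedly launches a
work-group reduction kernel until a single value remains. A hedged sketch of
the per-work-group stage (the kernel name and the tree-reduction shape are
assumptions):

import numba_dppy as dppy


@dppy.kernel
def sum_reduction_kernel(inp, partial_sums):
    local_id = dppy.get_local_id(0)
    global_id = dppy.get_global_id(0)
    group_id = dppy.get_group_id(0)
    stride = dppy.get_local_size(0) // 2

    # Tree reduction within each work-group; every round halves the stride.
    while stride > 0:
        if local_id < stride:
            inp[global_id] += inp[global_id + stride]
        dppy.barrier(dppy.CLK_GLOBAL_MEM_FENCE)
        stride //= 2

    if local_id == 0:
        partial_sums[group_id] = inp[global_id]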
Example #23
    def test_dppy_fallback_inner_call(self):
        @numba.jit
        def fill_value(i):
            return i

        def inner_call_fallback():
            x = 10
            a = np.empty(shape=x, dtype=np.float32)

            for i in numba.prange(x):
                a[i] = fill_value(i)

            return a

        device = dpctl.SyclDevice("opencl:gpu")
        with warnings.catch_warnings(
            record=True
        ) as w, numba_dppy.offload_to_sycl_device(device):
            dppy = numba.njit(inner_call_fallback)
            dppy_result = dppy()

        ref_result = inner_call_fallback()

        np.testing.assert_array_equal(dppy_result, ref_result)
        self.assertIn("Failed to offload parfor ", str(w[-1].message))
Example #24
def main():
    global_size = 64
    local_size = 32
    N = global_size * local_size
    print("N", N)

    a = np.ones(N, dtype=np.float32)
    b = np.ones(N, dtype=np.float32)

    print("a:", a, hex(a.ctypes.data))
    print("b:", b, hex(b.ctypes.data))

    # Use the environment variable SYCL_DEVICE_FILTER to change
    # the default device. See
    # https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        c = f1(a, b)

    print("RESULT c:", c, hex(c.ctypes.data))
    for i in range(N):
        if c[i] != 2.0:
            print("First index not equal to 2.0 was", i)
            break

    print("Done...")
Example #25
    def test_with_dppy_context_cpu(self):
        @njit
        def nested_func(a, b):
            np.sin(a, b)

        @njit
        def func(b):
            a = np.ones((64), dtype=np.float64)
            nested_func(a, b)

        config.DEBUG = 1
        expected = np.ones((64), dtype=np.float64)
        got_cpu = np.ones((64), dtype=np.float64)

        with captured_stdout() as got_cpu_message:
            device = dpctl.SyclDevice("opencl:cpu")
            with dppy.offload_to_sycl_device(device):
                func(got_cpu)

        config.DEBUG = 0
        func(expected)

        np.testing.assert_array_equal(expected, got_cpu)
        self.assertTrue(
            "Parfor offloaded to opencl:cpu" in got_cpu_message.getvalue())
Example #26
def main():
    blockdim = 512, 1
    griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1

    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        for i in range(iterations):
            black_scholes_dppy[blockdim, griddim](
                callResult,
                putResult,
                stockPrice,
                optionStrike,
                optionYears,
                RISKFREE,
                VOLATILITY,
            )

    print("callResult : \n", callResult)
    print("putResult : \n", putResult)

    print("Done...")
Example #27
def main():
    # Use the environment variable SYCL_DEVICE_FILTER to change the default device.
    # See https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#sycl_device_filter.
    device = dpctl.select_default_device()
    print("Using device ...")
    device.print_device_info()

    with dppy.offload_to_sycl_device(device):
        c = f1(a, b)

    print("c:", c, hex(c.ctypes.data))
    for i in range(N):
        for j in range(N):
            for k in range(N):
                for l in range(N):  # noqa
                    for m in range(N):
                        if c[i, j, k, l, m] != 2.0:
                            print(
                                "First index not equal to 2.0 was",
                                i,
                                j,
                                k,
                                l,
                                m,
                            )
                            break

    print("Done...")
Example #28
def test_dpctl_api(filter_str):
    if skip_test(filter_str):
        pytest.skip()

    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        dpctl.lsplatform()
        dpctl.get_current_queue()
        dpctl.get_num_activated_queues()
        dpctl.is_in_device_context()
Example #29
def test_kernel_arg_accessor(filter_str, input_arrays, kernel):
    if skip_test(filter_str):
        pytest.skip()

    a, b, actual = input_arrays
    expected = a + b
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        call_kernel(global_size, local_size, a, b, actual, kernel)
    np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=0)
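
`call_kernel` and the `kernel` fixture come from the test module; the helper
presumably just launches the kernel over the requested NDRange. A minimal
sketch (assumed):

def call_kernel(global_size, local_size, a, b, c, kernel):
    kernel[global_size, local_size](a, b, c)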
Example #30
def test_kernel_atomic_simple(filter_str, input_arrays, kernel_result_pair):
    if atomic_skip_test(filter_str):
        pytest.skip()

    a, dtype = input_arrays
    kernel, expected = kernel_result_pair
    device = dpctl.SyclDevice(filter_str)
    with dppy.offload_to_sycl_device(device):
        kernel[global_size, dppy.DEFAULT_LOCAL_SIZE](a)
    assert a[0] == expected