def test_get_execution_queue():
    """Check ``dpctl.utils.get_execution_queue`` on several queue sequences.

    The test is skipped when a default-constructed queue cannot be created.
    """
    try:
        queue_a = dpctl.SyclQueue()
        queue_b = dpctl.SyclQueue()
    except dpctl.SyclQueueCreationError:
        pytest.skip("Queue could not be create for default device")

    # An empty sequence has no execution queue.
    resolved = dpctl.utils.get_execution_queue(())
    assert resolved is None

    # A single queue, or the same queue repeated, resolves to that queue.
    resolved = dpctl.utils.get_execution_queue([queue_a])
    assert resolved is queue_a
    resolved = dpctl.utils.get_execution_queue(
        [queue_a, queue_a, queue_a, queue_a]
    )
    assert resolved is queue_a

    # A ``None`` entry among the queues gives ``None``.
    resolved = dpctl.utils.get_execution_queue(
        (queue_a, queue_a, None, queue_a)
    )
    assert resolved is None

    # Mixing in a second default-constructed queue is expected to still
    # resolve to the first one.
    resolved = dpctl.utils.get_execution_queue((queue_a, queue_b, queue_a))
    assert resolved is queue_a
def produce_event(profiling=False):
    """Submit a one-argument OpenCL ``add`` kernel and return what ``submit`` returns.

    The kernel is built for an ``"opencl:cpu"`` queue and increments each of
    the 1024 ``int`` elements of a USM-shared buffer.

    Parameters
    ----------
    profiling : bool
        When true, the queue is created with ``property="enable_profiling"``.

    Returns
    -------
    The object returned by ``SyclQueue.submit`` for the kernel launch.
    """
    kernel_source = " \
    kernel void add(global int* a) { \
        size_t index = get_global_id(0); \
        a[index] = a[index] + 1; \
    }"
    queue = (
        dpctl.SyclQueue("opencl:cpu", property="enable_profiling")
        if profiling
        else dpctl.SyclQueue("opencl:cpu")
    )
    program = dpctl_prog.create_program_from_source(queue, kernel_source)
    add_kernel = program.get_sycl_kernel("add")

    n_bytes = 1024 * np.dtype("i").itemsize
    usm_buffer = dpctl_mem.MemoryUSMShared(n_bytes, queue=queue)
    # NumPy view over the USM allocation, used to initialise it from the host.
    host_view = np.ndarray((1024), buffer=usm_buffer, dtype="i")
    host_view[:] = np.arange(1024)

    # The kernel takes the underlying memory object, not the NumPy wrapper.
    kernel_args = [host_view.base]
    global_range = [1024]
    return queue.submit(add_kernel, kernel_args, global_range)
def test_context_multi_device():
    """Exercise a context spanning two CPU sub-devices, then a capsule round-trip.

    Skipped when no CPU device exists, when the default selector rejects it,
    or when it cannot be split into two non-empty partitions.
    """
    try:
        cpu_dev = dpctl.SyclDevice("cpu")
    except ValueError:
        pytest.skip()
    if cpu_dev.default_selector_score < 0:
        pytest.skip()

    total_units = cpu_dev.max_compute_units
    first_share = total_units // 2
    second_share = total_units - first_share
    if not (first_share and second_share):
        pytest.skip()

    sub_a, sub_b = cpu_dev.create_sub_devices(
        partition=(first_share, second_share)
    )
    shared_ctx = dpctl.SyclContext((sub_a, sub_b))
    assert shared_ctx.device_count == 2
    assert type(repr(shared_ctx)) is str

    queue_a = dpctl.SyclQueue(shared_ctx, sub_a)
    queue_b = dpctl.SyclQueue(shared_ctx, sub_b)
    import dpctl.memory as dpmem

    # Copy between allocations made on queues of the two sub-devices.
    src_mem = dpmem.MemoryUSMShared(256, queue=queue_a)
    dst_mem = dpmem.MemoryUSMDevice(256, queue=queue_b)
    dst_mem.copy_from_device(src_mem)

    # Context for a single sub-device.
    single_ctx = dpctl.SyclContext(sub_a)
    queue_a = dpctl.SyclQueue(single_ctx, sub_a)
    src_mem = dpmem.MemoryUSMShared(256, queue=queue_a)
    capsule = single_ctx._get_capsule()
    spare_capsule = single_ctx._get_capsule()
    del single_ctx
    del spare_capsule  # exercise deleter of non-renamed capsule

    # Rebuild a context from the surviving capsule and use it.
    restored_ctx = dpctl.SyclContext(capsule)
    queue_b = dpctl.SyclQueue(restored_ctx, sub_a)
    dst_mem = dpmem.MemoryUSMDevice(256, queue=queue_b)
    dst_mem.copy_from_device(src_mem)
def test_queue_capsule():
    """Round-trip a queue through its capsule and compare the results."""
    original = dpctl.SyclQueue()
    capsule = original._get_capsule()
    unused_capsule = original._get_capsule()

    rebuilt = dpctl.SyclQueue(capsule)
    assert original == rebuilt

    # Dropping a capsule that was never consumed calls its deleter.
    del unused_capsule

    # Comparison against an unrelated type must report inequality.
    assert rebuilt != []
def test_context_equals():
    """Contexts of two queues created with the "gpu" selector must be equal.

    Skipped when a GPU queue cannot be created.
    """
    try:
        later_queue = dpctl.SyclQueue("gpu")
        earlier_queue = dpctl.SyclQueue("gpu")
    except dpctl.SyclQueueCreationError:
        pytest.skip()

    earlier_ctx = earlier_queue.get_sycl_context()
    later_ctx = later_queue.get_sycl_context()
    assert earlier_ctx.equals(later_ctx)
def test_constructor_many_arg():
    """``SyclQueue`` must raise ``TypeError`` for these positional-argument tuples."""
    for bad_args in ((None, None, None, None), (None, None)):
        with pytest.raises(TypeError):
            dpctl.SyclQueue(*bad_args)

    context = dpctl.SyclContext()
    # A context without a usable device argument is rejected as well.
    for bad_args in ((context, None), (context,)):
        with pytest.raises(TypeError):
            dpctl.SyclQueue(*bad_args)
def test_context_not_equals():
    """Contexts of a GPU queue and a CPU queue must not compare equal via ``equals``.

    Skipped when either queue cannot be created.
    """

    def _context_or_skip(selector):
        # Create a queue for the selector, skipping the test on failure.
        try:
            queue = dpctl.SyclQueue(selector)
        except dpctl.SyclQueueCreationError:
            pytest.skip()
        return queue.get_sycl_context()

    gpu_context = _context_or_skip("gpu")
    cpu_context = _context_or_skip("cpu")
    assert not cpu_context.equals(gpu_context)
def test_get_execution_queue_nonequiv():
    """``get_execution_queue`` returns ``None`` for queues that are not equivalent.

    The queues are a CPU queue plus two queues on its sub-devices that share
    a separate multi-device context. Skipped on ``SyclQueueCreationError``.
    """
    try:
        root_queue = dpctl.SyclQueue("cpu")
        sub_a, sub_b = root_queue.sycl_device.create_sub_devices(
            partition=[1, 1]
        )
        shared_ctx = dpctl.SyclContext([root_queue.sycl_device, sub_a, sub_b])
        queue_a = dpctl.SyclQueue(shared_ctx, sub_a)
        queue_b = dpctl.SyclQueue(shared_ctx, sub_b)
    except dpctl.SyclQueueCreationError:
        pytest.skip("Queue could not be create for default device")

    resolved = dpctl.utils.get_execution_queue((root_queue, queue_a, queue_b))
    assert resolved is None
def test_equivalent_usm_ndarray(input_arrays):
    """Launch ``sum_kernel`` on arrays from equivalent and non-equivalent queues.

    Arrays allocated on a ``level_zero:gpu`` queue and an ``opencl:gpu`` queue
    are passed together and must raise ``IndeterminateExecutionQueueError``.
    Arrays that all share the first queue must produce ``a + b``.

    Parameters
    ----------
    input_arrays
        Fixture value unpacked as ``(a, b, expected)``. Only ``a`` and ``b``
        are used; the expected result is recomputed as ``a + b``.
    """
    usm_type = "device"

    a, b, _ = input_arrays
    got = np.ones_like(a)

    def _empty_on(host_array, queue):
        # Allocate an uninitialised usm_ndarray matching ``host_array``.
        return dpt.usm_ndarray(
            host_array.shape,
            dtype=host_array.dtype,
            buffer=usm_type,
            buffer_ctor_kwargs={"queue": queue},
        )

    def _as_bytes(host_array):
        # Flat byte view used for host <-> USM copies.
        return host_array.reshape((-1)).view("|u1")

    device1 = dpctl.SyclDevice("level_zero:gpu")
    queue1 = dpctl.SyclQueue(device1)

    device2 = dpctl.SyclDevice("opencl:gpu")
    queue2 = dpctl.SyclQueue(device2)

    da = _empty_on(a, queue1)
    da.usm_data.copy_from_host(_as_bytes(a))

    not_equivalent_db = _empty_on(b, queue2)
    not_equivalent_db.usm_data.copy_from_host(_as_bytes(b))

    equivalent_db = _empty_on(b, queue1)
    equivalent_db.usm_data.copy_from_host(_as_bytes(b))

    dc = _empty_on(got, queue1)

    with pytest.raises(IndeterminateExecutionQueueError) as error_msg:
        sum_kernel[global_size, local_size](da, not_equivalent_db, dc)
    # Must sit outside the ``with`` block: statements after the raising call
    # inside the block never execute.
    assert IndeterminateExecutionQueueError_msg in str(error_msg.value)

    sum_kernel[global_size, local_size](da, equivalent_db, dc)
    dc.usm_data.copy_to_host(_as_bytes(got))

    expected = a + b
    assert np.array_equal(got, expected)
def get_queues(filter_='cpu,gpu,host'):
    """Return the queues selected by ``filter_``, on a best-effort basis.

    Parameters
    ----------
    filter_ : str
        Text searched for the substrings ``'host'``, ``'cpu'`` and ``'gpu'``.

    Returns
    -------
    list
        ``None`` first if ``'host'`` was requested, then a ``dpctl.SyclQueue``
        for each requested device type (CPU before GPU) that ``dpctl`` reports
        as present and for which a queue could be created. If ``dpctl`` cannot
        be imported, only the host entry (if any) is returned.
    """
    queues = []
    if 'host' in filter_:
        queues.append(None)

    try:
        import dpctl
    except Exception:
        # dpctl is optional here; a missing or broken install just means no
        # device queues are available.
        return queues

    probes = (('cpu', 'has_cpu_devices'), ('gpu', 'has_gpu_devices'))
    for device_type, probe_name in probes:
        if device_type not in filter_:
            continue
        try:
            # The probes are functions: testing the bare attribute is always
            # truthy, so they have to be called.
            if getattr(dpctl, probe_name)():
                queues.append(dpctl.SyclQueue(device_type))
        except Exception:
            # Best effort: a failure for one device type must not prevent
            # probing the next one.
            continue
    return queues
def test_context_not_equals():
    """GPU and CPU queues, and their contexts, must differ in ``!=`` and ``hash``.

    Skipped when either queue cannot be created.
    """
    try:
        gpu_queue = dpctl.SyclQueue("gpu")
    except dpctl.SyclQueueCreationError:
        pytest.skip()
    gpu_context = gpu_queue.get_sycl_context()

    try:
        cpu_queue = dpctl.SyclQueue("cpu")
    except dpctl.SyclQueueCreationError:
        pytest.skip()
    cpu_context = cpu_queue.get_sycl_context()

    # Contexts first, then the queues themselves.
    assert cpu_context != gpu_context
    assert hash(cpu_context) != hash(gpu_context)
    assert gpu_queue != cpu_queue
    assert hash(cpu_queue) != hash(gpu_queue)
def test_constructor_inconsistent_ctx_dev():
    """A queue cannot be built from a context and a device outside that context.

    The context holds one CPU sub-device and the queue is requested for the
    other. Skipped when the CPU queue or the sub-devices cannot be created.
    """
    try:
        cpu_queue = dpctl.SyclQueue("cpu")
    except dpctl.SyclQueueCreationError:
        pytest.skip("Failed to create CPU queue")

    cpu_device = cpu_queue.sycl_device
    unit_count = cpu_device.max_compute_units
    half = unit_count // 2
    try:
        in_ctx_dev, other_dev = cpu_device.create_sub_devices(
            partition=[half, unit_count - half]
        )
    except Exception:
        pytest.skip("Could not create CPU sub-devices")

    narrow_ctx = dpctl.SyclContext(in_ctx_dev)
    with pytest.raises(dpctl.SyclQueueCreationError):
        dpctl.SyclQueue(narrow_ctx, other_dev)
def test_asarray_input_validation2():
    """``asarray`` must raise ``TypeError`` when ``sycl_queue`` and ``device`` are different queues.

    Skipped when fewer than two devices exist or a queue cannot be created
    for either of the first two.
    """
    devices = dpctl.get_devices()
    if len(devices) < 2:
        pytest.skip("Not enough SYCL devices available")

    queues = []
    for dev in devices[:2]:
        try:
            queues.append(dpctl.SyclQueue(dev))
        except dpctl.SyclQueueCreationError:
            pytest.skip(f"SyclQueue could not be created for {dev}")
    first_queue, second_queue = queues

    with pytest.raises(TypeError):
        dpt.asarray([1, 2], sycl_queue=first_queue, device=second_queue)
def test_create_program_from_spirv(self):
    """Build a program from ``input_files/multi_kernel.spv`` on a Level Zero GPU queue.

    The file is located relative to this test module. The test passes if no
    exception is raised.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    spirv_path = os.path.join(here, "input_files/multi_kernel.spv")
    with open(spirv_path, "rb") as stream:
        spirv_blob = stream.read()

    queue = dpctl.SyclQueue("level_zero:gpu")
    dpctl_prog.create_program_from_spirv(queue, spirv_blob)
def test_queue__repr__():
    """``SyclQueue.__repr__`` must return a ``str`` for several ``property`` values."""
    property_specs = (
        0,
        "in_order",
        "enable_profiling",
        "default",
        ["in_order", "enable_profiling", 0],
    )

    # Keep every queue alive until all representations have been checked.
    queues = []
    texts = []
    for spec in property_specs:
        queue = dpctl.SyclQueue(property=spec)
        queues.append(queue)
        texts.append(queue.__repr__())

    for text in texts:
        assert type(text) is str
def test_mix_argtype(offload_device, input_arrays):
    """Mixing a ``usm_ndarray`` with a NumPy array in one launch must raise ``TypeError``.

    Parameters
    ----------
    offload_device
        Value passed to ``dpctl.SyclDevice`` to select the device.
    input_arrays
        Fixture value unpacked as ``(a, b, expected)``; only ``a`` and ``b``
        are used.
    """
    usm_type = "device"

    a, b, _ = input_arrays
    got = np.ones_like(a)

    device = dpctl.SyclDevice(offload_device)
    queue = dpctl.SyclQueue(device)

    da = dpt.usm_ndarray(
        a.shape,
        dtype=a.dtype,
        buffer=usm_type,
        buffer_ctor_kwargs={"queue": queue},
    )
    da.usm_data.copy_from_host(a.reshape((-1)).view("|u1"))

    dc = dpt.usm_ndarray(
        got.shape,
        dtype=got.dtype,
        buffer=usm_type,
        buffer_ctor_kwargs={"queue": queue},
    )

    # ``b`` stays a plain NumPy array while ``da``/``dc`` are USM arrays.
    with pytest.raises(TypeError) as error_msg:
        sum_kernel[global_size, local_size](da, b, dc)
    # Check the message text of the captured exception. ``error_msg`` itself
    # is pytest's ExceptionInfo wrapper and does not support ``in``. The check
    # must also follow the ``with`` block to be executed at all.
    assert mix_datatype_err_msg in str(error_msg.value)
def select_device_SUAI(N):
    """Run ``sum_kernel`` on ``N`` random float32 values allocated on an OpenCL GPU queue.

    The inputs are moved to USM memory through ``allocate_SUAI_data``. The
    kernel launch names no device, and the result is checked against the
    host sum.

    Parameters
    ----------
    N : int
        Number of elements; also used as the global size of the launch.
    """
    usm_type = "device"

    lhs = np.array(np.random.random(N), np.float32)
    rhs = np.array(np.random.random(N), np.float32)
    result = np.ones_like(lhs)

    gpu_device = dpctl.SyclDevice("opencl:gpu")
    gpu_queue = dpctl.SyclQueue(gpu_device)

    # The data lives on the OpenCL GPU, which is therefore the device
    # selected for compute.
    d_lhs, d_rhs, d_out = allocate_SUAI_data(
        lhs, rhs, result, usm_type, gpu_queue
    )

    # No explicit device selection: it is inferred from the data.
    sum_kernel[N, 1](d_lhs, d_rhs, d_out)

    d_out.usm_data.copy_to_host(result.reshape((-1)).view("|u1"))

    reference = lhs + rhs
    assert np.array_equal(result, reference)

    print(
        "Correct result when array with __sycl_usm_array_interface__ is passed!"
    )
def test_create_program_from_source(self):
    """Build a two-kernel OpenCL program and inspect its kernels.

    Checks that the program exists, that it exposes ``add`` and ``axpy``,
    and that each kernel reports the expected name and argument count.
    """
    kernel_source = " \
    kernel void add(global int* a, global int* b, global int* c) { \
        size_t index = get_global_id(0); \
        c[index] = a[index] + b[index]; \
    } \
    kernel void axpy(global int* a, global int* b, global int* c, int d) { \
        size_t index = get_global_id(0); \
        c[index] = a[index] + d*b[index]; \
    }"
    queue = dpctl.SyclQueue("opencl:gpu")
    program = dpctl_prog.create_program_from_source(queue, kernel_source)

    self.assertIsNotNone(program)

    kernel_names = ("add", "axpy")
    for name in kernel_names:
        self.assertTrue(program.has_sycl_kernel(name))

    kernels = [program.get_sycl_kernel(name) for name in kernel_names]

    for kernel, name in zip(kernels, kernel_names):
        self.assertEqual(kernel.get_function_name(), name)

    # ``add`` takes three pointers; ``axpy`` adds one scalar.
    for kernel, arg_count in zip(kernels, (3, 4)):
        self.assertEqual(kernel.get_num_args(), arg_count)
def test_asarray_copy_false():
    """Check ``asarray(..., copy=False)`` for each ``order`` on two source arrays.

    For ``X`` (built by ``from_numpy``), orders "K", "C" and "A" must return
    the same object and "F" must raise ``ValueError``. For ``Xf`` (allocated
    with ``order="F"``), orders "K", "F" and "A" must return the same object
    and "C" must raise. ``copy=True`` must return a different object.
    """
    try:
        queue = dpctl.SyclQueue()
    except dpctl.SyclQueueCreationError:
        pytest.skip("Could not create a queue")

    X = dpt.from_numpy(
        np.random.randn(10, 4), usm_type="device", sycl_queue=queue
    )

    assert dpt.asarray(X, copy=False, order="K") is X
    copied = dpt.asarray(X, copy=True, order="K")
    assert copied is not X
    for order in ("C", "A"):
        assert dpt.asarray(X, copy=False, order=order) is X
    with pytest.raises(ValueError):
        dpt.asarray(X, copy=False, order="F")

    Xf = dpt.empty(
        X.shape,
        dtype=X.dtype,
        usm_type="device",
        sycl_queue=X.sycl_queue,
        order="F",
    )
    Xf[:] = X

    for order in ("K", "F", "A"):
        assert dpt.asarray(Xf, copy=False, order=order) is Xf
    with pytest.raises(ValueError):
        dpt.asarray(Xf, copy=False, order="C")
def test_create_program_from_source(self):
    """Submit an OpenCL ``axpy`` kernel and verify ``c == d*a + b`` on the host.

    Three USM-shared buffers of 1024 ``int`` values are wrapped in NumPy
    views, initialised from the host, and passed to the kernel together with
    the scalar ``d``.
    """
    oclSrc = " \
    kernel void axpy(global int* a, global int* b, global int* c, int d) { \
        size_t index = get_global_id(0); \
        c[index] = d*a[index] + b[index]; \
    }"
    q = dpctl.SyclQueue("opencl:gpu")
    prog = dpctl_prog.create_program_from_source(q, oclSrc)
    axpyKernel = prog.get_sycl_kernel("axpy")

    bufBytes = 1024 * np.dtype("i").itemsize
    abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
    bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
    cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
    # NumPy views over the USM allocations for host-side access.
    a = np.ndarray((1024), buffer=abuf, dtype="i")
    b = np.ndarray((1024), buffer=bbuf, dtype="i")
    c = np.ndarray((1024), buffer=cbuf, dtype="i")
    a[:] = np.arange(1024)
    b[:] = np.arange(1024, 0, -1)
    c[:] = 0
    d = 2

    # The kernel receives the underlying memory objects plus a C int scalar.
    args = [a.base, b.base, c.base, ctypes.c_int(d)]
    r = [1024]

    q.submit(axpyKernel, args, r)
    # Submission is asynchronous: wait for the queue to drain before the
    # host reads ``c``, otherwise the check races with the kernel.
    q.wait()

    self.assertTrue(np.allclose(c, a * d + b))
def test_invalid_filter_selectors(invalid_filter):
    """
    Constructing a queue from an invalid filter string must raise
    ``SyclQueueCreationError``.
    """
    expected_error = dpctl.SyclQueueCreationError
    with pytest.raises(expected_error):
        dpctl.SyclQueue(invalid_filter)
def test_valid_filter_selectors(valid_filter, check):
    """
    Create queues from a supported filter selector string, verify the
    ``is_in_order`` flag with and without the "in_order" property, and run
    ``check`` on the first queue's device.
    """
    device = None
    try:
        plain_queue = dpctl.SyclQueue(valid_filter)
        device = plain_queue.get_sycl_device()
        assert plain_queue.is_in_order is False

        ordered_queue = dpctl.SyclQueue(valid_filter, property="in_order")
        # NOTE: device equality between the two queues is deliberately not
        # asserted here (the check was disabled in the original test).
        assert ordered_queue.is_in_order is True
    except dpctl.SyclQueueCreationError:
        pytest.skip("Failed to create device with supported filter")
    check(device)
def create_subdevice_queue():
    """
    Split a CPU device into sub-devices, put them in one multi-device
    context, and copy the lowercase alphabet between per-sub-device
    allocations.

    Returns the 26 bytes read back from the second allocation.
    """
    cpu_device = dpctl.SyclDevice("cpu")
    unit_count = cpu_device.max_compute_units
    sub_devices = cpu_device.create_sub_devices(partition=unit_count // 2)
    shared_ctx = dpctl.SyclContext(sub_devices)

    # One queue per sub-device, all using the common multi-device context.
    # Exactly two sub-devices are expected.
    first_queue, second_queue = [
        dpctl.SyclQueue(shared_ctx, dev) for dev in sub_devices
    ]

    # 26 bytes of device memory for each sub-device.
    first_mem = dpctl.memory.MemoryUSMDevice(26, queue=first_queue)
    second_mem = dpctl.memory.MemoryUSMDevice(26, queue=second_queue)

    # Start with a host buffer of spaces and copy it into the second
    # allocation.
    host_buf = bytearray(b" " * 26)
    second_mem.copy_from_host(host_buf)

    # Overwrite the host buffer with "a".."z" and copy it into the first
    # allocation.
    for offset, code in enumerate(range(ord("a"), ord("a") + 26)):
        host_buf[offset] = code
    first_mem.copy_from_host(host_buf)

    # Copy from the first allocation to the second. With a shared
    # multi-device context this can be done directly.
    second_mem.copy_from_device(first_mem)

    return bytes(second_mem.copy_to_host())
def test_tofrom_numpy(shape, dtype, usm_type):
    """Round-trip data through a USM array: fill from NumPy, read back, compare."""
    queue = dpctl.SyclQueue()

    zeros_host = np.zeros(shape, dtype=dtype)
    device_array = dpt.from_numpy(
        zeros_host, usm_type=usm_type, sycl_queue=queue
    )

    ones_host = np.ones(shape, dtype=dtype)
    # One full slice per dimension selects the whole array.
    everything = (slice(None),) * ones_host.ndim
    device_array[everything] = ones_host

    read_back = dpt.to_numpy(device_array)
    assert np.array_equal(read_back, ones_host)
def test_hashing_of_queue():
    """
    A :class:`dpctl.SyclQueue` object must be usable as a dictionary key.
    """
    by_queue = {}
    by_queue[dpctl.SyclQueue()] = "default_queue"
    assert by_queue
def test_context_can_be_used_in_queue(valid_filter):
    """Every device reported by a context can be paired with it to build a queue.

    Skipped when the context cannot be created from ``valid_filter``.
    """
    try:
        context = dpctl.SyclContext(valid_filter)
    except ValueError:
        pytest.skip()

    member_devices = context.get_devices()
    assert len(member_devices) == context.device_count

    for device in member_devices:
        dpctl.SyclQueue(context, device)
def test_get_current_backend():
    """Smoke-test the current-backend/device-type getters and ``set_global_queue``.

    The global queue is set first from a queue object, then from a selector
    string: "gpu" if ``has_gpu()`` is true, otherwise "cpu" if ``has_cpu()``
    is true.
    """
    dpctl.get_current_backend()
    dpctl.get_current_device_type()

    default_queue = dpctl.SyclQueue()
    dpctl.set_global_queue(default_queue)

    if has_gpu():
        dpctl.set_global_queue("gpu")
        return
    if has_cpu():
        dpctl.set_global_queue("cpu")
def test_valid_filter_selectors(valid_filter, check):
    """Create a queue from a supported filter selector string and run
    ``check`` on its device."""
    device = None
    try:
        queue = dpctl.SyclQueue(valid_filter)
        device = queue.get_sycl_device()
    except dpctl.SyclQueueCreationError:
        pytest.skip("Failed to create device with supported filter")
    check(device)
def test_memory_gpu_context():
    """Query the USM type of a memory object with and without an OpenCL GPU queue.

    Without a queue the type must be "shared". With the GPU queue it may be
    either "unknown" or "shared".
    """
    memory = _create_memory()

    default_kind = memory.get_usm_type()
    assert default_kind == "shared"

    # Repeat the query against a GPU queue.
    gpu_queue = dpctl.SyclQueue("opencl:gpu")
    gpu_kind = memory.get_usm_type(gpu_queue)
    assert gpu_kind in ["unknown", "shared"]
def test_context_can_be_used_in_queue2(valid_filter):
    """A context built from a single device can be paired with it to build a queue.

    Skipped when the device cannot be created or has a negative default
    selector score.
    """
    try:
        device = dpctl.SyclDevice(valid_filter)
    except ValueError:
        pytest.skip()

    # Devices rejected by the default selector are not tested.
    rejected = device.default_selector_score < 0
    if rejected:
        pytest.skip()

    context = dpctl.SyclContext(device)
    dpctl.SyclQueue(context, device)