def test_multi_device_empty(mock_or_real_multi_device_context):
    """Check ``MultiArray.empty`` with default, single-device and explicit-splay allocation.

    Each variant allocates 100 ``int32`` elements and is read back through the
    multi-queue to verify the shape and dtype of the host copy.
    """
    context, _mocked = mock_or_real_multi_device_context
    mqueue = MultiQueue.on_devices(context.devices)

    def fetch_and_check(multi_array):
        # Read the whole array back and verify the common expectations.
        host = multi_array.get(mqueue)
        assert host.shape == (100, )
        assert host.dtype == numpy.int32
        return host

    # all devices, default splay
    fetch_and_check(MultiArray.empty(context.devices, 100, numpy.int32))

    # explicit device
    single_dev = MultiArray.empty(context.devices[0:1], 100, numpy.int32)
    assert list(single_dev.subarrays.keys()) == [context.devices[0]]
    fetch_and_check(single_dev)

    # explicit splay
    splayed_dev = MultiArray.empty(
        context.devices, 100, numpy.int32, splay=MultiArray.EqualSplay())
    host = fetch_and_check(splayed_dev)

    # Each device's subarray must match its half of the combined host array.
    device0, device1 = context.devices
    for device, expected in ((device0, host[:50]), (device1, host[50:])):
        part = splayed_dev.subarrays[device].get(mqueue.queues[device])
        assert (part == expected).all()
def test_multi_queue(mock_or_real_multi_device_context):
    """Check that ``MultiQueue.on_devices`` exposes exactly the requested devices.

    Covers a queue spanning both devices of the context and a queue restricted
    to the second device only; each is synchronized after the checks.
    """
    context, _mocked = mock_or_real_multi_device_context

    # Queue over both devices of the context.
    mqueue = MultiQueue.on_devices(context.devices)
    assert set(mqueue.queues.keys()) == {context.devices[0], context.devices[1]}
    assert mqueue.devices == context.devices[[0, 1]]
    mqueue.synchronize()

    # Queue over the second device only.
    mqueue = MultiQueue.on_devices(context.devices[[1]])
    assert set(mqueue.queues.keys()) == {context.devices[1]}
    assert mqueue.devices == context.devices[[1]]
    mqueue.synchronize()
def test_virtual_buffer(mock_4_device_context_pyopencl):
    """Exercise a buffer obtained from a ``TrivialManager`` allocator on one device.

    Verifies the buffer's size, device, offset and kernel argument type, that
    sub-regions are not supported, and that data written with ``set`` is read
    back unchanged with ``get``.
    """
    # Using an OpenCL mock context here because it keeps track of buffer migrations
    context = mock_4_device_context_pyopencl
    mqueue = MultiQueue.on_devices(context.devices)
    device = context.devices[0]

    manager = TrivialManager(device)
    allocate = manager.allocator()
    vbuf = allocate(device, 100)

    assert vbuf.size == 100
    assert isinstance(vbuf.kernel_arg._buffer, bytes)
    assert vbuf.device == device
    with pytest.raises(NotImplementedError):
        vbuf.get_sub_region(0, 50)
    assert vbuf.offset == 0

    # Round-trip 100 bytes through the buffer.
    payload = numpy.arange(100).astype(numpy.uint8)
    vbuf.set(mqueue.queues[device], payload)
    readback = numpy.empty_like(payload)
    vbuf.get(mqueue.queues[device], readback)
    assert (payload == readback).all()
def test_compile_static_multi_device(mock_or_real_multi_device_context):
    """Run the ``multiply`` static kernel on two devices.

    On real devices the result is compared against ``numpy.outer``; with a
    mocked context only the compilation and the call itself are exercised.
    """
    context, mocked = mock_or_real_multi_device_context

    if not mocked:
        src = SRC
    else:
        mock_kernel = MockKernel(
            'multiply', [None, None, None],
            max_total_local_sizes={0: 1024, 1: 512})
        src = MockDefTemplate(kernels=[mock_kernel])

    rows, cols = 22, 15
    a = numpy.arange(rows).astype(numpy.int32)
    b = numpy.arange(cols).astype(numpy.int32)
    ref = numpy.outer(a, b)

    mqueue = MultiQueue.on_devices(context.devices[[0, 1]])

    # `a` uses the default splay, while `b` is explicitly cloned.
    a_dev = MultiArray.from_host(mqueue, a)
    b_dev = MultiArray.from_host(mqueue, b, splay=MultiArray.CloneSplay())
    res_dev = MultiArray.empty(mqueue.devices, (rows, cols), ref.dtype)

    multiply = StaticKernel(mqueue.devices, src, 'multiply', res_dev.shapes)
    multiply(mqueue, res_dev, a_dev, b_dev)

    res = res_dev.get(mqueue)

    if mocked:
        # Mocked kernels do not compute anything, so there is nothing to compare.
        return
    assert (res == ref).all()
def test_multi_device(device_idxs, full_len, benchmark=False):
    """Run the ``sum`` kernel over the selected devices and report the elapsed time.

    Args:
        device_idxs: indices into ``api.platforms[0].devices`` selecting the
            devices the context is built from.
        full_len: number of ``uint64`` elements in the input array.
        benchmark: when true, the comparison against ``calc_ref`` is skipped.
    """
    pwr = 50
    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices(
        [api.platforms[0].devices[device_idx] for device_idx in device_idxs])
    mqueue = MultiQueue.on_devices(context.devices)

    program = Program(context.devices, src)
    a_dev = MultiArray.from_host(mqueue, a)

    # Drain pending work so that only the kernel execution is timed.
    mqueue.synchronize()
    # perf_counter() is monotonic and high-resolution, unlike the wall clock
    # returned by time.time(), so the measured interval cannot be skewed by
    # system clock adjustments.
    t1 = time.perf_counter()
    program.kernel.sum(mqueue, a_dev.shapes, None, a_dev, numpy.int32(pwr))
    mqueue.synchronize()
    t2 = time.perf_counter()

    print(f"Multidevice time (devices {device_idxs}):", t2 - t1)

    a_res = a_dev.get(mqueue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()
def test_multi_device_mismatched_set(mock_or_real_multi_device_context):
    """Check that ``MultiArray.set`` rejects a source living on a different device set."""
    context, _mocked = mock_or_real_multi_device_context
    mqueue = MultiQueue.on_devices(context.devices)

    # Destination spans all devices, the source only the first one.
    destination = MultiArray.empty(context.devices, 100, numpy.int32)
    source = MultiArray.empty(context.devices[0:1], 100, numpy.int32)

    expected_message = "Mismatched device sets in the source and the destination"
    with pytest.raises(ValueError, match=expected_message):
        destination.set(mqueue, source)
def test_clone_splay(mock_or_real_multi_device_context):
    """Check that ``CloneSplay`` places a full copy of the host array on each device."""
    context, _mocked = mock_or_real_multi_device_context
    mqueue = MultiQueue.on_devices(context.devices)

    host = numpy.arange(101)
    cloned = MultiArray.from_host(mqueue, host, splay=MultiArray.CloneSplay())

    device0, device1 = context.devices
    for device in (device0, device1):
        # Every per-device subarray must equal the whole source array.
        on_device = cloned.subarrays[device].get(mqueue.queues[device])
        assert (on_device == host).all()
def test_multi_queue_out_of_queues(mock_4_device_context):
    """Check building a ``MultiQueue`` from already existing single-device queues."""
    context = mock_4_device_context
    first_device, second_device = context.devices[0], context.devices[1]

    queue0 = Queue(first_device)
    # A second queue on device 0; it is created but not passed to the MultiQueue.
    _spare_queue0 = Queue(first_device)
    queue1 = Queue(second_device)

    mqueue = MultiQueue([queue0, queue1])

    expected_devices = {context.devices[0], context.devices[1]}
    assert set(mqueue.queues.keys()) == expected_devices
    assert mqueue.devices == context.devices[[0, 1]]
def test_wrong_device_idxs(mock_4_device_context):
    """Check that running a kernel on devices the program was not built for fails.

    The program is built for devices 0 and 1, while the queue and the array
    live on devices 2 and 1.
    """
    mock_kernel = MockKernel('multiply', [None])
    src = MockDefTemplate(kernels=[mock_kernel])

    context = mock_4_device_context
    program = Program(context.devices[[0, 1]], src)

    queue_devices = context.devices[[2, 1]]
    mqueue = MultiQueue.on_devices(queue_devices)
    res_dev = MultiArray.empty(context.devices[[2, 1]], 16, numpy.int32)

    # Using all the queue's devices (1, 2)
    with pytest.raises(ValueError, match="Requested execution on devices"):
        program.kernel.multiply(mqueue, 8, None, res_dev)
def test_equal_splay(mock_or_real_multi_device_context):
    """Check how ``EqualSplay`` distributes 101 elements, and that it is the default splay.

    The first device is expected to hold the first 51 elements and the second
    device the remaining 50.
    """
    context, _mocked = mock_or_real_multi_device_context
    mqueue = MultiQueue.on_devices(context.devices)

    host = numpy.arange(101)
    multi = MultiArray.from_host(mqueue, host, splay=MultiArray.EqualSplay())

    device0, device1 = context.devices
    expected_parts = ((device0, host[:51]), (device1, host[51:]))

    for device, expected in expected_parts:
        part = multi.subarrays[device].get(mqueue.queues[device])
        assert (part == expected).all()

    # Check that the default splay is EqualSplay
    multi = MultiArray.from_host(mqueue, host)
    for device, expected in expected_parts:
        part = multi.subarrays[device].get(mqueue.queues[device])
        assert (part == expected).all()
def test_compile_multi_device(mock_or_real_multi_device_context):
    """Compile and run the ``multiply`` kernel on two devices taken in reversed order.

    The kernel is called twice: once with plain multi-device arguments, and
    once with per-device dictionaries for the local size and the first input.
    Results are compared with ``a * b + c`` only on real devices.
    """
    context, mocked = mock_or_real_multi_device_context
    devices = context.devices[[1, 0]]

    src = (
        MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
        if mocked
        else SRC_GENERIC
    )

    length = 64
    program = Program(devices, src)

    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    mqueue = MultiQueue.on_devices(devices)
    a_dev = MultiArray.from_host(mqueue, a)
    b_dev = MultiArray.from_host(mqueue, b)

    res_dev = MultiArray.empty(devices, length, numpy.int32)
    program.kernel.multiply(
        mqueue, a_dev.shapes, None, res_dev, a_dev, b_dev, c)
    res = res_dev.get(mqueue)
    if not mocked:
        assert (res == ref).all()

    # Test argument unpacking from dictionaries
    res_dev = MultiArray.empty(devices, length, numpy.int32)
    local_sizes = dict.fromkeys(devices)  # every device maps to None
    program.kernel.multiply(
        mqueue, a_dev.shapes, local_sizes, res_dev, a_dev.subarrays, b_dev, c)
    res = res_dev.get(mqueue)
    if not mocked:
        assert (res == ref).all()
def test_multi_device_from_host(mock_or_real_multi_device_context):
    """Check that an array uploaded with ``MultiArray.from_host`` is read back unchanged."""
    context, _mocked = mock_or_real_multi_device_context
    mqueue = MultiQueue.on_devices(context.devices)

    host = numpy.arange(100)
    uploaded = MultiArray.from_host(mqueue, host)

    roundtrip = uploaded.get(mqueue)
    assert (roundtrip == host).all()
def test_multi_device(mock_or_real_multi_device_context):
    """Run the shared array-operation checks for ``MultiArray`` on a one-device multi-queue."""
    context, _mocked = mock_or_real_multi_device_context

    # The multi-queue is built from a plain list holding only the first device.
    first_device_only = [context.devices[0]]
    mqueue = MultiQueue.on_devices(first_device_only)

    _check_array_operations(mqueue, MultiArray)