def test_flags(mock_backend_pyopencl):
    backend = mock_backend_pyopencl

    normal_flags = backend.pyopencl.mem_flags.READ_WRITE
    special_flags = normal_flags | backend.pyopencl.mem_flags.ALLOC_HOST_PTR

    backend.add_platform_with_devices('Apple', ['GeForce', 'Foo', 'Bar'])
    backend.add_platform_with_devices('Baz', ['GeForce', 'Foo', 'Bar'])

    # Multi-device on Apple platform with one of the devices being GeForce:
    # need special Buffer flags
    api = API.from_api_id(backend.api_id)
    context = Context.from_devices([api.platforms[0].devices[0], api.platforms[0].devices[1]])
    buf = Buffer.allocate(context.devices[0], 100)
    assert buf._buffer_adapter.pyopencl_buffer.flags == special_flags

    # None of the devices is GeForce
    context = Context.from_devices([api.platforms[0].devices[1], api.platforms[0].devices[2]])
    buf = Buffer.allocate(context.devices[0], 100)
    assert buf._buffer_adapter.pyopencl_buffer.flags == normal_flags

    # Only one device
    context = Context.from_devices([api.platforms[0].devices[0]])
    buf = Buffer.allocate(context.device, 100)
    assert buf._buffer_adapter.pyopencl_buffer.flags == normal_flags

    # Not an Apple platform
    context = Context.from_devices([api.platforms[1].devices[0], api.platforms[1].devices[1]])
    buf = Buffer.allocate(context.devices[0], 100)
    assert buf._buffer_adapter.pyopencl_buffer.flags == normal_flags

def test_from_backend_contexts_cuda_multi_device(mock_backend_pycuda):
    # CUDA style - a context per device
    backend = mock_backend_pycuda
    backend.add_devices(['Device1', 'Device2'])

    backend_context1 = backend.pycuda_driver.Device(0).make_context()
    backend.pycuda_driver.Context.pop()
    backend_context2 = backend.pycuda_driver.Device(1).make_context()
    backend.pycuda_driver.Context.pop()

    # Grunnur must have ownership
    error_msg = "When dealing with multiple CUDA contexts, Grunnur must be the one managing them"
    with pytest.raises(ValueError, match=error_msg):
        Context.from_backend_contexts([backend_context1, backend_context2])

    context = Context.from_backend_contexts(
        [backend_context1, backend_context2], take_ownership=True)

    # CUDA has no concept of platforms, so the platform name in the mock will be ignored
    assert context.platform.name == 'nVidia CUDA'
    assert [device.name for device in context.devices] == ['Device1', 'Device2']

def test_from_backend_contexts_cuda_single_device(mock_backend_pycuda, take_ownership):
    # CUDA style - a context per device
    backend = mock_backend_pycuda
    backend.add_devices(['Device1', 'Device2'])
    backend_context = backend.pycuda_driver.Device(1).make_context()

    if not take_ownership:
        # backend context can stay in the stack
        context = Context.from_backend_contexts(backend_context, take_ownership=False)
    else:
        # forgot to pop the backend context off the stack - error
        with pytest.raises(
                ValueError, match="The given context is already in the context stack"):
            context = Context.from_backend_contexts(backend_context, take_ownership=True)

        backend.pycuda_driver.Context.pop()
        context = Context.from_backend_contexts(backend_context, take_ownership=True)

    # CUDA has no concept of platforms, so the platform name in the mock will be ignored
    assert context.platform.name == 'nVidia CUDA'
    assert [device.name for device in context.devices] == ['Device2']

def test_bound_multi_device_creation(mock_backend_pyopencl):
    mock_backend_pyopencl.add_platform_with_devices(
        'Platform1', ['Device1', 'Device2', 'Device3'])
    api = API.from_api_id(mock_backend_pyopencl.api_id)
    platform = api.platforms[0]
    devices = platform.devices[:]
    context = Context.from_devices(devices)
    context2 = Context.from_devices(devices)

    with pytest.raises(
            ValueError,
            match="All devices in a multi-device must belong to the same context"):
        BoundMultiDevice.from_bound_devices([context.devices[0], context2.devices[1]])

    with pytest.raises(ValueError, match="All devices in a multi-device must be distinct"):
        context.devices[[1, 1]]

    sub_device = context.devices[[2, 1]]
    assert len(sub_device) == 2
    assert sub_device[0].name == 'Device3'
    assert sub_device[1].name == 'Device2'

def test_from_backend_contexts_opencl(mock_backend_pyopencl):
    # OpenCL style - one context, many devices
    backend = mock_backend_pyopencl
    backend.add_platform_with_devices('Platform1', ['Device1'])
    backend.add_platform_with_devices('Platform2', ['Device2', 'Device3'])
    backend_devices = backend.pyopencl.get_platforms()[1].get_devices()
    backend_context = backend.pyopencl.Context(backend_devices)
    backend_context2 = backend.pyopencl.Context(backend_devices)

    with pytest.raises(
            ValueError, match="Cannot make one OpenCL context out of several contexts"):
        Context.from_backend_contexts([backend_context, backend_context2])

    context = Context.from_backend_contexts(backend_context)
    assert context.platform.name == 'Platform2'
    assert [device.name for device in context.devices] == ['Device2', 'Device3']

    with pytest.raises(TypeError):
        Context.from_backend_contexts(1)

def test_from_devices_different_platforms(mock_backend_pyopencl):
    mock_backend_pyopencl.add_platform_with_devices('Platform1', ['Device1', 'Device2'])
    mock_backend_pyopencl.add_platform_with_devices('Platform2', ['Device3', 'Device4'])
    api = API.from_api_id(mock_backend_pyopencl.api_id)

    with pytest.raises(ValueError, match="All devices must belong to the same platform"):
        Context.from_devices([api.platforms[0].devices[0], api.platforms[1].devices[0]])

def test_device_shortcut(mock_backend_pyopencl):
    mock_backend_pyopencl.add_platform_with_devices(
        'Platform1', ['Device1', 'Device2', 'Device3'])
    api = API.from_api_id(mock_backend_pyopencl.api_id)

    context = Context.from_devices(api.platforms[0].devices[:])
    with pytest.raises(
            RuntimeError,
            match="The `device` shortcut only works for single-device contexts"):
        context.device

    context = Context.from_devices(api.platforms[0].devices[2])
    assert context.device.name == 'Device3'

def test_builtin_globals(mock_backend_pycuda):
    mock_backend_pycuda.add_devices([
        PyCUDADeviceInfo(max_threads_per_block=1024),
        PyCUDADeviceInfo(max_threads_per_block=512)])

    source_template = DefTemplate.from_string(
        'mock_source', [],
        """
        KERNEL void test()
        {
            int max_total_local_size = ${device_params.max_total_local_size};
        }
        """)

    api = API.from_api_id(mock_backend_pycuda.api_id)
    context = Context.from_devices([api.platforms[0].devices[0], api.platforms[0].devices[1]])

    src = MockDefTemplate(kernels=[MockKernel('test', [None])], source_template=source_template)

    program = Program(context.devices, src)

    assert 'max_total_local_size = 1024' in program.sources[context.devices[0]].source
    assert 'max_total_local_size = 512' in program.sources[context.devices[1]].source

def test_max_total_local_sizes(mock_backend):
    mock_backend.add_devices(["Device1", "Device2 - tag", "Device3 - tag", "Device4"])
    api = API.from_api_id(mock_backend.api_id)
    context = Context.from_criteria(api, devices_num=2, device_include_masks=["tag"])

    # Providing max_total_local_sizes for all possible devices to make sure
    # only the ones corresponding to the context will get picked up
    kernel = MockKernel('test', max_total_local_sizes={0: 64, 1: 1024, 2: 512, 3: 128})
    src = MockDefTemplate(kernels=[kernel])
    program = Program(context.devices, src)

    # The indices here correspond to the devices in the context, not in the platform
    assert program.kernel.test.max_total_local_sizes == {
        context.devices[0]: 1024, context.devices[1]: 512}

def test_multi_device(device_idxs, full_len, benchmark=False):
    pwr = 50

    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices(
        [api.platforms[0].devices[device_idx] for device_idx in device_idxs])
    mqueue = MultiQueue.on_devices(context.devices)

    program = Program(context.devices, src)

    a_dev = MultiArray.from_host(mqueue, a)

    mqueue.synchronize()
    t1 = time.time()
    program.kernel.sum(mqueue, a_dev.shapes, None, a_dev, numpy.int32(pwr))
    mqueue.synchronize()
    t2 = time.time()
    print(f"Multi-device time (devices {device_idxs}):", t2 - t1)

    a_res = a_dev.get(mqueue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()

def test_bound_device_eq(mock_backend_pyopencl):
    mock_backend_pyopencl.add_platform_with_devices('Platform1', ['Device1', 'Device2'])
    api = API.from_api_id(mock_backend_pyopencl.api_id)
    platform = api.platforms[0]
    devices = platform.devices[:]

    context = Context.from_devices(devices)
    assert context.devices[0] == context.devices[0]
    assert context.devices[0] != context.devices[1]

    context2 = Context.from_devices(devices)
    assert context2.devices[0] != context.devices[0]

def test_virtual_sizes_error_propagated(mock_backend_pycuda):
    # Testing for PyCUDA backend only, since the mocked PyOpenCL backend does not have a way
    # to set maximum global sizes (PyOpenCL devices don't have a corresponding parameter),
    # and PyCUDA is enough to test the required code path.

    device_info = PyCUDADeviceInfo(
        max_threads_per_block=2**4,
        max_block_dim_x=2**4,
        max_block_dim_y=2**4,
        max_block_dim_z=2**4,
        max_grid_dim_x=2**10,
        max_grid_dim_y=2**10,
        max_grid_dim_z=2**8)

    mock_backend_pycuda.add_devices([device_info])
    api = API.from_api_id(mock_backend_pycuda.api_id)
    device = api.platforms[0].devices[0]
    context = Context.from_devices([device])

    kernel = MockKernel('test', [None], max_total_local_sizes={0: 16})
    src = MockDefTemplate(kernels=[kernel])

    # Just enough to fit in the grid limits
    multiply = StaticKernel(context.device, src, 'test', (2**14, 2**10, 2**8), (2**4, 1, 1))

    # Global size is too large to fit on the device,
    # so virtual size finding fails and the error is propagated to the user.
    with pytest.raises(
            VirtualSizeError,
            match="Bounding global size \\(16384, 2048, 256\\) is too large"):
        multiply = StaticKernel(context.device, src, 'test', (2**14, 2**11, 2**8), (2**4, 1, 1))

def test_bound_multi_device_issubset(mock_backend_pyopencl):
    mock_backend_pyopencl.add_platform_with_devices(
        'Platform1', ['Device1', 'Device2', 'Device3'])
    api = API.from_api_id(mock_backend_pyopencl.api_id)
    context = Context.from_devices(api.platforms[0].devices[:])
    assert context.devices[[2, 1]].issubset(context.devices)

def test_from_devices(mock_backend):
    mock_backend.add_devices(['Device2', 'Device3'])
    api = API.from_api_id(mock_backend.api_id)
    platform = api.platforms[0]
    devices = platform.devices[:]

    context = Context.from_devices(devices)
    assert context.platform == platform
    assert [device.as_unbound() for device in context.devices] == devices

def test_from_backend_devices_opencl(mock_backend_pyopencl):
    backend = mock_backend_pyopencl
    backend.add_platform_with_devices('Platform1', ['Device1'])
    backend.add_platform_with_devices('Platform2', ['Device2', 'Device3'])

    backend_devices = backend.pyopencl.get_platforms()[1].get_devices()
    context = Context.from_backend_devices(backend_devices)
    assert context.platform.name == 'Platform2'
    assert [device.name for device in context.devices] == ['Device2', 'Device3']

def test_deactivate(mock_backend_pyopencl, mock_backend_pycuda):
    mock_backend_pyopencl.add_platform_with_devices('Platform1', ['Device1'])
    mock_backend_pycuda.add_devices(['Device1'])

    api = API.from_api_id(mock_backend_pyopencl.api_id)
    context = Context.from_devices(api.platforms[0].devices[0])
    with pytest.raises(RuntimeError, match="`deactivate\\(\\)` only works for CUDA API"):
        context.deactivate()

    backend_context = mock_backend_pycuda.pycuda_driver.Device(0).make_context()
    backend_context.pop()
    api = API.from_api_id(mock_backend_pycuda.api_id)
    context = Context.from_backend_contexts(backend_context, take_ownership=True)

    assert backend_context.is_stacked()
    context.deactivate()
    assert not backend_context.is_stacked()

def test_wrong_context(mock_backend):
    mock_backend.add_devices(['Device0'])
    src = MockDefTemplate(kernels=[MockKernel('multiply', [None])])

    api = API.from_api_id(mock_backend.api_id)
    context = Context.from_devices(api.platforms[0].devices[0])
    context2 = Context.from_devices(api.platforms[0].devices[0])

    res_dev = Array.empty(context.device, 16, numpy.int32)
    program = Program(context.device, src)
    queue = Queue(context2.device)

    with pytest.raises(
            ValueError,
            match="The provided queue must belong to the same context this program uses"):
        program.kernel.multiply(queue, 8, None, res_dev)

def mock_or_real_multi_device_context(request, monkeypatch):
    # Same as `mock_or_real_context` below, but for 2-device contexts.
    value = request.param
    if value is None:
        pytest.skip("Could not find 2 suitable GPGPU devices on the same platform")
    elif isinstance(value, list):
        yield (Context.from_devices(value), False)
    elif isinstance(value, APIID):
        factory = MockBackendFactory(monkeypatch)
        backend = factory.mock(value)
        yield (make_context(backend, 2), True)
    else:
        raise TypeError

def mock_or_real_context(request, monkeypatch):
    # Since `py.test` does not support concatenating fixtures,
    # we concatenate real contexts and mocked contexts manually.
    # If there are no devices available, we need it to be noticeable that some tests are skipped,
    # hence passing `None` and calling `skip()` explicitly in this case.
    value = request.param
    if value is None:
        pytest.skip("No GPGPU devices available")
    elif isinstance(value, Device):
        yield (Context.from_devices(value), False)
    elif isinstance(value, APIID):
        factory = MockBackendFactory(monkeypatch)
        backend = factory.mock(value)
        yield (make_context(backend, 1), True)
    else:
        raise TypeError

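# Illustrative sketch (not part of the original suite) of how a test might consume the
# `mock_or_real_context` fixture above: it yields a `(Context, mocked)` pair, where `mocked`
# tells the test whether it runs against a mocked backend. The function name and body here
# are hypothetical; it is deliberately not named `test_*` so pytest will not collect it.
def _example_test_with_context(mock_or_real_context):
    context, mocked = mock_or_real_context
    assert isinstance(context, Context)
    if mocked:
        # Checks that rely on mock-specific behavior would go here
        pass
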
def test_single_device(device_idx, full_len, benchmark=False):
    pwr = 50

    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices([api.platforms[0].devices[device_idx]])
    queue = Queue(context.device)

    program = Program(context.device, src)

    a_dev = Array.from_host(queue, a)

    queue.synchronize()
    t1 = time.time()
    program.kernel.sum(queue, full_len, None, a_dev, numpy.int32(pwr))
    queue.synchronize()
    t2 = time.time()
    print(f"Single device time (device {device_idx}):", t2 - t1)

    a_res = a_dev.get(queue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()

def test_from_criteria(mock_backend_pyopencl):
    backend = mock_backend_pyopencl
    backend.add_platform_with_devices('foo-bar', ['Device1'])
    backend.add_platform_with_devices('bar-baz', ['Device2'])
    backend.add_platform_with_devices(
        'foo-baz',
        ['foo-bar', 'foo-baz-1', 'bar-baz', 'foo-baz-1', 'foo-baz-2', 'foo-baz-2', 'foo-baz-3'])

    api = API.from_api_id(backend.api_id)
    context = Context.from_criteria(
        api,
        devices_num=2,
        platform_include_masks=['foo'],
        platform_exclude_masks=['bar'],
        device_include_masks=['foo'],
        device_exclude_masks=['bar'],
        unique_devices_only=True)

    assert context.platform.name == 'foo-baz'
    assert [device.name for device in context.devices] == ['foo-baz-1', 'foo-baz-2']

def make_context(backend, devices_num):
    api_id = backend.api_id
    backend.add_devices(['Device' + str(i) for i in range(devices_num)])
    api = API.from_api_id(api_id)
    return Context.from_criteria(api, devices_num=devices_num)

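# A minimal usage sketch for `make_context` (hypothetical, mirroring how the fixtures above
# use it): mock a backend for the given API id, then build a context with the requested
# number of devices. `MockBackendFactory` and a pytest `monkeypatch` object are assumed to
# be available, as in `mock_or_real_context`.
def _example_make_mocked_context(monkeypatch, api_id, devices_num=2):
    factory = MockBackendFactory(monkeypatch)
    backend = factory.mock(api_id)
    return make_context(backend, devices_num)
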
def test_set_constant_array_errors(mock_4_device_context):
    context = mock_4_device_context

    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])

    # Contexts don't know about each other and can't interact with the stack
    # in a consistent manner, so we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16).astype(numpy.int32)
    src = MockDefTemplate(
        kernels=[
            MockKernel(
                'kernel', [],
                max_total_local_sizes={0: 1024, 1: 1024, 2: 1024, 3: 1024})],
        constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})

    queue = Queue(context.devices[0])

    if context.api.id == cuda_api_id():
        program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        with pytest.raises(
                ValueError,
                match="The provided queue must belong to the same context as this program uses"):
            program.set_constant_array(other_queue, 'cm1', cm1)

        with pytest.raises(TypeError, match="Unsupported array type"):
            program.set_constant_array(queue, 'cm1', [1])

        with pytest.raises(ValueError, match="Incorrect size of the constant buffer;"):
            program.set_constant_array(queue, 'cm1', cm1[:8])

        with pytest.raises(TypeError, match="Unknown constant array metadata type"):
            program = Program(context.devices[[0, 1, 2]], src, constant_arrays=dict(cm1=1))

        program = Program(context.devices[[0, 1, 2]], src, constant_arrays=dict(cm1=cm1))
        queue3 = Queue(context.devices[3])

        with pytest.raises(
                ValueError,
                match="The program was not compiled for the device this queue uses"):
            program.set_constant_array(queue3, 'cm1', cm1)

    else:
        with pytest.raises(
                ValueError,
                match="Compile-time constant arrays are only supported by CUDA API"):
            program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        program = Program(context.devices, src)

        with pytest.raises(
                ValueError, match="Constant arrays are only supported for CUDA API"):
            program.set_constant_array(queue, 'cm1', cm1)

        with pytest.raises(
                ValueError,
                match="Compile-time constant arrays are only supported by CUDA API"):
            sk = StaticKernel(context.devices, src, 'kernel', 1024, constant_arrays=dict(cm1=cm1))

        sk = StaticKernel(context.devices, src, 'kernel', 1024)

        with pytest.raises(
                ValueError, match="Constant arrays are only supported for CUDA API"):
            sk.set_constant_array(queue, 'cm1', cm1)

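# A hedged happy-path sketch (not in the original suite) of the CUDA-only constant array
# workflow exercised above: compile a Program with a compile-time constant array, then
# upload new contents with `set_constant_array()`. Assumes a CUDA `context`, a `src`
# template whose `constant_mem` metadata matches the size of `cm1`, and a Queue on one
# of the context's devices; names are illustrative only.
def _example_constant_array_usage(context, src):
    cm1 = numpy.arange(16).astype(numpy.int32)
    queue = Queue(context.devices[0])
    program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))
    program.set_constant_array(queue, 'cm1', cm1)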