def mock(self, api_id, disable=False):
    """
    Dispatch to the backend-specific mocking method for ``api_id``.

    Calls ``self.mock_pycuda`` for the CUDA API ID and ``self.mock_pyopencl``
    for the OpenCL API ID, forwarding ``disable`` as a keyword argument,
    and returns the delegate's result.

    Raises ``ValueError`` if ``api_id`` matches neither API.
    """
    if api_id == cuda_api_id():
        backend_mocker = self.mock_pycuda
    elif api_id == opencl_api_id():
        backend_mocker = self.mock_pyopencl
    else:
        raise ValueError(f"Unknown API ID: {api_id}")

    return backend_mocker(disable=disable)
def test_attributes(mock_backend):
    """Check the descriptive attributes of the first platform of the mocked API."""
    mock_backend.add_devices(['Device1'])
    api = API.from_api_id(mock_backend.api_id)
    platform = Platform.from_index(api, 0)

    assert platform.api == api

    # Per-API expectations: (name, vendor, version).
    expected_by_api = {
        cuda_api_id(): ('nVidia CUDA', 'nVidia', "CUDA 10.0.0"),
        opencl_api_id(): ('Platform0', 'Mock Platforms', 'OpenCL 1.2'),
    }
    expected_name, expected_vendor, expected_version = expected_by_api[api.id]

    assert platform.name == expected_name
    assert platform.shortcut == api.shortcut + ',0'
    assert str(platform) == 'platform(' + platform.shortcut + ')'
    assert platform.vendor == expected_vendor
    assert platform.version == expected_version
def test_from_backend_device(mock_backend):
    """Check that a ``Device`` can be built from a backend-level device object."""
    mock_backend.add_devices(['Device1'])
    api = API.from_api_id(mock_backend.api_id)

    # Obtain the backend's own device object in the way specific to each API.
    if api.id == opencl_api_id():
        first_platform = mock_backend.pyopencl.get_platforms()[0]
        backend_device = first_platform.get_devices()[0]
    elif api.id == cuda_api_id():
        backend_device = mock_backend.pycuda_driver.Device(0)
    else:
        raise NotImplementedError

    # An object that is not a device of any backend must be rejected.
    with pytest.raises(TypeError, match="was not recognized as a device object"):
        Device.from_backend_device(1)

    device = Device.from_backend_device(backend_device)
    assert device.platform.api == api

    # The platform name is only checked for non-CUDA APIs.
    on_cuda = api.id == cuda_api_id()
    if not on_cuda:
        assert device.platform.name == 'Platform0'

    assert device.name == 'Device1'
def test_compile(mock_or_real_context, no_prelude):
    """
    Compile a program with a ``multiply`` kernel and run it twice:
    once with the default local size and once with an explicit one.
    Results are only compared with the reference for a non-mocked context.
    """
    context, mocked = mock_or_real_context

    # Pick the source: a mock template, the generic source,
    # or an API-specific one when the prelude is disabled.
    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    elif not no_prelude:
        src = SRC_GENERIC
    elif context.api.id == cuda_api_id():
        src = SRC_CUDA
    else:
        src = SRC_OPENCL

    program = Program(context.device, src, no_prelude=no_prelude)

    if mocked and no_prelude:
        prelude = program.sources[context.device].prelude
        assert prelude.strip() == ""

    length = 64
    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    queue = Queue(context.device)

    a_dev = Array.from_host(queue, a)
    b_dev = Array.from_host(queue, b)
    res_dev = Array.empty(context.device, length, numpy.int32)

    # Both Arrays and Buffers must be accepted as kernel arguments;
    # one of the buffers is additionally passed as a sub-region (covering the whole buffer).
    a_dev_view = a_dev.data.get_sub_region(0, a_dev.data.size)
    program.kernel.multiply(
        queue, length, None, res_dev, a_dev_view, b_dev.data, c)
    res = res_dev.get(queue)
    if not mocked:
        assert (res == ref).all()

    # Same computation with an explicit local size.
    # Note that the output array here is created from host data, not allocated empty.
    res2_dev = Array.from_host(queue, a)
    local_size = length // 2
    program.kernel.multiply(
        queue, length, local_size, res2_dev, a_dev, b_dev, c)
    res2 = res2_dev.get(queue)
    if not mocked:
        assert (res2 == ref).all()
def __init__(self, cuda_version=(10, 0, 0)):
    """
    Set up the mocked PyCUDA driver and compiler modules
    along with the bookkeeping state of the mock backend.

    ``cuda_version`` is forwarded to the mocked driver module.
    """
    # Both mocked modules receive a reference to this backend object.
    self.pycuda_driver = Mock_pycuda_driver(self, cuda_version)
    self.pycuda_compiler = Mock_pycuda_compiler(self)

    self.device_infos = []
    self._context_stack = []

    # DeviceAllocation objects have to be cast to integers (to add offsets),
    # so a mock allocation object cannot be used to track them.
    # Instead, recognizable integers serve as "addresses", and the validity of
    # allocations is checked with a kind of fuzzy match database.
    # This should work for testing purposes as long as offsets are small
    # and other integer parameters don't fall in the "address" range.
    self._allocation_start = 1 << 30
    self._allocation_step = 1 << 16
    self._allocation_idx = 0
    self._allocations = {}

    self.api_id = cuda_api_id()
def test_from_backend_platform(mock_backend_factory):
    """Check that a ``Platform`` can be built from a backend-level platform object."""
    opencl_backend = mock_backend_factory.mock(opencl_api_id())
    for platform_name, device_name in (('Platform1', 'Device1'), ('Platform2', 'Device2')):
        opencl_backend.add_platform_with_devices(platform_name, [device_name])

    # A CUDA API is added as well, to make sure it gets queried on whether
    # the object is its platform object (even though CUDA doesn't have platforms).
    cuda_backend = mock_backend_factory.mock(cuda_api_id())
    cuda_backend.add_devices(['Device1', 'Device2'])

    all_backend_platforms = opencl_backend.pyopencl.get_platforms()
    backend_platform = all_backend_platforms[0]

    # An object that is not a platform of any backend must be rejected.
    with pytest.raises(TypeError, match="was not recognized as a platform object"):
        Platform.from_backend_platform(1)

    platform = Platform.from_backend_platform(backend_platform)
    assert platform.api.id == opencl_api_id()
    assert platform.name == 'Platform1'
def test_hash(mock_backend):
    """
    Check that platform objects can be used as dictionary keys.
    One platform is used for CUDA, two for any other API.
    """
    on_cuda = mock_backend.api_id == cuda_api_id()

    if on_cuda:
        mock_backend.add_devices(['Device1'])
    else:
        mock_backend.add_platform_with_devices('Platform0', ['Device0'])
        mock_backend.add_platform_with_devices('Platform1', ['Device1'])

    api = API.from_api_id(mock_backend.api_id)

    platform_count = 1 if on_cuda else 2
    platforms = [Platform.from_index(api, index) for index in range(platform_count)]

    # Each platform is mapped to its own index and must be retrievable by itself.
    index_of = {platform: index for index, platform in enumerate(platforms)}
    for index, platform in enumerate(platforms):
        assert index_of[platform] == index
def test_eq(mock_backend):
    """
    Check that two separately created objects for the same platform compare equal,
    and (for APIs other than CUDA) that different platforms compare unequal.
    """
    on_cuda = mock_backend.api_id == cuda_api_id()

    if on_cuda:
        mock_backend.add_devices(['Device1'])
    else:
        mock_backend.add_platform_with_devices('Platform0', ['Device0'])
        mock_backend.add_platform_with_devices('Platform1', ['Device1'])

    api = API.from_api_id(mock_backend.api_id)

    first = Platform.from_index(api, 0)
    first_again = Platform.from_index(api, 0)
    if not on_cuda:
        second = Platform.from_index(api, 1)

    # Distinct Python objects, but equal as platforms.
    assert first is not first_again and first == first_again

    if not on_cuda:
        assert first != second
def test_keep(mock_or_real_context, capsys):
    """
    Check that compiling with ``keep=True`` reports a directory with the compiler output,
    and that the kernel source file in that directory contains the program source.

    Raises ``NotImplementedError`` if the context's API is neither OpenCL nor CUDA,
    since the expected source file name is only known for those two.
    """
    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        src = SRC_GENERIC

    # The program object itself is not inspected below; only what was printed
    # during its creation and the files left on disk are checked.
    program = Program(context.device, src, keep=True)
    captured = capsys.readouterr()

    # `re.match` returns None if the expected line is missing; fail with
    # a readable message instead of an AttributeError on `.group`.
    match = re.match(r'\*\*\* compiler output in (.*)', captured.out)
    assert match is not None, (
        "no compiler output location reported; captured stdout: " + repr(captured.out))
    path = match.group(1)
    assert os.path.isdir(path)

    if context.api.id == opencl_api_id():
        srcfile = os.path.join(path, 'kernel.cl')
    elif context.api.id == cuda_api_id():
        srcfile = os.path.join(path, 'kernel.cu')
    else:
        # Without this branch `srcfile` would be unbound for an unknown API.
        raise NotImplementedError

    with open(srcfile) as f:
        source = f.read()

    assert str(src) in source
def test_set_constant_array_errors(mock_4_device_context):
    """
    Check the errors raised when constant arrays are used incorrectly.

    For the CUDA API, the following are checked on a ``Program``:
    a queue from another context, an unsupported array type, a wrong array size,
    an unknown metadata type at compile time, and a queue on a device
    the program was not compiled for.

    For any other API, both passing ``constant_arrays`` at creation and calling
    ``set_constant_array()`` must fail, for ``Program`` and ``StaticKernel`` alike.
    """
    context = mock_4_device_context

    # A second, unrelated context for the same API, used to get a "foreign" queue.
    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])
    # Contexts don't know about each other and can't interact with stack in a consistent manner.
    # So we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16).astype(numpy.int32)

    # A mock source declaring one kernel and one constant memory array
    # whose size in bytes matches `cm1`.
    src = MockDefTemplate(kernels=[
        MockKernel('kernel', [],
                   max_total_local_sizes={
                       0: 1024,
                       1: 1024,
                       2: 1024,
                       3: 1024
                   })
    ],
                          constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})
    queue = Queue(context.devices[0])

    if context.api.id == cuda_api_id():
        program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        # A queue created in a different context
        with pytest.raises(
                ValueError,
                match=
                "The provided queue must belong to the same context as this program uses"
        ):
            program.set_constant_array(other_queue, 'cm1', cm1)

        # A plain list instead of an array
        with pytest.raises(TypeError, match="Unsupported array type"):
            program.set_constant_array(queue, 'cm1', [1])

        # An array with half the declared number of elements
        with pytest.raises(ValueError,
                           match="Incorrect size of the constant buffer;"):
            program.set_constant_array(queue, 'cm1', cm1[:8])

        # An integer as the constant array metadata at compile time
        with pytest.raises(TypeError,
                           match="Unknown constant array metadata type"):
            program = Program(context.devices[[0, 1, 2]],
                              src,
                              constant_arrays=dict(cm1=1))

        # A program compiled for devices 0-2 used with a queue on device 3
        program = Program(context.devices[[0, 1, 2]],
                          src,
                          constant_arrays=dict(cm1=cm1))
        queue3 = Queue(context.devices[3])
        with pytest.raises(
                ValueError,
                match=
                "The program was not compiled for the device this queue uses"):
            program.set_constant_array(queue3, 'cm1', cm1)

    else:
        # `Program`: constant arrays rejected at compile time ...
        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            program = Program(context.devices,
                              src,
                              constant_arrays=dict(cm1=cm1))

        # ... and when set on an already compiled program.
        program = Program(context.devices, src)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            program.set_constant_array(queue, 'cm1', cm1)

        # `StaticKernel`: the same two checks.
        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            sk = StaticKernel(context.devices,
                              src,
                              'kernel',
                              1024,
                              constant_arrays=dict(cm1=cm1))

        sk = StaticKernel(context.devices, src, 'kernel', 1024)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            sk.set_constant_array(queue, 'cm1', cm1)
def _test_constant_memory(context, mocked, is_static):
    """
    Run the ``copy_from_cm`` kernel, either through a ``StaticKernel``
    (``is_static`` is true) or through a ``Program``, and check the result.

    For the CUDA API the three input arrays are supplied as constant arrays
    (declared at creation, then set via ``set_constant_array()``), and the kernel
    receives only the output array. For any other API the three arrays are passed
    as regular kernel arguments after the output array.

    The result is compared with ``cm1 + cm2 + cm3`` only if ``mocked`` is false.
    """
    cm1 = numpy.arange(16).astype(numpy.int32)
    cm2 = numpy.arange(16).astype(numpy.int32) * 2 + 1
    cm3 = numpy.arange(16).astype(numpy.int32) * 3 + 2

    on_cuda = context.api.id == cuda_api_id()

    if mocked:
        # The mocked kernel takes one argument on CUDA (the output)
        # and four otherwise (the output and three inputs).
        kernel = MockKernel(
            'copy_from_cm',
            [None] if on_cuda else [None, None, None, None],
            max_total_local_sizes={0: 1024})
        src = MockDefTemplate(
            constant_mem={
                'cm1': cm1.size * cm1.dtype.itemsize,
                'cm2': cm2.size * cm2.dtype.itemsize,
                'cm3': cm3.size * cm3.dtype.itemsize
            },
            kernels=[kernel])
    else:
        src = SRC_CONSTANT_MEM_STATIC if is_static else SRC_CONSTANT_MEM

    queue = Queue(context.device)

    cm1_dev = Array.from_host(queue, cm1)
    cm2_dev = Array.from_host(queue, cm2)
    cm3_dev = Array.from_host(queue, cm3)
    res_dev = Array.empty(context.device, 16, numpy.int32)

    if on_cuda:
        # Use different forms of constant array representation
        constant_arrays = dict(
            cm1=cm1,  # as an array(-like) object
            cm2=(cm2.shape, cm2.dtype),  # as a tuple of shape and dtype
            cm3=cm3_dev)  # as a device array

        if is_static:
            compiled = StaticKernel(
                context.device, src, 'copy_from_cm',
                global_size=16, constant_arrays=constant_arrays)
        else:
            compiled = Program(context.device, src, constant_arrays=constant_arrays)

        # Both `StaticKernel` and `Program` are given the same three calls,
        # each with a different kind of data source.
        compiled.set_constant_array(queue, 'cm1', cm1_dev)  # from a device array
        compiled.set_constant_array(queue, 'cm2', cm2)  # from a host array
        compiled.set_constant_array(queue, 'cm3', cm3_dev.data)  # from a device array's buffer

        kernel_args = (res_dev,)
    else:
        if is_static:
            compiled = StaticKernel(context.device, src, 'copy_from_cm', global_size=16)
        else:
            compiled = Program(context.device, src)

        kernel_args = (res_dev, cm1_dev, cm2_dev, cm3_dev)

    if is_static:
        # A static kernel is called directly; its global size was fixed at creation.
        compiled(queue, *kernel_args)
    else:
        # A program's kernel takes the global and local sizes on every call.
        compiled.kernel.copy_from_cm(queue, 16, None, *kernel_args)

    res = res_dev.get(queue)

    if not mocked:
        assert (res == cm1 + cm2 + cm3).all()
def test_allocate_and_copy(mock_or_real_context):
    """
    Exercise buffer allocation, host<->device transfers of whole buffers and
    sub-regions, and device-to-device copies (with and without ``no_async``).
    """
    context, _mocked = mock_or_real_context

    length = 100
    dtype = numpy.dtype('int32')
    itemsize = dtype.itemsize
    size = length * itemsize

    arr = numpy.arange(length).astype(dtype)

    buf = Buffer.allocate(context.device, size)

    assert buf.size == size
    assert buf.offset == 0

    # This only covers the existence of the attribute;
    # it is hard to actually check it without running a kernel.
    assert buf.kernel_arg is not None

    queue = Queue(context.device)
    buf.set(queue, arr)

    # Read the whole buffer
    res = numpy.empty_like(arr)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Read a subregion: 50 elements starting from element 25
    region_start = 25
    region_stop = 25 + 50
    buf_region = buf.get_sub_region(25 * itemsize, 50 * itemsize)
    arr_region = arr[region_start:region_stop]
    res_region = numpy.empty_like(arr_region)
    buf_region.get(queue, res_region)
    queue.synchronize()
    assert (res_region == arr_region).all()

    # Write a subregion (mirroring the change in the host reference array)
    arr_region = (numpy.ones(50) * 100).astype(dtype)
    arr[region_start:region_stop] = arr_region
    buf_region.set(queue, arr_region)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Subregion of subregion
    if context.api.id == cuda_api_id():
        # In OpenCL that leads to segfault, but with CUDA we just emulate that with pointers.
        arr_region2 = (numpy.ones(20) * 200).astype(dtype)
        arr[25 + 20:25 + 40] = arr_region2
        buf_region2 = buf_region.get_sub_region(20 * itemsize, 20 * itemsize)
        buf_region2.set(queue, arr_region2)
        buf.get(queue, res)
        queue.synchronize()
        assert (res == arr).all()

    # Device-to-device copy into the middle of a twice larger buffer filled with ones:
    # first with the default options, then with `no_async=True`.
    for set_options in ({}, {'no_async': True}):
        buf2 = Buffer.allocate(context.device, size * 2)
        buf2.set(queue, numpy.ones(length * 2, dtype))
        buf2_view = buf2.get_sub_region(50 * itemsize, 100 * itemsize)
        buf2_view.set(queue, buf, **set_options)

        res2 = numpy.empty(length * 2, dtype)
        buf2.get(queue, res2)
        queue.synchronize()

        # The copied part matches, and the surrounding ones are untouched.
        assert (res2[50:150] == arr).all()
        assert (res2[:50] == 1).all()
        assert (res2[150:] == 1).all()