Example 1
    def mock(self, api_id, disable=False):
        if api_id == cuda_api_id():
            return self.mock_pycuda(disable=disable)
        elif api_id == opencl_api_id():
            return self.mock_pyopencl(disable=disable)
        else:
            raise ValueError(f"Unknown API ID: {api_id}")
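
These excerpts appear to come from the grunnur test suite, and this dispatcher, like the snippets below, relies on grunnur's public names plus the mock helpers used for testing. As a hedged sketch (the exact module for the mock helpers is an assumption; in some versions they live inside the test directory rather than a public module), the imports needed to run the snippets would look roughly like:

import os
import re

import numpy
import pytest

from grunnur import (
    API, Platform, Device, Context, Queue, Program, StaticKernel,
    Array, Buffer, cuda_api_id, opencl_api_id)

# Assumed location of the mock helpers; kernel sources such as SRC_GENERIC,
# SRC_CUDA, SRC_OPENCL and SRC_CONSTANT_MEM are module-level constants defined
# in the test files themselves and are not shown in these excerpts.
from grunnur.testing import MockDefTemplate, MockKernel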
Example 2
def test_attributes(mock_backend):
    mock_backend.add_devices(['Device1'])
    api = API.from_api_id(mock_backend.api_id)
    p = Platform.from_index(api, 0)

    assert p.api == api
    assert p.name == {
        cuda_api_id(): 'nVidia CUDA',
        opencl_api_id(): 'Platform0'
    }[api.id]
    assert p.shortcut == api.shortcut + ',0'
    assert str(p) == 'platform(' + p.shortcut + ')'
    assert p.vendor == {
        cuda_api_id(): 'nVidia',
        opencl_api_id(): 'Mock Platforms'
    }[api.id]
    assert p.version == {
        cuda_api_id(): "CUDA 10.0.0",
        opencl_api_id(): 'OpenCL 1.2'
    }[api.id]
Example 3
def test_from_backend_device(mock_backend):
    mock_backend.add_devices(['Device1'])

    api = API.from_api_id(mock_backend.api_id)

    if api.id == opencl_api_id():
        backend_device = mock_backend.pyopencl.get_platforms()[0].get_devices()[0]
    elif api.id == cuda_api_id():
        backend_device = mock_backend.pycuda_driver.Device(0)
    else:
        raise NotImplementedError

    with pytest.raises(TypeError,
                       match="was not recognized as a device object"):
        Device.from_backend_device(1)

    device = Device.from_backend_device(backend_device)
    assert device.platform.api == api
    if api.id != cuda_api_id():
        assert device.platform.name == 'Platform0'
    assert device.name == 'Device1'
Example 4
def test_compile(mock_or_real_context, no_prelude):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        if no_prelude:
            src = SRC_CUDA if context.api.id == cuda_api_id() else SRC_OPENCL
        else:
            src = SRC_GENERIC

    program = Program(context.device, src, no_prelude=no_prelude)

    if mocked and no_prelude:
        assert program.sources[context.device].prelude.strip() == ""

    length = 64

    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    queue = Queue(context.device)

    a_dev = Array.from_host(queue, a)
    b_dev = Array.from_host(queue, b)

    res_dev = Array.empty(context.device, length, numpy.int32)
    # Check that passing both Arrays and Buffers is supported
    # Pass one of the buffers as a subregion, too.
    a_dev_view = a_dev.data.get_sub_region(0, a_dev.data.size)
    program.kernel.multiply(queue, length, None, res_dev, a_dev_view,
                            b_dev.data, c)
    res = res_dev.get(queue)
    if not mocked:
        assert (res == ref).all()

    # Explicit local_size
    res2_dev = Array.from_host(queue, a)  # Array.empty(queue, length, numpy.int32)
    program.kernel.multiply(queue, length, length // 2, res2_dev, a_dev, b_dev,
                            c)
    res2 = res2_dev.get(queue)
    if not mocked:
        assert (res2 == ref).all()
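
The actual kernel sources (SRC_GENERIC, SRC_CUDA, SRC_OPENCL) are not shown in these excerpts. For illustration only, a raw CUDA variant matching the mocked signature ('multiply' with three pointer arguments and an int32 scalar, computing a * b + c) might look roughly like the following; the name SRC_CUDA_GUESS and the kernel body are assumptions, not the library's actual source:

SRC_CUDA_GUESS = """
extern "C" __global__ void multiply(int *dest, int *a, int *b, int c)
{
    // One thread per element; the test above launches it with a global size of 64.
    const int i = threadIdx.x + blockIdx.x * blockDim.x;
    dest[i] = a[i] * b[i] + c;
}
"""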
Example 5
    def __init__(self, cuda_version=(10, 0, 0)):
        self.pycuda_driver = Mock_pycuda_driver(self, cuda_version)
        self.pycuda_compiler = Mock_pycuda_compiler(self)

        self.device_infos = []
        self._context_stack = []

        # Since we need to cast DeviceAllocation objects to integers (to add offsets),
        # there is no way to use a mock allocation object to track that.
        # Instead, we have to use recognizable integers as "addresses" and check the validity of
        # allocations using a kind of a fuzzy match database.
        # Should work for testing purposes as long as we use small offsets,
        # and other integer parameters don't fall in the "address" range.
        self._allocation_start = 2**30
        self._allocation_step = 2**16
        self._allocation_idx = 0
        self._allocations = {}

        self.api_id = cuda_api_id()
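
To illustrate the address scheme described in the comment above, here is a minimal sketch of how an integer "address" could be mapped back to the allocation it belongs to. The helper name find_allocation and the assumption that _allocations maps an allocation index to its size are illustrative only, not part of the original mock:

def find_allocation(backend, address):
    # Recover (allocation index, offset) from an integer "address" produced by the
    # scheme above: allocations start at _allocation_start and are spaced
    # _allocation_step apart, so divmod yields the index and the in-allocation offset.
    idx, offset = divmod(address - backend._allocation_start, backend._allocation_step)
    if idx < 0 or idx not in backend._allocations:
        raise ValueError(f"{address} does not fall into any known allocation")
    if offset >= backend._allocations[idx]:
        raise ValueError(f"Offset {offset} is out of bounds for allocation {idx}")
    return idx, offset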
Example 6
def test_from_backend_platform(mock_backend_factory):
    mock_backend_pyopencl = mock_backend_factory.mock(opencl_api_id())
    mock_backend_pyopencl.add_platform_with_devices('Platform1', ['Device1'])
    mock_backend_pyopencl.add_platform_with_devices('Platform2', ['Device2'])

    # Add a CUDA API to make sure it is queried whether the object is its platform object
    # (even though CUDA doesn't have platforms)
    mock_backend_pycuda = mock_backend_factory.mock(cuda_api_id())
    mock_backend_pycuda.add_devices(['Device1', 'Device2'])

    backend_platform = mock_backend_pyopencl.pyopencl.get_platforms()[0]

    with pytest.raises(TypeError,
                       match="was not recognized as a platform object"):
        Platform.from_backend_platform(1)

    platform = Platform.from_backend_platform(backend_platform)
    assert platform.api.id == opencl_api_id()
    assert platform.name == 'Platform1'
Example 7
def test_hash(mock_backend):
    if mock_backend.api_id == cuda_api_id():
        mock_backend.add_devices(['Device1'])
        api = API.from_api_id(mock_backend.api_id)

        p0 = Platform.from_index(api, 0)

        d = {p0: 0}
        assert d[p0] == 0

    else:
        mock_backend.add_platform_with_devices('Platform0', ['Device0'])
        mock_backend.add_platform_with_devices('Platform1', ['Device1'])
        api = API.from_api_id(mock_backend.api_id)

        p0 = Platform.from_index(api, 0)
        p1 = Platform.from_index(api, 1)

        d = {p0: 0, p1: 1}
        assert d[p0] == 0
        assert d[p1] == 1
Example 8
def test_eq(mock_backend):
    if mock_backend.api_id == cuda_api_id():
        mock_backend.add_devices(['Device1'])
        api = API.from_api_id(mock_backend.api_id)

        p0_v1 = Platform.from_index(api, 0)
        p0_v2 = Platform.from_index(api, 0)

        assert p0_v1 is not p0_v2 and p0_v1 == p0_v2

    else:
        mock_backend.add_platform_with_devices('Platform0', ['Device0'])
        mock_backend.add_platform_with_devices('Platform1', ['Device1'])
        api = API.from_api_id(mock_backend.api_id)

        p0_v1 = Platform.from_index(api, 0)
        p0_v2 = Platform.from_index(api, 0)
        p1 = Platform.from_index(api, 1)

        assert p0_v1 is not p0_v2 and p0_v1 == p0_v2
        assert p0_v1 != p1
Example 9
def test_keep(mock_or_real_context, capsys):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        src = SRC_GENERIC

    program = Program(context.device, src, keep=True)
    captured = capsys.readouterr()
    path = re.match(r'\*\*\* compiler output in (.*)', captured.out).group(1)
    assert os.path.isdir(path)

    if context.api.id == opencl_api_id():
        srcfile = os.path.join(path, 'kernel.cl')
    elif context.api.id == cuda_api_id():
        srcfile = os.path.join(path, 'kernel.cu')

    with open(srcfile) as f:
        source = f.read()

    assert str(src) in source
Example 10
def test_set_constant_array_errors(mock_4_device_context):

    context = mock_4_device_context

    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])
    # Contexts don't know about each other and can't interact with stack in a consistent manner.
    # So we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16).astype(numpy.int32)
    src = MockDefTemplate(kernels=[
        MockKernel('kernel', [],
                   max_total_local_sizes={
                       0: 1024,
                       1: 1024,
                       2: 1024,
                       3: 1024
                   })
    ],
                          constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})
    queue = Queue(context.devices[0])

    if context.api.id == cuda_api_id():
        program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        with pytest.raises(
                ValueError,
                match=
                "The provided queue must belong to the same context as this program uses"
        ):
            program.set_constant_array(other_queue, 'cm1', cm1)

        with pytest.raises(TypeError, match="Unsupported array type"):
            program.set_constant_array(queue, 'cm1', [1])

        with pytest.raises(ValueError,
                           match="Incorrect size of the constant buffer;"):
            program.set_constant_array(queue, 'cm1', cm1[:8])

        with pytest.raises(TypeError,
                           match="Unknown constant array metadata type"):
            program = Program(context.devices[[0, 1, 2]],
                              src,
                              constant_arrays=dict(cm1=1))

        program = Program(context.devices[[0, 1, 2]],
                          src,
                          constant_arrays=dict(cm1=cm1))
        queue3 = Queue(context.devices[3])

        with pytest.raises(
                ValueError,
                match=
                "The program was not compiled for the device this queue uses"):
            program.set_constant_array(queue3, 'cm1', cm1)

    else:
        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            program = Program(context.devices,
                              src,
                              constant_arrays=dict(cm1=cm1))

        program = Program(context.devices, src)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            program.set_constant_array(queue, 'cm1', cm1)

        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            sk = StaticKernel(context.devices,
                              src,
                              'kernel',
                              1024,
                              constant_arrays=dict(cm1=cm1))

        sk = StaticKernel(context.devices, src, 'kernel', 1024)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            sk.set_constant_array(queue, 'cm1', cm1)
Example 11
def _test_constant_memory(context, mocked, is_static):

    cm1 = numpy.arange(16).astype(numpy.int32)
    cm2 = numpy.arange(16).astype(numpy.int32) * 2 + 1
    cm3 = numpy.arange(16).astype(numpy.int32) * 3 + 2

    if mocked:
        kernel = MockKernel(
            'copy_from_cm',
            [None] if context.api.id == cuda_api_id() else [None, None, None, None],
            max_total_local_sizes={0: 1024})
        src = MockDefTemplate(constant_mem={
            'cm1': cm1.size * cm1.dtype.itemsize,
            'cm2': cm2.size * cm2.dtype.itemsize,
            'cm3': cm3.size * cm3.dtype.itemsize
        },
                              kernels=[kernel])
    else:
        src = SRC_CONSTANT_MEM_STATIC if is_static else SRC_CONSTANT_MEM

    queue = Queue(context.device)

    cm1_dev = Array.from_host(queue, cm1)
    cm2_dev = Array.from_host(queue, cm2)
    cm3_dev = Array.from_host(queue, cm3)
    res_dev = Array.empty(context.device, 16, numpy.int32)

    if context.api.id == cuda_api_id():

        # Use different forms of constant array representation
        constant_arrays = dict(
            cm1=cm1,  # as an array(-like) object
            cm2=(cm2.shape, cm2.dtype),  # as a tuple of shape and dtype
            cm3=cm3_dev)  # as a device array

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16,
                                        constant_arrays=constant_arrays)
            copy_from_cm.set_constant_array(queue, 'cm1', cm1_dev)  # setting from a device array
            copy_from_cm.set_constant_array(queue, 'cm2', cm2)  # setting from a host array
            copy_from_cm.set_constant_array(queue, 'cm3', cm3_dev.data)  # setting from a device buffer
        else:
            program = Program(context.device,
                              src,
                              constant_arrays=constant_arrays)
            program.set_constant_array(queue, 'cm1', cm1_dev)  # setting from a device array
            program.set_constant_array(queue, 'cm2', cm2)  # setting from a host array
            program.set_constant_array(queue, 'cm3', cm3_dev.data)  # setting from a device buffer
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev)
    else:

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16)
        else:
            program = Program(context.device, src)
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev, cm1_dev, cm2_dev, cm3_dev)

    res = res_dev.get(queue)

    if not mocked:
        assert (res == cm1 + cm2 + cm3).all()
Example 12
def test_allocate_and_copy(mock_or_real_context):

    context, _mocked = mock_or_real_context

    length = 100
    dtype = numpy.dtype('int32')
    size = length * dtype.itemsize

    arr = numpy.arange(length).astype(dtype)

    buf = Buffer.allocate(context.device, size)
    assert buf.size == size
    assert buf.offset == 0

    # Just covering the existence of the attribute.
    # Hard to actually check it without running a kernel
    assert buf.kernel_arg is not None

    queue = Queue(context.device)
    buf.set(queue, arr)

    # Read the whole buffer
    res = numpy.empty_like(arr)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Read a subregion
    buf_region = buf.get_sub_region(25 * dtype.itemsize, 50 * dtype.itemsize)
    arr_region = arr[25:25+50]
    res_region = numpy.empty_like(arr_region)
    buf_region.get(queue, res_region)
    queue.synchronize()
    assert (res_region == arr_region).all()

    # Write a subregion
    arr_region = (numpy.ones(50) * 100).astype(dtype)
    arr[25:25+50] = arr_region
    buf_region.set(queue, arr_region)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Subregion of subregion
    if context.api.id == cuda_api_id():
        # In OpenCL that leads to segfault, but with CUDA we just emulate that with pointers.
        arr_region2 = (numpy.ones(20) * 200).astype(dtype)
        arr[25+20:25+40] = arr_region2
        buf_region2 = buf_region.get_sub_region(20 * dtype.itemsize, 20 * dtype.itemsize)
        buf_region2.set(queue, arr_region2)
        buf.get(queue, res)
        queue.synchronize()
        assert (res == arr).all()

    # Device-to-device copy
    buf2 = Buffer.allocate(context.device, size * 2)
    buf2.set(queue, numpy.ones(length * 2, dtype))
    buf2_view = buf2.get_sub_region(50 * dtype.itemsize, 100 * dtype.itemsize)
    buf2_view.set(queue, buf)
    res2 = numpy.empty(length * 2, dtype)
    buf2.get(queue, res2)
    queue.synchronize()
    assert (res2[50:150] == arr).all()
    assert (res2[:50] == 1).all()
    assert (res2[150:] == 1).all()

    # Device-to-device copy (no_async)
    buf2 = Buffer.allocate(context.device, size * 2)
    buf2.set(queue, numpy.ones(length * 2, dtype))
    buf2_view = buf2.get_sub_region(50 * dtype.itemsize, 100 * dtype.itemsize)
    buf2_view.set(queue, buf, no_async=True)
    res2 = numpy.empty(length * 2, dtype)
    buf2.get(queue, res2)
    queue.synchronize()
    assert (res2[50:150] == arr).all()
    assert (res2[:50] == 1).all()
    assert (res2[150:] == 1).all()
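
To make the nested sub-region arithmetic in this last example explicit: get_sub_region takes a byte offset and a byte size, so the regions above map back to element ranges of arr as follows (a plain worked example for illustration, not part of the test):

import numpy

dtype = numpy.dtype('int32')  # itemsize == 4 bytes
# buf_region = buf.get_sub_region(25 * 4, 50 * 4) covers elements 25..74 of buf.
# buf_region2 = buf_region.get_sub_region(20 * 4, 20 * 4) starts 20 elements into
# buf_region, i.e. at element 45 of buf, and covers elements 45..64 -- which is why
# the host mirror is updated with arr[25 + 20 : 25 + 40] = arr_region2.
assert (25 * dtype.itemsize, 50 * dtype.itemsize) == (100, 200)
assert (25 + 20, 25 + 40) == (45, 65)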