Beispiel #1
0
def test_virtual_sizes_error_propagated(mock_backend_pycuda):
    """
    Check that a failure to find virtual sizes surfaces to the caller
    of ``StaticKernel`` as a ``VirtualSizeError``.

    Only the mocked PyCUDA backend is used: the mocked PyOpenCL backend does not have a way
    to set maximum global sizes (PyOpenCL devices don't have a corresponding parameter),
    and PyCUDA is enough to test the required code path.
    """
    # A deliberately small device: 16 threads per block, grid limited to 1024 x 1024 x 256.
    limits = dict(
        max_threads_per_block=16,
        max_block_dim_x=16,
        max_block_dim_y=16,
        max_block_dim_z=16,
        max_grid_dim_x=1024,
        max_grid_dim_y=1024,
        max_grid_dim_z=256)
    mock_backend_pycuda.add_devices([PyCUDADeviceInfo(**limits)])

    api = API.from_api_id(mock_backend_pycuda.api_id)
    context = Context.from_devices([api.platforms[0].devices[0]])
    template = MockDefTemplate(
        kernels=[MockKernel('test', [None], max_total_local_sizes={0: 16})])

    local_size = (16, 1, 1)

    # Just enough to fit in the grid limits, so construction is expected to succeed.
    fitting_kernel = StaticKernel(
        context.device, template, 'test', (16384, 1024, 256), local_size)

    # Doubling the second dimension makes the global size too large for the device,
    # so virtual size finding fails and the error is propagated to the user.
    expected_message = "Bounding global size \\(16384, 2048, 256\\) is too large"
    with pytest.raises(VirtualSizeError, match=expected_message):
        StaticKernel(context.device, template, 'test', (16384, 2048, 256), local_size)
Beispiel #2
0
def test_compile_static_multi_device(mock_or_real_multi_device_context):
    """
    Build a static ``multiply`` kernel for the first two devices of the context
    and run it on multi-device arrays.

    The result is compared with ``numpy.outer`` only when the context is not mocked.
    """
    context, mocked = mock_or_real_multi_device_context

    if not mocked:
        src = SRC
    else:
        # A mocked context gets a stub kernel with separate local size limits per device.
        stub = MockKernel(
            'multiply', [None] * 3, max_total_local_sizes={0: 1024, 1: 512})
        src = MockDefTemplate(kernels=[stub])

    lhs = numpy.arange(22, dtype=numpy.int32)
    rhs = numpy.arange(15, dtype=numpy.int32)
    ref = numpy.outer(lhs, rhs)

    mqueue = MultiQueue.on_devices(context.devices[[0, 1]])

    lhs_dev = MultiArray.from_host(mqueue, lhs)
    # The second operand is passed with an explicit ``CloneSplay``.
    rhs_dev = MultiArray.from_host(mqueue, rhs, splay=MultiArray.CloneSplay())
    res_dev = MultiArray.empty(mqueue.devices, ref.shape, ref.dtype)

    multiply = StaticKernel(mqueue.devices, src, 'multiply', res_dev.shapes)
    multiply(mqueue, res_dev, lhs_dev, rhs_dev)

    # The result is fetched in both modes; only a real backend produces data worth checking.
    res = res_dev.get(mqueue)
    if mocked:
        return

    assert (res == ref).all()
Beispiel #3
0
def test_compile_static(mock_or_real_context):
    """
    Build a static ``multiply`` kernel with an (11, 15) global size for a single device
    and run it.

    The result is compared with ``numpy.outer`` only when the context is not mocked.
    """
    context, mocked = mock_or_real_context

    if not mocked:
        src = SRC
    else:
        # A mocked context gets a stub kernel instead of the real source.
        stub = MockKernel('multiply', [None] * 3, max_total_local_sizes={0: 1024})
        src = MockDefTemplate(kernels=[stub])

    lhs = numpy.arange(11, dtype=numpy.int32)
    rhs = numpy.arange(15, dtype=numpy.int32)
    ref = numpy.outer(lhs, rhs)

    queue = Queue(context.device)

    lhs_dev = Array.from_host(queue, lhs)
    rhs_dev = Array.from_host(queue, rhs)
    res_dev = Array.empty(context.device, (11, 15), numpy.int32)

    multiply = StaticKernel(context.device, src, 'multiply', (11, 15))
    multiply(queue, res_dev, lhs_dev, rhs_dev)

    # The result is fetched in both modes; only a real backend produces data worth checking.
    res = res_dev.get(queue)
    if mocked:
        return

    assert (res == ref).all()
Beispiel #4
0
def test_builtin_globals(mock_backend_pycuda):
    """
    Check that ``device_params.max_total_local_size`` is rendered per device.

    Two mocked PyCUDA devices with different thread limits (1024 and 512) are registered,
    and the source rendered for each device must contain that device's own limit.
    """
    local_size_limits = (1024, 512)
    mock_backend_pycuda.add_devices(
        [PyCUDADeviceInfo(max_threads_per_block=limit) for limit in local_size_limits])

    source_template = DefTemplate.from_string(
        'mock_source', [],
        """
        KERNEL void test()
        {
            int max_total_local_size = ${device_params.max_total_local_size};
        }
        """)

    api = API.from_api_id(mock_backend_pycuda.api_id)
    platform_devices = api.platforms[0].devices
    context = Context.from_devices([platform_devices[0], platform_devices[1]])

    # The mock kernel reports the same per-device limits as the devices themselves.
    mock_kernel = MockKernel(
        'test', [None], max_total_local_sizes=dict(enumerate(local_size_limits)))
    src = MockDefTemplate(kernels=[mock_kernel], source_template=source_template)

    kernel = StaticKernel(context.devices, src, 'test', (1024,))

    expected_fragments = (
        (0, 'max_total_local_size = 1024'),
        (1, 'max_total_local_size = 512'),
    )
    for device_idx, fragment in expected_fragments:
        assert fragment in kernel.sources[context.devices[device_idx]].source
Beispiel #5
0
def test_zero_max_total_local_size(mock_context):
    """
    A mock kernel reporting a maximum total local size of zero must make
    ``StaticKernel`` construction fail with ``VirtualSizeError``.
    """
    src = MockDefTemplate(
        kernels=[MockKernel('test', [None], max_total_local_sizes={0: 0})])

    # The pattern has to match the raised message verbatim, spelling included.
    expected_message = (
        "The kernel requires too much resourses to be executed with any local size")
    with pytest.raises(VirtualSizeError, match=expected_message):
        StaticKernel(mock_context.device, src, 'test', (1024,))
Beispiel #6
0
def test_set_constant_array_errors(mock_4_device_context):
    """
    Check the errors raised for invalid uses of constant arrays
    in ``Program`` and ``StaticKernel``.

    On the CUDA API this covers a queue from a foreign context, an unsupported array type,
    a wrong buffer size, unknown metadata, and a queue on a device the program
    was not compiled for. On any other API every constant array entry point
    is expected to raise ``ValueError``.
    """
    context = mock_4_device_context

    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])
    # Contexts don't know about each other and can't interact with stack in a consistent manner.
    # So we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16, dtype=numpy.int32)
    mock_kernel = MockKernel(
        'kernel', [], max_total_local_sizes={device_idx: 1024 for device_idx in range(4)})
    src = MockDefTemplate(
        kernels=[mock_kernel],
        constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})
    queue = Queue(context.devices[0])

    on_cuda = context.api.id == cuda_api_id()

    if not on_cuda:
        compile_time_message = "Compile-time constant arrays are only supported by CUDA API"
        run_time_message = "Constant arrays are only supported for CUDA API"

        with pytest.raises(ValueError, match=compile_time_message):
            Program(context.devices, src, constant_arrays={'cm1': cm1})

        program = Program(context.devices, src)
        with pytest.raises(ValueError, match=run_time_message):
            program.set_constant_array(queue, 'cm1', cm1)

        with pytest.raises(ValueError, match=compile_time_message):
            StaticKernel(context.devices, src, 'kernel', 1024, constant_arrays={'cm1': cm1})

        static_kernel = StaticKernel(context.devices, src, 'kernel', 1024)
        with pytest.raises(ValueError, match=run_time_message):
            static_kernel.set_constant_array(queue, 'cm1', cm1)

        return

    # CUDA API from here on.
    program = Program(context.devices, src, constant_arrays={'cm1': cm1})

    foreign_queue_message = (
        "The provided queue must belong to the same context as this program uses")
    with pytest.raises(ValueError, match=foreign_queue_message):
        program.set_constant_array(other_queue, 'cm1', cm1)

    # A plain list is not an accepted array type.
    with pytest.raises(TypeError, match="Unsupported array type"):
        program.set_constant_array(queue, 'cm1', [1])

    # Half of the declared array.
    with pytest.raises(ValueError, match="Incorrect size of the constant buffer;"):
        program.set_constant_array(queue, 'cm1', cm1[:8])

    first_three_devices = context.devices[[0, 1, 2]]

    # An integer is not valid metadata for a constant array.
    with pytest.raises(TypeError, match="Unknown constant array metadata type"):
        Program(first_three_devices, src, constant_arrays={'cm1': 1})

    # The program is compiled for devices 0-2 only, so a queue on device 3 must be rejected.
    program = Program(first_three_devices, src, constant_arrays={'cm1': cm1})
    queue3 = Queue(context.devices[3])

    wrong_device_message = "The program was not compiled for the device this queue uses"
    with pytest.raises(ValueError, match=wrong_device_message):
        program.set_constant_array(queue3, 'cm1', cm1)
Beispiel #7
0
def _test_constant_memory(context, mocked, is_static):
    """
    Shared body of the constant memory tests.

    Runs a ``copy_from_cm`` kernel either through a ``StaticKernel`` (``is_static`` true)
    or through a ``Program``. On the CUDA API the three source arrays are supplied
    as constant arrays; on other APIs they are passed as regular kernel arguments.
    The result is checked against ``cm1 + cm2 + cm3`` only when ``mocked`` is false.
    """
    base = numpy.arange(16, dtype=numpy.int32)
    cm1 = base
    cm2 = base * 2 + 1
    cm3 = base * 3 + 2

    if not mocked:
        src = SRC_CONSTANT_MEM_STATIC if is_static else SRC_CONSTANT_MEM
    else:
        # The mock kernel is declared with one parameter on CUDA and four otherwise,
        # which matches the number of arguments passed at the call below.
        if context.api.id == cuda_api_id():
            kernel_params = [None]
        else:
            kernel_params = [None] * 4
        kernel = MockKernel(
            'copy_from_cm', kernel_params, max_total_local_sizes={0: 1024})
        constant_mem = {
            name: host_array.size * host_array.dtype.itemsize
            for name, host_array in (('cm1', cm1), ('cm2', cm2), ('cm3', cm3))
        }
        src = MockDefTemplate(constant_mem=constant_mem, kernels=[kernel])

    queue = Queue(context.device)

    cm1_dev, cm2_dev, cm3_dev = [
        Array.from_host(queue, host_array) for host_array in (cm1, cm2, cm3)]
    res_dev = Array.empty(context.device, 16, numpy.int32)

    if context.api.id == cuda_api_id():
        # Use different forms of constant array representation
        constant_arrays = {
            'cm1': cm1,  # as an array(-like) object
            'cm2': (cm2.shape, cm2.dtype),  # as a tuple of shape and dtype
            'cm3': cm3_dev,  # as a device array
        }

        if is_static:
            target = StaticKernel(
                context.device, src, 'copy_from_cm',
                global_size=16, constant_arrays=constant_arrays)
        else:
            target = Program(context.device, src, constant_arrays=constant_arrays)

        # Both ``StaticKernel`` and ``Program`` are given the same three kinds of values.
        target.set_constant_array(queue, 'cm1', cm1_dev)  # from a device array
        target.set_constant_array(queue, 'cm2', cm2)  # from a host array
        target.set_constant_array(queue, 'cm3', cm3_dev.data)  # from a device array's `.data`

        call_args = [res_dev]
    else:
        if is_static:
            target = StaticKernel(context.device, src, 'copy_from_cm', global_size=16)
        else:
            target = Program(context.device, src)

        call_args = [res_dev, cm1_dev, cm2_dev, cm3_dev]

    if is_static:
        target(queue, *call_args)
    else:
        # A program kernel takes the global size (16) and the local size (None) explicitly.
        target.kernel.copy_from_cm(queue, 16, None, *call_args)

    # The result is fetched in both modes; only a real backend produces data worth checking.
    res = res_dev.get(queue)
    if mocked:
        return

    assert (res == cm1 + cm2 + cm3).all()
Beispiel #8
0
def test_find_local_size(mock_context):
    """
    Check the real sizes chosen for an (11, 15) global size when the kernel's
    maximum total local size is 64: global (16, 12) and local (16, 4) are expected.
    """
    src = MockDefTemplate(
        kernels=[MockKernel('multiply', [None], max_total_local_sizes={0: 64})])
    multiply = StaticKernel(mock_context.device, src, 'multiply', (11, 15))

    metadata = multiply._vs_metadata[mock_context.devices[0]]
    assert metadata.real_global_size == (16, 12)
    assert metadata.real_local_size == (16, 4)
Beispiel #9
0
def test_reserved_names(mock_context):
    """
    Passing ``static`` among the ``render_globals`` of a ``StaticKernel``
    must raise ``ValueError``.
    """
    src = MockDefTemplate(kernels=[MockKernel('test', [None])])

    expected_message = "The global name 'static' is reserved in static kernels"
    with pytest.raises(ValueError, match=expected_message):
        StaticKernel(
            mock_context.device, src, 'test', (1024,), render_globals={'static': 1})