예제 #1
0
def test_multi_queue_out_of_queues(mock_4_device_context):
    context = mock_4_device_context
    queue0 = Queue(context.devices[0])
    queue0_2 = Queue(context.devices[0])
    queue1 = Queue(context.devices[1])

    mqueue = MultiQueue([queue0, queue1])
    assert set(
        mqueue.queues.keys()) == {context.devices[0], context.devices[1]}
    assert mqueue.devices == context.devices[[0, 1]]
예제 #2
0
def test_extract_dependencies(mock_context):

    queue = Queue(mock_context.device)
    virtual_alloc = TrivialManager(mock_context.device).allocator()

    vbuf = virtual_alloc(mock_context.device, 100)
    varr = Array.empty(mock_context.device,
                       100,
                       numpy.int32,
                       allocator=virtual_alloc)

    assert extract_dependencies(vbuf) == {vbuf._buffer_adapter._id}
    assert extract_dependencies(varr) == {varr.data._buffer_adapter._id}
    assert extract_dependencies(
        [vbuf,
         varr]) == {vbuf._buffer_adapter._id, varr.data._buffer_adapter._id}

    class DependencyHolder:
        __virtual_allocations__ = [vbuf, varr]

    assert extract_dependencies(DependencyHolder()) == {
        vbuf._buffer_adapter._id, varr.data._buffer_adapter._id
    }

    # An object not having any dependencies
    assert extract_dependencies(123) == set()
예제 #3
0
def test_empty(mock_or_real_context):
    context, _mocked = mock_or_real_context
    queue = Queue(context.device)
    arr_dev = Array.empty(context.device, 100, numpy.int32)
    arr = arr_dev.get(queue)
    assert arr.shape == (100, )
    assert arr.dtype == numpy.int32
예제 #4
0
def check_func(context,
               func_module,
               reference_func,
               out_dtype,
               in_dtypes,
               atol=1e-8,
               rtol=1e-5,
               is_mocked=False):
    N = 256

    full_src = get_func_kernel(func_module, out_dtype, in_dtypes)

    # Can't test anything else if we don't have a real context
    if is_mocked:
        return

    program = Program(context.device, full_src)
    test = program.kernel.test

    queue = Queue(context.device)

    arrays = [get_test_array(N, dt, no_zeros=True, high=8) for dt in in_dtypes]
    arrays_dev = [Array.from_host(queue, array) for array in arrays]
    dest_dev = Array.empty(context.device, N, out_dtype)

    test(queue, N, None, dest_dev, *arrays_dev)

    assert numpy.allclose(dest_dev.get(queue),
                          reference_func(*arrays).astype(out_dtype),
                          atol=atol,
                          rtol=rtol)
예제 #5
0
def test_compile_static(mock_or_real_context):

    context, mocked = mock_or_real_context

    if mocked:
        kernel = MockKernel('multiply', [None, None, None], max_total_local_sizes={0: 1024})
        src = MockDefTemplate(kernels=[kernel])
    else:
        src = SRC

    a = numpy.arange(11).astype(numpy.int32)
    b = numpy.arange(15).astype(numpy.int32)
    ref = numpy.outer(a, b)

    queue = Queue(context.device)

    a_dev = Array.from_host(queue, a)
    b_dev = Array.from_host(queue, b)

    res_dev = Array.empty(context.device, (11, 15), numpy.int32)

    multiply = StaticKernel(context.device, src, 'multiply', (11, 15))
    multiply(queue, res_dev, a_dev, b_dev)

    res = res_dev.get(queue)

    if not mocked:
        assert (res == ref).all()
예제 #6
0
def test_pow_zero_base(context, out_code, in_codes):
    """
    Specific tests for 0^0 and 0^x.
    """
    N = 256

    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    func_module = functions.pow(in_dtypes[0],
                                exponent_dtype=in_dtypes[1],
                                out_dtype=out_dtype)
    full_src = get_func_kernel(func_module, out_dtype, in_dtypes)
    program = Program(context.device, full_src)
    test = program.kernel.test

    queue = Queue(context.device)
    bases = Array.from_host(queue, numpy.zeros(N, in_dtypes[0]))

    # zero exponents
    exponents = Array.from_host(queue, numpy.zeros(N, in_dtypes[1]))
    dest_dev = Array.empty(context.device, N, out_dtype)
    test(queue, N, None, dest_dev, bases, exponents)
    assert numpy.allclose(dest_dev.get(queue), numpy.ones(N, in_dtypes[0]))

    # non-zero exponents
    exponents = Array.from_host(queue, numpy.ones(N, in_dtypes[1]))
    dest_dev = Array.empty(context.device, N, out_dtype)
    test(queue, N, None, dest_dev, bases, exponents)
    assert numpy.allclose(dest_dev.get(queue), numpy.zeros(N, in_dtypes[0]))
예제 #7
0
def test_set_from_wrong_type(mock_context):
    context = mock_context
    queue = Queue(context.device)
    arr = Array.empty(context.device, (10, 20), numpy.int32)
    with pytest.raises(
            TypeError,
            match="Cannot set from an object of type <class 'int'>"):
        arr.set(queue, 1)
예제 #8
0
def test_mismatched_devices(mock_4_device_context):
    context = mock_4_device_context
    buf = Buffer.allocate(context.devices[0], 100)
    queue = Queue(context.devices[1])
    arr = numpy.ones(100, numpy.uint8)

    with pytest.raises(ValueError, match="Mismatched devices: queue on device"):
        buf.get(queue, arr)

    with pytest.raises(ValueError, match="Mismatched devices: queue on device"):
        buf.set(queue, arr)
예제 #9
0
def test_sizes(context, vstest):
    """
    Test that virtual sizes are correct.
    """
    ref = ReferenceIds(vstest.global_size, vstest.local_size)

    vs = VirtualSizes(max_total_local_size=vstest.max_total_local_size,
                      max_local_sizes=vstest.max_local_sizes,
                      max_num_groups=vstest.max_num_groups,
                      local_size_multiple=2,
                      virtual_global_size=vstest.global_size,
                      virtual_local_size=vstest.local_size)

    vdims = len(vstest.global_size)

    program = Program(context.device,
                      """
        KERNEL void get_sizes(GLOBAL_MEM int *sizes)
        {
            if (${static.global_id}(0) > 0) return;

            for (int i = 0; i < ${vdims}; i++)
            {
                sizes[i] = ${static.local_size}(i);
                sizes[i + ${vdims}] = ${static.num_groups}(i);
                sizes[i + ${vdims * 2}] = ${static.global_size}(i);
            }
            sizes[${vdims * 3}] = ${static.global_flat_size}();
        }
        """,
                      render_globals=dict(vdims=vdims,
                                          static=vs.vsize_modules))

    get_sizes = program.kernel.get_sizes

    queue = Queue(context.device)
    sizes = Array.empty(context.device, vdims * 3 + 1, numpy.int32)
    get_sizes(queue, vs.real_global_size, vs.real_local_size, sizes)

    sizes = sizes.get(queue)
    local_sizes = sizes[0:vdims]
    grid_sizes = sizes[vdims:vdims * 2]
    global_sizes = sizes[vdims * 2:vdims * 3]
    flat_size = sizes[vdims * 3]

    global_sizes_ref = numpy.array(vstest.global_size)
    assert (global_sizes == global_sizes_ref).all()
    assert flat_size == prod(vstest.global_size)

    if vstest.local_size is not None:
        grid_sizes_ref = numpy.array(vstest.grid_size)
        assert (grid_sizes == grid_sizes_ref).all()
        local_sizes_ref = numpy.array(vstest.local_size)
        assert (local_sizes == local_sizes_ref).all()
예제 #10
0
def test_set_checks_shape(mock_context):
    context = mock_context
    queue = Queue(context.device)
    arr = Array.empty(context.device, (10, 20), numpy.int32)

    with pytest.raises(
            ValueError,
            match="Shape mismatch: expected \\(10, 20\\), got \\(10, 30\\)"):
        arr.set(queue, numpy.zeros((10, 30), numpy.int32))

    with pytest.raises(ValueError,
                       match="Dtype mismatch: expected int32, got int64"):
        arr.set(queue, numpy.zeros((10, 20), numpy.int64))
예제 #11
0
def test_compile(mock_or_real_context, no_prelude):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        if no_prelude:
            src = SRC_CUDA if context.api.id == cuda_api_id() else SRC_OPENCL
        else:
            src = SRC_GENERIC

    program = Program(context.device, src, no_prelude=no_prelude)

    if mocked and no_prelude:
        assert program.sources[context.device].prelude.strip() == ""

    length = 64

    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    queue = Queue(context.device)

    a_dev = Array.from_host(queue, a)
    b_dev = Array.from_host(queue, b)

    res_dev = Array.empty(context.device, length, numpy.int32)
    # Check that passing both Arrays and Buffers is supported
    # Pass one of the buffers as a subregion, too.
    a_dev_view = a_dev.data.get_sub_region(0, a_dev.data.size)
    program.kernel.multiply(queue, length, None, res_dev, a_dev_view,
                            b_dev.data, c)
    res = res_dev.get(queue)
    if not mocked:
        assert (res == ref).all()

    # Explicit local_size
    res2_dev = Array.from_host(queue,
                               a)  # Array.empty(queue, length, numpy.int32)
    program.kernel.multiply(queue, length, length // 2, res2_dev, a_dev, b_dev,
                            c)
    res2 = res2_dev.get(queue)
    if not mocked:
        assert (res2 == ref).all()
예제 #12
0
def test_ids(context, vstest):
    """
    Test that virtual IDs are correct for each thread.
    """
    ref = ReferenceIds(vstest.global_size, vstest.local_size)

    vs = VirtualSizes(max_total_local_size=vstest.max_total_local_size,
                      max_local_sizes=vstest.max_local_sizes,
                      max_num_groups=vstest.max_num_groups,
                      local_size_multiple=2,
                      virtual_global_size=vstest.global_size,
                      virtual_local_size=vstest.local_size)

    program = Program(context.device,
                      """
        KERNEL void get_ids(
            GLOBAL_MEM int *local_ids,
            GLOBAL_MEM int *group_ids,
            GLOBAL_MEM int *global_ids,
            int vdim)
        {
            ${static.begin};
            const VSIZE_T i = ${static.global_flat_id}();
            local_ids[i] = ${static.local_id}(vdim);
            group_ids[i] = ${static.group_id}(vdim);
            global_ids[i] = ${static.global_id}(vdim);
        }
        """,
                      render_globals=dict(static=vs.vsize_modules))

    get_ids = program.kernel.get_ids

    queue = Queue(context.device)
    local_ids = Array.empty(context.device, ref.global_size, numpy.int32)
    group_ids = Array.empty(context.device, ref.global_size, numpy.int32)
    global_ids = Array.empty(context.device, ref.global_size, numpy.int32)

    for vdim in range(len(vstest.global_size)):

        get_ids(queue, vs.real_global_size, vs.real_local_size, local_ids,
                group_ids, global_ids, numpy.int32(vdim))

        assert (global_ids.get(queue) == ref.predict_global_ids(vdim)).all()
        if vstest.local_size is not None:
            assert (local_ids.get(queue) == ref.predict_local_ids(vdim)).all()
            assert (group_ids.get(queue) == ref.predict_group_ids(vdim)).all()
예제 #13
0
def test_custom_allocator(mock_context):
    context = mock_context
    queue = Queue(context.device)
    allocated = []

    def allocator(device, size):
        allocated.append(size)
        return Buffer.allocate(device, size)

    arr_dev = Array.empty(context.device,
                          100,
                          numpy.int32,
                          allocator=allocator)
    arr = arr_dev.get(queue)
    assert arr.shape == (100, )
    assert arr.dtype == numpy.int32
    assert allocated == [arr.size * arr.dtype.itemsize]
예제 #14
0
def test_set_from_non_contiguous(mock_or_real_context):

    context, _mocked = mock_or_real_context

    queue = Queue(context.device)
    arr = Array.empty(context.device, (10, 20), numpy.int32)
    arr2 = Array.empty(context.device, (20, 20), numpy.int32)

    with pytest.raises(
            ValueError,
            match="Setting from a non-contiguous device array is not supported"
    ):
        arr.set(queue, arr2[::2, :])

    # Can set from a non-contiguous numpy array though
    arr.set(queue, numpy.ones((20, 20), numpy.int32)[::2, :])
    assert (arr.get(queue) == 1).all()
예제 #15
0
def test_custom_buffer(mock_context):
    context = mock_context
    queue = Queue(context.device)

    arr = numpy.arange(100).astype(numpy.int32)
    metadata = ArrayMetadata.from_arraylike(arr)

    data = Buffer.allocate(context.device, 100)
    with pytest.raises(
            ValueError,
            match="Provided data buffer is not big enough to hold the array"):
        Array(metadata, data)

    bigger_data = Buffer.allocate(context.device,
                                  arr.size * arr.dtype.itemsize)
    bigger_data.set(queue, arr)

    arr_dev = Array(metadata, bigger_data)
    res = arr_dev.get(queue)
    assert (res == arr).all()
예제 #16
0
def test_wrong_context(mock_backend):

    mock_backend.add_devices(['Device0'])

    src = MockDefTemplate(kernels=[MockKernel('multiply', [None])])

    api = API.from_api_id(mock_backend.api_id)
    context = Context.from_devices(api.platforms[0].devices[0])
    context2 = Context.from_devices(api.platforms[0].devices[0])

    res_dev = Array.empty(context.device, 16, numpy.int32)

    program = Program(context.device, src)
    queue = Queue(context2.device)

    with pytest.raises(
            ValueError,
            match=
            "The provided queue must belong to the same context this program uses"
    ):
        program.kernel.multiply(queue, 8, None, res_dev)
예제 #17
0
def test_contract(context, valloc_cls, pack):

    dtype = numpy.int32

    program = Program(context.device,
                      """
        KERNEL void fill(GLOBAL_MEM ${ctype} *dest, ${ctype} val)
        {
            const SIZE_T i = get_global_id(0);
            dest[i] = val;
        }
        """,
                      render_globals=dict(ctype=dtypes.ctype(dtype)))
    fill = program.kernel.fill

    queue = Queue(context.device)
    virtual_alloc = valloc_cls(context.device)

    buffers_metadata, arrays = allocate_test_set(
        virtual_alloc,
        # Bump size to make sure buffer alignment doesn't hide any out-of-bounds access
        lambda allocator, size: Array.empty(
            context.device, size * 100, dtype, allocator=allocator))
    dependencies = {id_: deps for id_, _, deps in buffers_metadata}

    if pack:
        virtual_alloc.pack(queue)

    # Clear all arrays
    for name in sorted(arrays.keys()):
        fill(queue, arrays[name].shape, None, arrays[name], dtype(0))

    for i, name in enumerate(sorted(arrays.keys())):
        val = dtype(i + 1)
        fill(queue, arrays[name].shape, None, arrays[name], val)
        # According to the virtual allocator contract, the allocated buffer
        # will not intersect with the buffers from the specified dependencies.
        # So we're filling the buffer and checking that the dependencies did not change.
        for dep in dependencies[name]:
            assert (arrays[dep].get(queue) != val).all()
예제 #18
0
def test_statistics(mock_context, valloc_cls):

    context = mock_context
    queue = Queue(context.device)
    virtual_alloc = valloc_cls(context.device)

    buffers_metadata, buffers = allocate_test_set(
        virtual_alloc, lambda allocator, size: allocator(context.device, size))

    stats = virtual_alloc.statistics()
    check_statistics(buffers_metadata, stats)

    virtual_alloc.pack(queue)

    stats = virtual_alloc.statistics()
    check_statistics(buffers_metadata, stats)

    s = str(stats)
    assert str(stats.real_size_total) in s
    assert str(stats.real_num) in s
    assert str(stats.virtual_size_total) in s
    assert str(stats.virtual_num) in s
예제 #19
0
def test_contract_mocked(mock_backend_pycuda, mock_context_pycuda, valloc_cls,
                         pack):

    # Using PyCUDA backend here because it tracks the allocations.

    context = mock_context_pycuda
    queue = Queue(context.device)
    virtual_alloc = valloc_cls(context.device)

    buffers_metadata, buffers = allocate_test_set(
        virtual_alloc, lambda allocator, size: allocator(context.device, size))

    for name, size, deps in buffers_metadata:
        # Virtual buffer size should be exactly as requested
        assert buffers[name].size == size
        # The real buffer behind the virtual buffer may be larger
        # (note that _size is only present in mocked DeviceAllocation)
        assert buffers[name].kernel_arg._size >= size

    if pack:
        virtual_alloc.pack(queue)

    # Clear all buffers
    for name, _, _ in buffers_metadata:
        mock_fill(buffers[name], -1)

    for i, metadata in enumerate(buffers_metadata):
        name, size, deps = metadata
        mock_fill(buffers[name], i)
        # According to the virtual allocator contract, the allocated buffer
        # will not intersect with the buffers from the specified dependencies.
        # So we're filling the buffer and checking that the dependencies did not change.
        for dep in deps:
            assert (mock_get(buffers[dep]) != i).all()

    # Check that after deleting virtual buffers all the real buffers are freed as well
    del buffers
    assert mock_backend_pycuda.allocation_count() == 0
예제 #20
0
def test_single_device(device_idx, full_len, benchmark=False):
    pwr = 50

    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices([api.platforms[0].devices[device_idx]])
    queue = Queue(context.device)

    program = Program(context.device, src)
    a_dev = Array.from_host(queue, a)

    queue.synchronize()
    t1 = time.time()
    program.kernel.sum(queue, full_len, None, a_dev, numpy.int32(pwr))
    queue.synchronize()
    t2 = time.time()
    print(f"Single device time (device {device_idx}):", t2 - t1)

    a_res = a_dev.get(queue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()
예제 #21
0
def test_allocate_and_copy(mock_or_real_context):

    context, _mocked = mock_or_real_context

    length = 100
    dtype = numpy.dtype('int32')
    size = length * dtype.itemsize

    arr = numpy.arange(length).astype(dtype)

    buf = Buffer.allocate(context.device, size)
    assert buf.size == size
    assert buf.offset == 0

    # Just covering the existence of the attribute.
    # Hard to actually check it without running a kernel
    assert buf.kernel_arg is not None

    queue = Queue(context.device)
    buf.set(queue, arr)

    # Read the whole buffer
    res = numpy.empty_like(arr)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Read a subregion
    buf_region = buf.get_sub_region(25 * dtype.itemsize, 50 * dtype.itemsize)
    arr_region = arr[25:25+50]
    res_region = numpy.empty_like(arr_region)
    buf_region.get(queue, res_region)
    queue.synchronize()
    assert (res_region == arr_region).all()

    # Write a subregion
    arr_region = (numpy.ones(50) * 100).astype(dtype)
    arr[25:25+50] = arr_region
    buf_region.set(queue, arr_region)
    buf.get(queue, res)
    queue.synchronize()
    assert (res == arr).all()

    # Subregion of subregion
    if context.api.id == cuda_api_id():
        # In OpenCL that leads to segfault, but with CUDA we just emulate that with pointers.
        arr_region2 = (numpy.ones(20) * 200).astype(dtype)
        arr[25+20:25+40] = arr_region2
        buf_region2 = buf_region.get_sub_region(20 * dtype.itemsize, 20 * dtype.itemsize)
        buf_region2.set(queue, arr_region2)
        buf.get(queue, res)
        queue.synchronize()
        assert (res == arr).all()

    # Device-to-device copy
    buf2 = Buffer.allocate(context.device, size * 2)
    buf2.set(queue, numpy.ones(length * 2, dtype))
    buf2_view = buf2.get_sub_region(50 * dtype.itemsize, 100 * dtype.itemsize)
    buf2_view.set(queue, buf)
    res2 = numpy.empty(length * 2, dtype)
    buf2.get(queue, res2)
    queue.synchronize()
    assert (res2[50:150] == arr).all()
    assert (res2[:50] == 1).all()
    assert (res2[150:] == 1).all()

    # Device-to-device copy (no_async)
    buf2 = Buffer.allocate(context.device, size * 2)
    buf2.set(queue, numpy.ones(length * 2, dtype))
    buf2_view = buf2.get_sub_region(50 * dtype.itemsize, 100 * dtype.itemsize)
    buf2_view.set(queue, buf, no_async=True)
    res2 = numpy.empty(length * 2, dtype)
    buf2.get(queue, res2)
    queue.synchronize()
    assert (res2[50:150] == arr).all()
    assert (res2[:50] == 1).all()
    assert (res2[150:] == 1).all()
예제 #22
0
def _test_constant_memory(context, mocked, is_static):

    cm1 = numpy.arange(16).astype(numpy.int32)
    cm2 = numpy.arange(16).astype(numpy.int32) * 2 + 1
    cm3 = numpy.arange(16).astype(numpy.int32) * 3 + 2

    if mocked:
        kernel = MockKernel('copy_from_cm', [None] if context.api.id
                            == cuda_api_id() else [None, None, None, None],
                            max_total_local_sizes={0: 1024})
        src = MockDefTemplate(constant_mem={
            'cm1': cm1.size * cm1.dtype.itemsize,
            'cm2': cm2.size * cm2.dtype.itemsize,
            'cm3': cm3.size * cm3.dtype.itemsize
        },
                              kernels=[kernel])
    else:
        src = SRC_CONSTANT_MEM_STATIC if is_static else SRC_CONSTANT_MEM

    queue = Queue(context.device)

    cm1_dev = Array.from_host(queue, cm1)
    cm2_dev = Array.from_host(queue, cm2)
    cm3_dev = Array.from_host(queue, cm3)
    res_dev = Array.empty(context.device, 16, numpy.int32)

    if context.api.id == cuda_api_id():

        # Use different forms of constant array representation
        constant_arrays = dict(
            cm1=cm1,  # as an array(-like) object
            cm2=(cm2.shape, cm2.dtype),  # as a tuple of shape and dtype
            cm3=cm3_dev)  # as a device array

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16,
                                        constant_arrays=constant_arrays)
            copy_from_cm.set_constant_array(
                queue, 'cm1', cm1_dev)  # setting from a device array
            copy_from_cm.set_constant_array(queue, 'cm2',
                                            cm2)  # setting from a host array
            copy_from_cm.set_constant_array(
                queue, 'cm3', cm3_dev.data)  # setting from a host buffer
        else:
            program = Program(context.device,
                              src,
                              constant_arrays=constant_arrays)
            program.set_constant_array(queue, 'cm1',
                                       cm1_dev)  # setting from a device array
            program.set_constant_array(queue, 'cm2',
                                       cm2)  # setting from a host array
            program.set_constant_array(
                queue, 'cm3', cm3_dev.data)  # setting from a host buffer
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev)
    else:

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16)
        else:
            program = Program(context.device, src)
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev, cm1_dev, cm2_dev, cm3_dev)

    res = res_dev.get(queue)

    if not mocked:
        assert (res == cm1 + cm2 + cm3).all()
예제 #23
0
def test_from_host(mock_or_real_context):
    context, _mocked = mock_or_real_context
    queue = Queue(context.device)
    arr = numpy.arange(100)
    arr_dev = Array.from_host(queue, arr)
    assert (arr_dev.get(queue) == arr).all()
예제 #24
0
def test_single_device(mock_or_real_context):
    context, _mocked = mock_or_real_context
    _check_array_operations(Queue(context.device), Array)
예제 #25
0
def test_set_constant_array_errors(mock_4_device_context):

    context = mock_4_device_context

    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])
    # Contexts don't know about each other and can't interact with stack in a consistent manner.
    # So we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16).astype(numpy.int32)
    src = MockDefTemplate(kernels=[
        MockKernel('kernel', [],
                   max_total_local_sizes={
                       0: 1024,
                       1: 1024,
                       2: 1024,
                       3: 1024
                   })
    ],
                          constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})
    queue = Queue(context.devices[0])

    if context.api.id == cuda_api_id():
        program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        with pytest.raises(
                ValueError,
                match=
                "The provided queue must belong to the same context as this program uses"
        ):
            program.set_constant_array(other_queue, 'cm1', cm1)

        with pytest.raises(TypeError, match="Unsupported array type"):
            program.set_constant_array(queue, 'cm1', [1])

        with pytest.raises(ValueError,
                           match="Incorrect size of the constant buffer;"):
            program.set_constant_array(queue, 'cm1', cm1[:8])

        with pytest.raises(TypeError,
                           match="Unknown constant array metadata type"):
            program = Program(context.devices[[0, 1, 2]],
                              src,
                              constant_arrays=dict(cm1=1))

        program = Program(context.devices[[0, 1, 2]],
                          src,
                          constant_arrays=dict(cm1=cm1))
        queue3 = Queue(context.devices[3])

        with pytest.raises(
                ValueError,
                match=
                "The program was not compiled for the device this queue uses"):
            program.set_constant_array(queue3, 'cm1', cm1)

    else:
        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            program = Program(context.devices,
                              src,
                              constant_arrays=dict(cm1=cm1))

        program = Program(context.devices, src)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            program.set_constant_array(queue, 'cm1', cm1)

        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            sk = StaticKernel(context.devices,
                              src,
                              'kernel',
                              1024,
                              constant_arrays=dict(cm1=cm1))

        sk = StaticKernel(context.devices, src, 'kernel', 1024)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            sk.set_constant_array(queue, 'cm1', cm1)
예제 #26
0
def test_queue_on_multi_device_context(mock_or_real_multi_device_context):
    context, _mocked = mock_or_real_multi_device_context
    queue = Queue(context.devices[1])
    assert queue.device == context.devices[1]
예제 #27
0
def test_queue(mock_or_real_context):
    context, _mocked = mock_or_real_context
    queue = Queue(context.device)
    assert queue.device == context.devices[0]

    queue.synchronize()
예제 #28
0
def test_set_from_wrong_type(mock_context):
    buf = Buffer.allocate(mock_context.device, 100)
    queue = Queue(mock_context.device)
    with pytest.raises(TypeError, match="Cannot set from an object of type <class 'int'>"):
        buf.set(queue, 1)