Python Program.Programの例、grunnur.Program.Program Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_builtin_globals(mock_backend_pycuda):
    mock_backend_pycuda.add_devices([
        PyCUDADeviceInfo(max_threads_per_block=1024),
        PyCUDADeviceInfo(max_threads_per_block=512)
    ])

    source_template = DefTemplate.from_string(
        'mock_source', [], """
        KERNEL void test()
        {
            int max_total_local_size = ${device_params.max_total_local_size};
        }
        """)

    api = API.from_api_id(mock_backend_pycuda.api_id)
    context = Context.from_devices(
        [api.platforms[0].devices[0], api.platforms[0].devices[1]])

    src = MockDefTemplate(kernels=[MockKernel('test', [None])],
                          source_template=source_template)

    program = Program(context.devices, src)

    assert 'max_total_local_size = 1024' in program.sources[
        context.devices[0]].source
    assert 'max_total_local_size = 512' in program.sources[
        context.devices[1]].source

コード例 #2

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_max_total_local_sizes(mock_backend):
    mock_backend.add_devices(
        ["Device1", "Device2 - tag", "Device3 - tag", "Device4"])
    api = API.from_api_id(mock_backend.api_id)
    context = Context.from_criteria(api,
                                    devices_num=2,
                                    device_include_masks=["tag"])

    # Providing max_total_local_sizes for all possible devices to make sure
    # only the ones corresponding to the context will get picked up
    kernel = MockKernel('test',
                        max_total_local_sizes={
                            0: 64,
                            1: 1024,
                            2: 512,
                            3: 128
                        })

    src = MockDefTemplate(kernels=[kernel])
    program = Program(context.devices, src)

    # The indices here correspond to the devices in the context, not in the platform
    assert program.kernel.test.max_total_local_sizes == {
        context.devices[0]: 1024,
        context.devices[1]: 512
    }

コード例 #3

0

ファイルを表示

ファイル: test_functions.py プロジェクト: fjarri/grunnur

def check_func(context,
               func_module,
               reference_func,
               out_dtype,
               in_dtypes,
               atol=1e-8,
               rtol=1e-5,
               is_mocked=False):
    N = 256

    full_src = get_func_kernel(func_module, out_dtype, in_dtypes)

    # Can't test anything else if we don't have a real context
    if is_mocked:
        return

    program = Program(context.device, full_src)
    test = program.kernel.test

    queue = Queue(context.device)

    arrays = [get_test_array(N, dt, no_zeros=True, high=8) for dt in in_dtypes]
    arrays_dev = [Array.from_host(queue, array) for array in arrays]
    dest_dev = Array.empty(context.device, N, out_dtype)

    test(queue, N, None, dest_dev, *arrays_dev)

    assert numpy.allclose(dest_dev.get(queue),
                          reference_func(*arrays).astype(out_dtype),
                          atol=atol,
                          rtol=rtol)

コード例 #4

0

ファイルを表示

ファイル: test_functions.py プロジェクト: fjarri/grunnur

def test_pow_zero_base(context, out_code, in_codes):
    """
    Specific tests for 0^0 and 0^x.
    """
    N = 256

    out_dtype, in_dtypes = generate_dtypes(out_code, in_codes)
    func_module = functions.pow(in_dtypes[0],
                                exponent_dtype=in_dtypes[1],
                                out_dtype=out_dtype)
    full_src = get_func_kernel(func_module, out_dtype, in_dtypes)
    program = Program(context.device, full_src)
    test = program.kernel.test

    queue = Queue(context.device)
    bases = Array.from_host(queue, numpy.zeros(N, in_dtypes[0]))

    # zero exponents
    exponents = Array.from_host(queue, numpy.zeros(N, in_dtypes[1]))
    dest_dev = Array.empty(context.device, N, out_dtype)
    test(queue, N, None, dest_dev, bases, exponents)
    assert numpy.allclose(dest_dev.get(queue), numpy.ones(N, in_dtypes[0]))

    # non-zero exponents
    exponents = Array.from_host(queue, numpy.ones(N, in_dtypes[1]))
    dest_dev = Array.empty(context.device, N, out_dtype)
    test(queue, N, None, dest_dev, bases, exponents)
    assert numpy.allclose(dest_dev.get(queue), numpy.zeros(N, in_dtypes[0]))

コード例 #5

0

ファイルを表示

ファイル: multi_device_comparison.py プロジェクト: fjarri/grunnur

def test_multi_device(device_idxs, full_len, benchmark=False):

    pwr = 50

    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices(
        [api.platforms[0].devices[device_idx] for device_idx in device_idxs])
    mqueue = MultiQueue.on_devices(context.devices)

    program = Program(context.devices, src)
    a_dev = MultiArray.from_host(mqueue, a)

    mqueue.synchronize()
    t1 = time.time()
    program.kernel.sum(mqueue, a_dev.shapes, None, a_dev, numpy.int32(pwr))
    mqueue.synchronize()
    t2 = time.time()
    print(f"Multidevice time (devices {device_idxs}):", t2 - t1)

    a_res = a_dev.get(mqueue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()

コード例 #6

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_cannot_override_builtin_globals(mock_context):
    with pytest.raises(
            ValueError,
            match="'device_params' is a reserved global name and cannot be used"
    ):
        Program(mock_context.device,
                MockDefTemplate(kernels=[MockKernel('test', [None])]),
                render_globals=dict(device_params=None))

コード例 #7

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_wrong_device_idxs(mock_4_device_context):
    src = MockDefTemplate(kernels=[MockKernel('multiply', [None])])

    context = mock_4_device_context
    program = Program(context.devices[[0, 1]], src)
    mqueue = MultiQueue.on_devices(context.devices[[2, 1]])
    res_dev = MultiArray.empty(context.devices[[2, 1]], 16, numpy.int32)

    # Using all the queue's devices (1, 2)
    with pytest.raises(ValueError, match="Requested execution on devices"):
        program.kernel.multiply(mqueue, 8, None, res_dev)

コード例 #8

0

ファイルを表示

ファイル: test_vsize_on_device.py プロジェクト: fjarri/grunnur

def test_sizes(context, vstest):
    """
    Test that virtual sizes are correct.
    """
    ref = ReferenceIds(vstest.global_size, vstest.local_size)

    vs = VirtualSizes(max_total_local_size=vstest.max_total_local_size,
                      max_local_sizes=vstest.max_local_sizes,
                      max_num_groups=vstest.max_num_groups,
                      local_size_multiple=2,
                      virtual_global_size=vstest.global_size,
                      virtual_local_size=vstest.local_size)

    vdims = len(vstest.global_size)

    program = Program(context.device,
                      """
        KERNEL void get_sizes(GLOBAL_MEM int *sizes)
        {
            if (${static.global_id}(0) > 0) return;

            for (int i = 0; i < ${vdims}; i++)
            {
                sizes[i] = ${static.local_size}(i);
                sizes[i + ${vdims}] = ${static.num_groups}(i);
                sizes[i + ${vdims * 2}] = ${static.global_size}(i);
            }
            sizes[${vdims * 3}] = ${static.global_flat_size}();
        }
        """,
                      render_globals=dict(vdims=vdims,
                                          static=vs.vsize_modules))

    get_sizes = program.kernel.get_sizes

    queue = Queue(context.device)
    sizes = Array.empty(context.device, vdims * 3 + 1, numpy.int32)
    get_sizes(queue, vs.real_global_size, vs.real_local_size, sizes)

    sizes = sizes.get(queue)
    local_sizes = sizes[0:vdims]
    grid_sizes = sizes[vdims:vdims * 2]
    global_sizes = sizes[vdims * 2:vdims * 3]
    flat_size = sizes[vdims * 3]

    global_sizes_ref = numpy.array(vstest.global_size)
    assert (global_sizes == global_sizes_ref).all()
    assert flat_size == prod(vstest.global_size)

    if vstest.local_size is not None:
        grid_sizes_ref = numpy.array(vstest.grid_size)
        assert (grid_sizes == grid_sizes_ref).all()
        local_sizes_ref = numpy.array(vstest.local_size)
        assert (local_sizes == local_sizes_ref).all()

コード例 #9

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_compile(mock_or_real_context, no_prelude):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        if no_prelude:
            src = SRC_CUDA if context.api.id == cuda_api_id() else SRC_OPENCL
        else:
            src = SRC_GENERIC

    program = Program(context.device, src, no_prelude=no_prelude)

    if mocked and no_prelude:
        assert program.sources[context.device].prelude.strip() == ""

    length = 64

    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    queue = Queue(context.device)

    a_dev = Array.from_host(queue, a)
    b_dev = Array.from_host(queue, b)

    res_dev = Array.empty(context.device, length, numpy.int32)
    # Check that passing both Arrays and Buffers is supported
    # Pass one of the buffers as a subregion, too.
    a_dev_view = a_dev.data.get_sub_region(0, a_dev.data.size)
    program.kernel.multiply(queue, length, None, res_dev, a_dev_view,
                            b_dev.data, c)
    res = res_dev.get(queue)
    if not mocked:
        assert (res == ref).all()

    # Explicit local_size
    res2_dev = Array.from_host(queue,
                               a)  # Array.empty(queue, length, numpy.int32)
    program.kernel.multiply(queue, length, length // 2, res2_dev, a_dev, b_dev,
                            c)
    res2 = res2_dev.get(queue)
    if not mocked:
        assert (res2 == ref).all()

コード例 #10

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_mismatched_devices(mock_4_device_context):
    context = mock_4_device_context
    src = MockDefTemplate(
        kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    program = Program(context.devices, src)
    with pytest.raises(
            ValueError,
            match="Mismatched device sets for global and local sizes"):
        program.kernel.multiply.prepare(
            {
                context.devices[0]: 10,
                context.devices[2]: 20
            }, {
                context.devices[0]: None,
                context.devices[1]: None
            })

コード例 #11

0

ファイルを表示

ファイル: test_vsize_on_device.py プロジェクト: fjarri/grunnur

def test_ids(context, vstest):
    """
    Test that virtual IDs are correct for each thread.
    """
    ref = ReferenceIds(vstest.global_size, vstest.local_size)

    vs = VirtualSizes(max_total_local_size=vstest.max_total_local_size,
                      max_local_sizes=vstest.max_local_sizes,
                      max_num_groups=vstest.max_num_groups,
                      local_size_multiple=2,
                      virtual_global_size=vstest.global_size,
                      virtual_local_size=vstest.local_size)

    program = Program(context.device,
                      """
        KERNEL void get_ids(
            GLOBAL_MEM int *local_ids,
            GLOBAL_MEM int *group_ids,
            GLOBAL_MEM int *global_ids,
            int vdim)
        {
            ${static.begin};
            const VSIZE_T i = ${static.global_flat_id}();
            local_ids[i] = ${static.local_id}(vdim);
            group_ids[i] = ${static.group_id}(vdim);
            global_ids[i] = ${static.global_id}(vdim);
        }
        """,
                      render_globals=dict(static=vs.vsize_modules))

    get_ids = program.kernel.get_ids

    queue = Queue(context.device)
    local_ids = Array.empty(context.device, ref.global_size, numpy.int32)
    group_ids = Array.empty(context.device, ref.global_size, numpy.int32)
    global_ids = Array.empty(context.device, ref.global_size, numpy.int32)

    for vdim in range(len(vstest.global_size)):

        get_ids(queue, vs.real_global_size, vs.real_local_size, local_ids,
                group_ids, global_ids, numpy.int32(vdim))

        assert (global_ids.get(queue) == ref.predict_global_ids(vdim)).all()
        if vstest.local_size is not None:
            assert (local_ids.get(queue) == ref.predict_local_ids(vdim)).all()
            assert (group_ids.get(queue) == ref.predict_group_ids(vdim)).all()

コード例 #12

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_wrong_context(mock_backend):

    mock_backend.add_devices(['Device0'])

    src = MockDefTemplate(kernels=[MockKernel('multiply', [None])])

    api = API.from_api_id(mock_backend.api_id)
    context = Context.from_devices(api.platforms[0].devices[0])
    context2 = Context.from_devices(api.platforms[0].devices[0])

    res_dev = Array.empty(context.device, 16, numpy.int32)

    program = Program(context.device, src)
    queue = Queue(context2.device)

    with pytest.raises(
            ValueError,
            match=
            "The provided queue must belong to the same context this program uses"
    ):
        program.kernel.multiply(queue, 8, None, res_dev)

コード例 #13

0

ファイルを表示

def test_contract(context, valloc_cls, pack):

    dtype = numpy.int32

    program = Program(context.device,
                      """
        KERNEL void fill(GLOBAL_MEM ${ctype} *dest, ${ctype} val)
        {
            const SIZE_T i = get_global_id(0);
            dest[i] = val;
        }
        """,
                      render_globals=dict(ctype=dtypes.ctype(dtype)))
    fill = program.kernel.fill

    queue = Queue(context.device)
    virtual_alloc = valloc_cls(context.device)

    buffers_metadata, arrays = allocate_test_set(
        virtual_alloc,
        # Bump size to make sure buffer alignment doesn't hide any out-of-bounds access
        lambda allocator, size: Array.empty(
            context.device, size * 100, dtype, allocator=allocator))
    dependencies = {id_: deps for id_, _, deps in buffers_metadata}

    if pack:
        virtual_alloc.pack(queue)

    # Clear all arrays
    for name in sorted(arrays.keys()):
        fill(queue, arrays[name].shape, None, arrays[name], dtype(0))

    for i, name in enumerate(sorted(arrays.keys())):
        val = dtype(i + 1)
        fill(queue, arrays[name].shape, None, arrays[name], val)
        # According to the virtual allocator contract, the allocated buffer
        # will not intersect with the buffers from the specified dependencies.
        # So we're filling the buffer and checking that the dependencies did not change.
        for dep in dependencies[name]:
            assert (arrays[dep].get(queue) != val).all()

コード例 #14

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_compile_multi_device(mock_or_real_multi_device_context):

    context, mocked = mock_or_real_multi_device_context
    devices = context.devices[[1, 0]]

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        src = SRC_GENERIC

    length = 64

    program = Program(devices, src)
    a = numpy.arange(length).astype(numpy.int32)
    b = numpy.arange(length).astype(numpy.int32) + 1
    c = numpy.int32(3)
    ref = a * b + c

    mqueue = MultiQueue.on_devices(devices)

    a_dev = MultiArray.from_host(mqueue, a)
    b_dev = MultiArray.from_host(mqueue, b)

    res_dev = MultiArray.empty(devices, length, numpy.int32)
    program.kernel.multiply(mqueue, a_dev.shapes, None, res_dev, a_dev, b_dev,
                            c)
    res = res_dev.get(mqueue)
    if not mocked:
        assert (res == ref).all()

    # Test argument unpacking from dictionaries
    res_dev = MultiArray.empty(devices, length, numpy.int32)
    program.kernel.multiply(mqueue, a_dev.shapes,
                            {device: None
                             for device in devices}, res_dev, a_dev.subarrays,
                            b_dev, c)
    res = res_dev.get(mqueue)
    if not mocked:
        assert (res == ref).all()

コード例 #15

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_compilation_error(mock_or_real_context, capsys):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(should_fail=True)
    else:
        src = SRC_COMPILE_ERROR

    with pytest.raises(CompilationError):
        Program(context.device, src)

    captured = capsys.readouterr()
    assert "Failed to compile on device" in captured.out

    # check that the full source is shown (including the prelude)
    assert "#define GRUNNUR_" in captured.out

    if mocked:
        assert "<<< mock source >>>" in captured.out
    else:
        assert "KERNEL void compile_error(GLOBAL_MEM int *dest)" in captured.out

コード例 #16

0

ファイルを表示

ファイル: multi_device_comparison.py プロジェクト: fjarri/grunnur

def test_single_device(device_idx, full_len, benchmark=False):
    pwr = 50

    a = numpy.arange(full_len).astype(numpy.uint64)

    context = Context.from_devices([api.platforms[0].devices[device_idx]])
    queue = Queue(context.device)

    program = Program(context.device, src)
    a_dev = Array.from_host(queue, a)

    queue.synchronize()
    t1 = time.time()
    program.kernel.sum(queue, full_len, None, a_dev, numpy.int32(pwr))
    queue.synchronize()
    t2 = time.time()
    print(f"Single device time (device {device_idx}):", t2 - t1)

    a_res = a_dev.get(queue)

    if not benchmark:
        a_ref = calc_ref(a, pwr)
        assert (a_ref == a_res).all()

コード例 #17

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_keep(mock_or_real_context, capsys):

    context, mocked = mock_or_real_context

    if mocked:
        src = MockDefTemplate(
            kernels=[MockKernel('multiply', [None, None, None, numpy.int32])])
    else:
        src = SRC_GENERIC

    program = Program(context.device, src, keep=True)
    captured = capsys.readouterr()
    path = re.match(r'\*\*\* compiler output in (.*)', captured.out).group(1)
    assert os.path.isdir(path)

    if context.api.id == opencl_api_id():
        srcfile = os.path.join(path, 'kernel.cl')
    elif context.api.id == cuda_api_id():
        srcfile = os.path.join(path, 'kernel.cu')

    with open(srcfile) as f:
        source = f.read()

    assert str(src) in source

コード例 #18

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def _test_constant_memory(context, mocked, is_static):

    cm1 = numpy.arange(16).astype(numpy.int32)
    cm2 = numpy.arange(16).astype(numpy.int32) * 2 + 1
    cm3 = numpy.arange(16).astype(numpy.int32) * 3 + 2

    if mocked:
        kernel = MockKernel('copy_from_cm', [None] if context.api.id
                            == cuda_api_id() else [None, None, None, None],
                            max_total_local_sizes={0: 1024})
        src = MockDefTemplate(constant_mem={
            'cm1': cm1.size * cm1.dtype.itemsize,
            'cm2': cm2.size * cm2.dtype.itemsize,
            'cm3': cm3.size * cm3.dtype.itemsize
        },
                              kernels=[kernel])
    else:
        src = SRC_CONSTANT_MEM_STATIC if is_static else SRC_CONSTANT_MEM

    queue = Queue(context.device)

    cm1_dev = Array.from_host(queue, cm1)
    cm2_dev = Array.from_host(queue, cm2)
    cm3_dev = Array.from_host(queue, cm3)
    res_dev = Array.empty(context.device, 16, numpy.int32)

    if context.api.id == cuda_api_id():

        # Use different forms of constant array representation
        constant_arrays = dict(
            cm1=cm1,  # as an array(-like) object
            cm2=(cm2.shape, cm2.dtype),  # as a tuple of shape and dtype
            cm3=cm3_dev)  # as a device array

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16,
                                        constant_arrays=constant_arrays)
            copy_from_cm.set_constant_array(
                queue, 'cm1', cm1_dev)  # setting from a device array
            copy_from_cm.set_constant_array(queue, 'cm2',
                                            cm2)  # setting from a host array
            copy_from_cm.set_constant_array(
                queue, 'cm3', cm3_dev.data)  # setting from a host buffer
        else:
            program = Program(context.device,
                              src,
                              constant_arrays=constant_arrays)
            program.set_constant_array(queue, 'cm1',
                                       cm1_dev)  # setting from a device array
            program.set_constant_array(queue, 'cm2',
                                       cm2)  # setting from a host array
            program.set_constant_array(
                queue, 'cm3', cm3_dev.data)  # setting from a host buffer
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev)
    else:

        if is_static:
            copy_from_cm = StaticKernel(context.device,
                                        src,
                                        'copy_from_cm',
                                        global_size=16)
        else:
            program = Program(context.device, src)
            copy_from_cm = lambda queue, *args: program.kernel.copy_from_cm(
                queue, 16, None, *args)

        copy_from_cm(queue, res_dev, cm1_dev, cm2_dev, cm3_dev)

    res = res_dev.get(queue)

    if not mocked:
        assert (res == cm1 + cm2 + cm3).all()

コード例 #19

0

ファイルを表示

ファイル: test_program.py プロジェクト: fjarri/grunnur

def test_set_constant_array_errors(mock_4_device_context):

    context = mock_4_device_context

    api = API.from_api_id(mock_4_device_context.api.id)
    other_context = Context.from_criteria(api)
    other_queue = Queue(other_context.devices[0])
    # Contexts don't know about each other and can't interact with stack in a consistent manner.
    # So we deactivate the other context if we're on CUDA API.
    if api.id == cuda_api_id():
        other_context.deactivate()

    cm1 = numpy.arange(16).astype(numpy.int32)
    src = MockDefTemplate(kernels=[
        MockKernel('kernel', [],
                   max_total_local_sizes={
                       0: 1024,
                       1: 1024,
                       2: 1024,
                       3: 1024
                   })
    ],
                          constant_mem={'cm1': cm1.size * cm1.dtype.itemsize})
    queue = Queue(context.devices[0])

    if context.api.id == cuda_api_id():
        program = Program(context.devices, src, constant_arrays=dict(cm1=cm1))

        with pytest.raises(
                ValueError,
                match=
                "The provided queue must belong to the same context as this program uses"
        ):
            program.set_constant_array(other_queue, 'cm1', cm1)

        with pytest.raises(TypeError, match="Unsupported array type"):
            program.set_constant_array(queue, 'cm1', [1])

        with pytest.raises(ValueError,
                           match="Incorrect size of the constant buffer;"):
            program.set_constant_array(queue, 'cm1', cm1[:8])

        with pytest.raises(TypeError,
                           match="Unknown constant array metadata type"):
            program = Program(context.devices[[0, 1, 2]],
                              src,
                              constant_arrays=dict(cm1=1))

        program = Program(context.devices[[0, 1, 2]],
                          src,
                          constant_arrays=dict(cm1=cm1))
        queue3 = Queue(context.devices[3])

        with pytest.raises(
                ValueError,
                match=
                "The program was not compiled for the device this queue uses"):
            program.set_constant_array(queue3, 'cm1', cm1)

    else:
        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            program = Program(context.devices,
                              src,
                              constant_arrays=dict(cm1=cm1))

        program = Program(context.devices, src)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            program.set_constant_array(queue, 'cm1', cm1)

        with pytest.raises(
                ValueError,
                match=
                "Compile-time constant arrays are only supported by CUDA API"):
            sk = StaticKernel(context.devices,
                              src,
                              'kernel',
                              1024,
                              constant_arrays=dict(cm1=cm1))

        sk = StaticKernel(context.devices, src, 'kernel', 1024)
        with pytest.raises(
                ValueError,
                match="Constant arrays are only supported for CUDA API"):
            sk.set_constant_array(queue, 'cm1', cm1)