Ejemplo n.º 1
0
def test_coarse_grain_svm(ctx_factory):
    """Exercise a coarse-grain SVM allocation end to end.

    The array is zero-filled with ``enqueue_svm_memfill``, filled with 17
    through a read/write mapping, doubled by a kernel, and then checked
    through a read-only mapping and (except on pocl) through
    ``enqueue_copy``.  The test is skipped when the version checks say SVM
    is unavailable.
    """
    import sys
    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    cl_2_0 = (2, 0)

    svm_available = (
        ctx._get_cl_version() >= cl_2_0
        and dev._get_cl_version() >= cl_2_0
        and cl.get_cl_header_version() >= cl_2_0)

    on_pocl = dev.platform.name == "Portable Computing Language"
    if on_pocl:
        # On pocl the verdict is based on the pocl version instead.
        svm_available = (
            get_pocl_version(dev.platform) >= (1, 0)
            and cl.get_cl_header_version() >= cl_2_0)

    if not svm_available:
        from pytest import skip
        skip("SVM only available in OpenCL 2.0 and higher")

    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n = 3000
    svm_ary = cl.SVM(cl.csvm_empty(ctx, (n,), np.float32, alignment=64))
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    zero = np.zeros((), svm_ary.mem.dtype)
    cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as mapped:
        mapped.fill(17)
        orig_ary = mapped.copy()

    kernel_src = """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as mapped:
        print(mapped)
        assert np.array_equal(orig_ary * 2, mapped)

    new_ary = np.empty_like(orig_ary)
    new_ary.fill(-1)

    if not on_pocl:
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl up to and including 1.0rc1.
        cl.enqueue_copy(queue, new_ary, svm_ary)
        assert np.array_equal(orig_ary * 2, new_ary)
Ejemplo n.º 2
0
def _may_have_svm(dev):
    """Return whether *dev* might support SVM, judging by version numbers.

    Normally both the platform's CL version and the CL header version must
    be at least 2.0.  On the "Portable Computing Language" platform the
    answer instead depends on the pocl version being at least 1.0, together
    with the same header requirement.
    """
    platform = dev.platform
    svm_version = (2, 0)

    generic_verdict = (
        platform._get_cl_version() >= svm_version
        and cl.get_cl_header_version() >= svm_version)

    if platform.name != "Portable Computing Language":
        return generic_verdict

    return (get_pocl_version(platform) >= (1, 0)
            and cl.get_cl_header_version() >= svm_version)
Ejemplo n.º 3
0
def _may_have_svm(dev):
    """Tell whether the platform of *dev* could offer SVM.

    The default answer requires a platform CL version and a CL header
    version of 2.0 or newer.  For the "Portable Computing Language"
    platform that answer is replaced by a check on the pocl version
    (1.0 or newer) combined with the header version check.
    """
    plat = dev.platform
    result = (plat._get_cl_version() >= (2, 0)
              and cl.get_cl_header_version() >= (2, 0))

    is_pocl = plat.name == "Portable Computing Language"
    if is_pocl:
        pocl_new_enough = get_pocl_version(plat) >= (1, 0)
        result = pocl_new_enough and cl.get_cl_header_version() >= (2, 0)

    return result
Ejemplo n.º 4
0
 def print_info(self):
     """Print PyOpenCL, platform and device details to standard output.

     Platform values are read from ``self._platform`` and device values
     from ``self._device`` via their ``get_info`` methods.  The local
     memory size is divided by 1024 before being printed with a 'KB' label.
     """
     def platform_value(key):
         return self._platform.get_info(key)

     def device_value(key):
         return self._device.get_info(key)

     print("PyOpenCL Version:", cl.VERSION)
     print("OpenCL Version:", cl.get_cl_header_version())
     print()

     print("Platform Name:", platform_value(cl.platform_info.NAME))
     print("Platform Profile:", platform_value(cl.platform_info.PROFILE))
     print("Platform Vendor:", platform_value(cl.platform_info.VENDOR))
     print("Platform Version:", platform_value(cl.platform_info.VERSION))
     print()

     print("GPU Name:", device_value(cl.device_info.NAME))
     print("OpenCL Version:", device_value(cl.device_info.OPENCL_C_VERSION))
     print("GPU Vendor:", device_value(cl.device_info.VENDOR))
     print("GPU Version:", device_value(cl.device_info.VERSION))
     print("GPU Driver Version:",
           device_value(cl.device_info.DRIVER_VERSION))
     print("Max Work Group Size:",
           device_value(cl.device_info.MAX_WORK_GROUP_SIZE))
     print("Max Compute Units:",
           device_value(cl.device_info.MAX_COMPUTE_UNITS))
     print("Max Work Item Size:",
           device_value(cl.device_info.MAX_WORK_ITEM_SIZES))

     local_mem_kb = device_value(cl.device_info.LOCAL_MEM_SIZE) / 1024
     print("Local Memory Size:", local_mem_kb, 'KB')
Ejemplo n.º 5
0
def test_platform_get_devices(ctx_factory):
    """Query the platform for every device type and verify the results.

    For each specific device type (not DEFAULT, ALL or CUSTOM) every
    returned device must have the requested type bits set.  CUSTOM is only
    queried when platform and header are at least CL 1.2 and the platform
    name does not start with "nvidia".
    """
    platform = ctx_factory().devices[0].platform

    if platform.name == "Apple":
        pytest.xfail(
            "Apple doesn't understand all the values we pass for dev_type")

    requested_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]

    custom_usable = (
        platform._get_cl_version() >= (1, 2)
        and cl.get_cl_header_version() >= (1, 2)
        and not platform.name.lower().startswith("nvidia"))
    if custom_usable:
        requested_types.append(cl.device_type.CUSTOM)

    # Types for which the devices' own type is not checked.
    unchecked = (
        cl.device_type.DEFAULT,
        cl.device_type.ALL,
        getattr(cl.device_type, "CUSTOM", None),
    )

    for requested in requested_types:
        print(requested)
        found = platform.get_devices(requested)
        if requested not in unchecked:
            for dev in found:
                assert dev.type & requested == requested
Ejemplo n.º 6
0
def test_sub_buffers(ctx_factory):
    """Copy a slice of a device buffer back and compare with the host slice.

    Skipped when either the context or the CL headers are older than
    OpenCL 1.1.  The slice bounds are multiples of the first device's
    ``mem_base_addr_align`` value.
    """
    ctx = ctx_factory()

    context_too_old = ctx._get_cl_version() < (1, 1)
    if context_too_old or cl.get_cl_header_version() < (1, 1):
        from pytest import skip
        skip("sub-buffers are only available in OpenCL 1.1")

    alignment = ctx.devices[0].mem_base_addr_align
    queue = cl.CommandQueue(ctx)

    n = 30000
    host_data = (np.random.rand(n) * 100).astype(np.uint8)

    flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    dev_buf = cl.Buffer(ctx, flags, hostbuf=host_data)

    # Round 5000 down to a multiple of the alignment, then span 20 units.
    start = 5000 - 5000 % alignment
    stop = start + 20 * alignment

    expected = host_data[start:stop]
    fetched = np.empty_like(expected)
    cl.enqueue_copy(queue, fetched, dev_buf[start:stop])

    assert np.array_equal(fetched, expected)
Ejemplo n.º 7
0
def test_sub_buffers(ctx_factory):
    """Copy a slice of a device buffer back and compare with the host slice.

    A random ``uint8`` array is uploaded, an aligned ``[start:stop]``
    sub-range of the device buffer is copied back, and the result must
    equal the same slice of the host array.  The slice bounds are multiples
    of the first device's ``mem_base_addr_align`` value.
    """
    ctx = ctx_factory()
    # Sub-buffers need OpenCL 1.1 from BOTH the context and the headers
    # PyOpenCL was built against, so the test must be skipped as soon as
    # either one is too old (hence `or`, not `and`).
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("sub-buffers are only available in OpenCL 1.1")

    alignment = ctx.devices[0].mem_base_addr_align

    queue = cl.CommandQueue(ctx)

    n = 30000
    a = (np.random.rand(n) * 100).astype(np.uint8)

    mf = cl.mem_flags
    a_buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

    # Round 5000 down to a multiple of the alignment, then span 20 units.
    start = (5000 // alignment) * alignment
    stop = start + 20 * alignment

    a_sub_ref = a[start:stop]

    a_sub = np.empty_like(a_sub_ref)
    cl.enqueue_copy(queue, a_sub, a_buf[start:stop])

    assert np.array_equal(a_sub, a_sub_ref)
Ejemplo n.º 8
0
def get_info():
    """Return a nested dictionary describing the OpenCL setup.

    Always contains "version", "opengl" and "pyopencl" entries.  A
    "platform" entry is added when ``selected_platform`` is truthy and a
    "device" entry when ``selected_device`` is truthy.  Module-level state
    is only read here, never reassigned.
    """
    info = {
        "version": {"cl_header": pyopencl.get_cl_header_version()},
        "opengl": pyopencl.have_gl(),
        "pyopencl": get_pyopencl_info(),
    }

    if selected_platform:
        platform_info = {}
        platform_info["name"] = selected_platform.name
        platform_info["vendor"] = selected_platform.vendor
        platform_info["devices"] = len(selected_platform.get_devices())
        info["platform"] = platform_info

    if selected_device:
        device = selected_device
        device_info = {
            "type": device_type(device),
            "name": device.name.strip(),
            "version": device.version,
            "max_work_group_size": device.max_work_group_size,
            "max_work_item_dimensions": device.max_work_item_dimensions,
            "max_work_item_sizes": device.max_work_item_sizes,
            "max-size": selected_device_max_size,
        }
        # Only reported when the device object exposes the attribute.
        if hasattr(device, "opencl_c_version"):
            device_info["opencl_c_version"] = device.opencl_c_version
        info["device"] = device_info

    return info
Ejemplo n.º 9
0
def test_bitonic_sort(ctx_factory, size, dtype):
    """Sort a random ``(2, size, 3)`` array along axis 1 with BitonicSort.

    The device result must match ``np.sort`` on the host copy.  Several
    platform/dtype combinations are marked xfail or skipped up front.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    platform_name = dev.platform.name
    on_pocl = platform_name == "Portable Computing Language"

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup parallelism")
    if (on_pocl
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    shape = (2, size, 3,)
    unsorted = clrandom.rand(queue, shape, dtype, luxury=None, a=0, b=239482333)
    work = unsorted.copy()

    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if on_pocl and cl.get_cl_header_version() < (1, 2):
        work.finish()

    sorter = BitonicSort(ctx)
    work, evt = sorter(work, axis=1)

    expected = np.sort(unsorted.get(), axis=1)
    assert np.array_equal(expected, work.get())
Ejemplo n.º 10
0
def get_info():
    """Return a flat dictionary describing the OpenCL setup.

    Keys use dotted prefixes: "version*" and "opengl" are always present,
    "platform.*" keys are added when ``selected_platform`` is truthy and
    "device.*" keys when ``selected_device`` is truthy.  Module-level state
    is only read here, never reassigned.
    """
    info = {
        "version": pyopencl.version.VERSION,
        "version.text": pyopencl.VERSION_TEXT,
        "version.status": pyopencl.VERSION_STATUS,
        "version.cl_header": pyopencl.get_cl_header_version(),
        "opengl": pyopencl.have_gl(),
    }

    if selected_platform:
        platform = selected_platform
        platform_entries = {
            "platform.name": platform.name,
            "platform.vendor": platform.vendor,
            "platform.devices": len(platform.get_devices()),
        }
        info.update(platform_entries)

    if selected_device:
        device = selected_device
        # Only reported when the device object exposes the attribute.
        if hasattr(device, "opencl_c_version"):
            info["device.opencl_c_version"] = device.opencl_c_version
        device_entries = {
            "device.type": device_type(device),
            "device.name": device.name.strip(),
            "device.version": device.version,
            "device.max_work_group_size": device.max_work_group_size,
            "device.max_work_item_dimensions": device.max_work_item_dimensions,
            "device.max_work_item_sizes": device.max_work_item_sizes,
        }
        info.update(device_entries)

    return info
Ejemplo n.º 11
0
def test_custom_type_zeros(ctx_factory):
    """Check ``cl.array.zeros`` with a registered three-field struct dtype.

    The struct dtype is matched to a C struct for the queue's device and
    registered under the name "mmc_type"; the zero-filled device array must
    equal ``np.zeros`` of the same dtype.  Skipped without CL 1.2.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    have_cl_1_2 = (
        queue._get_cl_version() >= (1, 2)
        and cl.get_cl_header_version() >= (1, 2))
    if not have_cl_1_2:
        pytest.skip("CL1.2 not available")

    fields = [
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ]
    dtype = np.dtype(fields)

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    dtype, c_decl = match_dtype_to_c_struct(queue.device, name, dtype)
    dtype = get_or_register_dtype(name, dtype)

    n = 1000
    host_zeros = cl.array.zeros(queue, n, dtype=dtype).get()

    assert np.array_equal(np.zeros(n, dtype), host_zeros)
Ejemplo n.º 12
0
def get_info():
    """Return a dictionary describing the OpenCL setup.

    Starts with the CL header version and OpenGL availability, then merges
    in grouped values through ``updict`` under the "pyopencl", "platform"
    (when ``selected_platform`` is truthy) and "device" (when
    ``selected_device`` is truthy) prefixes.  Module-level state is only
    read here, never reassigned.
    """
    info = {
        "version.cl_header": pyopencl.get_cl_header_version(),
        "opengl": pyopencl.have_gl(),
    }
    updict(info, "pyopencl", get_pyopencl_info())

    if selected_platform:
        platform = selected_platform
        platform_values = {
            "name": platform.name,
            "vendor": platform.vendor,
            "devices": len(platform.get_devices()),
        }
        updict(info, "platform", platform_values)

    if selected_device:
        device = selected_device
        # Only reported when the device object exposes the attribute.
        if hasattr(device, "opencl_c_version"):
            info["device.opencl_c_version"] = device.opencl_c_version
        device_values = {
            "type": device_type(device),
            "name": device.name.strip(),
            "version": device.version,
            "max_work_group_size": device.max_work_group_size,
            "max_work_item_dimensions": device.max_work_item_dimensions,
            "max_work_item_sizes": device.max_work_item_sizes,
            "max-size": selected_device_max_size,
        }
        updict(info, "device", device_values)

    return info
Ejemplo n.º 13
0
def test_custom_type_zeros(ctx_factory):
    """Verify that a zero-filled array of a custom struct dtype is all zeros.

    A three-``int32`` struct dtype is matched to a C struct for the queue's
    device, registered as "mmc_type", and used for ``cl.array.zeros``.
    The test is skipped unless queue and headers report CL 1.2 or newer.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    required = (1, 2)
    if not (queue._get_cl_version() >= required
            and cl.get_cl_header_version() >= required):
        pytest.skip("CL1.2 not available")

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    struct_dtype = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])
    struct_dtype, c_decl = match_dtype_to_c_struct(
        queue.device, name, struct_dtype)
    struct_dtype = get_or_register_dtype(name, struct_dtype)

    n = 1000
    z_dev = cl.array.zeros(queue, n, dtype=struct_dtype)
    z_host = z_dev.get()

    reference = np.zeros(n, struct_dtype)
    assert np.array_equal(reference, z_host)
Ejemplo n.º 14
0
def test_compile_link(ctx_factory):
    """Compile two programs separately, link them, and run the kernel.

    One program defines ``value_sink``; the other declares it and calls it
    from the ``experiment`` kernel.  Skipped when the context or headers are
    older than CL 1.2, and on the "Apple" platform.
    """
    ctx = ctx_factory()

    required = (1, 2)
    if ctx._get_cl_version() < required or cl.get_cl_header_version() < required:
        pytest.skip(
            "Context and ICD loader must understand CL1.2 for compile/link")

    if ctx.devices[0].platform.name == "Apple":
        pytest.skip("Apple doesn't like our compile/link test")

    queue = cl.CommandQueue(ctx)

    sink_src = """//CL//
        void value_sink(float x)
        {
        }
        """
    main_src = """//CL//
        void value_sink(float x);

        __kernel void experiment()
        {
            value_sink(3.1415f + get_global_id(0));
        }
        """
    vsink_prg = cl.Program(ctx, sink_src).compile()
    main_prg = cl.Program(ctx, main_src).compile()

    linked = cl.link_program(ctx, [vsink_prg, main_prg], devices=ctx.devices)
    linked.experiment(queue, (128**2,), (128,))
    queue.finish()
Ejemplo n.º 15
0
def test_spirv(ctx_factory):
    """Build a program from a SPIR-V file and check its vector-sum kernel.

    The file name depends on the device's ``address_bits``.  Skipped below
    CL 2.1, and when an "AMD Accelerated..." platform yields no kernels.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    required = (2, 1)
    if ctx._get_cl_version() < required or cl.get_cl_header_version() < required:
        pytest.skip(
            "SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    spv_name = "add-vectors-%d.spv" % queue.device.address_bits
    with open(spv_name, "rb") as spv_file:
        spv = spv_file.read()

    prg = cl.Program(ctx, spv).build()

    no_kernels = not prg.all_kernels()
    if no_kernels and queue.device.platform.name.startswith("AMD Accelerated"):
        pytest.skip(
            "SPIR-V program creation on AMD did not result in any kernels")

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    residual = (dest_dev - (a_dev + b_dev)).get()
    assert la.norm(residual) < 1e-7
Ejemplo n.º 16
0
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Sort random data with BitonicSort while permuting an index array.

    The sorted values must match ``np.sort`` and the permuted index array
    must select the same values as ``np.argsort``.  Several platform, size
    and dtype combinations are marked xfail or skipped up front.
    """
    import sys
    is_pypy = "__pypy__" in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                "on zero-sized arrays very well")

    # NOTE(review): ctx_factory is accepted but never called; the context
    # comes from create_some_context() -- confirm this is intentional.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    device = queue.device
    if device.platform.vendor == "The pocl project" \
            and device.type & cl.device_type.GPU:
        # The two literals are concatenated, so the first one needs the
        # trailing space to avoid printing "Nvidia,at least".
        pytest.xfail("bitonic argsort fails on POCL + Nvidia, "
                "at least the K40, as of pocl 1.6, 2021-01-20")

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")
    if (dev.platform.name == "Intel(R) OpenCL" and size == 0):
        pytest.xfail("size-0 arange fails on Intel CL")

    if dtype == np.float64 and not has_double_support(dev):
        from pytest import skip
        skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms = m.copy()
    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if (dev.platform.name == "Portable Computing Language"
            and cl.get_cl_header_version() < (1, 2)):
        ms.finish()
        index.finish()
    ms, evt = sorterm(ms, idx=index, axis=0)

    # `m` itself is not passed to the sorter (only its copy `ms` is), so a
    # single host transfer serves all the comparisons below.
    m_host = m.get()

    assert np.array_equal(np.sort(m_host), ms.get())

    # Comparing index.get() with np.argsort(m_host) directly may fail
    # because identical values can be ordered differently, so compare the
    # values selected by the two index sets instead.
    assert np.array_equal(m_host[np.argsort(m_host)], m_host[index.get()])
Ejemplo n.º 17
0
def test_unload_compiler(platform):
    """Call ``cl.unload_platform_compiler`` on platforms expected to allow it.

    Skipped below OpenCL 1.2, on pocl (via ``_skip_if_pocl``) and for the
    "Intel(R) Corporation" vendor.
    """
    from pytest import skip

    required = (1, 2)
    too_old = (platform._get_cl_version() < required
               or cl.get_cl_header_version() < required)
    if too_old:
        skip("clUnloadPlatformCompiler is only available in OpenCL 1.2")

    _skip_if_pocl(platform, 'pocl does not support unloading compiler')

    if platform.vendor == "Intel(R) Corporation":
        skip("Intel proprietary driver does not support unloading compiler")

    cl.unload_platform_compiler(platform)
Ejemplo n.º 18
0
def test_unload_compiler(platform):
    """Unload the platform compiler unless the platform is known not to cope.

    The test skips when platform or headers predate OpenCL 1.2, when
    ``_skip_if_pocl`` decides so, and for the Intel proprietary driver.
    """
    from pytest import skip

    platform_too_old = platform._get_cl_version() < (1, 2)
    if platform_too_old or cl.get_cl_header_version() < (1, 2):
        skip("clUnloadPlatformCompiler is only available in OpenCL 1.2")

    _skip_if_pocl(platform, 'pocl does not support unloading compiler')

    is_intel = platform.vendor == "Intel(R) Corporation"
    if is_intel:
        skip("Intel proprietary driver does not support unloading compiler")

    cl.unload_platform_compiler(platform)
Ejemplo n.º 19
0
def test_fine_grain_svm(ctx_factory):
    """Allocate a fine-grain SVM array and check what backs it.

    Skipped when the context or the CL headers are older than OpenCL 2.0.
    """
    ctx = ctx_factory()

    required = (2, 0)
    if ctx._get_cl_version() < required or cl.get_cl_header_version() < required:
        from pytest import skip
        skip("SVM only available in OpenCL 2.0 and higher")

    shape = (100, 100)
    svm_ary = cl.fsvm_empty(ctx, shape, np.float32, alignment=64)
    assert isinstance(svm_ary.base, cl.SVMAllocation)
Ejemplo n.º 20
0
def test_fine_grain_svm(ctx_factory):
    """Check that ``fsvm_empty`` returns an array based on an SVMAllocation.

    The test is skipped unless both the context and the CL headers report
    OpenCL 2.0 or newer.
    """
    ctx = ctx_factory()

    context_too_old = ctx._get_cl_version() < (2, 0)
    if context_too_old or cl.get_cl_header_version() < (2, 0):
        from pytest import skip
        skip("SVM only available in OpenCL 2.0 and higher")

    svm_ary = cl.fsvm_empty(ctx, (100, 100), np.float32, alignment=64)
    backing = svm_ary.base
    assert isinstance(backing, cl.SVMAllocation)
Ejemplo n.º 21
0
def test_coarse_grain_svm(ctx_factory):
    """Exercise a coarse-grain SVM allocation end to end.

    The array is filled with 17 through a read/write mapping, doubled by a
    kernel, then checked through a read-only mapping and (except on pocl)
    through ``enqueue_copy``.  Skipped below OpenCL 2.0; xfail on AMD CPU.
    """
    import sys
    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    required = (2, 0)
    if ctx._get_cl_version() < required or cl.get_cl_header_version() < required:
        pytest.skip("SVM only available in OpenCL 2.0 and higher")

    dev = ctx.devices[0]
    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n = 3000
    svm_ary = cl.SVM(cl.csvm_empty(ctx, (n,), np.float32, alignment=64))
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    on_pocl = dev.platform.name == "Portable Computing Language"

    if not on_pocl:
        # pocl 0.13 has a bug misinterpreting the size parameter
        zero = np.zeros((), svm_ary.mem.dtype)
        cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as mapped:
        mapped.fill(17)
        orig_ary = mapped.copy()

    kernel_src = """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as mapped:
        print(mapped)
        assert np.array_equal(orig_ary*2, mapped)

    new_ary = np.empty_like(orig_ary)
    new_ary.fill(-1)

    if not on_pocl:
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl 0.13.
        cl.enqueue_copy(queue, new_ary, svm_ary)
        assert np.array_equal(orig_ary*2, new_ary)
Ejemplo n.º 22
0
def test_platform_get_devices(platform):
    """Query *platform* for every device type and verify the results.

    For each specific device type (not DEFAULT, ALL or CUSTOM) every
    returned device must have the requested type bits set.  CUSTOM is only
    queried when both the platform and the CL headers are at least 1.2.
    """
    dev_types = [cl.device_type.ACCELERATOR, cl.device_type.ALL,
                 cl.device_type.CPU, cl.device_type.DEFAULT, cl.device_type.GPU]
    if (platform._get_cl_version() >= (1, 2) and
            cl.get_cl_header_version() >= (1, 2)):
        dev_types.append(cl.device_type.CUSTOM)
    for dev_type in dev_types:
        devs = platform.get_devices(dev_type)
        if dev_type in (cl.device_type.DEFAULT,
                        cl.device_type.ALL,
                        getattr(cl.device_type, 'CUSTOM', None)):
            continue
        for dev in devs:
            # The device type is a bit-field, so a device may legitimately
            # report additional bits (e.g. DEFAULT) next to the requested
            # one; test for the requested bits instead of exact equality.
            assert dev.type & dev_type == dev_type
Ejemplo n.º 23
0
def test_platform_get_devices(platform):
    """Check that ``platform.get_devices`` honours the requested device type.

    Every device type is queried; for the specific ones (everything except
    DEFAULT, ALL and CUSTOM) each returned device must have the requested
    type bits set.  CUSTOM is only queried with CL 1.2 platform and headers.
    """
    dev_types = [
        cl.device_type.ACCELERATOR, cl.device_type.ALL, cl.device_type.CPU,
        cl.device_type.DEFAULT, cl.device_type.GPU
    ]
    if (platform._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)):
        dev_types.append(cl.device_type.CUSTOM)
    for dev_type in dev_types:
        devs = platform.get_devices(dev_type)
        if dev_type in (cl.device_type.DEFAULT, cl.device_type.ALL,
                        getattr(cl.device_type, 'CUSTOM', None)):
            continue
        for dev in devs:
            # dev.type is a bit-field and may carry extra bits (such as
            # DEFAULT) besides the requested one, so exact equality would
            # reject valid devices; mask with the requested type instead.
            assert dev.type & dev_type == dev_type
Ejemplo n.º 24
0
def test_enqueue_barrier_marker(ctx_factory):
    """Enqueue barriers and markers, with and without wait lists.

    Marked xfail on pocl through ``_xfail_if_pocl`` and skipped when the
    queue reports CL >= 1.2 while the headers are <= 1.1.
    """
    ctx = ctx_factory()
    platform = ctx.devices[0].platform
    # Still relevant on pocl 1.0RC1.
    _xfail_if_pocl(platform, (1, 0), "pocl crashes on enqueue_barrier")

    queue = cl.CommandQueue(ctx)

    impl_is_new = queue._get_cl_version() >= (1, 2)
    if impl_is_new and cl.get_cl_header_version() <= (1, 1):
        pytest.skip(
            "CL impl version >= 1.2, header version <= 1.1--cannot be sure "
            "that clEnqueueWaitForEvents is implemented")

    cl.enqueue_barrier(queue)
    first_marker = cl.enqueue_marker(queue)
    second_marker = cl.enqueue_marker(queue, wait_for=[first_marker])
    cl.enqueue_barrier(queue, wait_for=[first_marker, second_marker])
Ejemplo n.º 25
0
def test_empty_ndrange(ctx_factory, empty_shape):
    """Launch a kernel over *empty_shape* with ``allow_empty_ndrange=True``.

    The array is created with shape *empty_shape* and the kernel is
    enqueued with that shape as its global size.  Skipped when the context
    or the CL headers are older than OpenCL 1.2.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip("OpenCL 1.2 required for empty NDRange support")

    a = cl_array.zeros(queue, empty_shape, dtype=np.float32)

    prg = cl.Program(ctx, """
        __kernel void add_two(__global float *a_g)
        {
          a_g[get_global_id(0)] += 2;
        }
        """).build()

    prg.add_two(queue, a.shape, None, a.data, allow_empty_ndrange=True)
Ejemplo n.º 26
0
def test_user_event(ctx_factory):
    """Check that waiting on a ``UserEvent`` blocks until it is completed.

    Two ways of waiting are exercised in background threads:
    ``event.wait()`` and ``cl.wait_for_events([event])``.  In both cases
    the waiter must not finish before ``set_status(COMPLETE)`` and must
    finish shortly afterwards.

    Raises:
        RuntimeError: if a waiter returns too early or does not return
            within the grace period.
    """
    ctx = ctx_factory()
    # UserEvent needs OpenCL 1.1 from BOTH the context and the headers
    # PyOpenCL was built against, so skip as soon as either is too old.
    if ctx._get_cl_version() < (1, 1) or cl.get_cl_header_version() < (1, 1):
        from pytest import skip

        skip("UserEvent is only available in OpenCL 1.1")

    if ctx.devices[0].platform.name == "Portable Computing Language":
        # https://github.com/pocl/pocl/issues/201
        pytest.xfail("POCL's user events don't work right")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    # (waiter, status key, message when the waiter never returns)
    scenarios = [
        (event_waiter1, 1, "UserEvent.wait timeout"),
        (event_waiter2, 2, "cl.wait_for_events timeout on UserEvent"),
    ]

    for waiter, key, timeout_message in scenarios:
        evt = cl.UserEvent(ctx)
        Thread(target=waiter, args=(evt, key)).start()
        sleep(0.05)
        if status.get(key, False):
            raise RuntimeError("UserEvent triggered before set_status")
        evt.set_status(cl.command_execution_status.COMPLETE)
        sleep(0.05)
        if not status.get(key, False):
            raise RuntimeError(timeout_message)
        assert (evt.command_execution_status
                == cl.command_execution_status.COMPLETE)
def test_platform_get_devices(platform):
    """Query *platform* for every device type and verify the results.

    Marked xfail on the "Apple" platform.  For each specific device type
    (not DEFAULT, ALL or CUSTOM) every returned device must have the
    requested type bits set.
    """
    if platform.name == "Apple":
        pytest.xfail(
            "Apple doesn't understand all the values we pass for dev_type")

    requested_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]
    have_cl_1_2 = (platform._get_cl_version() >= (1, 2)
                   and cl.get_cl_header_version() >= (1, 2))
    if have_cl_1_2:
        requested_types.append(cl.device_type.CUSTOM)

    # Types for which the devices' own type is not checked.
    unchecked = (
        cl.device_type.DEFAULT,
        cl.device_type.ALL,
        getattr(cl.device_type, 'CUSTOM', None),
    )

    for requested in requested_types:
        found = platform.get_devices(requested)
        if requested not in unchecked:
            for dev in found:
                assert dev.type & requested == requested
Ejemplo n.º 28
0
def test_user_event(ctx_factory):
    """Check that waiting on a ``UserEvent`` blocks until it is completed.

    ``event.wait()`` and ``cl.wait_for_events([event])`` are each run in a
    background thread; neither may return before ``set_status(COMPLETE)``
    and both must return shortly after it.

    Raises:
        RuntimeError: if a waiter returns too early or does not return
            within the grace period.
    """
    ctx = ctx_factory()
    # Both the context and the headers must provide OpenCL 1.1 for
    # UserEvent to exist, so either one being too old means skip.
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    if ctx.devices[0].platform.name == "Portable Computing Language":
        # https://github.com/pocl/pocl/issues/201
        pytest.xfail("POCL's user events don't work right")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    # (waiter, status key, message when the waiter never returns)
    scenarios = (
        (event_waiter1, 1, 'UserEvent.wait timeout'),
        (event_waiter2, 2, 'cl.wait_for_events timeout on UserEvent'),
    )

    for waiter, key, timeout_message in scenarios:
        evt = cl.UserEvent(ctx)
        Thread(target=waiter, args=(evt, key)).start()
        sleep(.05)
        if status.get(key, False):
            raise RuntimeError('UserEvent triggered before set_status')
        evt.set_status(cl.command_execution_status.COMPLETE)
        sleep(.05)
        if not status.get(key, False):
            raise RuntimeError(timeout_message)
        assert (evt.command_execution_status
                == cl.command_execution_status.COMPLETE)
Ejemplo n.º 29
0
def test_user_event(ctx_factory):
    """Check that waiting on a ``UserEvent`` blocks until it is completed.

    ``event.wait()`` and ``cl.wait_for_events([event])`` are each run in a
    background thread; neither may return before ``set_status(COMPLETE)``
    and both must return shortly after it.

    Raises:
        RuntimeError: if a waiter returns too early or does not return
            within the grace period.
    """
    ctx = ctx_factory()
    # UserEvent requires OpenCL 1.1 in the context AND in the headers, so
    # the skip condition is "either one is too old".
    if (ctx._get_cl_version() < (1, 1) or
            cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    # (waiter, status key, message when the waiter never returns)
    scenarios = (
        (event_waiter1, 1, 'UserEvent.wait timeout'),
        (event_waiter2, 2, 'cl.wait_for_events timeout on UserEvent'),
    )

    for waiter, key, timeout_message in scenarios:
        evt = cl.UserEvent(ctx)
        Thread(target=waiter, args=(evt, key)).start()
        sleep(.05)
        if status.get(key, False):
            raise RuntimeError('UserEvent triggered before set_status')
        evt.set_status(cl.command_execution_status.COMPLETE)
        sleep(.05)
        if not status.get(key, False):
            raise RuntimeError(timeout_message)
        assert (evt.command_execution_status
                == cl.command_execution_status.COMPLETE)
Ejemplo n.º 30
0
def test_user_event(ctx_factory):
    """Check that waiting on a ``UserEvent`` blocks until it is completed.

    Two waiting styles are covered, each in its own background thread:
    ``event.wait()`` and ``cl.wait_for_events([event])``.  A waiter must
    still be blocked before ``set_status(COMPLETE)`` and must have returned
    shortly after it.

    Raises:
        RuntimeError: if a waiter returns too early or does not return
            within the grace period.
    """
    ctx = ctx_factory()
    # The feature is unavailable if EITHER the context or the headers
    # predate OpenCL 1.1, so the guard has to use `or`.
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    # (waiter, status key, message when the waiter never returns)
    scenarios = (
        (event_waiter1, 1, 'UserEvent.wait timeout'),
        (event_waiter2, 2, 'cl.wait_for_events timeout on UserEvent'),
    )

    for waiter, key, timeout_message in scenarios:
        evt = cl.UserEvent(ctx)
        Thread(target=waiter, args=(evt, key)).start()
        sleep(.05)
        if status.get(key, False):
            raise RuntimeError('UserEvent triggered before set_status')
        evt.set_status(cl.command_execution_status.COMPLETE)
        sleep(.05)
        if not status.get(key, False):
            raise RuntimeError(timeout_message)
        assert (evt.command_execution_status
                == cl.command_execution_status.COMPLETE)
Ejemplo n.º 31
0
def test_spirv(ctx_factory):
    """Create a program from a SPIR-V file and check its vector-sum kernel.

    Reads ``add-vectors.spv`` from the current working directory, builds
    it, runs the ``sum`` kernel on two random vectors, and compares the
    result with the sum computed through array arithmetic.  Skipped when
    the context or the CL headers are older than OpenCL 2.1.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if (ctx._get_cl_version() < (2, 1) or cl.get_cl_header_version() < (2, 1)):
        from pytest import skip
        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    with open("add-vectors.spv", "rb") as spv_file:
        spv = spv_file.read()

    # A program created from IL has to be built before kernels can be
    # obtained from it, so build explicitly before touching `prg.sum`.
    prg = cl.Program(ctx, spv).build()

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    assert la.norm((dest_dev - (a_dev + b_dev)).get()) < 1e-7
Ejemplo n.º 32
0
    def print_info():
        """Print version, platform and GPU device details to standard output.

        Every platform returned by ``cl.get_platforms()`` is listed along
        with its GPU devices.  The local memory size is divided by 1024
        before being printed with a 'KB' label.
        """
        print("PyOpenCL Version:", cl.VERSION)
        print("OpenCL Head Version:", cl.get_cl_header_version())
        print()

        platforms = cl.get_platforms()
        print("Platforms Amount:", len(platforms))

        # (label, attribute name on cl.device_info) printed for each GPU.
        device_rows = (
            ("----OpenCL Version:", "OPENCL_C_VERSION"),
            ("----GPU Vendor:", "VENDOR"),
            ("----GPU Version:", "VERSION"),
            ("----GPU Driver Version:", "DRIVER_VERSION"),
            ("----Max Work Group Size:", "MAX_WORK_GROUP_SIZE"),
            ("----Max Compute Units:", "MAX_COMPUTE_UNITS"),
            ("----Max Work Item Size:", "MAX_WORK_ITEM_SIZES"),
        )

        for plat in platforms:
            print("Platform:", plat.get_info(cl.platform_info.NAME))
            print("--Platform Profile:",
                  plat.get_info(cl.platform_info.PROFILE))
            print("--Platform Vendor:", plat.get_info(cl.platform_info.VENDOR))
            print("--Platform Version:",
                  plat.get_info(cl.platform_info.VERSION))

            gpus = plat.get_devices(cl.device_type.GPU)
            print("--GPU Amount:", len(gpus))

            for gpu in gpus:
                print("--GPU:", gpu.get_info(cl.device_info.NAME))
                for label, field in device_rows:
                    print(label, gpu.get_info(getattr(cl.device_info, field)))
                local_mem_kb = gpu.get_info(cl.device_info.LOCAL_MEM_SIZE) / 1024
                print("----Local Memory Size:", local_mem_kb, 'KB')
                print()
Ejemplo n.º 33
0
def test_fine_grain_svm(ctx_factory):
    """Double a fine-grain SVM array in a kernel and verify the host view.

    The test is skipped when the context or the OpenCL headers are older than
    2.0, or when the first device lacks fine-grain buffer SVM support.
    """
    import sys
    from pytest import skip

    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    svm_available = (ctx._get_cl_version() >= (2, 0)
                     and cl.get_cl_header_version() >= (2, 0))
    if not svm_available:
        skip("SVM only available in OpenCL 2.0 and higher")

    device_caps = ctx.devices[0].svm_capabilities
    if not (device_caps & cl.device_svm_capabilities.FINE_GRAIN_BUFFER):
        skip("device does not support fine-grain SVM")

    n = 3000
    ary = cl.fsvm_empty(ctx, n, np.float32, alignment=64)

    # The base-object check is skipped on PyPy, see
    # https://bitbucket.org/pypy/numpy/issues/52
    if not running_on_pypy:
        assert isinstance(ary.base, cl.SVMAllocation)

    ary.fill(17)
    expected = ary.copy() * 2

    kernel_source = """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """
    prg = cl.Program(ctx, kernel_source).build()

    prg.twice(queue, ary.shape, None, cl.SVM(ary))
    # Wait for the kernel before reading the array on the host.
    queue.finish()

    print(ary)
    assert np.array_equal(expected, ary)
Ejemplo n.º 34
0
def test_fine_grain_svm(ctx_factory):
    """Run a doubling kernel on a fine-grain SVM buffer and check the result.

    Skips when OpenCL 2.0 is unavailable (context or headers) or when the
    first device does not advertise ``FINE_GRAIN_BUFFER`` SVM capability.
    """
    import sys
    from pytest import skip

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if not (ctx._get_cl_version() >= (2, 0)
            and cl.get_cl_header_version() >= (2, 0)):
        skip("SVM only available in OpenCL 2.0 and higher")

    first_device = ctx.devices[0]
    fine_grain = (first_device.svm_capabilities
                  & cl.device_svm_capabilities.FINE_GRAIN_BUFFER)
    if not fine_grain:
        skip("device does not support fine-grain SVM")

    n = 3000
    ary = cl.fsvm_empty(ctx, n, np.float32, alignment=64)

    if '__pypy__' not in sys.builtin_module_names:
        # Not checked on PyPy: https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(ary.base, cl.SVMAllocation)

    ary.fill(17)
    orig_ary = ary.copy()

    source = """
        __kernel void twice(__global float *a_g)
        {
          a_g[get_global_id(0)] *= 2;
        }
        """
    twice = cl.Program(ctx, source).build().twice

    twice(queue, ary.shape, None, cl.SVM(ary))
    # Block until the kernel is done so the host read below is valid.
    queue.finish()

    print(ary)
    doubled = orig_ary * 2
    assert np.array_equal(doubled, ary)
Ejemplo n.º 35
0
def test_spirv(ctx_factory):
    """Add two random vectors using a kernel loaded from a SPIR-V file.

    Skipped unless both the context and the OpenCL headers are at least
    version 2.1. The SPIR-V file name depends on the device's address width
    and is opened relative to the current working directory.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    spirv_supported = (ctx._get_cl_version() >= (2, 1)
                       and cl.get_cl_header_version() >= (2, 1))
    if not spirv_supported:
        from pytest import skip
        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    spv_path = "add-vectors-%d.spv" % queue.device.address_bits
    with open(spv_path, "rb") as spv_file:
        spv = spv_file.read()

    prg = cl.Program(ctx, spv)

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    # Compare the device result against the sum computed with array operators.
    residual = (dest_dev - (a_dev + b_dev)).get()
    assert la.norm(residual) < 1e-7
Ejemplo n.º 36
0
def test_platform_get_devices(ctx_factory):
    """Check that ``Platform.get_devices`` honours the requested device type.

    Every device type is queried. For all types except DEFAULT, ALL and
    CUSTOM, each returned device must have the requested type bits set.
    """
    ctx = ctx_factory()
    platform = ctx.devices[0].platform

    if platform.name == "Apple":
        pytest.xfail("Apple doesn't understand all the values we pass "
                     "for dev_type")

    dev_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]

    # CUSTOM is only queried on OpenCL >= 1.2 and never on NVIDIA platforms.
    custom_queryable = (
        platform._get_cl_version() >= (1, 2)
        and cl.get_cl_header_version() >= (1, 2)
        and not platform.name.lower().startswith("nvidia"))
    if custom_queryable:
        dev_types.append(cl.device_type.CUSTOM)

    # Types that are queried but whose results are not checked.
    unchecked = (cl.device_type.DEFAULT, cl.device_type.ALL,
                 getattr(cl.device_type, 'CUSTOM', None))

    for dev_type in dev_types:
        print(dev_type)
        devs = platform.get_devices(dev_type)
        if dev_type not in unchecked:
            for dev in devs:
                assert (dev.type & dev_type) == dev_type
Ejemplo n.º 37
0
    def __init__(self, kernel_file):
        """Create the OpenCL context, kernels and queue, and load the test data.

        Reads ``self.layers``, ``self.layer_height`` and ``self.hidden_layers``
        (expected to exist before ``__init__`` runs) and the module-level
        ``VENDOR_NAME`` / ``DEVICE_TYPE`` settings. On any configuration or
        device-discovery error a message is printed and the process exits
        with status 1.

        Args:
            kernel_file: Open file object whose ``read()`` returns bytes.
                For ``cl.device_type.ACCELERATOR`` the bytes are used as a
                prebuilt device binary; otherwise they are decoded as UTF-8
                kernel source and built.
        """
        if self.layers + 1 != len(self.layer_height):
            print("Bad network config.")
            # Exit with a failure status; a bare exit() would report success.
            exit(1)

        print("Running with {} hidden layers and {} layers total.".format(
            self.hidden_layers, self.layers))

        print("OpenCL Version v{}".format(".".join(
            [str(i) for i in cl.get_cl_header_version()])))
        print("Finding platform....")
        platform = self.findPlatform(VENDOR_NAME)
        if not platform:
            print("ERROR: Platform not found for name {0}".format(VENDOR_NAME))
            exit(1)

        print("Getting devices...")
        devices = platform.get_devices(device_type=DEVICE_TYPE)
        if len(devices) < 1:
            print("ERROR: No device found for type {0}.".format(DEVICE_TYPE))
            exit(1)

        # The second device is selected below, so reject a single-device
        # platform explicitly instead of crashing with an IndexError.
        if len(devices) < 2:
            print("ERROR: Expected at least 2 devices of type {0}, "
                  "found {1}.".format(DEVICE_TYPE, len(devices)))
            exit(1)

        devices = [devices[1]]

        self.ctx = cl.Context(devices=devices)

        if DEVICE_TYPE == cl.device_type.ACCELERATOR:
            print("Reading binary...")
            binary = kernel_file.read()

            # One copy of the binary per selected device.
            binaries = [binary] * len(devices)

            print("Building...")
            # NOTE(review): unlike the source path below, no .build() is called
            # on the binary program here -- confirm this is intended.
            program = cl.Program(self.ctx, devices, binaries)
        else:
            print("Reading program...")
            binary = kernel_file.read()

            program = cl.Program(self.ctx, binary.decode('utf-8')).build()

        self.kForward = program.forward
        self.kForwardSoftMax = program.forward_softmax
        # self.kBackwardFirstDelta = program.backward_first_delta
        # self.kBackward = program.backward

        # None marks a non-scalar kernel argument; the rest are 32-bit ints.
        self.kForward.set_scalar_arg_dtypes(
            [None, None, None, None, np.int32, np.int32, np.int32, np.int32])
        self.kForwardSoftMax.set_scalar_arg_dtypes([None, np.int32, np.int32])
        # self.kBackwardFirstDelta.set_scalar_arg_dtypes([None, None, None, np.int32, np.int32])
        # self.kBackward.set_scalar_arg_dtypes(
        #    [None, None, None, None, NN_T, NN_T, np.int32, np.int32, np.int32])

        self.queue = cl.CommandQueue(self.ctx)

        print("Loading data...")
        # Only the second (test) pair returned by load_data() is kept.
        _, (self.x_test, self.y_test) = input_data.load_data()

        # Flatten to 10000 samples, each with layer_height[0] input values.
        self.y_test = self.y_test.reshape((10000, ))
        self.x_test = self.x_test.reshape(10000, self.layer_height[0])
        self.x_test = self.x_test.astype('float32')
        # Presumably rescales 0..255 pixel values to 0..1 -- TODO confirm.
        self.x_test /= 255

        # Prediction tallies for the two execution paths.
        self.correct_pred_fpga = 0
        self.wrong_pred_fpga = 0
        self.correct_pred_cpu = 0
        self.wrong_pred_cpu = 0
Ejemplo n.º 38
0
def log_version_info():
    """Log the OpenCL header version (dotted) and whether GL support is present."""
    header_version = ".".join(map(str, pyopencl.get_cl_header_version()))
    log.info("PyOpenCL loaded, header version: %s, GL support: %s",
             header_version, pyopencl.have_gl())
Ejemplo n.º 39
0
def test_get_info(ctx_factory):
    """Query every ``*_info`` value of the main OpenCL object types.

    For the platform, device, context, queue, program, buffer, kernel, event
    and (where supported) sampler and image objects, each public attribute of
    the matching info class is queried through ``get_info`` and, unless
    disabled, through the lower-case attribute form. Queries listed in
    ``CRASH_QUIRKS`` for the current platform are not executed at all.

    The context produced by ``ctx_factory`` must contain exactly one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # NOTE(review): failures are tallied here but the total is never asserted
    # in this function, so a failing query does not fail the test.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if (ctx._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # ((vendor, platform name, version prefix), [(class, info), ...]) entries
    # for queries that must not be executed on that platform.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
            (cl.Event, cl.event_info.COMMAND_QUEUE),
        ]),
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.2 CUDA 7.5"), [
            (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
        ]),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"), [
            (cl.Program, cl.program_info.SOURCE),
        ]),
    ]
    # Same layout as CRASH_QUIRKS, for failures that are tolerated.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if (isinstance(cl_obj, quirk_cls) and quirk_info == info):
                        return True

        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public member of info_cls on cl_obj, tallying failures."""
        if func is None:

            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if not info_name.startswith("_") and info_name != "to_string":
                print(info_cls, info_name)
                info = getattr(info_cls, info_name)

                if find_quirk(CRASH_QUIRKS, cl_obj, info):
                    print("not executing get_info", type(cl_obj), info_name)
                    print("(known crash quirk for %s)" % platform.name)
                    continue

                try:
                    func(info)
                except Exception:
                    # Report the failure the same way as the attribute form
                    # below; the original built a message but never printed it.
                    print("failed get_info", type(cl_obj), info_name)

                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

                if try_attr_form:
                    try:
                        getattr(cl_obj, info_name.lower())
                    except Exception:
                        print("failed attr-based get_info", type(cl_obj),
                              info_name)

                        if find_quirk(QUIRKS, cl_obj, info):
                            print("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound even when the device does not support profiling, since
    # it is read unconditionally further down.
    profiling = False
    props = 0
    if (device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE):
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg,
            cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    # n * 4 bytes: one 4-byte float per work item of the kernel above.
    a_buf = cl.Buffer(ctx, 0, n * 4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n, ), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt,
                cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute accesses: they only need to succeed without raising.
        img.depth
        img.image.depth
        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
Ejemplo n.º 40
0
def test_get_info(ctx_factory):
    """Query every ``*_info`` value of the main OpenCL object types.

    For the platform, device, context, queue, program, buffer, kernel, event
    and (where supported) sampler and image objects, each public attribute of
    the matching info class is queried through ``get_info`` and, unless
    disabled, through the lower-case attribute form. Queries listed in
    ``CRASH_QUIRKS`` for the current platform are not executed at all.

    The context produced by ``ctx_factory`` must contain exactly one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # NOTE(review): failures are tallied here but the total is never asserted
    # in this function, so a failing query does not fail the test.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # ((vendor, platform name, version prefix), [(class, info), ...]) entries
    # for queries that must not be executed on that platform.
    CRASH_QUIRKS = [  # noqa
            (("NVIDIA Corporation", "NVIDIA CUDA",
                "OpenCL 1.0 CUDA 3.0.1"),
                [
                    (cl.Event, cl.event_info.COMMAND_QUEUE),
                    ]),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8-pre"),
                    pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10"),
                pocl_quirks),
            (("Apple", "Apple",
                "OpenCL 1.2"),
                [
                    (cl.Program, cl.program_info.SOURCE),
                    ]),
            ]
    # Same layout as CRASH_QUIRKS, for failures that are tolerated.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (
                    vendor == platform.vendor
                    and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if (isinstance(cl_obj, quirk_cls)
                            and quirk_info == info):
                        return True

        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public member of info_cls on cl_obj, tallying failures."""
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if not info_name.startswith("_") and info_name != "to_string":
                print(info_cls, info_name)
                info = getattr(info_cls, info_name)

                if find_quirk(CRASH_QUIRKS, cl_obj, info):
                    print("not executing get_info", type(cl_obj), info_name)
                    print("(known crash quirk for %s)" % platform.name)
                    continue

                try:
                    func(info)
                except Exception:
                    # Report the failure the same way as the attribute form
                    # below; the original built a message but never printed it.
                    print("failed get_info", type(cl_obj), info_name)

                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

                if try_attr_form:
                    try:
                        getattr(cl_obj, info_name.lower())
                    except Exception:
                        print("failed attr-based get_info", type(cl_obj), info_name)

                        if find_quirk(QUIRKS, cl_obj, info):
                            print("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound even when the device does not support profiling, since
    # it is read unconditionally further down.
    profiling = False
    props = 0
    if (device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE):
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx,
            properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    # n*4 bytes: one 4-byte float per work item of the kernel above.
    a_buf = cl.Buffer(ctx, 0, n*4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
            ]:
        smp = cl.Sampler(ctx, False,
                cl.addressing_mode.CLAMP,
                cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute accesses: they only need to succeed without raising.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))
Ejemplo n.º 41
0
'''
Prints relevant information regarding the capabilities of the current OpenCL runtime and devices
Note that pyopencl has a script that prints all properties in its examples folder
'''

import pyopencl as cl

# Report the PyOpenCL build and the OpenCL header version it was compiled for.
print('PyOpenCL version: {}'.format(cl.VERSION_TEXT))
print('OpenCL header version: {}\n'.format(
    '.'.join(str(part) for part in cl.get_cl_header_version())))

# Enumerate the installed platforms (SDKs).
print('- Installed platforms (SDKs) and available devices:')
platforms = cl.get_platforms()

for plat in platforms:
    # Platform headline is printed without indentation.
    indent = ''
    print('{}{} ({})'.format(indent, plat.name, plat.vendor))

    # Platform details are indented by one tab.
    indent = '\t'
    extension_list = plat.extensions.strip().split(' ')
    print('{}Version: {}'.format(indent, plat.version))
    print('{}Profile: {}'.format(indent, plat.profile))
    print('{}Extensions: {}'.format(indent, str(extension_list)))

    # Look up every device type the platform exposes.
    devices = plat.get_devices(cl.device_type.ALL)

    print('{}Available devices: '.format(indent))
    if len(devices) == 0:
        print('{}\tNone'.format(indent))
Ejemplo n.º 42
0
from npsolve import runner
from math import ceil, log2

# Pretty-printer that stops descending into nested structures at depth 5.
pp = pprint.PrettyPrinter(depth=5)
# Short alias for the OpenCL memory-flag constants.
mf = cl.mem_flags


def dec2str(num):
    """Encode the decimal digits of *num* as letters, prefixed with ``'qq'``.

    Each digit ``d`` of ``str(num)`` becomes ``ascii_lowercase[d]``
    (0 -> 'a', 1 -> 'b', ...). Any non-digit character raises ``ValueError``.
    """
    encoded = ''.join(ascii_lowercase[int(digit)] for digit in str(num))
    return 'qq' + encoded


# Show which OpenCL header version PyOpenCL reports.
print(cl.get_cl_header_version())

# Create a context on some available device and a command queue for it.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# NOTE(review): this module-level name shadows the built-in `set`.
set = 'random'
# Load the training and test image/label files.
traindata = idxs("train-images-idx3-ubyte.idx", "train-labels-idx1-ubyte.idx")
testdata = idxs("t10k-images-idx3-ubyte.idx", "t10k-labels-idx1-ubyte.idx")
result = 1.0
ninpt = traindata.count  # number of samples (60000 for this data set)
nvarsd = traindata.rows * traindata.cols  # inputs per sample (28*28 for this data set)
# Layer sizes, starting with one entry per input value.
topology = [nvarsd, 5, 4, 3, 1]
nvarsg = genn.countcns(topology)  # count computed from the topology; printed below as the total number of connections
print("Total connections is", nvarsg)
nsamp = 64  # genome sample count, fixed at 64; disabled alternative: ctx.get_info(cl.context_info.DEVICES)[0].max_work_group_size (the current sort is limited to local_size)
print("Population count is", nsamp)
Ejemplo n.º 43
0
def ensure_required_version():
    """Raise ``AssertionError`` unless the OpenCL header version is 2.0 or newer.

    The version is compared as a ``(major, minor)`` tuple. The check is an
    explicit ``raise`` rather than an ``assert`` statement so that it is not
    stripped when Python runs with ``-O``.

    Raises:
        AssertionError: If ``cl.get_cl_header_version()`` is below 2.0.
    """
    ver = cl.get_cl_header_version()
    if tuple(ver[:2]) < (2, 0):
        raise AssertionError("OpenCL must be version 2.0 or greater.")
Ejemplo n.º 44
0
import pprint
from idxread import idxs
from npsolve import runner
from math import ceil, log2

# Pretty-printer that stops descending into nested structures at depth 5.
pp = pprint.PrettyPrinter(depth=5)
# Short alias for the OpenCL memory-flag constants.
mf = cl.mem_flags

def dec2str(num):
    """Map each decimal digit of *num* to a letter and prefix with ``'qq'``.

    Digit ``d`` is replaced by ``ascii_lowercase[d]`` (0 -> 'a' ... 9 -> 'j').
    Characters of ``str(num)`` that are not digits raise ``ValueError``.
    """
    digits = map(int, str(num))
    letters = [ascii_lowercase[d] for d in digits]
    return 'qq{}'.format(''.join(letters))

# Show which OpenCL header version PyOpenCL reports.
print( cl.get_cl_header_version() )

# Create a context on some available device and a command queue for it.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# NOTE(review): this module-level name shadows the built-in `set`.
set = 'random'
# Load the training and test image/label files.
traindata = idxs("train-images-idx3-ubyte.idx", "train-labels-idx1-ubyte.idx")
testdata  = idxs("t10k-images-idx3-ubyte.idx", "t10k-labels-idx1-ubyte.idx")
result = 1.0
ninpt =  traindata.count                 # number of samples (60000 for this data set)
nvarsd = traindata.rows*traindata.cols   # inputs per sample (28*28 for this data set)
# Layer sizes, starting with one entry per input value.
topology = [nvarsd, 5, 4, 3, 1]
nvarsg = genn.countcns(topology)     # count computed from the topology; printed below as the total number of connections
print("Total connections is", nvarsg)
nsamp = 64  # genome sample count, fixed at 64; disabled alternative: ctx.get_info(cl.context_info.DEVICES)[0].max_work_group_size (the current sort is limited to local_size)
print("Population count is", nsamp)
    def pyOpenCLInfo(self, output_info=True):
        """Print an OpenCL platform/device report, or record a compute-unit count.

        Args:
            output_info: When true (the default), print the PyOpenCL and
                OpenCL header versions followed by every installed platform
                and the properties of each of its devices. When false, print
                nothing and instead store ``max_compute_units`` of the first
                device of platform ``self.platform_id`` in
                ``self.recommend_CU``.
        """
        if not output_info:
            platform = cl.get_platforms()[self.platform_id]
            device = platform.get_devices(cl.device_type.ALL)[0]
            self.recommend_CU = device.max_compute_units
            return

        print('PyOpenCL version: ' + cl.VERSION_TEXT)
        print('OpenCL header version: ' +
              '.'.join(map(str, cl.get_cl_header_version())) + '\n')

        # Get installed platforms (SDKs)
        print('- Installed platforms (SDKs) and available devices:')
        platforms = cl.get_platforms()

        for plat in platforms:
            indent = ''

            # Get and print platform info
            print(indent + '{} ({})'.format(plat.name, plat.vendor))
            indent = '\t'
            print(indent + 'Version: ' + plat.version)
            print(indent + 'Profile: ' + plat.profile)
            print(indent + 'Extensions: ' +
                  str(plat.extensions.strip().split(' ')))

            # Get and print device info
            devices = plat.get_devices(cl.device_type.ALL)

            print(indent + 'Available devices: ')
            if not devices:
                print(indent + '\tNone')

            for dev in devices:
                indent = '\t\t'
                print(indent + '{} ({})'.format(dev.name, dev.vendor))

                indent = '\t\t\t'
                flags = [('Version', dev.version),
                         ('Type', cl.device_type.to_string(dev.type)),
                         ('Extensions',
                          str(dev.extensions.strip().split(' '))),
                         ('Memory (global)', str(dev.global_mem_size)),
                         ('Memory (local)', str(dev.local_mem_size)),
                         ('Address bits', str(dev.address_bits)),
                         ('Max work item dims',
                          str(dev.max_work_item_dimensions)),
                         ('Max work group size',
                          str(dev.max_work_group_size)),
                         ('Max compute units', str(dev.max_compute_units)),
                         ('Driver version', dev.driver_version),
                         ('Image support', str(bool(dev.image_support))),
                         ('Little endian', str(bool(dev.endian_little))),
                         ('Device available', str(bool(dev.available))),
                         ('Compiler available',
                          str(bool(dev.compiler_available)))]

                # Plain loop: the rows are printed only for their side
                # effect, so no throwaway list is built.
                for name, flag in flags:
                    print(indent + '{0:<25}{1:<10}'.format(name + ':', flag))

            # Blank line between platforms.
            print('')
Ejemplo n.º 46
0
import pyopencl as cl

print("CL_VERSION:", cl.VERSION)
print("CL_HEADER_VERSION:", cl.get_cl_header_version())
print()

platforms = cl.get_platforms()
print("Platform num:", len(platforms))

for plat in platforms:
    print("--Platform Name:", plat.get_info(cl.platform_info.NAME))
    #   print("--Platform Extensions:",plat.get_info(cl.platform_info.EXTENSIONS))
    print("--Platform Profile:", plat.get_info(cl.platform_info.PROFILE))
    print("--Platform Vendor:", plat.get_info(cl.platform_info.VENDOR))
    print("--Platform Version:", plat.get_info(cl.platform_info.VERSION))

    devices = plat.get_devices(cl.device_type.ALL)
    print("--device num:", len(devices))

    for device in devices:
        print("----Name:", device.get_info(cl.device_info.NAME))
        print("----OpenCL_C_Version:",
              device.get_info(cl.device_info.OPENCL_C_VERSION))
        print("----Vendor:", device.get_info(cl.device_info.VENDOR))
        print("----Version:", device.get_info(cl.device_info.VERSION))
        print("----Driver Version:",
              device.get_info(cl.device_info.DRIVER_VERSION))

        print("----MAX_WORK_GROUP_SIZE:",
              device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE))
        print("----MAX_COMPUTE_UNITS:",