def test_coarse_grain_svm(ctx_factory):
    """Round-trip data through a coarse-grain SVM allocation.

    The buffer is zero-filled, mapped and set to 17, doubled by a kernel,
    then read back both through a read-only mapping and (except on pocl)
    through ``enqueue_copy``.  Skipped without SVM support; xfails on AMD
    CPU devices.
    """
    import sys

    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    dev = ctx.devices[0]

    svm_available = (
        ctx._get_cl_version() >= (2, 0)
        and ctx.devices[0]._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0))
    if dev.platform.name == "Portable Computing Language":
        # For pocl the decision is based on the pocl release number instead.
        svm_available = (
            get_pocl_version(dev.platform) >= (1, 0)
            and cl.get_cl_header_version() >= (2, 0))

    if not svm_available:
        pytest.skip("SVM only available in OpenCL 2.0 and higher")

    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n = 3000
    host_view = cl.csvm_empty(ctx, (n, ), np.float32, alignment=64)
    svm_ary = cl.SVM(host_view)
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    zero = np.zeros((), svm_ary.mem.dtype)
    cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as ary:
        ary.fill(17)
        orig_ary = ary.copy()

    kernel_src = """ __kernel void twice(__global float *a_g) { a_g[get_global_id(0)] *= 2; } """
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as ary:
        print(ary)
        assert np.array_equal(orig_ary * 2, ary)

    new_ary = np.empty_like(orig_ary)
    new_ary.fill(-1)

    if ctx.devices[0].platform.name != "Portable Computing Language":
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl up to and including 1.0rc1.
        cl.enqueue_copy(queue, new_ary, svm_ary)
        assert np.array_equal(orig_ary * 2, new_ary)
def _may_have_svm(dev):
    """Return whether *dev* may support SVM, judged from version numbers.

    Both the device's platform and the OpenCL headers must report at least
    2.0.  On pocl the platform check is replaced by a pocl release check
    (>= 1.0).
    """
    platform = dev.platform
    may_have = (
        platform._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0))

    if platform.name == "Portable Computing Language":
        may_have = (
            get_pocl_version(platform) >= (1, 0)
            and cl.get_cl_header_version() >= (2, 0))

    return may_have
def _may_have_svm(dev):
    """Tell whether SVM could be available on *dev* based on versions only.

    Requires platform and header versions of at least 2.0; for the pocl
    platform the pocl release (>= 1.0) is consulted in place of the
    platform's CL version.
    """
    plat = dev.platform
    svm_possible = (
        plat._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0))

    is_pocl = plat.name == "Portable Computing Language"
    if is_pocl:
        svm_possible = (
            get_pocl_version(plat) >= (1, 0)
            and cl.get_cl_header_version() >= (2, 0))

    return svm_possible
def print_info(self):
    """Print library versions plus details of this object's platform and device."""
    print("PyOpenCL Version:", cl.VERSION)
    print("OpenCL Version:", cl.get_cl_header_version())
    print()

    plat = self._platform
    pinfo = cl.platform_info
    print("Platform Name:", plat.get_info(pinfo.NAME))
    print("Platform Profile:", plat.get_info(pinfo.PROFILE))
    print("Platform Vendor:", plat.get_info(pinfo.VENDOR))
    print("Platform Version:", plat.get_info(pinfo.VERSION))
    print()

    dev = self._device
    dinfo = cl.device_info
    print("GPU Name:", dev.get_info(dinfo.NAME))
    print("OpenCL Version:", dev.get_info(dinfo.OPENCL_C_VERSION))
    print("GPU Vendor:", dev.get_info(dinfo.VENDOR))
    print("GPU Version:", dev.get_info(dinfo.VERSION))
    print("GPU Driver Version:", dev.get_info(dinfo.DRIVER_VERSION))
    print("Max Work Group Size:", dev.get_info(dinfo.MAX_WORK_GROUP_SIZE))
    print("Max Compute Units:", dev.get_info(dinfo.MAX_COMPUTE_UNITS))
    print("Max Work Item Size:", dev.get_info(dinfo.MAX_WORK_ITEM_SIZES))
    # The reported size is divided by 1024 before being labelled 'KB'.
    local_mem_kb = dev.get_info(dinfo.LOCAL_MEM_SIZE) / 1024
    print("Local Memory Size:", local_mem_kb, 'KB')
def test_platform_get_devices(ctx_factory):
    """Query the platform for each device type and check the returned devices.

    ``CUSTOM`` is only queried when platform and headers are at least CL 1.2
    and the platform is not NVIDIA.  For ``DEFAULT``, ``ALL`` and ``CUSTOM``
    the query is made but the per-device type check is skipped.
    """
    platform = ctx_factory().devices[0].platform

    if platform.name == "Apple":
        pytest.xfail("Apple doesn't understand all the values we pass "
                "for dev_type")

    queried_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]

    custom_queryable = (
        platform._get_cl_version() >= (1, 2)
        and cl.get_cl_header_version() >= (1, 2)
        and not platform.name.lower().startswith("nvidia"))
    if custom_queryable:
        queried_types.append(cl.device_type.CUSTOM)

    unchecked_types = (
        cl.device_type.DEFAULT,
        cl.device_type.ALL,
        getattr(cl.device_type, "CUSTOM", None))

    for dev_type in queried_types:
        print(dev_type)
        found = platform.get_devices(dev_type)

        if dev_type in unchecked_types:
            continue

        for dev in found:
            assert dev.type & dev_type == dev_type
def test_sub_buffers(ctx_factory):
    """Copy an aligned slice of a buffer back to the host and compare it.

    Skipped unless both the context and the OpenCL headers are at least 1.1.
    """
    ctx = ctx_factory()

    context_too_old = ctx._get_cl_version() < (1, 1)
    if context_too_old or cl.get_cl_header_version() < (1, 1):
        from pytest import skip
        skip("sub-buffers are only available in OpenCL 1.1")

    alignment = ctx.devices[0].mem_base_addr_align
    queue = cl.CommandQueue(ctx)

    n = 30000
    a = (np.random.rand(n) * 100).astype(np.uint8)

    flags = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    a_buf = cl.Buffer(ctx, flags, hostbuf=a)

    # Slice bounds are multiples of the device's base-address alignment.
    start = (5000 // alignment) * alignment
    stop = start + 20 * alignment

    expected = a[start:stop]
    fetched = np.empty_like(expected)
    cl.enqueue_copy(queue, fetched, a_buf[start:stop])

    assert np.array_equal(fetched, expected)
def test_sub_buffers(ctx_factory):
    """Copy an aligned slice of a buffer back to the host and compare it.

    Skipped unless both the context and the OpenCL headers PyOpenCL was
    built against are at least version 1.1.
    """
    ctx = ctx_factory()

    # Sub-buffers need 1.1 support from the context AND the headers, so a
    # shortfall in either one is enough to skip (hence ``or``, not ``and``).
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("sub-buffers are only available in OpenCL 1.1")

    alignment = ctx.devices[0].mem_base_addr_align

    queue = cl.CommandQueue(ctx)

    n = 30000
    a = (np.random.rand(n) * 100).astype(np.uint8)

    mf = cl.mem_flags
    a_buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=a)

    # Slice bounds are multiples of the device's base-address alignment.
    start = (5000 // alignment) * alignment
    stop = start + 20 * alignment

    a_sub_ref = a[start:stop]

    a_sub = np.empty_like(a_sub_ref)
    cl.enqueue_copy(queue, a_sub, a_buf[start:stop])

    assert np.array_equal(a_sub, a_sub_ref)
def get_info():
    """Build a nested dict describing the OpenCL setup currently selected.

    Always contains header version, GL availability and PyOpenCL details;
    ``"platform"`` and ``"device"`` sub-dicts are added only when a platform
    or device has been selected.
    """
    # The module-level selections are only read here, never rebound.
    info = {
        "version": {
            "cl_header": pyopencl.get_cl_header_version(),
        },
        "opengl": pyopencl.have_gl(),
        #"kernels" : KERNELS_DEFS.keys()
        "pyopencl": get_pyopencl_info(),
    }

    if selected_platform:
        platform_details = {}
        platform_details["name"] = selected_platform.name
        platform_details["vendor"] = selected_platform.vendor
        platform_details["devices"] = len(selected_platform.get_devices())
        info["platform"] = platform_details

    if selected_device:
        device_details = {
            "type": device_type(selected_device),
            "name": selected_device.name.strip(),
            "version": selected_device.version,
            "max_work_group_size": selected_device.max_work_group_size,
            "max_work_item_dimensions": selected_device.max_work_item_dimensions,
            "max_work_item_sizes": selected_device.max_work_item_sizes,
            "max-size": selected_device_max_size,
        }
        # Not every device object exposes this attribute.
        if hasattr(selected_device, "opencl_c_version"):
            device_details["opencl_c_version"] = selected_device.opencl_c_version
        info["device"] = device_details

    return info
def test_bitonic_sort(ctx_factory, size, dtype):
    """Sort random data along axis 1 with ``BitonicSort`` and compare to NumPy.

    xfails on Apple CPU devices and for float64 on pocl < 1.0; skipped when
    float64 is requested on a device without double support.
    """
    # Use the context supplied by the fixture so the test runs on the device
    # pytest selected, rather than on whatever create_some_context() picks.
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dev = ctx.devices[0]
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if dtype == np.float64 and not has_double_support(dev):
        pytest.skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    s = clrandom.rand(queue, (2, size, 3,), dtype, luxury=None, a=0, b=239482333)
    sgs = s.copy()

    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if (dev.platform.name == "Portable Computing Language"
            and cl.get_cl_header_version() < (1, 2)):
        sgs.finish()

    sorter = BitonicSort(ctx)
    sgs, evt = sorter(sgs, axis=1)
    assert np.array_equal(np.sort(s.get(), axis=1), sgs.get())
def get_info():
    """Build a flat, dotted-key dict describing the selected OpenCL setup.

    Platform and device entries are added only when a platform or device
    has been selected.
    """
    # The module-level selections are only read here, never rebound.
    info = {
        "version": pyopencl.version.VERSION,
        "version.text": pyopencl.VERSION_TEXT,
        "version.status": pyopencl.VERSION_STATUS,
        "version.cl_header": pyopencl.get_cl_header_version(),
        "opengl": pyopencl.have_gl(),
        #"kernels" : KERNELS_DEFS.keys()
    }

    if selected_platform:
        info["platform.name"] = selected_platform.name
        info["platform.vendor"] = selected_platform.vendor
        info["platform.devices"] = len(selected_platform.get_devices())

    if selected_device:
        # Not every device object exposes this attribute.
        if hasattr(selected_device, "opencl_c_version"):
            info["device.opencl_c_version"] = selected_device.opencl_c_version
        info["device.type"] = device_type(selected_device)
        info["device.name"] = selected_device.name.strip()
        info["device.version"] = selected_device.version
        info["device.max_work_group_size"] = selected_device.max_work_group_size
        info["device.max_work_item_dimensions"] = selected_device.max_work_item_dimensions
        info["device.max_work_item_sizes"] = selected_device.max_work_item_sizes

    return info
def test_custom_type_zeros(ctx_factory):
    """Check that ``cl.array.zeros`` with a registered struct dtype is all zeros.

    Skipped unless both the queue and the OpenCL headers are at least CL 1.2.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    if (queue._get_cl_version() < (1, 2)
            or cl.get_cl_header_version() < (1, 2)):
        pytest.skip("CL1.2 not available")

    fields = [
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ]
    dtype = np.dtype(fields)

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    dtype, c_decl = match_dtype_to_c_struct(queue.device, name, dtype)
    dtype = get_or_register_dtype(name, dtype)

    n = 1000
    z = cl.array.zeros(queue, n, dtype=dtype).get()

    assert np.array_equal(np.zeros(n, dtype), z)
def get_info():
    """Build a dict describing the selected OpenCL setup.

    Sub-sections are merged into the result through ``updict`` under the
    ``"pyopencl"``, ``"platform"`` and ``"device"`` prefixes; the latter two
    only when a platform or device has been selected.
    """
    # The module-level selections are only read here, never rebound.
    info = {
        "version.cl_header": pyopencl.get_cl_header_version(),
        "opengl": pyopencl.have_gl(),
        #"kernels" : KERNELS_DEFS.keys()
    }
    updict(info, "pyopencl", get_pyopencl_info())

    if selected_platform:
        platform_details = {
            "name": selected_platform.name,
            "vendor": selected_platform.vendor,
            "devices": len(selected_platform.get_devices()),
        }
        updict(info, "platform", platform_details)

    if selected_device:
        # Not every device object exposes this attribute.
        if hasattr(selected_device, "opencl_c_version"):
            info["device.opencl_c_version"] = selected_device.opencl_c_version
        device_details = {
            "type": device_type(selected_device),
            "name": selected_device.name.strip(),
            "version": selected_device.version,
            "max_work_group_size": selected_device.max_work_group_size,
            "max_work_item_dimensions": selected_device.max_work_item_dimensions,
            "max_work_item_sizes": selected_device.max_work_item_sizes,
            "max-size": selected_device_max_size,
        }
        updict(info, "device", device_details)

    return info
def test_custom_type_zeros(ctx_factory):
    """Zero-initialise a device array of a custom struct dtype and verify it.

    Skipped unless both the queue and the OpenCL headers are at least CL 1.2.
    """
    queue = cl.CommandQueue(ctx_factory())

    queue_has_12 = queue._get_cl_version() >= (1, 2)
    if not queue_has_12 or cl.get_cl_header_version() < (1, 2):
        pytest.skip("CL1.2 not available")

    dtype = np.dtype([
        ("cur_min", np.int32),
        ("cur_max", np.int32),
        ("pad", np.int32),
    ])

    from pyopencl.tools import get_or_register_dtype, match_dtype_to_c_struct

    name = "mmc_type"
    dtype, c_decl = match_dtype_to_c_struct(queue.device, name, dtype)
    dtype = get_or_register_dtype(name, dtype)

    n = 1000
    z_dev = cl.array.zeros(queue, n, dtype=dtype)
    host_result = z_dev.get()
    expected = np.zeros(n, dtype)

    assert np.array_equal(expected, host_result)
def test_compile_link(ctx_factory):
    """Compile two programs separately, link them, and run the linked kernel.

    Skipped unless context and headers are at least CL 1.2, and on the
    Apple platform.
    """
    ctx = ctx_factory()

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        pytest.skip(
            "Context and ICD loader must understand CL1.2 for compile/link")

    platform = ctx.devices[0].platform
    if platform.name == "Apple":
        pytest.skip("Apple doesn't like our compile/link test")

    queue = cl.CommandQueue(ctx)

    # "//CL//" is a line comment in OpenCL C, so it has to sit on a line of
    # its own; otherwise it would comment out the whole source.
    vsink_prg = cl.Program(ctx, """//CL//
        void value_sink(float x)
        {
        }
        """).compile()

    main_prg = cl.Program(ctx, """//CL//
        void value_sink(float x);

        __kernel void experiment()
        {
            value_sink(3.1415f + get_global_id(0));
        }
        """).compile()

    z = cl.link_program(ctx, [vsink_prg, main_prg], devices=ctx.devices)
    z.experiment(queue, (128**2, ), (128, ))
    queue.finish()
def test_spirv(ctx_factory):
    """Build a program from a SPIR-V file and check its vector-add kernel.

    Skipped below CL 2.1 (context or headers), and when an AMD platform
    produces no kernels from the SPIR-V input.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if (ctx._get_cl_version() < (2, 1)
            or cl.get_cl_header_version() < (2, 1)):
        pytest.skip("SPIR-V program creation only available "
                "in OpenCL 2.1 and higher")

    n = 50000
    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    # The file name is chosen by the device's address width.
    spv_name = "add-vectors-%d.spv" % queue.device.address_bits
    with open(spv_name, "rb") as spv_file:
        spv = spv_file.read()

    prg = cl.Program(ctx, spv).build()

    no_kernels = not prg.all_kernels()
    if no_kernels and queue.device.platform.name.startswith("AMD Accelerated"):
        pytest.skip(
            "SPIR-V program creation on AMD did not result in any kernels")

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    error = la.norm((dest_dev - (a_dev + b_dev)).get())
    assert error < 1e-7
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Sort values together with an index array and check both results.

    Sorted values are compared against ``np.sort``; the index array is
    checked by gathering the input through it.  Several platform-specific
    xfails and a double-support skip are applied first.
    """
    import sys
    is_pypy = "__pypy__" in sys.builtin_module_names

    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail("pypy doesn't seem to handle as_strided "
                "on zero-sized arrays very well")

    # Use the context supplied by the fixture so the test runs on the device
    # pytest selected, rather than on whatever create_some_context() picks.
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    device = queue.device
    if device.platform.vendor == "The pocl project" \
            and device.type & cl.device_type.GPU:
        # A space was missing between the two concatenated message parts.
        pytest.xfail("bitonic argsort fails on POCL + Nvidia, "
                "at least the K40, as of pocl 1.6, 2021-01-20")

    dev = ctx.devices[0]
    if (dev.platform.name == "Portable Computing Language"
            and sys.platform == "darwin"):
        pytest.xfail("Bitonic sort crashes on Apple POCL")
    if (dev.platform.name == "Apple" and dev.type & cl.device_type.CPU):
        pytest.xfail("Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")
    if (dev.platform.name == "Portable Computing Language"
            and dtype == np.float64
            and get_pocl_version(dev.platform) < (1, 0)):
        pytest.xfail("Double precision bitonic sort doesn't work on POCL < 1.0")

    if (dev.platform.name == "Intel(R) OpenCL" and size == 0):
        pytest.xfail("size-0 arange fails on Intel CL")

    if dtype == np.float64 and not has_double_support(dev):
        pytest.skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)

    ms = m.copy()

    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait for
    # (no clEnqueueWaitForEvents) https://github.com/inducer/pyopencl/pull/237
    if (dev.platform.name == "Portable Computing Language"
            and cl.get_cl_header_version() < (1, 2)):
        ms.finish()
        index.finish()

    ms, evt = sorterm(ms, idx=index, axis=0)

    assert np.array_equal(np.sort(m.get()), ms.get())

    # may be False because of identical values in array
    # assert np.array_equal(np.argsort(m.get()), index.get())

    # Check values by indices
    assert np.array_equal(m.get()[np.argsort(m.get())], m.get()[index.get()])
def test_unload_compiler(platform):
    """Call ``cl.unload_platform_compiler`` on platforms expected to support it.

    Skipped below CL 1.2 (platform or headers), on pocl, and on the Intel
    proprietary driver.
    """
    from pytest import skip

    platform_too_old = platform._get_cl_version() < (1, 2)
    if platform_too_old or cl.get_cl_header_version() < (1, 2):
        skip("clUnloadPlatformCompiler is only available in OpenCL 1.2")

    _skip_if_pocl(platform, 'pocl does not support unloading compiler')

    if platform.vendor == "Intel(R) Corporation":
        skip("Intel proprietary driver does not support unloading compiler")

    cl.unload_platform_compiler(platform)
def test_fine_grain_svm(ctx_factory):
    """Allocate a fine-grain SVM array and check what backs it.

    Skipped unless both the context and the OpenCL headers are at least 2.0.
    """
    ctx = ctx_factory()

    context_too_old = ctx._get_cl_version() < (2, 0)
    if context_too_old or cl.get_cl_header_version() < (2, 0):
        from pytest import skip
        skip("SVM only available in OpenCL 2.0 and higher")

    shape = (100, 100)
    svm_ary = cl.fsvm_empty(ctx, shape, np.float32, alignment=64)

    # The array's memory should be an SVM allocation.
    assert isinstance(svm_ary.base, cl.SVMAllocation)
def test_coarse_grain_svm(ctx_factory):
    """Round-trip data through a coarse-grain SVM allocation.

    The buffer is set to 17 through a mapping, doubled by a kernel, then
    read back through a read-only mapping and (except on pocl) through
    ``enqueue_copy``.  Skipped below CL 2.0; xfails on AMD CPU devices.
    """
    import sys

    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    context_too_old = ctx._get_cl_version() < (2, 0)
    if context_too_old or cl.get_cl_header_version() < (2, 0):
        from pytest import skip
        skip("SVM only available in OpenCL 2.0 and higher")

    dev = ctx.devices[0]
    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n = 3000
    host_view = cl.csvm_empty(ctx, (n,), np.float32, alignment=64)
    svm_ary = cl.SVM(host_view)
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    if dev.platform.name != "Portable Computing Language":
        # pocl 0.13 has a bug misinterpreting the size parameter
        zero = np.zeros((), svm_ary.mem.dtype)
        cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as ary:
        ary.fill(17)
        orig_ary = ary.copy()

    kernel_src = """ __kernel void twice(__global float *a_g) { a_g[get_global_id(0)] *= 2; } """
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as ary:
        print(ary)
        assert np.array_equal(orig_ary*2, ary)

    new_ary = np.empty_like(orig_ary)
    new_ary.fill(-1)

    if ctx.devices[0].platform.name != "Portable Computing Language":
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl 0.13.
        cl.enqueue_copy(queue, new_ary, svm_ary)
        assert np.array_equal(orig_ary*2, new_ary)
def test_platform_get_devices(platform):
    """Query *platform* for each device type and check the returned devices.

    ``CUSTOM`` is only queried when platform and headers are at least CL 1.2.
    For ``DEFAULT``, ``ALL`` and ``CUSTOM`` the query is made but the
    per-device type check is skipped.
    """
    dev_types = [cl.device_type.ACCELERATOR, cl.device_type.ALL,
                 cl.device_type.CPU, cl.device_type.DEFAULT, cl.device_type.GPU]
    if (platform._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)):
        dev_types.append(cl.device_type.CUSTOM)

    for dev_type in dev_types:
        devs = platform.get_devices(dev_type)
        if dev_type in (cl.device_type.DEFAULT,
                        cl.device_type.ALL,
                        getattr(cl.device_type, 'CUSTOM', None)):
            continue
        for dev in devs:
            # A device's type is a bitfield that may carry several flags, so
            # test that the requested bit is set rather than exact equality.
            assert dev.type & dev_type == dev_type
def test_platform_get_devices(platform):
    """Query *platform* for each device type and check the returned devices.

    ``CUSTOM`` is only queried when platform and headers are at least CL 1.2.
    For ``DEFAULT``, ``ALL`` and ``CUSTOM`` the query is made but the
    per-device type check is skipped.
    """
    dev_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]
    if (platform._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)):
        dev_types.append(cl.device_type.CUSTOM)

    for dev_type in dev_types:
        devs = platform.get_devices(dev_type)
        if dev_type in (cl.device_type.DEFAULT,
                        cl.device_type.ALL,
                        getattr(cl.device_type, 'CUSTOM', None)):
            continue
        for dev in devs:
            # A device's type is a bitfield that may carry several flags, so
            # test that the requested bit is set rather than exact equality.
            assert dev.type & dev_type == dev_type
def test_enqueue_barrier_marker(ctx_factory):
    """Enqueue barriers and markers, with and without wait lists.

    xfails on pocl via ``_xfail_if_pocl``; skipped when the implementation
    is >= 1.2 but the headers are <= 1.1.
    """
    ctx = ctx_factory()

    # Still relevant on pocl 1.0RC1.
    first_platform = ctx.devices[0].platform
    _xfail_if_pocl(first_platform, (1, 0), "pocl crashes on enqueue_barrier")

    queue = cl.CommandQueue(ctx)

    impl_is_new = queue._get_cl_version() >= (1, 2)
    if impl_is_new and cl.get_cl_header_version() <= (1, 1):
        pytest.skip("CL impl version >= 1.2, header version <= 1.1--cannot be sure "
                "that clEnqueueWaitForEvents is implemented")

    cl.enqueue_barrier(queue)
    first_marker = cl.enqueue_marker(queue)
    second_marker = cl.enqueue_marker(queue, wait_for=[first_marker])
    cl.enqueue_barrier(queue, wait_for=[first_marker, second_marker])
def test_empty_ndrange(ctx_factory, empty_shape):
    """Launch a kernel over an empty array with ``allow_empty_ndrange=True``.

    Skipped unless both the context and the OpenCL headers are at least 1.2.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if ctx._get_cl_version() < (1, 2) or cl.get_cl_header_version() < (1, 2):
        # Message typo fixed ("suuport" -> "support").
        pytest.skip("OpenCL 1.2 required for empty NDRange support")

    a = cl_array.zeros(queue, empty_shape, dtype=np.float32)

    prg = cl.Program(ctx, """ __kernel void add_two(__global float *a_g) { a_g[get_global_id(0)] += 2; } """).build()

    prg.add_two(queue, a.shape, None, a.data, allow_empty_ndrange=True)
def test_user_event(ctx_factory):
    """Check that waiting on a ``cl.UserEvent`` blocks until its status is set.

    Two waiter threads are used in turn: one calls ``Event.wait``, the other
    ``cl.wait_for_events``.  Each must not finish before ``set_status`` and
    must finish shortly after it.  Skipped below CL 1.1; xfails on pocl.
    """
    ctx = ctx_factory()

    # User events need 1.1 support from the context AND the headers, so a
    # shortfall in either one is enough to skip (hence ``or``, not ``and``).
    if ctx._get_cl_version() < (1, 1) or cl.get_cl_header_version() < (1, 1):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    if ctx.devices[0].platform.name == "Portable Computing Language":
        # https://github.com/pocl/pocl/issues/201
        pytest.xfail("POCL's user events don't work right")

    # Filled in by the waiter threads once their wait call returns.
    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter1, args=(evt, 1)).start()
    sleep(0.05)
    if status.get(1, False):
        raise RuntimeError("UserEvent triggered before set_status")
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(0.05)
    if not status.get(1, False):
        raise RuntimeError("UserEvent.wait timeout")
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter2, args=(evt, 2)).start()
    sleep(0.05)
    if status.get(2, False):
        raise RuntimeError("UserEvent triggered before set_status")
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(0.05)
    if not status.get(2, False):
        raise RuntimeError("cl.wait_for_events timeout on UserEvent")
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE
def test_platform_get_devices(platform):
    """Query *platform* for each device type and check the returned devices.

    xfails on Apple.  ``CUSTOM`` is only queried when platform and headers
    are at least CL 1.2.  For ``DEFAULT``, ``ALL`` and ``CUSTOM`` the query
    is made but the per-device type check is skipped.
    """
    if platform.name == "Apple":
        pytest.xfail("Apple doesn't understand all the values we pass "
                "for dev_type")

    queried_types = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]

    platform_has_12 = platform._get_cl_version() >= (1, 2)
    if platform_has_12 and cl.get_cl_header_version() >= (1, 2):
        queried_types.append(cl.device_type.CUSTOM)

    unchecked_types = (
        cl.device_type.DEFAULT,
        cl.device_type.ALL,
        getattr(cl.device_type, 'CUSTOM', None))

    for dev_type in queried_types:
        found = platform.get_devices(dev_type)

        if dev_type in unchecked_types:
            continue

        for dev in found:
            assert dev.type & dev_type == dev_type
def test_user_event(ctx_factory):
    """Check that waiting on a ``cl.UserEvent`` blocks until its status is set.

    Two waiter threads are used in turn: one calls ``Event.wait``, the other
    ``cl.wait_for_events``.  Each must not finish before ``set_status`` and
    must finish shortly after it.  Skipped below CL 1.1; xfails on pocl.
    """
    ctx = ctx_factory()

    # User events need 1.1 support from the context AND the headers, so a
    # shortfall in either one is enough to skip (hence ``or``, not ``and``).
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    if ctx.devices[0].platform.name == "Portable Computing Language":
        # https://github.com/pocl/pocl/issues/201
        pytest.xfail("POCL's user events don't work right")

    # Filled in by the waiter threads once their wait call returns.
    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter1, args=(evt, 1)).start()
    sleep(.05)
    if status.get(1, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(1, False):
        raise RuntimeError('UserEvent.wait timeout')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter2, args=(evt, 2)).start()
    sleep(.05)
    if status.get(2, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(2, False):
        raise RuntimeError('cl.wait_for_events timeout on UserEvent')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE
def test_user_event(ctx_factory):
    """Check that waiting on a ``cl.UserEvent`` blocks until its status is set.

    Two waiter threads are used in turn: one calls ``Event.wait``, the other
    ``cl.wait_for_events``.  Each must not finish before ``set_status`` and
    must finish shortly after it.  Skipped below CL 1.1.
    """
    ctx = ctx_factory()

    # User events need 1.1 support from the context AND the headers, so a
    # shortfall in either one is enough to skip (hence ``or``, not ``and``).
    if (ctx._get_cl_version() < (1, 1)
            or cl.get_cl_header_version() < (1, 1)):
        from pytest import skip
        skip("UserEvent is only available in OpenCL 1.1")

    # Filled in by the waiter threads once their wait call returns.
    status = {}

    def event_waiter1(e, key):
        e.wait()
        status[key] = True

    def event_waiter2(e, key):
        cl.wait_for_events([e])
        status[key] = True

    from threading import Thread
    from time import sleep

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter1, args=(evt, 1)).start()
    sleep(.05)
    if status.get(1, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(1, False):
        raise RuntimeError('UserEvent.wait timeout')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE

    evt = cl.UserEvent(ctx)
    Thread(target=event_waiter2, args=(evt, 2)).start()
    sleep(.05)
    if status.get(2, False):
        raise RuntimeError('UserEvent triggered before set_status')
    evt.set_status(cl.command_execution_status.COMPLETE)
    sleep(.05)
    if not status.get(2, False):
        raise RuntimeError('cl.wait_for_events timeout on UserEvent')
    assert evt.command_execution_status == cl.command_execution_status.COMPLETE
def test_spirv(ctx_factory):
    """Create a program from a SPIR-V file and check its vector-add kernel.

    Skipped unless both the context and the OpenCL headers are at least 2.1.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if (ctx._get_cl_version() < (2, 1)
            or cl.get_cl_header_version() < (2, 1)):
        from pytest import skip
        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    with open("add-vectors.spv", "rb") as spv_file:
        spv = spv_file.read()

    # A program created from an intermediate representation still has to be
    # built before kernels can be obtained from it.
    prg = cl.Program(ctx, spv).build()

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    assert la.norm((dest_dev - (a_dev + b_dev)).get()) < 1e-7
def print_info():
    """Print library versions, then every platform and its GPU devices."""
    print("PyOpenCL Version:", cl.VERSION)
    print("OpenCL Head Version:", cl.get_cl_header_version())
    print()

    platforms = cl.get_platforms()
    print("Platforms Amount:", len(platforms))

    pinfo = cl.platform_info
    dinfo = cl.device_info
    for plat in platforms:
        print("Platform:", plat.get_info(pinfo.NAME))
        print("--Platform Profile:", plat.get_info(pinfo.PROFILE))
        print("--Platform Vendor:", plat.get_info(pinfo.VENDOR))
        print("--Platform Version:", plat.get_info(pinfo.VERSION))

        gpus = plat.get_devices(cl.device_type.GPU)
        print("--GPU Amount:", len(gpus))
        for gpu in gpus:
            print("--GPU:", gpu.get_info(dinfo.NAME))
            print("----OpenCL Version:", gpu.get_info(dinfo.OPENCL_C_VERSION))
            print("----GPU Vendor:", gpu.get_info(dinfo.VENDOR))
            print("----GPU Version:", gpu.get_info(dinfo.VERSION))
            print("----GPU Driver Version:", gpu.get_info(dinfo.DRIVER_VERSION))
            print("----Max Work Group Size:", gpu.get_info(dinfo.MAX_WORK_GROUP_SIZE))
            print("----Max Compute Units:", gpu.get_info(dinfo.MAX_COMPUTE_UNITS))
            print("----Max Work Item Size:", gpu.get_info(dinfo.MAX_WORK_ITEM_SIZES))
            # The reported size is divided by 1024 before being labelled 'KB'.
            local_mem_kb = gpu.get_info(dinfo.LOCAL_MEM_SIZE) / 1024
            print("----Local Memory Size:", local_mem_kb, 'KB')
        # NOTE(review): blank separator assumed to follow each platform.
        print()
def test_fine_grain_svm(ctx_factory):
    """Double a fine-grain SVM array in a kernel and read it directly.

    Skipped below CL 2.0 (context or headers) and when the device does not
    report fine-grain buffer SVM capability.
    """
    import sys
    from pytest import skip

    running_on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    context_too_old = ctx._get_cl_version() < (2, 0)
    if context_too_old or cl.get_cl_header_version() < (2, 0):
        skip("SVM only available in OpenCL 2.0 and higher")

    fine_grain_bit = cl.device_svm_capabilities.FINE_GRAIN_BUFFER
    if not (ctx.devices[0].svm_capabilities & fine_grain_bit):
        skip("device does not support fine-grain SVM")

    n = 3000
    ary = cl.fsvm_empty(ctx, n, np.float32, alignment=64)

    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(ary.base, cl.SVMAllocation)

    ary.fill(17)
    orig_ary = ary.copy()

    kernel_src = """ __kernel void twice(__global float *a_g) { a_g[get_global_id(0)] *= 2; } """
    prg = cl.Program(ctx, kernel_src).build()

    prg.twice(queue, ary.shape, None, cl.SVM(ary))
    # Wait for the kernel before the host inspects the array.
    queue.finish()

    print(ary)
    assert np.array_equal(orig_ary*2, ary)
def test_fine_grain_svm(ctx_factory):
    """Run a doubling kernel on a fine-grain SVM array and verify the result.

    Skipped below CL 2.0 (context or headers) and when the device does not
    report fine-grain buffer SVM capability.
    """
    import sys
    from pytest import skip

    on_pypy = '__pypy__' in sys.builtin_module_names

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if (ctx._get_cl_version() < (2, 0)
            or cl.get_cl_header_version() < (2, 0)):
        skip("SVM only available in OpenCL 2.0 and higher")

    device_caps = ctx.devices[0].svm_capabilities
    if not (device_caps & cl.device_svm_capabilities.FINE_GRAIN_BUFFER):
        skip("device does not support fine-grain SVM")

    n = 3000
    ary = cl.fsvm_empty(ctx, n, np.float32, alignment=64)

    if not on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(ary.base, cl.SVMAllocation)

    ary.fill(17)
    orig_ary = ary.copy()

    program = cl.Program(
        ctx,
        """ __kernel void twice(__global float *a_g) { a_g[get_global_id(0)] *= 2; } """)
    prg = program.build()

    prg.twice(queue, ary.shape, None, cl.SVM(ary))
    # Wait for the kernel before the host inspects the array.
    queue.finish()

    print(ary)
    assert np.array_equal(orig_ary * 2, ary)
def test_spirv(ctx_factory):
    """Create a program from a SPIR-V file and check its vector-add kernel.

    The SPIR-V file is chosen by the device's address width.  Skipped unless
    both the context and the OpenCL headers are at least 2.1.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    if (ctx._get_cl_version() < (2, 1)
            or cl.get_cl_header_version() < (2, 1)):
        from pytest import skip
        skip("SPIR-V program creation only available in OpenCL 2.1 and higher")

    n = 50000

    a_dev = cl.clrandom.rand(queue, n, np.float32)
    b_dev = cl.clrandom.rand(queue, n, np.float32)
    dest_dev = cl_array.empty_like(a_dev)

    with open("add-vectors-%d.spv" % queue.device.address_bits, "rb") as spv_file:
        spv = spv_file.read()

    # A program created from an intermediate representation still has to be
    # built before kernels can be obtained from it.
    prg = cl.Program(ctx, spv).build()

    prg.sum(queue, a_dev.shape, None, a_dev.data, b_dev.data, dest_dev.data)

    assert la.norm((dest_dev - (a_dev+b_dev)).get()) < 1e-7
def test_platform_get_devices(ctx_factory):
    """Query the platform for each device type and check the returned devices.

    xfails on Apple.  ``CUSTOM`` is only queried when platform and headers
    are at least CL 1.2 and the platform is not NVIDIA.  For ``DEFAULT``,
    ``ALL`` and ``CUSTOM`` the per-device type check is skipped.
    """
    first_device = ctx_factory().devices[0]
    platform = first_device.platform

    if platform.name == "Apple":
        pytest.xfail("Apple doesn't understand all the values we pass "
                "for dev_type")

    types_to_query = [
        cl.device_type.ACCELERATOR,
        cl.device_type.ALL,
        cl.device_type.CPU,
        cl.device_type.DEFAULT,
        cl.device_type.GPU,
    ]

    if (platform._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)
            and not platform.name.lower().startswith("nvidia")):
        types_to_query.append(cl.device_type.CUSTOM)

    skip_check_for = (
        cl.device_type.DEFAULT,
        cl.device_type.ALL,
        getattr(cl.device_type, 'CUSTOM', None))

    for dev_type in types_to_query:
        print(dev_type)
        matching = platform.get_devices(dev_type)

        if dev_type in skip_check_for:
            continue

        for dev in matching:
            assert dev.type & dev_type == dev_type
def __init__(self, kernel_file):
    """Set up the OpenCL context, kernels and test data for the network.

    Args:
        kernel_file: open binary file object holding either a prebuilt
            device binary (when ``DEVICE_TYPE`` is ACCELERATOR) or UTF-8
            encoded kernel source (otherwise).

    Exits the process with status 1 on a bad network configuration or when
    the required platform/device cannot be found.
    """
    if self.layers + 1 != len(self.layer_height):
        print("Bad network config.")
        # Report the misconfiguration as a failure, not a clean exit.
        exit(1)

    print("Running with {} hidden layers and {} layers total.".format(
        self.hidden_layers, self.layers))
    print("OpenCL Version v{}".format(".".join(
        str(i) for i in cl.get_cl_header_version())))

    print("Finding platform....")
    platform = self.findPlatform(VENDOR_NAME)
    if not platform:
        print("ERROR: Platform not found for name {0}".format(VENDOR_NAME))
        exit(1)

    print("Getting devices...")
    devices = platform.get_devices(device_type=DEVICE_TYPE)
    if len(devices) < 1:
        print("ERROR: No device found for type {0}.".format(DEVICE_TYPE))
        exit(1)

    # The second reported device is the one used.  Guard the index so a
    # single-device platform gives a clear error instead of an IndexError.
    device_index = 1
    if len(devices) <= device_index:
        print("ERROR: Device index {0} requested but only {1} device(s) "
              "found for type {2}.".format(
                  device_index, len(devices), DEVICE_TYPE))
        exit(1)
    devices = [devices[device_index]]

    self.ctx = cl.Context(devices=devices)

    if DEVICE_TYPE == cl.device_type.ACCELERATOR:
        print("Reading binary...")
        binary = kernel_file.read()
        binaries = [binary] * len(devices)
        print("Building...")
        # NOTE(review): unlike the source path below, no .build() is called
        # here -- confirm this is intended for the prebuilt binary.
        program = cl.Program(self.ctx, devices, binaries)
    else:
        print("Reading program...")
        binary = kernel_file.read()
        program = cl.Program(self.ctx, binary.decode('utf-8')).build()

    self.kForward = program.forward
    self.kForwardSoftMax = program.forward_softmax
    # self.kBackwardFirstDelta = program.backward_first_delta
    # self.kBackward = program.backward

    # None marks arguments whose dtype is not fixed here; the rest are
    # 32-bit integer scalars.
    self.kForward.set_scalar_arg_dtypes(
        [None, None, None, None, np.int32, np.int32, np.int32, np.int32])
    self.kForwardSoftMax.set_scalar_arg_dtypes([None, np.int32, np.int32])
    # self.kBackwardFirstDelta.set_scalar_arg_dtypes([None, None, None, np.int32, np.int32])
    # self.kBackward.set_scalar_arg_dtypes(
    #     [None, None, None, None, NN_T, NN_T, np.int32, np.int32, np.int32])

    self.queue = cl.CommandQueue(self.ctx)

    print("Loading data...")
    _, (self.x_test, self.y_test) = input_data.load_data()

    self.y_test = self.y_test.reshape((10000, ))
    self.x_test = self.x_test.reshape(10000, self.layer_height[0])
    self.x_test = self.x_test.astype('float32')
    self.x_test /= 255

    # Prediction counters, all starting at zero.
    self.correct_pred_fpga = 0
    self.wrong_pred_fpga = 0
    self.correct_pred_cpu = 0
    self.wrong_pred_cpu = 0
def log_version_info():
    """Log the OpenCL header version PyOpenCL reports and its GL support."""
    version_parts = pyopencl.get_cl_header_version()
    header_version = ".".join(str(part) for part in version_parts)
    gl_support = pyopencl.have_gl()
    log.info("PyOpenCL loaded, header version: %s, GL support: %s",
             header_version, gl_support)
def test_get_info(ctx_factory):
    """Smoke-test ``get_info`` and attribute-style info access on CL objects.

    For each object (platform, device, context, queue, program, buffer,
    kernel, event, and optionally sampler/image) every public name of the
    matching info enumeration is queried. Queries listed in CRASH_QUIRKS
    for the current platform are skipped; other failures are printed and
    tallied in ``failure_count``.

    NOTE(review): ``failure_count`` is tallied but never asserted on in
    this function -- confirm whether that is intentional.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # Queries that are never executed on the listed platforms.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
            (cl.Event, cl.event_info.COMMAND_QUEUE),
        ]),
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.2 CUDA 7.5"), [
            (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
        ]),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"), [
            (cl.Program, cl.program_info.SOURCE),
        ]),
    ]
    # Queries that are executed but whose failure is not counted.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is a quirk on this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True
        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public member of info_cls on cl_obj via func."""
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                # Report the failure the same way the attribute-form branch
                # does; the message used to be built as a tuple, extended
                # with a str, and never printed.
                print("failed get_info", type(cl_obj), info_name)
                if find_quirk(QUIRKS, cl_obj, info):
                    print("(known quirk for %s)" % platform.name)
                else:
                    failure_count[0] += 1

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    print("failed attr-based get_info", type(cl_obj), info_name)
                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound on every path: it is read after the kernel launch even
    # when the device offers no profiling support.
    profiling = False
    props = 0
    if device.queue_properties & cl.command_queue_properties.PROFILING_ENABLE:
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n, ), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth

        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
def test_get_info(ctx_factory):
    """Smoke-test ``get_info`` and attribute-style info access on CL objects.

    For each object (platform, device, context, queue, program, buffer,
    kernel, event, and optionally sampler/image) every public name of the
    matching info enumeration is queried. Queries listed in CRASH_QUIRKS
    for the current platform are skipped; other failures are printed and
    tallied in ``failure_count``.

    NOTE(review): ``failure_count`` is tallied but never asserted on in
    this function -- confirm whether that is intentional.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])

    # Queries that are never executed on the listed platforms.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
            (cl.Event, cl.event_info.COMMAND_QUEUE),
        ]),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"), [
            (cl.Program, cl.program_info.SOURCE),
        ]),
    ]
    # Queries that are executed but whose failure is not counted.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if (type of cl_obj, info) is a quirk on this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True
        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Query every public member of info_cls on cl_obj via func."""
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                # Report the failure the same way the attribute-form branch
                # does; the message used to be built as a tuple, extended
                # with a str, and never printed.
                print("failed get_info", type(cl_obj), info_name)
                if find_quirk(QUIRKS, cl_obj, info):
                    print("(known quirk for %s)" % platform.name)
                else:
                    failure_count[0] += 1

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    print("failed attr-based get_info", type(cl_obj), info_name)
                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound on every path: it is read after the kernel launch even
    # when the device offers no profiling support.
    profiling = False
    props = 0
    if device.queue_properties & cl.command_queue_properties.PROFILING_ENABLE:
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)
    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth

        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
'''
Print a summary of the installed OpenCL platforms: PyOpenCL and header
versions, then name, vendor, version, profile and extensions per platform.

Note that pyopencl ships a script in its examples folder that prints all
properties.
'''

import pyopencl as cl

print('PyOpenCL version: ' + cl.VERSION_TEXT)
header_version = '.'.join(str(part) for part in cl.get_cl_header_version())
print('OpenCL header version: ' + header_version + '\n')

# Enumerate installed platforms (SDKs)
print('- Installed platforms (SDKs) and available devices:')
platforms = cl.get_platforms()

for plat in platforms:
    # Platform heading is printed flush left
    indent = ''
    heading = '{} ({})'.format(plat.name, plat.vendor)
    print(indent + heading)

    # Platform details are indented one tab
    indent = '\t'
    print(indent + 'Version: ' + plat.version)
    print(indent + 'Profile: ' + plat.profile)
    extension_names = plat.extensions.strip().split(' ')
    print(indent + 'Extensions: ' + str(extension_names))

    # Look up the platform's devices; report when there are none
    devices = plat.get_devices(cl.device_type.ALL)
    print(indent + 'Available devices: ')
    if not devices:
        print(indent + '\tNone')
from npsolve import runner
from math import ceil, log2

pp = pprint.PrettyPrinter(depth=5)
mf = cl.mem_flags


def dec2str(num):
    """Return 'qq' followed by one letter per decimal digit of *num*.

    Every character of ``str(num)`` is converted with ``int`` and used as
    an index into ``ascii_lowercase`` (presumably ``string.ascii_lowercase``,
    i.e. 0 -> 'a' ... 9 -> 'j'; confirm against the file's imports).
    """
    letters = (ascii_lowercase[int(digit)] for digit in str(num))
    return 'qq' + ''.join(letters)


print(cl.get_cl_header_version())

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# NOTE: this module-level name shadows the builtin `set`.
set = 'random'

traindata = idxs("train-images-idx3-ubyte.idx", "train-labels-idx1-ubyte.idx")
testdata = idxs("t10k-images-idx3-ubyte.idx", "t10k-labels-idx1-ubyte.idx")

result = 1.0

ninpt = traindata.count  # Sample count (60000 for this set)
nvarsd = traindata.rows * traindata.cols  # Equation member count (28*28 for this set)

topology = [nvarsd, 5, 4, 3, 1]
nvarsg = genn.countcns(topology)  # Count of equation members
print("Total connections is", nvarsg)

# Genome sample count (the current sort limits it to local_size).
# Alternative: ctx.get_info(cl.context_info.DEVICES)[0].max_work_group_size
nsamp = 64
print("Population count is", nsamp)
def ensure_required_version():
    """Fail unless the OpenCL header version PyOpenCL reports is 2.0 or newer.

    Raises:
        AssertionError: if ``cl.get_cl_header_version()`` is older than 2.0.
    """
    ver = tuple(cl.get_cl_header_version())
    # Compare (major, minor) as one tuple instead of testing each component
    # on its own, and raise explicitly so the check is not stripped when
    # Python runs with -O (as a bare `assert` would be).
    if ver[:2] < (2, 0):
        raise AssertionError("OpenCL must be version 2.0 or greater.")
import pprint
from idxread import idxs
from npsolve import runner
from math import ceil, log2

pp = pprint.PrettyPrinter(depth=5)
mf = cl.mem_flags


def dec2str(num):
    """Return 'qq' followed by one letter per decimal digit of *num*.

    Every character of ``str(num)`` is converted with ``int`` and used as
    an index into ``ascii_lowercase`` (presumably ``string.ascii_lowercase``,
    i.e. 0 -> 'a' ... 9 -> 'j'; confirm against the file's imports).
    """
    letters = (ascii_lowercase[int(digit)] for digit in str(num))
    return 'qq' + ''.join(letters)


print(cl.get_cl_header_version())

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# NOTE: this module-level name shadows the builtin `set`.
set = 'random'

traindata = idxs("train-images-idx3-ubyte.idx", "train-labels-idx1-ubyte.idx")
testdata = idxs("t10k-images-idx3-ubyte.idx", "t10k-labels-idx1-ubyte.idx")

result = 1.0

ninpt = traindata.count  # Sample count (60000 for this set)
nvarsd = traindata.rows * traindata.cols  # Equation member count (28*28 for this set)

topology = [nvarsd, 5, 4, 3, 1]
nvarsg = genn.countcns(topology)  # Count of equation members
print("Total connections is", nvarsg)

# Genome sample count (the current sort limits it to local_size).
# Alternative: ctx.get_info(cl.context_info.DEVICES)[0].max_work_group_size
nsamp = 64
print("Population count is", nsamp)
def pyOpenCLInfo(self, output_info=True):
    """Print an OpenCL platform/device report, or record a compute-unit count.

    Args:
        output_info: when true, print the PyOpenCL version, the OpenCL
            header version and, for every installed platform, its details
            and those of each of its devices. When false, print nothing;
            instead read ``max_compute_units`` from the first device of
            platform ``self.platform_id`` and store it in
            ``self.recommend_CU``.
    """
    if output_info:
        print('PyOpenCL version: ' + cl.VERSION_TEXT)
        print('OpenCL header version: ' +
              '.'.join(map(str, cl.get_cl_header_version())) + '\n')

        # Get installed platforms (SDKs)
        print('- Installed platforms (SDKs) and available devices:')
        platforms = cl.get_platforms()

        for plat in platforms:
            indent = ''

            # Get and print platform info
            print(indent + '{} ({})'.format(plat.name, plat.vendor))
            indent = '\t'
            print(indent + 'Version: ' + plat.version)
            print(indent + 'Profile: ' + plat.profile)
            print(indent + 'Extensions: ' +
                  str(plat.extensions.strip().split(' ')))

            # Get and print device info
            devices = plat.get_devices(cl.device_type.ALL)

            print(indent + 'Available devices: ')
            if not devices:
                print(indent + '\tNone')

            for dev in devices:
                indent = '\t\t'
                print(indent + '{} ({})'.format(dev.name, dev.vendor))

                indent = '\t\t\t'
                flags = [('Version', dev.version),
                         ('Type', cl.device_type.to_string(dev.type)),
                         ('Extensions', str(dev.extensions.strip().split(' '))),
                         ('Memory (global)', str(dev.global_mem_size)),
                         ('Memory (local)', str(dev.local_mem_size)),
                         ('Address bits', str(dev.address_bits)),
                         ('Max work item dims', str(dev.max_work_item_dimensions)),
                         ('Max work group size', str(dev.max_work_group_size)),
                         ('Max compute units', str(dev.max_compute_units)),
                         ('Driver version', dev.driver_version),
                         ('Image support', str(bool(dev.image_support))),
                         ('Little endian', str(bool(dev.endian_little))),
                         ('Device available', str(bool(dev.available))),
                         ('Compiler available', str(bool(dev.compiler_available)))]

                # Plain loop: the prints are side effects, so there is no
                # reason to build a throwaway list of None values.
                for name, flag in flags:
                    print(indent + '{0:<25}{1:<10}'.format(name + ':', flag))

            # Blank line between platforms.
            print('')
    else:
        platform = cl.get_platforms()[self.platform_id]
        device = platform.get_devices(cl.device_type.ALL)[0]
        self.recommend_CU = device.max_compute_units
import pyopencl as cl print("CL_VERSION:", cl.VERSION) print("CL_HEADER_VERSION:", cl.get_cl_header_version()) print() platforms = cl.get_platforms() print("Platform num:", len(platforms)) for plat in platforms: print("--Platform Name:", plat.get_info(cl.platform_info.NAME)) # print("--Platform Extensions:",plat.get_info(cl.platform_info.EXTENSIONS)) print("--Platform Profile:", plat.get_info(cl.platform_info.PROFILE)) print("--Platform Vendor:", plat.get_info(cl.platform_info.VENDOR)) print("--Platform Version:", plat.get_info(cl.platform_info.VERSION)) devices = plat.get_devices(cl.device_type.ALL) print("--device num:", len(devices)) for device in devices: print("----Name:", device.get_info(cl.device_info.NAME)) print("----OpenCL_C_Version:", device.get_info(cl.device_info.OPENCL_C_VERSION)) print("----Vendor:", device.get_info(cl.device_info.VENDOR)) print("----Version:", device.get_info(cl.device_info.VERSION)) print("----Driver Version:", device.get_info(cl.device_info.DRIVER_VERSION)) print("----MAX_WORK_GROUP_SIZE:", device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)) print("----MAX_COMPUTE_UNITS:",