def test_bitonic_sort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` along axis 1 against ``np.sort``.

    A random device array of shape ``(2, size, 3)`` with the given *dtype*
    is generated, a copy of it is sorted with ``BitonicSort`` along axis 1,
    and the result is compared with NumPy's sort of the same data.

    The test is reported as xfail on Apple CPU devices and for float64 on
    POCL older than 1.0; it is skipped when the device has no double
    support.

    NOTE(review): *ctx_factory* is accepted but never called; the context
    comes from ``cl.create_some_context()`` -- confirm this is intended.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dev = context.devices[0]

    platform_name = dev.platform.name
    on_pocl = platform_name == "Portable Computing Language"
    wants_double = dtype == np.float64

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")

    # The version query stays last so it is only made for float64 on POCL.
    if on_pocl and wants_double and get_pocl_version(dev.platform) < (1, 0):
        pytest.xfail(
            "Double precision bitonic sort doesn't work on POCL < 1.0")

    if wants_double and not has_double_support(dev):
        pytest.skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    unsorted = clrandom.rand(
        queue, (2, size, 3), dtype, luxury=None, a=0, b=239482333)
    work = unsorted.copy()

    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait
    # for (no clEnqueueWaitForEvents)
    # https://github.com/inducer/pyopencl/pull/237
    if on_pocl and cl.get_cl_header_version() < (1, 2):
        work.finish()

    bitonic = BitonicSort(context)
    result, _evt = bitonic(work, axis=1)

    reference = np.sort(unsorted.get(), axis=1)
    assert np.array_equal(reference, result.get())
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` with an index array against NumPy.

    A random 1-D device array of length *size* is sorted along axis 0
    while an ``arange(0, size)`` int32 array is passed as ``idx``.  The
    sorted values must equal ``np.sort`` of the input, and gathering the
    input through the resulting index array must give the same values as
    gathering through ``np.argsort``.

    The test is reported as xfail on Apple CPU devices and for float64 on
    POCL.

    NOTE(review): *ctx_factory* is accepted but never called; the context
    comes from ``cl.create_some_context()`` -- confirm this is intended.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dev = context.devices[0]

    platform_name = dev.platform.name

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")

    if platform_name == "Portable Computing Language" and dtype == np.float64:
        pytest.xfail("Double precision bitonic sort doesn't work on POCL")

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    bitonic = BitonicSort(context)
    sorted_dev, _evt = bitonic(m.copy(), idx=index, axis=0)

    expected_sorted = np.sort(m.get())
    assert np.array_equal(expected_sorted, sorted_dev.get())

    # Comparing ``index`` with ``np.argsort(m.get())`` directly may fail
    # because of identical values in the array, so compare the values that
    # each permutation selects instead.
    via_numpy = m.get()[np.argsort(m.get())]
    via_device = m.get()[index.get()]
    assert np.array_equal(via_numpy, via_device)
def test_bitonic_sort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` along axis 1 against ``np.sort``.

    A random device array of shape ``(2, size, 3)`` with the given *dtype*
    is generated, a copy of it is sorted with ``BitonicSort`` along axis 1,
    and the result is compared with NumPy's sort of the same data.

    The test is reported as xfail on Apple CPU devices and for float64 on
    POCL.

    NOTE(review): *ctx_factory* is accepted but never called; the context
    comes from ``cl.create_some_context()`` -- confirm this is intended.
    """
    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dev = context.devices[0]

    platform_name = dev.platform.name

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")

    if platform_name == "Portable Computing Language" and dtype == np.float64:
        pytest.xfail("Double precision bitonic sort doesn't work on POCL")

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    shape = (2, size, 3)
    unsorted = clrandom.rand(
        queue, shape, dtype, luxury=None, a=0, b=239482333)

    bitonic = BitonicSort(context)
    result, _evt = bitonic(unsorted.copy(), axis=1)

    reference = np.sort(unsorted.get(), axis=1)
    assert np.array_equal(reference, result.get())
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` with an index array against NumPy.

    A random 1-D device array of length *size* is sorted along axis 0
    while an ``arange(0, size)`` int32 array is passed as ``idx``.  The
    sorted values must equal ``np.sort`` of the input, and gathering the
    input through the resulting index array must give the same values as
    gathering through ``np.argsort``.

    The test is reported as xfail for the known-bad configurations checked
    below (PyPy with size 0, POCL on a GPU device, POCL on macOS, Apple CPU
    devices, float64 on POCL < 1.0, size 0 on Intel CL) and is skipped when
    the device has no double support.

    NOTE(review): *ctx_factory* is accepted but never called; the context
    comes from ``cl.create_some_context()`` -- confirm this is intended.
    """
    import sys

    is_pypy = "__pypy__" in sys.builtin_module_names
    if not size and is_pypy:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail(
            "pypy doesn't seem to handle as_strided "
            "on zero-sized arrays very well")

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    device = queue.device
    if (device.platform.vendor == "The pocl project"
            and device.type & cl.device_type.GPU):
        # The two literals are concatenated, so the separating space must
        # be part of one of them.
        pytest.xfail(
            "bitonic argsort fails on POCL + Nvidia, "
            "at least the K40, as of pocl 1.6, 2021-01-20")

    dev = ctx.devices[0]
    platform_name = dev.platform.name
    on_pocl = platform_name == "Portable Computing Language"
    wants_double = dtype == np.float64

    if on_pocl and sys.platform == "darwin":
        pytest.xfail("Bitonic sort crashes on Apple POCL")

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")

    # The version query stays last so it is only made for float64 on POCL.
    if on_pocl and wants_double and get_pocl_version(dev.platform) < (1, 0):
        pytest.xfail(
            "Double precision bitonic sort doesn't work on POCL < 1.0")

    if platform_name == "Intel(R) OpenCL" and size == 0:
        pytest.xfail("size-0 arange fails on Intel CL")

    if wants_double and not has_double_support(dev):
        pytest.skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    sorterm = BitonicSort(ctx)
    ms = m.copy()

    # enqueue_marker crashes under CL 1.1 pocl if there is anything to wait
    # for (no clEnqueueWaitForEvents)
    # https://github.com/inducer/pyopencl/pull/237
    if on_pocl and cl.get_cl_header_version() < (1, 2):
        ms.finish()
        index.finish()

    ms, _evt = sorterm(ms, idx=index, axis=0)

    # Only the copy ``ms`` is handed to the sorter, so one host copy of
    # ``m`` serves every comparison below.
    m_host = m.get()

    assert np.array_equal(np.sort(m_host), ms.get())

    # Comparing ``index`` with ``np.argsort(m_host)`` directly may fail
    # because of identical values in the array, so compare the values that
    # each permutation selects instead.
    assert np.array_equal(m_host[np.argsort(m_host)], m_host[index.get()])
def test_bitonic_argsort(ctx_factory, size, dtype):
    """Check ``BitonicSort`` with an index array against NumPy.

    A random 1-D device array of length *size* is sorted along axis 0
    while an ``arange(0, size)`` int32 array is passed as ``idx``.  The
    sorted values must equal ``np.sort`` of the input, and gathering the
    input through the resulting index array must give the same values as
    gathering through ``np.argsort``.

    The test is reported as xfail on PyPy with size 0, on POCL under
    macOS, on Apple CPU devices and for float64 on POCL < 1.0; it is
    skipped when the device has no double support.

    NOTE(review): *ctx_factory* is accepted but never called; the context
    comes from ``cl.create_some_context()`` -- confirm this is intended.
    """
    import sys

    running_on_pypy = "__pypy__" in sys.builtin_module_names
    if running_on_pypy and not size:
        # https://bitbucket.org/pypy/numpy/issues/53/specifying-strides-on-zero-sized-array
        pytest.xfail(
            "pypy doesn't seem to handle as_strided "
            "on zero-sized arrays very well")

    context = cl.create_some_context()
    queue = cl.CommandQueue(context)
    dev = context.devices[0]

    platform_name = dev.platform.name
    on_pocl = platform_name == "Portable Computing Language"
    wants_double = dtype == np.float64

    if on_pocl and sys.platform == "darwin":
        pytest.xfail("Bitonic sort crashes on Apple POCL")

    if platform_name == "Apple" and dev.type & cl.device_type.CPU:
        pytest.xfail(
            "Bitonic sort won't work on Apple CPU: no workgroup "
            "parallelism")

    # The version query stays last so it is only made for float64 on POCL.
    if on_pocl and wants_double and get_pocl_version(dev.platform) < (1, 0):
        pytest.xfail(
            "Double precision bitonic sort doesn't work on POCL < 1.0")

    if wants_double and not has_double_support(dev):
        pytest.skip("double precision not supported on %s" % dev)

    import pyopencl.clrandom as clrandom
    from pyopencl.bitonic_sort import BitonicSort

    index = cl_array.arange(queue, 0, size, 1, dtype=np.int32)
    m = clrandom.rand(queue, (size,), dtype, luxury=None, a=0, b=239432234)

    bitonic = BitonicSort(context)
    sorted_dev, _evt = bitonic(m.copy(), idx=index, axis=0)

    expected_sorted = np.sort(m.get())
    assert np.array_equal(expected_sorted, sorted_dev.get())

    # Comparing ``index`` with ``np.argsort(m.get())`` directly may fail
    # because of identical values in the array, so compare the values that
    # each permutation selects instead.
    via_numpy = m.get()[np.argsort(m.get())]
    via_device = m.get()[index.get()]
    assert np.array_equal(via_numpy, via_device)
@timeit_repeat(reps) def test_bitonic_speed(buff, sorter): sorter(buff)[1].wait() @timeit_repeat(reps) def test_numpy_speed(buff): np.sort(buff) from collections import defaultdict times = defaultdict(list) dtype = np.int32 xs = range(5, 32) bs = BitonicSort(ctx) rs = RadixSort(ctx, "int *ary", key_expr="ary[i]", sort_arg_names=["ary"], scan_kernel=GenericScanKernel) for size in xs: print("running size=2^{} = {}".format(size, 2**size)) s = clrandom.rand(queue, (2**size, ), dtype, a=0, b=2**16) times['bitonic'].append(test_bitonic_speed(s, bs).microseconds / 1000000.) times['radix'].append(test_radix_speed(s, rs).microseconds / 1000000.) times['numpy'].append( test_numpy_speed(s.get().copy()).microseconds / 1000000.) print("\t".join(["Size"] + times.keys())) for idx, s in enumerate(xs):