def test_coarse_grain_svm(ctx_factory):
    """Round-trip a coarse-grain SVM array through a doubling kernel.

    The array is zero-filled on the device, written from the host under a
    read-write mapping, doubled by the ``twice`` kernel and checked under a
    read-only mapping.  Finally it is copied into ordinary host memory,
    except on pocl.

    Skipped when SVM is unavailable; expected to fail on AMD CPU devices.
    """
    import sys
    from pytest import skip

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    dev = ctx.devices[0]
    on_pocl = dev.platform.name == "Portable Computing Language"

    # Generic rule: context, device and headers must all report CL >= 2.0.
    svm_supported = (
        ctx._get_cl_version() >= (2, 0)
        and dev._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0)
    )
    if on_pocl:
        # For pocl the decision is made from the pocl release number
        # (plus the header version) instead of the reported CL versions.
        svm_supported = (
            get_pocl_version(dev.platform) >= (1, 0)
            and cl.get_cl_header_version() >= (2, 0)
        )
    if not svm_supported:
        skip("SVM only available in OpenCL 2.0 and higher")

    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n_elems = 3000
    svm_ary = cl.SVM(
        cl.csvm_empty(ctx, (n_elems,), np.float32, alignment=64))

    running_on_pypy = "__pypy__" in sys.builtin_module_names
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    zero = np.zeros((), svm_ary.mem.dtype)
    cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as mapped:
        mapped.fill(17)
        snapshot = mapped.copy()
    doubled = snapshot * 2

    kernel_src = (
        " __kernel void twice(__global float *a_g)"
        " { a_g[get_global_id(0)] *= 2; } "
    )
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as mapped:
        print(mapped)
        assert np.array_equal(doubled, mapped)

    # Start from a sentinel value so a copy that does nothing is detected.
    host_copy = np.full_like(snapshot, -1)

    if not on_pocl:
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl up to and including 1.0rc1.
        cl.enqueue_copy(queue, host_copy, svm_ary)
        assert np.array_equal(doubled, host_copy)
def test_coarse_grain_svm(ctx_factory):
    """Check that ``cl.csvm_empty`` returns an array backed by SVM memory.

    Only the allocation is inspected, so no command queue is created.
    Skipped when the context or the headers report OpenCL older than 2.0.
    """
    from pytest import skip

    ctx = ctx_factory()

    svm_capable = (
        ctx._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0)
    )
    if not svm_capable:
        skip("SVM only available in OpenCL 2.0 and higher")

    svm_ary = cl.csvm_empty(ctx, (100, 100), np.float32, alignment=64)
    assert isinstance(svm_ary.base, cl.SVMAllocation)
def test_coarse_grain_svm(ctx_factory):
    """Round-trip a coarse-grain SVM array through a doubling kernel.

    The array is written from the host under a read-write mapping, doubled
    by the ``twice`` kernel and checked under a read-only mapping.  The
    device-side zero fill and the final copy to host memory are both
    skipped on pocl.

    Skipped when the context or the headers report OpenCL older than 2.0;
    expected to fail on AMD CPU devices.
    """
    import sys
    from pytest import skip

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    svm_capable = (
        ctx._get_cl_version() >= (2, 0)
        and cl.get_cl_header_version() >= (2, 0)
    )
    if not svm_capable:
        skip("SVM only available in OpenCL 2.0 and higher")

    dev = ctx.devices[0]
    if "AMD" in dev.platform.name and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")

    n_elems = 3000
    svm_ary = cl.SVM(
        cl.csvm_empty(ctx, (n_elems,), np.float32, alignment=64))

    running_on_pypy = "__pypy__" in sys.builtin_module_names
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    on_pocl = dev.platform.name == "Portable Computing Language"

    if not on_pocl:
        # pocl 0.13 has a bug misinterpreting the size parameter
        zero = np.zeros((), svm_ary.mem.dtype)
        cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as mapped:
        mapped.fill(17)
        snapshot = mapped.copy()
    doubled = snapshot * 2

    kernel_src = (
        " __kernel void twice(__global float *a_g)"
        " { a_g[get_global_id(0)] *= 2; } "
    )
    prg = cl.Program(ctx, kernel_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as mapped:
        print(mapped)
        assert np.array_equal(doubled, mapped)

    # Start from a sentinel value so a copy that does nothing is detected.
    host_copy = np.full_like(snapshot, -1)

    if not on_pocl:
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl 0.13.
        cl.enqueue_copy(queue, host_copy, svm_ary)
        assert np.array_equal(doubled, host_copy)
def test_coarse_grain_svm(ctx_factory):
    """Round-trip coarse-grain SVM arrays through device kernels.

    Part one zero-fills an SVM array on the device, writes it from the host
    under a read-write mapping, doubles it with the ``twice`` kernel and
    checks the result under a read-only mapping, then copies it to ordinary
    host memory (copy skipped on pocl).

    Part two (pyopencl issue 372) runs ``twice_ro`` reading from an SVM
    array allocated with ``READ_ONLY`` flags and prints the output; it
    makes no assertion about the values.

    Skipped when the device lacks coarse-grain buffer SVM; expected to fail
    on AMD CPU and AMD GPU devices.
    """
    import sys

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    _xfail_if_pocl_gpu(queue.device, "SVM")

    dev = ctx.devices[0]

    from pyopencl.characterize import has_coarse_grain_buffer_svm
    from pytest import skip

    if not has_coarse_grain_buffer_svm(queue.device):
        skip("device does not support coarse-grain SVM")

    if "AMD" in dev.platform.name:
        if dev.type & cl.device_type.CPU:
            pytest.xfail("AMD CPU doesn't do coarse-grain SVM")
        if dev.type & cl.device_type.GPU:
            pytest.xfail("AMD GPU crashes on SVM unmap")

    n_elems = 3000
    svm_ary = cl.SVM(
        cl.csvm_empty(ctx, (n_elems,), np.float32, alignment=64))

    running_on_pypy = "__pypy__" in sys.builtin_module_names
    if not running_on_pypy:
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_ary.mem.base, cl.SVMAllocation)

    zero = np.zeros((), svm_ary.mem.dtype)
    cl.enqueue_svm_memfill(queue, svm_ary, zero)

    with svm_ary.map_rw(queue) as mapped:
        mapped.fill(17)
        snapshot = mapped.copy()
    doubled = snapshot * 2

    twice_src = (
        " __kernel void twice(__global float *a_g)"
        " { a_g[get_global_id(0)] *= 2; } "
    )
    prg = cl.Program(ctx, twice_src).build()
    prg.twice(queue, svm_ary.mem.shape, None, svm_ary)

    with svm_ary.map_ro(queue) as mapped:
        print(mapped)
        assert np.array_equal(doubled, mapped)

    # Start from a sentinel value so a copy that does nothing is detected.
    host_copy = np.full_like(snapshot, -1)

    if ctx.devices[0].platform.name != "Portable Computing Language":
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl up to and including 1.0rc1.
        cl.enqueue_copy(queue, host_copy, svm_ary)
        assert np.array_equal(doubled, host_copy)

    # {{{ https://github.com/inducer/pyopencl/issues/372

    buf_arr = cl.svm_empty(ctx, cl.svm_mem_flags.READ_ONLY, 10, np.int32)
    out_arr = cl.svm_empty(ctx, cl.svm_mem_flags.READ_WRITE, 10, np.int32)
    svm_buf_arr = cl.SVM(buf_arr)
    svm_out_arr = cl.SVM(out_arr)

    # Populate the kernel input from the host side.
    with svm_buf_arr.map_rw(queue) as mapped:
        mapped.fill(17)

    twice_ro_src = (
        " __kernel void twice_ro(__global int *out_g, __global int *in_g)"
        " { out_g[get_global_id(0)] = 2*in_g[get_global_id(0)]; } "
    )
    prg_ro = cl.Program(ctx, twice_ro_src).build()
    prg_ro.twice_ro(queue, buf_arr.shape, None, svm_out_arr, svm_buf_arr)

    with svm_out_arr.map_ro(queue) as mapped:
        print(mapped)
) prg = cl.Program(ctx, """ __kernel void twice( __global float *a_g) { int gid = get_global_id(0); a_g[gid] = 2*a_g[gid]; } """).build() if has_coarse_grain_buffer_svm(dev): print("Testing coarse-grained buffer SVM...", end="") svm_ary = cl.SVM(cl.csvm_empty(ctx, 10, np.float32)) assert isinstance(svm_ary.mem, np.ndarray) with svm_ary.map_rw(queue) as ary: ary.fill(17) # use from host orig_ary = ary.copy() prg.twice(queue, svm_ary.mem.shape, None, svm_ary) queue.finish() with svm_ary.map_ro(queue) as ary: assert(np.array_equal(orig_ary*2, ary)) print(" done.") if has_fine_grain_buffer_svm(dev):