def test_coarse_grain_svm(ctx_factory):
    """Round-trip data through a coarse-grain SVM buffer and a kernel.

    The test allocates a float32 SVM array, zero-fills it on the device,
    writes 17 into it through a read-write host mapping, doubles every
    element with a small kernel and checks the result through a read-only
    mapping.  Unless the platform is pocl, the buffer is additionally
    copied back into a plain numpy array and checked again.

    The test is skipped when the device reports no coarse-grain buffer
    SVM support and is marked as an expected failure on AMD CPU and GPU
    devices.

    :arg ctx_factory: callable returning the OpenCL context to test with.
    """
    import sys
    from pyopencl.characterize import has_coarse_grain_buffer_svm

    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    dev = ctx.devices[0]

    if not has_coarse_grain_buffer_svm(queue.device):
        pytest.skip("device does not support coarse-grain SVM")

    # Known-bad vendor/device combinations; the CPU check comes first.
    on_amd = "AMD" in dev.platform.name
    if on_amd and dev.type & cl.device_type.CPU:
        pytest.xfail("AMD CPU doesn't do coarse-grain SVM")
    if on_amd and dev.type & cl.device_type.GPU:
        pytest.xfail("AMD GPU crashes on SVM unmap")

    n = 3000
    svm_buf = cl.SVM(cl.csvm_empty(ctx, (n,), np.float32, alignment=64))

    running_on_pypy = '__pypy__' in sys.builtin_module_names
    if not running_on_pypy:
        # The base check is not done on PyPy, see
        # https://bitbucket.org/pypy/numpy/issues/52
        assert isinstance(svm_buf.mem.base, cl.SVMAllocation)

    zero = np.zeros((), svm_buf.mem.dtype)
    cl.enqueue_svm_memfill(queue, svm_buf, zero)

    # Write from the host side and keep a copy to compare against later.
    with svm_buf.map_rw(queue) as host_view:
        host_view.fill(17)
        reference = host_view.copy()

    kernel_src = """ __kernel void twice(__global float *a_g) { a_g[get_global_id(0)] *= 2; } """
    program = cl.Program(ctx, kernel_src).build()
    program.twice(queue, svm_buf.mem.shape, None, svm_buf)

    expected = reference*2

    with svm_buf.map_ro(queue) as host_view:
        print(host_view)
        assert np.array_equal(expected, host_view)

    # Start from a sentinel value so a copy that does nothing is detected.
    readback = np.empty_like(reference)
    readback.fill(-1)

    if dev.platform.name == "Portable Computing Language":
        # "Blocking memcpy is unimplemented (clEnqueueSVMMemcpy.c:61)"
        # in pocl up to and including 1.0rc1.
        return

    cl.enqueue_copy(queue, readback, svm_buf)
    assert np.array_equal(expected, readback)
f" Coarse-grained buffer SVM: {has_coarse_grain_buffer_svm(dev)}\n" f" Fine-grained buffer SVM: {has_fine_grain_buffer_svm(dev)}\n" f" Fine-grained system SVM: {has_fine_grain_system_svm(dev)}" ) prg = cl.Program(ctx, """ __kernel void twice( __global float *a_g) { int gid = get_global_id(0); a_g[gid] = 2*a_g[gid]; } """).build() if has_coarse_grain_buffer_svm(dev): print("Testing coarse-grained buffer SVM...", end="") svm_ary = cl.SVM(cl.csvm_empty(ctx, 10, np.float32)) assert isinstance(svm_ary.mem, np.ndarray) with svm_ary.map_rw(queue) as ary: ary.fill(17) # use from host orig_ary = ary.copy() prg.twice(queue, svm_ary.mem.shape, None, svm_ary) queue.finish() with svm_ary.map_ro(queue) as ary: assert(np.array_equal(orig_ary*2, ary))