# Tail of a test body (its `def` lies outside this view) followed by the
# script entry point.
# - Test tail: splits iname "e" by 16 with outer tag "g.0", tags inames
#   i/j as "l.0"/"l.1", prints the kernel, generates and checks loop
#   schedules (K=1000), then calls lp.auto_test_vs_ref against seq_knl.
# - NOTE(review): the leading `1 / 0` raises ZeroDivisionError, so any
#   statements after it in the same body would never run -- presumably a
#   deliberate debugging stop that disables this test; confirm.
# - Entry point: if a command-line argument is given it is exec()'d as
#   Python source (only suitable for trusted, local debugging input);
#   otherwise py.test's cmdline main is run on this file.
1 / 0 knl = lp.split_iname(knl, "e", 16, outer_tag="g.0") #, slabs=(0, 1)) knl = lp.tag_inames(knl, dict(i="l.0", j="l.1")) print(knl) #1/0 kernel_gen = lp.generate_loop_schedules(knl) kernel_gen = lp.check_kernels(kernel_gen, dict(K=1000), kill_level_min=5) lp.auto_test_vs_ref( seq_knl, ctx, kernel_gen, op_count=0, op_label="GFlops", parameters={"K": K}, print_seq_code=True, ) if __name__ == "__main__": import sys if len(sys.argv) > 1: exec(sys.argv[1]) else: from py.test.cmdline import main main([__file__])
# Tail of a test body (its `def` lies outside this view) followed by the
# script entry point.
# - Test tail: for a 2000-element clrand array, compares
#   `a_gpu.get().astype(T)` against `a_gpu.astype(T).get()`:
#     float32 -> float64: result dtype must be float64 and the two
#                         results must match exactly (norm of diff == 0);
#     float64 -> float32: result dtype must be float32 and the relative
#                         difference must be below 1e-7.
# - Entry point: imports pyopencl up front so an import failure is
#   reported rather than the tests being skipped; then, if a command-line
#   argument is given it is exec()'d as Python source (only suitable for
#   trusted, local debugging input); otherwise py.test's cmdline main is
#   run on this file.
a_gpu = clrand(context, queue, (2000,), dtype=numpy.float32) a = a_gpu.get().astype(numpy.float64) a2 = a_gpu.astype(numpy.float64).get() assert a2.dtype == numpy.float64 assert la.norm(a - a2) == 0, (a, a2) a_gpu = clrand(context, queue, (2000,), dtype=numpy.float64) a = a_gpu.get().astype(numpy.float32) a2 = a_gpu.astype(numpy.float32).get() assert a2.dtype == numpy.float32 assert la.norm(a - a2)/la.norm(a) < 1e-7 if __name__ == "__main__": # make sure that import failures get reported, instead of skipping the tests. import pyopencl as cl import sys if len(sys.argv) > 1: exec(sys.argv[1]) else: from py.test.cmdline import main main([__file__])
# Thin test-runner wrapper (handy when debugging tests from an IDE).
if __name__ == "__main__":
    import sys

    from py.test import cmdline

    # Run py.test on the current directory, forwarding any extra
    # command-line arguments, and use its return value as the process
    # exit status.
    runner_args = ['.'] + sys.argv[1:]
    exit_status = cmdline.main(runner_args)
    sys.exit(exit_status)