def test_lin_comb_diff(ctx_factory, arg_type):
    """Run the two-term linear-combination kernel (res = c + 2*a + 3*b)
    on random device arrays and verify the result both via PyOpenCL
    Array arithmetic and against a plain numpy computation on the host.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    if not has_double_support(device):
        if arg_type in (np.float64, np.complex128):
            pytest.skip('Device does not support double.')

    n = 100000
    host_a = np.random.randn(n).astype(arg_type)
    host_b = np.random.randn(n).astype(arg_type)
    host_c = (np.random.randn(n) * 10).astype(arg_type)

    queue = cl.CommandQueue(ctx)
    dev_a = cl.array.to_device(queue, host_a)
    dev_b = cl.array.to_device(queue, host_b)
    dev_c = cl.array.to_device(queue, host_c)
    dev_res = cl.array.empty_like(dev_a)

    # Build a 2-term kernel; the coefficient scalar type is np.float32.
    knl = lin_comb_diff_kernel(ctx, arg_type, arg_type, arg_type,
                               np.float32, 2)
    gs, ls = get_group_sizes(n, device, knl)
    evt = run_elwise_kernel(knl, queue, gs, ls, n, [],
                            dev_res, dev_c, dev_a, dev_b, 2, 3)
    evt.wait()

    # Check on GPU with PyOpenCL Array:
    assert np.linalg.norm(
        (dev_res - (dev_c + 2 * dev_a + 3 * dev_b)).get()) <= 2e-4
    # Check on CPU with Numpy:
    host_res = dev_res.get()
    assert np.linalg.norm(
        host_res - (host_c + 2 * host_a + 3 * host_b)) <= 2e-4
def main2():
    """Drive the wave-equation kernel: assemble an initial condition,
    integrate the ODE system on the device with solve_ode, and fetch
    every result array back to the host.

    Relies on the module-level constants len_x, h_x, t0, t1 and h.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    knl = _get_wave_kernel(ctx)
    gs, ls = get_group_sizes(len_x * 2, device, knl)

    def f(t, y_in, y_out, wait_for=None):
        # Right-hand side of the ODE: a single elementwise kernel launch
        # over the full state vector (displacement + velocity halves).
        return run_elwise_kernel(knl, queue, gs, ls, len_x * 2, wait_for,
                                 y_out, y_in, h_x, len_x)

    xs = np.arange(len_x) * np.pi / (len_x - 1)
    # First half: average of the first five sine modes (displacement);
    # second half: zero initial velocity.
    y0 = np.r_[(np.sin(xs) + np.sin(xs * 2) + np.sin(xs * 3)
                + np.sin(xs * 4) + np.sin(xs * 5)) / 5,
               np.zeros(len_x)].astype(np.float32)
    # Add a localized velocity ramp on 0.15 < x/len_x < 0.25.
    y0 += np.r_[np.zeros(len_x),
                [((i / len_x) - 0.2 if 0.15 < (i / len_x) < 0.25 else 0) * 20
                 for i in range(len_x)]].astype(np.float32)
    # Add a displacement step on 0.75 < x/len_x < 0.85.
    y0 += np.r_[[(1 if 0.75 < (i / len_x) < 0.85 else 0)
                 for i in range(len_x)],
                np.zeros(len_x)].astype(np.float32)

    res, evt = solve_ode(t0, t1, h, y0, f, queue)
    print('queued')
    evt.wait()
    print('finished')
    res_np = [a.get() for a in res]
def test_bloch(ctx_factory):
    """Propagate a three-site equal-weight initial state with the Bloch
    kernel and collect |amplitude| snapshots of every dn-th result.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    knl = _get_bloch_kernel(ctx)

    # Integration and lattice parameters.
    t0 = 0
    t1 = 550
    h = 0.02
    len_x = 512
    t_x = 1
    slope = 0.05

    gs, ls = get_group_sizes(len_x, device, knl)

    def f(t, y_in, y_out, wait_for=None):
        # Right-hand side: one elementwise kernel launch over the lattice.
        return run_elwise_kernel(knl, queue, gs, ls, len_x, wait_for,
                                 y_out, y_in, t_x, slope, len_x)

    # Equal-weight superposition on three lattice sites.
    y0 = np.zeros(len_x).astype(np.complex64)
    amp = 1 / np.sqrt(3)
    y0[int(len_x * 2 / 5)] = amp
    y0[int(len_x / 2)] = amp
    y0[int(len_x * 3 / 5)] = amp

    print('start')
    # Gate the whole computation on a user event so all launches are
    # enqueued before any work starts.
    start_evt = cl.UserEvent(ctx)
    res, evt = solve_ode(t0, t1, h, y0, f, queue, wait_for=[start_evt])
    start_evt.set_status(cl.command_execution_status.COMPLETE)
    print('wait')
    evt.wait()
    print('done')

    dn = 19
    res_np = [np.abs(a.get()) for a in res[::dn]]