def test_harmonic(ctx_factory):
    """Check ``solve_ode`` against the analytic solution ``(cos t, -sin t)``.

    The system is integrated from ``t0 = 0`` to ``t1 = 40`` with step
    ``h = 0.05`` starting at ``y0 = (1, 0)``.  Every kernel launch is gated
    on a user event that is only completed *after* ``solve_ode`` returns,
    so the solver has to hand back its results without waiting for any of
    the enqueued steps to run.

    ``ctx_factory`` is called with no arguments and must return the OpenCL
    context to run on.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    knl = _get_harmonic_kernel(ctx)
    # Gate event: nothing enqueued by ``f`` may start before it completes.
    start_evt = cl.UserEvent(ctx)

    def f(t, y_in, y_out, wait_for=None):
        # ``wait_for`` defaults to None; concatenating None with a list
        # raises TypeError, so normalise it to a fresh list first.
        deps = [] if wait_for is None else list(wait_for)
        deps.append(start_evt)
        knl.set_args(y_in.base_data, y_out.base_data)
        return cl.enqueue_task(queue, knl, wait_for=deps)

    t0 = 0
    t1 = 40
    h = 0.05
    y0 = np.array([1, 0]).astype(np.float32)

    res, evt = solve_ode(t0, t1, h, y0, f, queue)
    # Make sure the work is done asynchronously: only now is the gate
    # released, so none of the steps could have executed before this point.
    start_evt.set_status(cl.command_execution_status.COMPLETE)
    host_results = [a.get() for a in res]
    evt.wait()

    # One row per component, one column per stored time step.
    res_np = np.array(host_results).T
    ts = t0 + np.arange(len(res)) * h
    expect0 = np.cos(ts)
    expect1 = -np.sin(ts)
    assert np.linalg.norm(res_np[0] - expect0) < 1e-4
    assert np.linalg.norm(res_np[1] - expect1) < 1e-4
def main2():
    """Run the wave-kernel integration once and fetch every result array.

    Reads ``len_x``, ``h_x``, ``t0``, ``t1`` and ``h`` from the enclosing
    module scope.  The state vector has ``2 * len_x`` entries: the first
    half is seeded with a mean of five sine modes plus a box, the second
    half with a short linear ramp.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    knl = _get_wave_kernel(ctx)
    gs, ls = get_group_sizes(len_x * 2, device, knl)

    def rhs(t, y_in, y_out, wait_for=None):
        # One element-wise launch over the whole (2 * len_x) state vector.
        return run_elwise_kernel(
            knl, queue, gs, ls, len_x * 2, wait_for,
            y_out, y_in, h_x, len_x)

    # Grid spanning [0, pi] inclusive.
    xs = np.arange(len_x) * np.pi / (len_x - 1)
    # Relative position i / len_x of each cell, shared by both bumps below.
    fracs = [i / len_x for i in range(len_x)]
    blank = np.zeros(len_x)

    # First half: average of sin(k * x) for k = 1..5; second half: zeros.
    modes = [np.sin(xs * k) for k in range(1, 6)]
    mode_mean = (modes[0] + modes[1] + modes[2] + modes[3] + modes[4]) / 5
    y0 = np.r_[mode_mean, blank].astype(np.float32)

    # Disabled alternative: triangular bump in the second half.
    # y0 += np.r_[np.zeros(len_x),
    #             [(min((i / len_x) - 0.4, 0.5 - (i / len_x)) * 20
    #               if 0.4 < (i / len_x) < 0.5 else 0)
    #              for i in range(len_x)]].astype(np.float32)

    # Second half: linear ramp through zero at 0.2, limited to (0.15, 0.25).
    ramp = [(x - 0.2 if 0.15 < x < 0.25 else 0) * 20 for x in fracs]
    y0 += np.r_[blank, ramp].astype(np.float32)

    # Disabled alternative: replace the first half by a box on (0.4, 0.5).
    # y0 = np.r_[[(1 if 0.4 < (i / len_x) < 0.5 else 0)
    #             for i in range(len_x)],
    #            np.zeros(len_x)].astype(np.float32)

    # First half: unit box on (0.75, 0.85).
    box = [(1 if 0.75 < x < 0.85 else 0) for x in fracs]
    y0 += np.r_[box, blank].astype(np.float32)

    res, evt = solve_ode(t0, t1, h, y0, rhs, queue)
    print('queued')
    evt.wait()
    print('finished')
    # Fetch each stored step with .get(); the list is not used further here.
    res_np = [step.get() for step in res]
def test_bloch(ctx_factory):
    """Smoke-run the Bloch kernel through ``solve_ode`` on 512 sites.

    The complex64 state starts with equal amplitude ``1 / sqrt(3)`` on three
    sites and is integrated from ``t = 0`` to ``t = 550`` with step ``0.02``.
    The whole solve is gated on a user event that is released right after
    ``solve_ode`` returns.  No numerical assertion is made; the test only
    exercises the pipeline and takes the magnitude of every 19th result.

    ``ctx_factory`` is called with no arguments and must return the OpenCL
    context to run on.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    # if device.type != cl.device_type.GPU:
    #     pytest.skip('Only use GPU')
    knl = _get_bloch_kernel(ctx)

    t0, t1, h = 0, 550, 0.02
    len_x = 512
    t_x = 1
    slope = 0.05
    # slope = 0

    gs, ls = get_group_sizes(len_x, device, knl)

    def rhs(t, y_in, y_out, wait_for=None):
        # One element-wise launch over all len_x sites.
        return run_elwise_kernel(
            knl, queue, gs, ls, len_x, wait_for,
            y_out, y_in, t_x, slope, len_x)

    y0 = np.zeros(len_x, dtype=np.complex64)
    # Disabled alternatives: a single site at len_x / 2 with amplitude 1, or
    # two sites at 2/5 and 3/5 of the lattice with amplitude sqrt(2) / 2.
    # y0[int(len_x / 2)] = 1
    # y0[int(len_x * 2 / 5)] = np.sqrt(2) / 2
    # y0[int(len_x * 3 / 5)] = np.sqrt(2) / 2
    amplitude = 1 / np.sqrt(3)
    seeded_sites = (int(len_x * 2 / 5), int(len_x / 2), int(len_x * 3 / 5))
    for site in seeded_sites:
        y0[site] = amplitude

    print('start')
    gate = cl.UserEvent(ctx)
    res, evt = solve_ode(t0, t1, h, y0, rhs, queue, wait_for=[gate])
    # Release the gate only once everything has been queued.
    gate.set_status(cl.command_execution_status.COMPLETE)
    print('wait')
    evt.wait()
    print('done')

    # Keep every 19th stored step and take element-wise magnitudes.
    dn = 19
    res_np = [np.abs(step.get()) for step in res[::dn]]