Example #1
0
def test_lin_comb_diff(ctx_factory, arg_type):
    """Check the linear-combination kernel against res = c + 2*a + 3*b.

    Validates the device result both via PyOpenCL array arithmetic and
    against a plain numpy reference on the host.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    # Double-precision dtypes require fp64 support on the device.
    if arg_type in (np.float64, np.complex128) and not has_double_support(device):
        pytest.skip('Device does not support double.')

    n = 100000
    host_a = np.random.randn(n).astype(arg_type)
    host_b = np.random.randn(n).astype(arg_type)
    host_c = (np.random.randn(n) * 10).astype(arg_type)
    queue = cl.CommandQueue(ctx)

    dev_a = cl.array.to_device(queue, host_a)
    dev_b = cl.array.to_device(queue, host_b)
    dev_c = cl.array.to_device(queue, host_c)
    dev_res = cl.array.empty_like(dev_a)

    kernel = lin_comb_diff_kernel(ctx, arg_type, arg_type,
                                  arg_type, np.float32, 2)
    gs, ls = get_group_sizes(n, device, kernel)

    # Launch res = c + 2*a + 3*b and block until it completes.
    evt = run_elwise_kernel(kernel, queue, gs, ls, n, [],
                            dev_res, dev_c, dev_a, dev_b, 2, 3)
    evt.wait()

    # Device-side check using PyOpenCL array arithmetic.
    assert np.linalg.norm((dev_res - (dev_c + 2 * dev_a + 3 * dev_b)).get()) <= 2e-4

    # Host-side check with plain numpy.
    host_res = dev_res.get()
    assert np.linalg.norm(host_res - (host_c + 2 * host_a + 3 * host_b)) <= 2e-4
Example #2
0
def main2():
    """Integrate the 1-D wave kernel from a composed initial condition.

    The state vector is the concatenation of ``len_x`` positions followed
    by ``len_x`` velocities (hence the ``len_x * 2`` work size).
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    kernel = _get_wave_kernel(ctx)

    gs, ls = get_group_sizes(len_x * 2, device, kernel)

    def f(t, y_in, y_out, wait_for=None):
        # One right-hand-side evaluation on the device; returns its event.
        return run_elwise_kernel(kernel, queue, gs, ls, len_x * 2, wait_for,
                                 y_out, y_in, h_x, len_x)

    xs = np.arange(len_x) * np.pi / (len_x - 1)
    # Positions: average of the first five sine modes; velocities start at 0.
    y0 = np.r_[sum(np.sin(xs * mode) for mode in range(1, 6)) / 5,
               np.zeros(len_x)].astype(np.float32)
    # Add a localized velocity ramp on the interval (0.15, 0.25).
    frac = np.arange(len_x) / len_x
    y0 += np.r_[np.zeros(len_x),
                np.where((frac > 0.15) & (frac < 0.25),
                         (frac - 0.2) * 20, 0)].astype(np.float32)
    # Add a rectangular position bump on the interval (0.75, 0.85).
    y0 += np.r_[np.where((frac > 0.75) & (frac < 0.85), 1, 0),
                np.zeros(len_x)].astype(np.float32)

    res, evt = solve_ode(t0, t1, h, y0, f, queue)
    print('queued')
    evt.wait()
    print('finished')
    res_np = [frame.get() for frame in res]
Example #3
0
def test_bloch(ctx_factory):
    """Propagate a three-site superposition through the Bloch kernel."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    device = queue.device
    kernel = _get_bloch_kernel(ctx)

    # Integration window and step size.
    t_start = 0
    t_end = 550
    step = 0.02

    len_x = 512
    t_x = 1
    slope = 0.05

    gs, ls = get_group_sizes(len_x, device, kernel)

    def f(t, y_in, y_out, wait_for=None):
        # One right-hand-side evaluation on the device; returns its event.
        return run_elwise_kernel(kernel, queue, gs, ls, len_x, wait_for,
                                 y_out, y_in, t_x, slope, len_x)

    # Equal-amplitude superposition on three sites (norm 1 overall).
    y0 = np.zeros(len_x).astype(np.complex64)
    amplitude = 1 / np.sqrt(3)
    for position in (len_x * 2 / 5, len_x / 2, len_x * 3 / 5):
        y0[int(position)] = amplitude

    print('start')
    # Hold the whole integration behind a user event, then release it so
    # everything was enqueued before any work starts.
    gate = cl.UserEvent(ctx)
    res, evt = solve_ode(t_start, t_end, step, y0, f, queue, wait_for=[gate])
    gate.set_status(cl.command_execution_status.COMPLETE)
    print('wait')
    evt.wait()
    print('done')
    # Keep the magnitude of every 19th frame for inspection.
    stride = 19
    res_np = [np.abs(frame.get()) for frame in res[::stride]]