예제 #1
0
def test_elwise_wave(ctx_factory):
    """Run ``ElwiseOdeSolver`` on the ``calc_wave_func`` kernel function.

    A solver is built around a thin OpenCL wrapper that forwards to
    ``calc_wave_func`` from ``wave_func.cl``.  A state vector of
    ``2 * len_x`` float32 values is integrated from ``t0`` to ``t1`` with
    step ``h`` and every returned array is copied back to the host.

    NOTE(review): nothing is asserted about the numerical result; the test
    only verifies that the kernel builds, runs, and that the results can be
    fetched.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    dev = queue.device

    # Adapter with the argument list used by the solver; only the arguments
    # calc_wave_func needs are forwarded.
    src = """
    #include <wave_func.cl>
    static float
    _calc_wave_func(float t, const __global float *y_in, size_t i, float y_in_i,
                    float h_x, ulong len_x)
    {
        return calc_wave_func(y_in, h_x, len_x, i);
    }
    """
    # Resolve the include directory to an absolute path: dirname(__file__)
    # alone is an empty string when __file__ is a bare relative file name.
    include_dir = _path.dirname(_path.abspath(__file__))
    solver = ElwiseOdeSolver(ctx, dev, src, "_calc_wave_func",
                             extra_args=(CLArg('h_x', 'float'),
                                         CLArg('len_x', 'ulong')),
                             options=['-I', include_dir])

    # Time range and time step.
    t0 = 0
    t1 = 100
    h = 0.2

    # Grid spacing handed to the kernel and number of grid points.
    h_x = 0.2
    len_x = 256

    # The state has two halves of len_x entries each.
    # NOTE(review): presumably the first half is the field and the second
    # half its time derivative -- confirm against wave_func.cl.
    xs = np.arange(len_x) * np.pi / (len_x - 1)
    sine_mix = (np.sin(xs) + np.sin(xs * 2) + np.sin(xs * 3)
                + np.sin(xs * 4) + np.sin(xs * 5)) / 5
    y0 = np.r_[sine_mix, np.zeros(len_x)].astype(np.float32)

    # Second half: linear ramp through zero for 0.15 < i / len_x < 0.25.
    ramp = [((i / len_x) - 0.2 if 0.15 < (i / len_x) < 0.25 else 0) * 20
            for i in range(len_x)]
    y0 += np.r_[np.zeros(len_x), ramp].astype(np.float32)

    # First half: unit step for 0.75 < i / len_x < 0.85.
    step = [(1 if 0.75 < (i / len_x) < 0.85 else 0)
            for i in range(len_x)]
    y0 += np.r_[step, np.zeros(len_x)].astype(np.float32)

    # len_x is declared as ``ulong`` above, so pass an unsigned 64-bit value.
    res, evt = solver.run(t0, t1, h, y0, queue,
                          extra_args=(np.float32(h_x), np.uint64(len_x)))
    evt.wait()
    # Reading every result back is the only check this test performs.
    res_np = [a.get() for a in res]
예제 #2
0
def _sideband_check_gamma(axis, gamma):
    """Check that ``gamma_<axis>`` is a square float32 matrix; return its size."""
    rows, cols = gamma.shape
    if rows != cols:
        raise ValueError("gamma_%s is not a square matrix." % axis)
    if gamma.dtype != np.float32:
        raise TypeError("The type of gamma_%s should be float32." % axis)
    return rows


def _sideband_check_omegas(axis, omegas, dim):
    """Check ``omegas_<axis>`` is float32 with ``dim`` columns; return row count."""
    count, width = omegas.shape
    if dim != width:
        raise ValueError("The second dimension of omegas_%s is not "
                         "the same with dim_%s." % (axis, axis))
    if omegas.dtype != np.float32:
        raise TypeError("The type of omegas_%s should be float32." % axis)
    return count


def _sideband_check_sequence(name, seq, seq_len):
    """Check that ``seq`` is a uint32 array of shape ``(3, seq_len)``."""
    d1, d2 = seq.shape
    if d1 != 3 or d2 != seq_len:
        raise TypeError("Dimensions of %s should be (3, seq_len)." % name)
    if seq.dtype != np.uint32:
        raise TypeError("The type of %s should be uint32." % name)


def _sideband_omega_offsets(omega_xyz, dims, counts):
    """Turn per-step omega row indices into offsets into the packed omegas buffer.

    ``omega_xyz`` has shape ``(3, seq_len)``; ``dims`` and ``counts`` are the
    per-axis row length and row count of ``omegas_x/y/z``.  The result is a
    flat uint32 array of length ``3 * seq_len`` (all x offsets, then y, then
    z), each offset being ``index * dim`` plus the start of that axis' table.
    """
    dims = np.asarray(dims, dtype=np.int64)
    counts = np.asarray(counts, dtype=np.int64)

    out_of_range = omega_xyz >= counts[:, np.newaxis]
    if out_of_range.any():
        # Report the same entry a step-by-step scan (x, y, z within each
        # step) would hit first.
        step = out_of_range.any(axis=0).argmax()
        axis = int(out_of_range[:, step].argmax())
        raise IndexError("omega_%s index too larger" % "xyz"[axis])

    table_sizes = counts * dims
    table_starts = np.array([0, table_sizes[0],
                             table_sizes[0] + table_sizes[1]], dtype=np.int64)
    offsets = (omega_xyz.astype(np.int64) * dims[:, np.newaxis] +
               table_starts[:, np.newaxis])
    return offsets.astype(np.uint32).ravel()


def _sideband_initial_state(shape, p_a, p_b, p_c):
    """Build the flat float32 initial state from ``p_a``, ``p_b`` and ``p_c``.

    The state has four segments of ``prod(shape)`` entries.  ``p_a`` (if
    given), ``p_b`` and ``p_c`` (if given) fill the first three segments in
    that order; everything else stays zero.
    """
    total_dim = shape[0] * shape[1] * shape[2]
    y0 = np.zeros(total_dim * 4, np.float32)

    if p_a is not None:
        if p_a.shape != shape:
            raise ValueError("Initial value of p_a has wrong shape.")
        y0[:total_dim] = p_a.flatten()
    if p_b.shape != shape:
        raise ValueError("Initial value of p_b has wrong shape.")
    y0[total_dim:total_dim * 2] = p_b.flatten()
    if p_c is not None:
        if p_c.shape != shape:
            raise ValueError("Initial value of p_c has wrong shape.")
        y0[total_dim * 2:total_dim * 3] = p_c.flatten()
    return y0


def evolve_sideband(ctx, queue, gamma_x, gamma_y, gamma_z, pump_branch,
                    omegas_x, omegas_y, omegas_z, h_t, gamma_total,
                    delta_xyz, omega_xyz, p_b, p_a=None, p_c=None):
    """Upload the sideband parameters and run the ``sideband.cl`` ODE solver.

    All inputs are validated on the host first, so that an invalid argument
    is reported before any device buffer is allocated or the OpenCL program
    is built.  The arrays are then copied into read-only device buffers and
    an ``ElwiseOdeSolver`` using ``calc_sbcooling_diff`` (with post function
    ``calc_sbcooling_post``) is run from ``0`` to ``(seq_len - 1) * h_t``
    with step ``h_t``.

    Parameters:
        ctx, queue: OpenCL context and command queue.
        gamma_x, gamma_y, gamma_z: square float32 matrices; their sizes
            define ``dim_x``, ``dim_y`` and ``dim_z``.
        pump_branch: float32 array copied into a 36-byte device buffer.
        omegas_x, omegas_y, omegas_z: float32 arrays whose second dimension
            equals ``dim_x`` / ``dim_y`` / ``dim_z`` respectively.
        h_t: time step; converted to ``np.float32``.
        gamma_total: float32 array of shape ``(seq_len, 3)``.
        delta_xyz: uint32 array of shape ``(3, seq_len)``.
        omega_xyz: uint32 array of shape ``(3, seq_len)`` holding row indices
            into ``omegas_x`` / ``omegas_y`` / ``omegas_z``.
        p_b: array of shape ``(dim_x, dim_y, dim_z)``.
        p_a, p_c: optional arrays of the same shape as ``p_b``.

    Returns:
        Whatever ``ElwiseOdeSolver.run_no_process`` returns.

    Raises:
        ValueError: a gamma matrix is not square, an ``omegas_*`` array has
            the wrong second dimension, or ``p_a`` / ``p_b`` / ``p_c`` has
            the wrong shape.
        TypeError: an array has the wrong dtype, or ``gamma_total``,
            ``delta_xyz`` or ``omega_xyz`` has the wrong dimensions.
        IndexError: an entry of ``omega_xyz`` is not a valid row index.
    """
    # ---- Host-side validation (nothing below touches OpenCL yet) ----
    dim_x = _sideband_check_gamma("x", gamma_x)
    dim_y = _sideband_check_gamma("y", gamma_y)
    dim_z = _sideband_check_gamma("z", gamma_z)

    if pump_branch.dtype != np.float32:
        raise TypeError("The type of pump_branch should be float32.")

    num_omg_x = _sideband_check_omegas("x", omegas_x, dim_x)
    num_omg_y = _sideband_check_omegas("y", omegas_y, dim_y)
    num_omg_z = _sideband_check_omegas("z", omegas_z, dim_z)

    h_t = np.float32(h_t)
    seq_len, d = gamma_total.shape
    t_len = (seq_len - 1) * h_t
    if d != 3:
        raise TypeError("Second dimension of gamma_total should be 3.")
    if gamma_total.dtype != np.float32:
        raise TypeError("The type of gamma_total should be float32.")

    _sideband_check_sequence("delta_xyz", delta_xyz, seq_len)
    _sideband_check_sequence("omega_xyz", omega_xyz, seq_len)
    omega_xyz_offset = _sideband_omega_offsets(
        omega_xyz, (dim_x, dim_y, dim_z), (num_omg_x, num_omg_y, num_omg_z))

    y0 = _sideband_initial_state((dim_x, dim_y, dim_z), p_a, p_b, p_c)

    # ---- Device upload ----
    mf = cl.mem_flags
    # The events of the non-blocking copies are kept referenced until this
    # function returns.
    # NOTE(review): presumably this is what keeps the host arrays alive while
    # the transfers are in flight -- confirm against pyopencl.enqueue_copy.
    events = []

    def upload(dest, host_array, byte_offset=0):
        events.append(cl.enqueue_copy(queue, dest, host_array,
                                      device_offset=byte_offset,
                                      is_blocking=False))

    # The three gamma matrices are packed back to back (4 bytes per float32).
    gamma_xyz = cl.Buffer(ctx, mf.READ_ONLY,
                          (dim_x**2 + dim_y**2 + dim_z**2) * 4)
    upload(gamma_xyz, gamma_x)
    upload(gamma_xyz, gamma_y, dim_x**2 * 4)
    upload(gamma_xyz, gamma_z, (dim_x**2 + dim_y**2) * 4)

    # 8 bytes per row of each gamma matrix; the contents come from
    # _get_gidx_minmax_xyz, which is asked to align unless running on a CPU.
    gidx_minmax_xyz = cl.Buffer(ctx, mf.READ_ONLY, (dim_x + dim_y + dim_z) * 8)
    is_cpu = queue.device.type == cl.device_type.CPU
    upload(gidx_minmax_xyz,
           _get_gidx_minmax_xyz(dim_x, dim_y, dim_z,
                                gamma_x, gamma_y, gamma_z,
                                align=not is_cpu))

    # Fixed 36-byte buffer, i.e. room for nine float32 values.
    pump_branch_gpu = cl.Buffer(ctx, mf.READ_ONLY, 36)
    upload(pump_branch_gpu, pump_branch)

    # The omegas tables are packed back to back in x, y, z order; the offsets
    # computed by _sideband_omega_offsets index into this packed buffer.
    omegas_gpu = cl.Buffer(ctx, mf.READ_ONLY,
                           (num_omg_x * dim_x + num_omg_y * dim_y +
                            num_omg_z * dim_z) * 4)
    upload(omegas_gpu, omegas_x)
    upload(omegas_gpu, omegas_y, num_omg_x * dim_x * 4)
    upload(omegas_gpu, omegas_z, (num_omg_x * dim_x + num_omg_y * dim_y) * 4)

    # Three 4-byte values per sequence step in each of these buffers.
    gamma_total_gpu = cl.Buffer(ctx, mf.READ_ONLY, seq_len * 12)
    upload(gamma_total_gpu, gamma_total)

    delta_xyz_gpu = cl.Buffer(ctx, mf.READ_ONLY, seq_len * 12)
    upload(delta_xyz_gpu, delta_xyz)

    omega_xyz_offset_gpu = cl.Buffer(ctx, mf.READ_ONLY, seq_len * 12)
    upload(omega_xyz_offset_gpu, omega_xyz_offset)

    # ---- Solver setup and launch ----
    dev = queue.device
    src = """
    #include <sideband.cl>
    """
    # NOTE(review): 'gidx_minman_xyz' looks like a typo for 'gidx_minmax_xyz';
    # it is left unchanged because the name is handed to the solver.
    extra_args = (CLArg('dim_x', 'unsigned'),
                  CLArg('dim_y', 'unsigned'),
                  CLArg('dim_z', 'unsigned'),
                  CLArg('gamma_xyz', 'gcfloat_p'),
                  CLArg('gidx_minman_xyz', 'gcuint_p'),
                  CLArg('pump_branch', 'gcfloat_p'),
                  CLArg('omegas', 'gcfloat_p'),
                  CLArg('h_t', 'float'),
                  CLArg('seq_len', 'unsigned'),
                  CLArg('gamma_total', 'gcfloat_p'),
                  CLArg('delta_xyz', 'gcuint_p'),
                  CLArg('omega_xyz_offset', 'gcuint_p'))
    solver = ElwiseOdeSolver(ctx, dev, src, "calc_sbcooling_diff",
                             extra_args=extra_args,
                             options=['-I',
                                      _path.dirname(_path.abspath(__file__))],
                             post_func='calc_sbcooling_post')

    # Values in the same order as the CLArg declarations above.
    extra_args_vals = (np.uint32(dim_x), np.uint32(dim_y), np.uint32(dim_z),
                       gamma_xyz, gidx_minmax_xyz, pump_branch_gpu, omegas_gpu,
                       h_t, np.uint32(seq_len), gamma_total_gpu, delta_xyz_gpu,
                       omega_xyz_offset_gpu)
    return solver.run_no_process(0, t_len, h_t, y0, queue,
                                 extra_args=extra_args_vals)