def test_elwise_wave(ctx_factory):
    """Smoke-test ``ElwiseOdeSolver`` with the ``wave_func.cl`` kernel.

    Builds a solver around a thin OpenCL wrapper for ``calc_wave_func``,
    integrates a 2 * ``len_x`` element state vector from ``t0`` to ``t1``
    and copies every returned array back to the host.  No numerical
    assertion is made; the test only requires the run and the transfers
    to complete.

    ``ctx_factory`` is called with no arguments and must return an OpenCL
    context.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)
    device = queue.device

    # Adapter giving ``calc_wave_func`` the signature the solver calls with.
    kernel_src = """
    #include <wave_func.cl>
    static float
    _calc_wave_func(float t, const __global float *y_in, size_t i,
                    float y_in_i, float h_x, ulong len_x)
    {
        return calc_wave_func(y_in, h_x, len_x, i);
    }
    """
    kernel_args = (CLArg('h_x', 'float'), CLArg('len_x', 'ulong'))
    ode_solver = ElwiseOdeSolver(ctx, device, kernel_src, "_calc_wave_func",
                                 extra_args=kernel_args,
                                 options=['-I', _path.dirname(__file__)])

    # Integration interval / time step and spatial grid parameters.
    t0 = 0
    t1 = 100
    h = 0.2
    h_x = 0.2
    len_x = 256

    # First half of the state: mean of the first five sine harmonics on
    # [0, pi].  Second half starts at zero.
    # NOTE(review): presumably first half = displacement and second half =
    # its time derivative -- confirm against wave_func.cl.
    xs = np.arange(len_x) * np.pi / (len_x - 1)
    harmonics = np.sin(xs)
    for k in (2, 3, 4, 5):
        harmonics = harmonics + np.sin(xs * k)
    y0 = np.concatenate([harmonics / 5, np.zeros(len_x)]).astype(np.float32)

    # Earlier variants of the initial condition that were tried and then
    # disabled: a triangular bump (scaled by 20) on 0.4 < i / len_x < 0.5
    # added to the second half, and a unit square pulse on that same
    # interval used as the whole first half instead of the sine sum.

    # Second half: linear ramp through zero at i / len_x == 0.2, limited to
    # 0.15 < i / len_x < 0.25 and scaled by 20.
    ramp = []
    for i in range(len_x):
        frac = i / len_x
        level = frac - 0.2 if 0.15 < frac < 0.25 else 0
        ramp.append(level * 20)
    y0 += np.concatenate([np.zeros(len_x), ramp]).astype(np.float32)

    # First half: unit square pulse on 0.75 < i / len_x < 0.85.
    pulse = []
    for i in range(len_x):
        pulse.append(1 if 0.75 < (i / len_x) < 0.85 else 0)
    y0 += np.concatenate([pulse, np.zeros(len_x)]).astype(np.float32)

    res, evt = ode_solver.run(t0, t1, h, y0, queue,
                              extra_args=(np.float32(h_x), np.int64(len_x)))
    evt.wait()

    # Pull every result array back to the host.
    res_np = []
    for dev_array in res:
        res_np.append(dev_array.get())
def _sb_require_dtype(arr, dtype, dtype_name, name):
    """Raise ``TypeError`` unless ``arr.dtype`` equals ``dtype``."""
    if arr.dtype != dtype:
        raise TypeError("The type of %s should be %s." % (name, dtype_name))


def _sb_square_dim(mat, name):
    """Return the side length of ``mat`` after checking it is square float32."""
    rows, cols = mat.shape
    if rows != cols:
        raise ValueError("%s is not a square matrix." % name)
    _sb_require_dtype(mat, np.float32, "float32", name)
    return rows


def _sb_omegas_rows(omegas, dim, axis):
    """Return the row count of ``omegas_<axis>`` after checking width/dtype."""
    rows, cols = omegas.shape
    if dim != cols:
        raise ValueError("The second dimension of omegas_%s is not "
                         "the same with dim_%s." % (axis, axis))
    _sb_require_dtype(omegas, np.float32, "float32", "omegas_" + axis)
    return rows


def _sb_check_sequence_table(table, seq_len, name):
    """Check that ``table`` is a ``(3, seq_len)`` uint32 array."""
    rows, cols = table.shape
    if rows != 3 or cols != seq_len:
        raise TypeError("Dimensions of %s should be (3, seq_len)." % name)
    _sb_require_dtype(table, np.uint32, "uint32", name)


def _sb_omega_offsets(omega_xyz, dims, counts):
    """Turn per-step omega row indices into offsets into the packed omegas.

    ``omega_xyz`` is ``(3, seq_len)``; row ``a`` holds indices into the rows
    of the ``a``-th omegas array, which has ``counts[a]`` rows of ``dims[a]``
    elements.  The three omegas arrays are packed back-to-back, so the
    offset of row ``r`` on axis ``a`` is
    ``r * dims[a] + sum(counts[b] * dims[b] for b < a)``.

    Returns a flat uint32 array laid out as all x offsets, then all y
    offsets, then all z offsets.  Raises ``IndexError`` for an out-of-range
    index; the message names the first offending axis (x, y, z order) of the
    earliest offending step, i.e. what a step-by-step scan would report.
    """
    dims = np.asarray(dims, dtype=np.int64).reshape(3, 1)
    counts = np.asarray(counts, dtype=np.int64).reshape(3, 1)

    out_of_range = omega_xyz >= counts
    if out_of_range.any():
        step = int(out_of_range.any(axis=0).argmax())
        axis = int(out_of_range[:, step].argmax())
        raise IndexError("omega_%s index too larger" % "xyz"[axis])

    block_sizes = (counts * dims).ravel()
    starts = np.array([0, block_sizes[0], block_sizes[0] + block_sizes[1]],
                      dtype=np.int64).reshape(3, 1)
    offsets = omega_xyz.astype(np.int64) * dims + starts
    # Computed in int64 and narrowed last, so values wrap modulo 2**32
    # instead of depending on scalar promotion rules.
    return offsets.astype(np.uint32).ravel()


def _sb_upload(ctx, queue, events, arrays):
    """Pack ``arrays`` back-to-back into a new read-only device buffer.

    Each array is made C-contiguous first (no copy when it already is), so
    the bytes sent are always in row-major order.  The copies are enqueued
    non-blocking and their events are appended to ``events``.
    """
    host_arrays = [np.ascontiguousarray(a) for a in arrays]
    size = sum(a.nbytes for a in host_arrays)
    buf = cl.Buffer(ctx, cl.mem_flags.READ_ONLY, size)
    offset = 0
    for host in host_arrays:
        events.append(cl.enqueue_copy(queue, buf, host,
                                      device_offset=offset,
                                      is_blocking=False))
        offset += host.nbytes
    return buf


def evolve_sideband(ctx, queue, gamma_x, gamma_y, gamma_z, pump_branch,
                    omegas_x, omegas_y, omegas_z, h_t, gamma_total,
                    delta_xyz, omega_xyz, p_b, p_a=None, p_c=None):
    """Upload the inputs and run the ``sideband.cl`` ODE kernel.

    All arguments are validated on the host before any device memory is
    allocated.  The arrays are then copied into read-only OpenCL buffers
    and an ``ElwiseOdeSolver`` built from ``sideband.cl`` (functions
    ``calc_sbcooling_diff`` / ``calc_sbcooling_post``) is run from time 0 to
    ``(seq_len - 1) * h_t`` with step ``h_t``.

    Parameters:
        ctx, queue: OpenCL context and command queue to use.
        gamma_x, gamma_y, gamma_z: square float32 matrices; their side
            lengths define ``dim_x``, ``dim_y`` and ``dim_z``.
        pump_branch: float32 array with exactly 9 values (the device buffer
            for it is 36 bytes).
        omegas_x, omegas_y, omegas_z: float32 arrays of shape
            ``(num_omg_<axis>, dim_<axis>)``.
        h_t: time step; converted to ``np.float32``.
        gamma_total: float32 array of shape ``(seq_len, 3)``.
        delta_xyz: uint32 array of shape ``(3, seq_len)``.
        omega_xyz: uint32 array of shape ``(3, seq_len)``; row ``a`` selects,
            for every step, a row of the ``a``-th omegas array.
        p_b: initial values of shape ``(dim_x, dim_y, dim_z)``; stored in the
            second quarter of the state vector.
        p_a, p_c: optional initial values of the same shape for the first and
            third quarter; left at zero when ``None``.

    Returns:
        Whatever ``ElwiseOdeSolver.run_no_process`` returns.

    Raises:
        ValueError: non-square gamma matrix, omegas width mismatch,
            ``pump_branch`` not holding 9 values, or wrong ``p_*`` shape.
        TypeError: wrong dtype, or wrong shape of ``gamma_total`` /
            ``delta_xyz`` / ``omega_xyz`` (exception type kept for
            compatibility with existing callers).
        IndexError: an entry of ``omega_xyz`` is not a valid row index.
    """
    # ---- Host-side validation: fail before touching the device. ----
    dim_x = _sb_square_dim(gamma_x, "gamma_x")
    dim_y = _sb_square_dim(gamma_y, "gamma_y")
    dim_z = _sb_square_dim(gamma_z, "gamma_z")
    total_dim = dim_x * dim_y * dim_z

    _sb_require_dtype(pump_branch, np.float32, "float32", "pump_branch")
    if pump_branch.size != 9:
        # Fewer values would leave part of the 36-byte device buffer
        # uninitialised; more would not fit into it.
        raise ValueError("pump_branch should contain exactly 9 float32 "
                         "values.")

    num_omg_x = _sb_omegas_rows(omegas_x, dim_x, "x")
    num_omg_y = _sb_omegas_rows(omegas_y, dim_y, "y")
    num_omg_z = _sb_omegas_rows(omegas_z, dim_z, "z")

    h_t = np.float32(h_t)
    seq_len, width = gamma_total.shape
    t_len = (seq_len - 1) * h_t
    if width != 3:
        raise TypeError("Second dimension of gamma_total should be 3.")
    _sb_require_dtype(gamma_total, np.float32, "float32", "gamma_total")

    _sb_check_sequence_table(delta_xyz, seq_len, "delta_xyz")
    _sb_check_sequence_table(omega_xyz, seq_len, "omega_xyz")
    omega_xyz_offset = _sb_omega_offsets(omega_xyz,
                                         (dim_x, dim_y, dim_z),
                                         (num_omg_x, num_omg_y, num_omg_z))

    # State vector: four blocks of total_dim values.  p_a, p_b and p_c fill
    # the first three; the last block starts at zero.
    # NOTE(review): the fourth block is presumably used by the kernel or the
    # post function -- confirm in sideband.cl.
    state_shape = (dim_x, dim_y, dim_z)
    y0 = np.zeros(total_dim * 4, np.float32)
    if p_a is not None:
        if p_a.shape != state_shape:
            raise ValueError("Initial value of p_a has wrong shape.")
        y0[:total_dim] = p_a.flatten()
    if p_b.shape != state_shape:
        raise ValueError("Initial value of p_b has wrong shape.")
    y0[total_dim:total_dim * 2] = p_b.flatten()
    if p_c is not None:
        if p_c.shape != state_shape:
            raise ValueError("Initial value of p_c has wrong shape.")
        y0[total_dim * 2:total_dim * 3] = p_c.flatten()

    # ---- Device uploads (all non-blocking, same order as before). ----
    # The events are never waited on here; they are only kept referenced
    # until this function returns.
    # NOTE(review): presumably this keeps the host arrays alive while the
    # copies are in flight and relies on queue ordering -- confirm.
    events = []
    gamma_xyz = _sb_upload(ctx, queue, events, (gamma_x, gamma_y, gamma_z))

    # Two 4-byte entries per row of each gamma matrix; alignment is only
    # requested on non-CPU devices.
    gidx_minmax_xyz = cl.Buffer(ctx, cl.mem_flags.READ_ONLY,
                                (dim_x + dim_y + dim_z) * 8)
    is_cpu = queue.device.type == cl.device_type.CPU
    gidx_minmax = _get_gidx_minmax_xyz(dim_x, dim_y, dim_z,
                                       gamma_x, gamma_y, gamma_z,
                                       align=not is_cpu)
    events.append(cl.enqueue_copy(queue, gidx_minmax_xyz, gidx_minmax,
                                  device_offset=0, is_blocking=False))

    pump_branch_gpu = _sb_upload(ctx, queue, events, (pump_branch,))
    omegas_gpu = _sb_upload(ctx, queue, events,
                            (omegas_x, omegas_y, omegas_z))
    gamma_total_gpu = _sb_upload(ctx, queue, events, (gamma_total,))
    delta_xyz_gpu = _sb_upload(ctx, queue, events, (delta_xyz,))
    omega_xyz_offset_gpu = _sb_upload(ctx, queue, events,
                                      (omega_xyz_offset,))

    # ---- Build and run the solver. ----
    dev = queue.device
    src = """
    #include <sideband.cl>
    """
    # NOTE(review): 'gidx_minman_xyz' looks like a misspelling of
    # 'gidx_minmax_xyz'; left untouched because the name ends up in the
    # generated kernel signature.
    extra_args = (CLArg('dim_x', 'unsigned'),
                  CLArg('dim_y', 'unsigned'),
                  CLArg('dim_z', 'unsigned'),
                  CLArg('gamma_xyz', 'gcfloat_p'),
                  CLArg('gidx_minman_xyz', 'gcuint_p'),
                  CLArg('pump_branch', 'gcfloat_p'),
                  CLArg('omegas', 'gcfloat_p'),
                  CLArg('h_t', 'float'),
                  CLArg('seq_len', 'unsigned'),
                  CLArg('gamma_total', 'gcfloat_p'),
                  CLArg('delta_xyz', 'gcuint_p'),
                  CLArg('omega_xyz_offset', 'gcuint_p'))
    solver = ElwiseOdeSolver(
        ctx, dev, src, "calc_sbcooling_diff",
        extra_args=extra_args,
        options=['-I', _path.dirname(_path.abspath(__file__))],
        post_func='calc_sbcooling_post')

    seq_len = np.uint32(seq_len)
    extra_args_vals = (np.uint32(dim_x), np.uint32(dim_y), np.uint32(dim_z),
                       gamma_xyz, gidx_minmax_xyz, pump_branch_gpu,
                       omegas_gpu, h_t, seq_len, gamma_total_gpu,
                       delta_xyz_gpu, omega_xyz_offset_gpu)
    return solver.run_no_process(0, t_len, h_t, y0, queue,
                                 extra_args=extra_args_vals)