예제 #1
0
    def init():
        """Reset the macroscopic arrays and run a setter kernel on ``src``.

        The density array is filled with 1 and the velocity array with 0.
        A macroscopic-values setter (zero velocity, density read from the
        density field, ``set_pre_collision_pdfs=True``) is then built,
        compiled and executed through the data handling.
        """
        for array_name, fill_value in ((ρ.name, 1), (u.name, 0)):
            dh.fill(array_name, fill_value)

        zero_velocity = (0, ) * dh.dim
        setter_assignments = macroscopic_values_setter(
            collision.method,
            velocity=zero_velocity,
            pdfs=src,
            density=ρ.center,
            set_pre_collision_pdfs=True)

        # The kernel is created and compiled anew on every call.
        compiled_setter = ps.create_kernel(setter_assignments).compile()
        dh.run_kernel(compiled_setter)
예제 #2
0
                                            **options)
        update_rule = insert_fast_divisions(update_rule)
        update_rule = insert_fast_sqrts(update_rule)
        update_rules[name] = update_rule
        generate_sweep(ctx,
                       'UniformGridGPU_AA_LbKernel' + name,
                       update_rule,
                       inner_outer_split=True,
                       target='gpu',
                       gpu_indexing_params=sweep_params,
                       varying_parameters=vp)

    # getter & setter
    setter_assignments = macroscopic_values_setter(
        lb_method,
        velocity=velocity_field.center_vector,
        pdfs=pdfs.center_vector,
        density=1)
    getter_assignments = macroscopic_values_getter(
        lb_method,
        velocity=velocity_field.center_vector,
        pdfs=pdfs.center_vector,
        density=None)
    generate_sweep(ctx, 'UniformGridGPU_AA_MacroSetter', setter_assignments)
    generate_sweep(ctx, 'UniformGridGPU_AA_MacroGetter', getter_assignments)

    # communication
    generate_pack_info_from_kernel(ctx,
                                   'UniformGridGPU_AA_PackInfoPull',
                                   update_rules['Odd'],
                                   kind='pull',
예제 #3
0
def poiseuille_channel(target, stencil_name):
    """Run a force-driven channel flow and check it against ``poiseuille_flow``.

    The domain is ``width`` cells long along axis 1 and 4 cells long along
    every other axis. ``NoSlip`` boundaries are set on both ends of axis 1 and
    a constant force acts along axis 0. The simulation runs for at most 5000
    steps and stops early once the maximum of the averaged velocity no longer
    changes. The resulting profile is asserted against ``poiseuille_flow``.

    Args:
        target: Forwarded to the data handling as ``default_target``.
        stencil_name: Forwarded to ``LBStencil``; the stencil must be 2D or 3D.

    Raises:
        Exception: If the stencil is neither two- nor three-dimensional.
        AssertionError: If the simulated profile deviates from the reference.
    """
    # --- physical parameters -------------------------------------------
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # of box
    actual_width = width - 2  # subtract boundary layer from box width
    ext_force_density = 0.2 / actual_width ** 2  # scale by width to keep stable

    # --- LB parameters ---------------------------------------------------
    lb_stencil = LBStencil(stencil_name)
    n_dims = lb_stencil.D
    if n_dims not in (2, 3):
        raise Exception()

    # (4, width) in 2D, (4, width, 4) in 3D
    domain_shape = (4, width) + (4,) * (n_dims - 2)
    periodicity = [True, False] + [True] * (n_dims - 2)

    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # --- data structures ---------------------------------------------------
    dh = ps.create_data_handling(domain_shape, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    density_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    velocity_field = dh.add_array('u', values_per_cell=dh.dim)

    # --- LB setup ----------------------------------------------------------
    body_force = (ext_force_density,) + (0,) * (n_dims - 1)
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, force_model=ForceModel.GUO,
                           force=body_force,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {'velocity': velocity_field})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # --- boundaries --------------------------------------------------------
    boundary_handling = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name,
                                                         target=dh.default_target)

    # --- set up the simulation ---------------------------------------------
    init_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                 pdfs=src.center_vector,
                                                 density=density_field.center)
    init_kernel = ps.create_kernel(init_assignments, ghost_layers=0).compile()

    noslip = NoSlip()
    wall_thickness = 2
    if n_dims == 2:
        wall_slices = (ps.make_slice[:, :wall_thickness],
                       ps.make_slice[:, -wall_thickness:])
    else:
        wall_slices = (ps.make_slice[:, :wall_thickness, :],
                       ps.make_slice[:, -wall_thickness:, :])
    for wall_slice in wall_slices:
        boundary_handling.set_boundary(noslip, wall_slice)

    # Both walls share one boundary object, so exactly one entry is expected.
    assert len(boundary_handling._boundary_object_to_boundary_info) == 1, \
        "Restart kernel to clear boundaries"

    def initialise():
        """Fill density and velocity arrays, then run the PDF setter kernel."""
        dh.fill(density_field.name, rho_0)
        # NaN goes into the ghost layers first; the second fill (default
        # arguments) then writes zeros.
        dh.fill(velocity_field.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(velocity_field.name, 0)

        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def time_loop(steps):
        """Advance up to ``steps`` steps; return the force-corrected averaged velocity."""
        dh.all_to_gpu()
        last_max_vel = -1
        for step in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            boundary_handling()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)

            # Convergence is only examined on every 100th step.
            if step % 100 != 0:
                continue

            if velocity_field.name in dh.gpu_arrays:
                dh.to_cpu(velocity_field.name)
            averaged = dh.gather_array(velocity_field.name)
            # Average over the periodic axes; axis 2 must go first so that
            # the axis indices stay valid.
            if n_dims == 3:
                averaged = np.average(averaged, axis=2)
            averaged = np.average(averaged, axis=0)

            max_vel = np.nanmax(averaged)
            if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                break
            last_max_vel = max_vel

        # cut off wall regions
        averaged = averaged[wall_thickness:-wall_thickness]

        # correct for f/2 term
        averaged -= np.array([ext_force_density / 2 / rho_0] + [0] * (n_dims - 1))

        return averaged

    initialise()
    profile = time_loop(5000)

    # Compare against the analytical solution. Each profile row holds one
    # velocity vector; the force acts along component 0.
    y = np.arange(profile.shape[0])
    mid = (y[-1] - y[0]) / 2  # mid point of channel

    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)

    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # The remaining velocity components must vanish.
    transverse = profile[:, 1:]
    np.testing.assert_allclose(transverse, np.zeros_like(transverse), atol=1E-9)
예제 #4
0
def test_fully_periodic_flow(target, stencil, streaming_pattern):
    """Check that a uniform flow in a fully periodic box stays uniform.

    PDFs are initialised with density 1.0 and velocity 0.01 in every
    direction on a periodic 30-cell box. After 100 steps the velocity field
    must still equal the initial one. It must also equal every result stored
    earlier in the module-level ``all_results`` for the same stencil.

    Args:
        target: Compilation target used for the data handling and all kernels.
        stencil: Stencil identifier passed to ``LBStencil``.
        streaming_pattern: Streaming pattern name used throughout the setup.

    Raises:
        AssertionError: If the velocity field deviates from the reference or
            from a previously stored result.
    """
    gpu = target == Target.GPU

    #   Stencil
    stencil = LBStencil(stencil)

    #   Streaming
    inplace = is_inplace(streaming_pattern)
    timesteps = get_timesteps(streaming_pattern)
    zeroth_timestep = timesteps[0]

    #   Data Handling and PDF fields
    domain_size = (30, ) * stencil.D
    periodicity = (True, ) * stencil.D

    dh = create_data_handling(domain_size=domain_size,
                              periodicity=periodicity,
                              default_target=target)

    pdfs = dh.add_array('pdfs', stencil.Q)
    if not inplace:
        # Only out-of-place patterns need a temporary PDF field.
        pdfs_tmp = dh.add_array_like('pdfs_tmp', pdfs.name)

    #   LBM Streaming and Collision
    lbm_config = LBMConfig(stencil=stencil,
                           method=Method.SRT,
                           relaxation_rate=1.0,
                           streaming_pattern=streaming_pattern)

    lbm_opt = LBMOptimisation(symbolic_field=pdfs)
    config = CreateKernelConfig(target=target)

    if not inplace:
        lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)

    lb_collision = create_lb_collision_rule(lbm_config=lbm_config,
                                            lbm_optimisation=lbm_opt,
                                            config=config)
    lb_method = lb_collision.method

    # One LB kernel per timestep of the streaming pattern. The kernel config
    # is passed along so that the kernels are built for ``target`` rather
    # than for the default target.
    lb_kernels = []
    for t in timesteps:
        lbm_config = replace(lbm_config, timestep=t)
        lb_kernels.append(
            create_lb_function(collision_rule=lb_collision,
                               lbm_config=lbm_config,
                               lbm_optimisation=lbm_opt,
                               config=config))

    #   Macroscopic Values
    density = 1.0
    density_field = dh.add_array('rho', 1)
    u_x = 0.01
    velocity = (u_x, ) * stencil.D
    velocity_field = dh.add_array('u', stencil.D)

    # Reference: the initial uniform velocity in every cell and component.
    u_ref = np.full(domain_size + (stencil.D, ), u_x)

    setter = macroscopic_values_setter(lb_method,
                                       density,
                                       velocity,
                                       pdfs,
                                       streaming_pattern=streaming_pattern,
                                       previous_timestep=zeroth_timestep)
    setter_kernel = create_kernel(
        setter, config=CreateKernelConfig(target=target,
                                          ghost_layers=1)).compile()

    # One getter kernel per timestep of the streaming pattern.
    getter_kernels = []
    for t in timesteps:
        getter = macroscopic_values_getter(lb_method,
                                           density_field,
                                           velocity_field,
                                           pdfs,
                                           streaming_pattern=streaming_pattern,
                                           previous_timestep=t)
        getter_kernels.append(
            create_kernel(getter,
                          config=CreateKernelConfig(target=target,
                                                    ghost_layers=1)).compile())

    #   Periodicity
    periodicity_handler = LBMPeriodicityHandling(
        stencil, dh, pdfs.name, streaming_pattern=streaming_pattern)

    # Initialization and Timestep
    current_timestep = zeroth_timestep

    def init():
        """Reset the timestep marker and initialise the PDF field."""
        # ``nonlocal`` binds to the variable of the enclosing test function,
        # so no state is written to the module namespace.
        nonlocal current_timestep
        current_timestep = zeroth_timestep
        dh.run_kernel(setter_kernel)

    def one_step():
        """Perform periodicity handling, one LB step and the macroscopic update."""
        nonlocal current_timestep

        # Periodicity
        periodicity_handler(current_timestep)

        # Here, the next time step begins
        current_timestep = current_timestep.next()

        # LBM Step
        dh.run_kernel(lb_kernels[current_timestep.idx])

        # Field Swaps
        if not inplace:
            dh.swap(pdfs.name, pdfs_tmp.name)

        # Macroscopic Values
        dh.run_kernel(getter_kernels[current_timestep.idx])

    #   Run the simulation
    init()

    for _ in range(100):
        one_step()

    #   Evaluation
    if gpu:
        dh.to_cpu(velocity_field.name)
    u = dh.gather_array(velocity_field.name)

    #   Equal to the steady-state velocity field up to numerical errors
    assert_allclose(u, u_ref)

    #   Flow must be equal up to numerical error for all streaming patterns
    global all_results
    for key, prev_u in all_results.items():
        if key[0] == stencil:
            prev_pattern = key[1]
            assert_allclose(
                u,
                prev_u,
                err_msg=
                f'Velocity field for {streaming_pattern} differed from {prev_pattern}!'
            )
    all_results[(stencil, streaming_pattern)] = u
예제 #5
0
def test_shear_flow(target, stencil_name):
    """Simulate a shear flow between two moving walls and check the profiles.

    ``UBB`` boundaries with velocities ``+0.5 * shear_velocity`` and
    ``-0.5 * shear_velocity`` along axis 0 are set on the two ends of axis 1.
    After ``t_max`` steps the averaged velocity profile is compared with the
    reference from ``shear_flow``, and the second-moment output is compared
    with an expected pressure tensor.

    The function reads ``width``, ``actual_width``, ``wall_thickness``,
    ``eta``, ``rho_0``, ``shear_velocity`` and ``t_max``, which are defined
    outside this function.

    Args:
        target: Forwarded to the data handling as ``default_target``.
        stencil_name: Forwarded to ``LBStencil``; the stencil must be 2D or 3D.

    Raises:
        Exception: If the stencil is neither two- nor three-dimensional.
        AssertionError: If a simulated profile deviates from its reference.
    """
    # GPU runs are skipped when pycuda cannot be imported.
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    # --- LB parameters -----------------------------------------------------
    stencil = LBStencil(stencil_name)
    n_dims = stencil.D
    if n_dims not in (2, 3):
        raise Exception()

    # (4, width) in 2D, (4, width, 4) in 3D
    domain_shape = (4, width) + (4,) * (n_dims - 2)
    periodicity = [True, False] + [True] * (n_dims - 2)

    omega = relaxation_rate_from_lattice_viscosity(eta)

    # --- data structures ---------------------------------------------------
    dh = ps.create_data_handling(domain_shape, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    density_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    velocity_field = dh.add_array('u', values_per_cell=n_dims)
    moment_field = dh.add_array('p', values_per_cell=n_dims**2)

    # --- LB setup ----------------------------------------------------------
    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {'velocity': velocity_field})
    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # --- boundaries --------------------------------------------------------
    boundary_handling = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name,
                                                         target=dh.default_target)

    # --- second moment output ----------------------------------------------
    cqc = collision.method.conserved_quantity_computation
    moment_equations = cqc.output_equations_from_pdfs(src.center_vector,
                                                      {'moment2': moment_field})

    kernel_compute_p = ps.create_kernel(moment_equations, config=config).compile()

    # --- set up the simulation ---------------------------------------------
    init_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                 pdfs=src.center_vector,
                                                 density=density_field.center)
    init_kernel = ps.create_kernel(init_assignments, ghost_layers=0).compile()

    vel_vec = sp.Matrix([0.5 * shear_velocity] + [0] * (n_dims - 1))
    if n_dims == 2:
        lower_wall = ps.make_slice[:, :wall_thickness]
        upper_wall = ps.make_slice[:, -wall_thickness:]
    else:
        lower_wall = ps.make_slice[:, :wall_thickness, :]
        upper_wall = ps.make_slice[:, -wall_thickness:, :]
    boundary_handling.set_boundary(UBB(velocity=vel_vec), lower_wall)
    boundary_handling.set_boundary(UBB(velocity=-vel_vec), upper_wall)

    # Two distinct boundary objects were registered, so two entries are expected.
    assert len(boundary_handling._boundary_object_to_boundary_info) == 2, \
        "Restart kernel to clear boundaries"

    def initialise():
        """Fill density and velocity arrays, then run the PDF setter kernel."""
        dh.fill(density_field.name, rho_0)
        # NaN goes into the ghost layers first; the second fill (default
        # arguments) then writes zeros.
        dh.fill(velocity_field.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(velocity_field.name, 0)

        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def averaged_interior(array_name):
        """Gather an array, average over the periodic axes and drop the wall rows."""
        if array_name in dh.gpu_arrays:
            dh.to_cpu(array_name)
        values = dh.gather_array(array_name)
        # Axis 2 must be averaged before axis 0 so the axis indices stay valid.
        if n_dims == 3:
            values = np.average(values, axis=2)
        values = np.average(values, axis=0)
        return values[wall_thickness:-wall_thickness]

    def time_loop(steps):
        """Run ``steps`` steps; return the velocity and second-moment profiles."""
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            boundary_handling()
            dh.run_kernel(stream_kernel)
            dh.run_kernel(kernel_compute_p)

            dh.swap(src.name, dst.name)

        velocity_rows = averaged_interior(velocity_field.name)
        moment_rows = averaged_interior(moment_field.name)

        # One square matrix per profile row.
        moment_rows = moment_rows.reshape((len(moment_rows), n_dims, n_dims))
        return velocity_rows, moment_rows

    initialise()
    profile, pressure_profile = time_loop(t_max)

    expected = shear_flow(x=(np.arange(0, actual_width) + .5),
                          t=t_max,
                          nu=eta / rho_0,
                          v=shear_velocity,
                          h=actual_width,
                          k_max=100)

    # Unit vectors along axis 0 (shear direction) and axis 1 (plane normal).
    unit_vectors = np.eye(n_dims)
    shear_direction = unit_vectors[0]
    shear_plane_normal = unit_vectors[1]

    shear_rate = shear_velocity / actual_width
    dynamic_viscosity = eta * rho_0
    correction_factor = eta / (eta - 1. / 6.)

    normal_times_direction = np.outer(shear_plane_normal, shear_direction)
    p_expected = rho_0 * np.identity(dh.dim) / 3.0 + dynamic_viscosity * shear_rate / correction_factor * (
        normal_times_direction + np.transpose(normal_times_direction))

    # Subtract the tensor product of the velocity to get the pressure
    pressure_profile[:, 0, 0] -= rho_0 * profile[:, 0]**2

    np.testing.assert_allclose(profile[:, 0], expected[1:-1], atol=1E-9)
    for row in range(actual_width - 2):
        np.testing.assert_allclose(pressure_profile[row], p_expected, atol=1E-9, rtol=1E-3)
예제 #6
0
    def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU):
        """Create fields, kernels, periodicity and wall handling for a pipe flow.

        Args:
            stencil: Stencil object; its ``Q`` and ``D`` attributes are read.
            streaming_pattern: Streaming pattern forwarded to the LBM
                configuration, the macroscopic setter and getters, and the
                periodicity and boundary handlers.
            wall_boundary: Boundary object used for the wall. A new
                ``NoSlip()`` is created when this is ``None``.
            target: Target used for the data handling and all compiled kernels.
        """
        if wall_boundary is None:
            wall_boundary = NoSlip()

        self.target = target
        self.gpu = target in (Target.GPU,)

        # --- stencil ---
        self.stencil = stencil
        self.q = stencil.Q
        self.dim = stencil.D

        # --- streaming ---
        self.streaming_pattern = streaming_pattern
        self.inplace = is_inplace(self.streaming_pattern)
        self.timesteps = get_timesteps(streaming_pattern)
        self.zeroth_timestep = self.timesteps[0]

        # --- domain, data handling and PDF fields ---
        self.pipe_length = 60
        self.pipe_radius = 15
        n_cross_axes = self.dim - 1
        pipe_diameter = 2 * self.pipe_radius
        self.domain_size = (self.pipe_length, ) + (pipe_diameter,) * n_cross_axes
        # Periodic along the first axis only.
        self.periodicity = (True, ) + (False, ) * n_cross_axes
        # Constant force along the first axis.
        self.force = (0.0001, ) + (0.0,) * n_cross_axes

        self.dh = create_data_handling(domain_size=self.domain_size,
                                       periodicity=self.periodicity,
                                       default_target=self.target)

        self.pdfs = self.dh.add_array('pdfs', self.q)
        if not self.inplace:
            # Only out-of-place patterns need a temporary PDF field.
            self.pdfs_tmp = self.dh.add_array_like('pdfs_tmp', self.pdfs.name)

        # --- LBM streaming and collision ---
        lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.0,
                               force_model=ForceModel.GUO, force=self.force,
                               streaming_pattern=streaming_pattern)

        lbm_opt = LBMOptimisation(symbolic_field=self.pdfs)
        config = CreateKernelConfig(target=self.target)

        if not self.inplace:
            lbm_opt = replace(lbm_opt, symbolic_temporary_field=self.pdfs_tmp)

        self.lb_collision = create_lb_collision_rule(lbm_config=lbm_config,
                                                     lbm_optimisation=lbm_opt)
        self.lb_method = self.lb_collision.method

        # One LB kernel per timestep of the streaming pattern.
        self.lb_kernels = [
            create_lb_function(collision_rule=self.lb_collision,
                               lbm_config=replace(lbm_config, timestep=timestep),
                               lbm_optimisation=lbm_opt,
                               config=config)
            for timestep in self.timesteps
        ]

        # --- macroscopic values ---
        self.density = 1.0
        self.density_field = self.dh.add_array('rho', 1)
        self.velocity = (0.0,) * self.dim
        self.velocity_field = self.dh.add_array('u', self.dim)

        setter_assignments = macroscopic_values_setter(
            self.lb_method, self.density, self.velocity, self.pdfs,
            streaming_pattern=self.streaming_pattern,
            previous_timestep=self.zeroth_timestep)
        self.init_kernel = create_kernel(
            setter_assignments,
            config=CreateKernelConfig(target=target, ghost_layers=1)).compile()

        # One getter kernel per timestep of the streaming pattern; a new
        # kernel config object is created for each kernel.
        self.getter_kernels = [
            create_kernel(
                macroscopic_values_getter(
                    self.lb_method, self.density_field, self.velocity_field, self.pdfs,
                    streaming_pattern=self.streaming_pattern, previous_timestep=timestep),
                config=CreateKernelConfig(target=target, ghost_layers=1)).compile()
            for timestep in self.timesteps
        ]

        # --- periodicity ---
        self.periodicity_handler = LBMPeriodicityHandling(
            self.stencil, self.dh, self.pdfs.name, streaming_pattern=self.streaming_pattern)

        # --- boundary handling ---
        self.wall = wall_boundary
        self.bh = LatticeBoltzmannBoundaryHandling(
            self.lb_method, self.dh, self.pdfs.name,
            streaming_pattern=self.streaming_pattern, target=self.target)

        # Wall cells are selected by this object's ``mask_callback``.
        self.bh.set_boundary(boundary_obj=self.wall, mask_callback=self.mask_callback)

        self.current_timestep = self.zeroth_timestep
예제 #7
0
def test_lees_edwards():
    """Test a collide/stream setup whose PDF sync uses ``get_le_boundary_functor``.

    The test consists of three runs on a 64x64 periodic D2Q9 domain:

    1. 500 steps in which the shear offset grows by ``shear_velocity`` per
       step; the resulting velocity profile is compared with
       ``get_solution_navier_stokes``.
    2. 20 steps with a point force and a shear offset of 0.
    3. The same 20 steps with a shear offset of 10.

    The velocities of run 3, rolled back by the offset along axis 0, must
    match those of run 2.
    """

    domain_size = (64, 64)
    omega = 1.0  # relaxation rate of first component
    shear_velocity = 0.1  # shear velocity
    shear_dir = 0  # direction of shear flow
    shear_dir_normal = 1  # direction normal to shear plane, for interpolation

    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(domain_size,
                                 periodicity=True,
                                 default_target=ps.Target.CPU)

    # Every array is filled including its ghost layers.
    src = dh.add_array('src', values_per_cell=stencil.Q)
    dh.fill('src', 1.0, ghost_layers=True)

    dst = dh.add_array_like('dst', 'src')
    dh.fill('dst', 0.0, ghost_layers=True)

    force = dh.add_array('force', values_per_cell=stencil.D)
    dh.fill('force', 0.0, ghost_layers=True)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 1.0, ghost_layers=True)
    u = dh.add_array('u', values_per_cell=stencil.D)
    dh.fill('u', 0.0, ghost_layers=True)

    # Loop counter symbols, one per spatial dimension.
    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil.D)
    ]
    # Placeholder symbols; they are replaced by True/False for each stencil
    # direction further below.
    points_up = sp.Symbol('points_up')
    points_down = sp.Symbol('points_down')

    # Velocity offset added to the velocity input:
    #   +shear_velocity where counter 1 <= 1 and ``points_down`` holds,
    #   -shear_velocity where counter 1 >= src.shape[1] - 2 and ``points_up`` holds,
    #   0 otherwise.
    u_p = sp.Piecewise(
        (1,
         sp.And(ps.data_types.type_all_numbers(counters[1] <= 1, 'int'),
                points_down)),
        (-1,
         sp.And(
             ps.data_types.type_all_numbers(counters[1] >= src.shape[1] - 2,
                                            'int'), points_up)),
        (0, True)) * shear_velocity

    lbm_config = LBMConfig(stencil=stencil,
                           relaxation_rate=omega,
                           compressible=True,
                           velocity_input=u.center_vector +
                           sp.Matrix([u_p, 0]),
                           density_input=rho,
                           force_model=ForceModel.LUO,
                           force=force.center_vector,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=lbm_opt)

    # Collect the left-hand sides of all subexpressions whose free symbols
    # include the first-order equilibrium moment symbol of the shear
    # direction, then insert those subexpressions into the collision rule.
    to_insert = [
        s.lhs for s in collision.subexpressions
        if collision.method.first_order_equilibrium_moment_symbols[shear_dir]
        in s.free_symbols
    ]
    for s in to_insert:
        collision = collision.new_with_inserted_subexpression(s)
    # Resolve the placeholders per stencil direction: ``points_down`` is True
    # only for directions whose normal component is -1, ``points_up`` only for
    # those whose normal component is +1.
    ma = []
    for a, c in zip(collision.main_assignments, collision.method.stencil):
        if c[shear_dir_normal] == -1:
            b = (True, False)
        elif c[shear_dir_normal] == 1:
            b = (False, True)
        else:
            b = (False, False)
        a = ps.Assignment(a.lhs, a.rhs.replace(points_down, b[0]))
        a = ps.Assignment(a.lhs, a.rhs.replace(points_up, b[1]))
        ma.append(a)
    collision.main_assignments = ma

    # The stream kernel also writes density and velocity to ``rho`` and ``u``.
    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {
                                                       'density': rho,
                                                       'velocity': u
                                                   })

    config = ps.CreateKernelConfig(target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    init = macroscopic_values_setter(collision.method,
                                     velocity=(0, 0),
                                     pdfs=src.center_vector,
                                     density=rho.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    # A one-element list, so the functor bound below observes later updates
    # of ``offset[0]``.
    offset = [0.0]

    sync_pdfs = dh.synchronization_function([src.name],
                                            functor=partial(
                                                get_le_boundary_functor,
                                                shear_offset=offset))

    dh.run_kernel(init_kernel)

    # --- Run 1: the offset advances by ``shear_velocity`` every step ---
    time = 500

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
        offset[0] += shear_velocity
    dh.all_to_cpu()

    nu = lattice_viscosity_from_relaxation_rate(omega)
    h = domain_size[0]
    k_max = 100

    # Compare component 0 of the velocity along axis 1 (at index 0 of axis 0)
    # with the reference evaluated at the cell centres 0.5, 1.5, ..., h - 0.5.
    analytical_solution = get_solution_navier_stokes(
        np.linspace(0.5, h - 0.5, h), time, nu, shear_velocity, h, k_max)
    np.testing.assert_array_almost_equal(analytical_solution,
                                         dh.gather_array(u.name)[0, :, 0],
                                         decimal=5)

    # --- Run 2: reset the state, apply a point force, offset 0 ---
    dh.fill(rho.name, 1.0, ghost_layers=True)
    dh.run_kernel(init_kernel)
    dh.fill(u.name, 0.0, ghost_layers=True)
    dh.fill('force', 0.0, ghost_layers=True)
    # Force on a single cell of the CPU array.
    dh.cpu_arrays[force.name][64 // 3, 1, :] = [1e-2, -1e-1]

    offset[0] = 0

    time = 20

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
    dh.all_to_cpu()

    # Velocities at indices -3 and -2 along axis 1.
    vel_unshifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    # --- Run 3: identical setup, but with a constant offset of 10 ---
    dh.fill(rho.name, 1.0, ghost_layers=True)
    dh.run_kernel(init_kernel)
    dh.fill(u.name, 0.0, ghost_layers=True)
    dh.fill('force', 0.0, ghost_layers=True)
    dh.cpu_arrays[force.name][64 // 3, 1, :] = [1e-2, -1e-1]

    # An integer offset, because it is also used as the ``np.roll`` shift below.
    offset[0] = 10

    time = 20

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
    dh.all_to_cpu()

    vel_shifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    # Rolling the shifted result back by the offset along axis 0 must
    # reproduce the unshifted result.
    vel_rolled = np.roll(vel_shifted, -offset[0], axis=0)

    np.testing.assert_array_almost_equal(vel_unshifted, vel_rolled)