def test_equivalence_short(setup):
    """Check that split and non-split kernels yield the same cavity flow.

    ``setup`` is read by index as ``(stencil, compressible, method, force_x)``.
    Two lid-driven-cavity scenarios that differ only in the ``split``
    optimisation flag are advanced by 100 steps each and their velocity
    slices are compared with ``np.testing.assert_almost_equal``.
    """
    stencil = LBStencil(setup[0])

    # The force acts along the first axis only; the domain depends on the dimension.
    if stencil.D == 2:
        force = (setup[3], 0)
        domain_size = (20, 30)
    else:
        force = (setup[3], 0, 0)
        domain_size = (10, 13, 7)

    lbm_config = LBMConfig(
        stencil=stencil,
        method=setup[2],
        compressible=setup[1],
        relaxation_rates=[1.8, 1.7, 1.0, 1.0, 1.0, 1.0],
        force_model=ForceModel.GUO,
        force=force,
    )

    def build_cavity(lbm_optimisation):
        return create_lid_driven_cavity(
            domain_size=domain_size,
            lbm_config=lbm_config,
            lbm_optimisation=lbm_optimisation,
        )

    with_split = build_cavity(LBMOptimisation(split=True))
    without_split = build_cavity(LBMOptimisation(split=False))

    for scenario in (with_split, without_split):
        scenario.run(100)

    np.testing.assert_almost_equal(with_split.velocity_slice(),
                                   without_split.velocity_slice())
def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes):
    """Compare vectorised lid-driven-cavity runs against default-configured runs.

    Two domain sizes are simulated for 100 steps with default kernel settings
    to obtain reference velocities. The same sizes are then simulated with the
    requested CPU vectorisation options (the first without, the second with the
    ``split`` optimisation) and must match the references up to
    ``np.testing.assert_almost_equal`` tolerance.

    ``aligned_and_padding`` is read by index as
    ``(assume_aligned, assume_sufficient_line_padding)``.
    """
    time_steps = 100
    relaxation_rate = 1.8
    small_domain = (64, 32)
    large_domain = (666, 34)

    # Key order is kept because the dict is printed below.
    vectorization_options = dict(
        instruction_set=instruction_set,
        assume_aligned=aligned_and_padding[0],
        nontemporal=nontemporal,
        assume_inner_stride_one=True,
        assume_sufficient_line_padding=aligned_and_padding[1],
    )

    def simulate(size, **scenario_kwargs):
        scenario = create_lid_driven_cavity(size, **scenario_kwargs)
        scenario.run(time_steps)
        return scenario

    print("Computing reference solutions")
    ldc1_ref = simulate(small_domain, relaxation_rate=relaxation_rate)
    ldc2_ref = simulate(large_domain, relaxation_rate=relaxation_rate)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
    if double_precision:
        data_type = "double"
    else:
        data_type = "float32"
    config = ps.CreateKernelConfig(data_type=data_type,
                                   cpu_vectorize_info=vectorization_options)
    lbm_opt_split = LBMOptimisation(cse_global=True, split=True)
    lbm_opt = LBMOptimisation(cse_global=True, split=False)

    print(
        f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
        f"fixed loop sizes {fixed_loop_sizes}")

    ldc1 = simulate(small_domain, fixed_loop_sizes=fixed_loop_sizes, lbm_config=lbm_config,
                    lbm_optimisation=lbm_opt, config=config)
    np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :])

    ldc2 = simulate(large_domain, fixed_loop_sizes=fixed_loop_sizes, lbm_config=lbm_config,
                    lbm_optimisation=lbm_opt_split, config=config)
    np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])
def test_optimised_and_full_communication_equivalence(stencil_name):
    """Check that ``LBMPeriodicityHandling`` matches the full ghost-layer sync.

    A small fully periodic integer PDF field is numbered consecutively, synced
    with the data handling's generic synchronisation function and streamed once
    with a pull-only kernel. The field is then renumbered, synced with
    ``LBMPeriodicityHandling`` instead and streamed again. Both results must
    agree on the compared index window.

    Args:
        stencil_name: Stencil identifier forwarded to ``LBStencil``.

    Raises:
        AssertionError: If the two communication schemes produce different PDFs.
    """
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4, ) * stencil.D

    dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
                                 parallel=False, default_target=target)

    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)

    gl = dh.ghost_layers_of_field("pdf")

    def number_all_entries():
        """Write each entry's C-order linear index into both PDF arrays (ghost layers included)."""
        # The arrays are looked up on every call because ``dh.swap`` exchanges them.
        source = dh.cpu_arrays['pdf']
        numbering = np.arange(source.size, dtype=source.dtype).reshape(source.shape)
        source[...] = numbering
        dh.cpu_arrays['pdf_tmp'][...] = numbering

    number_all_entries()

    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()

    # Reference: generic synchronisation followed by one streaming step.
    full_communication = dh.synchronization_function(pdf.name, target=dh.default_target,
                                                     optimization={"openmp": True})
    full_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])

    # Candidate: same initial state, LBM-specific periodicity handling.
    number_all_entries()
    optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh,
                                                     pdf_field_name=pdf.name,
                                                     streaming_pattern='pull')
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")

    # Same index window as the former nested loops: gl <= index < domain_size[d]
    # on every spatial axis, all PDF directions.
    # NOTE(review): with ghost layers the interior presumably extends to
    # domain_size[d] + gl, so the last cells are not compared -- confirm intent.
    window = tuple(slice(gl, extent) for extent in domain_size)
    np.testing.assert_array_equal(
        dh.cpu_arrays['pdf'][window],
        pdf_full_communication[window],
        err_msg="Optimised periodicity handling differs from full communication")
def test_split_number_of_operations(stencil, compressible, method):
    """Check that loop splitting does not change the operation counts.

    For the tested configurations the split and the un-split kernel are
    expected to contain exactly the same number of multiplications, additions
    and divisions. Per the original author's note this does not hold for D3Q15
    and D3Q27, where some sub-expressions are recomputed in several of the
    split inner loops.
    """
    lbm_config = LBMConfig(stencil=LBStencil(stencil), method=method, compressible=compressible,
                           force_model=ForceModel.LUO, force=(1e-6, 1e-5, 1e-7))

    def operation_counts(split):
        ast = create_lb_ast(lbm_config=lbm_config, lbm_optimisation=LBMOptimisation(split=split))
        return count_operations_in_ast(ast)

    op_with_splitting = operation_counts(True)
    op_without_splitting = operation_counts(False)

    for kind in ('muls', 'adds', 'divs'):
        assert op_without_splitting[kind] == op_with_splitting[kind]
def test_population_and_moment_space_equivalence(setup):
    """Check that moment-space and population-space collision rules coincide.

    ``setup`` is read by index as ``(stencil, method, nested_moments,
    force_model)``. Two methods are created from the same configuration, one
    with ``PdfsToMomentsByChimeraTransform`` and one with no moment transform.
    After inlining all subexpressions except density and velocity, the
    right-hand sides of the main assignments must be symbolically equal.
    """
    stencil = LBStencil(setup[0])
    method, nested_moments, fmodel = setup[1], setup[2], setup[3]

    force = sp.symbols(f'F_:{stencil.D}')

    # Conserved moments (density + momentum) get rate 0, all others a free symbol.
    conserved_moments = 1 + stencil.D
    free_rates = sp.symbols(f'omega_:{stencil.Q - conserved_moments}')
    rr = [0] * conserved_moments + list(free_rates)

    moment_space_config = LBMConfig(
        stencil=stencil, method=method, relaxation_rates=rr,
        nested_moments=nested_moments, force_model=fmodel, force=force,
        weighted=True, compressible=True,
        moment_transform_class=PdfsToMomentsByChimeraTransform)
    moment_space_opt = LBMOptimisation(cse_global=False, cse_pdfs=False,
                                       pre_simplification=True, simplification=False)

    lb_method_moment_space = create_lb_method(lbm_config=moment_space_config)
    pdf_space_config = replace(moment_space_config, moment_transform_class=None)
    lb_method_pdf_space = create_lb_method(lbm_config=pdf_space_config)

    rho = lb_method_moment_space.zeroth_order_equilibrium_moment_symbol
    u = lb_method_moment_space.first_order_equilibrium_moment_symbols
    keep = {rho, *u}

    cr_moment_space = create_lb_collision_rule(
        lb_method=lb_method_moment_space,
        lbm_optimisation=moment_space_opt,
    ).new_without_subexpressions(subexpressions_to_keep=keep)

    pdf_space_opt = replace(moment_space_opt, simplification='auto')
    cr_pdf_space = create_lb_collision_rule(
        lb_method=lb_method_pdf_space,
        lbm_optimisation=pdf_space_opt,
    ).new_without_subexpressions(subexpressions_to_keep=keep)

    assignment_pairs = zip(cr_moment_space.main_assignments, cr_pdf_space.main_assignments)
    for a, b in assignment_pairs:
        diff = (a.rhs - b.rhs).expand()
        assert diff == 0, f"Mismatch between population- and moment-space equations in PDFs {a.lhs}, {b.lhs}"
def test_force_driven_channel_short(scenario):
    """Run the equivalence test for one GPU scenario; skipped without pycuda.

    ``scenario`` is read by index as ``(domain_size, method, compressible,
    block_size, field_layout)``. A ``block_size`` of ``False`` selects
    ``'line'`` GPU indexing; any other value is passed as the block size of
    the GPU indexing parameters.
    """
    pytest.importorskip("pycuda")

    ds, method, compressible = scenario[0], scenario[1], scenario[2]
    block_size, field_layout = scenario[3], scenario[4]

    lbm_config = LBMConfig(method=method, compressible=compressible,
                           relaxation_rates=[1.95, 1.9, 1.92, 1.92])
    lbm_opt = LBMOptimisation(field_layout=field_layout)

    # Choose the GPU indexing scheme.
    if block_size is False:
        kernel_options = {'gpu_indexing': 'line'}
    else:
        kernel_options = {'gpu_indexing_params': {'block_size': block_size}}
    config = CreateKernelConfig(**kernel_options)

    run_equivalence_test(domain_size=ds, lbm_config=lbm_config, lbm_opt=lbm_opt, config=config)
def poiseuille_channel(target, stencil_name):
    """Simulate a force-driven channel and compare it with ``poiseuille_flow``.

    The channel is periodic along the first axis (and the third in 3D) and
    bounded by no-slip walls along the second. A TRT method with Guo forcing
    is run for at most 5000 steps, stopping early once the maximum averaged
    velocity changes by a relative amount below 5e-6 between checks made every
    100 steps. The averaged x-velocity profile must match the expected profile
    within ``rtol=0.006`` and the remaining components must vanish within
    ``atol=1e-9``.

    Args:
        target: Compute target handed to the data handling.
        stencil_name: Stencil identifier forwarded to ``LBStencil``.
    """
    # Physical parameters
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # box width
    actual_width = width - 2  # box width without the boundary layer
    ext_force_density = 0.2 / actual_width ** 2  # scaled by the width to stay stable
    wall_thickness = 2

    # LB parameters
    lb_stencil = LBStencil(stencil_name)
    dim = lb_stencil.D
    if dim == 2:
        L = (4, width)
    elif dim == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (dim - 2)
    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)
    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    rho_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # LB setup
    force = (ext_force_density,) + (0,) * (dim - 1)
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, force_model=ForceModel.GUO,
                           force=force, kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name,
                                            target=dh.default_target)

    # PDF initialisation kernel
    setter_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                   pdfs=src.center_vector,
                                                   density=rho_field.center)
    init_kernel = ps.create_kernel(setter_assignments, ghost_layers=0).compile()

    noslip = NoSlip()
    if dim == 2:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:])
    elif dim == 3:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    assert len(lbbh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"

    def initialise():
        dh.fill(rho_field.name, rho_0)
        # NaN first everywhere (ghost layers included), then zero without the ghost-layer flags.
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)
        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def time_loop(steps):
        dh.all_to_gpu()
        last_max_vel = -1
        for step in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)
            dh.swap(src.name, dst.name)

            # Only look at the flow field every 100th step.
            if step % 100 != 0:
                continue

            if u.name in dh.gpu_arrays:
                dh.to_cpu(u.name)
            uu = dh.gather_array(u.name)
            # Average over the periodic directions; axis 2 must go before axis 0.
            if dim == 3:
                uu = np.average(uu, axis=2)
            uu = np.average(uu, axis=0)

            max_vel = np.nanmax(uu)
            if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                break
            last_max_vel = max_vel

        # Cut off the wall regions.
        uu = uu[wall_thickness:-wall_thickness]
        # Correct for the f/2 term.
        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (dim - 1))
        return uu

    initialise()
    profile = time_loop(5000)

    # Compare with the analytical solution; the force acts in x-direction.
    y = np.arange(len(profile[:, 0]))
    mid = (y[-1] - y[0]) / 2  # mid point of the channel
    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)
    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # The velocity in all other directions must be zero.
    transverse = profile[:, 1:]
    np.testing.assert_allclose(transverse, np.zeros_like(transverse), atol=1E-9)
def test_fully_periodic_flow(target, stencil, streaming_pattern):
    """Check that a uniform flow stays uniform in a fully periodic box.

    An SRT simulation with relaxation rate 1.0 is initialised with density 1.0
    and velocity 0.01 in every direction and run for 100 steps with the given
    streaming pattern. The final velocity field must equal the initial uniform
    field and must agree with the results already stored in the module-level
    ``all_results`` for the same stencil and other streaming patterns.

    Args:
        target: Compute target for data handling and kernels.
        stencil: Stencil identifier forwarded to ``LBStencil``.
        streaming_pattern: Name of the streaming pattern under test.

    Raises:
        AssertionError: If the velocity field deviates from the reference or
            from a previously recorded result.
    """
    gpu = target == Target.GPU

    # Stencil
    stencil = LBStencil(stencil)

    # Streaming
    inplace = is_inplace(streaming_pattern)
    timesteps = get_timesteps(streaming_pattern)
    zeroth_timestep = timesteps[0]

    # Data handling and PDF fields
    domain_size = (30, ) * stencil.D
    periodicity = (True, ) * stencil.D

    dh = create_data_handling(domain_size=domain_size, periodicity=periodicity,
                              default_target=target)

    pdfs = dh.add_array('pdfs', stencil.Q)
    if not inplace:
        pdfs_tmp = dh.add_array_like('pdfs_tmp', pdfs.name)

    # LBM streaming and collision
    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.0,
                           streaming_pattern=streaming_pattern)
    lbm_opt = LBMOptimisation(symbolic_field=pdfs)
    config = CreateKernelConfig(target=target)

    if not inplace:
        lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)

    lb_collision = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                                            config=config)
    lb_method = lb_collision.method

    # One kernel per timestep variant of the streaming pattern.
    # NOTE(review): ``config`` is passed to the collision rule only, not to
    # ``create_lb_function`` -- confirm this is intended for non-CPU targets.
    lb_kernels = []
    for t in timesteps:
        lbm_config = replace(lbm_config, timestep=t)
        lb_kernels.append(
            create_lb_function(collision_rule=lb_collision,
                               lbm_config=lbm_config,
                               lbm_optimisation=lbm_opt))

    # Macroscopic values
    density = 1.0
    density_field = dh.add_array('rho', 1)
    u_x = 0.01
    velocity = (u_x, ) * stencil.D
    velocity_field = dh.add_array('u', stencil.D)

    u_ref = np.full(domain_size + (stencil.D, ), u_x)

    setter = macroscopic_values_setter(lb_method, density, velocity, pdfs,
                                       streaming_pattern=streaming_pattern,
                                       previous_timestep=zeroth_timestep)
    setter_kernel = create_kernel(
        setter, config=CreateKernelConfig(target=target, ghost_layers=1)).compile()

    getter_kernels = []
    for t in timesteps:
        getter = macroscopic_values_getter(lb_method, density_field, velocity_field, pdfs,
                                           streaming_pattern=streaming_pattern,
                                           previous_timestep=t)
        getter_kernels.append(
            create_kernel(getter,
                          config=CreateKernelConfig(target=target, ghost_layers=1)).compile())

    # Periodicity
    periodicity_handler = LBMPeriodicityHandling(
        stencil, dh, pdfs.name, streaming_pattern=streaming_pattern)

    # Initialisation and timestep
    current_timestep = zeroth_timestep

    def init():
        # ``nonlocal`` binds the variable of this test function; the former
        # ``global`` created an unrelated module-level name instead.
        nonlocal current_timestep
        current_timestep = zeroth_timestep
        dh.run_kernel(setter_kernel)

    def one_step():
        nonlocal current_timestep

        # Periodicity is applied for the timestep that is about to end.
        periodicity_handler(current_timestep)

        # Here, the next time step begins.
        current_timestep = current_timestep.next()

        # LBM step
        dh.run_kernel(lb_kernels[current_timestep.idx])

        # Field swap (only needed for two-field patterns)
        if not inplace:
            dh.swap(pdfs.name, pdfs_tmp.name)

        # Macroscopic values
        dh.run_kernel(getter_kernels[current_timestep.idx])

    # Run the simulation
    init()
    for _ in range(100):
        one_step()

    # Evaluation
    if gpu:
        dh.to_cpu(velocity_field.name)
    u = dh.gather_array(velocity_field.name)

    # Equal to the steady-state velocity field up to numerical errors
    assert_allclose(u, u_ref)

    # The flow must be equal up to numerical error for all streaming patterns.
    # ``all_results`` is only read and item-assigned, so no ``global`` is needed.
    for key, prev_u in all_results.items():
        if key[0] == stencil:
            prev_pattern = key[1]
            assert_allclose(
                u,
                prev_u,
                err_msg=f'Velocity field for {streaming_pattern} differed from {prev_pattern}!')
    all_results[(stencil, streaming_pattern)] = u
def test_diffusion():
    """
      Runs the "Diffusion from Plate in Uniform Flow" benchmark as it is described
      in [ch. 8.6.3, The Lattice Boltzmann Method, Krüger et al.].

                dC/dy = 0
            ┌───────────────┐
            │     → → →     │
      C = 0 │     → u →     │ dC/dx = 0
            │     → → →     │
            └───────────────┘
                  C = 1

      The analytical solution is given by:
        C(x,y) = 1 * erfc(y / sqrt(4Dx/u))

      The hydrodynamic field is not simulated, instead a constant velocity is assumed.

      The test is skipped when pycuda is not available. It passes when the
      root-mean-square deviation between the simulated and the analytical
      concentration (column x = 0 excluded from the sum, but counted in the
      normalisation) is below 1e-2.
    """
    pytest.importorskip("pycuda")

    # Parameters
    domain_size = (1600, 160)
    omega = 1.38
    diffusion = (1 / omega - 0.5) / 3
    velocity = 0.05
    time_steps = 50000
    stencil = LBStencil(Stencil.D2Q9)
    target = ps.Target.GPU

    # Data handling
    dh = ps.create_data_handling(domain_size=domain_size, default_target=target)

    # Prescribed velocity: ``velocity`` in component 0, zero in component 1.
    vel_field = dh.add_array('vel_field', values_per_cell=stencil.D)
    dh.fill('vel_field', velocity, 0, ghost_layers=True)
    dh.fill('vel_field', 0.0, 1, ghost_layers=True)

    con_field = dh.add_array('con_field', values_per_cell=1)
    dh.fill('con_field', 0.0, ghost_layers=True)

    pdfs = dh.add_array('pdfs', values_per_cell=stencil.Q)
    dh.fill('pdfs', 0.0, ghost_layers=True)
    pdfs_tmp = dh.add_array('pdfs_tmp', values_per_cell=stencil.Q)
    dh.fill('pdfs_tmp', 0.0, ghost_layers=True)

    # Lattice Boltzmann method
    lbm_config = LBMConfig(stencil=stencil, method=Method.MRT, relaxation_rates=[1, 1.5, 1, 1.5, 1],
                           velocity_input=vel_field, output={'density': con_field},
                           compressible=True, weighted=True, kernel_type='stream_pull_collide')
    lbm_opt = LBMOptimisation(symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    method = create_lb_method(lbm_config=lbm_config)
    method.set_conserved_moments_relaxation_rate(omega)

    lbm_config = replace(lbm_config, lb_method=method)
    update_rule = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    kernel = ps.create_kernel(update_rule, config=config).compile()

    # PDF initialisation
    init = pdf_initialization_assignments(method, con_field.center,
                                          vel_field.center_vector, pdfs.center_vector)
    dh.run_kernel(ps.create_kernel(init).compile())

    dh.all_to_gpu()

    # Boundary handling
    bh = LatticeBoltzmannBoundaryHandling(update_rule.method, dh, 'pdfs', name="bh",
                                          target=dh.default_target)
    add_box_boundary(bh, boundary=NeumannByCopy())
    bh.set_boundary(DiffusionDirichlet(0), slice_from_direction('W', dh.dim))
    bh.set_boundary(DiffusionDirichlet(1), slice_from_direction('S', dh.dim))

    # Time loop
    for _ in range(time_steps):
        bh()
        dh.run_kernel(kernel)
        dh.swap("pdfs", "pdfs_tmp")

    dh.all_to_cpu()

    # Verification: the analytical solution is evaluated for x >= 1 only
    # (x = 0 would divide by zero) and column 0 stays at its zero initial value.
    x = np.arange(1, domain_size[0], 1)
    y = np.arange(0, domain_size[1], 1)
    erfc = np.vectorize(math.erfc)
    analytical = np.zeros(domain_size)
    analytical[1:, :] = erfc(y[np.newaxis, :] / np.sqrt(4 * diffusion * x[:, np.newaxis] / velocity))

    # ``reshape`` is a no-op for a 2D result and drops a trailing unit axis otherwise.
    simulated = np.asarray(dh.gather_array('con_field', ghost_layers=False)).reshape(domain_size)

    # Vectorised sum of squared deviations over x in [1, Nx) and y in [0, Ny);
    # this replaces a Python double loop over every cell.
    squared_error = float(np.sum((simulated[1:, :] - analytical[1:, :]) ** 2))
    residual = math.sqrt(squared_error / (domain_size[0] * domain_size[1]))

    assert residual < 1e-2
def create_model(domain_size, num_phases, coeff_a, coeff_epsilon, gabd, alpha=1, penalty_factor=0.01, simplex_projection=False):
    """Build an N-phase model: one hydrodynamic LB plus one Cahn-Hilliard LB per phase.

    Args:
        domain_size: Domain size handed to ``create_data_handling``.
        num_phases: Number of order parameters.
        coeff_a: Indexed as ``coeff_a[i, j]`` with ``j < i`` in the bulk energy.
        coeff_epsilon: Indexed with ``(larger, smaller)`` phase-index tuples.
        gabd: Prefactor of the triple-product term of the bulk energy.
        alpha: Scaling of the interfacial chemical potential.
        penalty_factor: Weight of the ``(1 - sum(c))**2`` penalty term.
        simplex_projection: If true, ``simplex_projection_2d`` is applied to the
            order-parameter array while running.

    Returns:
        Tuple ``(dh, init, run)``: the data handling, a function that
        initialises all PDF fields and a function ``run(steps)`` that advances
        the model and returns the ``c`` array without its outermost layer.
    """

    def lapl(expr):
        return sum(Diff(Diff(expr, axis), axis) for axis in range(dh.dim))

    def interfacial_chemical_potential(order_params):
        count = len(order_params)
        entries = []
        for i in range(count):
            entry = 0
            for k in range(count):
                if k != i:
                    # The coefficient is looked up with the larger index first.
                    eps = coeff_epsilon[(max(i, k), min(i, k))]
                    entry += alpha**2 * eps**2 * (order_params[k] * lapl(order_params[i])
                                                  - order_params[i] * lapl(order_params[k]))
            entries.append(entry)
        return -sp.Matrix(entries)

    def bulk(order_params):
        pair_terms = sum(
            (order_params[i]**2 * order_params[j]**2) / (4 * coeff_a[i, j])
            for i in range(num_phases) for j in range(i))
        triple_terms = sum(
            gabd * order_params[i] * order_params[j] * order_params[k]
            for i in range(num_phases) for j in range(i) for k in range(j))
        return pair_terms + triple_terms

    # -------------- Data ------------------
    dh = create_data_handling(domain_size, periodicity=(True, True), default_ghost_layers=2)

    c = dh.add_array("c", values_per_cell=num_phases)
    rho = dh.add_array("rho", values_per_cell=1)
    mu = dh.add_array("mu", values_per_cell=num_phases, latex_name="\\mu")
    force = dh.add_array("F", values_per_cell=dh.dim)
    u = dh.add_array("u", values_per_cell=dh.dim)

    # Source and destination PDFs per order parameter (9 values per cell for D2Q9).
    pdf_pairs = [
        (dh.add_array(f"pdf_ch_{i}", values_per_cell=9),
         dh.add_array(f"pdfs_ch_{i}_dst", values_per_cell=9))
        for i in range(num_phases)
    ]
    pdf_field = [source for source, _ in pdf_pairs]
    pdf_dst_field = [destination for _, destination in pdf_pairs]

    # PDFs of the hydrodynamic LB
    pdf_hydro_field = dh.add_array("pdfs", values_per_cell=9)
    pdf_hydro_dst_field = dh.add_array("pdfs_dst", values_per_cell=9)

    # ------------- Compute kernels --------
    c_vec = c.center_vector
    f_penalty = penalty_factor * (1 - sum(c_vec[i] for i in range(num_phases)))**2
    f_bulk = bulk(c_vec) + f_penalty
    print(f_bulk)

    mu_eq = chemical_potentials_from_free_energy(f_bulk, order_parameters=c_vec)
    mu_eq += interfacial_chemical_potential(c_vec)
    mu_eq = [expand_diff_full(mu_i, functions=c) for mu_i in mu_eq]
    mu_assignments = [
        Assignment(mu(i), discretize_spatial(mu_i, dx=1, stencil='isotropic'))
        for i, mu_i in enumerate(mu_eq)
    ]
    mu_compute_kernel = create_kernel(mu_assignments).compile()

    mu_discretize_substitutions = forth_order_isotropic_discretize(mu)
    force_rhs = force_from_phi_and_mu(order_parameters=c_vec, dim=dh.dim, mu=mu.center_vector)
    force_rhs = force_rhs.subs(mu_discretize_substitutions)
    force_assignments = [Assignment(force(i), force_rhs[i]) for i in range(dh.dim)]
    force_kernel = create_kernel(force_assignments).compile()

    def compile_ch_rule(ch_config, symbolic_field):
        # Compile all assignments of the update rule built from ``ch_config``.
        rule = create_lb_update_rule(lbm_config=ch_config,
                                     lbm_optimisation=LBMOptimisation(symbolic_field=symbolic_field))
        return create_kernel(rule.all_assignments).compile()

    # Cahn-Hilliard collision kernels (methods are created alongside)
    ch_methods = []
    ch_collide_kernels = []
    for i in range(num_phases):
        ch_method = cahn_hilliard_lb_method(LBStencil(Stencil.D2Q9), mu(i),
                                            relaxation_rate=1.0, gamma=1.0)
        ch_methods.append(ch_method)
        collide_config = LBMConfig(lb_method=ch_method, kernel_type='collide_only',
                                   density_input=c(i), velocity_input=u.center_vector,
                                   compressible=True)
        ch_collide_kernels.append(compile_ch_rule(collide_config, pdf_field[i]))

    # Cahn-Hilliard streaming kernels
    ch_stream_kernels = []
    for ch_method, source, destination in zip(ch_methods, pdf_field, pdf_dst_field):
        stream_config = LBMConfig(lb_method=ch_method, kernel_type='stream_pull_only',
                                  temporary_field_name=destination.name)
        ch_stream_kernels.append(compile_ch_rule(stream_config, source))

    # Initialisation kernels for the Cahn-Hilliard PDFs
    init_kernels = []
    for i, ch_method in enumerate(ch_methods):
        init_assign = pdf_initialization_assignments(lb_method=ch_method, density=c_vec[i],
                                                     velocity=(0, 0),
                                                     pdfs=pdf_field[i].center_vector)
        init_kernels.append(create_kernel(init_assign).compile())

    # Getter kernels writing each order parameter back from its PDFs
    getter_kernels = []
    for i in range(num_phases):
        cqc = ch_methods[i].conserved_quantity_computation
        output_assign = cqc.output_equations_from_pdfs(pdf_field[i].center_vector,
                                                       {'density': c(i)})
        getter_kernels.append(create_kernel(output_assign).compile())

    # Hydrodynamic LB: collision with forcing, then streaming with output
    hydro_collide_config = LBMConfig(kernel_type='collide_only', relaxation_rate=1.0,
                                     force=force, compressible=True)
    collide_assign = create_lb_update_rule(
        lbm_config=hydro_collide_config,
        lbm_optimisation=LBMOptimisation(symbolic_field=pdf_hydro_field))
    collide_kernel = create_kernel(collide_assign).compile()

    hydro_stream_config = LBMConfig(kernel_type='stream_pull_only',
                                    temporary_field_name=pdf_hydro_dst_field.name,
                                    output={"density": rho, "velocity": u})
    stream_assign = create_lb_update_rule(
        lbm_config=hydro_stream_config,
        lbm_optimisation=LBMOptimisation(symbolic_field=pdf_hydro_field))
    stream_kernel = create_kernel(stream_assign).compile()

    method_collide = collide_assign.method
    init_hydro_assign = pdf_initialization_assignments(lb_method=method_collide,
                                                       density=rho.center,
                                                       velocity=u.center_vector,
                                                       pdfs=pdf_hydro_field.center_vector)
    init_hydro_kernel = create_kernel(init_hydro_assign).compile()

    # NOTE(review): ``cqc`` is still the conserved-quantity computation of the
    # LAST Cahn-Hilliard method from the loop above, not that of
    # ``method_collide`` -- confirm this is intended.
    output_hydro_assign = cqc.output_equations_from_pdfs(
        pdf_hydro_field.center_vector,
        {'density': rho.center, 'velocity': u.center_vector}).all_assignments
    getter_hydro_kernel = create_kernel(output_hydro_assign).compile()

    # Initial array values
    for field, value in ((c, 0), (u, 0), (rho, 1), (mu, 0), (force, 0)):
        dh.cpu_arrays[field.name].fill(value)

    def init():
        for kernel in init_kernels:
            dh.run_kernel(kernel)
        dh.run_kernel(init_hydro_kernel)

    pdf_sync_fns = [dh.synchronization_function([field.name]) for field in pdf_field]
    hydro_sync_fn = dh.synchronization_function([pdf_hydro_field.name])
    c_sync_fn = dh.synchronization_function([c.name])
    mu_sync = dh.synchronization_function([mu.name])

    def run(steps):
        for _ in range(steps):
            # Chemical potential and force
            c_sync_fn()
            dh.run_kernel(mu_compute_kernel)
            mu_sync()
            dh.run_kernel(force_kernel)

            # Hydrodynamic LB
            dh.run_kernel(collide_kernel)
            hydro_sync_fn()
            dh.run_kernel(stream_kernel)
            dh.swap(pdf_hydro_field.name, pdf_hydro_dst_field.name)
            dh.run_kernel(getter_hydro_kernel)

            # Cahn-Hilliard LBs
            for phase in range(num_phases):
                dh.run_kernel(ch_collide_kernels[phase])
                pdf_sync_fns[phase]()
                dh.run_kernel(ch_stream_kernels[phase])
                dh.swap(pdf_field[phase].name, pdf_dst_field[phase].name)
                dh.run_kernel(getter_kernels[phase])

            # NOTE(review): the projection is assumed to run once per time
            # step; the original nesting is not recoverable from the source.
            if simplex_projection:
                simplex_projection_2d(dh.cpu_arrays[c.name])
        return dh.cpu_arrays[c.name][1:-1, 1:-1, :]

    return dh, init, run
def test_total_momentum(method_enum, force_model, omega):
    """Check that a constant body force adds momentum ``F`` per cell and step.

    A periodic 16x16 D2Q9 domain initialised at rest is advanced for 20 steps
    under the force ``F``. The summed velocity, divided by the number of
    cells, by ``F`` and by the number of steps, must be 1 in every component.

    The EDM force model is skipped. Per the original author's note it does not
    satisfy this test, although it is used successfully in the entropic-model
    test, and attempts to adapt it broke that test.
    """
    if force_model == ForceModel.EDM:
        pytest.skip()

    L = (16, 16)
    F = (2e-4, -3e-4)
    steps = 20
    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(L, periodicity=True, default_target=Target.CPU)
    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    rho_field = dh.add_array('rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=stencil.D)

    lbm_config = LBMConfig(method=method_enum, stencil=stencil, relaxation_rate=omega,
                           compressible=True, force_model=force_model, force=F,
                           streaming_pattern='pull')
    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=LBMOptimisation(symbolic_field=src))

    config = ps.CreateKernelConfig(cpu_openmp=True, target=dh.default_target)
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    sync_pdfs = dh.synchronization_function([src.name])

    getter = macroscopic_values_getter(collision.method, rho_field.center, u.center_vector,
                                       src, use_pre_collision_pdfs=True)
    getter_kernel = ps.create_kernel(getter).compile()

    # Initialisation: unit density, fluid at rest, pre-collision PDFs.
    dh.fill(rho_field.name, 1)
    dh.fill(u.name, 0)
    setter = macroscopic_values_setter(collision.method, velocity=(0, ) * dh.dim, pdfs=src,
                                       density=rho_field.center, set_pre_collision_pdfs=True)
    dh.run_kernel(ps.create_kernel(setter).compile())

    # Time loop
    dh.all_to_gpu()
    for _ in range(steps):
        dh.run_kernel(collision_kernel)
        dh.swap(src.name, dst.name)
        sync_pdfs()
    dh.all_to_cpu()

    dh.run_kernel(getter_kernel)
    total = np.sum(dh.gather_array(u.name), axis=(0, 1))
    per_cell = total / np.prod(L)
    assert np.allclose(per_cell / F / steps, 1)
def test_shear_flow(target, stencil_name):
    """Simulate flow between two oppositely moving walls and check velocity and stress.

    The domain is periodic along the first axis (and the third in 3D); UBB
    walls moving with +/- half the shear velocity bound it along the second.
    After ``t_max`` steps the averaged x-velocity profile is compared with
    ``shear_flow`` and the second-moment tensor of every compared row with the
    expected pressure tensor.

    The parameters ``width``, ``actual_width``, ``wall_thickness``, ``eta``,
    ``rho_0``, ``shear_velocity`` and ``t_max`` are read from the enclosing
    module. The test is skipped for the GPU target when pycuda is missing.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    # LB parameters
    stencil = LBStencil(stencil_name)
    dim = stencil.D
    if dim == 2:
        L = (4, width)
    elif dim == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (dim - 2)
    omega = relaxation_rate_from_lattice_viscosity(eta)

    # Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)
    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    rho_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dim)
    p = dh.add_array('p', values_per_cell=dim**2)

    # LB setup
    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, kernel_type='collide_only')
    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=LBMOptimisation(symbolic_field=src))
    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name,
                                            target=dh.default_target)

    # Kernel writing the second moment of the PDFs into ``p``
    cqc = collision.method.conserved_quantity_computation
    getter_eqs = cqc.output_equations_from_pdfs(src.center_vector, {'moment2': p})
    kernel_compute_p = ps.create_kernel(getter_eqs, config=config).compile()

    # PDF initialisation kernel
    setter_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                   pdfs=src.center_vector,
                                                   density=rho_field.center)
    init_kernel = ps.create_kernel(setter_assignments, ghost_layers=0).compile()

    # Walls move in opposite x-directions with half the shear velocity each.
    vel_vec = sp.Matrix([0.5 * shear_velocity] + [0] * (dim - 1))
    if dim == 2:
        lbbh.set_boundary(UBB(velocity=vel_vec), ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(UBB(velocity=-vel_vec), ps.make_slice[:, -wall_thickness:])
    elif dim == 3:
        lbbh.set_boundary(UBB(velocity=vel_vec), ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(UBB(velocity=-vel_vec), ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    assert len(lbbh._boundary_object_to_boundary_info) == 2, "Restart kernel to clear boundaries"

    def initialise():
        dh.fill(rho_field.name, rho_0)
        # NaN first everywhere (ghost layers included), then zero without the ghost-layer flags.
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)
        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def gather_profile(field):
        # Fetch a field and average it over the periodic directions.
        if field.name in dh.gpu_arrays:
            dh.to_cpu(field.name)
        values = dh.gather_array(field.name)
        # Axis 2 must be averaged before axis 0.
        if dim == 3:
            values = np.average(values, axis=2)
        values = np.average(values, axis=0)
        # Cut off the wall regions.
        return values[wall_thickness:-wall_thickness]

    def time_loop(steps):
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)
            dh.run_kernel(kernel_compute_p)
            dh.swap(src.name, dst.name)

        uu = gather_profile(u)
        pp = gather_profile(p)
        # One (dim x dim) tensor per row.
        if dim in (2, 3):
            pp = pp.reshape((len(pp), dim, dim))
        return uu, pp

    initialise()
    profile, pressure_profile = time_loop(t_max)

    expected = shear_flow(x=(np.arange(0, actual_width) + .5),
                          t=t_max,
                          nu=eta / rho_0,
                          v=shear_velocity,
                          h=actual_width,
                          k_max=100)

    if dim == 2:
        shear_direction = np.array((1, 0), dtype=float)
        shear_plane_normal = np.array((0, 1), dtype=float)
    if dim == 3:
        shear_direction = np.array((1, 0, 0), dtype=float)
        shear_plane_normal = np.array((0, 1, 0), dtype=float)

    shear_rate = shear_velocity / actual_width
    dynamic_viscosity = eta * rho_0
    correction_factor = eta / (eta - 1. / 6.)

    normal_outer_direction = np.outer(shear_plane_normal, shear_direction)
    p_expected = rho_0 * np.identity(dh.dim) / 3.0 + dynamic_viscosity * shear_rate / correction_factor * (
        normal_outer_direction + np.transpose(normal_outer_direction))

    # Subtract the tensor product of the velocity to obtain the pressure.
    pressure_profile[:, 0, 0] -= rho_0 * profile[:, 0]**2

    np.testing.assert_allclose(profile[:, 0], expected[1:-1], atol=1E-9)
    for row in range(actual_width - 2):
        np.testing.assert_allclose(pressure_profile[row], p_expected, atol=1E-9, rtol=1E-3)
def __init__(self, stencil, streaming_pattern, wall_boundary=None, target=Target.CPU):
    """Create fields, kernels and boundary handling for the flow setup.

    Args:
        stencil: lattice stencil; ``stencil.Q`` and ``stencil.D`` determine the
            number of PDFs per cell and the spatial dimension.
        streaming_pattern: streaming pattern handed to every LBM component.
        wall_boundary: boundary object set via ``self.mask_callback``; a fresh
            ``NoSlip()`` is used when ``None``.
        target: compute target for the data handling and all kernel configs.
    """
    wall = NoSlip() if wall_boundary is None else wall_boundary

    self.target = target
    self.gpu = target in [Target.GPU]

    # Lattice description
    self.stencil = stencil
    self.q, self.dim = stencil.Q, stencil.D

    # Streaming pattern and the timesteps it cycles through
    self.streaming_pattern = streaming_pattern
    self.inplace = is_inplace(self.streaming_pattern)
    self.timesteps = get_timesteps(streaming_pattern)
    self.zeroth_timestep = self.timesteps[0]

    # Geometry: periodic along the first axis only; the force acts along that axis
    cross_axes = self.dim - 1
    self.pipe_length = 60
    self.pipe_radius = 15
    self.domain_size = (self.pipe_length,) + (2 * self.pipe_radius,) * cross_axes
    self.periodicity = (True,) + (False,) * cross_axes
    self.force = (0.0001,) + (0.0,) * cross_axes

    # Data handling and PDF storage; a temporary field is only allocated
    # for streaming patterns that are not in-place
    self.dh = create_data_handling(domain_size=self.domain_size,
                                   periodicity=self.periodicity,
                                   default_target=self.target)
    self.pdfs = self.dh.add_array('pdfs', self.q)
    if not self.inplace:
        self.pdfs_tmp = self.dh.add_array_like('pdfs_tmp', self.pdfs.name)

    # Collision rule (SRT with Guo forcing) and one update kernel per timestep
    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.0,
                           force_model=ForceModel.GUO, force=self.force,
                           streaming_pattern=streaming_pattern)
    lbm_opt = LBMOptimisation(symbolic_field=self.pdfs)
    config = CreateKernelConfig(target=self.target)
    if not self.inplace:
        lbm_opt = replace(lbm_opt, symbolic_temporary_field=self.pdfs_tmp)

    self.lb_collision = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    self.lb_method = self.lb_collision.method

    self.lb_kernels = []
    step_config = lbm_config
    for step in self.timesteps:
        step_config = replace(step_config, timestep=step)
        update_kernel = create_lb_function(collision_rule=self.lb_collision,
                                           lbm_config=step_config,
                                           lbm_optimisation=lbm_opt,
                                           config=config)
        self.lb_kernels.append(update_kernel)

    # Macroscopic quantities: initial values and output fields
    self.density = 1.0
    self.density_field = self.dh.add_array('rho', 1)
    self.velocity = (0.0,) * self.dim
    self.velocity_field = self.dh.add_array('u', self.dim)

    def compile_with_ghost_layer(assignments):
        # Every setter/getter kernel gets its own config with one ghost layer.
        kernel_config = CreateKernelConfig(target=target, ghost_layers=1)
        return create_kernel(assignments, config=kernel_config).compile()

    setter = macroscopic_values_setter(
        self.lb_method, self.density, self.velocity, self.pdfs,
        streaming_pattern=self.streaming_pattern,
        previous_timestep=self.zeroth_timestep)
    self.init_kernel = compile_with_ghost_layer(setter)

    self.getter_kernels = []
    for step in self.timesteps:
        getter = macroscopic_values_getter(
            self.lb_method, self.density_field, self.velocity_field, self.pdfs,
            streaming_pattern=self.streaming_pattern,
            previous_timestep=step)
        self.getter_kernels.append(compile_with_ghost_layer(getter))

    # Periodic PDF exchange
    self.periodicity_handler = LBMPeriodicityHandling(
        self.stencil, self.dh, self.pdfs.name,
        streaming_pattern=self.streaming_pattern)

    # Wall boundary, placed by the class's mask callback
    self.wall = wall
    self.bh = LatticeBoltzmannBoundaryHandling(
        self.lb_method, self.dh, self.pdfs.name,
        streaming_pattern=self.streaming_pattern, target=self.target)
    self.bh.set_boundary(boundary_obj=self.wall, mask_callback=self.mask_callback)

    self.current_timestep = self.zeroth_timestep
def test_lees_edwards():
    """Check Lees-Edwards boundary conditions on a periodic D2Q9 domain.

    Two properties are verified:

    1. With the shear offset advanced by ``shear_velocity`` every step, the
       velocity profile along the shear-normal axis agrees with
       ``get_solution_navier_stokes`` to 5 decimals after 500 steps.
    2. A run driven by a single-cell force with a fixed offset of 10 cells
       gives, in the rows selected by ``[:, -3:-1]``, the same velocities as
       the offset-0 run once rolled back by 10 cells along axis 0.
    """
    domain_size = (64, 64)
    omega = 1.0  # relaxation rate of first component
    shear_velocity = 0.1  # shear velocity
    shear_dir = 0  # direction of shear flow
    shear_dir_normal = 1  # direction normal to shear plane, for interpolation

    stencil = LBStencil(Stencil.D2Q9)
    dh = ps.create_data_handling(domain_size, periodicity=True, default_target=ps.Target.CPU)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dh.fill('src', 1.0, ghost_layers=True)
    dst = dh.add_array_like('dst', 'src')
    dh.fill('dst', 0.0, ghost_layers=True)
    force = dh.add_array('force', values_per_cell=stencil.D)
    dh.fill('force', 0.0, ghost_layers=True)
    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 1.0, ghost_layers=True)
    u = dh.add_array('u', values_per_cell=stencil.D)
    dh.fill('u', 0.0, ghost_layers=True)

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil.D)]
    points_up = sp.Symbol('points_up')
    points_down = sp.Symbol('points_down')

    # Velocity jump applied in the first/last two layers along the shear-normal
    # axis; points_up / points_down are placeholders resolved per direction below.
    normal_counter = counters[shear_dir_normal]
    u_p = sp.Piecewise(
        (1, sp.And(ps.data_types.type_all_numbers(normal_counter <= 1, 'int'), points_down)),
        (-1, sp.And(ps.data_types.type_all_numbers(normal_counter >= src.shape[shear_dir_normal] - 2, 'int'),
                    points_up)),
        (0, True)) * shear_velocity

    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, compressible=True,
                           velocity_input=u.center_vector + sp.Matrix([u_p, 0]),
                           density_input=rho,
                           force_model=ForceModel.LUO, force=force.center_vector,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    # Inline every subexpression that depends on the shear-direction velocity
    # moment so the placeholders end up in the main assignments.
    shear_moment = collision.method.first_order_equilibrium_moment_symbols[shear_dir]
    to_insert = [s.lhs for s in collision.subexpressions if shear_moment in s.free_symbols]
    for s in to_insert:
        collision = collision.new_with_inserted_subexpression(s)

    # Resolve the placeholders according to each direction's normal component.
    ma = []
    for a, c in zip(collision.main_assignments, collision.method.stencil):
        if c[shear_dir_normal] == -1:
            down, up = True, False
        elif c[shear_dir_normal] == 1:
            down, up = False, True
        else:
            down, up = False, False
        a = ps.Assignment(a.lhs, a.rhs.replace(points_down, down))
        a = ps.Assignment(a.lhs, a.rhs.replace(points_up, up))
        ma.append(a)
    collision.main_assignments = ma

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {'density': rho, 'velocity': u})

    config = ps.CreateKernelConfig(target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    init = macroscopic_values_setter(collision.method, velocity=(0, 0),
                                     pdfs=src.center_vector, density=rho.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    # Mutable single-element list shared with the synchronization functor.
    offset = [0.0]
    sync_pdfs = dh.synchronization_function(
        [src.name], functor=partial(get_le_boundary_functor, shear_offset=offset))

    def run_time_loop(steps, offset_per_step=None):
        """Run collide/sync/stream for `steps` steps; optionally advance the offset each step."""
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            dh.run_kernel(stream_kernel)
            dh.swap(src.name, dst.name)
            # Only touch the offset when asked to, so a fixed integer offset
            # stays an integer (it is later used as an np.roll shift).
            if offset_per_step is not None:
                offset[0] += offset_per_step
        dh.all_to_cpu()

    def run_with_point_force(fixed_offset, steps=20):
        """Reset the fields, apply a single-cell force and return the compared velocity rows."""
        dh.fill(rho.name, 1.0, ghost_layers=True)
        dh.run_kernel(init_kernel)
        dh.fill(u.name, 0.0, ghost_layers=True)
        dh.fill(force.name, 0.0, ghost_layers=True)
        dh.cpu_arrays[force.name][domain_size[0] // 3, 1, :] = [1e-2, -1e-1]
        offset[0] = fixed_offset
        run_time_loop(steps)
        return np.array(dh.gather_array(u.name)[:, -3:-1, :])

    # Part 1: shear profile against the analytical solution.
    dh.run_kernel(init_kernel)
    shear_steps = 500
    run_time_loop(shear_steps, offset_per_step=shear_velocity)

    nu = lattice_viscosity_from_relaxation_rate(omega)
    # The profile below is sampled along the shear-normal axis, so the channel
    # height must be that axis' extent.
    h = domain_size[shear_dir_normal]
    k_max = 100

    analytical_solution = get_solution_navier_stokes(
        np.linspace(0.5, h - 0.5, h), shear_steps, nu, shear_velocity, h, k_max)
    np.testing.assert_array_almost_equal(analytical_solution,
                                         dh.gather_array(u.name)[0, :, 0],
                                         decimal=5)

    # Part 2: a fixed offset must only shift the result along axis 0.
    fixed_offset = 10
    vel_unshifted = run_with_point_force(0)
    vel_shifted = run_with_point_force(fixed_offset)

    vel_rolled = np.roll(vel_shifted, -fixed_offset, axis=0)
    np.testing.assert_array_almost_equal(vel_unshifted, vel_rolled)
def flow_around_sphere(stencil, galilean_correction, L_LU, total_steps):
    """Run a cumulant LBM channel flow past a sphere on the GPU and assert it stays finite.

    The channel is ``10 * L_LU`` long and ``5 * L_LU`` wide in every other
    direction, with a ``UBB`` inflow on the 'W' face, ``NoSlip`` on the
    remaining listed faces and on the sphere, and an extrapolation kernel
    writing the outflow PDFs. The 'aa' streaming pattern is used.

    Args:
        stencil: lattice stencil (``stencil.D`` and ``stencil.Q`` are read).
        galilean_correction: forwarded to ``LBMConfig``; for stencils with
            ``Q != 27`` the function does nothing in that case.
        L_LU: length scale in lattice units; sets channel size, sphere radius
            (``L_LU // 2``) and viscosity.
        total_steps: number of time steps to run.

    Returns:
        ``True`` when the run is skipped (Galilean correction requested for a
        stencil with ``Q != 27``), otherwise ``None``.

    Raises:
        AssertionError: if the velocity field contains a non-finite value at a
            periodic check or after the last step.
    """
    if galilean_correction and stencil.Q != 27:
        return True

    target = Target.GPU
    streaming_pattern = 'aa'
    timesteps = get_timesteps(streaming_pattern)

    u_max = 0.05
    Re = 500000

    kinematic_viscosity = (L_LU * u_max) / Re
    initial_velocity = (u_max, ) + (0, ) * (stencil.D - 1)

    omega_v = relaxation_rate_from_lattice_viscosity(kinematic_viscosity)

    channel_size = (10 * L_LU, ) + (5 * L_LU, ) * (stencil.D - 1)
    sphere_position = (channel_size[0] // 3, ) + (channel_size[1] // 2, ) * (stencil.D - 1)
    sphere_radius = L_LU // 2

    lbm_config = LBMConfig(stencil=stencil, method=Method.CUMULANT, relaxation_rate=omega_v,
                           galilean_correction=galilean_correction)
    lbm_opt = LBMOptimisation(pre_simplification=True)
    config = CreateKernelConfig(target=target)

    lb_method = create_lb_method(lbm_config=lbm_config)

    def get_extrapolation_kernel(timestep):
        """Compile the outflow kernel for the given previous timestep."""
        # pdf_field is bound later in the enclosing scope, before the first call.
        indexing = BetweenTimestepsIndexing(
            pdf_field, stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep)
        f_out, _ = indexing.proxy_fields

        # For every direction with negative x-component, copy the cell's
        # outgoing PDF into its +x neighbour.
        boundary_assignments = [
            Assignment(f_out.neighbor(0, 1)(i), f_out.center(i))
            for i, d in enumerate(stencil) if d[0] == -1
        ]
        boundary_assignments = indexing.substitute_proxies(boundary_assignments)

        iter_slice = get_slice_before_ghost_layer((1, ) + (0, ) * (stencil.D - 1))
        extrapolation_ast = create_kernel(
            boundary_assignments,
            config=CreateKernelConfig(iteration_slice=iter_slice, ghost_layers=1, target=target))
        return extrapolation_ast.compile()

    dh = create_data_handling(channel_size, periodicity=False,
                              default_layout='fzyx', default_target=target)

    u_field = dh.add_array('u', stencil.D)
    rho_field = dh.add_array('rho', 1)
    pdf_field = dh.add_array('pdfs', stencil.Q)

    dh.fill(u_field.name, 0.0, ghost_layers=True)
    dh.fill(rho_field.name, 0.0, ghost_layers=True)

    dh.to_gpu(u_field.name)
    dh.to_gpu(rho_field.name)

    lbm_opt = replace(lbm_opt, symbolic_field=pdf_field)

    bh = LatticeBoltzmannBoundaryHandling(lb_method, dh, pdf_field.name,
                                          streaming_pattern=streaming_pattern, target=target)
    wall = NoSlip()
    inflow = UBB(initial_velocity)

    bh.set_boundary(inflow, slice_from_direction('W', stencil.D))

    directions = ('N', 'S', 'T', 'B') if stencil.D == 3 else ('N', 'S')
    for direction in directions:
        bh.set_boundary(wall, slice_from_direction(direction, stencil.D))

    outflow_kernels = [
        get_extrapolation_kernel(Timestep.EVEN),
        get_extrapolation_kernel(Timestep.ODD)
    ]

    def sphere_boundary_callback(x, y, z=None):
        """Mask of all cells within `sphere_radius` of `sphere_position`."""
        x = x - sphere_position[0]
        y = y - sphere_position[1]
        z = z - sphere_position[2] if z is not None else 0
        return np.sqrt(x**2 + y**2 + z**2) <= sphere_radius

    bh.set_boundary(wall, mask_callback=sphere_boundary_callback)

    init_eqs = pdf_initialization_assignments(
        lb_method, 1.0, initial_velocity, pdf_field,
        streaming_pattern=streaming_pattern, previous_timestep=timesteps[0])
    init_kernel = create_kernel(init_eqs, config=config).compile()

    output = {'density': rho_field, 'velocity': u_field}
    lbm_config = replace(lbm_config, output=output)

    lb_collision_rule = create_lb_collision_rule(lb_method=lb_method, lbm_config=lbm_config,
                                                 lbm_optimisation=lbm_opt)

    lb_kernels = []
    for t in timesteps:
        lbm_config = replace(lbm_config, timestep=t)
        lbm_config = replace(lbm_config, streaming_pattern=streaming_pattern)
        lb_kernels.append(
            create_lb_function(collision_rule=lb_collision_rule, lbm_config=lbm_config,
                               lbm_optimisation=lbm_opt, config=config))

    def assert_velocity_is_finite():
        """Copy the velocity field to the CPU and assert that every entry is finite."""
        dh.to_cpu(u_field.name)
        assert np.isfinite(dh.cpu_arrays[u_field.name]).all()

    timestep = timesteps[0]

    dh.run_kernel(init_kernel)

    stability_check_frequency = 1000

    for i in range(total_steps):
        bh(prev_timestep=timestep)
        dh.run_kernel(outflow_kernels[timestep.idx])
        timestep = timestep.next()
        dh.run_kernel(lb_kernels[timestep.idx])

        if i % stability_check_frequency == 0:
            assert_velocity_is_finite()

    # The periodic check alone only covers steps 0, 1000, 2000, ...; without
    # this, a run shorter than the check frequency would never have its final
    # state validated.
    assert_velocity_is_finite()