def test_strided(instruction_set, dtype):
    """Vectorize a five-point stencil on generic 2D fields and compare against the default kernel.

    For instruction sets without a ``storeS`` instruction (other than avx512, rvv
    and the sve family) kernel creation must emit a "Could not vectorize loop"
    ``UserWarning``; for all others it must not emit any warning at all.  In both
    cases the compiled kernel has to reproduce the result of the kernel created
    with the default configuration.

    Args:
        instruction_set: name of the vector instruction set to generate code for.
        dtype: ``'double'`` selects float64 data, anything else float32.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    is_double = dtype == 'double'
    np_dtype = np.float64 if is_double else np.float32

    f, g = ps.fields(f"f, g : float{64 if is_double else 32}[2D]")
    update_rule = [
        ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
    ]

    expect_fallback_warning = (
        'storeS' not in get_vector_instruction_set(dtype, instruction_set)
        and instruction_set not in ['avx512', 'rvv']
        and not instruction_set.startswith('sve')
    )

    if expect_fallback_warning:
        with pytest.warns(UserWarning) as warn:
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        # Checked after the block so that a missing warning is reported by
        # pytest.warns itself instead of surfacing as an IndexError here.
        assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        # pytest.warns(None) is no longer accepted by current pytest versions;
        # record every warning with the standard library instead.
        with warnings.catch_warnings(record=True) as warn:
            warnings.simplefilter("always")
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        assert len(warn) == 0

    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()

    arr = np.random.random((23 + 2, 17 + 2)).astype(np_dtype)
    dst = np.zeros_like(arr, dtype=np_dtype)
    ref = np.zeros_like(arr, dtype=np_dtype)

    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)
    np.testing.assert_almost_equal(dst, ref, 13 if is_double else 5)
def test_create_kernel_config():
    """Check the backend/target pairs that ``CreateKernelConfig`` ends up with.

    Covers the defaults, a config built from a GPU target only, and a config
    built from a CUDA backend only (for which the target is expected to stay CPU).
    """
    default_config = ps.CreateKernelConfig()
    assert default_config.backend == ps.Backend.C
    assert default_config.target == ps.Target.CPU

    gpu_target_config = ps.CreateKernelConfig(target=ps.Target.GPU)
    assert gpu_target_config.backend == ps.Backend.CUDA

    cuda_backend_config = ps.CreateKernelConfig(backend=ps.Backend.CUDA)
    assert cuda_backend_config.target == ps.Target.CPU
    assert cuda_backend_config.backend == ps.Backend.CUDA
def test_sqrt_of_integer():
    """Regression test for a bug where ``sqrt(3)`` was classified as an integer.

    The value is routed through a temporary symbol into a one-dimensional field,
    once with the default configuration and once with ``data_type="float32"``.
    """
    # --- default configuration, float64 array ---
    field = ps.fields("f: [1D]")
    temp = sp.symbols("tmp")
    eqs = [ps.Assignment(temp, sp.sqrt(3)), ps.Assignment(field[0], temp)]
    values_f64 = np.array([1], dtype=np.float64)

    ps.create_kernel(eqs).compile()(f=values_f64)
    assert 1.7 < values_f64[0] < 1.8

    # --- single precision ---
    field = ps.fields("f: float32[1D]")
    temp = sp.symbols("tmp")
    eqs = [ps.Assignment(temp, sp.sqrt(3)), ps.Assignment(field[0], temp)]
    values_f32 = np.array([1], dtype=np.float32)

    single_precision = ps.CreateKernelConfig(data_type="float32")
    compiled = ps.create_kernel(eqs, config=single_precision).compile()
    compiled(f=values_f32)

    generated = ps.get_code_str(compiled.ast)
    # ps.show_code(compiled.ast)
    # 1.7320508075688772935 --> rounding to ...773 is actually correct. This was wrong before !282
    assert "1.7320508075688773f" in generated
    assert 1.7 < values_f32[0] < 1.8
def test_sum_use_float(default_assignment_simplifications):
    """Assign ``Sum(k, (k, 1, 100))`` to a float32 field and check the generated kernel.

    With simplifications explicitly disabled the generated code must contain a
    ``float sum`` accumulator.  In every case the kernel must fill the array with
    the closed-form value of the sum.
    """
    symbolic_sum = sympy.Sum(sp.abc.k, (sp.abc.k, 1, 100))
    closed_form = symbolic_sum.doit()

    print(symbolic_sum)
    print(closed_form)

    x = ps.fields('x: float32[1d]')
    assignments = ps.AssignmentCollection({x.center(): symbolic_sum})

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications,
        data_type=create_type('float32'),
    )
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()

    print(code)
    if default_assignment_simplifications is False:
        assert 'float sum' in code

    array = np.zeros((10, ), np.float32)
    kernel(x=array)

    expected = int(closed_form) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_product(default_assignment_simplifications):
    """Assign ``Product(k, (k, 1, 10))`` over an int64 symbol to an int64 field.

    With simplifications explicitly disabled the generated code must contain an
    ``int64_t product`` accumulator.  In every case the kernel must fill the
    array with the evaluated product.
    """
    k = ps.TypedSymbol('k', create_type('int64'))

    symbolic_product = sympy.Product(k, (k, 1, 10))
    evaluated = symbolic_product.doit()

    print(symbolic_product)
    print(evaluated)

    x = ps.fields('x: int64[1d]')
    assignments = ps.AssignmentCollection({x.center(): symbolic_product})

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()

    print(code)
    if default_assignment_simplifications is False:
        assert 'int64_t product' in code

    array = np.zeros((10, ), np.int64)
    kernel(x=array)

    expected = int(evaluated) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_optimised_and_full_communication_equivalence(stencil_name):
    """Compare the full ghost-layer synchronisation with ``LBMPeriodicityHandling``.

    Both PDF arrays are filled with a running index, one stream-only step is
    executed after each kind of communication, and the resulting PDF arrays
    are compared on the checked index range.

    Args:
        stencil_name: stencil identifier passed on to ``LBStencil``.
    """
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4, ) * stencil.D

    dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
                                 parallel=False, default_target=target)

    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)

    gl = dh.ghost_layers_of_field("pdf")

    def fill_with_running_index():
        # Every entry (ghost layers included) gets its position in C-order
        # enumeration, which is the order np.ndenumerate visits the entries in.
        pdf_arr = dh.cpu_arrays['pdf']
        running_index = np.arange(pdf_arr.size, dtype=pdf_arr.dtype).reshape(pdf_arr.shape)
        pdf_arr[...] = running_index
        dh.cpu_arrays['pdf_tmp'][...] = running_index

    fill_with_running_index()

    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()

    # Reference run: full synchronisation of the ghost layers.
    full_communication = dh.synchronization_function(pdf.name, target=dh.default_target,
                                                     optimization={"openmp": True})
    full_communication()

    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])

    # Second run from the same initial state, using the optimised communication.
    fill_with_running_index()

    optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh,
                                                     pdf_field_name=pdf.name,
                                                     streaming_pattern='pull')
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")

    # Same index range as the former nested loops: gl .. domain_size[d] - 1 in
    # every spatial direction, all PDF entries.
    # NOTE(review): the upper bound does not add the ghost layer width, so the
    # last `gl` cells per direction are not compared - confirm this is intended.
    checked_region = tuple(slice(gl, extent) for extent in domain_size)
    np.testing.assert_array_equal(dh.cpu_arrays['pdf'][checked_region],
                                  pdf_full_communication[checked_region])
def test_kernel_decorator_config():
    """The result of ``ps.kernel_config`` can be unpacked as keyword arguments of ``create_kernel``."""
    config = ps.CreateKernelConfig()
    field_arrays = {name: np.ones(100) for name in ('a', 'b', 'c')}
    a, b, c = ps.fields(**field_arrays)

    @ps.kernel_config(config)
    def kernel_spec():
        a[0] @= b[0] + c[0]

    ps.create_kernel(**kernel_spec)
def test_abs():
    """GPU code for ``Abs`` of an int64-cast field value must not contain a call to ``fabs(``."""
    x, y, z = ps.fields('x, y, z: float64[2d]')

    int_type = create_type('int64')
    integer_value = cast_func(y[0, 0], int_type)
    assignments = ps.AssignmentCollection({x[0, 0]: sympy.Abs(integer_value)})

    gpu_config = ps.CreateKernelConfig(target=ps.Target.GPU)
    code = ps.get_code_str(ps.create_kernel(assignments, config=gpu_config))
    print(code)
    assert 'fabs(' not in code
def test_creation(method_enum, double_precision):
    """Generated LB function code must only contain the floating point type that was requested."""
    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)

    data_type = "float64" if double_precision else "float32"
    config = ps.CreateKernelConfig(data_type=data_type)

    func = create_lb_function(lbm_config=lbm_config, config=config)
    code = ps.get_code_str(func)

    # (type that must be absent, type that must be present)
    absent, present = ('float', 'double') if double_precision else ('double', 'float')
    assert absent not in code
    assert present in code
def test_scenario(method_enum, double_precision):
    """Run one step of a lid driven cavity and check the floating point type in the kernel code."""
    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)

    data_type = "double" if double_precision else "float32"
    config = ps.CreateKernelConfig(data_type=data_type)

    sc = create_lid_driven_cavity((16, 16, 8), lbm_config=lbm_config, config=config)
    sc.run(1)
    code = ps.get_code_str(sc.ast)

    # (type that must be absent, type that must be present)
    absent, present = ('float', 'double') if double_precision else ('double', 'float')
    assert absent not in code
    assert present in code
def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes):
    """Compare vectorized lid driven cavity runs against runs with the default configuration.

    Two domain sizes are simulated: the first without and the second with the
    ``split`` optimisation.  The velocity fields after 100 time steps must agree
    with the corresponding reference runs.
    """
    vectorization_options = dict(
        instruction_set=instruction_set,
        assume_aligned=aligned_and_padding[0],
        nontemporal=nontemporal,
        assume_inner_stride_one=True,
        assume_sufficient_line_padding=aligned_and_padding[1],
    )
    time_steps = 100
    size1 = (64, 32)
    size2 = (666, 34)
    relaxation_rate = 1.8

    print("Computing reference solutions")
    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
    ldc1_ref.run(time_steps)
    ldc2_ref = create_lid_driven_cavity(size2, relaxation_rate=relaxation_rate)
    ldc2_ref.run(time_steps)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
    precision = "double" if double_precision else "float32"
    config = ps.CreateKernelConfig(data_type=precision, cpu_vectorize_info=vectorization_options)
    lbm_opt_split = LBMOptimisation(cse_global=True, split=True)
    lbm_opt = LBMOptimisation(cse_global=True, split=False)

    print(f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
          f"fixed loop sizes {fixed_loop_sizes}")

    # first domain: no split
    ldc1 = create_lid_driven_cavity(size1, fixed_loop_sizes=fixed_loop_sizes, lbm_config=lbm_config,
                                    lbm_optimisation=lbm_opt, config=config)
    ldc1.run(time_steps)
    np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :])

    # second domain: with split
    ldc2 = create_lid_driven_cavity(size2, fixed_loop_sizes=fixed_loop_sizes, lbm_config=lbm_config,
                                    lbm_optimisation=lbm_opt_split, config=config)
    ldc2.run(time_steps)
    np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    """Create an aligned kernel with non-temporal stores and check the instructions it uses.

    Depending on the instruction set, certain entries must be present in
    ``ast.instruction_set``; every such entry that is present must also show up
    in the generated code.  Finally the kernel is run on a field of ones.
    """
    domain_size = (24, 24)
    # create a datahandling object
    dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False,
                                 default_target=Target.CPU)

    # fields
    alignment = 'cacheline' if openmp else True
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)

    opt = dict(instruction_set=instruction_set, assume_aligned=True,
               nontemporal=True, assume_inner_stride_one=True)
    neighbour_average = 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1])
    update_rule = [ps.Assignment(f.center(), neighbour_average)]

    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)

    if instruction_set in ['sse'] or instruction_set.startswith('avx'):
        for required in ('stream', 'streamFence'):
            assert required in ast.instruction_set
    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set in ['vsx']:
        assert 'storeAAndFlushCacheline' in ast.instruction_set

    special_instructions = ['stream', 'streamFence', 'cachelineZero',
                            'storeAAndFlushCacheline', 'flushCacheline']
    for instruction in special_instructions:
        if instruction not in ast.instruction_set:
            continue
        # only the part of the template in front of the first '{' is searched for
        intrinsic_prefix = ast.instruction_set[instruction].split('{')[0]
        assert intrinsic_prefix in ps.get_code_str(ast)

    kernel = ast.compile()
    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_evaluate_constant_terms(target, simplification):
    """``cos(`` appears in the generated code exactly when simplifications are switched off."""
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    src, dst = ps.fields('src, dst: float32[2d]')

    # Triggers Sympy's cos optimization
    rhs = -sp.cos(1) + dst[0, 0]
    assignments = ps.AssignmentCollection({src[0, 0]: rhs})

    config = ps.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
    code = ps.get_code_str(ps.create_kernel(assignments, config=config))

    if not simplification:
        assert 'cos(' in code
    else:
        assert 'cos(' not in code
    print(code)
def test_source_code_comment():
    """A kernel whose body got comment and empty-line nodes appended must still print and compile."""
    a, b = pystencils.fields('a,b: float[2D]')

    assignments = pystencils.AssignmentCollection({a.center(): b[0, 2] + b[0, 0]}, {})

    config = pystencils.CreateKernelConfig(target=pystencils.Target.CPU)
    ast = pystencils.create_kernel(assignments, config=config)

    nodes = pystencils.astnodes
    for extra_node in (nodes.SourceCodeComment("Hallo"),
                       nodes.EmptyLine(),
                       nodes.SourceCodeComment("World!")):
        ast.body.append(extra_node)

    print(ast)
    compiled = ast.compile()
    assert compiled is not None

    pystencils.show_code(ast)
def test_vectorized_abs(instruction_set, dtype):
    """Check ``Abs`` in a vectorized kernel for the given instruction set and data type.

    Not every instruction set provides abs, and the special handling of unary
    minus makes the generated code depend on the data type as well.  The input
    holds +1 and -1 entries, so the absolute values summed over the interior
    must equal the number of interior cells.
    """
    interior_rows, interior_cols = 2**2, 2**3
    np_dtype = np.float64 if dtype == 'double' else np.float32

    arr = np.ones((interior_rows + 2, interior_cols + 2), dtype=np_dtype)
    arr[-3:, :] = -1

    f, g = ps.fields(f=arr, g=arr)
    update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]

    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
    func = ps.create_kernel(update_rule, config=config).compile()

    dst = np.zeros_like(arr)
    func(g=dst, f=arr)
    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), interior_rows * interior_cols)
def test_momentum_density_shift(force_model):
    """Compute the momentum density from all-zero PDFs with force ``(1, 2)`` applied.

    Summed over the domain, the x component must equal half the number of cells
    and the y component the number of cells.
    """
    target = Target.CPU

    stencil = LBStencil(Stencil.D2Q9)
    domain_size = (4, 4)
    dh = ps.create_data_handling(domain_size=domain_size, default_target=target)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 0.0, ghost_layers=True)

    momentum_density = dh.add_array('momentum_density', values_per_cell=dh.dim)
    dh.fill('momentum_density', 0.0, ghost_layers=True)

    src = dh.add_array('src', values_per_cell=len(stencil))
    dh.fill('src', 0.0, ghost_layers=True)

    lbm_config = LBMConfig(method=Method.SRT, compressible=True,
                           force_model=force_model, force=(1, 2))
    method = create_lb_method(lbm_config=lbm_config)

    outputs = {'density': rho.center,
               'momentum_density': momentum_density.center_vector}
    momentum_density_getter = method.conserved_quantity_computation.output_equations_from_pdfs(
        src.center_vector, outputs)

    config = ps.CreateKernelConfig(target=dh.default_target)
    momentum_density_kernel = ps.create_kernel(momentum_density_getter, config=config).compile()
    dh.run_kernel(momentum_density_kernel)

    cell_count = np.prod(domain_size)
    assert np.sum(dh.gather_array(momentum_density.name)[:, :, 0]) == cell_count / 2
    assert np.sum(dh.gather_array(momentum_density.name)[:, :, 1]) == cell_count
def test_sympy_optimizations(target, simplification):
    """``expm1(`` appears in the generated code exactly when simplifications are switched on."""
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    src, dst = ps.fields('src, dst: float32[2d]')

    # Triggers Sympy's expm1 optimization.
    # That optimization is tedious to use and its behaviour depends heavily on the sympy
    # version. In some cases the exp expression has to be wrapped in brackets or multiplied
    # with 1 or 1.0 for sympy to work properly ...
    rhs = 1.0 * (sp.exp(dst[0, 0]) - 1)
    assignments = ps.AssignmentCollection({src[0, 0]: rhs})

    config = ps.CreateKernelConfig(target=target, default_assignment_simplifications=simplification)
    code = ps.get_code_str(ps.create_kernel(assignments, config=config))

    if not simplification:
        assert 'expm1(' not in code
    else:
        assert 'expm1(' in code
def test_vectorisation_varying_arch(instruction_set):
    """Vectorize a kernel that doubles a three-component field of ones, in place."""
    arr = np.ones((9, 9, 3), order='f')

    # The kernel body is left untouched: the decorator works on its source code.
    @ps.kernel
    def update_rule(s):
        f = ps.fields("f(3) : [2D]", f=arr)
        s.tmp0 @= f(0)
        s.tmp1 @= f(1)
        s.tmp2 @= f(2)
        f0, f1, f2 = f(0), f(1), f(2)
        f0 @= 2 * s.tmp0
        f1 @= 2 * s.tmp0
        f2 @= 2 * s.tmp0

    vectorize_info = {'instruction_set': instruction_set}
    config = ps.CreateKernelConfig(cpu_vectorize_info=vectorize_info)
    compiled_kernel = ps.create_kernel(update_rule, config=config).compile()
    compiled_kernel(f=arr)

    np.testing.assert_equal(arr, 2)
def test_lbm_vectorization_short():
    """Run a short lid driven cavity with the last entry of the supported instruction sets.

    A reference run with the default configuration is executed first; no result
    comparison is made, both scenarios only have to run.
    """
    print("Computing reference solutions")
    size1 = (64, 32)
    relaxation_rate = 1.8
    steps = 10

    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
    ldc1_ref.run(steps)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
    vectorize_info = dict(
        instruction_set=get_supported_instruction_sets()[-1],
        assume_aligned=True,
        nontemporal=True,
        assume_inner_stride_one=True,
        assume_sufficient_line_padding=False,
    )
    config = ps.CreateKernelConfig(cpu_vectorize_info=vectorize_info)

    ldc1 = create_lid_driven_cavity(size1, lbm_config=lbm_config, config=config,
                                    fixed_loop_sizes=False)
    ldc1.run(steps)
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
    """Run an aligned copy kernel on fields with ``gl_field`` ghost layers.

    When the kernel was created for a different number of ghost layers than the
    fields have, running it must raise ``ValueError``.
    """
    np_dtype = np.float64 if dtype == 'double' else np.float32

    domain_size = (128, 128)
    dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target=Target.CPU)

    array_options = dict(values_per_cell=1, dtype=np_dtype, ghost_layers=gl_field, alignment=True)
    src = dh.add_array("src", **array_options)
    dh.fill(src.name, 1.0, ghost_layers=True)
    dst = dh.add_array("dst", **array_options)
    dh.fill(dst.name, 1.0, ghost_layers=True)

    update_rule = ps.Assignment(dst[0, 0], src[0, 0])
    opt = dict(instruction_set=instruction_set, assume_aligned=True,
               nontemporal=True, assume_inner_stride_one=True)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt,
                                   ghost_layers=gl_kernel)
    kernel = ps.create_kernel(update_rule, config=config).compile()

    if gl_kernel == gl_field:
        dh.run_kernel(kernel)
    else:
        with pytest.raises(ValueError):
            dh.run_kernel(kernel)
def test_issue40(*_):
    """Regression test for https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40

    The avx512 code generated for a double precision kernel must not contain ``epi32``.
    """
    opt = dict(instruction_set="avx512", assume_aligned=False,
               nontemporal=False, assume_inner_stride_one=True)

    src = ps.fields("src(1): double[2D]", layout='fzyx')
    rho = sp.Symbol('rho')
    eq = [
        ps.Assignment(rho, 1.0),
        ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * rho),
    ]

    config = ps.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=opt, data_type='float64')
    code = ps.get_code_str(ps.create_kernel(eq, config=config))

    assert 'epi32' not in code
def test_sympy_assignment(default_assignment_simplifications):
    """Check the ``log`` variants in the generated code and ``SympyAssignment.replace``.

    ``log1p`` may only show up when simplifications are enabled, ``log2`` never.
    Afterwards both sides of the assignment are replaced in place.
    """
    rhs = sp.log(x + 3) / sp.log(2) + sp.log(x**2 + 1)
    assignment = SympyAssignment(dst[0, 0](0), rhs)

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    code = ps.get_code_str(ps.create_kernel([assignment], config=config))

    if not default_assignment_simplifications:
        # no optimisations are applied, so the optimised version of log is not in the code
        assert 'log1p' not in code
    else:
        assert 'log1p' in code
    # with simplifications the constant term is evaluated directly; without them
    # no optimised log variant is used at all
    assert 'log2' not in code

    new_lhs = dst[0, 0](1)
    new_rhs = sp.log(2)
    assignment.replace(assignment.lhs, new_lhs)
    assignment.replace(assignment.rhs, new_rhs)

    assert assignment.lhs == dst[0, 0](1)
    assert assignment.rhs == sp.log(2)
def test_square_root(dtype, instruction_set, field_layout):
    """A vectorized kernel containing ``sqrt`` of a field value must be created and compiled."""
    vectorize_info = dict(instruction_set=instruction_set, assume_inner_stride_one=True,
                          assume_aligned=False, nontemporal=False)
    config = ps.CreateKernelConfig(data_type=dtype, cpu_vectorize_info=vectorize_info)

    src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1,
                                        layout=field_layout, index_shape=(9, ))

    xi = sp.Symbol("xi")
    eq = [
        ps.Assignment(xi, sum(src_field.center_vector)),
        ps.Assignment(sp.Symbol("xi_2"), xi * sp.sqrt(src_field.center)),
    ]

    ps.create_kernel(eq, config=config).compile()
def test_shear_flow(target, stencil_name):
    """Simulate a shear flow between two moving walls and compare with the expected profiles.

    The walls are velocity bounce back boundaries moving with +/- half the shear
    velocity.  After ``t_max`` steps the averaged velocity profile is compared
    with ``shear_flow(...)`` and the second-moment tensor with ``p_expected``.
    Physical parameters (``width``, ``eta``, ``rho_0``, ...) are read from
    module level.
    """
    # Cuda
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    # LB parameters
    stencil = LBStencil(stencil_name)

    if stencil.D == 2:
        L = (4, width)
    elif stencil.D == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (stencil.D - 2)

    omega = relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    density_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=stencil.D)
    p = dh.add_array('p', values_per_cell=stencil.D**2)

    # LB Setup
    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # Second moment test setup
    cqc = collision.method.conserved_quantity_computation
    getter_eqs = cqc.output_equations_from_pdfs(src.center_vector, {'moment2': p})
    kernel_compute_p = ps.create_kernel(getter_eqs, config=config).compile()

    # ## Set up the simulation
    init_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                 pdfs=src.center_vector,
                                                 density=density_field.center)
    init_kernel = ps.create_kernel(init_assignments, ghost_layers=0).compile()

    vel_vec = sp.Matrix([0.5 * shear_velocity] + [0] * (stencil.D - 1))
    if stencil.D == 2:
        first_wall = ps.make_slice[:, :wall_thickness]
        second_wall = ps.make_slice[:, -wall_thickness:]
    elif stencil.D == 3:
        first_wall = ps.make_slice[:, :wall_thickness, :]
        second_wall = ps.make_slice[:, -wall_thickness:, :]
    else:
        raise Exception()
    lbbh.set_boundary(UBB(velocity=vel_vec), first_wall)
    lbbh.set_boundary(UBB(velocity=-vel_vec), second_wall)

    assert len(lbbh._boundary_object_to_boundary_info) == 2, "Restart kernel to clear boundaries"

    def initialise():
        dh.fill(density_field.name, rho_0)
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)
        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def averaged_over_periodic_directions(field):
        """Fetch ``field`` to the CPU if needed and average it over the periodic axes."""
        if field.name in dh.gpu_arrays:
            dh.to_cpu(field.name)
        values = dh.gather_array(field.name)
        # average periodic directions
        if stencil.D == 3:  # don't swap the order of the two averages
            values = np.average(values, axis=2)
        return np.average(values, axis=0)

    # Time loop
    def time_loop(steps):
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)
            dh.run_kernel(kernel_compute_p)

            dh.swap(src.name, dst.name)

        uu = averaged_over_periodic_directions(u)
        pp = averaged_over_periodic_directions(p)

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]
        pp = pp[wall_thickness:-wall_thickness]

        # one D x D tensor per remaining position (D is 2 or 3 at this point)
        pp = pp.reshape((len(pp), stencil.D, stencil.D))
        return uu, pp

    initialise()
    # Simulation
    profile, pressure_profile = time_loop(t_max)

    expected = shear_flow(x=(np.arange(0, actual_width) + .5),
                          t=t_max,
                          nu=eta / rho_0,
                          v=shear_velocity,
                          h=actual_width,
                          k_max=100)

    # unit vectors along the first and the second axis
    unit_vectors = np.eye(stencil.D)
    shear_direction = unit_vectors[0]
    shear_plane_normal = unit_vectors[1]

    shear_rate = shear_velocity / actual_width
    dynamic_viscosity = eta * rho_0
    correction_factor = eta / (eta - 1. / 6.)

    shear_tensor = np.outer(shear_plane_normal, shear_direction)
    p_expected = rho_0 * np.identity(dh.dim) / 3.0 + dynamic_viscosity * shear_rate / correction_factor * (
        shear_tensor + np.transpose(shear_tensor))

    # Subtract the tensor product of the velocity to get the pressure
    pressure_profile[:, 0, 0] -= rho_0 * profile[:, 0]**2

    np.testing.assert_allclose(profile[:, 0], expected[1:-1], atol=1E-9)
    for position in range(actual_width - 2):
        np.testing.assert_allclose(pressure_profile[position], p_expected, atol=1E-9, rtol=1E-3)
def test_diffusion():
    """
    Runs the "Diffusion from Plate in Uniform Flow" benchmark as it is described
    in [ch. 8.6.3, The Lattice Boltzmann Method, Krüger et al.].

                dC/dy = 0
            ┌───────────────┐
            │     → → →     │
    C = 0   │     → u →     │   dC/dx = 0
            │     → → →     │
            └───────────────┘
                  C = 1

    The analytical solution is given by:
        C(x,y) = 1 * erfc(y / sqrt(4Dx/u))

    The hydrodynamic field is not simulated, instead a constant velocity is assumed.
    The test passes when the root mean square deviation between the simulated
    and the analytical concentration is below 1e-2.
    """
    pytest.importorskip("pycuda")

    # Parameters
    domain_size = (1600, 160)
    omega = 1.38
    diffusion = (1 / omega - 0.5) / 3
    velocity = 0.05
    time_steps = 50000
    stencil = LBStencil(Stencil.D2Q9)
    target = ps.Target.GPU

    # Data Handling
    dh = ps.create_data_handling(domain_size=domain_size, default_target=target)

    vel_field = dh.add_array('vel_field', values_per_cell=stencil.D)
    dh.fill('vel_field', velocity, 0, ghost_layers=True)
    dh.fill('vel_field', 0.0, 1, ghost_layers=True)

    con_field = dh.add_array('con_field', values_per_cell=1)
    dh.fill('con_field', 0.0, ghost_layers=True)

    pdfs = dh.add_array('pdfs', values_per_cell=stencil.Q)
    dh.fill('pdfs', 0.0, ghost_layers=True)
    pdfs_tmp = dh.add_array('pdfs_tmp', values_per_cell=stencil.Q)
    dh.fill('pdfs_tmp', 0.0, ghost_layers=True)

    # Lattice Boltzmann method
    lbm_config = LBMConfig(stencil=stencil, method=Method.MRT, relaxation_rates=[1, 1.5, 1, 1.5, 1],
                           velocity_input=vel_field, output={'density': con_field}, compressible=True,
                           weighted=True, kernel_type='stream_pull_collide')

    lbm_opt = LBMOptimisation(symbolic_field=pdfs, symbolic_temporary_field=pdfs_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    method = create_lb_method(lbm_config=lbm_config)
    method.set_conserved_moments_relaxation_rate(omega)

    lbm_config = replace(lbm_config, lb_method=method)
    update_rule = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    kernel = ps.create_kernel(update_rule, config=config).compile()

    # PDF initialization
    init = pdf_initialization_assignments(method, con_field.center,
                                          vel_field.center_vector, pdfs.center_vector)
    dh.run_kernel(ps.create_kernel(init).compile())

    dh.all_to_gpu()

    # Boundary Handling
    bh = LatticeBoltzmannBoundaryHandling(update_rule.method, dh, 'pdfs', name="bh",
                                          target=dh.default_target)
    add_box_boundary(bh, boundary=NeumannByCopy())
    bh.set_boundary(DiffusionDirichlet(0), slice_from_direction('W', dh.dim))
    bh.set_boundary(DiffusionDirichlet(1), slice_from_direction('S', dh.dim))

    # Timeloop
    for _ in range(time_steps):
        bh()
        dh.run_kernel(kernel)
        dh.swap("pdfs", "pdfs_tmp")

    dh.all_to_cpu()

    # Verification
    # x starts at 1: the analytical expression divides by sqrt(x), so the first
    # column is left at zero and excluded from the residual.
    x = np.arange(1, domain_size[0], 1)
    y = np.arange(0, domain_size[1], 1)

    analytical = np.zeros(domain_size)
    # broadcasting x (as column) against y (as row) gives the (x, y) layout directly
    diffusion_length = np.sqrt(4 * diffusion * x[:, np.newaxis] / velocity)
    analytical[1:, :] = np.vectorize(math.erfc)(y[np.newaxis, :] / diffusion_length)
    simulated = dh.gather_array('con_field', ghost_layers=False)

    # Root mean square deviation; the sum covers the columns x >= 1 while the
    # normalisation uses the full number of cells.
    squared_error = np.sum((simulated[1:, :] - analytical[1:, :])**2)
    residual = math.sqrt(squared_error / (domain_size[0] * domain_size[1]))

    assert residual < 1e-2
def test_lees_edwards():
    """Exercise the Lees-Edwards boundary functor in a sheared 2D box.

    Part one shears the box for 500 steps and compares the velocity profile
    with ``get_solution_navier_stokes``.  Part two applies a point force next to
    the boundary, once with offset 0 and once with offset 10, and checks that
    the second velocity field, rolled back by the offset, equals the first.
    """
    domain_size = (64, 64)
    omega = 1.0  # relaxation rate of first component
    shear_velocity = 0.1  # shear velocity
    shear_dir = 0  # direction of shear flow
    shear_dir_normal = 1  # direction normal to shear plane, for interpolation

    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(domain_size, periodicity=True, default_target=ps.Target.CPU)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dh.fill('src', 1.0, ghost_layers=True)

    dst = dh.add_array_like('dst', 'src')
    dh.fill('dst', 0.0, ghost_layers=True)

    force = dh.add_array('force', values_per_cell=stencil.D)
    dh.fill('force', 0.0, ghost_layers=True)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 1.0, ghost_layers=True)
    u = dh.add_array('u', values_per_cell=stencil.D)
    dh.fill('u', 0.0, ghost_layers=True)

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil.D)]
    points_up = sp.Symbol('points_up')
    points_down = sp.Symbol('points_down')

    # velocity jump, selected by the loop counter and the pointing direction of a PDF
    in_bottom_rows = ps.data_types.type_all_numbers(counters[1] <= 1, 'int')
    in_top_rows = ps.data_types.type_all_numbers(counters[1] >= src.shape[1] - 2, 'int')
    u_p = sp.Piecewise((1, sp.And(in_bottom_rows, points_down)),
                       (-1, sp.And(in_top_rows, points_up)),
                       (0, True)) * shear_velocity

    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, compressible=True,
                           velocity_input=u.center_vector + sp.Matrix([u_p, 0]), density_input=rho,
                           force_model=ForceModel.LUO, force=force.center_vector,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    # inline every subexpression that contains the equilibrium velocity in shear direction
    to_insert = [s.lhs for s in collision.subexpressions
                 if collision.method.first_order_equilibrium_moment_symbols[shear_dir]
                 in s.free_symbols]
    for subexpression_symbol in to_insert:
        collision = collision.new_with_inserted_subexpression(subexpression_symbol)

    # fix points_down / points_up per PDF according to its direction normal to the shear plane
    specialised_assignments = []
    for assignment, direction in zip(collision.main_assignments, collision.method.stencil):
        goes_down = bool(direction[shear_dir_normal] == -1)
        goes_up = bool(direction[shear_dir_normal] == 1)
        assignment = ps.Assignment(assignment.lhs, assignment.rhs.replace(points_down, goes_down))
        assignment = ps.Assignment(assignment.lhs, assignment.rhs.replace(points_up, goes_up))
        specialised_assignments.append(assignment)
    collision.main_assignments = specialised_assignments

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {'density': rho, 'velocity': u})

    config = ps.CreateKernelConfig(target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    init = macroscopic_values_setter(collision.method, velocity=(0, 0),
                                     pdfs=src.center_vector, density=rho.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    # one-element list, so the functor observes updates made here
    offset = [0.0]

    sync_pdfs = dh.synchronization_function([src.name],
                                            functor=partial(get_le_boundary_functor,
                                                            shear_offset=offset))

    def run_steps(steps, offset_increment=None):
        """Run ``steps`` time steps, optionally growing the offset after each of them."""
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            dh.run_kernel(stream_kernel)
            dh.swap(src.name, dst.name)
            if offset_increment is not None:
                offset[0] += offset_increment
        dh.all_to_cpu()

    def reset_with_point_force():
        """Re-initialise all fields and place a single force entry next to the boundary."""
        dh.fill(rho.name, 1.0, ghost_layers=True)
        dh.run_kernel(init_kernel)
        dh.fill(u.name, 0.0, ghost_layers=True)
        dh.fill('force', 0.0, ghost_layers=True)
        dh.cpu_arrays[force.name][64 // 3, 1, :] = [1e-2, -1e-1]

    # --- part one: sheared box against the analytical solution ---
    dh.run_kernel(init_kernel)

    shear_steps = 500
    run_steps(shear_steps, offset_increment=shear_velocity)

    nu = lattice_viscosity_from_relaxation_rate(omega)
    h = domain_size[0]
    k_max = 100

    analytical_solution = get_solution_navier_stokes(
        np.linspace(0.5, h - 0.5, h), shear_steps, nu, shear_velocity, h, k_max)
    np.testing.assert_array_almost_equal(analytical_solution,
                                         dh.gather_array(u.name)[0, :, 0],
                                         decimal=5)

    # --- part two: point force, without and with a constant offset ---
    point_force_steps = 20

    reset_with_point_force()
    offset[0] = 0
    run_steps(point_force_steps)
    vel_unshifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    reset_with_point_force()
    offset[0] = 10
    run_steps(point_force_steps)
    vel_shifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    vel_rolled = np.roll(vel_shifted, -offset[0], axis=0)

    np.testing.assert_array_almost_equal(vel_unshifted, vel_rolled)
def poiseuille_channel(target, stencil_name):
    """Simulate a force-driven channel flow between two no-slip walls and check the profile.

    The simulation runs for at most 5000 steps and stops early once the maximum
    velocity changes by a relative amount below 5e-6 between two checks (done
    every 100 steps).  The resulting velocity profile is compared against
    ``poiseuille_flow``; the other velocity components must be zero.

    Args:
        target: target passed to the data handling and the kernel configuration.
        stencil_name: stencil identifier passed on to ``LBStencil``.
    """
    # physical parameters
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # of box
    actual_width = width - 2  # subtract boundary layer from box width
    ext_force_density = 0.2 / actual_width ** 2  # scale by width to keep stable

    # LB parameters
    lb_stencil = LBStencil(stencil_name)

    if lb_stencil.D == 2:
        L = (4, width)
    elif lb_stencil.D == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (lb_stencil.D - 2)

    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    ρ = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # LB Setup
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True,
                           force_model=ForceModel.GUO,
                           force=tuple([ext_force_density] + [0] * (lb_stencil.D - 1)),
                           kernel_type='collide_only')

    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # ## Set up the simulation
    init_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                 pdfs=src.center_vector, density=ρ.center)
    init_kernel = ps.create_kernel(init_assignments, ghost_layers=0).compile()

    noslip = NoSlip()
    wall_thickness = 2
    if lb_stencil.D == 2:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:])
    elif lb_stencil.D == 3:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    # both walls share the same NoSlip object, hence exactly one entry
    assert len(lbbh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"

    def init():
        dh.fill(ρ.name, rho_0)
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)
        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    def averaged_velocity():
        """Fetch the velocity field and average it over the periodic directions."""
        if u.name in dh.gpu_arrays:
            dh.to_cpu(u.name)
        uu = dh.gather_array(u.name)
        # average periodic directions
        if lb_stencil.D == 3:  # don't swap the order of the two averages
            uu = np.average(uu, axis=2)
        return np.average(uu, axis=0)

    # Time loop
    def time_loop(steps):
        dh.all_to_gpu()
        last_max_vel = -1
        for i in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)

            # Consider early termination
            if i % 100 == 0:
                max_vel = np.nanmax(averaged_velocity())
                if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                    break

                last_max_vel = max_vel

        # Sample the field once more after the loop: without the early exit the
        # last check lies up to 99 steps in the past, and for steps == 0 no
        # sample would exist at all.
        uu = averaged_velocity()

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]

        # correct for f/2 term
        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (lb_stencil.D - 1))

        return uu

    init()
    # Simulation
    profile = time_loop(5000)

    # compare against analytical solution
    # The profile is of shape (n,3). Force is in x-direction
    y = np.arange(len(profile[:, 0]))
    mid = (y[-1] - y[0]) / 2  # Mid point of channel

    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)

    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # Test zero vel in other directions
    np.testing.assert_allclose(profile[:, 1:], np.zeros_like(profile[:, 1:]), atol=1E-9)
def test_total_momentum(method_enum, force_model, omega):
    """After ``t`` steps with a constant force ``F`` the mean velocity must equal ``F * t``."""
    # This test case does not work for the EDM force model, although that model is used
    # successfully in test_entropic_model. Every attempt to adapt the EDM force model so that
    # it fulfils this test case resulted in a failure of the entropic test case.
    # Note also that the test runs for MRT and EMD.
    if force_model == ForceModel.EDM:
        pytest.skip()

    L = (16, 16)
    stencil = LBStencil(Stencil.D2Q9)
    F = (2e-4, -3e-4)

    dh = ps.create_data_handling(L, periodicity=True, default_target=Target.CPU)
    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    density_field = dh.add_array('rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=stencil.D)

    lbm_config = LBMConfig(method=method_enum, stencil=stencil, relaxation_rate=omega,
                           compressible=True, force_model=force_model, force=F,
                           streaming_pattern='pull')
    lbm_opt = LBMOptimisation(symbolic_field=src)

    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    config = ps.CreateKernelConfig(cpu_openmp=True, target=dh.default_target)
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    def initialise():
        dh.fill(density_field.name, 1)
        dh.fill(u.name, 0)

        setter = macroscopic_values_setter(collision.method, velocity=(0, ) * dh.dim,
                                           pdfs=src, density=density_field.center,
                                           set_pre_collision_pdfs=True)
        dh.run_kernel(ps.create_kernel(setter).compile())

    sync_pdfs = dh.synchronization_function([src.name])

    getter = macroscopic_values_getter(collision.method, density_field.center, u.center_vector,
                                       src, use_pre_collision_pdfs=True)
    getter_kernel = ps.create_kernel(getter).compile()

    def advance(steps):
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            dh.swap(src.name, dst.name)
            sync_pdfs()
        dh.all_to_cpu()

    t = 20
    initialise()
    advance(t)
    dh.run_kernel(getter_kernel)

    # velocity summed over all cells, normalised by cell count, force and time
    total = np.sum(dh.gather_array(u.name), axis=(0, 1))
    assert np.allclose(total / np.prod(L) / F / t, 1)