def test_kernel():
    """Run the Jacobi kernel test and the reduction test on a 2D and a 3D periodic domain.

    The ``test_gpu=False`` variant always runs. The ``test_gpu=True`` variant
    runs on a fresh data handling only when ``pycuda`` can be imported.
    """
    for domain_shape in [(4, 5), (3, 4, 5)]:
        dh = create_data_handling(domain_size=domain_shape, periodicity=True)
        kernel_execution_jacobi(dh, test_gpu=False)
        reduction(dh)

        # Only the import is guarded: an ImportError raised inside the kernel
        # test itself must fail the test instead of being swallowed.
        try:
            import pycuda  # noqa: F401
        except ImportError:
            continue

        dh = create_data_handling(domain_size=domain_shape, periodicity=True)
        kernel_execution_jacobi(dh, test_gpu=True)
def test_philox_float():
    """Check the output of PhiloxFourFloats against ``philox_reference`` on each target.

    For every target a 2x2 field with four values per cell is filled by a
    Philox kernel at ``time_step=124``. The values must lie in [0, 1] and
    match the reference integers scaled to floats.
    """
    for target in ('cpu', 'gpu'):
        if target == 'gpu':
            # Skip instead of failing when the GPU dependency is missing;
            # pycuda is the package the other GPU tests in this file guard on.
            import pytest
            pytest.importorskip('pycuda')

        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        f = dh.add_array("f", values_per_cell=4)

        dh.fill('f', 42.0)

        philox_node = PhiloxFourFloats(dh.dim)
        assignments = [philox_node] + [ps.Assignment(f(i), philox_node.result_symbols[i]) for i in range(4)]
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        arr = dh.gather_array('f')
        assert np.logical_and(arr <= 1.0, arr >= 0).all()

        float_reference = philox_reference * 2.**-32 + 2.**-33
        assert np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps)
def test_philox_double():
    """Check the output of PhiloxTwoDoubles against ``philox_reference`` on each target.

    For every target a 2x2 field with two values per cell is filled by a
    Philox kernel at ``time_step=124``. The values must lie in [0, 1] and
    match the doubles assembled from pairs of reference integers.
    """
    for target in ('cpu', 'gpu'):
        if target == 'gpu':
            # Skip instead of failing when the GPU dependency is missing;
            # pycuda is the package the other GPU tests in this file guard on.
            import pytest
            pytest.importorskip('pycuda')

        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        f = dh.add_array("f", values_per_cell=2)

        dh.fill('f', 42.0)

        philox_node = PhiloxTwoDoubles(dh.dim)
        assignments = [
            philox_node,
            ps.Assignment(f(0), philox_node.result_symbols[0]),
            ps.Assignment(f(1), philox_node.result_symbols[1]),
        ]
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        arr = dh.gather_array('f')
        assert np.logical_and(arr <= 1.0, arr >= 0).all()

        x = philox_reference[:, :, 0::2]
        y = philox_reference[:, :, 1::2]
        # `<<` binds tighter than `^`; the parentheses only spell that out.
        z = x ^ (y << (53 - 32))
        double_reference = z * 2.**-53 + 2.**-54
        assert np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps)
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
    """Check that a vectorized RNG kernel yields the same numbers as the non-vectorized one.

    The RNG node selected by ``(rng, precision)`` is compiled twice: once
    plainly, writing into the ``ref`` field, and once with
    ``cpu_vectorize_info`` for the instruction set ``target``, writing into
    ``f``. Both kernels run with identical arguments and the fields are compared.
    """
    aes_unavailable = target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')
    if aes_unavailable and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')

    values_per_cell = 4 if precision == 'float' else 2
    field_dtype = np.float32 if dtype == 'float' else np.float64

    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=values_per_cell, dtype=field_dtype, alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=values_per_cell)

    # reference: plain (non-vectorized) kernel writing into `ref`
    plain_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    plain_assignments = [plain_node]
    plain_assignments += [ps.Assignment(ref(i), s) for i, s in enumerate(plain_node.result_symbols)]
    plain_kernel = ps.create_kernel(plain_assignments, target=dh.default_target).compile()

    run_arguments = {'time_step': t}
    if offset_values is not None:
        for offset_symbol, value in zip(offsets, offset_values):
            run_arguments[offset_symbol.name] = value
    dh.run_kernel(plain_kernel, **run_arguments)

    # candidate: vectorized kernel writing into `f`
    vector_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    vector_assignments = [vector_node]
    vector_assignments += [ps.Assignment(f(i), s) for i, s in enumerate(vector_node.result_symbols)]
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}
    vector_kernel = ps.create_kernel(vector_assignments, target=dh.default_target,
                                     cpu_vectorize_info=cpu_vectorize_info).compile()
    dh.run_kernel(vector_kernel, **run_arguments)

    ref_data = dh.gather_array(ref.name)
    data = dh.gather_array(f.name)
    assert np.allclose(ref_data, data)
def test_contact_angle():
    """Apply a ContactAngle boundary and compare one boundary value with a closed-form expression.

    Also checks that ContactAngle instances compare equal exactly when
    their constructor arguments are equal.
    """
    contact_angle = 45
    phase_value = 0.5
    domain_size = (9, 9)
    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(domain_size, periodicity=(False, False))
    C = dh.add_array("C", values_per_cell=1)
    # ghost layers hold 0.0, the inner domain holds the phase value
    dh.fill("C", 0.0, ghost_layers=True)
    dh.fill("C", phase_value, ghost_layers=False)

    bh = BoundaryHandling(dh, C.name, stencil, target=ps.Target.CPU)
    bh.set_boundary(ContactAngle(contact_angle, 5), ps.make_slice[:, 0])
    bh()

    h = 1.0
    cos_angle = math.cos(math.radians(contact_angle))
    myA = 1.0 - 0.5 * h * (4.0 / 5) * cos_angle
    discriminant = myA * myA - 4.0 * (myA - 1.0) * phase_value
    phase_on_boundary = (myA - np.sqrt(discriminant)) / (myA - 1.0) - phase_value

    np.testing.assert_almost_equal(dh.cpu_arrays["C"][5, 0], phase_on_boundary)

    assert ContactAngle(45, 5) == ContactAngle(45, 5)
    assert ContactAngle(46, 5) != ContactAngle(45, 5)
def test_staggered(vectorized):
    """Make sure that the RNG counter can be substituted during loop cutting.

    A staggered kernel whose first assignment draws a random number is
    always compiled. With ``vectorized`` set, the same assignments are also
    compiled with vectorization and both kernels must produce the same
    field contents for identical ``seed`` and ``time_step``.
    """
    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
    j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)
    a = ps.AssignmentCollection([ps.Assignment(j.staggered_access(n), 0) for n in j.staggered_stencil])
    rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
    a.main_assignments[0] = ps.Assignment(a.main_assignments[0].lhs, next(rng_symbol_gen))
    kernel = ps.create_staggered_kernel(a, target=dh.default_target).compile()

    if not vectorized:
        return

    if not instruction_sets:
        pytest.skip("cannot detect CPU instruction set")
    pytest.importorskip('islpy')
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': False,
                          'instruction_set': instruction_sets[-1]}

    dh.fill(j.name, 867)
    dh.run_kernel(kernel, seed=5, time_step=309)
    # Snapshot the reference: the same field is refilled and recomputed below,
    # so if gather_array hands back a view of the live buffer the comparison
    # would otherwise compare the array with itself.
    ref_data = dh.gather_array(j.name).copy()

    kernel2 = ps.create_staggered_kernel(a, target=dh.default_target,
                                         cpu_vectorize_info=cpu_vectorize_info).compile()

    dh.fill(j.name, 867)
    dh.run_kernel(kernel2, seed=5, time_step=309)
    data = dh.gather_array(j.name)

    assert np.allclose(ref_data, data)
def test_advection(dim):
    """Check that ``ps.fd.VOF`` and ``VOF2`` compute the same fluxes for random input fields."""
    L = (8, ) * dim
    dh = ps.create_data_handling(L, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=3**dh.dim // 2, field_type=ps.FieldType.STAGGERED_FLUX)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # the arrays are written including their border, hence extent + 2 per axis
    padded_shape = [extent + 2 for extent in L]
    dh.cpu_arrays[c.name][:] = np.random.random(padded_shape)
    dh.cpu_arrays[u.name][:] = (np.random.random(padded_shape + [dim]) - 0.5) / 5

    first_kernel = ps.create_kernel(ps.fd.VOF(j, u, c)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(first_kernel)
    # copy, because `j` is overwritten by the second kernel
    flux_first = dh.gather_array(j.name).copy()

    second_kernel = ps.create_kernel(VOF2(j, u, c, simplify=False)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(second_kernel)
    flux_second = dh.gather_array(j.name)

    assert np.allclose(flux_first, flux_second)
def test_free_slip_equivalence():
    """Check that FreeSlip gives the same result with and without an explicit normal direction."""
    stencil = LBStencil(Stencil.D2Q9)
    dh = create_data_handling(domain_size=(4, 4), periodicity=(False, False))

    src1 = dh.add_array('src1', values_per_cell=stencil.Q, alignment=True)
    src2 = dh.add_array('src2', values_per_cell=stencil.Q, alignment=True)
    dh.fill('src1', 0.0, ghost_layers=True)
    dh.fill('src2', 0.0, ghost_layers=True)

    # give every entry (ghost layers included) a distinct value, identical in both fields
    shape = dh.gather_array('src1', ghost_layers=True).shape
    for num, entry in enumerate(np.ndindex(shape[0], shape[1], shape[2])):
        dh.cpu_arrays['src1'][entry] = num
        dh.cpu_arrays['src2'][entry] = num

    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.8)
    method = create_lb_method(lbm_config=lbm_config)
    north = slice_from_direction('N', dh.dim)

    # boundary handling 1: normal direction left unspecified
    bh1 = LatticeBoltzmannBoundaryHandling(method, dh, 'src1', name="bh1")
    free_slip1 = FreeSlip(stencil=stencil)
    bh1.set_boundary(free_slip1, north)

    # boundary handling 2: normal direction given explicitly
    bh2 = LatticeBoltzmannBoundaryHandling(method, dh, 'src2', name="bh2")
    free_slip2 = FreeSlip(stencil=stencil, normal_direction=(0, -1))
    bh2.set_boundary(free_slip2, north)

    bh1()
    bh2()

    assert np.array_equal(dh.cpu_arrays['src1'], dh.cpu_arrays['src2'])
def test_stream_only_kernel(streaming_pattern):
    """Check for every timestep of a streaming pattern that the stream-only kernel moves the PDFs.

    Values written through the 'in' accessor at cell (1, 1) must be read
    back through the 'out' accessor at the same cell after one kernel run.
    """
    domain_size = (4, 4)
    cell = (1, 1)
    stencil = LBStencil(Stencil.D2Q9)
    q = len(stencil)

    dh = ps.create_data_handling(domain_size, default_target=Target.CPU)
    pdfs = dh.add_array('pdfs', values_per_cell=q)
    pdfs_tmp = dh.add_array_like('pdfs_tmp', 'pdfs')

    for timestep in get_timesteps(streaming_pattern):
        accessor = get_accessor(streaming_pattern, timestep)
        src = pdfs
        # in-place patterns stream within one field, others need the temporary
        if is_inplace(streaming_pattern):
            dst = pdfs
        else:
            dst = pdfs_tmp

        dh.fill(src.name, 0.0)
        dh.fill(dst.name, 0.0)

        stream_assignments = create_stream_only_kernel(stencil, src, dst, accessor=accessor)
        stream_func = create_kernel(stream_assignments).compile()

        # write the direction index as the incoming value of each direction
        writer = AccessPdfValues(stencil, streaming_dir='in', accessor=accessor)
        for direction in range(q):
            writer.write_pdf(dh.cpu_arrays[src.name], cell, direction, direction)

        dh.run_kernel(stream_func)

        # ... and expect to find it as the outgoing value afterwards
        reader = AccessPdfValues(stencil, streaming_dir='out', accessor=accessor)
        for direction in range(q):
            assert reader.read_pdf(dh.cpu_arrays[dst.name], cell, direction) == direction
def test_flux_stencil(stencil, derivative):
    """Count the field accesses in the discrete continuity and flux equations of FVM1stOrder.

    ``stencil`` is a name such as ``"D2Q9"``: the character at index 1
    gives the dimension and the text from index 3 on the number of
    directions. ``derivative`` selects between the two flux expressions.
    """
    dim = int(stencil[1])
    n_directions = int(stencil[3:])

    L = (40, ) * dim
    dh = ps.create_data_handling(L, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=n_directions // 2, field_type=ps.FieldType.STAGGERED_FLUX)

    def Gradient(f):
        return sp.Matrix([ps.fd.diff(f, i) for i in range(dh.dim)])

    scaled_field = sp.Matrix([sp.Symbol(f"a_{i}") * c.center for i in range(dh.dim)])
    flux_candidates = [scaled_field, Gradient(c)]
    eq = flux_candidates[derivative]
    disc = ps.fd.FVM1stOrder(c, flux=eq)

    # check the continuity
    continuity_assignments = disc.discrete_continuity(j)
    continuity_counts = [len(a.rhs.atoms(ps.field.Field.Access)) for a in continuity_assignments]
    assert continuity_counts == [n_directions] * len(continuity_assignments)

    # check the flux
    flux_assignments = disc.discrete_flux(j)
    flux_counts = [len(a.rhs.atoms(ps.field.Field.Access)) for a in flux_assignments]
    assert flux_counts == [2] * len(flux_assignments)
def test_optimised_and_full_communication_equivalence(stencil_name):
    """Check that LBMPeriodicityHandling gives the same result as the full synchronisation.

    Both PDF arrays are filled with a running index, the full
    synchronisation function is applied and one stream-pull step is run.
    The arrays are then re-initialised, the optimised periodicity handling
    is applied, the same step is run again and the two results are compared.
    """
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4, ) * stencil.D

    dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
                                 parallel=False, default_target=target)

    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)

    gl = dh.ghost_layers_of_field("pdf")

    def write_running_index():
        """Store each entry's enumeration index in both 'pdf' and 'pdf_tmp'."""
        for num, (idx, _) in enumerate(np.ndenumerate(dh.cpu_arrays['pdf'])):
            dh.cpu_arrays['pdf'][idx] = num
            dh.cpu_arrays['pdf_tmp'][idx] = num

    write_running_index()

    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()

    # reference: synchronise everything, then stream once
    full_communication = dh.synchronization_function(pdf.name, target=dh.default_target,
                                                     optimization={"openmp": True})
    full_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])

    # candidate: same initial state, optimised communication, then stream once
    write_running_index()
    optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh,
                                                     pdf_field_name=pdf.name, streaming_pattern='pull')
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")

    # Compare indices gl .. domain_size - 1 along every spatial axis and all
    # PDF entries of those cells, for 2D and 3D stencils alike.
    # NOTE(review): if the arrays include ghost layers this range leaves out
    # the last `gl` inner cells per axis - confirm that this is intended.
    compared = tuple(slice(gl, extent) for extent in domain_size)
    np.testing.assert_array_equal(
        dh.cpu_arrays['pdf'][compared], pdf_full_communication[compared],
        err_msg="optimised and full communication produced different PDFs")
def test_staggered_loop_cutting():
    """Check that the staggered kernel for a single "SW" access contains no Conditional node."""
    pytest.importorskip('islpy')

    dh = ps.create_data_handling((4, 4), periodicity=True, default_target=Target.CPU)
    j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)

    south_west = j.staggered_access("SW")
    ast = ps.create_staggered_kernel([ps.Assignment(south_west, 1)], target=dh.default_target)

    conditionals = ast.atoms(ps.astnodes.Conditional)
    assert not conditionals
def test_symbolic_fields():
    """Check the dimensions of the symbolic fields created by add_array and add_array_like."""
    dh = create_data_handling(domain_size=(5, 7))

    dh.add_array('f1', values_per_cell=dh.dim)
    original = dh.fields['f1']
    assert original.spatial_dimensions == dh.dim
    assert original.index_dimensions == 1

    # the field created "like" f1 must report the same dimensions
    dh.add_array_like("f_tmp", "f1", latex_name=r"f_{tmp}")
    clone = dh.fields['f_tmp']
    assert clone.spatial_dimensions == dh.dim
    assert clone.index_dimensions == 1

    dh.swap('f1', 'f_tmp')
def test_staggered_subexpressions():
    """Compile a staggered kernel whose right-hand-side symbol is assigned by a later assignment."""
    dh = ps.create_data_handling((10, 10), periodicity=True, default_target=Target.CPU)
    j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
    c = sp.symbols("c")

    flux_assignment = ps.Assignment(j.staggered_access("W"), c)
    symbol_assignment = ps.Assignment(c, 1)
    assignments = [flux_assignment, symbol_assignment]

    staggered_ast = ps.create_staggered_kernel(assignments, target=dh.default_target)
    staggered_ast.compile()
def test_data_handling(parallel):
    """Check that arrays added with a 32-byte alignment request are aligned at index [1, 3]."""
    byte_alignment = 8 * 4
    # repeat a few times: a single allocation could be aligned by pure chance
    for _ in range(16):
        dh = create_data_handling((6, 7), default_ghost_layers=1, parallel=parallel)
        dh.add_array('test', alignment=byte_alignment, values_per_cell=1)
        for block in dh.iterate(ghost_layers=True, inner_ghost_layers=True):
            arr = block['test']
            assert is_aligned(arr[1:, 3:], byte_alignment)
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    """Build, inspect and run a vectorized kernel that uses aligned, non-temporal stores.

    The kernel averages the four direct neighbours of ``g`` into ``f``.
    Depending on the instruction set, certain store-related instructions
    must be offered by the AST's instruction set, and every such
    instruction that is offered must appear in the generated code. With
    ``g`` filled with ones, the sum over ``f`` must equal the number of
    domain cells.
    """
    domain_size = (24, 24)
    dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False,
                                 default_target=Target.CPU)

    # fields: cacheline alignment is requested when OpenMP is enabled
    alignment = True
    if openmp:
        alignment = 'cacheline'
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)

    opt = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    neighbour_sum = g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]
    update_rule = [ps.Assignment(f.center(), 0.25 * neighbour_sum)]
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)

    # instructions that have to be available for the given instruction set
    if instruction_set == 'sse' or instruction_set.startswith('avx'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set == 'vsx':
        assert 'storeAAndFlushCacheline' in ast.instruction_set

    # every available store-related instruction has to show up in the code
    store_instructions = ['stream', 'streamFence', 'cachelineZero', 'storeAAndFlushCacheline', 'flushCacheline']
    for instruction in store_instructions:
        if instruction not in ast.instruction_set:
            continue
        intrinsic_prefix = ast.instruction_set[instruction].split('{')[0]
        assert intrinsic_prefix in ps.get_code_str(ast)

    kernel = ast.compile()
    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_sliced_getter_data_handling():
    """Check that SlicedGetterDataHandling returns the expected values for an inner slice."""
    domain_shape = (10, 10)
    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)

    dh.add_array("src", values_per_cell=1)
    dh.fill("src", 1.0, ghost_layers=True)
    dh.add_array("dst", values_per_cell=1)
    dh.fill("dst", 0.0, ghost_layers=True)

    # "dst" holds zeros, "src" holds ones; the slice selects 36 cells
    for field_name, expected_sum in (('dst', 0), ('src', 36)):
        sli = SlicedGetterDataHandling(dh, field_name)
        slice_obj = make_slice[2:-2, 2:-2]
        assert np.sum(sli[slice_obj]) == expected_sum
def test_aesni_float():
    """Run an AESNIFourFloats kernel on a 2x2 domain and check that all values lie in [0, 1]."""
    n_values = 4
    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
    f = dh.add_array("f", values_per_cell=n_values)
    dh.fill('f', 42.0)

    aesni_node = AESNIFourFloats(dh.dim)
    assignments = [aesni_node]
    for i in range(n_values):
        assignments.append(ps.Assignment(f(i), aesni_node.result_symbols[i]))
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel, time_step=124)
    dh.all_to_cpu()

    arr = dh.gather_array('f')
    in_unit_interval = np.logical_and(arr <= 1.0, arr >= 0)
    assert in_unit_interval.all()
def test_access():
    """Check block array shapes with and without ghost layers, for scalar and vector fields."""
    for domain_shape in [(2, 3, 4), (2, 4)]:
        for f_size in (1, 4):
            dh = create_data_handling(domain_size=domain_shape)
            dh.add_array('f1', values_per_cell=f_size)
            assert dh.dim == len(domain_shape)

            # a trailing index axis is only expected for more than one value per cell
            index_shape = (f_size, ) if f_size > 1 else ()
            shape_with_ghost_layer = tuple(ds + 2 for ds in domain_shape) + index_shape
            shape_without_ghost_layer = domain_shape + index_shape

            for block in dh.iterate(ghost_layers=1):
                assert block['f1'].shape == shape_with_ghost_layer

            for block in dh.iterate(ghost_layers=0):
                assert block['f1'].shape == shape_without_ghost_layer
def test_rng_symbol(vectorized):
    """Check that random_symbol yields distinct symbols and that the resulting kernel compiles."""
    cpu_vectorize_info = None
    if vectorized:
        if not instruction_sets:
            pytest.skip("cannot detect CPU instruction set")
        cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True,
                              'instruction_set': instruction_sets[-1]}

    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
    n_values = f.shape[-1]

    ac = ps.AssignmentCollection([ps.Assignment(f(i), 0) for i in range(n_values)])
    rng_symbol_gen = random_symbol(ac.subexpressions, dim=dh.dim)
    # replace every right-hand side with a freshly generated random symbol
    for i in range(n_values):
        target_access = ac.main_assignments[i].lhs
        ac.main_assignments[i] = ps.Assignment(target_access, next(rng_symbol_gen))

    symbols = [assignment.rhs for assignment in ac.main_assignments]
    assert len(symbols) == n_values and len(set(symbols)) == n_values

    kernel_ast = ps.create_kernel(ac, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info)
    kernel_ast.compile()
def test_source_stencil(stencil):
    """Count the field accesses a source term adds to the discrete continuity equation.

    The continuity equation without a source is the reference. A source
    given as a plain field access must leave one field access in the
    simplified difference, a derivative source must leave two.
    """
    dim = int(stencil[1])
    n_directions = int(stencil[3:])

    L = (40, ) * dim
    dh = ps.create_data_handling(L, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=n_directions // 2, field_type=ps.FieldType.STAGGERED_FLUX)

    continuity_ref = ps.fd.FVM1stOrder(c).discrete_continuity(j)

    sources = [c.center]
    sources += [ps.fd.diff(c, i) for i in range(dh.dim)]
    for eq in sources:
        disc = ps.fd.FVM1stOrder(c, source=eq)
        with_source = disc.discrete_continuity(j)[0].rhs
        diff = sp.simplify(with_source - continuity_ref[0].rhs)

        expected_accesses = 1 if type(eq) is ps.field.Field.Access else 2
        assert len(diff.atoms(ps.field.Field.Access)) == expected_accesses
def test_momentum_density_shift(force_model):
    """Check the momentum density computed from all-zero PDFs under a force of (1, 2).

    The summed x-component must equal half the number of cells and the
    summed y-component the number of cells.
    """
    target = Target.CPU
    stencil = LBStencil(Stencil.D2Q9)
    domain_size = (4, 4)
    dh = ps.create_data_handling(domain_size=domain_size, default_target=target)

    # all fields start at zero, ghost layers included
    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 0.0, ghost_layers=True)
    momentum_density = dh.add_array('momentum_density', values_per_cell=dh.dim)
    dh.fill('momentum_density', 0.0, ghost_layers=True)
    src = dh.add_array('src', values_per_cell=len(stencil))
    dh.fill('src', 0.0, ghost_layers=True)

    lbm_config = LBMConfig(method=Method.SRT, compressible=True, force_model=force_model, force=(1, 2))
    method = create_lb_method(lbm_config=lbm_config)

    cqc = method.conserved_quantity_computation
    output_fields = {'density': rho.center, 'momentum_density': momentum_density.center_vector}
    momentum_density_getter = cqc.output_equations_from_pdfs(src.center_vector, output_fields)

    config = ps.CreateKernelConfig(target=dh.default_target)
    momentum_density_kernel = ps.create_kernel(momentum_density_getter, config=config).compile()
    dh.run_kernel(momentum_density_kernel)

    computed = dh.gather_array(momentum_density.name)
    n_cells = np.prod(domain_size)
    assert np.sum(computed[:, :, 0]) == n_cells / 2
    assert np.sum(computed[:, :, 1]) == n_cells
def test_aesni_double():
    """Run an AESNITwoDoubles kernel on a 2x2 domain and check that all values lie in [0, 1]."""
    n_values = 2
    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
    f = dh.add_array("f", values_per_cell=n_values)
    dh.fill('f', 42.0)

    aesni_node = AESNITwoDoubles(dh.dim)
    assignments = [aesni_node]
    for i in range(n_values):
        assignments.append(ps.Assignment(f(i), aesni_node.result_symbols[i]))
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel, time_step=124)
    dh.all_to_cpu()

    arr = dh.gather_array('f')
    in_unit_interval = np.logical_and(arr <= 1.0, arr >= 0)
    assert in_unit_interval.all()
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
    """Check that a vectorized copy kernel only runs when kernel and field ghost layers agree.

    If ``gl_kernel`` differs from ``gl_field``, running the kernel must
    raise ``ValueError``; otherwise it must run without error.
    """
    np_dtype = np.float64 if dtype == 'double' else np.float32
    domain_size = (128, 128)
    dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target=Target.CPU)

    src = dh.add_array("src", values_per_cell=1, dtype=np_dtype, ghost_layers=gl_field, alignment=True)
    dh.fill(src.name, 1.0, ghost_layers=True)
    dst = dh.add_array("dst", values_per_cell=1, dtype=np_dtype, ghost_layers=gl_field, alignment=True)
    dh.fill(dst.name, 1.0, ghost_layers=True)

    copy_rule = ps.Assignment(dst[0, 0], src[0, 0])
    opt = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_vectorize_info=opt, ghost_layers=gl_kernel)
    kernel = ps.create_kernel(copy_rule, config=config).compile()

    if gl_kernel == gl_field:
        dh.run_kernel(kernel)
    else:
        with pytest.raises(ValueError):
            dh.run_kernel(kernel)
def test_simple(target):
    """Apply NoSlip and UBB boundaries to a D2Q9 field and check selected PDF values.

    Every PDF entry is initialised with its own direction index. Three
    sides get a NoSlip wall, the side selected by ``make_slice[:, -1]``
    gets a UBB with velocity (1, 0). After one boundary sweep, the values
    at the corners and sides of the array are compared with hard-coded
    expectations.
    """
    on_gpu = target == Target.GPU
    if on_gpu:
        import pytest
        pytest.importorskip('pycuda')

    dh = create_data_handling((4, 4), parallel=False, default_target=target)
    dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU)
    for direction in range(9):
        dh.fill("pdfs", direction, value_idx=direction, ghost_layers=True)
    if on_gpu:
        dh.all_to_gpu()

    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9), compressible=False, relaxation_rate=1.8)
    config = CreateKernelConfig(target=target)
    lb_func = create_lb_function(lbm_config=lbm_config, config=config)

    bh = LatticeBoltzmannBoundaryHandling(lb_func.method, dh, 'pdfs', target=target)
    wall = NoSlip()
    moving_wall = UBB((1, 0))
    bh.set_boundary(wall, make_slice[0, :])
    bh.set_boundary(wall, make_slice[-1, :])
    bh.set_boundary(wall, make_slice[:, 0])
    bh.set_boundary(moving_wall, make_slice[:, -1])

    bh.prepare()
    bh()

    if on_gpu:
        dh.all_to_cpu()

    pdfs = dh.cpu_arrays['pdfs']
    # offset that appears in the expected values next to the moving wall
    ubb_shift = 6 / 36

    # left lower corner
    assert pdfs[0, 0, 6] == 7
    assert pdfs[0, 1, 4] == 3
    assert pdfs[0, 1, 6] == 7
    assert pdfs[1, 0, 1] == 2
    assert pdfs[1, 0, 6] == 7

    # left side
    assert all(pdfs[0, 2:4, 4] == 3)
    assert all(pdfs[0, 2:4, 6] == 7)
    assert all(pdfs[0, 2:4, 5] == 5)

    # left upper corner
    assert pdfs[0, 4, 4] == 3
    assert pdfs[0, 4, 8] == 5
    assert pdfs[0, 5, 8] == 5 + ubb_shift
    assert pdfs[1, 5, 8] == 5 + ubb_shift
    assert pdfs[1, 5, 2] == 1

    # top side
    assert all(pdfs[2:4, 5, 2] == 1)
    assert all(pdfs[2:4, 5, 7] == 6 - ubb_shift)
    assert all(pdfs[2:4, 5, 8] == 5 + ubb_shift)

    # right upper corner
    assert pdfs[4, 5, 2] == 1
    assert pdfs[4, 5, 7] == 6 - ubb_shift
    assert pdfs[5, 5, 7] == 6 - ubb_shift
    assert pdfs[5, 4, 3] == 4
    assert pdfs[5, 4, 7] == 6

    # right side
    assert all(pdfs[5, 2:4, 3] == 4)
    assert all(pdfs[5, 2:4, 5] == 8)
    assert all(pdfs[5, 2:4, 7] == 6)

    # right lower corner
    assert pdfs[5, 1, 3] == 4
    assert pdfs[5, 1, 5] == 8
    assert pdfs[5, 0, 5] == 8
    assert pdfs[4, 0, 1] == 2
    assert pdfs[4, 0, 5] == 8

    # lower side
    # NOTE(review): these checks index the same cells as "left side" above - confirm the intended slice
    assert all(pdfs[0, 2:4, 4] == 3)
    assert all(pdfs[0, 2:4, 6] == 7)
    assert all(pdfs[0, 2:4, 8] == 5)
def test_ek(stencil):
    """Compare FVM1stOrder's discretization of ``-D*grad(c) + D*z*c*grad(Phi)`` with a manual one.

    The discrete flux must match the hand-written staggered expressions up
    to simplification; the continuity update must match exactly.
    """
    # parameters
    L = (40, 40)
    D, z = sp.symbols("D z")

    # data structures
    dh = ps.create_data_handling(L, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=int(stencil[-1]) // 2, field_type=ps.FieldType.STAGGERED_FLUX)
    Phi = dh.add_array('Φ', values_per_cell=1)

    # automatic discretization
    def Gradient(f):
        return sp.Matrix([ps.fd.diff(f, i) for i in range(dh.dim)])

    flux_eq = -D * Gradient(c) + D * z * c.center * Gradient(Phi)
    disc = ps.fd.FVM1stOrder(c, flux_eq)
    flux_assignments = disc.discrete_flux(j)
    continuity_assignments = disc.discrete_continuity(j)

    # manual discretization, one expression per staggered direction
    west = -c[-1, 0] + c[0, 0] + z * (c[-1, 0] + c[0, 0]) / 2 * (Phi[-1, 0] - Phi[0, 0])
    south = -c[0, -1] + c[0, 0] + z * (c[0, -1] + c[0, 0]) / 2 * (Phi[0, -1] - Phi[0, 0])
    south_west = (- c[-1, -1] + c[0, 0]) / sp.sqrt(2) + \
        z * (c[-1, -1] + c[0, 0]) / 2 * (Phi[-1, -1] - Phi[0, 0]) / sp.sqrt(2)
    north_west = (- c[-1, 1] + c[0, 0]) / sp.sqrt(2) + \
        z * (c[-1, 1] + c[0, 0]) / 2 * (Phi[-1, 1] - Phi[0, 0]) / sp.sqrt(2)

    with_diagonals = j.index_shape[0] == 4
    A0 = 1 + sp.sqrt(2) if with_diagonals else 1

    # the update sums the flux over each staggered direction and its inverse
    directions = j.staggered_stencil + [ps.stencil.inverse_direction_string(d) for d in j.staggered_stencil]
    divergence = -1 * sum([j.staggered_access(d) for d in directions])
    update = [ps.Assignment(c.center, c.center + divergence)]

    flux = [
        ps.Assignment(j.staggered_access("W"), D * west / A0),
        ps.Assignment(j.staggered_access("S"), D * south / A0),
    ]
    if with_diagonals:
        flux += [
            ps.Assignment(j.staggered_access("SW"), D * south_west / A0),
            ps.Assignment(j.staggered_access("NW"), D * north_west / A0),
        ]

    # compare
    for manual, automatic in zip(flux, flux_assignments):
        assert manual.lhs == automatic.lhs
        assert sp.simplify(manual.rhs - automatic.rhs) == 0
    for manual, automatic in zip(update, continuity_assignments):
        assert manual.lhs == automatic.lhs
        assert manual.rhs == automatic.rhs
def test_free_slip_index_list():
    """Check the normals and mirrored directions stored in the FreeSlip index list of a box boundary."""
    stencil = LBStencil(Stencil.D2Q9)
    dh = create_data_handling(domain_size=(4, 4), periodicity=(False, False))
    src = dh.add_array('src', values_per_cell=len(stencil), alignment=True)
    dh.fill('src', 0.0, ghost_layers=True)

    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.8)
    method = create_lb_method(lbm_config=lbm_config)

    bh = LatticeBoltzmannBoundaryHandling(method, dh, 'src', name="bh")
    free_slip = FreeSlip(stencil=stencil)
    add_box_boundary(bh, free_slip)
    bh.prepare()

    # keep the index list that is visited last
    for block in dh.iterate():
        for idx_arr in block[bh._index_array_name].boundary_object_to_index_list.values():
            index_array = idx_arr

    # expected normals per side and per corner
    normal_west = (1, 0)
    normal_east = (-1, 0)
    normal_south = (0, 1)
    normal_north = (0, -1)
    normal_south_west = (1, 1)
    normal_north_west = (1, -1)
    normal_south_east = (-1, 1)
    normal_north_east = (-1, -1)

    for cell in index_array:
        direction = stencil[cell[2]]
        inv_dir = inverse_direction(direction)

        boundary_cell = (cell[0] + direction[0], cell[1] + direction[1])
        bx, by = boundary_cell
        normal = (cell[3], cell[4])

        # the data is written on the inverse direction of the fluid cell near the boundary
        # the data is read from the mirrored direction of the inverse direction,
        # where the mirror axis is the normal
        mirrored = mirror_stencil(list(inv_dir), normal)
        assert cell[5] == stencil.index(mirrored)

        # sides
        if bx == 0 and 0 < by < 5:
            assert normal == normal_west
        if bx == 5 and 0 < by < 5:
            assert normal == normal_east
        if 0 < bx < 5 and by == 0:
            assert normal == normal_south
        if 0 < bx < 5 and by == 5:
            assert normal == normal_north

        # corners
        if boundary_cell == (0, 0):
            assert cell[2] == cell[5]
            assert normal == normal_south_west
        if boundary_cell == (5, 0):
            assert cell[2] == cell[5]
            assert normal == normal_south_east
        if boundary_cell == (0, 5):
            assert cell[2] == cell[5]
            assert normal == normal_north_west
        if boundary_cell == (5, 5):
            assert cell[2] == cell[5]
            assert normal == normal_north_east
def poiseuille_channel(target, stencil_name):
    """Simulate a force-driven channel flow and compare the velocity profile with ``poiseuille_flow``.

    Args:
        target: passed to the data handling as ``default_target``
        stencil_name: passed to ``LBStencil``; only stencils with D == 2 or D == 3 are accepted

    Raises:
        Exception: if the stencil dimension is neither 2 nor 3
    """
    # physical parameters
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # of box
    actual_width = width - 2  # subtract boundary layer from box width
    ext_force_density = 0.2 / actual_width ** 2  # scale by width to keep stable

    # LB parameters
    lb_stencil = LBStencil(stencil_name)

    if lb_stencil.D == 2:
        L = (4, width)
    elif lb_stencil.D == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    # periodic in every direction except the second one, where the walls are placed
    periodicity = [True, False] + [True] * (lb_stencil.D - 2)

    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    ρ = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # LB Setup
    # the external force acts along the first axis only
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True,
                           force_model=ForceModel.GUO,
                           force=tuple([ext_force_density] + [0] * (lb_stencil.D - 1)),
                           kernel_type='collide_only')

    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # ## Set up the simulation

    # `init` holds the setter assignments here; the name is rebound to a function further down
    init = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                     pdfs=src.center_vector, density=ρ.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    noslip = NoSlip()
    wall_thickness = 2
    if lb_stencil.D == 2:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:])
    elif lb_stencil.D == 3:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    # exactly one boundary object (the NoSlip instance) must be registered
    for bh in lbbh, :
        assert len(bh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"

    def init():
        """Fill density and velocity with their start values and run the PDF setter kernel."""
        dh.fill(ρ.name, rho_0)
        # NaN everywhere including ghost layers first, then 0 via the default fill
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)

        dh.run_kernel(init_kernel)

    # synchronisation of the PDF source field
    sync_pdfs = dh.synchronization_function([src.name])

    # Time loop
    def time_loop(steps):
        """Run up to ``steps`` LB steps and return the averaged velocity profile without the walls.

        Every 100 steps the velocity is averaged over the periodic directions.
        The loop stops early once the maximum velocity changed by less than
        5e-6 (relative) since the previous check.
        """
        dh.all_to_gpu()
        i = -1
        last_max_vel = -1
        for i in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)

            # Consider early termination
            if i % 100 == 0:
                if u.name in dh.gpu_arrays:
                    dh.to_cpu(u.name)
                uu = dh.gather_array(u.name)
                # average periodic directions
                if lb_stencil.D == 3:  # don't swap order
                    uu = np.average(uu, axis=2)
                uu = np.average(uu, axis=0)

                max_vel = np.nanmax(uu)
                if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                    break

                last_max_vel = max_vel

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]

        # correct for f/2 term
        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (lb_stencil.D - 1))

        return uu

    init()
    # Simulation
    profile = time_loop(5000)

    # compare against analytical solution
    # The profile is of shape (n,3). Force is in x-direction
    y = np.arange(len(profile[:, 0]))
    mid = (y[-1] - y[0]) / 2  # Mid point of channel

    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)

    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # Test zero vel in other directions
    np.testing.assert_allclose(profile[:, 1:], np.zeros_like(profile[:, 1:]), atol=1E-9)
def run(re=6000, eval_interval=0.05, total_time=3.0, domain_size=100, u_0=0.05,
        initialization_relaxation_rate=None, vtk_output=False, parallel=False, **kwargs):
    """Runs the kida vortex simulation.

    Args:
        re: Reynolds number
        eval_interval: interval in non-dimensional time to evaluate flow properties
        total_time: non-dimensional time of complete simulation
        domain_size: integer (not tuple) since domain is cubic
        u_0: maximum lattice velocity
        initialization_relaxation_rate: if not None, an advanced initialization scheme is run to initialize higher
                                        order moments correctly
        vtk_output: if vtk files are written out
        parallel: MPI parallelization with walberla
        **kwargs: passed to LbStep. String entries in ``relaxation_rates``
                  ('viscosity', 'trt_magic', 'free') are replaced by the
                  corresponding value; if the key is missing, the single
                  viscosity relaxation rate is used.

    Returns:
        On the root process a dictionary with the keys
        'initialization_residuum', 'initialization_steps', 'time',
        'kinetic_energy', 'enstrophy', 'mlups', 'energy_spectrum' and 'stable'.
        'energy_spectrum' is None when no spectrum was computed (run stopped
        before non-dimensional time 0.5, ``domain_size`` above 600, or no
        velocity could be gathered). On all other processes None is returned.
    """
    domain_shape = (domain_size, domain_size, domain_size)
    relaxation_rate = relaxation_rate_from_reynolds_number(re, u_0, domain_size)
    dh = ps.create_data_handling(domain_shape, periodicity=True, parallel=parallel)

    # Symbolic names that callers may use inside kwargs['relaxation_rates'].
    rr_subs = {'viscosity': relaxation_rate,
               'trt_magic': relaxation_rate_from_magic_number(relaxation_rate),
               'free': sp.Symbol("rr_f")}

    if 'relaxation_rates' in kwargs:
        kwargs['relaxation_rates'] = [rr_subs[r] if isinstance(r, str) else r
                                      for r in kwargs['relaxation_rates']]
    else:
        kwargs['relaxation_rates'] = [relaxation_rate]

    dh.log_on_root("Running kida vortex scenario of size {} with {}".format(domain_size, kwargs))
    dh.log_on_root("Compiling method")

    lb_step = LatticeBoltzmannStep(data_handling=dh, name="kida_vortex", **kwargs)

    set_initial_velocity(lb_step, u_0)
    residuum, init_steps = np.nan, 0
    if initialization_relaxation_rate is not None:
        dh.log_on_root("Running iterative initialization", level='PROGRESS')
        residuum, init_steps = lb_step.run_iterative_initialization(initialization_relaxation_rate,
                                                                    convergence_threshold=1e-12,
                                                                    max_steps=100000,
                                                                    check_residuum_after=2 * domain_size)
        dh.log_on_root("Iterative initialization finished after {} steps at residuum {}".format(init_steps, residuum))

    total_time_steps = normalized_time_to_time_step(total_time, domain_size, u_0)
    eval_time_steps = normalized_time_to_time_step(eval_interval, domain_size, u_0)

    initial_energy = parallel_mean(lb_step, mean_kinetic_energy, all_reduce=False)
    times = []
    energy_list = []
    enstrophy_list = []
    mlups_list = []
    # None: not attempted yet; False: attempted but nothing was gathered here;
    # otherwise the computed spectrum.
    energy_spectrum_arr = None

    while lb_step.time_steps_run < total_time_steps:
        mlups = lb_step.benchmark_run(eval_time_steps, number_of_cells=domain_size**3)
        if vtk_output:
            lb_step.write_vtk()

        current_time = time_step_to_normalized_time(lb_step.time_steps_run, domain_size, u_0)
        current_kinetic_energy = parallel_mean(lb_step, mean_kinetic_energy)
        current_enstrophy = parallel_mean(lb_step, mean_enstrophy)

        is_stable = np.isfinite(lb_step.data_handling.max(lb_step.velocity_data_name)) and current_enstrophy < 1e4
        if not is_stable:
            dh.log_on_root("Simulation got unstable - stopping", level='WARNING')
            break

        # The spectrum is evaluated once, at the first sample at or after t = 0.5.
        if current_time >= 0.5 and energy_spectrum_arr is None and domain_size <= 600:
            dh.log_on_root("Calculating energy spectrum")
            gathered_velocity = lb_step.velocity[:, :, :, :]

            if gathered_velocity is not None:
                energy_spectrum_arr = energy_density_spectrum(gathered_velocity)
            else:
                # Mark as attempted so this branch is not entered again.
                energy_spectrum_arr = False

        if dh.is_root:
            current_kinetic_energy /= initial_energy
            current_enstrophy *= domain_size ** 2

            times.append(current_time)
            energy_list.append(current_kinetic_energy)
            enstrophy_list.append(current_enstrophy)
            mlups_list.append(mlups)

            dh.log_on_root("Progress: {current_time:.02f} / {total_time} at {mlups:.01f} MLUPS\t"
                           "Enstrophy {current_enstrophy:.04f}\t"
                           "KinEnergy {current_kinetic_energy:.06f}".format(
                               current_time=current_time,
                               total_time=total_time,
                               mlups=mlups,
                               current_enstrophy=current_enstrophy,
                               current_kinetic_energy=current_kinetic_energy))

    if not dh.is_root:
        return None

    # list(None) / list(False) would raise TypeError, so only convert a
    # spectrum that was actually computed.
    spectrum_available = energy_spectrum_arr is not None and energy_spectrum_arr is not False
    return {
        'initialization_residuum': residuum,
        'initialization_steps': init_steps,
        'time': times,
        'kinetic_energy': energy_list,
        'enstrophy': enstrophy_list,
        'mlups': np.average(mlups_list),
        'energy_spectrum': list(energy_spectrum_arr) if spectrum_available else None,
        'stable': bool(np.isfinite(lb_step.data_handling.max(lb_step.velocity_data_name)))
    }
def advection_diffusion(dim: int):
    """Build a checker for the combined advection-diffusion finite-volume scheme.

    Sets up a periodic domain, a staggered flux kernel that adds the VOF
    advection flux to the discretised diffusive flux, and the continuity
    kernel that applies it.

    Args:
        dim: spatial dimension, 2 or 3.

    Returns:
        A callable taking a velocity (sequence with ``dim`` components). It
        runs 100 time steps from a small central blob and asserts that mass
        is conserved, the density stays positive and the peak sits where the
        velocity should have moved it. For zero velocity it also compares the
        result with the analytical density.

    Raises:
        ValueError: if ``dim`` is neither 2 nor 3.
    """
    # parameters
    if dim == 2:
        L = (32, 32)
    elif dim == 3:
        L = (16, 16, 16)
    else:
        # Without this, L would be unbound and fail later with an opaque error.
        raise ValueError("advection_diffusion supports dim 2 or 3, got {!r}".format(dim))

    dh = ps.create_data_handling(domain_size=L, periodicity=True, default_target=ps.Target.CPU)

    n_field = dh.add_array('n', values_per_cell=1)
    j_field = dh.add_array('j', values_per_cell=3**dim // 2, field_type=ps.FieldType.STAGGERED_FLUX)
    velocity_field = dh.add_array('v', values_per_cell=dim)

    D = 0.0666
    time = 100

    def grad(f):
        return sp.Matrix([ps.fd.diff(f, axis) for axis in range(dim)])

    flux_eq = -D * grad(n_field)
    fvm_eq = ps.fd.FVM1stOrder(n_field, flux=flux_eq)

    vof_adv = ps.fd.VOF(j_field, velocity_field, n_field)

    # merge calculation of advection and diffusion terms
    flux = []
    for adv, div in zip(vof_adv, fvm_eq.discrete_flux(j_field)):
        # Both contributions must write to the same staggered flux entry.
        assert adv.lhs == div.lhs
        flux.append(ps.Assignment(adv.lhs, adv.rhs + div.rhs))

    flux_kernel = ps.create_staggered_kernel(flux).compile()

    pde_kernel = ps.create_kernel(
        fvm_eq.discrete_continuity(j_field)).compile()

    sync_conc = dh.synchronization_function([n_field.name])

    # analytical density calculation
    def density(pos: np.ndarray, time: int, D: float):
        return (4 * np.pi * D * time)**(-dim / 2) * \
            np.exp(-np.sum(np.square(pos), axis=-1) / (4 * D * time))

    # Coordinate grid centred on the domain, shifted by half a cell.
    pos = np.zeros((*L, dim))
    xpos = np.arange(-L[0] // 2, L[0] // 2)
    ypos = np.arange(-L[1] // 2, L[1] // 2)

    if dim == 2:
        pos[..., 1], pos[..., 0] = np.meshgrid(xpos, ypos)
    else:
        # NOTE(review): with meshgrid's default 'xy' indexing the 3D
        # component-to-axis mapping looks permuted. It appears harmless here
        # because pos is only used for zero velocity, where only the sum of
        # squares matters and all extents are equal - confirm before reusing
        # pos elsewhere.
        zpos = np.arange(-L[2] // 2, L[2] // 2)
        pos[..., 2], pos[..., 1], pos[..., 0] = np.meshgrid(xpos, ypos, zpos)
    pos += 0.5

    def run(velocity: np.ndarray, time: int):
        # NaN everywhere first, so values that are never set show up in the checks.
        dh.fill(n_field.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(j_field.name, np.nan, ghost_layers=True, inner_ghost_layers=True)

        # set initial values for velocity and density
        for component in range(dim):
            dh.fill(velocity_field.name, velocity[component], component,
                    ghost_layers=True, inner_ghost_layers=True)
        dh.fill(n_field.name, 0)
        # Two cells per axis around the domain centre.
        if dim == 2:
            start = ps.make_slice[L[0] // 2 - 1:L[0] // 2 + 1, L[1] // 2 - 1:L[1] // 2 + 1]
        else:
            start = ps.make_slice[L[0] // 2 - 1:L[0] // 2 + 1, L[1] // 2 - 1:L[1] // 2 + 1,
                                  L[2] // 2 - 1:L[2] // 2 + 1]
        # 2**dim cells of 2**-dim each give a total mass of one.
        dh.fill(n_field.name, 2**-dim, slice_obj=start)

        sync_conc()
        for _ in range(time):
            dh.run_kernel(flux_kernel)
            dh.run_kernel(pde_kernel)
            sync_conc()

        sim_density = dh.gather_array(n_field.name)

        # check that mass was conserved
        assert np.isclose(sim_density.sum(), 1)
        assert np.all(sim_density > 0)

        # check that the maximum is in the right place
        peak = np.unravel_index(np.argmax(sim_density, axis=None), sim_density.shape)
        assert np.allclose(peak, np.array(L) // 2 - 0.5 + velocity * time, atol=0.5)

        # check the concentration profile
        if np.linalg.norm(velocity) == 0:
            calc_density = density(pos - velocity * time, time, D)

            initial_guess = [time, D]

            pytest.importorskip('scipy.optimize')
            from scipy.optimize import curve_fit
            # Fit time and diffusion constant of the analytical density to
            # the simulated field; both must come back close to the inputs.
            popt, _ = curve_fit(
                lambda x, t, diff_const: density(x - velocity * time, t, diff_const),
                pos.reshape(-1, dim),
                sim_density.reshape(-1),
                p0=initial_guess)

            assert np.isclose(popt[0], time, rtol=0.1)
            assert np.isclose(popt[1], D, rtol=0.1)
            assert np.allclose(calc_density, sim_density, atol=1e-4)

    return lambda v: run(np.array(v), time)