def test_codegen():
    """Smoke-test sweep generation for every OpenMP / precision combination.

    For each of the four (openmp, double_accuracy) combinations a 2D and a 3D
    Jacobi sweep are generated into a ``ManualCodeGenerationContext``.  The
    test then checks that all four expected files are present in
    ``ctx.files``, that both ``.cpp`` files contain an OpenMP pragma when
    OpenMP was requested, and that they do not contain the type name of the
    precision that was *not* requested.
    """
    switch_states = (False, True)
    for openmp in switch_states:
        for da in switch_states:
            with ManualCodeGenerationContext(openmp=openmp, double_accuracy=da) as ctx:
                h = sp.symbols("h")
                dtype = "float64" if ctx.double_accuracy else "float32"

                # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
                src, dst = ps.fields("src, src_tmp: {}[2D]".format(dtype))
                stencil = [[0, -1, 0],
                           [-1, 4, -1],
                           [0, -1, 0]]
                assignments = ps.assignment_from_stencil(
                    stencil, src, dst, normalization_factor=4 * h**2)
                generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])

                # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
                src, dst = ps.fields("src, src_tmp: {}[3D]".format(dtype))

                @ps.kernel
                def kernel_func():
                    dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
                                     + src[0, 1, 0] + src[0, -1, 0]
                                     + src[0, 0, 1] + src[0, 0, -1]) / (6 * h**2)

                generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])

                # Every sweep must have produced both a source and a header file.
                for expected in ('JacobiKernel3D.cpp', 'JacobiKernel3D.h',
                                 'JacobiKernel2D.cpp', 'JacobiKernel2D.h'):
                    assert expected in ctx.files

                # The type name of the other precision must not show up in the sources.
                forbidden_type = 'float ' if da else 'double '
                for cpp_name in ('JacobiKernel3D.cpp', 'JacobiKernel2D.cpp'):
                    generated = ctx.files[cpp_name]
                    assert not openmp or '#pragma omp parallel' in generated
                    assert forbidden_type not in generated
# Kernels
# Shallow copy of the options without the 'optimization' entry; `options` itself is not modified.
options_without_opt = options.copy()
options_without_opt.pop('optimization')  # like `del`, raises KeyError if the entry is missing

# One update rule and one generated GPU sweep per entry of `accessors`, keyed by its name.
update_rules = {}
for name, accessor in accessors.items():
    update_rule = insert_fast_sqrts(
        insert_fast_divisions(
            create_lb_update_rule(lb_method=lb_method, kernel_type=accessor, **options)))
    update_rules[name] = update_rule
    generate_sweep(
        ctx, 'UniformGridGPU_AA_LbKernel' + name, update_rule,
        inner_outer_split=True,
        target='gpu',
        gpu_indexing_params=sweep_params,
        varying_parameters=vp,
    )

# getter & setter
# The setter is created with a fixed density of 1, the getter with density=None.
setter_assignments = macroscopic_values_setter(
    lb_method,
    velocity=velocity_field.center_vector,
    pdfs=pdfs.center_vector,
    density=1,
)
getter_assignments = macroscopic_values_getter(
    lb_method,
    velocity=velocity_field.center_vector,
    pdfs=pdfs.center_vector,
    density=None,
)
# Rewrite divisions first, then square roots, in the update rule.
update_rule = insert_fast_sqrts(insert_fast_divisions(update_rule))

# CPU lattice model - required for macroscopic value computation, VTK output etc.
# It receives the LB options without the 'optimization' entry; `options` itself stays unchanged.
options_without_opt = options.copy()
options_without_opt.pop('optimization')  # like `del`, raises KeyError if the entry is missing
generate_lattice_model(
    ctx, 'UniformGridGPU_LatticeModel', lb_method,
    update_rule_params=options_without_opt,
)

# gpu LB sweep & boundaries
generate_sweep(
    ctx, 'UniformGridGPU_LbKernel', update_rule,
    field_swaps=[('pdfs', 'pdfs_tmp')],
    inner_outer_split=True,
    target='gpu',
    gpu_indexing_params=sweep_params,
    varying_parameters=vp,
)
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates two sweeps, 'JacobiKernel2D' and 'JacobiKernel3D'. Both read from
# `src`, write to `src_tmp` and declare the two fields as a swap pair.
with CodeGeneration() as ctx:
    # Symbolic grid spacing used in both update rules.
    h = sp.symbols("h")

    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
    src, dst = ps.fields("src, src_tmp: [2D]")
    # Only the four direct neighbours contribute and the centre weight is zero.
    # The weighted sum is scaled by the normalization factor, which gives
    # sum(neighbours) / (4 * h**2), the 2D counterpart of the 3D rule below.
    # (The previous weights [[0,-1,0],[-1,4,-1],[0,-1,0]] scaled by 4*h**2 were
    # not a Jacobi update and did not match the 3D kernel.)
    stencil = [[0, 1, 0],
               [1, 0, 1],
               [0, 1, 0]]
    assignments = ps.assignment_from_stencil(stencil, src, dst,
                                             normalization_factor=1 / (4 * h**2))
    generate_sweep(ctx, 'JacobiKernel2D', assignments, field_swaps=[(src, dst)])

    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
    src, dst = ps.fields("src, src_tmp: [3D]")

    @ps.kernel
    def kernel_func():
        # Sum of the six direct neighbours divided by 6 * h**2.
        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
                         + src[0, 1, 0] + src[0, -1, 0]
                         + src[0, 0, 1] + src[0, 0, -1]) / (6 * h**2)

    generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src, dst)])
from lbmpy.updatekernels import create_stream_pull_only_kernel
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates two GPU sweeps: a plain copy kernel and a stream-only kernel.
with CodeGeneration() as ctx:
    f_size = 19  # size used for the index dimension of `src` and `dst` below
    dtype = 'float64' if ctx.double_accuracy else 'float32'
    block_size = (128, 1, 1)  # same GPU block size for both sweeps

    # Copy sweep
    field_spec = "src({f_size}), dst({f_size}) : {dtype}[3D]".format(f_size=f_size, dtype=dtype)
    src, dst = ps.fields(field_spec, layout='fzyx')
    # One assignment per index: dst(q) <- src(q).
    copy_only = [ps.Assignment(dst(q), src(q)) for q in range(f_size)]
    generate_sweep(
        ctx, 'MicroBenchmarkCopyKernel', copy_only,
        target='gpu',
        gpu_indexing_params={'block_size': block_size},
    )

    # Stream-only sweep
    stencil = get_stencil("D3Q19")
    stream_only = create_stream_pull_only_kernel(
        stencil,
        src_field_name='src',
        dst_field_name='dst',
        generic_field_type=dtype,
        generic_layout='fzyx',
    )
    generate_sweep(
        ctx, 'MicroBenchmarkStreamKernel', stream_only,
        target='gpu',
        gpu_indexing_params={'block_size': block_size},
    )
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates the CPU lattice model, the GPU LB sweep, two GPU boundary
# sweeps and the GPU pack info, all from one set of LB options.
with CodeGeneration() as ctx:
    # LB options
    optimization = {
        'cse_global': True,
        'cse_pdfs': True,
        'gpu_indexing_params': {'block_size': (128, 1, 1)},
    }
    options = {
        'method': 'srt',
        'stencil': 'D3Q19',
        'relaxation_rate': sp.Symbol("omega"),
        'field_name': 'pdfs',
        'compressible': False,
        'temporary_field_name': 'pdfs_tmp',
        'optimization': optimization,
    }

    # The same option set is used for the method and for the update rule derived from it.
    lb_method = create_lb_method(**options)
    update_rule = create_lb_update_rule(lb_method=lb_method, **options)

    # CPU lattice model - required for macroscopic value computation, VTK output etc.
    generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', lb_method)

    # gpu LB sweep & boundaries
    generate_sweep(
        ctx, 'UniformGridGPU_LbKernel', update_rule,
        field_swaps=[('pdfs', 'pdfs_tmp')],
        inner_outer_split=True,
        target='gpu',
    )
    generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
    generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')

    # communication
    generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
import pystencils as ps from pystencils_walberla import CodeGeneration, generate_sweep with CodeGeneration() as ctx: h = sp.symbols("h") # ----- Jacobi 2D - created by specifying weights in nested list -------------------------- src, dst = ps.fields("src, src_tmp: [2D]") stencil = [[0, -1, 0], [-1, 4, -1], [0, -1, 0]] assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=4 * h**2) generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu") # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format ----- src, dst = ps.fields("src, src_tmp: [3D]") @ps.kernel def kernel_func(): dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0] + src[0, 1, 0] + src[0, -1, 0] + src[0, 0, 1] + src[0, 0, -1]) / (6 * h**2) generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func,
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates two GPU sweeps, 'CudaJacobiKernel2D' and 'CudaJacobiKernel3D'.
# Both read from `src`, write to `src_tmp` and declare the two fields as a
# swap pair.
with CodeGeneration() as ctx:
    # Symbolic grid spacing used in both update rules.
    h = sp.symbols("h")

    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
    src, dst = ps.fields("src, src_tmp: [2D]")
    # Only the four direct neighbours contribute and the centre weight is zero.
    # The weighted sum is scaled by the normalization factor, which gives
    # sum(neighbours) / (4 * h**2), the 2D counterpart of the 3D rule below.
    # (The previous weights [[0,-1,0],[-1,4,-1],[0,-1,0]] scaled by 4*h**2 were
    # not a Jacobi update and did not match the 3D kernel.)
    stencil = [[0, 1, 0],
               [1, 0, 1],
               [0, 1, 0]]
    assignments = ps.assignment_from_stencil(stencil, src, dst,
                                             normalization_factor=1 / (4 * h ** 2))
    generate_sweep(ctx, 'CudaJacobiKernel2D', assignments, field_swaps=[(src, dst)], target="gpu")

    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
    src, dst = ps.fields("src, src_tmp: [3D]")

    @ps.kernel
    def kernel_func():
        # Sum of the six direct neighbours divided by 6 * h**2.
        dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
                         + src[0, 1, 0] + src[0, -1, 0]
                         + src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)

    generate_sweep(ctx, 'CudaJacobiKernel3D', kernel_func, field_swaps=[(src, dst)], target="gpu")
import pystencils as ps
from lbmpy.updatekernels import create_stream_pull_only_kernel
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates two GPU sweeps: a plain copy kernel and a stream-only kernel.
with CodeGeneration() as ctx:
    f_size = 19  # size used for the index dimension of `src` and `dst` below
    dtype = 'float64' if ctx.double_accuracy else 'float32'
    block_size = (128, 1, 1)  # same GPU block size for both sweeps

    # Copy sweep
    field_spec = "src({f_size}), dst({f_size}) : {dtype}[3D]".format(f_size=f_size, dtype=dtype)
    src, dst = ps.fields(field_spec, layout='fzyx')
    # One assignment per index: dst(q) <- src(q).
    copy_only = [ps.Assignment(dst(q), src(q)) for q in range(f_size)]
    generate_sweep(
        ctx, 'MicroBenchmarkCopyKernel', copy_only,
        target='gpu',
        gpu_indexing_params={'block_size': block_size},
    )

    # Stream-only sweep
    stencil = get_stencil("D3Q19")
    stream_only = create_stream_pull_only_kernel(
        stencil,
        src_field_name='src',
        dst_field_name='dst',
        generic_field_type=dtype,
        generic_layout='fzyx',
    )
    generate_sweep(
        ctx, 'MicroBenchmarkStreamKernel', stream_only,
        target='gpu',
        gpu_indexing_params={'block_size': block_size},
    )
import sympy as sp
import pystencils as ps
from pystencils_walberla import CodeGeneration, generate_sweep

# Generates two sweeps, 'JacobiKernel2D' and 'JacobiKernel3D'. Both read from
# `src`, write to `src_tmp` and declare the two fields as a swap pair.
with CodeGeneration() as ctx:
    h = sp.symbols("h")  # symbolic grid spacing shared by both update rules

    # ----- Jacobi 2D - created by specifying weights in nested list --------------------------
    src_2d, dst_2d = ps.fields("src, src_tmp: [2D]", layout='fzyx')
    # Weight 1 for each of the four direct neighbours, 0 for the centre and the corners.
    neighbour_weights = [
        [0, 1, 0],
        [1, 0, 1],
        [0, 1, 0],
    ]
    jacobi_2d = ps.assignment_from_stencil(
        neighbour_weights, src_2d, dst_2d,
        normalization_factor=1 / (4 * h ** 2),
    )
    generate_sweep(ctx, 'JacobiKernel2D', jacobi_2d, field_swaps=[(src_2d, dst_2d)])

    # ----- Jacobi 3D - created by using kernel_decorator with assignments in '@=' format -----
    src_3d, dst_3d = ps.fields("src, src_tmp: [3D]")

    @ps.kernel
    def kernel_func():
        # Sum of the six direct neighbours divided by h**2 * 6.
        dst_3d[0, 0, 0] @= (src_3d[1, 0, 0] + src_3d[-1, 0, 0]
                            + src_3d[0, 1, 0] + src_3d[0, -1, 0]
                            + src_3d[0, 0, 1] + src_3d[0, 0, -1]) / (h ** 2 * 6)

    generate_sweep(ctx, 'JacobiKernel3D', kernel_func, field_swaps=[(src_3d, dst_3d)])