Code example #1
def test_strided(instruction_set, dtype):
    f, g = ps.fields(f"f, g : float{64 if dtype == 'double' else 32}[2D]")
    update_rule = [
        ps.Assignment(g[0, 0],
                      f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
    ]
    if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \
            and instruction_set not in ['avx512', 'rvv'] \
            and not instruction_set.startswith('sve'):
        with pytest.warns(UserWarning) as warn:
            config = ps.CreateKernelConfig(
                cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
            assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        with pytest.warns(None) as warn:
            config = ps.CreateKernelConfig(
                cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
            assert len(warn) == 0
    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()

    arr = np.random.random(
        (23 + 2,
         17 + 2)).astype(np.float64 if dtype == 'double' else np.float32)
    dst = np.zeros_like(arr,
                        dtype=np.float64 if dtype == 'double' else np.float32)
    ref = np.zeros_like(arr,
                        dtype=np.float64 if dtype == 'double' else np.float32)

    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)
    np.testing.assert_almost_equal(dst, ref, 13 if dtype == 'double' else 5)
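Note: the examples on this page are test bodies only; the pytest fixtures and parametrization decorators were stripped during extraction. A minimal sketch of how a vectorization test such as test_strided is typically parametrized is shown below (the concrete parameter values are an assumption for illustration, not taken from the original file):

import pytest

# Hypothetical parameter lists; the real test file derives the instruction
# sets from what the build machine actually supports.
@pytest.mark.parametrize('instruction_set', ('sse', 'avx'))
@pytest.mark.parametrize('dtype', ('double', 'float'))
def test_strided(instruction_set, dtype):
    ...  # body as in Code example #1 above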
Code example #2
def test_create_kernel_config():
    c = ps.CreateKernelConfig()
    assert c.backend == ps.Backend.C
    assert c.target == ps.Target.CPU

    c = ps.CreateKernelConfig(target=ps.Target.GPU)
    assert c.backend == ps.Backend.CUDA

    c = ps.CreateKernelConfig(backend=ps.Backend.CUDA)
    assert c.target == ps.Target.CPU
    assert c.backend == ps.Backend.CUDA
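All snippets rely on the usual aliases from the pystencils and lbmpy test suites. A minimal import header that the pystencils-only examples assume could look like this (a sketch; the exact helper imports such as create_type or cast_func vary with the pystencils version):

import numpy as np
import sympy as sp
import pytest

import pystencils as ps

The lbmpy examples additionally use names such as LBMConfig, LBMOptimisation, LBStencil, create_lb_method and create_lb_update_rule from the lbmpy package.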
Code example #3
File: test_types.py    Project: mabau/pystencils
def test_sqrt_of_integer():
    """Regression test for bug where sqrt(3) was classified as integer"""
    f = ps.fields("f: [1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_double = np.array([1], dtype=np.float64)
    kernel = ps.create_kernel(assignments).compile()
    kernel(f=arr_double)
    assert 1.7 < arr_double[0] < 1.8

    f = ps.fields("f: float32[1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_single = np.array([1], dtype=np.float32)
    config = ps.CreateKernelConfig(data_type="float32")
    kernel = ps.create_kernel(assignments, config=config).compile()
    kernel(f=arr_single)

    code = ps.get_code_str(kernel.ast)
    # ps.show_code(kernel.ast)
    # 1.7320508075688772935  --> it is actually correct to round to ...773. This was wrong before !282
    assert "1.7320508075688773f" in code
    assert 1.7 < arr_single[0] < 1.8
Code example #4
def test_sum_use_float(default_assignment_simplifications):

    sum = sympy.Sum(sp.abc.k, (sp.abc.k, 1, 100))
    expanded_sum = sum.doit()

    print(sum)
    print(expanded_sum)

    x = ps.fields('x: float32[1d]')

    assignments = ps.AssignmentCollection({x.center(): sum})

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications,
        data_type=create_type('float32'))
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()

    print(code)
    if default_assignment_simplifications is False:
        assert 'float sum' in code

    array = np.zeros((10, ), np.float32)

    kernel(x=array)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
Code example #5
def test_product(default_assignment_simplifications):

    k = ps.TypedSymbol('k', create_type('int64'))

    sum = sympy.Product(k, (k, 1, 10))
    expanded_sum = sum.doit()

    print(sum)
    print(expanded_sum)

    x = ps.fields('x: int64[1d]')

    assignments = ps.AssignmentCollection({x.center(): sum})

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)

    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    kernel = ast.compile()

    print(code)
    if default_assignment_simplifications is False:
        assert 'int64_t product' in code

    array = np.zeros((10, ), np.int64)

    kernel(x=array)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
Code example #6
File: test_communication.py    Project: mabau/lbmpy
def test_optimised_and_full_communication_equivalence(stencil_name):
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4, ) * stencil.D

    dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
                                 parallel=False, default_target=target)

    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)

    gl = dh.ghost_layers_of_field("pdf")

    num = 0
    for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
        dh.cpu_arrays['pdf'][idx] = num
        dh.cpu_arrays['pdf_tmp'][idx] = num
        num += 1

    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()

    full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
    full_communication()

    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])

    num = 0
    for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
        dh.cpu_arrays['pdf'][idx] = num
        dh.cpu_arrays['pdf_tmp'][idx] = num
        num += 1

    optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
                                                     streaming_pattern='pull')
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")

    if stencil.D == 3:
        for i in range(gl, domain_size[0]):
            for j in range(gl, domain_size[1]):
                for k in range(gl, domain_size[2]):
                    for f in range(len(stencil)):
                        assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], \
                            f"mismatch in direction {f}"
    else:
        for i in range(gl, domain_size[0]):
            for j in range(gl, domain_size[1]):
                for f in range(len(stencil)):
                    assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
Code example #7
def test_kernel_decorator_config():
    config = ps.CreateKernelConfig()
    a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))

    @ps.kernel_config(config)
    def test():
        a[0] @= b[0] + c[0]

    ps.create_kernel(**test)
Code example #8
def test_abs():
    x, y, z = ps.fields('x, y, z:  float64[2d]')

    default_int_type = create_type('int64')

    assignments = ps.AssignmentCollection(
        {x[0, 0]: sympy.Abs(cast_func(y[0, 0], default_int_type))})

    config = ps.CreateKernelConfig(target=ps.Target.GPU)
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    print(code)
    assert 'fabs(' not in code
Code example #9
File: test_float_kernel.py    Project: mabau/lbmpy
def test_creation(method_enum, double_precision):
    """Simple test that makes sure that only float variables are created"""
    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)
    config = ps.CreateKernelConfig(
        data_type="float64" if double_precision else "float32")
    func = create_lb_function(lbm_config=lbm_config, config=config)
    code = ps.get_code_str(func)

    if double_precision:
        assert 'float' not in code
        assert 'double' in code
    else:
        assert 'double' not in code
        assert 'float' in code
Code example #10
File: test_float_kernel.py    Project: mabau/lbmpy
def test_scenario(method_enum, double_precision):
    lbm_config = LBMConfig(method=method_enum, relaxation_rate=1.5)
    config = ps.CreateKernelConfig(
        data_type="double" if double_precision else "float32")
    sc = create_lid_driven_cavity((16, 16, 8),
                                  lbm_config=lbm_config,
                                  config=config)
    sc.run(1)
    code = ps.get_code_str(sc.ast)

    if double_precision:
        assert 'float' not in code
        assert 'double' in code
    else:
        assert 'double' not in code
        assert 'float' in code
Code example #11
def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal,
                           double_precision, fixed_loop_sizes):
    vectorization_options = {
        'instruction_set': instruction_set,
        'assume_aligned': aligned_and_padding[0],
        'nontemporal': nontemporal,
        'assume_inner_stride_one': True,
        'assume_sufficient_line_padding': aligned_and_padding[1]
    }
    time_steps = 100
    size1 = (64, 32)
    size2 = (666, 34)
    relaxation_rate = 1.8

    print("Computing reference solutions")
    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
    ldc1_ref.run(time_steps)
    ldc2_ref = create_lid_driven_cavity(size2, relaxation_rate=relaxation_rate)
    ldc2_ref.run(time_steps)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
    config = ps.CreateKernelConfig(
        data_type="double" if double_precision else "float32",
        cpu_vectorize_info=vectorization_options)
    lbm_opt_split = LBMOptimisation(cse_global=True, split=True)
    lbm_opt = LBMOptimisation(cse_global=True, split=False)

    print(
        f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
        f"fixed loop sizes {fixed_loop_sizes}")
    ldc1 = create_lid_driven_cavity(size1,
                                    fixed_loop_sizes=fixed_loop_sizes,
                                    lbm_config=lbm_config,
                                    lbm_optimisation=lbm_opt,
                                    config=config)
    ldc1.run(time_steps)
    np.testing.assert_almost_equal(ldc1_ref.velocity[:, :],
                                   ldc1.velocity[:, :])

    ldc2 = create_lid_driven_cavity(size2,
                                    fixed_loop_sizes=fixed_loop_sizes,
                                    lbm_config=lbm_config,
                                    lbm_optimisation=lbm_opt_split,
                                    config=config)
    ldc2.run(time_steps)
    np.testing.assert_almost_equal(ldc2_ref.velocity[:, :],
                                   ldc2.velocity[:, :])
Code example #12
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    domain_size = (24, 24)
    # create a datahandling object
    dh = ps.create_data_handling(domain_size,
                                 periodicity=(True, True),
                                 parallel=False,
                                 default_target=Target.CPU)

    # fields
    alignment = 'cacheline' if openmp else True
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)
    opt = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True
    }
    update_rule = [
        ps.Assignment(f.center(),
                      0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))
    ]
    config = ps.CreateKernelConfig(target=dh.default_target,
                                   cpu_vectorize_info=opt,
                                   cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)
    if instruction_set in ['sse'] or instruction_set.startswith('avx'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set in ['vsx']:
        assert 'storeAAndFlushCacheline' in ast.instruction_set
    for instruction in [
            'stream', 'streamFence', 'cachelineZero',
            'storeAAndFlushCacheline', 'flushCacheline'
    ]:
        if instruction in ast.instruction_set:
            assert ast.instruction_set[instruction].split(
                '{')[0] in ps.get_code_str(ast)
    kernel = ast.compile()

    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
Code example #13
def test_evaluate_constant_terms(target, simplification):
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    src, dst = ps.fields('src, dst:  float32[2d]')

    # Triggers Sympy's cos optimization
    assignments = ps.AssignmentCollection({src[0, 0]: -sp.cos(1) + dst[0, 0]})

    config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    ast = ps.create_kernel(assignments, config=config)
    code = ps.get_code_str(ast)
    if simplification:
        assert 'cos(' not in code
    else:
        assert 'cos(' in code
    print(code)
Code example #14
def test_source_code_comment():

    a, b = pystencils.fields('a,b: float[2D]')

    assignments = pystencils.AssignmentCollection(
        {a.center(): b[0, 2] + b[0, 0]}, {})

    config = pystencils.CreateKernelConfig(target=pystencils.Target.CPU)
    ast = pystencils.create_kernel(assignments, config=config)

    ast.body.append(pystencils.astnodes.SourceCodeComment("Hallo"))
    ast.body.append(pystencils.astnodes.EmptyLine())
    ast.body.append(pystencils.astnodes.SourceCodeComment("World!"))
    print(ast)
    compiled = ast.compile()
    assert compiled is not None

    pystencils.show_code(ast)
Code example #15
def test_vectorized_abs(instruction_set, dtype):
    """Some instructions sets have abs, some don't.
       Furthermore, the special treatment of unary minus makes this data type-sensitive too.
    """
    arr = np.ones((2**2 + 2, 2**3 + 2),
                  dtype=np.float64 if dtype == 'double' else np.float32)
    arr[-3:, :] = -1

    f, g = ps.fields(f=arr, g=arr)
    update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]

    config = ps.CreateKernelConfig(
        cpu_vectorize_info={'instruction_set': instruction_set})
    ast = ps.create_kernel(update_rule, config=config)

    func = ast.compile()
    dst = np.zeros_like(arr)
    func(g=dst, f=arr)
    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2**2 * 2**3)
Code example #16
def test_momentum_density_shift(force_model):
    target = Target.CPU

    stencil = LBStencil(Stencil.D2Q9)
    domain_size = (4, 4)
    dh = ps.create_data_handling(domain_size=domain_size,
                                 default_target=target)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 0.0, ghost_layers=True)

    momentum_density = dh.add_array('momentum_density', values_per_cell=dh.dim)
    dh.fill('momentum_density', 0.0, ghost_layers=True)

    src = dh.add_array('src', values_per_cell=len(stencil))
    dh.fill('src', 0.0, ghost_layers=True)

    lbm_config = LBMConfig(method=Method.SRT,
                           compressible=True,
                           force_model=force_model,
                           force=(1, 2))
    method = create_lb_method(lbm_config=lbm_config)

    cqc = method.conserved_quantity_computation

    momentum_density_getter = cqc.output_equations_from_pdfs(
        src.center_vector, {
            'density': rho.center,
            'momentum_density': momentum_density.center_vector
        })

    config = ps.CreateKernelConfig(target=dh.default_target)
    momentum_density_ast = ps.create_kernel(momentum_density_getter,
                                            config=config)
    momentum_density_kernel = momentum_density_ast.compile()

    dh.run_kernel(momentum_density_kernel)
    assert np.sum(dh.gather_array(
        momentum_density.name)[:, :, 0]) == np.prod(domain_size) / 2
    assert np.sum(dh.gather_array(
        momentum_density.name)[:, :, 1]) == np.prod(domain_size)
Code example #17
def test_sympy_optimizations(target, simplification):
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    src, dst = ps.fields('src, dst:  float32[2d]')

    # Triggers Sympy's expm1 optimization
    # Sympy's expm1 optimization is tedious to use and the behaviour depends strongly on the sympy version. In
    # some cases the exp expression has to be wrapped in parentheses or multiplied by 1 or 1.0
    # for sympy to apply the rewrite.
    assignments = ps.AssignmentCollection(
        {src[0, 0]: 1.0 * (sp.exp(dst[0, 0]) - 1)})

    config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    ast = ps.create_kernel(assignments, config=config)

    code = ps.get_code_str(ast)
    if simplification:
        assert 'expm1(' in code
    else:
        assert 'expm1(' not in code
Code example #18
def test_vectorisation_varying_arch(instruction_set):
    shape = (9, 9, 3)
    arr = np.ones(shape, order='f')

    @ps.kernel
    def update_rule(s):
        f = ps.fields("f(3) : [2D]", f=arr)
        s.tmp0 @= f(0)
        s.tmp1 @= f(1)
        s.tmp2 @= f(2)
        f0, f1, f2 = f(0), f(1), f(2)
        f0 @= 2 * s.tmp0
        f1 @= 2 * s.tmp0
        f2 @= 2 * s.tmp0

    config = ps.CreateKernelConfig(
        cpu_vectorize_info={'instruction_set': instruction_set})
    ast = ps.create_kernel(update_rule, config=config)
    kernel = ast.compile()
    kernel(f=arr)
    np.testing.assert_equal(arr, 2)
Code example #19
def test_lbm_vectorization_short():
    print("Computing reference solutions")
    size1 = (64, 32)
    relaxation_rate = 1.8

    ldc1_ref = create_lid_driven_cavity(size1, relaxation_rate=relaxation_rate)
    ldc1_ref.run(10)

    lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
    config = ps.CreateKernelConfig(
        cpu_vectorize_info={
            'instruction_set': get_supported_instruction_sets()[-1],
            'assume_aligned': True,
            'nontemporal': True,
            'assume_inner_stride_one': True,
            'assume_sufficient_line_padding': False,
        })
    ldc1 = create_lid_driven_cavity(size1,
                                    lbm_config=lbm_config,
                                    config=config,
                                    fixed_loop_sizes=False)
    ldc1.run(10)
Code example #20
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel,
                                            instruction_set, dtype):
    dtype = np.float64 if dtype == 'double' else np.float32

    domain_size = (128, 128)
    dh = ps.create_data_handling(domain_size,
                                 periodicity=(True, True),
                                 default_target=Target.CPU)
    src = dh.add_array("src",
                       values_per_cell=1,
                       dtype=dtype,
                       ghost_layers=gl_field,
                       alignment=True)
    dh.fill(src.name, 1.0, ghost_layers=True)
    dst = dh.add_array("dst",
                       values_per_cell=1,
                       dtype=dtype,
                       ghost_layers=gl_field,
                       alignment=True)
    dh.fill(dst.name, 1.0, ghost_layers=True)

    update_rule = ps.Assignment(dst[0, 0], src[0, 0])
    opt = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True
    }
    config = ps.CreateKernelConfig(target=dh.default_target,
                                   cpu_vectorize_info=opt,
                                   ghost_layers=gl_kernel)
    ast = ps.create_kernel(update_rule, config=config)
    kernel = ast.compile()
    if gl_kernel != gl_field:
        with pytest.raises(ValueError):
            dh.run_kernel(kernel)
    else:
        dh.run_kernel(kernel)
Code example #21
def test_issue40(*_):
    """https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40"""
    opt = {
        'instruction_set': "avx512",
        'assume_aligned': False,
        'nontemporal': False,
        'assume_inner_stride_one': True
    }

    src = ps.fields("src(1): double[2D]", layout='fzyx')
    eq = [
        ps.Assignment(sp.Symbol('rho'), 1.0),
        ps.Assignment(src[0, 0](0),
                      sp.Rational(4, 9) * sp.Symbol('rho'))
    ]

    config = ps.CreateKernelConfig(target=Target.CPU,
                                   cpu_vectorize_info=opt,
                                   data_type='float64')
    ast = ps.create_kernel(eq, config=config)

    code = ps.get_code_str(ast)
    assert 'epi32' not in code
Code example #22
def test_sympy_assignment(default_assignment_simplifications):
    assignment = SympyAssignment(dst[0, 0](0),
                                 sp.log(x + 3) / sp.log(2) + sp.log(x**2 + 1))

    config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    ast = ps.create_kernel([assignment], config=config)
    code = ps.get_code_str(ast)

    if default_assignment_simplifications:
        assert 'log1p' in code
        # constant term is directly evaluated
        assert 'log2' not in code
    else:
        # no optimisations will be applied so the optimised version of log will not be in the code
        assert 'log1p' not in code
        assert 'log2' not in code

    assignment.replace(assignment.lhs, dst[0, 0](1))
    assignment.replace(assignment.rhs, sp.log(2))

    assert assignment.lhs == dst[0, 0](1)
    assert assignment.rhs == sp.log(2)
Code example #23
def test_square_root(dtype, instruction_set, field_layout):
    config = ps.CreateKernelConfig(data_type=dtype,
                                   cpu_vectorize_info={
                                       'instruction_set': instruction_set,
                                       'assume_inner_stride_one': True,
                                       'assume_aligned': False,
                                       'nontemporal': False
                                   })

    src_field = ps.Field.create_generic('pdfs',
                                        2,
                                        dtype,
                                        index_dimensions=1,
                                        layout=field_layout,
                                        index_shape=(9, ))

    eq = [
        ps.Assignment(sp.Symbol("xi"), sum(src_field.center_vector)),
        ps.Assignment(sp.Symbol("xi_2"),
                      sp.Symbol("xi") * sp.sqrt(src_field.center))
    ]

    ps.create_kernel(eq, config=config).compile()
Code example #24
File: test_shear_flow.py    Project: mabau/lbmpy
def test_shear_flow(target, stencil_name):
    # Cuda
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")

    # LB parameters
    stencil = LBStencil(stencil_name)

    if stencil.D == 2:
        L = (4, width)
    elif stencil.D == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (stencil.D - 2)

    omega = relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    ρ = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=stencil.D)
    p = dh.add_array('p', values_per_cell=stencil.D**2)

    # LB Setup
    lbm_config = LBMConfig(stencil=stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})
    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # Second moment test setup
    cqc = collision.method.conserved_quantity_computation
    getter_eqs = cqc.output_equations_from_pdfs(src.center_vector,
                                                {'moment2': p})

    kernel_compute_p = ps.create_kernel(getter_eqs, config=config).compile()

    # ## Set up the simulation

    init = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                     pdfs=src.center_vector, density=ρ.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    vel_vec = sp.Matrix([0.5 * shear_velocity] + [0] * (stencil.D - 1))
    if stencil.D == 2:
        lbbh.set_boundary(UBB(velocity=vel_vec), ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(UBB(velocity=-vel_vec), ps.make_slice[:, -wall_thickness:])
    elif stencil.D == 3:
        lbbh.set_boundary(UBB(velocity=vel_vec), ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(UBB(velocity=-vel_vec), ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    for bh in lbbh, :
        assert len(bh._boundary_object_to_boundary_info) == 2, "Restart kernel to clear boundaries"

    def init():
        dh.fill(ρ.name, rho_0)
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)

        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    # Time loop
    def time_loop(steps):
        dh.all_to_gpu()
        for i in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)
            dh.run_kernel(kernel_compute_p)

            dh.swap(src.name, dst.name)

        if u.name in dh.gpu_arrays:
            dh.to_cpu(u.name)
        uu = dh.gather_array(u.name)
        # average periodic directions
        if stencil.D == 3:  # don't swap order
            uu = np.average(uu, axis=2)
        uu = np.average(uu, axis=0)

        if p.name in dh.gpu_arrays:
            dh.to_cpu(p.name)
        pp = dh.gather_array(p.name)
        # average periodic directions
        if stencil.D == 3:  # don't swap order
            pp = np.average(pp, axis=2)
        pp = np.average(pp, axis=0)

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]
        pp = pp[wall_thickness:-wall_thickness]

        if stencil.D == 2:
            pp = pp.reshape((len(pp), 2, 2))
        if stencil.D == 3:
            pp = pp.reshape((len(pp), 3, 3))
        return uu, pp

    init()
    # Simulation
    profile, pressure_profile = time_loop(t_max)

    expected = shear_flow(x=(np.arange(0, actual_width) + .5),
                          t=t_max,
                          nu=eta / rho_0,
                          v=shear_velocity,
                          h=actual_width,
                          k_max=100)

    if stencil.D == 2:
        shear_direction = np.array((1, 0), dtype=float)
        shear_plane_normal = np.array((0, 1), dtype=float)
    if stencil.D == 3:
        shear_direction = np.array((1, 0, 0), dtype=float)
        shear_plane_normal = np.array((0, 1, 0), dtype=float)

    shear_rate = shear_velocity / actual_width
    dynamic_viscosity = eta * rho_0
    correction_factor = eta / (eta - 1. / 6.)

    p_expected = rho_0 * np.identity(dh.dim) / 3.0 + dynamic_viscosity * shear_rate / correction_factor * (
        np.outer(shear_plane_normal, shear_direction) + np.transpose(np.outer(shear_plane_normal, shear_direction)))

    # Subtract the tensor product of the velocity to get the pressure
    pressure_profile[:, 0, 0] -= rho_0 * profile[:, 0]**2
    
    np.testing.assert_allclose(profile[:, 0], expected[1:-1], atol=1E-9)
    for i in range(actual_width - 2):
        np.testing.assert_allclose(pressure_profile[i], p_expected, atol=1E-9, rtol=1E-3)
Code example #25
def test_diffusion():
    """
      Runs the "Diffusion from Plate in Uniform Flow" benchmark as it is described
      in [ch. 8.6.3, The Lattice Boltzmann Method, Krüger et al.].

                dC/dy = 0
            ┌───────────────┐
            │     → → →     │
      C = 0 │     → u →     │ dC/dx = 0
            │     → → →     │
            └───────────────┘
                  C = 1

      The analytical solution is given by:
        C(x,y) = 1 * erfc(y / sqrt(4Dx/u))

      The hydrodynamic field is not simulated, instead a constant velocity is assumed.
    """
    pytest.importorskip("pycuda")
    # Parameters
    domain_size = (1600, 160)
    omega = 1.38
    diffusion = (1 / omega - 0.5) / 3
    velocity = 0.05
    time_steps = 50000
    stencil = LBStencil(Stencil.D2Q9)
    target = ps.Target.GPU

    # Data Handling
    dh = ps.create_data_handling(domain_size=domain_size,
                                 default_target=target)

    vel_field = dh.add_array('vel_field', values_per_cell=stencil.D)
    dh.fill('vel_field', velocity, 0, ghost_layers=True)
    dh.fill('vel_field', 0.0, 1, ghost_layers=True)

    con_field = dh.add_array('con_field', values_per_cell=1)
    dh.fill('con_field', 0.0, ghost_layers=True)

    pdfs = dh.add_array('pdfs', values_per_cell=stencil.Q)
    dh.fill('pdfs', 0.0, ghost_layers=True)
    pdfs_tmp = dh.add_array('pdfs_tmp', values_per_cell=stencil.Q)
    dh.fill('pdfs_tmp', 0.0, ghost_layers=True)

    # Lattice Boltzmann method
    lbm_config = LBMConfig(stencil=stencil,
                           method=Method.MRT,
                           relaxation_rates=[1, 1.5, 1, 1.5, 1],
                           velocity_input=vel_field,
                           output={'density': con_field},
                           compressible=True,
                           weighted=True,
                           kernel_type='stream_pull_collide')

    lbm_opt = LBMOptimisation(symbolic_field=pdfs,
                              symbolic_temporary_field=pdfs_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    method = create_lb_method(lbm_config=lbm_config)
    method.set_conserved_moments_relaxation_rate(omega)

    lbm_config = replace(lbm_config, lb_method=method)
    update_rule = create_lb_update_rule(lbm_config=lbm_config,
                                        lbm_optimisation=lbm_opt)
    kernel = ps.create_kernel(update_rule, config=config).compile()

    # PDF initialization
    init = pdf_initialization_assignments(method, con_field.center,
                                          vel_field.center_vector,
                                          pdfs.center_vector)
    dh.run_kernel(ps.create_kernel(init).compile())

    dh.all_to_gpu()

    # Boundary Handling
    bh = LatticeBoltzmannBoundaryHandling(update_rule.method,
                                          dh,
                                          'pdfs',
                                          name="bh",
                                          target=dh.default_target)
    add_box_boundary(bh, boundary=NeumannByCopy())
    bh.set_boundary(DiffusionDirichlet(0), slice_from_direction('W', dh.dim))
    bh.set_boundary(DiffusionDirichlet(1), slice_from_direction('S', dh.dim))

    # Timeloop
    for i in range(time_steps):
        bh()
        dh.run_kernel(kernel)
        dh.swap("pdfs", "pdfs_tmp")

    dh.all_to_cpu()
    # Verification
    x = np.arange(1, domain_size[0], 1)
    y = np.arange(0, domain_size[1], 1)
    X, Y = np.meshgrid(x, y)
    analytical = np.zeros(domain_size)
    analytical[1:, :] = np.vectorize(math.erfc)(
        Y / np.vectorize(math.sqrt)(4 * diffusion * X / velocity)).transpose()
    simulated = dh.gather_array('con_field', ghost_layers=False)

    residual = 0
    for i in x:
        for j in y:
            residual += (simulated[i, j] - analytical[i, j])**2
    residual = math.sqrt(residual / (domain_size[0] * domain_size[1]))

    assert residual < 1e-2
Code example #26
File: test_lees_edwards.py    Project: mabau/lbmpy
def test_lees_edwards():

    domain_size = (64, 64)
    omega = 1.0  # relaxation rate of first component
    shear_velocity = 0.1  # shear velocity
    shear_dir = 0  # direction of shear flow
    shear_dir_normal = 1  # direction normal to shear plane, for interpolation

    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(domain_size,
                                 periodicity=True,
                                 default_target=ps.Target.CPU)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dh.fill('src', 1.0, ghost_layers=True)

    dst = dh.add_array_like('dst', 'src')
    dh.fill('dst', 0.0, ghost_layers=True)

    force = dh.add_array('force', values_per_cell=stencil.D)
    dh.fill('force', 0.0, ghost_layers=True)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 1.0, ghost_layers=True)
    u = dh.add_array('u', values_per_cell=stencil.D)
    dh.fill('u', 0.0, ghost_layers=True)

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil.D)
    ]
    points_up = sp.Symbol('points_up')
    points_down = sp.Symbol('points_down')

    u_p = sp.Piecewise(
        (1,
         sp.And(ps.data_types.type_all_numbers(counters[1] <= 1, 'int'),
                points_down)),
        (-1,
         sp.And(
             ps.data_types.type_all_numbers(counters[1] >= src.shape[1] - 2,
                                            'int'), points_up)),
        (0, True)) * shear_velocity

    lbm_config = LBMConfig(stencil=stencil,
                           relaxation_rate=omega,
                           compressible=True,
                           velocity_input=u.center_vector +
                           sp.Matrix([u_p, 0]),
                           density_input=rho,
                           force_model=ForceModel.LUO,
                           force=force.center_vector,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=lbm_opt)

    to_insert = [
        s.lhs for s in collision.subexpressions
        if collision.method.first_order_equilibrium_moment_symbols[shear_dir]
        in s.free_symbols
    ]
    for s in to_insert:
        collision = collision.new_with_inserted_subexpression(s)
    ma = []
    for a, c in zip(collision.main_assignments, collision.method.stencil):
        if c[shear_dir_normal] == -1:
            b = (True, False)
        elif c[shear_dir_normal] == 1:
            b = (False, True)
        else:
            b = (False, False)
        a = ps.Assignment(a.lhs, a.rhs.replace(points_down, b[0]))
        a = ps.Assignment(a.lhs, a.rhs.replace(points_up, b[1]))
        ma.append(a)
    collision.main_assignments = ma

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {
                                                       'density': rho,
                                                       'velocity': u
                                                   })

    config = ps.CreateKernelConfig(target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    init = macroscopic_values_setter(collision.method,
                                     velocity=(0, 0),
                                     pdfs=src.center_vector,
                                     density=rho.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    offset = [0.0]

    sync_pdfs = dh.synchronization_function([src.name],
                                            functor=partial(
                                                get_le_boundary_functor,
                                                shear_offset=offset))

    dh.run_kernel(init_kernel)

    time = 500

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
        offset[0] += shear_velocity
    dh.all_to_cpu()

    nu = lattice_viscosity_from_relaxation_rate(omega)
    h = domain_size[0]
    k_max = 100

    analytical_solution = get_solution_navier_stokes(
        np.linspace(0.5, h - 0.5, h), time, nu, shear_velocity, h, k_max)
    np.testing.assert_array_almost_equal(analytical_solution,
                                         dh.gather_array(u.name)[0, :, 0],
                                         decimal=5)

    dh.fill(rho.name, 1.0, ghost_layers=True)
    dh.run_kernel(init_kernel)
    dh.fill(u.name, 0.0, ghost_layers=True)
    dh.fill('force', 0.0, ghost_layers=True)
    dh.cpu_arrays[force.name][64 // 3, 1, :] = [1e-2, -1e-1]

    offset[0] = 0

    time = 20

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
    dh.all_to_cpu()

    vel_unshifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    dh.fill(rho.name, 1.0, ghost_layers=True)
    dh.run_kernel(init_kernel)
    dh.fill(u.name, 0.0, ghost_layers=True)
    dh.fill('force', 0.0, ghost_layers=True)
    dh.cpu_arrays[force.name][64 // 3, 1, :] = [1e-2, -1e-1]

    offset[0] = 10

    time = 20

    dh.all_to_gpu()
    for i in range(time):
        dh.run_kernel(collision_kernel)

        sync_pdfs()
        dh.run_kernel(stream_kernel)

        dh.swap(src.name, dst.name)
    dh.all_to_cpu()

    vel_shifted = np.array(dh.gather_array(u.name)[:, -3:-1, :])

    vel_rolled = np.roll(vel_shifted, -offset[0], axis=0)

    np.testing.assert_array_almost_equal(vel_unshifted, vel_rolled)
Code example #27
def poiseuille_channel(target, stencil_name):
    # physical parameters
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # of box
    actual_width = width - 2  # subtract boundary layer from box width
    ext_force_density = 0.2 / actual_width ** 2  # scale by width to keep stable

    # LB parameters
    lb_stencil = LBStencil(stencil_name)

    if lb_stencil.D == 2:
        L = (4, width)
    elif lb_stencil.D == 3:
        L = (4, width, 4)
    else:
        raise Exception()
    periodicity = [True, False] + [True] * (lb_stencil.D - 2)

    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(L, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    ρ = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # LB Setup
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, force_model=ForceModel.GUO,
                           force=tuple([ext_force_density] + [0] * (lb_stencil.D - 1)),
                           kernel_type='collide_only')

    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # ## Set up the simulation

    init = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                     pdfs=src.center_vector, density=ρ.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    noslip = NoSlip()
    wall_thickness = 2
    if lb_stencil.D == 2:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:])
    elif lb_stencil.D == 3:
        lbbh.set_boundary(noslip, ps.make_slice[:, :wall_thickness, :])
        lbbh.set_boundary(noslip, ps.make_slice[:, -wall_thickness:, :])
    else:
        raise Exception()

    for bh in lbbh, :
        assert len(bh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"

    def init():
        dh.fill(ρ.name, rho_0)
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)

        dh.run_kernel(init_kernel)

    # In[6]:

    sync_pdfs = dh.synchronization_function([src.name])

    # Time loop
    def time_loop(steps):
        dh.all_to_gpu()
        i = -1
        last_max_vel = -1
        for i in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)

            # Consider early termination
            if i % 100 == 0:
                if u.name in dh.gpu_arrays:
                    dh.to_cpu(u.name)
                uu = dh.gather_array(u.name)
                # average periodic directions
                if lb_stencil.D == 3:  # don't swap order
                    uu = np.average(uu, axis=2)
                uu = np.average(uu, axis=0)

                max_vel = np.nanmax(uu)
                if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                    break
                last_max_vel = max_vel

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]

        # correct for f/2 term
        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (lb_stencil.D - 1))

        return uu

    init()
    # Simulation
    profile = time_loop(5000)

    # compare against analytical solution
    # The profile is of shape (n,3). Force is in x-direction
    y = np.arange(len(profile[:, 0]))
    mid = (y[-1] - y[0]) / 2  # Mid point of channel

    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)

    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # Test zero vel in other directions
    np.testing.assert_allclose(profile[:, 1:], np.zeros_like(profile[:, 1:]), atol=1E-9)
Code example #28
def test_total_momentum(method_enum, force_model, omega):
    # For the EDM force model this test case does not work; it is, however, used successfully in test_entropic_model.
    # Any attempt to adapt the EDM force model so that it fulfills this test case resulted in a failure in the
    # entropic test case. Note also that the test runs for MRT and EDM.
    if force_model == ForceModel.EDM:
        pytest.skip()

    L = (16, 16)
    stencil = LBStencil(Stencil.D2Q9)
    F = (2e-4, -3e-4)

    dh = ps.create_data_handling(L,
                                 periodicity=True,
                                 default_target=Target.CPU)
    src = dh.add_array('src', values_per_cell=stencil.Q)
    dst = dh.add_array_like('dst', 'src')
    ρ = dh.add_array('rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=stencil.D)

    lbm_config = LBMConfig(method=method_enum,
                           stencil=stencil,
                           relaxation_rate=omega,
                           compressible=True,
                           force_model=force_model,
                           force=F,
                           streaming_pattern='pull')
    lbm_opt = LBMOptimisation(symbolic_field=src)

    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=lbm_opt)

    config = ps.CreateKernelConfig(cpu_openmp=True, target=dh.default_target)

    collision_kernel = ps.create_kernel(collision, config=config).compile()

    def init():
        dh.fill(ρ.name, 1)
        dh.fill(u.name, 0)

        setter = macroscopic_values_setter(collision.method,
                                           velocity=(0, ) * dh.dim,
                                           pdfs=src,
                                           density=ρ.center,
                                           set_pre_collision_pdfs=True)
        kernel = ps.create_kernel(setter).compile()
        dh.run_kernel(kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    getter = macroscopic_values_getter(collision.method,
                                       ρ.center,
                                       u.center_vector,
                                       src,
                                       use_pre_collision_pdfs=True)
    getter_kernel = ps.create_kernel(getter).compile()

    def time_loop(steps):
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)
            dh.swap(src.name, dst.name)
            sync_pdfs()
        dh.all_to_cpu()

    t = 20
    init()
    time_loop(t)
    dh.run_kernel(getter_kernel)
    total = np.sum(dh.gather_array(u.name), axis=(0, 1))
    assert np.allclose(total / np.prod(L) / F / t, 1)
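Taken together, the examples share one workflow: build symbolic assignments, collect all options in ps.CreateKernelConfig, create and compile the kernel, then call it with the arrays passed by field name. A condensed, self-contained sketch of that pattern (field names and sizes are illustrative only, not taken from any of the tests above):

import numpy as np
import pystencils as ps

# symbolic fields and a simple four-point averaging update
src, dst = ps.fields('src, dst: float64[2D]')
update = [ps.Assignment(dst[0, 0],
                        0.25 * (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]))]

# all kernel options go through CreateKernelConfig
config = ps.CreateKernelConfig(target=ps.Target.CPU, cpu_openmp=False)
kernel = ps.create_kernel(update, config=config).compile()

# arrays are passed by field name; the shape includes one ghost layer per side
a = np.ones((34, 34))
b = np.zeros_like(a)
kernel(src=a, dst=b)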