def test_kernel():
    """Run the Jacobi and reduction helpers on periodic 2D and 3D domains.

    For every domain shape a fresh periodic data handling is created and
    passed to ``kernel_execution_jacobi`` and ``reduction``.  If ``pycuda``
    can be imported, the Jacobi helper runs once more on another freshly
    created data handling; an ``ImportError`` raised anywhere inside that
    guarded section is ignored.
    """
    shapes = ((4, 5), (3, 4, 5))
    for shape in shapes:
        handling = create_data_handling(domain_size=shape, periodicity=True)
        # NOTE(review): test_gpu=True is used outside the pycuda guard and
        # test_gpu=False inside it -- this looks swapped; confirm against
        # the definition of kernel_execution_jacobi.
        kernel_execution_jacobi(handling, test_gpu=True)
        reduction(handling)

        try:
            import pycuda  # noqa: F401 -- imported only to probe availability
            handling = create_data_handling(domain_size=shape, periodicity=True)
            kernel_execution_jacobi(handling, test_gpu=False)
        except ImportError:
            pass
Example #2
0
def test_philox_float():
    """Compare the four-float Philox generator with ``philox_reference``.

    For each of the targets ``'cpu'`` and ``'gpu'`` a 2x2 data handling
    without ghost layers is created.  A kernel writes the four result symbols
    of a ``PhiloxFourFloats`` node into the four components of field ``f``
    and is run with ``time_step=124``.  The gathered values must lie in
    [0, 1] and match the scaled reference within float32 machine epsilon.
    """
    for target in ('cpu', 'gpu'):
        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        field = dh.add_array("f", values_per_cell=4)
        dh.fill('f', 42.0)

        rng = PhiloxFourFloats(dh.dim)
        assignments = [rng]
        for component in range(4):
            assignments.append(ps.Assignment(field(component), rng.result_symbols[component]))
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        values = dh.gather_array('f')
        inside_unit_interval = np.logical_and(values <= 1.0, values >= 0)
        assert inside_unit_interval.all()

        # Map the integer reference words onto floats for the comparison.
        expected = philox_reference * 2.**-32 + 2.**-33
        tolerance = np.finfo(np.float32).eps
        assert np.allclose(values, expected, rtol=0, atol=tolerance)
Example #3
0
def test_philox_double():
    """Compare the two-double Philox generator with ``philox_reference``.

    For each of the targets ``'cpu'`` and ``'gpu'`` a 2x2 data handling
    without ghost layers is created.  A kernel writes the two result symbols
    of a ``PhiloxTwoDoubles`` node into the two components of field ``f`` and
    is run with ``time_step=124``.  The gathered values must lie in [0, 1]
    and match a reference assembled from pairs of reference words within
    float64 machine epsilon.
    """
    for target in ('cpu', 'gpu'):
        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        field = dh.add_array("f", values_per_cell=2)
        dh.fill('f', 42.0)

        rng = PhiloxTwoDoubles(dh.dim)
        assignments = [rng]
        assignments += [ps.Assignment(field(component), rng.result_symbols[component])
                        for component in range(2)]
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        values = dh.gather_array('f')
        inside_unit_interval = np.logical_and(values <= 1.0, values >= 0)
        assert inside_unit_interval.all()

        even_words = philox_reference[:, :, 0::2]
        odd_words = philox_reference[:, :, 1::2]
        # ``<<`` binds tighter than ``^``; the parentheses only spell that out.
        combined = even_words ^ (odd_words << (53 - 32))
        expected = combined * 2.**-53 + 2.**-54
        tolerance = np.finfo(np.float64).eps
        assert np.allclose(values, expected, rtol=0, atol=tolerance)
Example #4
0
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
    """Check that a vectorised RNG kernel matches the non-vectorised one.

    Args:
        target: instruction set name placed in the vectorisation options; for
            'neon', 'vsx', 'rvv' and names starting with 'sve' the test is
            marked as expected failure when ``rng`` is 'aesni'.
        rng: first part of the key used to look up the node class in ``RNGs``.
        precision: second part of the ``RNGs`` key; 'float' selects four
            values per cell, anything else two.
        dtype: 'float' stores field ``f`` as float32, anything else float64.
        t: value passed to the kernels as ``time_step``.
        offsets: forwarded to the RNG node constructor.
        keys: accepted but not used inside this function.
        offset_values: when given, paired with ``offsets`` and passed to the
            kernels under each offset's ``name``.
    """
    aes_missing = target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')
    if aes_missing and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')

    vectorization = {
        'assume_inner_stride_one': True,
        'assume_aligned': True,
        'instruction_set': target,
    }
    n_values = 4 if precision == 'float' else 2
    f_dtype = np.float32 if dtype == 'float' else np.float64

    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=n_values, dtype=f_dtype, alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=n_values)

    def rng_assignments(destination):
        # A new RNG node followed by one write per result symbol.
        node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
        writes = [ps.Assignment(destination(i), symbol)
                  for i, symbol in enumerate(node.result_symbols)]
        return [node] + writes

    # Reference run: plain kernel writing into ``ref``.
    plain_kernel = ps.create_kernel(rng_assignments(ref), target=dh.default_target).compile()

    run_args = {'time_step': t}
    if offset_values is not None:
        for symbol, value in zip(offsets, offset_values):
            run_args[symbol.name] = value
    dh.run_kernel(plain_kernel, **run_args)

    # Vectorised run: same assignments, written into ``f``.
    vector_kernel = ps.create_kernel(rng_assignments(f), target=dh.default_target,
                                     cpu_vectorize_info=vectorization).compile()
    dh.run_kernel(vector_kernel, **run_args)

    expected = dh.gather_array(ref.name)
    actual = dh.gather_array(f.name)

    assert np.allclose(expected, actual)
Example #5
0
def test_contact_angle():
    """Check the value a ``ContactAngle`` boundary writes at cell ``[5, 0]``.

    A 9x9 non-periodic domain holds field ``C`` set to 0 in the ghost layers
    and to 0.5 in the interior.  After applying ``ContactAngle(45, 5)`` on the
    slice ``[:, 0]`` the entry at ``[5, 0]`` is compared with a closed-form
    value.  Equality and inequality of ``ContactAngle`` objects are checked
    as well.
    """
    stencil = LBStencil(Stencil.D2Q9)
    contact_angle = 45
    phase_value = 0.5
    domain_size = (9, 9)

    dh = ps.create_data_handling(domain_size, periodicity=(False, False))
    phase_field = dh.add_array("C", values_per_cell=1)
    dh.fill("C", 0.0, ghost_layers=True)
    dh.fill("C", phase_value, ghost_layers=False)

    boundaries = BoundaryHandling(dh, phase_field.name, stencil, target=ps.Target.CPU)
    boundaries.set_boundary(ContactAngle(contact_angle, 5), ps.make_slice[:, 0])
    boundaries()

    h = 1.0
    coeff = 1.0 - 0.5 * h * (4.0 / 5) * math.cos(math.radians(contact_angle))
    coeff_minus_one = coeff - 1.0
    root = np.sqrt(coeff * coeff - 4.0 * coeff_minus_one * phase_value)
    expected = (coeff - root) / coeff_minus_one - phase_value

    np.testing.assert_almost_equal(dh.cpu_arrays["C"][5, 0], expected)

    assert ContactAngle(45, 5) == ContactAngle(45, 5)
    assert ContactAngle(46, 5) != ContactAngle(45, 5)
Example #6
0
def test_staggered(vectorized):
    """Make sure that the RNG counter can be substituted during loop cutting.

    A staggered kernel is built in which the first flux assignment reads a
    random symbol.  When ``vectorized`` is false the test ends after the
    kernel compiles.  Otherwise the same assignments are compiled again with
    vectorisation options and both kernels must produce matching results.
    """
    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
    j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)

    zero_fluxes = []
    for direction in j.staggered_stencil:
        zero_fluxes.append(ps.Assignment(j.staggered_access(direction), 0))
    collection = ps.AssignmentCollection(zero_fluxes)

    symbols = random_symbol(collection.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
    first = collection.main_assignments[0]
    collection.main_assignments[0] = ps.Assignment(first.lhs, next(symbols))
    plain_kernel = ps.create_staggered_kernel(collection, target=dh.default_target).compile()

    if not vectorized:
        return
    if not instruction_sets:
        pytest.skip("cannot detect CPU instruction set")
    pytest.importorskip('islpy')
    vectorization = {
        'assume_inner_stride_one': True,
        'assume_aligned': False,
        'instruction_set': instruction_sets[-1],
    }

    def run_and_gather(compiled):
        # Reset the field, run with fixed seed and time step, return the result.
        dh.fill(j.name, 867)
        dh.run_kernel(compiled, seed=5, time_step=309)
        return dh.gather_array(j.name)

    expected = run_and_gather(plain_kernel)

    vector_kernel = ps.create_staggered_kernel(collection, target=dh.default_target,
                                               cpu_vectorize_info=vectorization).compile()
    actual = run_and_gather(vector_kernel)

    assert np.allclose(expected, actual)
Example #7
0
def test_advection(dim):
    """Check that ``ps.fd.VOF`` and ``VOF2`` compute the same fluxes.

    Fields ``c`` and ``u`` on a periodic domain of edge length 8 are filled
    with random values.  The flux field ``j`` is reset to NaN before each
    kernel runs, and the two gathered flux arrays must agree.
    """
    extent = (8, ) * dim
    dh = ps.create_data_handling(extent, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=3**dh.dim // 2,
                     field_type=ps.FieldType.STAGGERED_FLUX)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # Array shape with two extra cells per axis.
    padded = [n + 2 for n in extent]
    dh.cpu_arrays[c.name][:] = np.random.random(padded)
    dh.cpu_arrays[u.name][:] = (np.random.random(padded + [dim]) - 0.5) / 5

    def compute_flux(assignments):
        # Compile, reset ``j`` to NaN, run and return the gathered flux.
        kernel = ps.create_kernel(assignments).compile()
        dh.fill(j.name, np.nan, ghost_layers=True)
        dh.run_kernel(kernel)
        return dh.gather_array(j.name)

    # Copy the first result so the second run cannot overwrite it.
    flux_builtin = compute_flux(ps.fd.VOF(j, u, c)).copy()
    flux_other = compute_flux(VOF2(j, u, c, simplify=False))

    assert np.allclose(flux_builtin, flux_other)
Example #8
0
def test_free_slip_equivalence():
    """Check that FreeSlip gives the same result with and without a normal.

    Two identical PDF fields are filled with a running counter.  One gets a
    ``FreeSlip`` boundary without a normal direction, the other one with
    ``normal_direction=(0, -1)``, both on the 'N' slice.  After both boundary
    handlings ran, the two fields must be equal.
    """
    stencil = LBStencil(Stencil.D2Q9)
    dh = create_data_handling(domain_size=(4, 4), periodicity=(False, False))
    for field_name in ('src1', 'src2'):
        dh.add_array(field_name, values_per_cell=stencil.Q, alignment=True)
    for field_name in ('src1', 'src2'):
        dh.fill(field_name, 0.0, ghost_layers=True)

    shape = dh.gather_array('src1', ghost_layers=True).shape
    n_x, n_y, n_dir = shape[0], shape[1], shape[2]

    for x in range(n_x):
        for y in range(n_y):
            for direction in range(n_dir):
                # Row-major linear index, i.e. a counter running over all entries.
                value = (x * n_y + y) * n_dir + direction
                dh.cpu_arrays['src1'][x, y, direction] = value
                dh.cpu_arrays['src2'][x, y, direction] = value

    lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.8)
    method = create_lb_method(lbm_config=lbm_config)
    north = 'N'

    handling_auto = LatticeBoltzmannBoundaryHandling(method, dh, 'src1', name="bh1")
    handling_auto.set_boundary(FreeSlip(stencil=stencil),
                               slice_from_direction(north, dh.dim))

    handling_explicit = LatticeBoltzmannBoundaryHandling(method, dh, 'src2', name="bh2")
    handling_explicit.set_boundary(FreeSlip(stencil=stencil, normal_direction=(0, -1)),
                                   slice_from_direction(north, dh.dim))

    handling_auto()
    handling_explicit()

    assert np.array_equal(dh.cpu_arrays['src1'], dh.cpu_arrays['src2'])
Example #9
0
def test_stream_only_kernel(streaming_pattern):
    """Check that a stream-only kernel moves each PDF written at cell (1, 1).

    For every timestep of ``streaming_pattern`` the value ``i`` is written
    through the 'in' accessor for direction ``i`` at cell (1, 1).  After the
    stream kernel ran, reading through the 'out' accessor at the same cell
    must return ``i`` again.
    """
    domain_size = (4, 4)
    cell = (1, 1)
    stencil = LBStencil(Stencil.D2Q9)
    n_directions = len(stencil)

    dh = ps.create_data_handling(domain_size, default_target=Target.CPU)
    pdfs = dh.add_array('pdfs', values_per_cell=n_directions)
    pdfs_tmp = dh.add_array_like('pdfs_tmp', 'pdfs')

    for timestep in get_timesteps(streaming_pattern):
        accessor = get_accessor(streaming_pattern, timestep)
        src = pdfs
        # In-place patterns read and write the same field.
        if is_inplace(streaming_pattern):
            dst = pdfs
        else:
            dst = pdfs_tmp

        for field in (src, dst):
            dh.fill(field.name, 0.0)

        stream_assignments = create_stream_only_kernel(stencil, src, dst, accessor=accessor)
        stream = create_kernel(stream_assignments).compile()

        # Write one distinct value per direction ...
        writer = AccessPdfValues(stencil, streaming_dir='in', accessor=accessor)
        for direction in range(n_directions):
            writer.write_pdf(dh.cpu_arrays[src.name], cell, direction, direction)

        dh.run_kernel(stream)

        # ... and expect to read it back after streaming.
        reader = AccessPdfValues(stencil, streaming_dir='out', accessor=accessor)
        for direction in range(n_directions):
            assert reader.read_pdf(dh.cpu_arrays[dst.name], cell, direction) == direction
Example #10
0
def test_flux_stencil(stencil, derivative):
    """Check how many field accesses the FVM flux and continuity use.

    Args:
        stencil: stencil name; character 1 is read as the dimension and the
            characters from position 3 on as the number of directions.
        derivative: index selecting the flux expression: 0 picks ``c.center``
            scaled by the symbols ``a_i``, 1 picks the gradient of ``c``.
    """
    dim = int(stencil[1])
    dh = ps.create_data_handling((40, ) * dim, periodicity=True,
                                 default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    n_directions = int(stencil[3:])
    j = dh.add_array('j', values_per_cell=n_directions // 2,
                     field_type=ps.FieldType.STAGGERED_FLUX)

    def gradient(field):
        return sp.Matrix([ps.fd.diff(field, axis) for axis in range(dh.dim)])

    scaled_centre = sp.Matrix([sp.Symbol(f"a_{i}") * c.center for i in range(dh.dim)])
    gradient_of_c = gradient(c)
    flux_expression = (scaled_centre, gradient_of_c)[derivative]
    disc = ps.fd.FVM1stOrder(c, flux=flux_expression)

    def access_counts(assignments):
        return [len(a.rhs.atoms(ps.field.Field.Access)) for a in assignments]

    # check the continuity
    continuity_assignments = disc.discrete_continuity(j)
    expected_continuity = [n_directions] * len(continuity_assignments)
    assert access_counts(continuity_assignments) == expected_continuity

    # check the flux
    flux_assignments = disc.discrete_flux(j)
    expected_flux = [2] * len(flux_assignments)
    assert access_counts(flux_assignments) == expected_flux
Example #11
0
def test_optimised_and_full_communication_equivalence(stencil_name):
    """Check that optimised periodic communication matches the full one.

    Both PDF fields are filled with a running counter, the full
    synchronisation function is applied, a pull-streaming step is run and the
    result is stored.  The fields are then reset to the same counter values
    and the procedure is repeated with ``LBMPeriodicityHandling``.  The two
    results must agree on the compared region.

    Args:
        stencil_name: value passed to ``LBStencil`` to build the stencil.
    """
    target = ps.Target.CPU
    stencil = LBStencil(stencil_name)
    domain_size = (4, ) * stencil.D

    dh = ps.create_data_handling(domain_size, periodicity=(True, ) * stencil.D,
                                 parallel=False, default_target=target)

    pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf", 0, ghost_layers=True)
    pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
    dh.fill("pdf_tmp", 0, ghost_layers=True)

    gl = dh.ghost_layers_of_field("pdf")

    def reset_fields():
        # Give every entry (ghost layers included) its row-major linear index,
        # which is the value the former element-wise counter loop produced.
        for name in ('pdf', 'pdf_tmp'):
            arr = dh.cpu_arrays[name]
            arr[...] = np.arange(arr.size, dtype=arr.dtype).reshape(arr.shape)

    reset_fields()

    lbm_config = LBMConfig(stencil=stencil, kernel_type="stream_pull_only")
    lbm_opt = LBMOptimisation(symbolic_field=pdf, symbolic_temporary_field=pdf_tmp)
    config = ps.CreateKernelConfig(target=dh.default_target, cpu_openmp=True)

    ac = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
    ast = ps.create_kernel(ac, config=config)
    stream = ast.compile()

    full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
    full_communication()

    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")
    pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])

    reset_fields()

    optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
                                                     streaming_pattern='pull')
    optimised_communication()
    dh.run_kernel(stream)
    dh.swap("pdf", "pdf_tmp")

    # Compare the same index range the former nested loops visited, for any
    # dimension and for all PDF components at once.
    # NOTE(review): the upper bound is domain_size, not domain_size + gl, so
    # the last gl interior layers per axis look unchecked -- confirm intent.
    compared = tuple(slice(gl, extent) for extent in domain_size)
    np.testing.assert_array_equal(
        dh.cpu_arrays['pdf'][compared],
        pdf_full_communication[compared],
        err_msg="optimised and full communication differ after streaming")
Example #12
0
def test_staggered_loop_cutting():
    """Check that a staggered "SW" kernel contains no ``Conditional`` node.

    The test is skipped when ``islpy`` cannot be imported.
    """
    pytest.importorskip('islpy')
    dh = ps.create_data_handling((4, 4), periodicity=True, default_target=Target.CPU)
    j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)

    south_west = ps.Assignment(j.staggered_access("SW"), 1)
    ast = ps.create_staggered_kernel([south_west], target=dh.default_target)

    conditionals = ast.atoms(ps.astnodes.Conditional)
    assert not conditionals
Example #13
0
def test_symbolic_fields():
    """Check the dimensions of symbolic fields created by the data handling.

    Covers a field made with ``add_array`` and a second one made from it with
    ``add_array_like``; finally the two fields are swapped.
    """
    dh = create_data_handling(domain_size=(5, 7))

    def check_dimensions(name):
        field = dh.fields[name]
        assert field.spatial_dimensions == dh.dim
        assert field.index_dimensions == 1

    dh.add_array('f1', values_per_cell=dh.dim)
    check_dimensions('f1')

    dh.add_array_like("f_tmp", "f1", latex_name=r"f_{tmp}")
    check_dimensions('f_tmp')

    dh.swap('f1', 'f_tmp')
Example #14
0
def test_staggered_subexpressions():
    """Compile a staggered kernel whose "W" flux reads a plain symbol.

    The symbol ``c`` is used by the first assignment and assigned by the
    second one; the test passes when the kernel compiles.
    """
    dh = ps.create_data_handling((10, 10), periodicity=True, default_target=Target.CPU)
    j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
    c = sp.symbols("c")

    west_flux = ps.Assignment(j.staggered_access("W"), c)
    define_c = ps.Assignment(c, 1)

    kernel_ast = ps.create_staggered_kernel([west_flux, define_c], target=dh.default_target)
    kernel_ast.compile()
Example #15
0
def test_data_handling(parallel):
    """Check that arrays requested with ``alignment=8 * 4`` are aligned.

    Args:
        parallel: forwarded to ``create_data_handling``.
    """
    alignment = 8 * 4
    attempts = 16
    # try a few times, since we might get lucky and get randomly a correct alignment
    for _ in range(attempts):
        dh = create_data_handling((6, 7), default_ghost_layers=1, parallel=parallel)
        dh.add_array('test', alignment=alignment, values_per_cell=1)
        for block in dh.iterate(ghost_layers=True, inner_ghost_layers=True):
            view = block['test'][1:, 3:]
            assert is_aligned(view, alignment)
Example #16
0
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    """Check a vectorised kernel with aligned and non-temporal stores.

    A four-neighbour averaging kernel is generated with ``assume_aligned`` and
    ``nontemporal`` enabled.  Depending on ``instruction_set`` certain
    instructions must be present in the AST's instruction set, and every such
    instruction that is present must also appear in the generated code.
    Finally the kernel runs and the sum over ``f`` is compared with the
    number of domain cells.

    Args:
        instruction_set: instruction set name put into the vectorisation
            options.
        openmp: enables OpenMP in the kernel config and switches the array
            alignment from ``True`` to ``'cacheline'``.
    """
    domain_size = (24, 24)
    dh = ps.create_data_handling(domain_size, periodicity=(True, True),
                                 parallel=False, default_target=Target.CPU)

    if openmp:
        alignment = 'cacheline'
    else:
        alignment = True
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)

    vectorization = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    neighbour_sum = g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]
    update_rule = [ps.Assignment(f.center(), 0.25 * neighbour_sum)]
    config = ps.CreateKernelConfig(target=dh.default_target,
                                   cpu_vectorize_info=vectorization,
                                   cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=config)

    # Instructions that must exist for the given instruction set.
    required = []
    if instruction_set in ['sse'] or instruction_set.startswith('avx'):
        required += ['stream', 'streamFence']
    if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
        required.append('cachelineZero')
    if instruction_set in ['vsx']:
        required.append('storeAAndFlushCacheline')
    for name in required:
        assert name in ast.instruction_set

    # Every available store-related instruction has to show up in the code.
    store_instructions = ('stream', 'streamFence', 'cachelineZero',
                          'storeAAndFlushCacheline', 'flushCacheline')
    for name in store_instructions:
        if name not in ast.instruction_set:
            continue
        prefix = ast.instruction_set[name].split('{')[0]
        assert prefix in ps.get_code_str(ast)

    kernel = ast.compile()
    dh.run_kernel(kernel)

    total = np.sum(dh.cpu_arrays['f'])
    np.testing.assert_equal(total, np.prod(domain_size))
Example #17
0
def test_sliced_getter_data_handling():
    """Check ``SlicedGetterDataHandling`` sums on the slice ``[2:-2, 2:-2]``.

    Field ``dst`` is filled with zeros and must sum to 0 on the slice; field
    ``src`` is filled with ones and must sum to 36.
    """
    domain_shape = (10, 10)
    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)

    dh.add_array("src", values_per_cell=1)
    dh.fill("src", 1.0, ghost_layers=True)

    dh.add_array("dst", values_per_cell=1)
    dh.fill("dst", 0.0, ghost_layers=True)

    expected_sums = (('dst', 0), ('src', 36))
    for field_name, expected in expected_sums:
        getter = SlicedGetterDataHandling(dh, field_name)
        region = make_slice[2:-2, 2:-2]
        assert np.sum(getter[region]) == expected
Example #18
0
def test_aesni_float():
    """Check that the four-float AES-NI generator yields values in [0, 1].

    A kernel writes the four result symbols of an ``AESNIFourFloats`` node
    into the four components of field ``f`` on a 2x2 CPU data handling and is
    run with ``time_step=124``.
    """
    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
    field = dh.add_array("f", values_per_cell=4)
    dh.fill('f', 42.0)

    rng = AESNIFourFloats(dh.dim)
    assignments = [rng]
    for component in range(4):
        assignments.append(ps.Assignment(field(component), rng.result_symbols[component]))
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel, time_step=124)
    dh.all_to_cpu()

    values = dh.gather_array('f')
    inside_unit_interval = np.logical_and(values <= 1.0, values >= 0)
    assert inside_unit_interval.all()
Example #19
0
def test_access():
    """Check array shapes seen when iterating with and without ghost layers.

    Runs for a 3D and a 2D domain and for 1 and 4 values per cell.  With one
    ghost layer every spatial extent grows by 2.  An index dimension is only
    expected when more than one value is stored per cell.
    """
    for domain_shape in [(2, 3, 4), (2, 4)]:
        shape_with_ghosts = tuple(extent + 2 for extent in domain_shape)
        for f_size in (1, 4):
            dh = create_data_handling(domain_size=domain_shape)
            dh.add_array('f1', values_per_cell=f_size)
            assert dh.dim == len(domain_shape)

            # Trailing index dimension, absent for scalar fields.
            index_shape = (f_size, ) if f_size > 1 else ()

            for block in dh.iterate(ghost_layers=1):
                assert block['f1'].shape == shape_with_ghosts + index_shape

            for block in dh.iterate(ghost_layers=0):
                assert block['f1'].shape == domain_shape + index_shape
Example #20
0
def test_rng_symbol(vectorized):
    """Make sure that the RNG symbol generator generates symbols and that the resulting code compiles.

    Args:
        vectorized: when true, the kernel is built with vectorisation options
            using the last entry of ``instruction_sets``; the test is skipped
            if that list is empty.
    """
    cpu_vectorize_info = None
    if vectorized:
        if not instruction_sets:
            pytest.skip("cannot detect CPU instruction set")
        cpu_vectorize_info = {
            'assume_inner_stride_one': True,
            'assume_aligned': True,
            'instruction_set': instruction_sets[-1],
        }

    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=2 * dh.dim, alignment=True)
    n_components = f.shape[-1]

    ac = ps.AssignmentCollection([ps.Assignment(f(i), 0) for i in range(n_components)])
    symbol_source = random_symbol(ac.subexpressions, dim=dh.dim)
    for i in range(n_components):
        target_access = ac.main_assignments[i].lhs
        ac.main_assignments[i] = ps.Assignment(target_access, next(symbol_source))

    # One symbol per component, and all of them distinct.
    symbols = [assignment.rhs for assignment in ac.main_assignments]
    assert len(symbols) == n_components
    assert len(set(symbols)) == n_components

    ps.create_kernel(ac, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()
Example #21
0
def test_source_stencil(stencil):
    """Check how many field accesses a source term adds to the continuity.

    The first continuity equation built with a source term is compared with
    the one built without.  The simplified difference must contain one field
    access when the source is a plain ``Field.Access`` and two otherwise.

    Args:
        stencil: stencil name; character 1 is read as the dimension and the
            characters from position 3 on as the number of directions.
    """
    dim = int(stencil[1])
    dh = ps.create_data_handling((40, ) * dim, periodicity=True,
                                 default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=int(stencil[3:]) // 2,
                     field_type=ps.FieldType.STAGGERED_FLUX)

    reference_rhs = ps.fd.FVM1stOrder(c).discrete_continuity(j)[0].rhs

    sources = [c.center]
    sources.extend(ps.fd.diff(c, axis) for axis in range(dh.dim))

    for source in sources:
        disc = ps.fd.FVM1stOrder(c, source=source)
        with_source_rhs = disc.discrete_continuity(j)[0].rhs
        difference = sp.simplify(with_source_rhs - reference_rhs)
        expected_accesses = 1 if type(source) is ps.field.Field.Access else 2
        assert len(difference.atoms(ps.field.Field.Access)) == expected_accesses
Example #22
0
def test_momentum_density_shift(force_model):
    """Check the momentum density computed from all-zero PDFs under a force.

    With ``force=(1, 2)`` and PDFs equal to zero, the summed first component
    of the momentum density must equal half the number of cells and the
    summed second component the number of cells.

    Args:
        force_model: forwarded to ``LBMConfig``.
    """
    target = Target.CPU
    stencil = LBStencil(Stencil.D2Q9)
    domain_size = (4, 4)
    dh = ps.create_data_handling(domain_size=domain_size, default_target=target)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 0.0, ghost_layers=True)

    momentum_density = dh.add_array('momentum_density', values_per_cell=dh.dim)
    dh.fill('momentum_density', 0.0, ghost_layers=True)

    src = dh.add_array('src', values_per_cell=len(stencil))
    dh.fill('src', 0.0, ghost_layers=True)

    lbm_config = LBMConfig(method=Method.SRT, compressible=True,
                           force_model=force_model, force=(1, 2))
    method = create_lb_method(lbm_config=lbm_config)
    cqc = method.conserved_quantity_computation

    outputs = {
        'density': rho.center,
        'momentum_density': momentum_density.center_vector,
    }
    getter_equations = cqc.output_equations_from_pdfs(src.center_vector, outputs)

    config = ps.CreateKernelConfig(target=dh.default_target)
    getter_kernel = ps.create_kernel(getter_equations, config=config).compile()
    dh.run_kernel(getter_kernel)

    n_cells = np.prod(domain_size)
    expected_sums = ((0, n_cells / 2), (1, n_cells))
    for component, expected in expected_sums:
        gathered = dh.gather_array(momentum_density.name)
        assert np.sum(gathered[:, :, component]) == expected
Example #23
0
def test_aesni_double():
    """Check that the two-double AES-NI generator yields values in [0, 1].

    A kernel writes the two result symbols of an ``AESNITwoDoubles`` node
    into the two components of field ``f`` on a 2x2 CPU data handling and is
    run with ``time_step=124``.
    """
    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
    field = dh.add_array("f", values_per_cell=2)
    dh.fill('f', 42.0)

    rng = AESNITwoDoubles(dh.dim)
    assignments = [rng]
    assignments += [ps.Assignment(field(component), rng.result_symbols[component])
                    for component in range(2)]
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    dh.all_to_gpu()
    dh.run_kernel(kernel, time_step=124)
    dh.all_to_cpu()

    values = dh.gather_array('f')
    inside_unit_interval = np.logical_and(values <= 1.0, values >= 0)
    assert inside_unit_interval.all()
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel,
                                            instruction_set, dtype):
    """Check that a kernel only runs on fields with matching ghost layers.

    A vectorised copy kernel from ``src`` to ``dst`` is created with
    ``gl_kernel`` ghost layers.  Running it on fields allocated with
    ``gl_field`` ghost layers must raise ``ValueError`` when the two differ
    and must succeed when they are equal.

    Args:
        gl_field: ghost layer count used when allocating both fields.
        gl_kernel: ghost layer count given to the kernel config.
        instruction_set: instruction set name for the vectorisation options.
        dtype: 'double' selects float64, anything else float32.
    """
    if dtype == 'double':
        dtype = np.float64
    else:
        dtype = np.float32

    domain_size = (128, 128)
    dh = ps.create_data_handling(domain_size, periodicity=(True, True),
                                 default_target=Target.CPU)

    # Both fields share the same allocation settings.
    array_options = dict(values_per_cell=1, dtype=dtype,
                         ghost_layers=gl_field, alignment=True)
    src = dh.add_array("src", **array_options)
    dh.fill(src.name, 1.0, ghost_layers=True)
    dst = dh.add_array("dst", **array_options)
    dh.fill(dst.name, 1.0, ghost_layers=True)

    copy_rule = ps.Assignment(dst[0, 0], src[0, 0])
    vectorization = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    config = ps.CreateKernelConfig(target=dh.default_target,
                                   cpu_vectorize_info=vectorization,
                                   ghost_layers=gl_kernel)
    kernel = ps.create_kernel(copy_rule, config=config).compile()

    if gl_kernel == gl_field:
        dh.run_kernel(kernel)
        return

    with pytest.raises(ValueError):
        dh.run_kernel(kernel)
Example #25
0
def test_simple(target):
    """Check PDF values written by NoSlip and UBB boundaries on a 4x4 domain.

    Component ``i`` of the PDF field is filled with the value ``i``.  NoSlip
    walls are set on the slices ``[0, :]``, ``[-1, :]`` and ``[:, 0]``, and a
    ``UBB((1, 0))`` wall on ``[:, -1]``.  After one boundary sweep selected
    entries of the CPU array are compared with their expected values.

    Args:
        target: kernel target; for ``Target.GPU`` the test is skipped when
            ``pycuda`` is missing and data is copied to and from the GPU
            around the sweep.
    """
    on_gpu = target == Target.GPU
    if on_gpu:
        import pytest
        pytest.importorskip('pycuda')

    dh = create_data_handling((4, 4), parallel=False, default_target=target)
    dh.add_array('pdfs', values_per_cell=9, cpu=True, gpu=target != Target.CPU)
    for component in range(9):
        dh.fill("pdfs", component, value_idx=component, ghost_layers=True)

    if on_gpu:
        dh.all_to_gpu()

    lbm_config = LBMConfig(stencil=LBStencil(Stencil.D2Q9),
                           compressible=False,
                           relaxation_rate=1.8)
    config = CreateKernelConfig(target=target)
    lb_func = create_lb_function(lbm_config=lbm_config, config=config)

    bh = LatticeBoltzmannBoundaryHandling(lb_func.method, dh, 'pdfs', target=target)

    wall = NoSlip()
    moving_wall = UBB((1, 0))
    for wall_region in (make_slice[0, :], make_slice[-1, :], make_slice[:, 0]):
        bh.set_boundary(wall, wall_region)
    bh.set_boundary(moving_wall, make_slice[:, -1])

    bh.prepare()
    bh()

    if on_gpu:
        dh.all_to_cpu()

    pdfs = dh.cpu_arrays['pdfs']
    # Offset that appears in the expected values next to the UBB wall.
    shift = 6 / 36

    # left lower corner
    assert pdfs[0, 0, 6] == 7

    assert pdfs[0, 1, 4] == 3
    assert pdfs[0, 1, 6] == 7

    assert pdfs[1, 0, 1] == 2
    assert pdfs[1, 0, 6] == 7

    # left side
    assert all(pdfs[0, 2:4, 4] == 3)
    assert all(pdfs[0, 2:4, 6] == 7)
    assert all(pdfs[0, 2:4, 5] == 5)

    # left upper corner
    assert pdfs[0, 4, 4] == 3
    assert pdfs[0, 4, 8] == 5

    assert pdfs[0, 5, 8] == 5 + shift

    assert pdfs[1, 5, 8] == 5 + shift
    assert pdfs[1, 5, 2] == 1

    # top side
    assert all(pdfs[2:4, 5, 2] == 1)
    assert all(pdfs[2:4, 5, 7] == 6 - shift)
    assert all(pdfs[2:4, 5, 8] == 5 + shift)

    # right upper corner
    assert pdfs[4, 5, 2] == 1
    assert pdfs[4, 5, 7] == 6 - shift

    assert pdfs[5, 5, 7] == 6 - shift

    assert pdfs[5, 4, 3] == 4
    assert pdfs[5, 4, 7] == 6

    # right side
    assert all(pdfs[5, 2:4, 3] == 4)
    assert all(pdfs[5, 2:4, 5] == 8)
    assert all(pdfs[5, 2:4, 7] == 6)

    # right lower corner
    assert pdfs[5, 1, 3] == 4
    assert pdfs[5, 1, 5] == 8

    assert pdfs[5, 0, 5] == 8

    assert pdfs[4, 0, 1] == 2
    assert pdfs[4, 0, 5] == 8

    # lower side
    # NOTE(review): these three checks index [0, 2:4, ...] like the
    # "left side" block above rather than [2:4, 0, ...] -- possibly a
    # copy-paste slip; confirm the intended cells and values.
    assert all(pdfs[0, 2:4, 4] == 3)
    assert all(pdfs[0, 2:4, 6] == 7)
    assert all(pdfs[0, 2:4, 8] == 5)
Example #26
0
def test_ek(stencil):
    """Compare the automatic FVM discretisation with a hand-written one.

    The flux ``-D * grad(c) + D * z * c * grad(Phi)`` is discretised with
    ``ps.fd.FVM1stOrder`` on a 40x40 periodic domain.  The resulting flux and
    continuity assignments are compared with manually written staggered
    expressions.

    Args:
        stencil: stencil name; its last character is read as the number of
            directions, half of which become values per cell of ``j``.
    """
    # parameters
    domain = (40, 40)
    D = sp.Symbol("D")
    z = sp.Symbol("z")

    # data structures
    dh = ps.create_data_handling(domain, periodicity=True,
                                 default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=int(stencil[-1]) // 2,
                     field_type=ps.FieldType.STAGGERED_FLUX)
    Phi = dh.add_array('Φ', values_per_cell=1)

    # perform automatic discretization
    def gradient(field):
        return sp.Matrix([ps.fd.diff(field, axis) for axis in range(dh.dim)])

    flux_eq = -D * gradient(c) + D * z * c.center * gradient(Phi)

    disc = ps.fd.FVM1stOrder(c, flux_eq)
    flux_assignments = disc.discrete_flux(j)
    continuity_assignments = disc.discrete_continuity(j)

    # manual discretization
    root_two = sp.sqrt(2)
    x_staggered = -c[-1, 0] + c[0, 0] \
        + z * (c[-1, 0] + c[0, 0]) / 2 * (Phi[-1, 0] - Phi[0, 0])
    y_staggered = -c[0, -1] + c[0, 0] \
        + z * (c[0, -1] + c[0, 0]) / 2 * (Phi[0, -1] - Phi[0, 0])
    xy_staggered = (-c[-1, -1] + c[0, 0]) / root_two \
        + z * (c[-1, -1] + c[0, 0]) / 2 * (Phi[-1, -1] - Phi[0, 0]) / root_two
    xY_staggered = (-c[-1, 1] + c[0, 0]) / root_two \
        + z * (c[-1, 1] + c[0, 0]) / 2 * (Phi[-1, 1] - Phi[0, 0]) / root_two

    # Diagonal fluxes only exist when ``j`` stores four values per cell.
    has_diagonals = j.index_shape[0] == 4
    if has_diagonals:
        A0 = 1 + sp.sqrt(2)
    else:
        A0 = 1

    access = j.staggered_access
    directions = j.staggered_stencil + [
        ps.stencil.inverse_direction_string(d) for d in j.staggered_stencil
    ]
    divergence = -1 * sum([access(d) for d in directions])

    update = [ps.Assignment(c.center, c.center + divergence)]
    flux = [
        ps.Assignment(j.staggered_access("W"), D * x_staggered / A0),
        ps.Assignment(j.staggered_access("S"), D * y_staggered / A0),
    ]
    if has_diagonals:
        flux += [
            ps.Assignment(j.staggered_access("SW"), D * xy_staggered / A0),
            ps.Assignment(j.staggered_access("NW"), D * xY_staggered / A0),
        ]

    # compare
    for manual, automatic in zip(flux, flux_assignments):
        assert manual.lhs == automatic.lhs
        assert sp.simplify(manual.rhs - automatic.rhs) == 0
    for manual, automatic in zip(update, continuity_assignments):
        assert manual.lhs == automatic.lhs
        assert manual.rhs == automatic.rhs
Example #27
0
def test_free_slip_index_list():
    """Check the index list built by a free-slip box boundary on a 4x4 D2Q9 domain.

    For every entry of the index list the test verifies the direction the
    boundary reads from and the wall normal stored for the boundary cell the
    entry points to.
    """
    stencil = LBStencil(Stencil.D2Q9)
    dh = create_data_handling(domain_size=(4, 4), periodicity=(False, False))
    dh.add_array('src', values_per_cell=len(stencil), alignment=True)
    dh.fill('src', 0.0, ghost_layers=True)

    method = create_lb_method(
        lbm_config=LBMConfig(stencil=stencil, method=Method.SRT, relaxation_rate=1.8))

    bh = LatticeBoltzmannBoundaryHandling(method, dh, 'src', name="bh")
    add_box_boundary(bh, FreeSlip(stencil=stencil))
    bh.prepare()

    # walk over all blocks and keep the index list that is found last
    for block in dh.iterate():
        index_lists = block[bh._index_array_name].boundary_object_to_index_list
        for _, entries in index_lists.items():
            index_array = entries

    # expected normal for each corner boundary cell: (corner cell, normal)
    corner_normals = (
        ((0, 0), (1, 1)),    # south-west
        ((5, 0), (-1, 1)),   # south-east
        ((0, 5), (1, -1)),   # north-west
        ((5, 5), (-1, -1)),  # north-east
    )

    for entry in index_array:
        direction = stencil[entry[2]]
        inv_dir = inverse_direction(direction)

        # cell that is reached from the stored cell along the stored direction
        bx = entry[0] + direction[0]
        by = entry[1] + direction[1]
        boundary_cell = (bx, by)
        normal = (entry[3], entry[4])

        # the data is written on the inverse direction of the fluid cell near the boundary
        # the data is read from the mirrored direction of the inverse direction,
        # where the mirror axis is the normal
        assert entry[5] == stencil.index(mirror_stencil(list(inv_dir), normal))

        x_on_edge = 0 < bx < 5
        y_on_edge = 0 < by < 5

        # straight walls
        if bx == 0 and y_on_edge:
            assert normal == (1, 0)  # west
        if bx == 5 and y_on_edge:
            assert normal == (-1, 0)  # east
        if x_on_edge and by == 0:
            assert normal == (0, 1)  # south
        if x_on_edge and by == 5:
            assert normal == (0, -1)  # north

        # corners: read and write direction coincide and the normal is diagonal
        for corner, expected_normal in corner_normals:
            if boundary_cell == corner:
                assert entry[2] == entry[5]
                assert normal == expected_normal
Example #28
0
def poiseuille_channel(target, stencil_name):
    """Run a force-driven channel flow and compare it with the Poiseuille profile.

    The channel is periodic in every direction except y, where no-slip walls
    are placed. The flow is driven by a constant force density in x using a
    compressible TRT method with the Guo force model. Every 100 steps the
    velocity is averaged over the periodic directions and the loop stops early
    once the maximum velocity changed by less than a relative 5e-6 since the
    previous check. The x-velocity profile is then compared against
    ``poiseuille_flow`` and all other velocity components must vanish.

    Args:
        target: target handed to the data handling as ``default_target``
        stencil_name: stencil identifier handed to ``LBStencil``; the stencil
                      has to be two- or three-dimensional

    Raises:
        ValueError: if the stencil is neither two- nor three-dimensional
        AssertionError: if the simulated profile deviates from the expected one
    """
    # physical parameters
    rho_0 = 1.2  # density
    eta = 0.2  # kinematic viscosity
    width = 41  # of box
    actual_width = width - 2  # subtract boundary layer from box width
    ext_force_density = 0.2 / actual_width ** 2  # scale by width to keep stable
    wall_thickness = 2

    # LB parameters
    lb_stencil = LBStencil(stencil_name)
    dim = lb_stencil.D

    # domain size and wall regions depend on the dimension; checked once here
    if dim == 2:
        domain_size = (4, width)
        lower_wall = ps.make_slice[:, :wall_thickness]
        upper_wall = ps.make_slice[:, -wall_thickness:]
    elif dim == 3:
        domain_size = (4, width, 4)
        lower_wall = ps.make_slice[:, :wall_thickness, :]
        upper_wall = ps.make_slice[:, -wall_thickness:, :]
    else:
        raise ValueError(f"only 2D and 3D stencils are supported, got D={dim}")
    periodicity = [True, False] + [True] * (dim - 2)

    omega = lbmpy.relaxationrates.relaxation_rate_from_lattice_viscosity(eta)

    # ## Data structures
    dh = ps.create_data_handling(domain_size, periodicity=periodicity, default_target=target)

    src = dh.add_array('src', values_per_cell=len(lb_stencil))
    dst = dh.add_array_like('dst', 'src')
    rho_field = dh.add_array('rho', latex_name='\\rho', values_per_cell=1)
    u = dh.add_array('u', values_per_cell=dh.dim)

    # LB Setup
    lbm_config = LBMConfig(stencil=lb_stencil, relaxation_rate=omega, method=Method.TRT,
                           compressible=True, force_model=ForceModel.GUO,
                           force=tuple([ext_force_density] + [0] * (dim - 1)),
                           kernel_type='collide_only')

    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst, {'velocity': u})

    config = ps.CreateKernelConfig(cpu_openmp=False, target=dh.default_target)

    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    # Boundaries
    lbbh = LatticeBoltzmannBoundaryHandling(collision.method, dh, src.name, target=dh.default_target)

    # ## Set up the simulation

    init_assignments = macroscopic_values_setter(collision.method, velocity=(0,) * dh.dim,
                                                 pdfs=src.center_vector, density=rho_field.center)
    init_kernel = ps.create_kernel(init_assignments, ghost_layers=0).compile()

    noslip = NoSlip()
    lbbh.set_boundary(noslip, lower_wall)
    lbbh.set_boundary(noslip, upper_wall)

    assert len(lbbh._boundary_object_to_boundary_info) == 1, "Restart kernel to clear boundaries"

    def initialize():
        """Fill density and velocity fields and set the PDFs from them."""
        dh.fill(rho_field.name, rho_0)
        # NaN in the ghost layers first, then zero where the plain fill reaches
        dh.fill(u.name, np.nan, ghost_layers=True, inner_ghost_layers=True)
        dh.fill(u.name, 0)

        dh.run_kernel(init_kernel)

    sync_pdfs = dh.synchronization_function([src.name])

    # Time loop
    def time_loop(steps):
        """Advance at most ``steps`` time steps and return the averaged velocity profile."""
        if steps < 1:
            # the profile is only gathered inside the loop
            raise ValueError("steps must be at least 1")

        dh.all_to_gpu()
        last_max_vel = -1
        for i in range(steps):
            dh.run_kernel(collision_kernel)
            sync_pdfs()
            lbbh()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)

            # Consider early termination
            if i % 100 == 0:
                if u.name in dh.gpu_arrays:
                    dh.to_cpu(u.name)
                uu = dh.gather_array(u.name)
                # average periodic directions
                if dim == 3:  # don't swap order
                    uu = np.average(uu, axis=2)
                uu = np.average(uu, axis=0)

                max_vel = np.nanmax(uu)
                if np.abs(max_vel / last_max_vel - 1) < 5E-6:
                    break
                last_max_vel = max_vel

        # cut off wall regions
        uu = uu[wall_thickness:-wall_thickness]

        # correct for f/2 term
        uu -= np.array([ext_force_density / 2 / rho_0] + [0] * (dim - 1))

        return uu

    initialize()
    # Simulation
    profile = time_loop(5000)

    # compare against analytical solution
    # The profile is of shape (n,3). Force is in x-direction
    y = np.arange(len(profile[:, 0]))
    mid = (y[-1] - y[0]) / 2  # Mid point of channel

    expected = poiseuille_flow((y - mid), actual_width, ext_force_density, rho_0 * eta)

    np.testing.assert_allclose(profile[:, 0], expected, rtol=0.006)

    # Test zero vel in other directions
    np.testing.assert_allclose(profile[:, 1:], np.zeros_like(profile[:, 1:]), atol=1E-9)
Example #29
0
def run(re=6000, eval_interval=0.05, total_time=3.0, domain_size=100, u_0=0.05,
        initialization_relaxation_rate=None, vtk_output=False, parallel=False, **kwargs):
    """Runs the kida vortex simulation.

    Args:
        re: Reynolds number
        eval_interval: interval in non-dimensional time to evaluate flow properties
        total_time: non-dimensional time of complete simulation
        domain_size: integer (not tuple) since domain is cubic
        u_0: maximum lattice velocity
        initialization_relaxation_rate: if not None, an advanced initialization scheme is run to initialize higher
                                        order moments correctly
        vtk_output: if vtk files are written out
        parallel: MPI parallelization with walberla
        **kwargs: passed to LbStep; string entries in 'relaxation_rates' ('viscosity', 'trt_magic', 'free')
                  are replaced by the corresponding rate

    Returns:
        dictionary with simulation results on the root process, None on all other processes.
        The 'energy_spectrum' entry is None when no spectrum could be computed, i.e. when the simulation
        never reached non-dimensional time 0.5, when domain_size is larger than 600, or when the gathered
        velocity was not available.
    """
    domain_shape = (domain_size, domain_size, domain_size)
    relaxation_rate = relaxation_rate_from_reynolds_number(re, u_0, domain_size)
    dh = ps.create_data_handling(domain_shape, periodicity=True, parallel=parallel)
    rr_subs = {'viscosity': relaxation_rate,
               'trt_magic': relaxation_rate_from_magic_number(relaxation_rate),
               'free': sp.Symbol("rr_f")}

    if 'relaxation_rates' in kwargs:
        # symbolic names given as strings are substituted, everything else is passed through
        kwargs['relaxation_rates'] = [rr_subs[r] if isinstance(r, str) else r for r in kwargs['relaxation_rates']]
    else:
        kwargs['relaxation_rates'] = [relaxation_rate]

    dh.log_on_root("Running kida vortex scenario of size {} with {}".format(domain_size, kwargs))
    dh.log_on_root("Compiling method")

    lb_step = LatticeBoltzmannStep(data_handling=dh, name="kida_vortex", **kwargs)

    set_initial_velocity(lb_step, u_0)
    residuum, init_steps = np.nan, 0
    if initialization_relaxation_rate is not None:
        dh.log_on_root("Running iterative initialization", level='PROGRESS')
        residuum, init_steps = lb_step.run_iterative_initialization(initialization_relaxation_rate,
                                                                    convergence_threshold=1e-12, max_steps=100000,
                                                                    check_residuum_after=2 * domain_size)
        dh.log_on_root("Iterative initialization finished after {} steps at residuum {}".format(init_steps, residuum))

    total_time_steps = normalized_time_to_time_step(total_time, domain_size, u_0)
    eval_time_steps = normalized_time_to_time_step(eval_interval, domain_size, u_0)

    initial_energy = parallel_mean(lb_step, mean_kinetic_energy, all_reduce=False)
    times = []
    energy_list = []
    enstrophy_list = []
    mlups_list = []
    # None: not computed yet, False: computation was attempted but no velocity was available
    energy_spectrum_arr = None

    while lb_step.time_steps_run < total_time_steps:
        mlups = lb_step.benchmark_run(eval_time_steps, number_of_cells=domain_size**3)
        if vtk_output:
            lb_step.write_vtk()

        current_time = time_step_to_normalized_time(lb_step.time_steps_run, domain_size, u_0)
        current_kinetic_energy = parallel_mean(lb_step, mean_kinetic_energy)
        current_enstrophy = parallel_mean(lb_step, mean_enstrophy)

        is_stable = np.isfinite(lb_step.data_handling.max(lb_step.velocity_data_name)) and current_enstrophy < 1e4
        if not is_stable:
            dh.log_on_root("Simulation got unstable - stopping", level='WARNING')
            break

        # the spectrum is evaluated only once, at the first evaluation point with time >= 0.5
        if current_time >= 0.5 and energy_spectrum_arr is None and domain_size <= 600:
            dh.log_on_root("Calculating energy spectrum")
            gathered_velocity = lb_step.velocity[:, :, :, :]

            if gathered_velocity is not None:
                energy_spectrum_arr = energy_density_spectrum(gathered_velocity)
            else:
                energy_spectrum_arr = False

        if dh.is_root:
            # normalize before storing and logging
            current_kinetic_energy /= initial_energy
            current_enstrophy *= domain_size ** 2

            times.append(current_time)
            energy_list.append(current_kinetic_energy)
            enstrophy_list.append(current_enstrophy)
            mlups_list.append(mlups)

            dh.log_on_root("Progress: {current_time:.02f} / {total_time} at {mlups:.01f} MLUPS\t"
                           "Enstrophy {current_enstrophy:.04f}\t"
                           "KinEnergy {current_kinetic_energy:.06f}".format(
                               current_time=current_time, total_time=total_time, mlups=mlups,
                               current_enstrophy=current_enstrophy,
                               current_kinetic_energy=current_kinetic_energy))

    if not dh.is_root:
        return None

    # list() on the None/False placeholders would raise a TypeError, so report a missing spectrum as None
    spectrum_available = energy_spectrum_arr is not None and energy_spectrum_arr is not False
    return {
        'initialization_residuum': residuum,
        'initialization_steps': init_steps,
        'time': times,
        'kinetic_energy': energy_list,
        'enstrophy': enstrophy_list,
        'mlups': np.average(mlups_list),
        'energy_spectrum': list(energy_spectrum_arr) if spectrum_available else None,
        'stable': bool(np.isfinite(lb_step.data_handling.max(lb_step.velocity_data_name)))
    }
Example #30
0
def advection_diffusion(dim: int):
    """Set up a finite-volume advection-diffusion test on a periodic domain.

    The diffusive flux is discretized with ``ps.fd.FVM1stOrder`` and the
    advective flux with ``ps.fd.VOF``; both are merged into one staggered
    flux kernel. The domain is 32x32 for ``dim == 2`` and 16x16x16 for
    ``dim == 3``.

    Args:
        dim: number of spatial dimensions, 2 or 3

    Returns:
        a callable taking a velocity (sequence of ``dim`` numbers) that runs
        the simulation for 100 time steps and asserts mass conservation,
        positivity, the position of the density maximum and, for zero
        velocity, the agreement with the analytical density profile

    Raises:
        ValueError: if ``dim`` is neither 2 nor 3
    """
    # parameters
    if dim == 2:
        L = (32, 32)
    elif dim == 3:
        L = (16, 16, 16)
    else:
        # fail early instead of hitting an unbound domain size further down
        raise ValueError("dim must be 2 or 3, got {!r}".format(dim))

    dh = ps.create_data_handling(domain_size=L,
                                 periodicity=True,
                                 default_target=ps.Target.CPU)

    n_field = dh.add_array('n', values_per_cell=1)
    j_field = dh.add_array('j',
                           values_per_cell=3**dim // 2,
                           field_type=ps.FieldType.STAGGERED_FLUX)
    velocity_field = dh.add_array('v', values_per_cell=dim)

    D = 0.0666
    time = 100

    def grad(f):
        """Return the symbolic gradient of ``f`` as a column matrix."""
        return sp.Matrix([ps.fd.diff(f, i) for i in range(dim)])

    flux_eq = -D * grad(n_field)
    fvm_eq = ps.fd.FVM1stOrder(n_field, flux=flux_eq)

    vof_adv = ps.fd.VOF(j_field, velocity_field, n_field)

    # merge calculation of advection and diffusion terms
    flux = []
    for adv, div in zip(vof_adv, fvm_eq.discrete_flux(j_field)):
        assert adv.lhs == div.lhs
        flux.append(ps.Assignment(adv.lhs, adv.rhs + div.rhs))

    flux_kernel = ps.create_staggered_kernel(flux).compile()

    pde_kernel = ps.create_kernel(
        fvm_eq.discrete_continuity(j_field)).compile()

    sync_conc = dh.synchronization_function([n_field.name])

    # analytical density calculation
    def density(pos: np.ndarray, time: int, D: float):
        """Evaluate the Gaussian density for positions ``pos`` (last axis holds the coordinates)."""
        return (4 * np.pi * D * time)**(-dim / 2) * \
            np.exp(-np.sum(np.square(pos), axis=-1) / (4 * D * time))

    # cell positions relative to the domain center
    pos = np.zeros((*L, dim))
    xpos = np.arange(-L[0] // 2, L[0] // 2)
    ypos = np.arange(-L[1] // 2, L[1] // 2)

    if dim == 2:
        pos[..., 1], pos[..., 0] = np.meshgrid(xpos, ypos)
    elif dim == 3:
        zpos = np.arange(-L[2] // 2, L[2] // 2)
        pos[..., 2], pos[..., 1], pos[..., 0] = np.meshgrid(xpos, ypos, zpos)
    pos += 0.5

    def run(velocity: np.ndarray, time: int):
        """Simulate ``time`` steps with a constant ``velocity`` and check the result."""
        dh.fill(n_field.name,
                np.nan,
                ghost_layers=True,
                inner_ghost_layers=True)
        dh.fill(j_field.name,
                np.nan,
                ghost_layers=True,
                inner_ghost_layers=True)

        # set initial values for velocity and density
        for i in range(dim):
            dh.fill(velocity_field.name,
                    velocity[i],
                    i,
                    ghost_layers=True,
                    inner_ghost_layers=True)
        dh.fill(n_field.name, 0)
        # the initial mass of 1 is spread over the 2**dim cells around the center
        if dim == 2:
            start = ps.make_slice[L[0] // 2 - 1:L[0] // 2 + 1,
                                  L[1] // 2 - 1:L[1] // 2 + 1]
        else:
            start = ps.make_slice[L[0] // 2 - 1:L[0] // 2 + 1,
                                  L[1] // 2 - 1:L[1] // 2 + 1,
                                  L[2] // 2 - 1:L[2] // 2 + 1]
        dh.fill(n_field.name, 2**-dim, slice_obj=start)

        sync_conc()
        for _ in range(time):
            dh.run_kernel(flux_kernel)
            dh.run_kernel(pde_kernel)
            sync_conc()

        sim_density = dh.gather_array(n_field.name)

        # check that mass was conserved
        assert np.isclose(sim_density.sum(), 1)
        assert np.all(sim_density > 0)

        # check that the maximum is in the right place
        peak = np.unravel_index(np.argmax(sim_density, axis=None),
                                sim_density.shape)
        assert np.allclose(peak,
                           np.array(L) // 2 - 0.5 + velocity * time,
                           atol=0.5)

        # check the concentration profile
        if np.linalg.norm(velocity) == 0:
            calc_density = density(pos - velocity * time, time, D)
            target = [time, D]

            pytest.importorskip('scipy.optimize')
            from scipy.optimize import curve_fit
            # fit time and diffusion coefficient of the analytical profile to the simulation
            popt, _ = curve_fit(
                lambda x, t, D: density(x - velocity * time, t, D),
                pos.reshape(-1, dim),
                sim_density.reshape(-1),
                p0=target)

            assert np.isclose(popt[0], time, rtol=0.1)
            assert np.isclose(popt[1], D, rtol=0.1)
            assert np.allclose(calc_density, sim_density, atol=1e-4)

    return lambda v: run(np.array(v), time)