예제 #1
0
def test_field_statistics(ctx_factory,
                          grid_shape,
                          proc_shape,
                          dtype,
                          _grid_shape,
                          pass_grid_dims,
                          timing=False):
    ctx = ctx_factory()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # make select parameters local for convenience
    h = 2
    f = clr.rand(queue, (2, 1) + tuple(ni + 2 * h for ni in rank_shape),
                 dtype=dtype)

    if pass_grid_dims:
        statistics = ps.FieldStatistics(mpi,
                                        h,
                                        rank_shape=rank_shape,
                                        grid_size=np.product(grid_shape))
    else:
        statistics = ps.FieldStatistics(mpi, h)

    import pyopencl.tools as clt
    pool = clt.MemoryPool(clt.ImmediateAllocator(queue))

    stats = statistics(f, allocator=pool)
    avg = stats["mean"]
    var = stats["variance"]

    f_h = f.get()
    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h], axis=(-3, -2, -1))
    avg_test = mpi.allreduce(rank_sum) / np.product(grid_shape)

    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h]**2, axis=(-3, -2, -1))
    var_test = mpi.allreduce(rank_sum) / np.product(grid_shape) - avg_test**2

    rtol = 5e-14 if dtype == np.float64 else 1e-5

    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"average innaccurate for {grid_shape=}, {proc_shape=}"

    assert np.allclose(var, var_test, rtol=rtol, atol=0), \
        f"variance innaccurate for {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        t = timer(lambda: statistics(f, allocator=pool))
        if mpi.rank == 0:
            print(
                f"field stats took {t:.3f} ms "
                f"for outer shape {f.shape[:-3]}, {grid_shape=}, {proc_shape=}"
            )
예제 #2
0
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        max_rtol = 1e-12 if dtype == np.float64 else 1e-4
        avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        time = timer(lambda: solver(queue, fx, rho, m_squared=m_squared), ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {time:.3f} ms for {grid_shape=}, {proc_shape=}")
예제 #3
0
def test_relax(ctx_factory,
               grid_shape,
               proc_shape,
               h,
               dtype,
               Solver,
               timing=False):
    if min(grid_shape) < 128:
        pytest.skip("test_relax needs larger grids, for now")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]
    dk = 2 * np.pi / L

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    spectra = ps.PowerSpectra(mpi, fft, (dk, ) * 3, L**3)
    statistics = ps.FieldStatistics(mpi,
                                    h,
                                    rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([
            centered_diff(f, lap_coefs, direction=mu, order=2)
            for mu in range(1, 4)
        ]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi,
                    queue,
                    test_problems,
                    halo_shape=h,
                    dtype=dtype,
                    fixed_parameters=dict(omega=1 / 2))

    def zero_mean_array():
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()
    tmp = cla.zeros_like(f)

    f2 = zero_mean_array()
    rho2 = zero_mean_array()
    tmp2 = cla.zeros_like(f)

    num_iterations = 1000
    errors = {"f": [], "f2": []}
    first_mode_zeroed = {"f": [], "f2": []}
    for i in range(0, num_iterations, 2):
        solver(mpi,
               queue,
               iterations=2,
               dx=np.array(dx),
               f=f,
               tmp_f=tmp,
               rho=rho,
               f2=f2,
               tmp_f2=tmp2,
               rho2=rho2)

        err = solver.get_error(queue,
                               f=f,
                               r_f=tmp,
                               rho=rho,
                               f2=f2,
                               r_f2=tmp2,
                               rho2=rho2,
                               dx=np.array(dx))
        for k, v in err.items():
            errors[k].append(v)

        for key, resid in zip(["f", "f2"], [tmp, tmp2]):
            spectrum = spectra(resid, k_power=0)
            if mpi.rank == 0:
                max_amp = np.max(spectrum)
                first_zero = np.argmax(spectrum[1:] < 1e-30 * max_amp)
                first_mode_zeroed[key].append(first_zero)

    for k, errs in errors.items():
        errs = np.array(errs)
        iters = np.arange(1, errs.shape[0] + 1)
        assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \
            "relaxation not converging at least linearly for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"

    first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0)
    for k, x in first_mode_zeroed.items():
        x = np.array(list(x))[2:]
        assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \
            f"relaxation not smoothing error {grid_shape=}, {h=}, {proc_shape=}"
예제 #4
0
def test_multigrid(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, MG,
                   timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]

    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([centered_diff(f, lap_coefs, direction=mu, order=2)
                    for mu in range(1, 4)]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, halo_shape=h, dtype=dtype,
                    fixed_parameters=dict(omega=1/2))
    mg = MG(solver=solver, halo_shape=h, dtype=dtype)

    def zero_mean_array():
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()

    f2 = zero_mean_array()
    rho2 = zero_mean_array()

    poisson_errs = []
    helmholtz_errs = []
    num_v_cycles = 15 if MG == MultiGridSolver else 10
    for i in range(num_v_cycles):
        errs = mg(mpi, queue, dx0=dx, f=f, rho=rho, f2=f2, rho2=rho2)
        poisson_errs.append(errs[-1][-1]["f"])
        helmholtz_errs.append(errs[-1][-1]["f2"])

    for name, cycle_errs in zip(["poisson", "helmholtz"],
                                [poisson_errs, helmholtz_errs]):
        tol = 1e-6 if MG == MultiGridSolver else 1e-15
        assert cycle_errs[-1][1] < tol and cycle_errs[-2][1] < 10*tol, \
            "multigrid solution to {name} eqn is inaccurate for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"
예제 #5
0
    if gravitational_waves:
        derivs(queue, fx=f, lap=lap_f, grd=dfdx)
    else:
        derivs(queue, fx=f, lap=lap_f)

    return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=np.array(a))


# create output function
if decomp.rank == 0:
    from pystella.output import OutputFile
    out = OutputFile(ctx=ctx, runfile=__file__)
else:
    out = None
statistics = ps.FieldStatistics(decomp,
                                halo_shape,
                                rank_shape=rank_shape,
                                grid_size=grid_size)
spectra = ps.PowerSpectra(decomp, fft, dk, volume)
projector = ps.Projector(fft, halo_shape, dk, dx)
hist = ps.FieldHistogrammer(decomp, 1000, dtype, rank_shape=rank_shape)

a_sq_rho = (3 * mpl**2 * ps.Field("hubble", indices=[])**2 / 8 / np.pi)
rho_dict = {ps.Field("rho"): scalar_sector.stress_tensor(0, 0) / a_sq_rho}
compute_rho = ps.ElementWiseMap(rho_dict,
                                halo_shape=halo_shape,
                                rank_shape=rank_shape)


def output(step_count, t, energy, expand, f, dfdt, lap_f, dfdx, hij, dhijdt,
           lap_hij):
    if step_count % 4 == 0: