def test_field_statistics(ctx_factory, grid_shape, proc_shape, dtype, _grid_shape,
                          pass_grid_dims, timing=False):
    ctx = ctx_factory()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # make select parameters local for convenience
    h = 2
    f = clr.rand(queue, (2, 1) + tuple(ni + 2 * h for ni in rank_shape),
                 dtype=dtype)

    if pass_grid_dims:
        statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                        grid_size=np.product(grid_shape))
    else:
        statistics = ps.FieldStatistics(mpi, h)

    import pyopencl.tools as clt
    pool = clt.MemoryPool(clt.ImmediateAllocator(queue))

    stats = statistics(f, allocator=pool)
    avg = stats["mean"]
    var = stats["variance"]

    f_h = f.get()
    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h], axis=(-3, -2, -1))
    avg_test = mpi.allreduce(rank_sum) / np.product(grid_shape)

    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h]**2, axis=(-3, -2, -1))
    var_test = mpi.allreduce(rank_sum) / np.product(grid_shape) - avg_test**2

    rtol = 5e-14 if dtype == np.float64 else 1e-5

    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        f"average inaccurate for {grid_shape=}, {proc_shape=}"

    assert np.allclose(var, var_test, rtol=rtol, atol=0), \
        f"variance inaccurate for {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        t = timer(lambda: statistics(f, allocator=pool))
        if mpi.rank == 0:
            print(
                f"field stats took {t:.3f} ms "
                f"for outer shape {f.shape[:-3]}, {grid_shape=}, {proc_shape=}"
            )
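# Hedged usage sketch (not part of the test suite): a minimal, single-rank
# illustration of the ps.FieldStatistics interface exercised above. It assumes
# the module-level imports used by the tests (np, cl, clr, ps) and a (1, 1, 1)
# process grid; the grid shape and dtype here are illustrative only.
def example_field_statistics_usage():
    ctx = ps.choose_device_and_make_context()
    queue = cl.CommandQueue(ctx)

    h = 2
    grid_shape = (32, 32, 32)
    mpi = ps.DomainDecomposition((1, 1, 1), h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    # a random field with halo padding of width h on each side
    f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype=np.float64)

    statistics = ps.FieldStatistics(mpi, h)
    stats = statistics(f)
    # stats is a dict with (at least) "mean" and "variance" entries, as relied
    # on by the assertions in test_field_statistics above
    return stats["mean"], stats["variance"]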
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        max_rtol = 1e-12 if dtype == np.float64 else 1e-4
        avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        time = timer(lambda: solver(queue, fx, rho, m_squared=m_squared), ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {time:.3f} ms for {grid_shape=}, {proc_shape=}")
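# Hedged illustration (an assumption, not pystella API): the residual checked
# by test_spectral_poisson corresponds to solving (lap - m**2) f = rho, which
# in Fourier space reduces to f_k = rho_k / (lambda(k) - m**2) with lambda(k)
# the Laplacian eigenvalues (-k**2 in the spectral case, as in get_evals_2).
# A minimal 1-D numpy sketch of that mode-by-mode division:
def example_poisson_fourier_solve_1d(rho, L, m_squared=0.0):
    n = rho.shape[0]
    k = 2 * np.pi * np.fft.fftfreq(n, d=L / n)   # angular wavenumbers
    rho_k = np.fft.fft(rho)

    evals = -k**2                 # continuum Laplacian eigenvalues
    denom = evals - m_squared

    f_k = np.zeros_like(rho_k)
    nonzero = denom != 0          # the zero mode is undetermined when m_squared == 0
    f_k[nonzero] = rho_k[nonzero] / denom[nonzero]
    return np.fft.ifft(f_k).real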
def test_relax(ctx_factory, grid_shape, proc_shape, h, dtype, Solver,
               timing=False):
    if min(grid_shape) < 128:
        pytest.skip("test_relax needs larger grids, for now")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]
    dk = 2 * np.pi / L

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    spectra = ps.PowerSpectra(mpi, fft, (dk, ) * 3, L**3)
    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([
            centered_diff(f, lap_coefs, direction=mu, order=2)
            for mu in range(1, 4)
        ]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, halo_shape=h, dtype=dtype,
                    fixed_parameters=dict(omega=1 / 2))

    def zero_mean_array():
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()
    tmp = cla.zeros_like(f)

    f2 = zero_mean_array()
    rho2 = zero_mean_array()
    tmp2 = cla.zeros_like(f)

    num_iterations = 1000
    errors = {"f": [], "f2": []}
    first_mode_zeroed = {"f": [], "f2": []}
    for i in range(0, num_iterations, 2):
        solver(mpi, queue, iterations=2, dx=np.array(dx),
               f=f, tmp_f=tmp, rho=rho,
               f2=f2, tmp_f2=tmp2, rho2=rho2)

        err = solver.get_error(queue,
                               f=f, r_f=tmp, rho=rho,
                               f2=f2, r_f2=tmp2, rho2=rho2, dx=np.array(dx))
        for k, v in err.items():
            errors[k].append(v)

        for key, resid in zip(["f", "f2"], [tmp, tmp2]):
            spectrum = spectra(resid, k_power=0)
            if mpi.rank == 0:
                max_amp = np.max(spectrum)
                first_zero = np.argmax(spectrum[1:] < 1e-30 * max_amp)
                first_mode_zeroed[key].append(first_zero)

    for k, errs in errors.items():
        errs = np.array(errs)
        iters = np.arange(1, errs.shape[0] + 1)
        assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \
            "relaxation not converging at least linearly for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"

    first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0)
    for k, x in first_mode_zeroed.items():
        x = np.array(list(x))[2:]
        assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \
            f"relaxation not smoothing error {grid_shape=}, {h=}, {proc_shape=}"
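# Hedged illustration (not pystella code): damped Jacobi relaxation for the
# 1-D Poisson problem lap(f) = rho with periodic boundaries. It is a stand-in
# for the Solver classes exercised by test_relax above, showing the two
# properties the test asserts: the error decays and the high-wavenumber
# content of the residual is smoothed away first. omega = 0.5 mirrors the
# fixed_parameters passed to the solver above.
def example_damped_jacobi_1d(rho, dx, iterations=100, omega=0.5):
    f = np.zeros_like(rho)
    for _ in range(iterations):
        # Jacobi update for (f[i-1] - 2 f[i] + f[i+1]) / dx**2 = rho[i]
        f_new = (np.roll(f, 1) + np.roll(f, -1) - dx**2 * rho) / 2
        f = (1 - omega) * f + omega * f_new

    residual = (np.roll(f, 1) - 2 * f + np.roll(f, -1)) / dx**2 - rho
    return f, residual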
def test_multigrid(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, MG,
                   timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]

    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([centered_diff(f, lap_coefs, direction=mu, order=2)
                    for mu in range(1, 4)]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, halo_shape=h, dtype=dtype,
                    fixed_parameters=dict(omega=1/2))
    mg = MG(solver=solver, halo_shape=h, dtype=dtype)

    def zero_mean_array():
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()

    f2 = zero_mean_array()
    rho2 = zero_mean_array()

    poisson_errs = []
    helmholtz_errs = []
    num_v_cycles = 15 if MG == MultiGridSolver else 10
    for i in range(num_v_cycles):
        errs = mg(mpi, queue, dx0=dx, f=f, rho=rho, f2=f2, rho2=rho2)

        poisson_errs.append(errs[-1][-1]["f"])
        helmholtz_errs.append(errs[-1][-1]["f2"])

    for name, cycle_errs in zip(["poisson", "helmholtz"],
                                [poisson_errs, helmholtz_errs]):
        tol = 1e-6 if MG == MultiGridSolver else 1e-15
        assert cycle_errs[-1][1] < tol and cycle_errs[-2][1] < 10*tol, \
            f"multigrid solution to {name} eqn is inaccurate for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"
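# Hedged illustration (not pystella code): a minimal 1-D two-grid correction
# cycle for periodic lap(f) = rho, sketching the structure that the MG driver
# in test_multigrid iterates as V-cycles. The restriction (injection) and
# prolongation (piecewise-constant) used here are deliberately crude; all
# names are local to this example and carry no pystella meaning.
def example_two_grid_cycle_1d(f, rho, dx, nu_pre=2, nu_post=2, omega=0.5):
    def smooth(f, rhs, dx, iterations):
        # damped Jacobi smoother for (f[i-1] - 2 f[i] + f[i+1]) / dx**2 = rhs[i]
        for _ in range(iterations):
            f_new = (np.roll(f, 1) + np.roll(f, -1) - dx**2 * rhs) / 2
            f = (1 - omega) * f + omega * f_new
        return f

    def residual(f, rhs, dx):
        return rhs - (np.roll(f, 1) - 2 * f + np.roll(f, -1)) / dx**2

    # pre-smooth, restrict the residual, approximately solve the coarse error
    # equation lap(e) = r by extra smoothing, prolong and correct, post-smooth
    f = smooth(f, rho, dx, nu_pre)
    r = residual(f, rho, dx)
    r_coarse = r[::2]
    e_coarse = smooth(np.zeros_like(r_coarse), r_coarse, 2 * dx, 50)
    f = f + np.repeat(e_coarse, 2)
    f = smooth(f, rho, dx, nu_post)
    return f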
    if gravitational_waves:
        derivs(queue, fx=f, lap=lap_f, grd=dfdx)
    else:
        derivs(queue, fx=f, lap=lap_f)

    return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=np.array(a))


# create output function
if decomp.rank == 0:
    from pystella.output import OutputFile
    out = OutputFile(ctx=ctx, runfile=__file__)
else:
    out = None

statistics = ps.FieldStatistics(decomp, halo_shape, rank_shape=rank_shape,
                                grid_size=grid_size)
spectra = ps.PowerSpectra(decomp, fft, dk, volume)
projector = ps.Projector(fft, halo_shape, dk, dx)
hist = ps.FieldHistogrammer(decomp, 1000, dtype, rank_shape=rank_shape)

a_sq_rho = (3 * mpl**2 * ps.Field("hubble", indices=[])**2 / 8 / np.pi)
rho_dict = {ps.Field("rho"): scalar_sector.stress_tensor(0, 0) / a_sq_rho}
compute_rho = ps.ElementWiseMap(rho_dict, halo_shape=halo_shape,
                                rank_shape=rank_shape)


def output(step_count, t, energy, expand,
           f, dfdt, lap_f, dfdx, hij, dhijdt, lap_hij):
    if step_count % 4 == 0: