def test_generate(ctx_factory, grid_shape, proc_shape, dtype, random, timing=False):
    """Check that ``ps.RayleighGenerator`` produces modes with the requested
    power-law spectrum.

    For each exponent in ``[-1, -2, -3]`` a field is generated with
    ``field_ps = k**exp``, its binned power spectrum is compared against the
    analytic expectation, and (when ``random`` is truthy) the skewness of the
    position-space field is checked to be small.

    :arg ctx_factory: callable returning an OpenCL context; when falsy, a
        context is created via ``ps.choose_device_and_make_context()``.
    :arg grid_shape: global grid shape (indexed as a 3-tuple here).
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg dtype: datatype passed to ``ps.DFT``.
    :arg random: forwarded to ``modes.generate``; also enables the
        skewness check.
    :arg timing: when true, time ``modes.generate`` and print the result.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    num_bins = int(sum(Ni**2 for Ni in grid_shape)**.5 / 2 + .5) + 1
    L = (10,)*3
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling
    volume = np.prod(L)
    dk = tuple(2 * np.pi / Li for Li in L)
    spectra = ps.PowerSpectra(mpi, fft, dk, volume)
    modes = ps.RayleighGenerator(ctx, fft, dk, volume, seed=5123)

    kbins = min(dk) * np.arange(0, num_bins)
    test_norm = 1 / 2 / np.pi**2 / np.prod(grid_shape)**2

    for exp in [-1, -2, -3]:
        def power(k):
            return k**exp

        fk = modes.generate(queue, random=random, norm=1, field_ps=power)

        # drop the first and last bins before comparing to the analytic form
        spectrum = spectra.norm * spectra.bin_power(fk, queue=queue, k_power=3)[1:-1]
        true_spectrum = test_norm * kbins[1:-1]**3 * power(kbins[1:-1])
        err = np.abs(1 - spectrum / true_spectrum)

        tol = .1 if num_bins < 64 else .3
        # max error is only checked over the middle third of the bins
        assert (np.max(err[num_bins//3:-num_bins//3]) < tol
                and np.average(err[1:]) < tol), \
            f"init power spectrum incorrect for {random=}, k**{exp}"

        if random:
            fx = fft.idft(cla.to_device(queue, fk)).real
            if isinstance(fx, cla.Array):
                fx = fx.get()

            grid_size = np.prod(grid_shape)

            # first three moments, reduced across all ranks
            avg = mpi.allreduce(np.sum(fx)) / grid_size
            var = mpi.allreduce(np.sum(fx**2)) / grid_size - avg**2
            skew = mpi.allreduce(np.sum(fx**3)) / grid_size - 3 * avg * var - avg**3
            skew /= var**1.5
            # skewness may have either sign; bound its magnitude so that a
            # strongly negative skew is also rejected
            assert abs(skew) < tol, \
                f"init power spectrum has large skewness for k**{exp}"

    if timing:
        ntime = 10
        from common import timer
        t = timer(lambda: modes.generate(queue, random=random), ntime=ntime)
        print(f"{random=} set_modes took {t:.3f} ms for {grid_shape=}")
def test_spectra(ctx_factory, grid_shape, proc_shape, dtype, L, timing=False):
    """Compare ``ps.PowerSpectra.bin_power`` against a ``np.histogram``
    reference computed on the host.

    Checks that (1) the bin counts sum to the total number of grid points,
    (2) binning an all-ones array with ``k_power=0`` yields ones in every
    bin, and (3) the binned spectrum of test data matches a weighted
    histogram of ``|fk|**2 * counts * |k|**k_power``.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg grid_shape: global grid shape.
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg dtype: datatype passed to ``ps.DFT``; also selects the tolerances.
    :arg L: box lengths per axis; a falsy value selects ``(3, 5, 7)``.
    :arg timing: when true, time ``spec.bin_power`` and print the result.
    """
    ctx = ctx_factory()

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = L or (3, 5, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    cdtype = fft.cdtype
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling
    spec = ps.PowerSpectra(mpi, fft, dk, np.prod(L), bin_width=min(dk) + .001)
    # FIXME: bin_width=min(dk) sometimes disagrees to O(.1%) with numpy...

    assert int(np.sum(spec.bin_counts)) == np.prod(grid_shape), \
        "bin counts don't sum to total number of points/modes"

    k_power = 2.
    fk = make_data(*fft.shape(True)).astype(cdtype)

    fk_d = cla.to_device(queue, fk)
    spectrum = spec.bin_power(fk_d, k_power=k_power)
    bins = np.arange(-.5, spec.num_bins + .5) * spec.bin_width

    # rebuild |k| on the host from the transform's per-axis wavenumbers
    sub_k = [x.get() for x in fft.sub_k.values()]
    kvecs = np.meshgrid(*sub_k, indexing="ij", sparse=False)
    kmags = np.sqrt(sum((dki * ki)**2 for dki, ki in zip(dk, kvecs)))

    if fft.is_real:
        # weight 2 everywhere except where the last-axis wavenumber is 0 or
        # grid_shape[-1] // 2, which get weight 1
        counts = 2. * np.ones_like(kmags)
        counts[kvecs[2] == 0] = 1
        counts[kvecs[2] == grid_shape[-1] // 2] = 1
    else:
        counts = 1. * np.ones_like(kmags)

    if np.dtype(dtype) in (np.dtype("float64"), np.dtype("complex128")):
        max_rtol = 1e-8
        avg_rtol = 1e-11
    else:
        max_rtol = 2e-2
        avg_rtol = 2e-4

    bin_counts2 = spec.bin_power(np.ones_like(fk), queue=queue, k_power=0)

    max_err, avg_err = get_errs(bin_counts2, np.ones_like(bin_counts2))
    assert max_err < max_rtol and avg_err < avg_rtol, \
        "bin counting disagrees between PowerSpectra and np.histogram" \
        f" for {grid_shape=}: {max_err=}, {avg_err=}"

    hist = np.histogram(kmags, bins=bins,
                        weights=np.abs(fk)**2 * counts * kmags**k_power)[0]
    hist = mpi.allreduce(hist) / spec.bin_counts

    # skip the Nyquist mode and the zero mode
    max_err, avg_err = get_errs(spectrum[1:-2], hist[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    if timing:
        from common import timer
        t = timer(lambda: spec.bin_power(fk_d, k_power=k_power))
        print(f"power spectrum took {t:.3f} ms for {grid_shape=}, {dtype=}")
def test_pol_spectra(ctx_factory, grid_shape, proc_shape, dtype, timing=False):
    """Check consistency of power spectra across polarization projections.

    Verifies that:

    * the plus/minus spectra are unchanged by a ``pol_to_vec`` followed by
      ``vec_to_pol`` round trip,
    * the summed spectra of the three vector components equal the sum of the
      plus and minus spectra,
    * for a fresh vector, the component spectra sum equals the sum of the
      plus, minus, and longitudinal spectra from ``decompose_vector``,
    * the two ``gw_polarization`` spectra add up to the ``gw`` spectrum.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg grid_shape: global grid shape.
    :arg proc_shape: process-grid shape passed to ``ps.DomainDecomposition``.
    :arg dtype: requested datatype; any non-floating kind is replaced by
        ``"float64"``.
    :arg timing: unused in this test.
    """
    ctx = ctx_factory()

    if np.dtype(dtype).kind != "f":
        dtype = "float64"

    # Decide precision via np.dtype: ``dtype`` may be the *string* "float64"
    # (see above), and ``"float64" == np.float64`` is False, which would
    # silently select the loose single-precision tolerances.
    is_double = np.dtype(dtype) == np.dtype("float64")

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (10, 8, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    cdtype = fft.cdtype
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling
    spec = ps.PowerSpectra(mpi, fft, dk, np.prod(L))

    k_power = 2.

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    plus = cla.to_device(queue, fk)

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    minus = cla.to_device(queue, fk)

    # reference spectra before the projection round trip
    plus_ps_1 = spec.bin_power(plus, queue=queue, k_power=k_power)
    minus_ps_1 = spec.bin_power(minus, queue=queue, k_power=k_power)

    project = ps.Projector(fft, h, dk, dx)

    vector = cla.empty(queue, (3, ) + fft.shape(True), cdtype)
    project.pol_to_vec(queue, plus, minus, vector)
    project.vec_to_pol(queue, plus, minus, vector)

    plus_ps_2 = spec.bin_power(plus, k_power=k_power)
    minus_ps_2 = spec.bin_power(minus, k_power=k_power)

    max_rtol = 1e-8 if is_double else 1e-2
    avg_rtol = 1e-11 if is_double else 1e-4

    max_err, avg_err = get_errs(plus_ps_1[1:-2], plus_ps_2[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"plus power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    max_err, avg_err = get_errs(minus_ps_1[1:-2], minus_ps_2[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"minus power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    vec_sum = sum(spec.bin_power(vector[mu], k_power=k_power)
                  for mu in range(3))
    pol_sum = plus_ps_1 + minus_ps_1

    max_err, avg_err = get_errs(vec_sum[1:-2], pol_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"polarization power spectrum inaccurate for {grid_shape=}" \
        f": {max_err=}, {avg_err=}"

    # reset
    for mu in range(3):
        fk = make_data(*fft.shape(True)).astype(cdtype)
        fk = make_hermitian(fk, fft).astype(cdtype)
        vector[mu].set(fk)

    long = cla.zeros_like(plus)
    project.decompose_vector(queue, vector, plus, minus, long, times_abs_k=True)
    plus_ps = spec.bin_power(plus, k_power=k_power)
    minus_ps = spec.bin_power(minus, k_power=k_power)
    long_ps = spec.bin_power(long, k_power=k_power)

    vec_sum = sum(spec.bin_power(vector[mu], k_power=k_power)
                  for mu in range(3))
    dec_sum = plus_ps + minus_ps + long_ps

    max_err, avg_err = get_errs(vec_sum[1:-2], dec_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"decomp power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    hij = cl.clrandom.rand(queue, (6, ) + rank_shape, dtype)
    gw_spec = spec.gw(hij, project, 1.3)
    gw_pol_spec = spec.gw_polarization(hij, project, 1.3)

    max_rtol = 1e-14 if is_double else 1e-2
    avg_rtol = 1e-11 if is_double else 1e-4

    pol_sum = gw_pol_spec[0] + gw_pol_spec[1]
    max_err, avg_err = get_errs(gw_spec[1:-2], pol_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"gw pol don't add up to gw for {grid_shape=}: {max_err=}, {avg_err=}"
def test_relax(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, timing=False):
    """Check that a relaxation ``Solver`` converges and smooths its residual.

    Two problems are registered with the solver, keyed by the unknown field:
    ``(laplacian(f), rho)`` and ``(laplacian(f2) - f2, rho2)``. The solver is
    run two iterations at a time; after each pair the error is recorded and
    the residual power spectrum is inspected for the first (near-)zeroed mode.

    The test is skipped when any grid dimension is below 128.

    :arg ctx_factory: callable returning an OpenCL context; when falsy, a
        context is created via ``ps.choose_device_and_make_context()``.
    :arg grid_shape: global grid shape.
    :arg proc_shape: process-grid shape.
    :arg h: halo width; also indexes the Laplacian stencil coefficients.
    :arg dtype: datatype of the fields.
    :arg Solver: solver class under test.
    :arg timing: unused in this test.
    """
    if min(grid_shape) < 128:
        pytest.skip("test_relax needs larger grids, for now")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]
    dk = 2 * np.pi / L

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    spectra = ps.PowerSpectra(mpi, fft, (dk, ) * 3, L**3)
    # np.product was removed in NumPy 2.0; np.prod is the supported spelling
    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    def get_laplacian(f):
        """Return the symbolic centered-difference Laplacian of *f*."""
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([
            centered_diff(f, lap_coefs, direction=mu, order=2)
            for mu in range(1, 4)
        ]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, halo_shape=h, dtype=dtype,
                    fixed_parameters=dict(omega=1 / 2))

    def zero_mean_array():
        """Return a random halo-padded array with its mean subtracted."""
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    # from here on f/rho name device arrays rather than symbolic Fields
    f = zero_mean_array()
    rho = zero_mean_array()
    tmp = cla.zeros_like(f)

    f2 = zero_mean_array()
    rho2 = zero_mean_array()
    tmp2 = cla.zeros_like(f)

    num_iterations = 1000
    errors = {"f": [], "f2": []}
    first_mode_zeroed = {"f": [], "f2": []}
    for _ in range(0, num_iterations, 2):
        solver(mpi, queue, iterations=2, dx=np.array(dx),
               f=f, tmp_f=tmp, rho=rho,
               f2=f2, tmp_f2=tmp2, rho2=rho2)

        err = solver.get_error(queue,
                               f=f, r_f=tmp, rho=rho,
                               f2=f2, r_f2=tmp2, rho2=rho2, dx=np.array(dx))
        for k, v in err.items():
            errors[k].append(v)

        for key, resid in zip(["f", "f2"], [tmp, tmp2]):
            spectrum = spectra(resid, k_power=0)
            # only rank 0 records; results are broadcast after the loop
            if mpi.rank == 0:
                max_amp = np.max(spectrum)
                # index (past the zero mode) of the first bin whose power is
                # negligible relative to the peak
                first_zero = np.argmax(spectrum[1:] < 1e-30 * max_amp)
                first_mode_zeroed[key].append(first_zero)

    for errs in errors.values():
        errs = np.array(errs)
        iters = np.arange(1, errs.shape[0] + 1)
        assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \
            "relaxation not converging at least linearly for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"

    first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0)
    for x in first_mode_zeroed.values():
        # ignore the first two recorded entries
        x = np.array(list(x))[2:]
        assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \
            f"relaxation not smoothing error {grid_shape=}, {h=}, {proc_shape=}"
derivs(queue, fx=f, lap=lap_f) return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=np.array(a)) # create output function if decomp.rank == 0: from pystella.output import OutputFile out = OutputFile(ctx=ctx, runfile=__file__) else: out = None statistics = ps.FieldStatistics(decomp, halo_shape, rank_shape=rank_shape, grid_size=grid_size) spectra = ps.PowerSpectra(decomp, fft, dk, volume) projector = ps.Projector(fft, halo_shape, dk, dx) hist = ps.FieldHistogrammer(decomp, 1000, dtype, rank_shape=rank_shape) a_sq_rho = (3 * mpl**2 * ps.Field("hubble", indices=[])**2 / 8 / np.pi) rho_dict = {ps.Field("rho"): scalar_sector.stress_tensor(0, 0) / a_sq_rho} compute_rho = ps.ElementWiseMap(rho_dict, halo_shape=halo_shape, rank_shape=rank_shape) def output(step_count, t, energy, expand, f, dfdt, lap_f, dfdx, hij, dhijdt, lap_hij): if step_count % 4 == 0: f_stats = statistics(f)