Example #1
def test_generate(ctx_factory, grid_shape, proc_shape, dtype, random, timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

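    # number of radial k bins out to the corner of the Fourier grid
    # (|n|_max = sqrt(sum((Ni/2)**2)) in integer wavenumbers), plus the zero bin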
    num_bins = int(sum(Ni**2 for Ni in grid_shape)**.5 / 2 + .5) + 1
    L = (10,)*3
    volume = np.product(L)
    dk = tuple(2 * np.pi / Li for Li in L)
    spectra = ps.PowerSpectra(mpi, fft, dk, volume)
    modes = ps.RayleighGenerator(ctx, fft, dk, volume, seed=5123)

    kbins = min(dk) * np.arange(0, num_bins)
    test_norm = 1 / 2 / np.pi**2 / np.product(grid_shape)**2

    for exp in [-1, -2, -3]:
        def power(k):
            return k**exp

        fk = modes.generate(queue, random=random, norm=1, field_ps=power)

        spectrum = spectra.norm * spectra.bin_power(fk, queue=queue, k_power=3)[1:-1]
        true_spectrum = test_norm * kbins[1:-1]**3 * power(kbins[1:-1])
        err = np.abs(1 - spectrum / true_spectrum)

        tol = .1 if num_bins < 64 else .3
        assert (np.max(err[num_bins//3:-num_bins//3]) < tol
                and np.average(err[1:]) < tol), \
            f"init power spectrum incorrect for {random=}, k**{exp}"

        if random:
            fx = fft.idft(cla.to_device(queue, fk)).real
            if isinstance(fx, cla.Array):
                fx = fx.get()

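            # the generated real-space field should be (nearly) Gaussian, so
            # its sample skewness should vanish up to statistical noise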
            grid_size = np.product(grid_shape)

            avg = mpi.allreduce(np.sum(fx)) / grid_size
            var = mpi.allreduce(np.sum(fx**2)) / grid_size - avg**2
            skew = mpi.allreduce(np.sum(fx**3)) / grid_size - 3 * avg * var - avg**3
            skew /= var**1.5
            assert abs(skew) < tol, \
                f"generated field has large skewness for k**{exp}"

    if timing:
        ntime = 10
        from common import timer
        t = timer(lambda: modes.generate(queue, random=random), ntime=ntime)
        print(f"{random=} set_modes took {t:.3f} ms for {grid_shape=}")
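
# A minimal, single-process NumPy sketch of the idea behind Rayleigh mode
# generation tested above: draw complex Fourier amplitudes whose moduli are
# Rayleigh distributed (so <|f_k|^2> follows a target spectrum P(k)) with
# uniform random phases.  The function name and normalization below are
# illustrative only, not pystella's API, and Hermitian symmetry for real
# fields is ignored for brevity.
import numpy as np


def generate_modes_numpy(grid_shape, dk, power, seed=5123):
    rng = np.random.default_rng(seed)
    # physical wavevector magnitudes on a complex-to-complex FFT grid
    kvecs = np.meshgrid(*(dki * np.fft.fftfreq(Ni, d=1 / Ni)
                          for Ni, dki in zip(grid_shape, dk)),
                        indexing="ij")
    kmag = np.sqrt(sum(ki**2 for ki in kvecs))
    kmag.flat[0] = 1.  # placeholder; the zero mode is zeroed out below

    # |f_k|^2 ~ Exp(mean=P(k)), i.e. |f_k| is Rayleigh distributed
    modulus = np.sqrt(power(kmag) * rng.exponential(size=grid_shape))
    phase = rng.uniform(0., 2. * np.pi, size=grid_shape)
    fk = modulus * np.exp(1j * phase)
    fk.flat[0] = 0.  # enforce a zero-mean field
    return fk


# example usage: fk = generate_modes_numpy((64,)*3, (2*np.pi/10,)*3, lambda k: k**-2)
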
Example #2
def test_spectra(ctx_factory, grid_shape, proc_shape, dtype, L, timing=False):
    ctx = ctx_factory()

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = L or (3, 5, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    cdtype = fft.cdtype
    spec = ps.PowerSpectra(mpi,
                           fft,
                           dk,
                           np.product(L),
                           bin_width=min(dk) + .001)
    # FIXME: bin_width=min(dk) sometimes disagrees to O(.1%) with numpy...

    assert int(np.sum(spec.bin_counts)) == np.product(grid_shape), \
        "bin counts don't sum to total number of points/modes"

    k_power = 2.
    fk = make_data(*fft.shape(True)).astype(cdtype)

    fk_d = cla.to_device(queue, fk)
    spectrum = spec.bin_power(fk_d, k_power=k_power)
    bins = np.arange(-.5, spec.num_bins + .5) * spec.bin_width

    sub_k = list(x.get() for x in fft.sub_k.values())
    kvecs = np.meshgrid(*sub_k, indexing="ij", sparse=False)
    kmags = np.sqrt(sum((dki * ki)**2 for dki, ki in zip(dk, kvecs)))

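    # a real-to-complex FFT stores only half of the k_z modes; each mode with
    # 0 < k_z < Nyquist stands in for a conjugate pair, hence a count of 2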
    if fft.is_real:
        counts = 2. * np.ones_like(kmags)
        counts[kvecs[2] == 0] = 1
        counts[kvecs[2] == grid_shape[-1] // 2] = 1
    else:
        counts = 1. * np.ones_like(kmags)

    if np.dtype(dtype) in (np.dtype("float64"), np.dtype("complex128")):
        max_rtol = 1e-8
        avg_rtol = 1e-11
    else:
        max_rtol = 2e-2
        avg_rtol = 2e-4

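    # binning a field of ones with k_power=0 checks the per-bin mode counting:
    # after normalizing by the per-bin mode counts, every bin should come out as 1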
    bin_counts2 = spec.bin_power(np.ones_like(fk), queue=queue, k_power=0)

    max_err, avg_err = get_errs(bin_counts2, np.ones_like(bin_counts2))
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"bin counting disagrees between PowerSpectra and np.histogram" \
        f" for {grid_shape=}: {max_err=}, {avg_err=}"

    hist = np.histogram(kmags,
                        bins=bins,
                        weights=np.abs(fk)**2 * counts * kmags**k_power)[0]
    hist = mpi.allreduce(hist) / spec.bin_counts

    # skip the Nyquist mode and the zero mode
    max_err, avg_err = get_errs(spectrum[1:-2], hist[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    if timing:
        from common import timer
        t = timer(lambda: spec.bin_power(fk_d, k_power=k_power))
        print(f"power spectrum took {t:.3f} ms for {grid_shape=}, {dtype=}")
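
# A standalone NumPy sketch of the reference binning used in the cross-check
# above: accumulate |f_k|^2 * |k|^k_power into radial bins of width bin_width
# and divide by the number of modes in each bin.  The `counts` argument plays
# the role of the Hermitian-symmetry weights for a real-to-complex FFT; this
# is an illustration, not pystella's implementation.
import numpy as np


def bin_power_numpy(fk, kmags, bin_width, num_bins, k_power=2., counts=None):
    counts = np.ones_like(kmags) if counts is None else counts
    bins = np.arange(-.5, num_bins + .5) * bin_width
    weights = np.abs(fk)**2 * counts * kmags**k_power
    power = np.histogram(kmags, bins=bins, weights=weights)[0]
    modes = np.histogram(kmags, bins=bins, weights=counts)[0]
    return power / np.maximum(modes, 1)  # avoid division by zero in empty bins
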
Example #3
def test_pol_spectra(ctx_factory, grid_shape, proc_shape, dtype, timing=False):
    ctx = ctx_factory()

    if np.dtype(dtype).kind != "f":
        dtype = "float64"

    queue = cl.CommandQueue(ctx)
    h = 1
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (10, 8, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    cdtype = fft.cdtype
    spec = ps.PowerSpectra(mpi, fft, dk, np.product(L))

    k_power = 2.

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    plus = cla.to_device(queue, fk)

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    minus = cla.to_device(queue, fk)

    plus_ps_1 = spec.bin_power(plus, queue=queue, k_power=k_power)
    minus_ps_1 = spec.bin_power(minus, queue=queue, k_power=k_power)

    project = ps.Projector(fft, h, dk, dx)

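    # round trip: assemble a vector field from the two polarizations, then
    # project back; the recovered plus/minus spectra should match the originals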
    vector = cla.empty(queue, (3, ) + fft.shape(True), cdtype)
    project.pol_to_vec(queue, plus, minus, vector)
    project.vec_to_pol(queue, plus, minus, vector)

    plus_ps_2 = spec.bin_power(plus, k_power=k_power)
    minus_ps_2 = spec.bin_power(minus, k_power=k_power)

    max_rtol = 1e-8 if dtype == np.float64 else 1e-2
    avg_rtol = 1e-11 if dtype == np.float64 else 1e-4

    max_err, avg_err = get_errs(plus_ps_1[1:-2], plus_ps_2[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"plus power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    max_err, avg_err = get_errs(minus_ps_1[1:-2], minus_ps_2[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"minus power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

    vec_sum = sum(
        spec.bin_power(vector[mu], k_power=k_power) for mu in range(3))
    pol_sum = plus_ps_1 + minus_ps_1

    max_err, avg_err = get_errs(vec_sum[1:-2], pol_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"polarization power spectrum inaccurate for {grid_shape=}" \
        f": {max_err=}, {avg_err=}"

    # reset
    for mu in range(3):
        fk = make_data(*fft.shape(True)).astype(cdtype)
        fk = make_hermitian(fk, fft).astype(cdtype)
        vector[mu].set(fk)

    long = cla.zeros_like(plus)
    project.decompose_vector(queue,
                             vector,
                             plus,
                             minus,
                             long,
                             times_abs_k=True)
    plus_ps = spec.bin_power(plus, k_power=k_power)
    minus_ps = spec.bin_power(minus, k_power=k_power)
    long_ps = spec.bin_power(long, k_power=k_power)

    vec_sum = sum(
        spec.bin_power(vector[mu], k_power=k_power) for mu in range(3))
    dec_sum = plus_ps + minus_ps + long_ps

    max_err, avg_err = get_errs(vec_sum[1:-2], dec_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"decomp power spectrum inaccurate for {grid_shape=}: {max_err=}, {avg_err=}"

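    # the two gravitational-wave polarization spectra should sum to the total
    # gravitational-wave spectrum computed from hij directly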
    hij = cl.clrandom.rand(queue, (6, ) + rank_shape, dtype)
    gw_spec = spec.gw(hij, project, 1.3)
    gw_pol_spec = spec.gw_polarization(hij, project, 1.3)

    max_rtol = 1e-14 if dtype == np.float64 else 1e-2
    avg_rtol = 1e-11 if dtype == np.float64 else 1e-4

    pol_sum = gw_pol_spec[0] + gw_pol_spec[1]
    max_err, avg_err = get_errs(gw_spec[1:-2], pol_sum[1:-2])
    assert max_err < max_rtol and avg_err < avg_rtol, \
        f"gw pol don't add up to gw for {grid_shape=}: {max_err=}, {avg_err=}"
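
# A tiny NumPy illustration of the identity exercised above: projecting a
# vector Fourier mode onto an orthonormal helicity basis (plus, minus,
# longitudinal) preserves the summed power |v|^2 mode by mode, so the binned
# spectra must add up in the same way.  The basis below assumes k along the
# z axis and is purely illustrative.
import numpy as np

rng = np.random.default_rng(0)
v = rng.normal(size=3) + 1j * rng.normal(size=3)  # one vector Fourier mode

e_plus = np.array([1., 1j, 0.]) / np.sqrt(2.)
e_minus = np.array([1., -1j, 0.]) / np.sqrt(2.)
e_long = np.array([0., 0., 1.])

components = [e.conj() @ v for e in (e_plus, e_minus, e_long)]
assert np.isclose(sum(abs(c)**2 for c in components), np.sum(np.abs(v)**2))
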
Example #4
def test_relax(ctx_factory,
               grid_shape,
               proc_shape,
               h,
               dtype,
               Solver,
               timing=False):
    if min(grid_shape) < 128:
        pytest.skip("test_relax needs larger grids, for now")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]
    dk = 2 * np.pi / L

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    spectra = ps.PowerSpectra(mpi, fft, (dk, ) * 3, L**3)
    statistics = ps.FieldStatistics(mpi,
                                    h,
                                    rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

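    # build a symbolic centered-difference Laplacian from pystella's
    # finite-difference coefficients for halo size h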
    def get_laplacian(f):
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([
            centered_diff(f, lap_coefs, direction=mu, order=2)
            for mu in range(1, 4)
        ]) / var("dx")**2

    test_problems = {}

    from pystella import Field
    f = Field("f", offset="h")
    rho = Field("rho", offset="h")
    test_problems[f] = (get_laplacian(f), rho)

    f = Field("f2", offset="h")
    rho = Field("rho2", offset="h")
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi,
                    queue,
                    test_problems,
                    halo_shape=h,
                    dtype=dtype,
                    fixed_parameters=dict(omega=1 / 2))

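    # with periodic boundaries, the pure-Laplacian problem is only solvable
    # for a zero-mean source, so make the random fields and sources zero-mean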
    def zero_mean_array():
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)["mean"]
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()
    tmp = cla.zeros_like(f)

    f2 = zero_mean_array()
    rho2 = zero_mean_array()
    tmp2 = cla.zeros_like(f)

    num_iterations = 1000
    errors = {"f": [], "f2": []}
    first_mode_zeroed = {"f": [], "f2": []}
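    # run the smoother two iterations at a time, recording the residual-based
    # errors and the lowest spectral bin of each residual that has effectively
    # vanished (high-k modes should be damped first)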
    for i in range(0, num_iterations, 2):
        solver(mpi,
               queue,
               iterations=2,
               dx=np.array(dx),
               f=f,
               tmp_f=tmp,
               rho=rho,
               f2=f2,
               tmp_f2=tmp2,
               rho2=rho2)

        err = solver.get_error(queue,
                               f=f,
                               r_f=tmp,
                               rho=rho,
                               f2=f2,
                               r_f2=tmp2,
                               rho2=rho2,
                               dx=np.array(dx))
        for k, v in err.items():
            errors[k].append(v)

        for key, resid in zip(["f", "f2"], [tmp, tmp2]):
            spectrum = spectra(resid, k_power=0)
            if mpi.rank == 0:
                max_amp = np.max(spectrum)
                first_zero = np.argmax(spectrum[1:] < 1e-30 * max_amp)
                first_mode_zeroed[key].append(first_zero)

    for k, errs in errors.items():
        errs = np.array(errs)
        iters = np.arange(1, errs.shape[0] + 1)
        assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \
            "relaxation not converging at least linearly for " \
            f"{grid_shape=}, {h=}, {proc_shape=}"

    first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0)
    for k, x in first_mode_zeroed.items():
        x = np.array(list(x))[2:]
        assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \
            f"relaxation not smoothing error {grid_shape=}, {h=}, {proc_shape=}"
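
# A self-contained 1D illustration of the smoothing property asserted above:
# damped Jacobi relaxation on a periodic Poisson problem damps high-wavenumber
# residual modes much faster than low-wavenumber ones.  This sketches the
# principle only; it is not pystella's relaxation solver.
import numpy as np


def jacobi_sweep(u, src, dx, omega=2 / 3):
    # damped Jacobi update for the periodic 1D problem u'' = src
    u_avg = (np.roll(u, 1) + np.roll(u, -1)) / 2
    return (1 - omega) * u + omega * (u_avg - dx**2 * src / 2)


n = 128
dx = 1 / n
rng = np.random.default_rng(1)
src = rng.standard_normal(n)
src -= src.mean()  # solvability condition for periodic boundaries
u = np.zeros(n)

for _ in range(50):
    u = jacobi_sweep(u, src, dx)

residual = (np.roll(u, 1) - 2 * u + np.roll(u, -1)) / dx**2 - src
spectrum = np.abs(np.fft.rfft(residual))**2
# the high-wavenumber half of the residual spectrum should be strongly damped
# relative to the lowest modes
assert spectrum[-n // 4:].max() < spectrum[1:n // 8].max()
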
Example #5
def compute_energy(f, dfdt, lap_f, a):
    # NB: this enclosing def is inferred from the fragment below, which fills
    # lap_f with the Laplacian of f and then reduces to the field energy
    derivs(queue, fx=f, lap=lap_f)

    return reduce_energy(queue, f=f, dfdt=dfdt, lap_f=lap_f, a=np.array(a))


# create output function
if decomp.rank == 0:
    from pystella.output import OutputFile
    out = OutputFile(ctx=ctx, runfile=__file__)
else:
    out = None
statistics = ps.FieldStatistics(decomp,
                                halo_shape,
                                rank_shape=rank_shape,
                                grid_size=grid_size)
spectra = ps.PowerSpectra(decomp, fft, dk, volume)
projector = ps.Projector(fft, halo_shape, dk, dx)
hist = ps.FieldHistogrammer(decomp, 1000, dtype, rank_shape=rank_shape)

a_sq_rho = (3 * mpl**2 * ps.Field("hubble", indices=[])**2 / 8 / np.pi)
rho_dict = {ps.Field("rho"): scalar_sector.stress_tensor(0, 0) / a_sq_rho}
compute_rho = ps.ElementWiseMap(rho_dict,
                                halo_shape=halo_shape,
                                rank_shape=rank_shape)


def output(step_count, t, energy, expand, f, dfdt, lap_f, dfdx, hij, dhijdt,
           lap_hij):
    if step_count % 4 == 0:
        f_stats = statistics(f)