def __init__(self, setup, params):
    """Multigrid inverter: store the setup object and per-level solver parameters.

    Parameters
    ----------
    setup : multigrid setup object providing nlevel, finest, coarsest, grid[],
            nbasis[], nf_lvl[] (next-finer level indices)
    params : dict with keys "smooth_solver", "wrapper_solver", "coarsest_solver"
    """
    # save input
    self.setup = setup
    self.params = params

    # aliases
    s = self.setup
    par = self.params

    # parameters: one smoother per level transition, one wrapper per
    # intermediate level, a single solver for the coarsest level
    self.smooth_solver = g.util.to_list(par["smooth_solver"], s.nlevel - 1)
    self.wrapper_solver = g.util.to_list(par["wrapper_solver"], s.nlevel - 2)
    self.coarsest_solver = par["coarsest_solver"]

    # verbosity
    self.verbose = g.default.is_verbose("multi_grid_inverter")

    # print prefix
    self.print_prefix = ["mg: level %d:" % i for i in range(s.nlevel)]

    # assertions (outer list wrapper: entries_have_length checks the
    # length of each entry, i.e. of the solver lists themselves)
    assert g.util.entries_have_length([self.smooth_solver], s.nlevel - 1)
    assert g.util.entries_have_length([self.wrapper_solver], s.nlevel - 2)
    assert g.util.is_callable([self.smooth_solver, self.coarsest_solver, self.wrapper_solver])
    assert type(self.coarsest_solver) != list
    assert not g.util.all_have_attribute(self.wrapper_solver, "inverter")

    # timing: one timer per level
    self.t = [g.timer("mg_solve_lvl_%d" % (lvl)) for lvl in range(s.nlevel)]

    # temporary vectors: residual r and error e per level; coarse levels
    # use vcomplex with the next-finer level's basis size, the finest
    # level holds a spin-color vector (no error vector needed there)
    self.r, self.e = [None] * s.nlevel, [None] * s.nlevel
    for lvl in range(s.finest + 1, s.nlevel):
        nf_lvl = s.nf_lvl[lvl]
        self.r[lvl] = g.vcomplex(s.grid[lvl], s.nbasis[nf_lvl])
        self.e[lvl] = g.vcomplex(s.grid[lvl], s.nbasis[nf_lvl])
    self.r[s.finest] = g.vspincolor(s.grid[s.finest])

    # setup a history for all solvers
    self.history = [None] * s.nlevel
    for lvl in range(s.finest, s.coarsest):
        self.history[lvl] = {"smooth": [], "wrapper": []}
    self.history[s.coarsest] = {"coarsest": []}
def gamma5(src):
    """Return the object that acts as gamma_5 on *src*.

    For a fine-grid vector this is the Dirac gamma_5 matrix; for a coarse
    vector (whose type carries a "fundamental" attribute) gamma_5 is diagonal
    in the chirally-split basis: +1 on the first half, -1 on the second half.
    """
    if not hasattr(src.otype, "fundamental"):
        # fine-grid spin-color vector: the usual Dirac matrix
        return gpt.gamma[5]
    # coarse vector: diagonal +/-1 over the two chiral halves of the basis
    n = src.otype.shape[0]
    assert n % 2 == 0
    half = n // 2
    return gpt.vcomplex([1] * half + [-1] * half, n)
# FFT forward test: compare an explicit momentum projection against one
# FFT component (exp_ixp, l_sp, fft_l_sp, L defined earlier in the file)
eps = g.norm2(g.sum(exp_ixp * l_sp) / np.prod(L) - fft_l_sp[1, 2, 3, 4])
g.message("FFT forward test:", eps)
assert eps < 1e-12

# spatial-only FFT: compare a momentum-projected time slice against a
# 3d FFT evaluated at the same spatial momentum, for every t
fft_mom_A = g.slice(
    g.exp_ixp(2.0 * np.pi * np.array([1, 2, 3, 0]) / L) * l_sp, 3
) / np.prod(L[0:3])
fft_mom_B = [g.vcolor(x) for x in g.eval(g.fft([0, 1, 2]) * l_sp)[1, 2, 3, 0 : L[3]]]
for t in range(L[3]):
    eps = g.norm2(fft_mom_A[t] - fft_mom_B[t])
    assert eps < 1e-12

################################################################################
# Test vcomplex
################################################################################
va = g.vcomplex(grid_sp, 30)
vb = g.lattice(va)
va[:] = g.vcomplex([1] * 15 + [0.5] * 15, 30)
vb[:] = g.vcomplex([0.5] * 5 + [1.0] * 20 + [0.2] * 5, 30)
# average the two constant fields and spot-check components in each segment
va @= 0.5 * va + 0.5 * vb
assert abs(va[0, 0, 0, 0][3] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][18] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][28] - 0.35) < 1e-6

################################################################################
# MPI
################################################################################
grid_sp.barrier()
# summing 1 over all ranks must give the number of processors
nodes = grid_sp.globalsum(1)
assert nodes == grid_sp.Nprocessors
a = np.array([[1.0, 2.0, 3.0], [4, 5, 6j]], dtype=np.complex64)
# inner product of two color vectors: compare lattice result against
# explicit component sum (lhs, rhs, outer defined earlier in the file)
inner = g.adj(lhs) * rhs
inner_comp = 0.0
for i in range(3):
    inner_comp += lhs.array.conjugate()[i] * rhs.array[i]
    for j in range(3):
        # outer product components: lhs_i * conj(rhs_j)
        assert abs(outer.array[i, j] - lhs.array[i] * rhs.array.conjugate()[j]) < 1e-14
assert abs(inner_comp - inner) < 1e-14
assert abs(inner_comp - g.rank_inner_product(lhs, rhs)) < 1e-14
# TODO: the following is already implemented for vcomplex but should
# be implemented for all vectors
# cwise = lhs * rhs

# inner product for vcomplex: 40 components in four constant segments;
# expected value is 10*(1*5 + 2*6 + 3*7 + 4*8) = 700
lhs = g.vcomplex([1.0] * 10 + [2] * 10 + [3] * 10 + [4] * 10, 40)
rhs = g.vcomplex([5.0] * 10 + [6] * 10 + [7] * 10 + [8] * 10, 40)
inner = g.adj(lhs) * rhs
inner_comp = 0.0
for i in range(40):
    inner_comp += lhs.array.conjugate()[i] * rhs.array[i]
assert abs(inner_comp - inner) < 1e-14
assert inner.real == 700.0

# demonstrate slicing of internal indices
vc = g.vcomplex(grid, 30)
vc[0, 0, 0, 0, 0] = 1
vc[0, 0, 0, 0, 1:29] = 1.5
vc[0, 0, 0, 0, 29] = 2
vc_comp = g.vcomplex([1] + [1.5] * 28 + [2], 30)
def vc12():
    """Factory: allocate a 12-component complex vector on the fine grid."""
    vec = g.vcomplex(fine_grid, 12)
    return vec
# plain CG reference iteration count (cg, eps2, w, start, defl, evec,
# c, irl defined earlier in the file)
niter_cg = len(cg.history)
g.message("Test resid/iter cg: ", eps2, niter_cg)
assert eps2 < 1e-8

# deflated CG must converge to the same residual in fewer iterations
sol_defl = g.eval(defl(w.Mpc) * start)
eps2 = g.norm2(w.Mpc * sol_defl - start) / g.norm2(start)
niter_defl = len(cg.history)
g.message("Test resid/iter deflated cg: ", eps2, niter_defl)
assert eps2 < 1e-8
assert niter_defl < niter_cg

# block
grid_coarse = g.block.grid(w.Mpc.vector_space[0].grid, [2, 2, 2, 2])
nbasis = 20
cstart = g.vcomplex(grid_coarse, nbasis)
cstart[:] = g.vcomplex([1] * nbasis, nbasis)
basis = evec[0:nbasis]
b = g.block.map(grid_coarse, basis)
# two orthonormalization sweeps for numerical stability
for i in range(2):
    b.orthonormalize()

# define coarse-grid operator and verify it agrees with the explicit
# project * fine-operator * promote chain
cop = b.coarse_operator(c(w.Mpc))
eps2 = g.norm2(cop * cstart - b.project * c(w.Mpc) * b.promote * cstart) / g.norm2(cstart)
g.message(f"Test coarse-grid promote/project cycle: {eps2}")
assert eps2 < 1e-13

# coarse-grid lanczos
cevec, cev = irl(cop, cstart)
################################################################################ # Test all other representations ################################################################################ for eps_ref, grid in [(1e-6, grid_sp), (1e-12, grid_dp)]: for representation in [ g.matrix_su2_fundamental, g.matrix_su2_adjoint, g.matrix_su3_fundamental, g.u1, g.complex, g.real, lambda grid: g.vreal(grid, 8), lambda grid: g.mreal(grid, 8), lambda grid: g.vcomplex(grid, 8), lambda grid: g.mcomplex(grid, 8), ]: U = representation(grid) g.message(f"Test {U.otype.__name__} on grid {grid.precision.__name__}") rng.element(U) check_element(U) check_representation(U, eps_ref) for method in ["defect_left", "defect_right"]: g.project(U, method) check_element(U) V = representation(grid) rng.element(V) check_inner_product(U, V, eps_ref)
# Authors: Christoph Lehner 2020 # # Desc.: Illustrate core concepts and features # import gpt as g import numpy as np import sys # grid grid = g.grid([2, 2, 2, 2], g.single) # test different lattice types vc = g.vcolor(grid) g.message(vc) vz30 = g.vcomplex(grid, 30) g.message(vz30) vz30c = g.lattice(grid, vz30.describe()) vz30c[:] = g.vcomplex([1] * 15 + [0.5] * 15, 30) g.message(vz30c) vz30b = g.lattice(vz30c) vz30b[:] = g.vcomplex([0.5] * 5 + [1.0] * 20 + [0.2] * 5, 30) g.message(g.eval(vz30c + 0.3 * vz30b)) # perform a barrier grid.barrier() # and a global sum over a number and a single-precision numpy array
# outer and inner products of two color vectors (lhs, rhs defined earlier)
outer = lhs * g.adj(rhs)
inner = g.adj(lhs) * rhs
inner_comp = 0.0
for i in range(3):
    inner_comp += lhs.array.conjugate()[i] * rhs.array[i]
    for j in range(3):
        # outer product components: lhs_i * conj(rhs_j)
        assert abs(outer.array[i, j] - lhs.array[i] * rhs.array.conjugate()[j]) < 1e-14
assert abs(inner_comp - inner) < 1e-14
# TODO: the following is already implemented for vcomplex but should
# be implemented for all vectors
# cwise = lhs * rhs

# demonstrate slicing of internal indices
vc = g.vcomplex(grid, 30)
vc[0, 0, 0, 0, 0] = 1
vc[0, 0, 0, 0, 1:29] = 1.5
vc[0, 0, 0, 0, 29] = 2
vc_comp = g.vcomplex([1] + [1.5] * 28 + [2], 30)
eps2 = g.norm2(vc[0, 0, 0, 0] - vc_comp)
assert eps2 < 1e-13

# demonstrate mask: a complex field that is 1 at a single site and 0
# elsewhere zeroes out every other site when multiplied in
mask = g.complex(grid)
mask[:] = 0
mask[0, 1, 2, 3] = 1
vc[:] = vc[0, 0, 0, 0]
vcmask = g.eval(mask * vc)
assert g.norm2(vcmask[0, 0, 0, 0]) < 1e-13
assert g.norm2(vcmask[0, 1, 2, 3] - vc_comp) < 1e-13
# setup rng rng = g.random("ducks_smell_funny") # size of basis nbasis_f = 30 nbasis_c = 40 nb_f = nbasis_f // 2 nb_c = nbasis_c // 2 # setup fine basis basis_ref_f = [g.vspincolor(grid) for __ in range(nb_f)] basis_split_f = [g.vspincolor(grid) for __ in range(nbasis_f)] rng.cnormal(basis_ref_f) # setup coarse basis basis_ref_c = [g.vcomplex(grid, nbasis_f) for __ in range(nb_c)] basis_split_c = [g.vcomplex(grid, nbasis_f) for __ in range(nbasis_c)] rng.cnormal(basis_ref_c) def run_test(basis_split, basis_ref): for factor in [0.5, 1.0, None]: for i in range(len(basis_ref)): basis_split[i] = g.copy(basis_ref[i]) g.coarse.split_chiral(basis_split, factor) g.coarse.unsplit_chiral(basis_split, factor) typename = basis_split[0].otype.__name__ for i in range(len(basis_ref)): diff2 = g.norm2(basis_ref[i] - basis_split[i])
import numpy as np
import sys
import random
import cgpt

# add test for byte order swap: munging 4-byte words must reverse them
data = memoryview(bytearray(4))
data[:] = b"NUXI"
mdata = memoryview(bytes(4))
cgpt.munge_byte_order(mdata, data, 4)
assert mdata[::-1] == data

# import/export test
grid = g.grid([4, 4, 8, 8], g.single)
src = g.vcomplex(grid, 30)
dst = g.vcomplex(grid, 30)
dst[:] = 0

# fill a test lattice with a site-dependent pattern so that any
# permutation error in import/export is detectable
for x in range(4):
    for y in range(4):
        for z in range(8):
            for t in range(8):
                src[x, y, z, t] = g.vcomplex([x + t * 1j, y + t * 1j, z + t * 1j] * 10, 30)

# now create a random partition of this lattice distributed over all nodes
c = (
    g.coordinates(grid).copy().view(np.ndarray)
)  # copy to make it writeable and lift local_coordinate type
rng.cnormal(basis)

for nvec in [1, 4]:
    # NOTE(review): internal layout of this triple-quoted message was lost
    # in extraction; reconstructed with plausible alignment
    g.message(f"""
Lookup Table Benchmark with
    fine fdimensions   : {fgrid.fdimensions}
    coarse fdimensions : {cgrid.fdimensions}
    precision          : {precision.__name__}
    nbasis             : {nbasis}
    basis_n_block      : {basis_n_block}
    nvec               : {nvec}
""")

    # Source and destination
    fine = [g.vspincolor(fgrid) for i in range(nvec)]
    coarse = [g.vcomplex(cgrid, nbasis) for i in range(nvec)]
    rng.cnormal(coarse)

    # per-site float/complex counts used for flop/byte accounting
    Nc = fine[0].otype.shape[1]
    fine_floats = fine[0].otype.nfloats
    fine_complex = fine_floats // 2
    coarse_floats = 2 * nbasis
    coarse_complex = coarse_floats // 2
    flops_per_cmul = 6
    flops_per_cadd = 2

    #######
    # Benchmark project
    #
    # Flops (count flops and bytes of nvec sequential operations as reference)
    flops_per_fine_site = (fine_complex * flops_per_cmul +
t2 = g.time()
g.message(
    "Creating the A2A coarse basis took",
    t1 - t0,
    "s and",
    t2 - t1,
    "s for orthonormalization",
)

# now create and compress v and w vectors: promote each coarse
# eigenvector to the fine grid, apply the left/right A2A operators,
# and project back onto the A2A coarse basis
a2a_cleft, a2a_cright = [], []
for i in range(len(cevec)):
    t0 = g.time()
    a2a_cleft.append(
        g.block.project(
            g.vcomplex(a2a_coarse_grid, len(a2a_basis)),
            a2a_left(g.block.promote(cevec[i], tmpf, basis)),
            a2a_basis,
        )
    )
    a2a_cright.append(
        g.block.project(
            g.vcomplex(a2a_coarse_grid, len(a2a_basis)),
            a2a_right(g.block.promote(cevec[i], tmpf, basis)),
            a2a_basis,
        )
    )
    t1 = g.time()
    cevec[i] = None  # release memory
    g.message("Create compressed left/right vectors %d in %g s" % (i, t1 - t0))

del basis
def mk_ceig(gf, job_tag, inv_type):
    """Build a compressed (coarse-grid) eigensystem for the given gauge field.

    Runs an implicitly-restarted Lanczos on the even-odd preconditioned
    (z)Mobius operator, blocks the resulting eigenvectors, runs a coarse-grid
    Lanczos, and smooths the coarse eigenvectors to obtain eigenvalues.

    Returns (basis, cevec, smoothed_evals).
    """
    timer = q.Timer(f"py:mk_ceig({job_tag},{inv_type})", True)
    timer.start()
    # convert qlat gauge field to a single-precision gpt field
    gpt_gf = g.convert(qg.gpt_from_qlat(gf), g.single)
    parity = g.odd
    params = get_lanc_params(job_tag, inv_type)
    q.displayln_info(f"mk_ceig: job_tag={job_tag} inv_type={inv_type}")
    q.displayln_info(f"mk_ceig: params={params}")
    fermion_params = params["fermion_params"]
    # "omega" in the parameters selects the zMobius discretization
    if "omega" in fermion_params:
        qm = g.qcd.fermion.zmobius(gpt_gf, fermion_params)
    else:
        qm = g.qcd.fermion.mobius(gpt_gf, fermion_params)
    w = g.qcd.fermion.preconditioner.eo2_ne(parity=parity)(qm)

    def make_src(rng):
        # random even-odd source on the chosen parity
        src = g.vspincolor(w.F_grid_eo)
        # src[:] = g.vspincolor([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]])
        rng.cnormal(src)
        src.checkerboard(parity)
        return src

    # power iteration to estimate the largest eigenvalue (for Chebyshev range)
    pit = g.algorithms.eigen.power_iteration(**params["pit_params"])
    pit_ev, _, _ = pit(w.Mpc, make_src(g.random("lanc")))
    q.displayln_info(f"mk_ceig: pit_ev={pit_ev}")
    #
    cheby = g.algorithms.polynomial.chebyshev(params["cheby_params"])
    irl = g.algorithms.eigen.irl(params["irl_params"])
    evec, ev = irl(cheby(w.Mpc), make_src(g.random("lanc")))
    evals = g.algorithms.eigen.evals(w.Mpc, evec, check_eps2=1e-6, real=True)
    g.mem_report()
    #
    inv = g.algorithms.inverter
    #
    cparams = get_clanc_params(job_tag, inv_type)
    q.displayln_info(f"mk_ceig: cparams={cparams}")
    #
    # coarse grid: 5th dimension blocked by Ls, 4d blocked per cparams
    grid_coarse = g.block.grid(
        w.F_grid_eo, [get_ls_from_fermion_params(fermion_params)] + cparams["block"]
    )
    nbasis = cparams["nbasis"]
    basis = evec[0:nbasis]
    b = g.block.map(grid_coarse, basis)
    for i in range(2):
        b.orthonormalize()
    del evec
    gc.collect()
    #
    ccheby = g.algorithms.polynomial.chebyshev(cparams["cheby_params"])
    cop = b.coarse_operator(ccheby(w.Mpc))
    #
    cstart = g.vcomplex(grid_coarse, nbasis)
    cstart[:] = g.vcomplex([1] * nbasis, nbasis)
    # sanity check: coarse operator must match project * fine-op * promote
    eps2 = g.norm2(cop * cstart - b.project * ccheby(w.Mpc) * b.promote * cstart) / g.norm2(cstart)
    g.message(f"Test coarse-grid promote/project cycle: {eps2}")
    cirl = g.algorithms.eigen.irl(cparams["irl_params"])
    cevec, cev = cirl(cop, cstart)
    #
    # smooth each promoted coarse eigenvector with a few CG iterations,
    # then measure its eigenvalue against the true operator
    smoother = inv.cg(cparams["smoother_params"])(w.Mpc)
    smoothed_evals = []
    tmpf = g.lattice(basis[0])
    for i, cv in enumerate(cevec):
        tmpf @= smoother * b.promote * cv
        smoothed_evals = smoothed_evals + g.algorithms.eigen.evals(
            w.Mpc, [tmpf], check_eps2=10, real=True
        )
    g.mem_report()
    #
    timer.stop()
    return basis, cevec, smoothed_evals
# correlation tests: compare lattice correlate against an explicit
# reference implementation at one displacement
A, B = rng.cnormal([g.complex(grid_dp) for i in range(2)])
eps = abs(
    g.correlate(A, B, [0, 1, 2])[1, 0, 3, 2] - correlate_test_3d(A, B, [1, 0, 3, 2])
)
g.message(f"Test correlate 3d: {eps}")
assert eps < 1e-13

eps = abs(g.correlate(A, B)[1, 0, 3, 2] - correlate_test_4d(A, B, [1, 0, 3, 2]))
g.message(f"Test correlate 4d: {eps}")
assert eps < 1e-13

################################################################################
# Test vcomplex
################################################################################
va = g.vcomplex(grid_sp, 30)
vb = g.lattice(va)
va[:] = g.vcomplex([1] * 15 + [0.5] * 15, 30)
vb[:] = g.vcomplex([0.5] * 5 + [1.0] * 20 + [0.2] * 5, 30)
# average the two constant fields and spot-check components in each segment
va @= 0.5 * va + 0.5 * vb
assert abs(va[0, 0, 0, 0][3] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][18] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][28] - 0.35) < 1e-6

################################################################################
# MPI
################################################################################
grid_sp.barrier()
# summing 1 over all ranks must give the number of processors
nodes = grid_sp.globalsum(1)
assert nodes == grid_sp.Nprocessors
a = np.array([[1.0, 2.0, 3.0], [4, 5, 6j]], dtype=np.complex64)
# compare link fields for p in range(9): err2 = g.norm2(A_c[p] - Asaved_c[p]) / g.norm2(A_c[p]) g.message( f"Relative deviation of Asaved_c[{p}] from A_c[{p}] = {err2:e}", ) assert err2 <= tol_links g.message("Tests for links passed for all directions") del Asaved_c # create coarse operator from links mat_c = g.qcd.fermion.coarse_fermion(A_c, level=0) # setup coarse vectors vec_in_c = g.vcomplex(grid_c, nbasis_f) rng.cnormal(vec_in_c) # apply chained and constructed coarse operator vec_out_chained_c = g(bm_f.project * mat_f * bm_f.promote * vec_in_c) vec_out_constructed_c = g(mat_c * vec_in_c) # report error err2 = g.norm2(vec_out_chained_c - vec_out_constructed_c) / g.norm2(vec_out_chained_c) g.message( "Relative deviation of constructed from chained coarse operator on coarse grid = %e" % err2 ) assert err2 <= tol_operator g.message("Test passed for coarse operator, %e <= %e" % (err2, tol_operator))
    # NOTE(review): this chunk starts mid-call — the opening of this
    # expression (presumably fg_basis = g.advise(...) ) lies before the
    # visible region
    [g.vspincolor(q.F_grid_eo) for i in range(nbasis)], g.infrequent_use
)
rng.zn(fg_basis)
g.save("basis", [fg_basis])
# g.mem_report()
# g.prefetch( fg_basis, g.to_accelerator)
# g.mem_report()
# w=fg_basis[-1]
# g.orthogonalize(w,fg_basis[0:1])
# g.orthogonalize(w,fg_basis[0:15])
fg_basis = g.advise(fg_basis, g.infrequent_use)
# coarse grid: Ls=12 in the 5th dimension, 2^4 blocking in 4d
tg = g.block.grid(q.F_grid_eo, [12, 2, 2, 2, 2])
fg_cevec = g.advise([g.vcomplex(tg, 150) for i in range(nbasis)], g.infrequent_use)
rng.zn(fg_cevec)
fg_feval = [0.0 for i in range(nbasis)]

# memory info
g.mem_report()

# norms
for i in range(nbasis):
    g.message("Norm2 of basis[%d] = %g" % (i, g.norm2(fg_basis[i])))
for i in range(nbasis):
    g.message("Norm2 of cevec[%d] = %g" % (i, g.norm2(fg_cevec[i])))
g.mem_report()
assert eps < 1e-12

# spatial-only FFT: compare a momentum-projected time slice against a
# 3d FFT evaluated at the same spatial momentum, for every t
fft_mom_A = g.slice(
    g.exp_ixp(2.0 * np.pi * np.array([1, 2, 3, 0]) / L) * l_sp, 3
) / np.prod(L[0:3])
fft_mom_B = [
    g.vcolor(x) for x in g.eval(g.fft([0, 1, 2]) * l_sp)[1, 2, 3, 0:L[3]]
]
for t in range(L[3]):
    eps = g.norm2(fft_mom_A[t] - fft_mom_B[t])
    assert eps < 1e-12

################################################################################
# Test vcomplex
################################################################################
va = g.vcomplex(grid_sp, 30)
vb = g.lattice(va)
va[:] = g.vcomplex([1] * 15 + [0.5] * 15, 30)
vb[:] = g.vcomplex([0.5] * 5 + [1.0] * 20 + [0.2] * 5, 30)
# average the two constant fields and spot-check components in each segment
va @= 0.5 * va + 0.5 * vb
assert abs(va[0, 0, 0, 0][3] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][18] - 0.75) < 1e-6
assert abs(va[0, 0, 0, 0][28] - 0.35) < 1e-6

################################################################################
# MPI
################################################################################
grid_sp.barrier()
# summing 1 over all ranks must give the number of processors
nodes = grid_sp.globalsum(1)
assert nodes == grid_sp.Nprocessors
a = np.array([[1.0, 2.0, 3.0], [4, 5, 6j]], dtype=np.complex64)
def load(filename, params):
    """Load a compressed eigenvector file (Grid/cps "compressed" format).

    Reads metadata, the fine basis (fp32 head + fp16 tail), and the coarse
    eigenvector coefficients (mixed fp32/fp16), distributing everything
    across the nodes of the fine/coarse grids.

    Parameters
    ----------
    filename : directory containing metadata.txt, eigen-values.txt and
               per-slot .compressed files
    params : dict with keys "grids" (single-precision 5d eo fine grid),
             "nmax" (optional partial load), "advise_basis", "advise_cevec",
             and optionally "max_read_blocks"

    Returns (basis, cevec, ev).
    """
    # first check if this is right file format
    if not os.path.exists(filename + "/00/0000000000.compressed") or not os.path.exists(
        filename + "/metadata.txt"
    ):
        raise NotImplementedError()

    # verbosity
    verbose = gpt.default.is_verbose("io")

    # site checkerboard
    # only odd is used in this file format but
    # would be easy to generalize here
    site_cb = gpt.odd

    # need grids parameter
    assert params["grids"] is not None
    assert type(params["grids"]) == gpt.grid
    fgrid = params["grids"]
    assert fgrid.precision == gpt.single
    fdimensions = fgrid.fdimensions

    # read metadata; the file stores the 5th dimension last, gpt wants it first
    metadata = read_metadata(filename + "/metadata.txt")
    s = get_ivec(metadata, "s")
    ldimensions = [s[4]] + s[:4]
    blocksize = get_ivec(metadata, "b")
    blocksize = [blocksize[4]] + blocksize[:4]
    nb = get_ivec(metadata, "nb")
    nb = [nb[4]] + nb[:4]
    crc32 = get_xvec(metadata, "crc32")
    neigen = int(metadata["neig"])
    nbasis = int(metadata["nkeep"])
    nsingle = int(metadata["nkeep_single"])
    blocks = int(metadata["blocks"])
    FP16_COEF_EXP_SHARE_FLOATS = int(metadata["FP16_COEF_EXP_SHARE_FLOATS"])
    nsingleCap = min([nsingle, nbasis])

    # check
    nd = len(ldimensions)
    assert nd == 5
    assert nd == len(fdimensions)
    assert nd == len(blocksize)
    assert fgrid.cb.n == 2
    assert fgrid.cb.cb_mask == [0, 1, 1, 1, 1]

    # create coarse grid
    cgrid = gpt.block.grid(fgrid, blocksize)

    # allow for partial loading of data
    if params["nmax"] is not None:
        nmax = params["nmax"]
        nbasis_max = min([nmax, nbasis])
        neigen_max = min([nmax, neigen])
        nsingleCap_max = min([nmax, nsingleCap])
    else:
        nbasis_max = nbasis
        neigen_max = neigen
        nsingleCap_max = nsingleCap

    # allocate all lattices
    basis = [gpt.vspincolor(fgrid) for i in range(nbasis_max)]
    cevec = [gpt.vcomplex(cgrid, nbasis) for i in range(neigen_max)]
    if params["advise_basis"] is not None:
        gpt.advise(basis, params["advise_basis"])
    if params["advise_cevec"] is not None:
        gpt.advise(cevec, params["advise_cevec"])

    # fix checkerboard of basis
    for i in range(nbasis_max):
        basis[i].checkerboard(site_cb)

    # mpi layout
    mpi = []
    for i in range(nd):
        assert fdimensions[i] % ldimensions[i] == 0
        mpi.append(fdimensions[i] // ldimensions[i])
    assert mpi[0] == 1  # assert no mpi in 5th direction

    # create cartesian view on fine grid
    cv0 = gpt.cartesian_view(-1, mpi, fdimensions, fgrid.cb, site_cb)
    views = cv0.views_for_node(fgrid)

    # timing (tiny nonzero init avoids division by zero in rate reports)
    totalSizeGB = 0
    dt_fp16 = 1e-30
    dt_distr = 1e-30
    dt_munge = 1e-30
    dt_crc = 1e-30
    dt_fread = 1e-30
    t0 = gpt.time()

    # load all views
    if verbose:
        gpt.message("Loading %s with %d views per node" % (filename, len(views)))
    for i, v in enumerate(views):
        cv = gpt.cartesian_view(
            v if v is not None else -1, mpi, fdimensions, fgrid.cb, site_cb
        )
        cvc = gpt.cartesian_view(
            v if v is not None else -1, mpi, cgrid.fdimensions, gpt.full, gpt.none
        )
        pos_coarse = gpt.coordinates(cvc, "canonical")

        dn, fn = get_local_name(filename, cv)

        # sizes
        slot_lsites = numpy.prod(cv.view_dimensions)
        assert slot_lsites % blocks == 0
        block_data_size_single = slot_lsites * 12 // 2 // blocks * 2 * 4
        block_data_size_fp16 = FP_16_SIZE(slot_lsites * 12 // 2 // blocks * 2, 24)
        coarse_block_size_part_fp32 = 2 * (4 * nsingleCap)
        coarse_block_size_part_fp16 = 2 * (
            FP_16_SIZE(nbasis - nsingleCap, FP16_COEF_EXP_SHARE_FLOATS)
        )
        coarse_vector_size = (
            coarse_block_size_part_fp32 + coarse_block_size_part_fp16
        ) * blocks
        coarse_fp32_vector_size = 2 * (4 * nbasis) * blocks

        # checksum
        crc32_comp = 0

        # file
        f = gpt.FILE(fn, "rb") if fn is not None else None

        # block positions
        pos = [
            cgpt.coordinates_from_block(cv.top, cv.bottom, b, nb, "canonicalOdd")
            for b in range(blocks)
        ]

        # group blocks: merge pairs until at most max_read_blocks reads remain
        read_blocks = blocks
        block_reduce = 1
        max_read_blocks = get_param(params, "max_read_blocks", 8)
        while read_blocks > max_read_blocks and read_blocks % 2 == 0:
            pos = [
                numpy.concatenate((pos[2 * i + 0], pos[2 * i + 1]))
                for i in range(read_blocks // 2)
            ]
            block_data_size_single *= 2
            block_data_size_fp16 *= 2
            read_blocks //= 2
            block_reduce *= 2
        gpt.message("Read blocks", blocks)

        # make read-only to enable caching
        for x in pos:
            x.setflags(write=0)

        # dummy buffer
        data0 = memoryview(bytes())

        # single-precision data
        data_munged = memoryview(bytearray(block_data_size_single * nsingleCap))
        for b in range(read_blocks):
            fgrid.barrier()
            dt_fread -= gpt.time()
            if f is not None:
                data = memoryview(f.read(block_data_size_single * nsingleCap))
                globalReadGB = len(data) / 1024.0**3.0
            else:
                globalReadGB = 0.0
            globalReadGB = fgrid.globalsum(globalReadGB)
            dt_fread += gpt.time()
            totalSizeGB += globalReadGB

            if f is not None:
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()
                dt_munge -= gpt.time()
                # data: lattice0_posA lattice1_posA .... lattice0_posB lattice1_posB
                cgpt.munge_inner_outer(data_munged, data, nsingleCap, block_reduce)
                # data_munged: lattice0 lattice1 lattice2 ...
                dt_munge += gpt.time()
            else:
                data_munged = data0

            fgrid.barrier()
            dt_distr -= gpt.time()
            # build the distribution plan once per read block, reuse for all lattices
            rhs = data_munged[0:block_data_size_single]
            distribute_plan = gpt.copy_plan(basis[0], rhs)
            distribute_plan.destination += basis[0].view[pos[b]]
            distribute_plan.source += gpt.global_memory_view(
                fgrid, [[fgrid.processor, rhs, 0, rhs.nbytes]]
            )
            rhs = None
            distribute_plan = distribute_plan()
            for i in range(nsingleCap_max):
                distribute_plan(
                    basis[i],
                    data_munged[
                        block_data_size_single * i : block_data_size_single * (i + 1)
                    ],
                )
            dt_distr += gpt.time()

            if verbose:
                gpt.message(
                    "* read %g GB: fread at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s; available = %g GB"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fread,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                        mem_avail(),
                    )
                )

        # fp16 data
        if nbasis != nsingleCap:
            # allocate data buffer
            data_fp32 = memoryview(
                bytearray(block_data_size_single * (nbasis - nsingleCap))
            )
            data_munged = memoryview(
                bytearray(block_data_size_single * (nbasis - nsingleCap))
            )
            for b in range(read_blocks):
                fgrid.barrier()
                dt_fread -= gpt.time()
                if f is not None:
                    data = memoryview(f.read(block_data_size_fp16 * (nbasis - nsingleCap)))
                    globalReadGB = len(data) / 1024.0**3.0
                else:
                    globalReadGB = 0.0
                globalReadGB = fgrid.globalsum(globalReadGB)
                dt_fread += gpt.time()
                totalSizeGB += globalReadGB

                if f is not None:
                    dt_crc -= gpt.time()
                    crc32_comp = gpt.crc32(data, crc32_comp)
                    dt_crc += gpt.time()
                    dt_fp16 -= gpt.time()
                    # expand fp16 payload to fp32 before munging
                    cgpt.fp16_to_fp32(data_fp32, data, 24)
                    dt_fp16 += gpt.time()
                    dt_munge -= gpt.time()
                    cgpt.munge_inner_outer(
                        data_munged,
                        data_fp32,
                        nbasis - nsingleCap,
                        block_reduce,
                    )
                    dt_munge += gpt.time()
                else:
                    data_munged = data0

                fgrid.barrier()
                dt_distr -= gpt.time()
                if nsingleCap < nbasis_max:
                    rhs = data_munged[0:block_data_size_single]
                    distribute_plan = gpt.copy_plan(basis[0], rhs)
                    distribute_plan.destination += basis[0].view[pos[b]]
                    distribute_plan.source += gpt.global_memory_view(
                        fgrid, [[fgrid.processor, rhs, 0, rhs.nbytes]]
                    )
                    rhs = None
                    distribute_plan = distribute_plan()
                    for i in range(nsingleCap, nbasis_max):
                        j = i - nsingleCap
                        distribute_plan(
                            basis[i],
                            data_munged[
                                block_data_size_single * j : block_data_size_single * (j + 1)
                            ],
                        )
                dt_distr += gpt.time()

                if verbose:
                    gpt.message(
                        "* read %g GB: fread at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s; available = %g GB"
                        % (
                            totalSizeGB,
                            totalSizeGB / dt_fread,
                            totalSizeGB / dt_crc,
                            totalSizeGB / dt_munge,
                            totalSizeGB / dt_distr,
                            totalSizeGB / dt_fp16,
                            mem_avail(),
                        )
                    )

        # coarse grid data
        data_fp32 = memoryview(bytearray(coarse_fp32_vector_size))
        distribute_plan = None
        for j in range(neigen):
            fgrid.barrier()
            dt_fread -= gpt.time()
            if f is not None:
                data = memoryview(f.read(coarse_vector_size))
                globalReadGB = len(data) / 1024.0**3.0
            else:
                globalReadGB = 0.0
            globalReadGB = fgrid.globalsum(globalReadGB)
            dt_fread += gpt.time()
            totalSizeGB += globalReadGB

            if f is not None:
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()
                dt_fp16 -= gpt.time()
                cgpt.mixed_fp32fp16_to_fp32(
                    data_fp32,
                    data,
                    coarse_block_size_part_fp32,
                    coarse_block_size_part_fp16,
                    FP16_COEF_EXP_SHARE_FLOATS,
                )
                dt_fp16 += gpt.time()
                data = data_fp32
            else:
                data = data0

            fgrid.barrier()
            dt_distr -= gpt.time()
            if j < neigen_max:
                # plan is identical for every coarse vector; build it once
                if distribute_plan is None:
                    distribute_plan = gpt.copy_plan(cevec[j], data)
                    distribute_plan.destination += cevec[j].view[pos_coarse]
                    distribute_plan.source += gpt.global_memory_view(
                        cgrid, [[cgrid.processor, data, 0, data.nbytes]]
                    )
                    distribute_plan = distribute_plan()
                distribute_plan(cevec[j], data)
            dt_distr += gpt.time()

            # NOTE(review): neigen // 10 is 0 when neigen < 10, which would
            # raise ZeroDivisionError here — verify neigen >= 10 is guaranteed
            if verbose and j % (neigen // 10) == 0:
                gpt.message(
                    "* read %g GB: fread at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s; available = %g GB"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fread,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                        totalSizeGB / dt_fp16,
                        mem_avail(),
                    )
                )

        # crc checks
        if f is not None:
            assert crc32_comp == crc32[cv.rank]

    # timing
    t1 = gpt.time()

    # verbosity
    if verbose:
        gpt.message("* load %g GB at %g GB/s" % (totalSizeGB, totalSizeGB / (t1 - t0)))

    # eigenvalues
    evln = list(
        filter(lambda x: x != "", open(filename + "/eigen-values.txt").read().split("\n"))
    )
    nev = int(evln[0])
    ev = [float(x) for x in evln[1:]]
    assert len(ev) == nev
    return (basis, cevec, ev)
# basis n = 30 res = None tmpf_prev = None for dtype in [msc, vc12]: g.message(f"Data type {dtype.__name__}") basis = [dtype() for i in range(n)] rng = g.random("block_seed_string_13") rng.cnormal(basis) for i in range(2): g.message("Ortho step %d" % i) g.block.orthonormalize(coarse_grid, basis) # test coarse vector lcoarse = g.vcomplex(coarse_grid, n) rng.cnormal(lcoarse) # temporary fine and coarse vectors tmpf = g.lattice(basis[0]) lcoarse2 = g.lattice(lcoarse) # coarse-to-fine-to-coarse g.block.promote(lcoarse, tmpf, basis) g.block.project(lcoarse2, tmpf, basis) # report error err2 = g.norm2(lcoarse - lcoarse2) / g.norm2(lcoarse) g.message(err2) assert err2 < 1e-12
def vec_c_full():
    """Factory: allocate a coarse vector on the full (non-checkerboarded) grid."""
    vec = g.vcomplex(mat_c.F_grid, nbasis_f)
    return vec
#!/usr/bin/env python3 # # Authors: Christoph Lehner 2020 # # Desc.: Illustrate core concepts and features # import gpt as g import numpy as np import sys # load configuration fine_grid = g.grid([8, 8, 8, 16], g.single) # basis n = 31 basis = [g.vcomplex(fine_grid, 30) for i in range(n)] rng = g.random("block_seed_string_13") rng.cnormal(basis) # gram-schmidt for i in range(n): basis[i] /= g.norm2(basis[i]) ** 0.5 g.orthogonalize(basis[i], basis[:i]) for j in range(i): eps = g.inner_product(basis[j], basis[i]) g.message(" <%d|%d> =" % (j, i), eps) assert abs(eps) < 1e-6
def vec_c_half():
    """Factory: allocate a coarse vector on the even-odd (checkerboarded) grid."""
    vec = g.vcomplex(mat_c.F_grid_eo, nbasis_f)
    return vec
def __init__(self, mat_f, params):
    """Multigrid setup: build grids, bases, block maps and coarse links.

    Parameters
    ----------
    mat_f : fine-grid matrix (operator) or matrix_operator wrapper
    params : dict specifying either "grid" (explicit list of coarser grids)
             or "blocksize" (per-level block sizes), plus per-level lists
             for nblockortho, nbasis, make_hermitian, save_links, etc.
    """
    # save parameters
    self.params = params

    # fine grid from fine matrix
    if issubclass(type(mat_f), g.matrix_operator):
        self.grid = [mat_f.grid[1]]
    else:
        self.grid = [mat_f.grid]

    # grid sizes - allow specifying in two ways
    if "grid" in params:
        self.grid.extend(params["grid"])
    elif "blocksize" in params:
        for i, bs in enumerate(params["blocksize"]):
            assert type(bs) == list
            self.grid.append(g.block.grid(self.grid[i], bs))
    else:
        assert 0

    # dependent sizes
    self.nlevel = len(self.grid)
    self.ncoarselevel = self.nlevel - 1
    self.finest = 0
    self.coarsest = self.nlevel - 1

    # other parameters: each broadcast to one entry per level transition
    self.nblockortho = g.util.to_list(params["nblockortho"], self.nlevel - 1)
    self.check_blockortho = g.util.to_list(params["check_blockortho"], self.nlevel - 1)
    self.nbasis = g.util.to_list(params["nbasis"], self.nlevel - 1)
    self.make_hermitian = g.util.to_list(params["make_hermitian"], self.nlevel - 1)
    self.save_links = g.util.to_list(params["save_links"], self.nlevel - 1)
    self.npreortho = g.util.to_list(params["npreortho"], self.nlevel - 1)
    self.npostortho = g.util.to_list(params["npostortho"], self.nlevel - 1)
    self.vector_type = g.util.to_list(params["vector_type"], self.nlevel - 1)
    self.distribution = g.util.to_list(params["distribution"], self.nlevel - 1)
    self.solver = g.util.to_list(params["solver"], self.nlevel - 1)

    # verbosity
    self.verbose = g.default.is_verbose("multi_grid_setup")

    # print prefix
    self.print_prefix = ["mg_setup: level %d:" % i for i in range(self.nlevel)]

    # easy access to current level and neighbors (nf = next finer, nc = next coarser)
    self.lvl = [i for i in range(self.nlevel)]
    self.nf_lvl = [i - 1 for i in range(self.nlevel)]
    self.nc_lvl = [i + 1 for i in range(self.nlevel)]
    self.nf_lvl[self.finest] = None
    self.nc_lvl[self.coarsest] = None

    # halved nbasis (chiral halves)
    self.nb = []
    for lvl, b in enumerate(self.nbasis):
        assert b % 2 == 0
        self.nb.append(b // 2)

    # assertions
    # NOTE(review): self.check_blockortho is not included in this length
    # check although all other per-level lists are — verify intentional
    assert self.nlevel >= 2
    assert g.util.entries_have_length(
        [
            self.nblockortho,
            self.nbasis,
            self.make_hermitian,
            self.save_links,
            self.npreortho,
            self.npostortho,
            self.vector_type,
            self.distribution,
            self.solver,
            self.nb,
        ],
        self.nlevel - 1,
    )

    # timing
    self.t = [g.timer("mg_setup_lvl_%d" % (lvl)) for lvl in range(self.nlevel)]

    # matrices (coarse ones initialized later)
    self.mat = [mat_f] + [None] * (self.nlevel - 1)

    # setup random basis vectors on all levels but coarsest; finest level
    # uses spin-color vectors, intermediate levels use vcomplex sized by
    # the next-finer level's basis
    self.basis = [None] * self.nlevel
    for lvl, grid in enumerate(self.grid):
        if lvl == self.coarsest:
            continue
        elif lvl == self.finest:
            self.basis[lvl] = [g.vspincolor(grid) for __ in range(self.nbasis[lvl])]
        else:
            self.basis[lvl] = [
                g.vcomplex(grid, self.nbasis[self.nf_lvl[lvl]])
                for __ in range(self.nbasis[lvl])
            ]
        # only the first half (one chiral component) is filled randomly
        self.distribution[lvl](self.basis[lvl][0:self.nb[lvl]])

    # setup a block map on all levels but coarsest
    self.blockmap = [None] * self.nlevel
    for lvl in self.lvl:
        if lvl == self.coarsest:
            continue
        else:
            self.blockmap[lvl] = g.block.map(self.grid[self.nc_lvl[lvl]], self.basis[lvl])

    # setup coarse link fields on all levels but finest (8 hops + self link)
    self.A = [None] * self.nlevel
    for lvl in range(self.finest + 1, self.nlevel):
        self.A[lvl] = [
            g.mcomplex(self.grid[lvl], self.nbasis[self.nf_lvl[lvl]])
            for __ in range(9)
        ]

    # setup a solver history
    # NOTE(review): [[None]] * n aliases the SAME inner list at every level;
    # safe only if entries are reassigned, not mutated in place — verify
    self.history = [[None]] * (self.nlevel - 1)

    # rest of setup
    self.__call__()
tmpf_prev = None

# run the block map promote/project cycle for both data types
for dtype in [vsc, vc12]:
    g.message(f"Data type {dtype.__name__}")
    basis = [dtype() for i in range(n)]
    if cb is not None:
        for x in basis:
            x.checkerboard(cb)
    rng = g.random("block_seed_string_13")
    rng.cnormal(basis)
    b = g.block.map(coarse_grid, basis)

    # two orthonormalization sweeps for numerical stability
    for i in range(2):
        g.message("Ortho step %d" % i)
        b.orthonormalize()

    # test coarse vector
    lcoarse = [g.vcomplex(coarse_grid, n) for i in range(nvec)]
    rng.cnormal(lcoarse)

    # report error of promote-project cycle: list form must agree with
    # applying the cycle to each vector individually
    lcoarse2 = g(b.project * b.promote * lcoarse)
    for i in range(nvec):
        lcoarse2_i = g(b.project * b.promote * lcoarse[i])
        eps2 = g.norm2(lcoarse2[i] - lcoarse2_i) / g.norm2(lcoarse2_i)
        g.message(eps2)
        assert eps2 < 1e-12

    # with an orthonormal basis the cycle must be the identity
    err2 = g.norm2(lcoarse2[0] - lcoarse[0]) / g.norm2(lcoarse[0])
    g.message(err2)
    assert err2 < 1e-12
def create_links(A, fmat, basis, params):
    """
    Build the coarse-grid operator links by projecting the fine matrix
    onto the block basis.

    Parameters:
        A      -- list of nhops+1 coarse link fields; A[0..nhops-1] receive
                  the directional hopping terms, A[nhops] the self coupling.
                  Filled in place.
        fmat   -- fine-grid matrix providing Mdir(mu, disp) and operator
                  application.
        basis  -- block basis vectors; the blocks are expected to already be
                  orthogonalized (see NOTE below).
        params -- dict with boolean entries "make_hermitian" and "save_links".
    """
    # NOTE: we expect the blocks in the basis vectors
    # to already be orthogonalized!

    # parameters
    make_hermitian = params["make_hermitian"]
    save_links = params["save_links"]
    # hermiticity can only be exploited when backward links are reconstructed
    assert not (make_hermitian and not save_links)

    # verbosity
    verbose = gpt.default.is_verbose("coarsen")

    # setup timings
    t = gpt.timer("coarsen")
    t("setup")

    # get grids
    f_grid = basis[0].grid
    c_grid = A[0].grid

    # directions/displacements we coarsen
    # (a 5d fine grid carries the fifth dimension in slot 0 -> skip it)
    dirs = [1, 2, 3, 4] if f_grid.nd == 5 else [0, 1, 2, 3]
    disp = +1
    dirdisps_full = list(zip(dirs * 2, [+1] * 4 + [-1] * 4))
    dirdisps_forward = list(zip(dirs, [disp] * 4))
    nhops = len(dirdisps_full)
    selflink = nhops

    # setup fields
    Mvr = [gpt.lattice(basis[0]) for i in range(nhops)]
    tmp = gpt.lattice(basis[0])
    oproj = gpt.vcomplex(c_grid, len(basis))
    selfproj = gpt.vcomplex(c_grid, len(basis))

    # setup masks
    onemask, blockevenmask, blockoddmask = (
        gpt.complex(f_grid),
        gpt.complex(f_grid),
        gpt.complex(f_grid),
    )
    dirmasks = [gpt.complex(f_grid) for p in range(nhops)]

    # auxilliary stuff needed for masks
    t("masks")
    onemask[:] = 1.0
    coor = gpt.coordinates(blockevenmask)
    block = numpy.array(f_grid.ldimensions) / numpy.array(c_grid.ldimensions)
    block_cb = coor[:, :] // block[:]

    # fill masks for sites within even/odd blocks
    gpt.coordinate_mask(blockevenmask, numpy.sum(block_cb, axis=1) % 2 == 0)
    blockoddmask @= onemask - blockevenmask

    # fill masks for sites on borders of blocks
    dirmasks_forward_np = coor[:, :] % block[:] == block[:] - 1
    dirmasks_backward_np = coor[:, :] % block[:] == 0
    # BUGFIX: index the direction masks by hop index p -- exactly as they are
    # consumed by dirbms below -- instead of by coordinate index mu.  The old
    # "dirmasks[mu]" / "dirmasks[mu + 4]" indexing coincides with p only for
    # 4d grids (dirs == [0,1,2,3]); for 5d grids (dirs == [1,2,3,4]) it
    # misaligned the forward masks with their hops and overflowed the list
    # (dirmasks[8] for mu == 4).
    for p, (mu, fb) in enumerate(dirdisps_full):
        if fb == +1:
            gpt.coordinate_mask(dirmasks[p], dirmasks_forward_np[:, mu])
        else:
            gpt.coordinate_mask(dirmasks[p], dirmasks_backward_np[:, mu])

    # save applications of matrix and coarsening if possible
    dirdisps = dirdisps_forward if save_links else dirdisps_full

    # create block maps
    t("blockmap")
    dirbms = [
        gpt.block.map(c_grid, basis, dirmasks[p])
        for p, (mu, fb) in enumerate(dirdisps)
    ]
    fullbm = gpt.block.map(c_grid, basis)

    for i, vr in enumerate(basis):
        # apply directional hopping terms
        # this triggers len(dirdisps) comms -> TODO expose DhopdirAll from Grid
        # BUT problem with vector<Lattice<...>> in rhs
        t("apply_hop")
        [fmat.Mdir(*dirdisp)(Mvr[p], vr) for p, dirdisp in enumerate(dirdisps)]

        # coarsen directional terms + write to link
        for p, (mu, fb) in enumerate(dirdisps):
            t("coarsen_hop")
            dirbms[p].project(oproj, Mvr[p])
            t("copy_hop")
            A[p][:, :, :, :, :, i] = oproj[:]

        # fast diagonal term: apply full matrix to both block cbs separately
        # and discard hops into other cb
        t("apply_self")
        tmp @= (
            blockevenmask * fmat * vr * blockevenmask
            + blockoddmask * fmat * vr * blockoddmask
        )

        # coarsen diagonal term
        t("coarsen_self")
        fullbm.project(selfproj, tmp)

        # write to self link
        t("copy_self")
        A[selflink][:, :, :, :, :, i] = selfproj[:]

        if verbose:
            gpt.message("coarsen: done with vector %d" % i)

    # communicate opposite links
    if save_links:
        t("comm")
        communicate_links(A, dirdisps_forward, make_hermitian)

    t()

    if verbose:
        gpt.message(t)
# NOTE(review): chunk starts mid-script; the leading statements reference a
# loop variable i whose loop header is outside this view -- they look like the
# tail of a per-basis-vector eigenvalue check. Confirm against the full file.
g.message(i)
# verify basis[i] is an eigenvector of q.Mpc to the given tolerance
g.algorithms.eigen.evals(q.Mpc, [basis[i]], check_eps2=1e-4, real=True)
g.mem_report(details=False)

# coarse grid
cgrid = params["cgrid"](q.F_grid_eo)
b = g.block.map(cgrid, basis)

# cheby on coarse grid
cop = params["cmatrix"](q.NDagN, b)

# implicitly restarted lanczos on coarse grid
irl = params["method_evec"]

# start vector: every one of the nbasis components set to one
cstart = g.vcomplex(cgrid, nbasis)
cstart[:] = g.vcomplex([1] * nbasis, nbasis)

g.mem_report()

# orthonormalize the block basis (northo passes)
northo = params["northo"]
for i in range(northo):
    g.message("Orthonormalization round %d" % i)
    b.orthonormalize()

g.mem_report()

# now define coarse-grid operator
# (with an orthonormal basis, project∘promote should be the identity on
# coarse vectors; report the relative deviation)
g.message(
    "Test precision of promote-project chain: %g"
    % (g.norm2(cstart - b.project * b.promote * cstart) / g.norm2(cstart))
)
else:
    # NOTE(review): chunk starts mid 'if'; the branch above presumably selects
    # a different work_dir (e.g. from the environment) -- confirm in full file.
    work_dir = "."

# grids: red-black 5d fine grid, matching coarse grid (single precision)
fgrid = g.grid([4, 8, 8, 8, 16], g.single, g.redblack)
cgrid = g.grid([1, 4, 4, 4, 4], g.single)
# fgrid = g.grid([12, 96, 48, 24, 24], g.single, g.redblack)
# cgrid = g.grid([1, 96//4, 48//4, 24//4, 24//4], g.single)

# vectors
nbasis = 40
nsingle = 10
nevec = 48
rng = g.random("test")
basis = [g.vspincolor(fgrid) for i in range(nbasis)]
cevec = [g.vcomplex(cgrid, nbasis) for i in range(nevec)]
# synthetic eigenvalues drawn around 2.0 (real parts only)
feval = [rng.normal(mu=2.0, sigma=0.5).real for i in range(nevec)]

# place all basis vectors on the odd checkerboard
for b in basis:
    b.checkerboard(g.odd)
rng.cnormal([basis, cevec])

# block map with two orthonormalization passes over the basis
# (note: name b is reused -- loop variable above, block map below)
b = g.block.map(cgrid, basis)
for i in range(2):
    b.orthonormalize()

for mpi_layout in [[1, 1, 1, 1, 1], [1, 2, 2, 2, 2]]:
    # save in fixed layout
    # NOTE(review): chunk ends inside this call; the cevec format parameters
    # continue past the visible span.
    g.save(
        f"{work_dir}/cevec",
        [basis, cevec, feval],
        g.format.cevec({
# benchmark grid from the command line (default 6^4)
grid = g.grid(g.default.get_ivec("--grid", [6, 6, 6, 6], 4), precision)

g.message(
    f"""
Coarse Operator Benchmark with
    fdimensions  : {grid.fdimensions}
    precision    : {precision.__name__}
    nbasis       : {nbasis}
    level        : {level}
"""
)

# Coarse operator: 9 link fields (8 hopping + 1 self), each an
# nbasis x nbasis matrix field, filled with random entries
A = [g.mcomplex(grid, nbasis) for __ in range(9)]
rng.cnormal(A)
co = g.qcd.fermion.coarse(A, {"level": level})

# Source and destination
src = g.vcomplex(grid, nbasis)
dst = g.vcomplex(grid, nbasis)
rng.cnormal(src)

# Flops
# (per-site count for the 9-point coarse stencil; presumably
#  9 complex matrix-vector products plus the 8-term accumulation --
#  TODO confirm against the operator implementation)
flops_per_site = 2 * nbasis * (36 * nbasis - 1)
flops = flops_per_site * src.grid.gsites * N
# bytes moved per application: 9 link matrices + source + destination
nbytes = (
    (9 * 2 * nbasis + 9 * 2 * nbasis * nbasis + 2 * nbasis)
    * precision.nbytes
    * src.grid.gsites
    * N
)

# Warmup
for n in range(5):
    co.mat(dst, src)

# Time
# NOTE(review): chunk ends here; the timed loop follows outside this view.
t0 = g.time()