def create_source(pos, point=False):
    # Build a time-sparsened source lattice: copies of the source are placed
    # on several time slices (separated by sparse_time), each weighted by a
    # random Z2 sign; slices beyond use_source_time_slices get weight 0.
    #
    # pos   : 4-coordinate of the base source position
    # point : if True use point sources, otherwise Z2 wall sources
    #
    # Returns (source lattice, per-slice positions, per-slice signs).
    # NOTE(review): relies on module-level globals g, rng, l_exact,
    # source_time_slices, use_source_time_slices, sparse_time, full_time, vol3d.
    srcD = g.mspincolor(l_exact.U_grid)
    srcD[:] = 0

    # create time-sparsened source
    sign_of_slice = [rng.zn(n=2) for i in range(source_time_slices)]
    for i in range(use_source_time_slices, source_time_slices):
        sign_of_slice[i] = 0.0  # deactivate unused slices
    # spatial coordinates fixed; time coordinate shifted by j * sparse_time
    pos_of_slice = [
        [
            pos[i] if i < 3 else (pos[i] + j * sparse_time) % full_time
            for i in range(4)
        ]
        for j in range(source_time_slices)
    ]
    g.message(f"Signature: {pos} -> {pos_of_slice} with signs {sign_of_slice}")
    for i in range(source_time_slices):
        if point:
            srcD += (
                g.create.point(g.lattice(srcD), pos_of_slice[i])
                * sign_of_slice[i]
            )
        else:
            # Z2 wall on the time slice, normalized by sqrt of spatial volume
            srcD += g.create.wall.z2(g.lattice(srcD), pos_of_slice[i][3], rng) * (
                sign_of_slice[i] / vol3d ** 0.5
            )
    return srcD, pos_of_slice, sign_of_slice
def run_test(U):
    """
    Check the exact sum rule for tr(D_5^2)

    Inputs: U = gauge field
    Output: imaginary parts of the eigenvalues
    """
    # representation name, used only in the final message
    rep = "adjoint" if "adjoint" in U[0].otype.__name__ else "fundamental"
    Nc = U[0].otype.Nc

    # imaginary parts of the eigenvalues and their squared sum
    ev = compute_evals(U)
    summe = sum(ev * ev)

    # expected value of the sum rule
    Volume = U[0].grid.fsites
    expected = Udelta_average(U) * Volume / 2.0

    # verify the sum rule to relative (per-volume) precision
    g.message(f"tr(-D_5^2): {summe}, expected: {expected}")
    assert abs(summe - expected) / Volume < 1e-4
    g.message(f"Test passed for SU({Nc}) {rep}.")
    return 1
def check_unitarity(U, eps_ref):
    # Measure the relative deviation of U * adj(U) from the identity and
    # require it to be below eps_ref; also verify group membership.
    identity = g.lattice(U)
    identity[:] = np.eye(U.otype.shape[0], dtype=U.grid.precision.complex_dtype)
    deviation = g.norm2(U * g.adj(U) - identity) / g.norm2(identity)
    eps = deviation ** 0.5
    g.message(f"Test unitarity: {eps}")
    assert eps < eps_ref
    U.otype.is_element(U)
def perform(self, root):
    # Assemble per-time-slice eigenvector bases into full 4d vectors and
    # save the combined basis to {root}/{self.name}/basis.
    #
    # For each time slice tprime, loads a lower-dimensional basis from disk
    # and copies it into the tprime hyperplane of the 4d lattices vcj.
    # NOTE(review): relies on module-level globals basis_size, T,
    # current_config and the config class.
    global basis_size, T, current_config
    if current_config is not None and current_config.conf_file != self.conf_file:
        current_config = None  # cached configuration belongs to another file
    if current_config is None:
        current_config = config(self.conf_file)
    c = None
    vcj = [
        g.vcolor(current_config.l_exact.U_grid) for jr in range(basis_size)
    ]
    for vcjj in vcj:
        vcjj[:] = 0
    for tprime in range(T):
        basis_evec, basis_evals = g.load(self.basis_fmt % (self.conf, tprime))
        # copy plan that embeds the slice at time coordinate tprime
        plan = g.copy_plan(vcj[0], basis_evec[0],
                           embed_in_communicator=vcj[0].grid)
        c = g.coordinates(basis_evec[0])
        # append the fixed time coordinate tprime to each source coordinate
        plan.destination += vcj[0].view[np.hstack(
            (c, np.ones((len(c), 1), dtype=np.int32) * tprime))]
        plan.source += basis_evec[0].view[c]
        plan = plan()
        for l in range(basis_size):
            plan(vcj[l], basis_evec[l])
    for l in range(basis_size):
        g.message("Check norm:", l, g.norm2(vcj[l]))
    g.save(f"{root}/{self.name}/basis", vcj)
def orthogonalize(w, basis, ips=None, nblock=4):
    # Orthogonalize w against basis (in place), subtracting the projection
    # of w onto each basis vector; inner products are batched in blocks of
    # nblock per global sum.
    #
    # ips : optional list; if given, filled with the inner products.

    # verbosity
    verbose = gpt.default.is_verbose("orthogonalize")
    n = len(basis)
    if n == 0:
        return
    grid = basis[0].grid
    i = 0
    # accumulated wall-clock time per phase
    t_rank_inner_product = 0.0
    t_globalSum = 0.0
    t_linearCombination = 0.0
    for i in range(0, n, nblock):
        t_rank_inner_product -= gpt.time()
        lip = gpt.rank_inner_product(basis[i:i + nblock], w)
        t_rank_inner_product += gpt.time()
        t_globalSum -= gpt.time()
        grid.globalsum(lip)
        lip = [complex(x) for x in lip]
        t_globalSum += gpt.time()
        if ips is not None:
            for j in range(len(lip)):
                ips[i + j] = lip[j]
        # w <- w - sum_j <basis[i+j], w> basis[i+j]
        expr = w - lip[0] * basis[i + 0]
        for j in range(1, len(lip)):
            expr -= lip[j] * basis[i + j]
        t_linearCombination -= gpt.time()
        w @= expr
        t_linearCombination += gpt.time()
    if verbose:
        gpt.message(
            "Timing Ortho: %g rank_inner_product, %g globalsum, %g lc"
            % (t_rank_inner_product, t_globalSum, t_linearCombination))
def get_otype_from_multiplication(t_otype, t_adj, f_otype, f_adj):
    # Determine the object type resulting from multiplying (optionally
    # adjoined) t_otype with f_otype.
    #
    # Lookup order:
    #   - inner-product table (itab) when f is adjoined and t is not,
    #   - outer-product table (otab) in the symmetric adjoint case,
    #   - otherwise the forward multiplication table (mtab), then the
    #     reverse table (rmtab), resolving data aliases before giving up.
    #
    # Raises an explicit Exception when no table entry exists (previously
    # the code logged the problem and then crashed with an opaque KeyError).
    if f_adj and not t_adj and f_otype.itab is not None:
        # inner
        tab = f_otype.itab
        rtab = {}
    elif not t_adj and f_adj and f_otype.otab is not None:
        # outer
        tab = f_otype.otab
        rtab = {}
    else:
        tab = f_otype.mtab
        rtab = t_otype.rmtab
    if t_otype.__name__ in tab:
        return tab[t_otype.__name__][0]()
    if f_otype.__name__ not in rtab:
        # try to resolve through data aliases before failing
        if f_otype.data_alias is not None:
            return get_otype_from_multiplication(
                t_otype, t_adj, f_otype.data_alias(), f_adj
            )
        elif t_otype.data_alias is not None:
            return get_otype_from_multiplication(
                t_otype.data_alias(), t_adj, f_otype, f_adj
            )
        else:
            gpt.message(
                "Missing entry in multiplication table: %s x %s"
                % (t_otype.__name__, f_otype.__name__)
            )
            raise Exception(
                "Missing entry in multiplication table: %s x %s"
                % (t_otype.__name__, f_otype.__name__)
            )
    return rtab[f_otype.__name__][0]()
def inv(dst, src):
    # Apply the underlying operation after splitting the global vector
    # lists into nparallel local groups; merges the results back into dst.
    # NOTE(review): relies on closure variables nparallel, matrix_split,
    # cache and operation_split.

    # verbosity
    verbose = g.default.is_verbose("split")
    if len(src) % nparallel != 0:
        raise Exception(
            f"Cannot divide {len(src)} global vectors into {nparallel} groups"
        )
    t0 = g.time()
    src_split = g.split(src, matrix_split.grid[1], cache)
    dst_split = g.split(dst, matrix_split.grid[0], cache)
    t1 = g.time()
    operation_split(dst_split, src_split)
    t2 = g.time()
    g.unsplit(dst, dst_split, cache)
    t3 = g.time()
    if verbose:
        g.message(
            f"Split {len(src)} global vectors to {len(src_split)} local vectors\n"
            + f"Timing: {t1-t0} s (split), {t2-t1} s (operation), {t3-t2} s (unsplit)"
        )
def get_next_name(root, jobs, max_weight, stale_seconds):
    # Select the next runnable job and return its name ("" if none).
    #
    # A job is runnable when it has not started (started-but-incomplete jobs
    # older than stale_seconds are purged and treated as not started), its
    # weight does not exceed max_weight (if given), and all dependencies have
    # completed.  The chosen job is atomically reserved before returning.

    # create lut
    lut = {}
    for j in jobs:
        lut[j.name] = j

    for j in jobs:
        if max_weight is None or j.weight <= max_weight:
            has_started = j.has_started(root)
            if has_started and stale_seconds is not None:
                if not j.has_completed(root):
                    run_time = j.run_time(root)
                    if run_time > stale_seconds:
                        g.message(
                            f"Job {j.name} is stale after {run_time} seconds; purge"
                        )
                        j.purge(root)
                        has_started = False
            if not has_started:
                # check dependencies
                dependencies_ok = True
                for dep_j in [lut[d] for d in j.needs]:
                    if not dep_j.has_completed(root):
                        dependencies_ok = False
                        g.message(
                            f"Dependency {dep_j.name} of {j.name} is not yet satisfied."
                        )
                        break
                if dependencies_ok:
                    # last check if in meantime somebody else has started running same job
                    if j.atomic_reserve_start(root):
                        return j.name
    return ""
def __init__(self, n, precision):
    # Map an n-qubit state space onto a one-dimensional lattice of 2^n sites.
    #
    # n         : number of qubits
    # precision : gpt precision type for the grid
    self.n = n
    self.fdimensions = [2**n]
    self.grid = g.grid(self.fdimensions, precision)
    self.verbose = g.default.is_verbose("qis_map")
    self.zero_coordinate = (0, )  # |00000 ... 0> state

    t = g.timer("map_init")
    t("coordinates")
    # TODO: need to split over multiple dimensions, single dimension can hold at most 32 bits
    self.coordinates = g.coordinates(self.grid)
    # coordinates with bit i flipped (used for bit-flip style gates)
    self.not_coordinates = [
        np.bitwise_xor(self.coordinates, 2**i) for i in range(n)
    ]
    for i in range(n):
        self.not_coordinates[i].flags["WRITEABLE"] = False
    t("masks")
    # one_mask[i] / zero_mask[i] select sites where qubit i is 1 / 0
    self.one_mask = []
    self.zero_mask = []
    for i in range(n):
        proj = np.bitwise_and(self.coordinates, 2**i)

        mask = g.complex(self.grid)
        g.coordinate_mask(mask, proj != 0)
        self.one_mask.append(mask)

        mask = g.complex(self.grid)
        g.coordinate_mask(mask, proj == 0)
        self.zero_mask.append(mask)
    t()
    if self.verbose:
        g.message(t)
def converged(self, a, mat, evals, little_evec):
    # Check convergence of the last Nstop eigenmodes.
    #
    # Walks through the window [len(evals)-Nstop, len(evals)) with an
    # exponentially growing stride, measuring the residual of a single
    # reconstructed eigenvector per step; returns True when at least
    # Nstop modes pass the residual criterion.
    evals_max = np.max(np.abs(evals))
    Nstop = self.params["Nstop"]
    idx0 = len(evals) - Nstop
    idx1 = len(evals)
    n = 1
    Nconv = 0
    while True:
        idx = idx0 + n - 1
        if idx >= idx1:
            idx = idx1 - 1  # clamp to last index
        n *= 2  # exponential stride through the test window
        ev, eps2 = g.algorithms.eigen.evals(
            mat, [a.single_evec(little_evec, idx)])
        # normalize residual by the largest eigenvalue magnitude
        eps2 = eps2[0] / evals_max**2.0
        if self.verbose:
            g.message(f"eval[{idx1 - idx - 1}] = {ev[0]} ; eps^2 = {eps2}")
        if eps2 < self.params["resid"]:
            Nconv = max([Nconv, idx1 - idx])
        if idx == idx1 - 1:
            break
    if self.verbose:
        g.message(f"Arnoldi: {Nconv} eigenmodes converged")
    return Nconv >= Nstop
def next(root, jobs, max_weight=None, stale_seconds=None):
    # Select (on rank 0), broadcast, and run the next runnable job.
    # Returns the executed job, or None when no job is runnable.
    if g.rank() == 0:
        j = get_next_name(root, jobs, max_weight, stale_seconds).encode("utf-8")
    else:
        j = bytes()
    # all ranks must agree on the selected job name
    j_name = g.broadcast(0, j).decode("utf-8")
    for j in jobs:
        if j.name == j_name:
            g.message(f"""
--------------------------------------------------------------------------------
   Start job {j.name}
--------------------------------------------------------------------------------
""")
            t0 = g.time()
            j(root)
            t1 = g.time()
            g.message(f"""
--------------------------------------------------------------------------------
   Completed {j.name} in {t1-t0} seconds
--------------------------------------------------------------------------------
""")
            return j
    return None
def implicit_restart(self, H, evals, p):
    # Implicitly restarted Arnoldi step: compress the Krylov factorization
    # from n to k = n - p vectors via p shifted QR sweeps using the
    # unwanted eigenvalue shifts in evals.
    n = len(self.H)
    k = n - p
    Q = np.identity(n, np.complex128)
    eye = np.identity(n, np.complex128)
    t0 = g.time()
    # accumulate the shifted QR transformations in Q
    for i in range(p):
        Qi, Ri = np.linalg.qr(H - evals[i] * eye)
        H = Ri @ Qi + evals[i] * eye
        Q = Q @ Qi
    t1 = g.time()
    if self.verbose:
        g.message(f"Arnoldi: QR in {t1-t0} s")
    # residual vector of the compressed factorization
    r = g.eval(self.basis[k] * H[k, k - 1] +
               self.basis[-1] * self.H[-1][-1] * Q[n - 1, k - 1])
    rn = g.norm2(r)**0.5
    t0 = g.time()
    # rotate the basis into the compressed (k-dimensional) space
    g.rotate(self.basis, np.ascontiguousarray(Q.T), 0, k, 0, n)
    t1 = g.time()
    if self.verbose:
        g.message(f"Arnoldi: rotate in {t1-t0} s")
    self.basis = self.basis[0:k]
    self.basis.append(g.eval(r / rn))
    # store the compressed Hessenberg matrix column-wise; last entry is the
    # new residual norm
    self.H = [[H[j, i] for j in range(i + 2)] for i in range(k)]
    self.H[-1][-1] = rn
def inv(dst, src):
    # One SAP (Schwarz alternating procedure) cycle over the even/odd block
    # domains: solve on each domain in turn and accumulate into dst.
    # NOTE(review): relies on closure variables F_domains, op, solver.
    dst[:] = 0
    eta = gpt.copy(src)  # running residual
    ws = [gpt.copy(src) for _ in range(2)]
    for eo in range(2):
        ws[0][:] = 0
        src_blk = F_domains[eo].lattice(op.otype)
        dst_blk = F_domains[eo].lattice(op.otype)
        F_domains[eo].project(src_blk, eta)
        dst_blk[:] = 0  # for now
        solver[eo](dst_blk, src_blk)
        F_domains[eo].promote(ws[0], dst_blk)
        if eo == 0:
            # update the residual before visiting the second domain
            op(ws[1], ws[0])
            eta -= ws[1]
        dst += ws[0]
    gpt.message(
        f"SAP cycle; |rho|^2 = {gpt.norm2(eta):g}; |dst|^2 = {gpt.norm2(dst):g}"
    )
def open_view(self, xk, iview, write, mpi, fdimensions, g_cb, l_cb):
    # Open (or reuse from cache) the file handle and coordinate set for a
    # cartesian view.
    #
    # Returns (file handle or None, coordinates); handles are cached by
    # tag and coordinates by tag_pos, so repeated access is cheap.
    cv = gpt.cartesian_view(iview if iview is not None else -1, mpi,
                            fdimensions, g_cb, l_cb)
    dn, fn = get_local_name(self.root, cv)
    loc_desc = cv.describe() + "/" + ("Write" if write else "Read")

    tag = "%d-%s" % (xk, str(iview))
    tag_pos = "%s-%s-%s-%s" % (tag, str(fdimensions), str(g_cb), str(l_cb))

    if loc_desc != self.loc_desc:
        # view layout changed: drop all cached file handles
        self.close_views()
        self.loc_desc = loc_desc
        if self.verbose:
            gpt.message("Switching view to %s" % self.loc_desc)

    if tag not in self.loc:
        if write and dn is not None:
            os.makedirs(dn, exist_ok=True)
        # fn may be None when this rank holds no part of the view
        self.loc[tag] = gpt.FILE(
            fn, "a+b" if write else "rb") if fn is not None else None

    if tag_pos not in self.pos:
        self.pos[tag_pos] = gpt.coordinates(cv)

    return self.loc[tag], self.pos[tag_pos]
def inv(dst, src):
    # One SAP cycle using coordinate-view data movement between the global
    # lattice and the even/odd block lattices; reports per-phase timings.
    # NOTE(review): relies on closure variables sap, solver, src_blk, dst_blk.
    dst[:] = 0
    eta = gpt.copy(src)  # running residual
    ws = [gpt.copy(src) for _ in range(2)]
    dt_solv = dt_distr = dt_hop = 0.0
    for eo in range(2):
        ws[0][:] = 0
        dt_distr -= gpt.time()
        src_blk[sap.pos] = eta[sap.coor[
            eo]]  # reminder view interface eta[[pos]], ... eta[...,idx]
        dt_distr += gpt.time()

        dt_solv -= gpt.time()
        solver[eo](dst_blk, src_blk)
        dt_solv += gpt.time()

        dt_distr -= gpt.time()
        ws[0][sap.coor[eo]] = dst_blk[sap.pos]
        dt_distr += gpt.time()

        dt_hop -= gpt.time()
        if eo == 0:
            # update the residual before the odd-domain solve
            sap.op(ws[1], ws[0])
            eta -= ws[1]
        dst += ws[0]
        dt_hop += gpt.time()
    gpt.message(
        f"SAP cycle; |rho|^2 = {gpt.norm2(eta):g}; |dst|^2 = {gpt.norm2(dst):g}"
    )
    gpt.message(
        f"SAP Timings: distr {dt_distr:g} secs, blk_solver {dt_solv:g} secs, hop+update {dt_hop:g} secs"
    )
def __call__(self, mat, src, psi):
    """
    Conjugate gradient: solve mat * psi = src using psi as initial guess.

    mat : linear operator, called as mat(in, out)
    src : right-hand side
    psi : initial guess on input, solution on output

    Iterates until |r|^2 <= eps^2 * |src|^2 or maxiter is reached;
    per-iteration squared residuals are stored in self.history.

    Fix: removed dead locals of the original (`guess`, and the initial
    `d`/`b` computed before the loop but overwritten before any use).
    """
    assert (src != psi)
    self.history = []
    verbose = g.default.is_verbose("cg")
    t0 = g.time()
    p, mmp, r = g.copy(src), g.copy(src), g.copy(src)
    mat(psi, mmp)  # in, out
    r @= src - mmp  # initial residual
    p @= r  # initial search direction
    cp = g.norm2(p)
    ssq = g.norm2(src)
    rsq = self.eps**2. * ssq  # convergence target
    for k in range(1, self.maxiter + 1):
        c = cp
        mat(p, mmp)
        d = g.innerProduct(p, mmp).real
        a = c / d  # step length along the search direction
        cp = g.axpy_norm2(r, -a, mmp, r)  # r -= a * mmp; cp = |r|^2
        b = cp / c
        psi += a * p
        p @= b * p + r  # new conjugate search direction
        self.history.append(cp)
        if verbose:
            g.message("res^2[ %d ] = %g" % (k, cp))
        if cp <= rsq:
            if verbose:
                t1 = g.time()
                g.message("Converged in %g s" % (t1 - t0))
            break
def sample(self, t, p):
    # Fill target t with random samples from this generator.
    #
    # t : list of lattices (sampled recursively), a single lattice, or None
    #     (raw samples are returned directly from cgpt)
    # p : parameter dictionary; may contain "pos" to restrict coordinates
    if type(t) == list:
        for x in t:
            self.sample(x, p)
    elif t is None:
        return cgpt.random_sample(self.obj, t, p)
    elif type(t) == gpt.lattice:
        if "pos" in p:
            pos = p["pos"]
        else:
            pos = gpt.coordinates(t)
        t0 = gpt.time()
        # sample raw data for the selected coordinates
        mv = cgpt.random_sample(
            self.obj,
            pos,
            {
                **p,
                **{
                    "shape": list(t.otype.shape),
                    "grid": t.grid.obj,
                    "precision": t.grid.precision,
                },
            },
        )
        t1 = gpt.time()
        t[pos] = mv
        if self.verbose:
            szGB = mv.size * mv.itemsize / 1024.0**3.0
            gpt.message("Generated %g GB of random data at %g GB/s" %
                        (szGB, szGB / (t1 - t0)))
        return t
    else:
        assert 0
def orthogonalize(w, basis, ips=None, nblock=4):
    # Orthogonalize w against basis (in place), batching nblock inner
    # products per global sum; instrumented with gpt timers and, when
    # performance verbosity is enabled, cgpt-level timers.
    #
    # ips : optional list; if given, filled with the inner products.

    # verbosity
    t = gpt.timer("orthogonalize", verbose_performance)
    n = len(basis)
    if n == 0:
        return
    grid = basis[0].grid
    i = 0
    if verbose_performance:
        cgpt.timer_begin()
    for i in range(0, n, nblock):
        t("rank_inner_product")
        lip = gpt.rank_inner_product(basis[i : i + nblock], w)
        t("global_sum")
        grid.globalsum(lip)
        t("create expression")
        lip = [complex(x) for x in lip]
        if ips is not None:
            for j in range(len(lip)):
                ips[i + j] = lip[j]
        # w <- w - sum_j <basis[i+j], w> basis[i+j]
        expr = w - lip[0] * basis[i + 0]
        for j in range(1, len(lip)):
            expr -= lip[j] * basis[i + j]
        t("linear combination")
        w @= expr
    t()
    if verbose_performance:
        t_cgpt = gpt.timer("cgpt_orthogonalize", True)
        t_cgpt += cgpt.timer_end()
        gpt.message(f"\nPerformance of orthogonalize:\n{t}\n{t_cgpt}")
def expr_eval(first, second=None, ac=False):
    # Evaluate a lattice expression.
    #
    # Call forms:
    #   expr_eval(expr)        -> new lattice holding the result
    #   expr_eval(dst, expr)   -> evaluate into existing lattice dst
    #   ac=True                -> accumulate into dst (requires dst form)
    if not second is None:
        # destination provided: evaluate expression into `first`
        t_obj = first.v_obj
        e = gpt.expr(second)
    else:
        if type(first) == gpt.lattice:
            return first  # already a lattice, nothing to evaluate
        e = gpt.expr(first)
        # infer result grid/otype from a lattice appearing in the expression
        lat = get_lattice(e)
        grid = lat.grid
        otype = lat.otype
        n = len(otype.v_idx)
        t_obj = None
    if gpt.default.is_verbose("eval"):
        gpt.message("GPT::verbose::eval: " + str(e))
    if not t_obj is None:
        for i, t in enumerate(t_obj):
            assert (0 == cgpt.eval(t, e.val, e.unary, ac, i))
        return first
    else:
        assert (ac == False)  # accumulation requires a destination lattice
        # let cgpt allocate result objects and report their otype/precision
        t_obj, s_ot, s_pr = [0] * n, [0] * n, [0] * n
        for i in otype.v_idx:
            t_obj[i], s_ot[i], s_pr[i] = cgpt.eval(t_obj[i], e.val, e.unary,
                                                   False, i)
        if len(s_ot) == 1:
            otype = eval("gpt.otype." + s_ot[0])
        else:
            otype = gpt.otype.from_v_otype(s_ot)
        return gpt.lattice(grid, otype, t_obj)
def converged(self, a, mat, evals, little_evec):
    # Count converged eigenmodes, testing reconstructed eigenvectors from
    # the end of the spectrum with exponentially growing stride; stops at
    # the first mode that fails the residual check.
    n = 1
    Nconv = 0
    while True:
        idx = len(evals) - n
        n *= 2  # exponential stride towards the start of the spectrum
        if idx < 0:
            idx = 0
        try:
            g.algorithms.eigen.evals(
                mat,
                [a.single_evec(little_evec, idx)],
                check_eps2=evals[-1]**2.0 * self.params["resid"],
                verbose=self.verbose,
            )
        except g.algorithms.eigen.EvalsNotConverged:
            break  # everything from idx downwards is considered unconverged
        Nconv = len(evals) - idx
        if idx == 0:
            break
    if self.verbose:
        g.message(f"Arnoldi: {Nconv} eigenmodes converged")
    return Nconv >= self.params["Nstop"]
def sample(self, t, p):
    # Fill target t with random samples from this generator.
    #
    # t : list of lattices (sampled one by one), a single lattice, or None
    #     (raw samples returned directly from cgpt)
    # p : parameter dictionary forwarded to cgpt.random_sample
    if type(t) == list:
        for x in t:
            self.sample(x, p)
        return t
    elif t is None:
        return cgpt.random_sample(self.obj, p)
    elif type(t) == gpt.lattice:
        t0 = gpt.time()
        cgpt.random_sample(
            self.obj,
            {
                **p,
                **{
                    "lattices": [t]
                },
            },
        )
        t1 = gpt.time()
        assert "pos" not in p  # to ensure that deprecated code is not used
        # optimize memory mapping
        t.swap(gpt.copy(t))
        if self.verbose_performance:
            szGB = t.global_bytes() / 1024.0**3.0
            gpt.message("Generated %g GB of random data at %g GB/s" %
                        (szGB, szGB / (t1 - t0)))
        return t
    else:
        assert 0
def _ac(*arguments):
    # Call through to f, then assert that the wrapped iterative solver
    # reports a converged state (warn when the state is unknown).
    result = f(*arguments)
    converged = iterative.converged
    if converged is None:
        gpt.message("Warning: could not determine converged state")
        return result
    assert converged
    return result
def assert_gradient_error(self, rng, fields, dfields, epsilon_approx, epsilon_assert):
    # Verify this functional and its gradient:
    #  - the functional value must be exactly real,
    #  - the analytic gradient must match a finite-difference approximation
    #    (step epsilon_approx) to relative precision epsilon_assert,
    #  - the gradient must live in the cartesian space (otype matches the
    #    random weights; group defect bounded).
    fields = g.util.to_list(fields)
    dfields = g.util.to_list(dfields)
    weights = rng.normal_element(g.group.cartesian(dfields))

    # the functional needs to be real
    eps = complex(self(fields)).imag
    g.message(f"Test that functional is real: {eps}")
    assert eps == 0.0

    # the gradient needs to be correct
    gradient = self.gradient(fields, dfields)
    a = sum(
        [g.group.inner_product(w, gr) for gr, w in zip(gradient, weights)])
    b = self.approximate_gradient(fields, dfields, weights, epsilon=epsilon_approx)
    eps = abs(a - b) / abs(b)
    g.message(f"Assert gradient error: {eps} < {epsilon_assert}")
    if eps > epsilon_assert:
        g.message(f"Error: gradient = {a} <> approximate_gradient = {b}")
        assert False

    # the gradient needs to live in cartesian
    for gr in gradient:
        # note: an otype mismatch is only reported here, not fatal
        if gr.otype.__name__ != weights[0].otype.__name__:
            g.message(
                f"Gradient has incorrect object type: {gr.otype.__name__} != {weights[0].otype.__name__}"
            )
        eps = g.group.defect(gr)
        if eps > epsilon_assert:
            g.message(f"Error: cartesian defect: {eps} > {epsilon_assert}")
            assert False
def timed_end(self, t):
    # Finalize the per-call timer t, fold it into the cumulative timer,
    # and report both — only when performance verbosity is enabled.
    if not self.verbose_performance:
        return
    t()
    self.timer += t
    gpt.message(
        f"\nPerformance of {self.name}:\n\nThis call:\n{t}\n\nAll calls:\n{self.timer}\n"
    )
def save(self, obj):
    # Checkpoint obj to the open file self.f.
    #
    # Lists are saved element-wise; lattices via their raw memoryview;
    # floats/complex as packed little-endian doubles.  Each memoryview
    # record is written as: 8-byte size, 4-byte crc32, payload.
    if type(obj) == list:
        for o in obj:
            self.save(o)
    elif type(obj) == gpt.lattice:
        self.save(obj.mview())
    elif type(obj) == float:
        self.save(memoryview(struct.pack("d", obj)))
    elif type(obj) == complex:
        self.save(memoryview(struct.pack("dd", obj.real, obj.imag)))
    elif type(obj) == memoryview:
        self.f.seek(0, 1)
        sz = len(obj)
        szGB = sz / 1024.0**3
        self.f.write(sz.to_bytes(8, "little"))
        t0 = gpt.time()
        self.f.write(gpt.crc32(obj).to_bytes(4, "little"))
        t1 = gpt.time()
        self.f.write(obj)
        self.f.flush()
        t2 = gpt.time()
        if self.verbose:
            if self.grid is None:
                gpt.message(
                    "Checkpoint %g GB on head node at %g GB/s for crc32 and %g GB/s for write in %g s total"
                    % (szGB, szGB / (t1 - t0), szGB / (t2 - t1), t2 - t0))
            else:
                # aggregate the written size over all ranks for reporting
                szGB = self.grid.globalsum(szGB)
                gpt.message(
                    "Checkpoint %g GB at %g GB/s for crc32 and %g GB/s for write in %g s total"
                    % (szGB, szGB / (t1 - t0), szGB / (t2 - t1), t2 - t0))
    else:
        assert 0
def __call__(self, mat, src, psi):
    # Minimal residual solver: iterate psi towards mat * psi = src until
    # |r|^2 <= eps^2 * |src|^2 or maxiter is reached.
    #
    # mat : linear operator, called as mat(in, out)
    # src : right-hand side
    # psi : initial guess on input, solution on output
    verbose = g.default.is_verbose("mr")
    t0 = time()
    r, mmr = g.copy(src), g.copy(src)
    mat(psi, mmr)
    r @= src - mmr  # initial residual
    ssq = g.norm2(src)
    rsq = self.eps**2. * ssq  # convergence target
    for k in range(self.maxiter):
        mat(r, mmr)
        ip, mmr2 = g.innerProductNorm2(mmr, r)
        if mmr2 == 0.:
            continue  # degenerate step: no update possible this iteration
        # relaxed MR step length
        alpha = ip.real / mmr2 * self.relax
        psi += alpha * r
        r2 = g.axpy_norm2(r, -alpha, mmr, r)
        if verbose:
            g.message("res^2[ %d ] = %g" % (k, r2))
        if r2 <= rsq:
            if verbose:
                t1 = time()
                g.message("Converged in %g s" % (t1 - t0))
            break
def hamiltonian(draw):
    # Compute the HMC Hamiltonian h and action s.
    #
    # draw=True : refresh momenta (heatbath) and draw pseudofermion fields;
    #             for each non-two-flavor Hasenbusch ratio, cross-check the
    #             rational approximation against the exact action value.
    # draw=False: evaluate the current action/Hamiltonian only.
    # NOTE(review): relies on many module-level globals (rng, U, U_mom,
    # fields, hasenbusch_ratios, action_*, pure_gauge, project_open_bc).
    if draw:
        rng.normal_element(U_mom)
        project_open_bc(U_mom)
        s = action_gauge(U)
        if not pure_gauge:
            #sp = sd(fields)
            for i in range(len(hasenbusch_ratios)):
                if hasenbusch_ratios[i][3] is not two_flavor_ratio:
                    si = action_fermions_e[i].draw(fields[i], rng,
                                                   hasenbusch_ratios[i][2])
                    #si = sp[i]
                    si_check = action_fermions_e[i](fields[i])

                    g.message("action", i, si_check)
                    r = f"{hasenbusch_ratios[i][0]}/{hasenbusch_ratios[i][1]}"
                    e = abs(si / si_check - 1)
                    g.message(
                        f"Error of rational approximation for Hasenbusch ratio {r}: {e}"
                    )
                else:
                    si = action_fermions_e[i].draw(fields[i], rng)
                s += si
        h = s + action_gauge_mom(U_mom)
    else:
        s = action_gauge(U)
        if not pure_gauge:
            for i in range(len(hasenbusch_ratios)):
                s += action_fermions_e[i](fields[i])
        h = s + action_gauge_mom(U_mom)
    return h, s
def __init__(self, op, bs):
    # Set up SAP (Schwarz alternating procedure) block operators.
    #
    # op : fermion operator on the full lattice
    # bs : block sizes; for a 5d fermion grid the full 5th dimension is
    #      prepended automatically
    self.op = op
    self.op_blk = []
    dt = -gpt.time()

    # thanks to double copy inside operator, U only temporary
    Ublk = [sap_blk(op.U_grid, bs, eo) for eo in range(2)]
    U = [gpt.mcolor(Ublk[0].grid) for _ in range(4)]

    for eo in range(2):
        # gather the gauge links of this block parity and apply block BCs
        Ucoor = Ublk[eo].coor(op.U_grid)
        for mu in range(4):
            U[mu][Ublk[eo].pos] = op.U[mu][Ucoor]
        Ublk[eo].set_BC_Ufld(U)
        self.op_blk.append(op.updated(U))

    if self.op.F_grid.nd == len(bs) + 1:
        _bs = [self.op.F_grid.fdimensions[0]] + bs
    else:
        _bs = bs

    # coordinate maps between the fermion grid and the block lattices;
    # frozen read-only since they are shared by every cycle
    blk = [sap_blk(self.op.F_grid, _bs, eo) for eo in range(2)]
    self.pos = blk[0].pos
    self.pos.flags["WRITEABLE"] = False
    self.coor = [blk[eo].coor(op.F_grid) for eo in range(2)]
    for eo in range(2):
        self.coor[eo].flags["WRITEABLE"] = False
    dt += gpt.time()
    gpt.message(f"SAP Initialized in {dt:g} secs")
def save(filename, objs, params):
    # Serialize objs into filename via gpt_io and store a crc32-protected
    # plain-text index, written by rank 0 only.
    t_start = gpt.time()

    # create io
    writer = gpt_io(filename, params, True)

    # build the index in memory
    index_stream = io.StringIO("")
    writer.create_index(index_stream, "", objs)
    index_view = memoryview(index_stream.getvalue().encode("utf-8"))
    index_crc = gpt.crc32(index_view)

    # only the head rank touches the index files
    if gpt.rank() == 0:
        open(filename + "/index", "wb").write(index_view)
        open(filename + "/index.crc32", "wt").write("%X\n" % index_crc)

    # close
    writer.close()

    # goodbye
    if writer.verbose:
        gpt.message("Completed writing %s in %g s" %
                    (filename, gpt.time() - t_start))
def inv(dst, src):
    # Diagnostic pass: print the true relative residual of each solution
    # vector against its source; dst is not modified.
    for i in range(len(dst)):
        residual = g.norm2(mat * dst[i] - src[i]) ** 0.5
        source_norm = g.norm2(src[i]) ** 0.5
        g.message(
            f"{self.tag}| mat * dst[{i}] - src[{i}] | / | src | = {residual/source_norm}, | src[{i}] | = {source_norm}"
        )