def log(i, convergence_threshold=0.5):
    """Return the logarithm of the matrix-valued lattice ``i`` from a power series.

    The field is rescaled by a scalar ``n`` such that ``i = n * (1 + x)`` and
    ``log(i) = log(n) + log(1 + x)`` is summed as
    ``x - x^2/2 + x^3/3 - ...`` up to a fixed order.  The summation is
    carried out in double precision; ``i`` itself is not modified.

    Args:
        i: input lattice.
        convergence_threshold: upper bound on the size estimate of ``x`` for
            which the series is attempted.

    Returns:
        A lattice holding the series result; it is converted back to a
        lattice created from ``i`` when ``i`` is not double precision.

    Raises:
        AssertionError: if the size estimate of ``x`` is not below
            ``convergence_threshold``.
    """
    # i = n*(1 + x), log(i) = log(n) + log(1+x)
    # x = i/n - 1, |x|^2 = <i/n - 1, i/n - 1> = |i|^2/n^2 + |1|^2 - (<i,1> + <1,i>)/n
    # d/dn |x|^2 = -2 |i|^2/n^3 + (<i,1> + <1,i>)/n^2 = 0 -> 2|i|^2 == n (<i,1> + <1,i>)
    if i.grid.precision != gpt.double:
        x = gpt.convert(i, gpt.double)
    else:
        x = gpt.copy(i)

    I = numpy.identity(x.otype.shape[0], x.grid.precision.complex_dtype)
    lI = gpt.lattice(x)
    lI[:] = I

    n = gpt.norm2(x) / gpt.inner_product(x, lI).real
    x /= n
    x -= lI

    n2 = gpt.norm2(x) ** 0.5 / x.grid.gsites

    # Validate first: for n2 >= 1 the order formula below divides by a
    # non-positive number and would fail (or go negative) before the
    # intended convergence check could fire.
    assert n2 < convergence_threshold
    order = 8 * int(16 / (-numpy.log10(n2)))

    o = gpt.copy(x)
    xn = gpt.copy(x)
    for j in range(2, order + 1):
        xn @= xn * x
        o -= xn * ((-1.0) ** j / j)

    o += lI * numpy.log(n)

    if i.grid.precision != gpt.double:
        r = gpt.lattice(i)
        gpt.convert(r, o)
        o = r
    return o
def __call__(self, mat, src, t):
    """Estimate an eigenvalue of ``mat`` by repeated application to ``src``.

    Args:
        mat: callable ``mat(dst, src)`` writing the matrix image into ``dst``.
        src: start vector; a normalized copy is iterated, ``src`` is untouched.
        t: timer callable, invoked with a section label.

    Returns:
        ``(ev, vector, converged)`` where ``ev`` is the last estimate
        ``<v|mat|v>``, ``vector`` the last normalized iterate and
        ``converged`` tells whether two successive estimates agreed within
        ``self.tol`` relative accuracy before ``self.maxit`` iterations.
    """
    image = g.lattice(src)
    vec = g.copy(src)
    vec /= g.norm2(vec) ** 0.5

    previous = None
    for it in range(self.maxit):
        t("matrix")
        mat(image, vec)

        t("inner_product")
        ev = g.inner_product(vec, image)

        t("other")
        if self.real:
            ev = ev.real
        self.log_convergence(it, ev)

        t("normalize")
        vec @= image / g.norm2(image) ** 0.5

        t("other")
        if previous is not None and abs(ev - previous) < self.tol * abs(ev):
            self.log(f"converged in iteration {it}")
            return (ev, vec, True)
        previous = ev

    return (ev, vec, False)
def __call__(self, Uprime):
    """Return Re <src, M(Uprime)^dag G5 src> for the operator selected by ``tag``.

    ``src``, ``fermion`` and ``tag`` are taken from the enclosing scope.
    """
    adjoint = get_matrix(fermion.updated(Uprime), tag).adj()
    value = g.inner_product(src, adjoint * fermion.G5 * src)
    return value.real
def __call__(self, fields):
    """Return Re <psi, (MMdag(M1))^-1 psi> with psi = M(M2) phi.

    ``self._updated(fields)`` must yield four items; the third one is not
    used here.
    """
    M1, M2, _, phi = self._updated(fields)

    # psi = M(M2) phi
    psi = g.lattice(phi)
    numerator = self.operator.M(M2)
    psi @= numerator * phi

    # chi = inverter(MMdag(M1)) psi
    chi = g.lattice(phi)
    solver = self.inverter(self.operator.MMdag(M1))
    chi @= solver * psi

    overlap = g.inner_product(psi, chi)
    return overlap.real
def inv(psi, src):
    """Conjugate-gradient solve of ``mat psi = src``, updating ``psi`` in place.

    ``psi`` is used as the initial guess.  The squared residual of every
    iteration is appended to ``self.history``.  Iteration stops once the
    squared residual is at most ``self.eps**2`` times ``|src|^2`` (or times
    the initial squared residual if ``src`` is zero), or after
    ``self.maxiter`` iterations.

    ``mat`` and ``self`` come from the enclosing scope.
    """
    assert src != psi
    self.history = []
    verbose = g.default.is_verbose("cg")
    t = g.timer("cg")
    t("setup")
    p, mmp, r = g.copy(src), g.copy(src), g.copy(src)

    # destination first: mmp = mat psi, so that r is the initial residual
    mat(mmp, psi)
    r @= src - mmp
    p @= r
    a = g.norm2(p)
    cp = a
    ssq = g.norm2(src)
    if ssq == 0.0:
        assert a != 0.0  # need either source or psi to not be zero
        ssq = a
    rsq = self.eps ** 2.0 * ssq

    for k in range(1, self.maxiter + 1):
        c = cp
        t("mat")
        mat(mmp, p)
        t("inner")
        dc = g.inner_product(p, mmp)
        d = dc.real
        a = c / d
        t("axpy_norm")
        # r <- r - a * mmp, returning the new squared residual
        cp = g.axpy_norm2(r, -a, mmp, r)
        t("linearcomb")
        b = cp / c
        psi += a * p
        p @= b * p + r
        t("other")
        self.history.append(cp)
        if verbose:
            g.message("cg: res^2[ %d ] = %g, target = %g" % (k, cp, rsq))
        if cp <= rsq:
            if verbose:
                t()
                g.message(
                    "cg: converged in %d iterations, took %g s" % (k, t.dt["total"])
                )
                g.message(t)
            break
def inv(psi, src, t):
    """Set ``psi`` to the minimal-residual combination of the solution space.

    Does nothing when ``self.solution_space`` is empty.  ``mat`` and ``self``
    come from the enclosing scope; ``t`` is the timer callable.
    """
    if len(self.solution_space) == 0:
        return

    t("orthonormalize")
    v = g.orthonormalize(g.copy(self.solution_space))

    # Idea is to minimize
    #
    #  res = | M a_i v_i - src |^2
    #      = v_i^dag a_i^dag M^dag M a_j v_j + src^dag src
    #        - src^dag M a_i v_i - v_i^dag a_i^dag M^dag src
    #
    # by selecting an optimal a_i, i.e., to compute
    #
    #  d res/d a_i^dag = v_i^dag M^dag M a_j v_j - v_i^dag M^dag src = 0
    #
    # Therefore
    #
    #  G_ij a_j = b_i
    #
    # with b_i = v_i^dag M^dag src,  G_ij = v_i^dag M^dag M v_j
    #

    t("mat v")
    images = [mat(x) for x in v]

    t("projected source")
    rhs = g.inner_product(images, src)[:, 0]

    t("projected matrix")
    columns = [g.inner_product(images, image)[:, 0] for image in images]
    gram = np.matrix(columns).T

    t("solve")
    coefficients = np.linalg.solve(gram, rhs)

    t("linear combination")
    g.linear_combination(psi, v, coefficients)

    eps2 = g.norm2(mat(psi) - src) / g.norm2(src)
    self.log(
        f"minimal residual with {len(v)}-dimensional solution space has eps^2 = {eps2}"
    )
def check_inner_product(left, right, eps_ref):
    """Compare the otype inner product with the sum over coordinate fields.

    Both arguments are converted to their cartesian otype.  The otype's own
    ``inner_product`` is compared against the summed real parts of
    ``g.inner_product`` over the paired coordinates; the relative mismatch
    must stay below ``10 * eps_ref``.
    """
    cart_left = g.convert(left, left.otype.cartesian())
    cart_right = g.convert(right, right.otype.cartesian())
    ip = cart_left.otype.inner_product(cart_left, cart_right)

    coords_left = cart_left.otype.coordinates(cart_left)
    coords_right = cart_right.otype.coordinates(cart_right)
    ipc = 0
    for a, b in zip(coords_left, coords_right):
        ipc += g.inner_product(a, b).real

    eps = abs(ip - ipc) / abs(ip + ipc)
    g.message(f"Test inner product: {eps}")
    assert eps < eps_ref * 10.0
def approx(dst, src):
    """Write sum_i f_evals[i] * left[i] * <right[i], src> into ``dst``.

    ``left``, ``right`` and ``f_evals`` come from the enclosing scope.
    """
    assert src != dst
    verbose = g.default.is_verbose("modes")
    start = g.time()

    dst[:] = 0
    for idx, mode in enumerate(left):
        overlap = g.inner_product(right[idx], src)
        dst += f_evals[idx] * mode * overlap

    if not verbose:
        return
    stop = g.time()
    g.message("Approximation by %d modes took %g s" % (len(left), stop - start))
def verify_matrix_element(mat, dst, src, tag):
    """Check <dst|mat|src> against the adjoint (and inverse) of ``mat``.

    ``dst`` is moved to the checkerboard of ``mat * src``.  When ``mat`` has
    an adjoint the matrix element is recomputed through it; when it also has
    an inverse, ``mat * inv(mat)`` and the adjoint of the inverse are checked
    too.  Every relative deviation must be below 50 times the precision
    epsilon of ``src``'s grid.

    Returns:
        The matrix element <dst|mat|src>.
    """
    image = g.eval(mat * src)
    dst.checkerboard(image.checkerboard())
    element = g.inner_product(dst, image)
    eps_ref = src.grid.precision.eps * 50.0

    if mat.adj_mat is None:
        return element

    element_adj = g.inner_product(src, g.adj(mat) * dst).conjugate()
    eps = abs(element - element_adj) / abs(element)
    g.message(f"Test adj({tag}): {eps}")
    assert eps < eps_ref

    if mat.inv_mat is not None:
        # mat * inv(mat) must reproduce src
        eps = (g.norm2(src - mat * g.inv(mat) * src) / g.norm2(src))**0.5
        g.message(f"Test inv({tag}): {eps}")
        assert eps < eps_ref

        # matrix element of the inverse versus its adjoint
        inverse = g.inner_product(dst, g.inv(g.adj(mat)) * src)
        inverse_adj = g.inner_product(src, g.inv(mat) * dst).conjugate()
        eps = abs(inverse - inverse_adj) / abs(inverse)
        g.message(f"Test adj(inv({tag})): {eps}")
        assert eps < eps_ref

    return element
def inv(psi, src, t):
    """Conjugate-gradient solve of ``mat psi = src``, updating ``psi`` in place.

    ``psi`` is used as the initial guess.  For a zero source ``psi`` is set
    to zero and the function returns.  Otherwise at most ``self.maxiter``
    iterations are run until the squared residual is at most
    ``self.eps**2 * |src|^2``; the outcome is reported through ``self.log``.

    ``mat`` and ``self`` come from the enclosing scope; ``t`` is the timer
    callable.
    """
    assert src != psi
    t("setup")
    p, mmp, r = g.copy(src), g.copy(src), g.copy(src)

    # destination first: mmp = mat psi, so that r is the initial residual
    mat(mmp, psi)
    r @= src - mmp
    p @= r
    cp = g.norm2(p)
    ssq = g.norm2(src)
    if ssq == 0.0:
        psi[:] = 0
        return
    rsq = self.eps ** 2.0 * ssq

    for k in range(self.maxiter):
        c = cp
        t("matrix")
        mat(mmp, p)
        t("inner_product")
        dc = g.inner_product(p, mmp)
        d = dc.real
        a = c / d
        t("axpy_norm2")
        # r <- r - a * mmp, returning the new squared residual
        cp = g.axpy_norm2(r, -a, mmp, r)
        t("linear combination")
        b = cp / c
        psi += a * p
        p @= b * p + r
        t("other")
        self.log_convergence(k, cp, rsq)
        if cp <= rsq:
            self.log(f"converged in {k+1} iterations")
            return

    self.log(
        f"NOT converged in {k+1} iterations;  squared residual {cp:e} / {rsq:e}"
    )
def __call__(self, phi):
    """Return the action summed over all fields in ``phi``.

    For every field ``p`` the contribution is
    ``-2 * kappa * Re<J, adj(p)> + |p|^2`` with ``J`` the sum of ``p``
    shifted by one site in every direction, plus, for non-zero ``self.l``,
    ``l * (|p adj(p)|^2 - 2 |p|^2 + fsites)``.

    Args:
        phi: a single field or a list of fields.
    """
    act = 0.0
    J = None
    for p in g.core.util.to_list(phi):
        if J is None:
            # one work buffer, allocated from the first field
            J = g.lattice(p)

        # Reset for every field so that the neighbour sum of one field can
        # never leak into the hopping term of the next one.
        J[:] = 0
        for mu in range(p.grid.nd):
            J += g.cshift(p, mu, 1)

        act += -2.0 * self.kappa * g.inner_product(J, g.adj(p)).real

        p2 = g.norm2(p)
        act += p2

        if self.l != 0.0:
            p4 = g.norm2(p * g.adj(p))
            act += self.l * (p4 - 2.0 * p2 + p.grid.fsites)
    return act
def approx(dst, src):
    """Apply the coarse-grid mode expansion to ``src`` and store it in ``dst``.

    ``src`` is block-projected with ``right_basis``, the expansion
    sum_i f_evals[i] * left[i] * <right[i], src_coarse> is accumulated on the
    coarse grid and the result is promoted to ``dst`` with ``left_basis``.
    All of ``left``, ``right``, ``f_evals`` and the bases come from the
    enclosing scope.
    """
    assert src != dst
    verbose = g.default.is_verbose("modes")
    start = g.time()

    coarse_in = g.lattice(right[0])
    g.block.project(coarse_in, src, right_basis)

    coarse_out = g.lattice(left[0])
    coarse_out[:] = 0
    for idx, mode in enumerate(left):
        overlap = g.inner_product(right[idx], coarse_in)
        coarse_out += f_evals[idx] * mode * overlap

    g.block.promote(coarse_out, dst, left_basis)

    if not verbose:
        return
    stop = g.time()
    g.message("Approximation by %d coarse modes took %g s" %
              (len(left), stop - start))
def evals(matrix, evec, params):
    """Return Rayleigh quotients <v|M|v> / <v|v> for every ``skip``-th vector.

    Args:
        matrix: callable ``matrix(dst, src)``.
        evec: non-empty list of vectors.
        params: mapping with keys ``"check_eps2"``, ``"skip"`` and ``"real"``.

    Raises:
        EvalsNotConverged: if ``check_eps2`` is not None and
            ``|M v - l v|^2`` exceeds it for some vector.
    """
    check_eps2 = params["check_eps2"]
    skip = params["skip"]
    assert len(evec) > 0

    image = g.lattice(evec[0])
    estimates = []
    for i in range(0, len(evec), skip):
        vec = evec[i]
        matrix(image, vec)

        # M |v> = l |v> -> <v|M|v> / <v|v>
        l = g.inner_product(vec, image) / g.norm2(vec)
        if params["real"]:
            l = l.real
        estimates.append(l)

        if check_eps2 is None:
            continue
        eps2 = g.norm2(image - l * vec)
        g.message(f"eval[ {i} ] = {l}, eps^2 = {eps2}")
        if eps2 > check_eps2:
            raise EvalsNotConverged()
    return estimates
def evals(matrix, evec, params):
    """Return Rayleigh quotients <v|M|v> / <v|v> for every ``skip``-th vector.

    Args:
        matrix: callable ``matrix(dst, src)``.
        evec: non-empty list of vectors.
        params: mapping with keys ``"calculate_eps2"``, ``"skip"`` and
            ``"real"``.

    Returns:
        The list of eigenvalue estimates, or the pair
        ``(estimates, residuals)`` with ``residuals[i] = |M v - l v|^2`` when
        ``params["calculate_eps2"]`` is truthy.
    """
    calculate_eps2 = params["calculate_eps2"]
    skip = params["skip"]
    assert len(evec) > 0
    tmp = g.lattice(evec[0])
    ev = []
    eps2 = []
    for i in range(0, len(evec), skip):
        v = evec[i]
        matrix(tmp, v)
        # M |v> = l |v> -> <v|M|v> / <v|v>
        l = g.inner_product(v, tmp) / g.norm2(v)
        if params["real"]:
            l = l.real
        ev.append(l)
        # Residuals are only returned for a truthy flag, so only then pay
        # for the extra norm (previously also computed for False).
        if calculate_eps2:
            eps2.append(g.norm2(tmp - l * v))

    if calculate_eps2:
        return ev, eps2
    return ev
def __call__(self, mat, src):
    """Estimate an eigenvalue of ``mat`` by repeated application to ``src``.

    Returns:
        ``(ev, vector, converged)``: the last estimate ``<v|mat|v>``, the
        last normalized iterate, and whether two successive estimates agreed
        within ``self.tol`` relative accuracy within ``self.maxit``
        iterations.
    """
    verbose = g.default.is_verbose("power_iteration")

    image = g.lattice(src)
    vec = g.copy(src)
    vec /= g.norm2(vec) ** 0.5

    previous = None
    for it in range(self.maxit):
        mat(image, vec)
        ev = g.inner_product(vec, image)
        if self.real:
            ev = ev.real
        if verbose:
            g.message(f"eval_max[ {it} ] = {ev}")

        vec @= image / g.norm2(image) ** 0.5

        if previous is not None and abs(ev - previous) < self.tol * abs(ev):
            if verbose:
                g.message("Converged")
            return (ev, vec, True)
        previous = ev

    return (ev, vec, False)
def inner_product(self, left, right):
    """Return the real part of ``gpt.inner_product(left, right)``."""
    overlap = gpt.inner_product(left, right)
    return overlap.real
def __call__(self, mat, src, ckpt=None):
    """Run the restarted Lanczos iteration for ``mat`` starting from ``src``.

    Args:
        mat: callable ``mat(dst, src)`` applying the operator.
        src: start vector; it is normalized into the first basis vector.
        ckpt: optional checkpointer; ``g.checkpointer_none()`` is used when
            omitted.  It is stored on ``self.ckpt`` for use by ``step``.

    Returns:
        ``(evec[0:Nstop], ev2_copy[0:Nstop])``: the first ``Nstop`` rotated
        basis vectors and the first ``Nstop`` entries of the values obtained
        from the last full tridiagonal diagonalization.

    Reads ``Nm``, ``Nk``, ``Nstop``, ``advise``, ``maxiter``, ``Nminres``,
    ``betastp`` and ``resid`` from ``self.params``.
    """
    # verbosity
    verbose = g.default.is_verbose("irl")

    # checkpointer
    if ckpt is None:
        ckpt = g.checkpointer_none()
    ckpt.grid = src.grid
    self.ckpt = ckpt

    # first approximate largest eigenvalue
    # (used below only to normalize the residuals of the convergence test)
    pit = g.algorithms.eigen.power_iteration(eps=0.05, maxiter=10, real=True)
    lambda_max = pit(mat, src)[0]

    # parameters
    Nm = self.params["Nm"]
    Nk = self.params["Nk"]
    Nstop = self.params["Nstop"]
    assert Nm >= Nk and Nstop <= Nk

    # tensors
    # lme / ev: off-diagonal and diagonal entries filled by step();
    # the *2 arrays are scratch copies handed to diagonalize()
    dtype = np.float64
    lme = np.empty((Nm, ), dtype)
    lme2 = np.empty((Nm, ), dtype)
    ev = np.empty((Nm, ), dtype)
    ev2 = np.empty((Nm, ), dtype)
    ev2_copy = np.empty((Nm, ), dtype)

    # fields
    f = g.lattice(src)
    v = g.lattice(src)
    evec = [g.lattice(src) for i in range(Nm)]

    # advice memory storage
    if not self.params["advise"] is None:
        g.advise(evec, self.params["advise"])

    # scalars
    k1 = 1
    k2 = Nk
    beta_k = 0.0

    # set initial vector
    evec[0] @= src / g.norm2(src)**0.5

    # initial Nk steps
    for k in range(Nk):
        self.step(mat, ev, lme, evec, f, Nm, k)

    # restarting loop
    # NOTE(review): with maxiter == 0 the loop body never runs and Qt is
    # undefined at the final rotation below -- confirm callers never do this.
    for it in range(self.params["maxiter"]):
        if verbose:
            g.message("Restart iteration %d" % it)
        # extend the basis from Nk to Nm vectors
        for k in range(Nk, Nm):
            self.step(mat, ev, lme, evec, f, Nm, k)
        f *= lme[Nm - 1]

        # eigenvalues
        for k in range(Nm):
            ev2[k] = ev[k + k1 - 1]
            lme2[k] = lme[k + k1 - 1]

        # diagonalize
        t0 = g.time()
        Qt = np.identity(Nm, dtype)
        self.diagonalize(ev2, lme2, Nm, Qt)
        t1 = g.time()

        if verbose:
            g.message("Diagonalization took %g s" % (t1 - t0))

        # sort
        # keep the unsorted values for the return value; ev2 becomes a
        # descending Python list used as shifts below
        ev2_copy = ev2.copy()
        ev2 = list(reversed(sorted(ev2)))

        # implicitly shifted QR transformations
        Qt = np.identity(Nm, dtype)
        t0 = g.time()
        for ip in range(k2, Nm):
            g.qr_decomposition(ev, lme, Nm, Nm, Qt, ev2[ip], k1, Nm)
        t1 = g.time()

        if verbose:
            g.message("QR took %g s" % (t1 - t0))

        # rotate
        t0 = g.time()
        g.rotate(evec, Qt, k1 - 1, k2 + 1, 0, Nm)
        t1 = g.time()

        if verbose:
            g.message("Basis rotation took %g s" % (t1 - t0))

        # compression
        f *= Qt[k2 - 1, Nm - 1]
        f += lme[k2 - 1] * evec[k2]
        beta_k = g.norm2(f)**0.5
        betar = 1.0 / beta_k
        evec[k2] @= betar * f
        lme[k2 - 1] = beta_k

        if verbose:
            g.message("beta_k = ", beta_k)

        # convergence test
        if it >= self.params["Nminres"]:
            if verbose:
                g.message("Rotation to test convergence")

            # diagonalize
            for k in range(Nm):
                ev2[k] = ev[k]
                lme2[k] = lme[k]
            Qt = np.identity(Nm, dtype)

            t0 = g.time()
            self.diagonalize(ev2, lme2, Nk, Qt)
            t1 = g.time()

            if verbose:
                g.message("Diagonalization took %g s" % (t1 - t0))

            B = g.copy(evec[0])

            allconv = True
            if beta_k >= self.params["betastp"]:
                # test vectors j = Nstop - jj for jj = 1, 2, 4, ...; the
                # last pass always uses jj == Nstop, i.e. j == 0
                jj = 1
                while jj <= Nstop:
                    j = Nstop - jj
                    g.linear_combination(B, evec[0:Nk], Qt[j, 0:Nk])
                    B *= 1.0 / g.norm2(B)**0.5
                    # reuse mat * B from the checkpoint when available
                    if not ckpt.load(v):
                        mat(v, B)
                        ckpt.save(v)
                    ev_test = g.inner_product(B, v).real
                    eps2 = g.norm2(v - ev_test * B) / lambda_max**2.0
                    if verbose:
                        g.message("%-65s %-45s %-50s" % (
                            "ev[ %d ] = %s" % (j, ev2_copy[j]),
                            "<B|M|B> = %s" % (ev_test),
                            "|M B - ev B|^2 / ev_max^2 = %s" % (eps2),
                        ))
                    if eps2 > self.params["resid"]:
                        allconv = False
                    if jj == Nstop:
                        break
                    jj = min([Nstop, 2 * jj])

            if allconv:
                if verbose:
                    g.message("Converged in %d iterations" % it)
                break

    # rotate the basis with the Qt of the last diagonalization
    t0 = g.time()
    g.rotate(evec, Qt, 0, Nstop, 0, Nk)
    t1 = g.time()

    if verbose:
        g.message("Final basis rotation took %g s" % (t1 - t0))

    return (evec[0:Nstop], ev2_copy[0:Nstop])
def step(self, mat, lmd, lme, evec, w, Nm, k):
    """Perform Lanczos step ``k`` and store its coefficients.

    Either restores ``w``, alpha and beta from ``self.ckpt`` or computes
    them from ``mat`` applied to ``evec[k]``.  Afterwards ``lmd[k]`` holds
    alpha, ``lme[k]`` holds beta, and ``evec[k + 1]`` receives ``w`` unless
    ``k`` is the last index.

    The process exits via ``sys.exit(0)`` once ``self.napply`` reaches
    ``self.params["maxapply"]`` (if that key is present).
    """
    assert k < Nm

    verbose = g.default.is_verbose("irl")
    ckpt = self.ckpt
    current = evec[k]

    alph, beta = 0.0, 0.0
    results = [w, alph, beta]

    if ckpt.load(results):
        # use checkpoint
        w, alph, beta = results
        if verbose:
            g.message(
                "%-65s %-45s"
                % ("alpha[ %d ] = %s" % (k, alph), "beta[ %d ] = %s" % (k, beta))
            )
    else:
        if self.params["mem_report"]:
            g.mem_report(details=False)

        # compute
        t0 = g.time()
        mat(w, current)
        t1 = g.time()

        # allow to restrict maximal number of applications within run
        self.napply += 1
        if "maxapply" in self.params and self.napply == self.params["maxapply"]:
            if verbose:
                g.message("Maximal number of matrix applications reached")
            sys.exit(0)

        # three-term recurrence: remove the previous and the current vector
        if k > 0:
            w -= lme[k - 1] * evec[k - 1]

        zalph = g.inner_product(current, w)
        alph = zalph.real
        w -= alph * current

        beta = g.norm2(w) ** 0.5
        w /= beta

        # explicit re-orthogonalization against all earlier vectors
        t2 = g.time()
        if k > 0:
            g.orthogonalize(w, evec[:k])
        t3 = g.time()

        ckpt.save([w, alph, beta])

        if verbose:
            alpha_text = "alpha[ %d ] = %s" % (k, zalph)
            beta_text = "beta[ %d ] = %s" % (k, beta)
            timing_text = " timing: %g s (matrix), %g s (ortho)" % (
                t1 - t0,
                t3 - t2,
            )
            g.message("%-65s %-45s %-50s" % (alpha_text, beta_text, timing_text))

    lmd[k] = alph
    lme[k] = beta

    if k < Nm - 1:
        evec[k + 1] @= w
def inv(psi, src, t):
    """Solve the shifted systems for one source with restarted FGMRES.

    ``psi`` holds one solution field per entry of ``self.shifts``, ``src`` is
    a list of sources and ``t`` the timer callable.  Several sources are
    handled one after another by recursion.

    Returns:
        The list of ``rho`` attributes of the per-shift solvers when
        ``self.rhos`` is truthy, otherwise None (also None for the
        multi-source case).

    ``mat``, ``self`` and ``shifted_fgmres`` come from the enclosing scope.
    """
    if len(src) > 1:
        n = len(src)
        # do different sources separately
        # NOTE(review): the recursive call passes no ``t``; presumably ``inv``
        # is wrapped by a decorator that supplies the timer -- confirm.
        for idx in range(n):
            inv(psi[idx::n], [src[idx]])
        return

    # timing
    t("setup")

    # fields
    src = src[0]
    mmp, r = g.copy(src), g.copy(src)

    # initial residual
    r2 = g.norm2(src)
    assert r2 != 0.0

    # target residual
    rsq = self.eps**2.0 * r2

    # restartlen
    rlen = self.restartlen

    # prec
    prec = self.setup_prec(mat)
    plen = len(self.shifts)
    idx = [i for i in range(plen)]

    # shifted systems
    sfgmres = []
    for j, s in enumerate(self.shifts):
        sfgmres += [shifted_fgmres(psi[j], src, s, rlen, prec)]

    # krylov space
    V = [g.copy(src) for i in range(rlen + 1)]
    V[0] /= r2**0.5

    # return rhos for prec fgmres
    rr = self.rhos

    for k in range(0, self.maxiter, rlen):
        # arnoldi
        H = self.arnoldi(mat, V, rlen, mmp, sfgmres, prec, idx, t)

        # the first entry of sfgmres is treated as the base system
        t("hessenberg")
        fgmres = sfgmres[0]
        Hs = fgmres.hessenberg(H, prec)

        t("qr")
        fgmres.qr(Hs, r2)

        t("update_psi")
        fgmres.update_psi(mmp, V, prec)

        t("update_res")
        r2_new = fgmres.r2
        fgmres.update_res(mat, r, src, mmp)

        # overlaps of the Krylov vectors with the base residual, appended to
        # the Hessenberg data of the remaining systems below
        t("inner_product")
        vr = [g.inner_product(v, r) for v in V]

        t("other")
        self.log_convergence((k, 0), r2_new, rsq)

        # remaining systems; note that this rebinds the name ``fgmres``
        for j, fgmres in enumerate(sfgmres[1:]):
            if fgmres.converged is False or rr:
                t("hessenberg")
                Hs = fgmres.hessenberg(H, prec)
                Hs.append(vr.copy())

                t("solve_hessenberg")
                fgmres.solve_hessenberg(Hs, r2, r2_new)

                t("update_psi")
                fgmres.update_psi(mmp, V, prec)

                t("other")
                self.log_convergence((k, j + 1), fgmres.r2, rsq)

        t("other")
        for fgmres in sfgmres:
            msg = fgmres.check(rsq)
            if msg:
                msg += f" at iteration {k+rlen}"
                if self.maxiter != rlen:
                    msg += f"; computed squared residual {fgmres.r2:e} / {rsq:e}"
                if self.checkres:
                    res = fgmres.calc_res(mat, src, mmp)
                    msg += f"; true squared residual {res:e} / {rsq:e}"
                self.log(msg)

        if all([fgmres.converged for fgmres in sfgmres]):
            self.log(f"converged in {k+rlen} iterations")
            return [fgmres.rho for fgmres in sfgmres] if rr else None

        # restart from the current base residual (skipped for a single cycle)
        if self.maxiter != rlen:
            t("restart")
            r2 = g.norm2(r)
            V[0] @= r / r2**0.5
            if prec is not None and rr is False:
                t("restart_prec")
                # base system plus every shifted system still unconverged
                plen_new = sum(
                    [not fgmres.converged for fgmres in sfgmres[1:]]) + 1
                if plen_new != plen:
                    plen = plen_new
                    prec, idx = self.restart_prec(mat, sfgmres)
            self.debug("performed restart")

    # iteration budget exhausted: report every unconverged shift
    t("other")
    for fgmres in sfgmres:
        if fgmres.converged is False:
            msg = f"shift {fgmres.s} NOT converged in {k+rlen} iterations"
            if self.maxiter != rlen:
                msg += f"; computed squared residual {fgmres.r2:e} / {rsq:e}"
            if self.checkres:
                res = fgmres.calc_res(mat, src, mmp)
                msg += f"; true squared residual {res:e} / {rsq:e}"
            self.log(msg)

    cs = sum([fgmres.converged for fgmres in sfgmres if True])
    ns = len(self.shifts)
    self.log(
        f"NOT converged in {k+rlen} iterations; {cs} / {ns} converged shifts"
    )
    return [fgmres.rho for fgmres in sfgmres] if rr else None
def inv(psi, src, t):
    """BiCGSTAB solve of ``mat psi = src``, updating ``psi`` in place.

    ``psi`` is the initial guess.  At most ``self.maxiter`` iterations are
    run until the squared residual is at most ``self.eps**2`` times
    ``|src|^2`` (or times the initial squared residual for a zero source).
    ``mat`` and ``self`` come from the enclosing scope; ``t`` is the timer
    callable.
    """
    t("setup")
    r = g.copy(src)
    shadow = g.copy(src)
    p = g.copy(src)
    s = g.copy(src)
    Apsi = g.copy(src)
    Ap = g.copy(src)
    As = g.copy(src)

    rho = rhoprev = alpha = omega = 1.0

    # initial residual; shadow residual and search direction start from it
    mat(Apsi, psi)
    r @= src - Apsi
    shadow @= r
    p @= r
    Ap @= r

    r2 = g.norm2(r)
    ssq = g.norm2(src)
    if ssq == 0.0:
        assert r2 != 0.0  # need either source or psi to not be zero
        ssq = r2
    rsq = self.eps**2.0 * ssq

    for k in range(self.maxiter):
        t("inner")
        rhoprev, rho = rho, g.inner_product(shadow, r).real

        t("linearcomb")
        beta = (rho / rhoprev) * (alpha / omega)
        p @= r + beta * p - beta * omega * Ap

        t("mat")
        mat(Ap, p)

        t("inner")
        alpha = rho / g.inner_product(shadow, Ap).real

        t("linearcomb")
        s @= r - alpha * Ap

        t("mat")
        mat(As, s)

        t("inner")
        ip, As2 = g.inner_product_norm2(As, s)
        if As2 == 0.0:
            continue

        t("linearcomb")
        omega = ip.real / As2
        psi += alpha * p + omega * s

        t("axpy_norm")
        # r <- s - omega * As, returning the new squared residual
        r2 = g.axpy_norm2(r, -omega, As, s)

        t("other")
        self.log_convergence(k, r2, rsq)
        if r2 <= rsq:
            self.log(f"converged in {k+1} iterations")
            return

    self.log(
        f"NOT converged in {k+1} iterations;  squared residual {r2:e} / {rsq:e}"
    )
def inv(psi, src, t):
    """(Preconditioned) conjugate-gradient solve of ``mat psi = src``.

    ``psi`` is the initial guess and is updated in place; for a zero source
    it is set to zero.  When the enclosing ``prec`` is not None it is applied
    to the residual in every iteration.  Convergence is only tested once
    ``self.miniter`` iterations have been done, against the relative target
    ``self.eps**2 * |src|^2`` and, if ``self.eps_abs`` is set, against the
    absolute target ``self.eps_abs**2``.
    """
    assert src != psi
    use_prec = prec is not None

    t("setup")
    p = g.lattice(src)
    mmp = g.lattice(src)
    r = g.lattice(src)
    if use_prec:
        z = g.lattice(src)

    t("matrix")
    mat(mmp, psi)  # destination first: mmp = mat psi

    t("setup")
    g.axpy(r, -1.0, mmp, src)  # r = src - mat psi
    if use_prec:
        z[:] = 0
        prec(z, r)
        g.copy(p, z)
        cp = g.inner_product(r, z).real
    else:
        g.copy(p, r)
        cp = g.norm2(p)

    ssq = g.norm2(src)
    if ssq == 0.0:
        psi[:] = 0
        return
    rsq = self.eps**2.0 * ssq

    for k in range(self.maxiter):
        c = cp

        t("matrix")
        mat(mmp, p)

        t("inner_product")
        d = g.inner_product(p, mmp).real
        a = c / d

        t("axpy_norm2")
        if use_prec:
            # c = <r,z>, d = <p,A p>
            g.axpy(r, -a, mmp, r)
            t("prec")
            z[:] = 0
            prec(z, r)
            t("axpy_norm2")
            cp = g.inner_product(r, z).real
        else:
            cp = g.axpy_norm2(r, -a, mmp, r)

        t("linear combination")
        b = cp / c
        psi += a * p
        # new direction from the (preconditioned) residual
        g.axpy(p, b, p, z if use_prec else r)

        t("other")
        res = abs(cp)
        self.log_convergence(k, res, rsq)

        if k + 1 < self.miniter:
            continue
        if self.eps_abs is not None and res <= self.eps_abs**2.0:
            self.log(
                f"converged in {k+1} iterations (absolute criterion)"
            )
            return
        if res <= rsq:
            self.log(f"converged in {k+1} iterations")
            return

    self.log(
        f"NOT converged in {k+1} iterations;  squared residual {res:e} / {rsq:e}"
    )
def verify_matrix_element(fermion, dst, src, tag):
    """Run the consistency tests for the operator ``tag`` of ``fermion``.

    Checks the matrix element <dst|mat|src> against the adjoint and, where
    available, the inverse; for the tags listed in the dictionaries below it
    also runs the even/odd variants and the projected-gradient tests.

    Uses ``fermion_dp``, ``finger_print_tolerance``, ``rng``, ``U``,
    ``get_matrix`` and ``verify_projected_even_odd`` from the enclosing
    module.

    Returns:
        The matrix element <dst|mat|src>.
    """
    # NOTE(review): the matrix is built from the module-level ``fermion_dp``
    # rather than from the ``fermion`` argument used everywhere else in this
    # function -- confirm this is intended.
    mat = get_matrix(fermion_dp, tag)
    src_prime = g.eval(mat * src)
    dst.checkerboard(src_prime.checkerboard())
    X = g.inner_product(dst, src_prime)
    eps_ref = src.grid.precision.eps * finger_print_tolerance
    if mat.adj_mat is not None:
        # <dst|M|src> must equal conj(<src|M^dag|dst>)
        X_from_adj = g.inner_product(src, g.adj(mat) * dst).conjugate()
        eps = abs(X - X_from_adj) / abs(X)
        g.message(f"Test adj({tag}): {eps}")
        assert eps < eps_ref
        if mat.inv_mat is not None:
            eps = (g.norm2(src - mat * g.inv(mat) * src) / g.norm2(src)) ** 0.5
            g.message(f"Test inv({tag}): {eps}")
            assert eps < eps_ref
            Y = g.inner_product(dst, g.inv(g.adj(mat)) * src)
            Y_from_adj = g.inner_product(src, g.inv(mat) * dst).conjugate()
            eps = abs(Y - Y_from_adj) / abs(Y)
            g.message(f"Test adj(inv({tag})): {eps}")
            assert eps < eps_ref

    # do even/odd tests
    even_odd_operators = {"": ("Mooee", "Meooe")}
    if tag in even_odd_operators:
        g.message(f"Test eo versions of {tag}")
        grid_rb = fermion.F_grid_eo
        src_p = g.vspincolor(grid_rb)
        dst_p = g.vspincolor(grid_rb)

        tag_Mooee, tag_Meooe = even_odd_operators[tag]
        mat_Mooee = get_matrix(fermion, tag_Mooee)
        mat_Meooe = get_matrix(fermion, tag_Meooe)
        for parity in [g.even, g.odd]:
            # Mooee: source and destination on the same parity
            g.pick_checkerboard(parity, src_p, src)
            g.pick_checkerboard(parity, dst_p, src)
            verify_matrix_element(fermion, dst_p, src_p, tag_Mooee)
            verify_projected_even_odd(mat, mat_Mooee, dst_p, src_p, src)

            # Meooe: destination on the opposite parity
            g.pick_checkerboard(parity.inv(), dst_p, src)
            verify_matrix_element(fermion, dst_p, src_p, tag_Meooe)
            verify_projected_even_odd(mat, mat_Meooe, dst_p, src_p, src)

    # perform derivative tests
    projected_gradient_operators = {"": "M_projected_gradient"}
    if tag in projected_gradient_operators and isinstance(
        fermion, g.qcd.fermion.differentiable_fine_operator
    ):
        # Test projected gradient for src^dag M^dag M src
        g.message(f"Test projected_gradient of {tag} via src^dag M^dag M src")
        mat_pg = get_matrix(fermion, projected_gradient_operators[tag])
        dst_pg = g(mat * src)

        class df(g.group.differentiable_functional):
            def __call__(self, Uprime):
                return g.norm2(get_matrix(fermion.updated(Uprime), tag) * src)

            def gradient(self, Uprime, dUprime):
                assert dUprime == Uprime
                return [
                    g.qcd.gauge.project.traceless_hermitian(g.eval(a + b))
                    for a, b in zip(mat_pg(dst_pg, src), mat_pg.adj()(src, dst_pg))
                ]

        dfv = df()
        dfv.assert_gradient_error(rng, U, U, 1e-3, 1e-6)

        # Test projected gradient for src^dag G5 M src
        if isinstance(fermion, g.qcd.fermion.gauge_independent_g5_hermitian):
            g.message(f"Test projected_gradient of {tag} via src^dag G5 M src")

            class df(g.group.differentiable_functional):
                def __call__(self, Uprime):
                    return g.inner_product(
                        src, fermion.G5 * get_matrix(fermion.updated(Uprime), tag) * src
                    ).real

                def gradient(self, Uprime, dUprime):
                    assert dUprime == Uprime
                    return g.qcd.gauge.project.traceless_hermitian(
                        mat_pg(fermion.G5 * src, src)
                    )

            dfv = df()
            dfv.assert_gradient_error(rng, U, U, 1e-3, 1e-6)

            g.message(f"Test projected_gradient of {tag} via src^dag M^dag G5 src")

            class df(g.group.differentiable_functional):
                def __call__(self, Uprime):
                    return g.inner_product(
                        src,
                        get_matrix(fermion.updated(Uprime), tag).adj() * fermion.G5 * src,
                    ).real

                def gradient(self, Uprime, dUprime):
                    assert dUprime == Uprime
                    return g.qcd.gauge.project.traceless_hermitian(
                        mat_pg.adj()(src, fermion.G5 * src)
                    )

            dfv = df()
            dfv.assert_gradient_error(rng, U, U, 1e-3, 1e-6)

    # perform even-odd derivative tests
    projected_gradient_operators = {"Meooe": "Meooe_projected_gradient"}
    if tag in projected_gradient_operators and isinstance(
        fermion, g.qcd.fermion.differentiable_fine_operator
    ):
        # Test projected gradient for src_p^dag M^dag M src_p
        g.message(f"Test projected_gradient of {tag} via src^dag M^dag M src")
        mat_pg = get_matrix(fermion, projected_gradient_operators[tag])
        src_p = g.lattice(fermion.F_grid_eo, fermion.otype)
        for parity in [g.even, g.odd]:
            g.pick_checkerboard(parity, src_p, src)
            dst_p = g(mat * src_p)

            class df(g.group.differentiable_functional):
                def __call__(self, Uprime):
                    return g.norm2(get_matrix(fermion.updated(Uprime), tag) * src_p)

                def gradient(self, Uprime, dUprime):
                    assert dUprime == Uprime
                    R = g.group.cartesian(Uprime)
                    # every entry of R is paired twice (R + R): once with a
                    # term of mat_pg and once with a term of its adjoint
                    for r, x in zip(
                        R + R, mat_pg(dst_p, src_p) + mat_pg.adj()(src_p, dst_p)
                    ):
                        g.set_checkerboard(
                            r, g.qcd.gauge.project.traceless_hermitian(x)
                        )
                    return R

            dfv = df()
            dfv.assert_gradient_error(rng, U, U, 1e-3, 1e-6)

    return X
# use different solver and compare g.default.push_verbose("cg_convergence", True) ssrc.checkerboard(g.odd) matrix = pc.eo2_ne(parity=g.odd)(w).Mpc cg1 = inv.cg({"eps": 1e-6, "maxiter": 250}) inv2_w = cg1(matrix) icg = inv.cg({"eps": 1e-7, "maxiter": 4}) icg.verbose_convergence = False icg.verbose = False open_inv = g.qcd.fermion.preconditioner.open_boundary_local(icg, margin=2) # check that matrix is still Hermitian x = g.inner_product(ssrc, open_inv(matrix) * ssrc) assert abs(x.imag / x.real) < 1e-7 cg2 = inv.cg(eps=1e-6, maxiter=130, prec=open_inv) inv3_w = cg2(matrix) dst2 = g.eval(inv2_w * ssrc) dst3 = g.eval(inv3_w * ssrc) eps2 = g.norm2(dst2 - dst3) / g.norm2(dst2) g.message(f"Both solutions agree to: eps^2 = {eps2}") assert eps2 < 1e-10 speedup = len(cg1.history) / len(cg2.history) g.message(f"Speedup in terms of outer CG iterations: {speedup}") assert speedup > 1.0
def inv(psi, src, t):
    """Multi-shift conjugate gradient for one source.

    ``psi`` holds one solution field per entry of ``self.shifts`` and ``src``
    is a list of sources; several sources are handled one after another by
    recursion.  ``mat``, ``self``, ``shifted_cg`` and ``ns`` come from the
    enclosing scope.
    """
    if len(src) > 1:
        n = len(src)
        # do different sources separately
        for idx in range(n):
            inv(psi[idx::n], [src[idx]])
        return

    src = src[0]
    scgs = [shifted_cg(psi[j], src, s) for j, s in enumerate(self.shifts)]

    t("setup")
    p = g.copy(src)
    mmp = g.copy(src)
    r = g.copy(src)
    x = g.copy(src)
    x[:] = 0

    b = 0.0
    a = g.norm2(p)
    cp = a
    assert a != 0.0  # need either source or psi to not be zero
    rsq = self.eps**2.0 * a

    for k in range(self.maxiter):
        c = cp

        t("matrix")
        mat(mmp, p)

        t("inner_product")
        d = g.inner_product(p, mmp).real

        # om uses the coefficients of the previous iteration together with
        # the new step length
        om = b / a
        a = c / d
        om *= a

        t("axpy_norm2")
        cp = g.axpy_norm2(r, -a, mmp, r)

        t("linear combination")
        b = cp / c
        for cg in scgs:
            if not cg.converged:
                cg.step1(a, b, om)

        x += a * p
        p @= b * p + r

        for cg in scgs:
            if not cg.converged:
                cg.step2(r)

        t("other")
        for cg in scgs:
            msg = cg.check(cp, rsq)
            if msg:
                self.log(f"{msg} at iteration {k+1}")

        if sum(cg.converged for cg in scgs) == ns:
            return

    self.log(
        f"NOT converged in {k+1} iterations;  squared residual {cp:e} / {rsq:e}"
    )
def inv(psi, src):
    """BiCGSTAB solve of ``mat psi = src``, updating ``psi`` in place.

    ``psi`` is the initial guess.  The squared residual of every completed
    iteration is appended to ``self.history``.  Iteration stops once it is at
    most ``self.eps**2`` times ``|src|^2`` (or times the initial squared
    residual for a zero source), or after ``self.maxiter`` iterations.
    ``mat`` and ``self`` come from the enclosing scope.
    """
    self.history = []
    verbose = g.default.is_verbose("bicgstab")
    t = g.timer("bicgstab")

    t("setup")
    r = g.copy(src)
    shadow = g.copy(src)
    p = g.copy(src)
    s = g.copy(src)
    Apsi = g.copy(src)
    Ap = g.copy(src)
    As = g.copy(src)

    rho = rhoprev = alpha = omega = 1.0

    # initial residual; shadow residual and search direction start from it
    mat(Apsi, psi)
    r @= src - Apsi
    shadow @= r
    p @= r
    Ap @= r

    r2 = g.norm2(r)
    ssq = g.norm2(src)
    if ssq == 0.0:
        assert r2 != 0.0  # need either source or psi to not be zero
        ssq = r2
    rsq = self.eps**2.0 * ssq

    for k in range(self.maxiter):
        t("inner")
        rhoprev, rho = rho, g.inner_product(shadow, r).real

        t("linearcomb")
        beta = (rho / rhoprev) * (alpha / omega)
        p @= r + beta * p - beta * omega * Ap

        t("mat")
        mat(Ap, p)

        t("inner")
        alpha = rho / g.inner_product(shadow, Ap).real

        t("linearcomb")
        s @= r - alpha * Ap

        t("mat")
        mat(As, s)

        t("inner")
        ip, As2 = g.inner_product_norm2(As, s)
        if As2 == 0.0:
            continue

        t("linearcomb")
        omega = ip.real / As2
        psi += alpha * p + omega * s

        t("axpy_norm")
        # r <- s - omega * As, returning the new squared residual
        r2 = g.axpy_norm2(r, -omega, As, s)

        t("other")
        self.history.append(r2)
        if verbose:
            g.message("bicgstab: res^2[ %d ] = %g, target = %g" % (k, r2, rsq))

        if r2 > rsq:
            continue
        if verbose:
            t()
            g.message(
                "bicgstab: converged in %d iterations, took %g s"
                % (k + 1, t.dt["total"])
            )
            g.message(t)
        break
# assemble the full-grid result from both checkerboards and compare it with
# the reference
g.set_checkerboard(res, tmp2_e)
g.set_checkerboard(res, tmp2_o)
rel_dev = g.norm2(ref - res) / g.norm2(ref)
g.message(
    f""" Test: Meo^dag + Moe^dag + Moo^dag + Mee^dag = M^dag src = {g.norm2(src)} ref = {g.norm2(ref)} res = {g.norm2(res)} rel. dev. = {rel_dev} -> test {'passed' if rel_dev <= 1e-14 else 'failed'}"""
)
assert rel_dev <= 1e-14

# imag(v^dag M^dag M v) = 0 (on full grid)
mat.mat(tmp, src)
mat.adj_mat(res, tmp)
dot = g.inner_product(src, res)
rel_dev = abs(dot.imag) / abs(dot.real)
g.message(
    f""" Test: imag(v^dag M^dag M v) = 0 (on full grid) dot = {dot} rel. dev. = {rel_dev} -> test {'passed' if rel_dev <= 1e-9 else 'failed'}"""
)
assert rel_dev <= 1e-9

# imag(v^dag Meooe^dag Meooe v) = 0 (on both cbs)
# apply Meooe followed by its adjoint, once starting from each checkerboard
mat.Meooe.mat(tmp_o, src_e)
mat.Meooe.adj_mat(res_e, tmp_o)
mat.Meooe.mat(tmp_e, src_o)
mat.Meooe.adj_mat(res_o, tmp_e)
dot_e = g.inner_product(src_e, res_e)
dot_o = g.inner_product(src_o, res_o)
#!/usr/bin/env python3 # # Authors: Christoph Lehner 2020 # # Desc.: Illustrate core concepts and features # import gpt as g import numpy as np import sys # load configuration fine_grid = g.grid([8, 8, 8, 16], g.single) # basis n = 31 basis = [g.vcomplex(fine_grid, 30) for i in range(n)] rng = g.random("block_seed_string_13") rng.cnormal(basis) # gram-schmidt for i in range(n): basis[i] /= g.norm2(basis[i]) ** 0.5 g.orthogonalize(basis[i], basis[:i]) for j in range(i): eps = g.inner_product(basis[j], basis[i]) g.message(" <%d|%d> =" % (j, i), eps) assert abs(eps) < 1e-6
def __call__(self, fields):
    """Return Re <phi, M phi> with phi the last field and M = self.matrix(fields)."""
    phi = fields[-1]
    image = g(self.matrix(fields) * phi)
    overlap = g.inner_product(phi, image)
    return overlap.real
def __call__(self, fields):
    """Return Re <phi, (MMdag(M))^-1 phi>.

    ``self._updated(fields)`` must yield three items; the second one is not
    used here.
    """
    M, _, phi = self._updated(fields)

    # chi = inverter(MMdag(M)) phi
    chi = g.lattice(phi)
    solver = self.inverter(self.operator.MMdag(M))
    chi @= solver * phi

    overlap = g.inner_product(phi, chi)
    return overlap.real
def inv(psi, src, t): t("setup") # parameters rlen = self.restartlen # tensors alpha = np.empty((rlen), g.double.complex_dtype) # fields r, mmpsi = g.copy(src), g.copy(src) p = [g.lattice(src) for i in range(rlen + 1)] # in QUDA, q is just an "alias" to p with q[k] = p[k+1] # don't alias here, but just use slicing # initial residual r2 = self.calc_res(mat, psi, mmpsi, src, r) p[0] @= r # source ssq = g.norm2(src) if ssq == 0.0: assert r2 != 0.0 # need either source or psi to not be zero ssq = r2 # target residual rsq = self.eps**2.0 * ssq for k in range(0, self.maxiter, rlen): t("mat") for i in range(rlen): mat(p[i + 1], p[i]) t("inner_product") ips = g.inner_product(p[1:], p[1:] + [p[0]]) # single reduction t("solve") rhs = ips[:, -1] # last column A = ips[:, :-1] # all but last column alpha = np.linalg.solve(A, rhs) # # check that solution is correct # g.message(np.allclose(np.dot(A, alpha), rhs)) t("update_psi") for i in range(rlen): g.axpy(psi, alpha[i], p[i], psi) if self.maxiter != rlen: t("update_residual") for i in range(rlen): g.axpy(r, -alpha[i], p[i + 1], r) t("residual") r2 = g.norm2(r) t("other") self.log_convergence(k, r2, rsq) if r2 <= rsq: msg = f"converged in {k+rlen} iterations" if self.maxiter != rlen: msg += f"; computed squared residual {r2:e} / {rsq:e}" if self.checkres: res = self.calc_res(mat, psi, mmpsi, src, r) msg += f"; true squared residual {res:e} / {rsq:e}" self.log(msg) return if self.maxiter != rlen: t("restart") p[0] @= r self.debug("performed restart") msg = f"NOT converged in {k+rlen} iterations" if self.maxiter != rlen: msg += f"; computed squared residual {r2:e} / {rsq:e}" if self.checkres: res = self.calc_res(mat, psi, mmpsi, src, r) msg += f"; true squared residual {res:e} / {rsq:e}" self.log(msg)