def inv(dst, src):
    """Fill ``dst`` with the deflated guess for ``src`` and call ``inverter``.

    Implements |dst> = sum_n 1/ev[n] |n><n|src> with the eigenpairs
    ``self.evec`` / ``self.ev`` of the enclosing object.  The inner
    products are requested in chunks of ``self.params["block"]``
    eigenvectors.

    Args:
        dst: indexable collection of destination fields, one per source.
        src: indexable collection of source fields; ``src[0].grid`` is
            used for the global sum.

    Returns:
        Whatever ``inverter(dst, src)`` returns.
    """
    verbose = g.default.is_verbose("deflate")

    t_start = g.time()
    grid = src[0].grid
    n_evec = len(self.evec)
    block = self.params["block"]

    # rip[j, n] collects <n|src_j> / ev[n]
    rip = np.zeros((len(src), n_evec), dtype=np.complex128)
    for first in range(0, n_evec, block):
        rip_block = g.rank_inner_product(self.evec[first:first + block], src, True)
        n_rows = rip_block.shape[0]
        n_cols = rip_block.shape[1]
        for row in range(n_rows):
            n = first + row
            for col in range(n_cols):
                # rip_block is indexed (eigenvector, source); rip is transposed
                rip[col, n] = rip_block[row, col] / self.ev[n]
    t_inner = g.time()

    grid.globalsum(rip)
    t_sum = g.time()

    # TODO: simultaneous linear_combinations
    for j, coefficients in enumerate(rip):
        g.linear_combination(dst[j], self.evec, coefficients)
    t_end = g.time()

    if verbose:
        timings = (
            len(src),
            t_end - t_start,
            t_inner - t_start,
            t_sum - t_inner,
            t_end - t_sum,
        )
        g.message(
            "Deflated %d vector(s) in %g s (%g s for rank_inner_product, %g s for global sum, %g s for linear combinations)"
            % timings)

    return inverter(dst, src)
def inv(psi, src, t):
    """Write the minimal-residual combination of the solution space to ``psi``.

    The vectors in ``self.solution_space`` are copied and orthonormalized;
    the coefficients that minimize the residual against ``src`` under
    ``mat`` are obtained from a dense linear solve and the resulting
    combination is stored in ``psi``.  Nothing is done (and ``psi`` is
    left untouched) when the solution space is empty.

    Args:
        psi: destination field, overwritten via ``g.linear_combination``.
        src: right-hand side field.
        t: callable used to label the phases of this function.

    Returns:
        None.
    """
    if len(self.solution_space) == 0:
        return

    t("orthonormalize")
    v = g.orthonormalize(g.copy(self.solution_space))

    # Idea is to minimize
    #
    #  res = | M a_i v_i - src |^2
    #      = v_i^dag a_i^dag M^dag M a_j v_j + src^dag src
    #        - src^dag M a_i v_i - v_i^dag a_i^dag M^dag src
    #
    # by selecting an optimal a_i, i.e., to compute
    #
    #  d res/d a_i^dag = v_i^dag M^dag M a_j v_j - v_i^dag M^dag src = 0
    #
    # Therefore
    #
    #  G_ij a_j = b_i
    #
    # with b_i = v_i^dag M^dag src,  G_ij = v_i^dag M^dag M v_j
    #

    t("mat v")
    mat_v = [mat(x) for x in v]

    t("projected source")
    b = g.inner_product(mat_v, src)[:, 0]

    t("projected matrix")
    # Entry j of the list holds <M v_i | M v_j> for all i, hence the
    # transpose.  A plain ndarray is used instead of np.matrix, which
    # NumPy no longer recommends; np.linalg.solve yields the same result.
    G_ij = np.array(
        [g.inner_product(mat_v, mat_v_j)[:, 0] for mat_v_j in mat_v]
    ).T

    t("solve")
    a = np.linalg.solve(G_ij, b)

    t("linear combination")
    g.linear_combination(psi, v, a)

    eps2 = g.norm2(mat(psi) - src) / g.norm2(src)
    self.log(
        f"minimal residual with {len(v)}-dimensional solution space has eps^2 = {eps2}"
    )
def inv(dst, src):
    """Fill ``dst`` with the deflated guess for ``src`` and run the inverter.

    Implements |dst> = sum_n 1/ev[n] |n><n|src> with the eigenpairs
    ``self.evec`` / ``self.ev`` of the enclosing object, one inner
    product per (eigenvector, source) pair.

    Args:
        dst: indexable collection of destination fields, one per source.
        src: indexable collection of source fields; ``src[0].grid`` is
            used for the global sum.

    Returns:
        Whatever ``self.inverter(matrix)(dst, src)`` returns.
    """
    verbose = g.default.is_verbose("deflate")

    t_start = g.time()
    grid = src[0].grid
    n_src = len(src)
    n_evec = len(self.evec)

    # rip[j, n] collects <n|src_j> / ev[n]
    rip = np.zeros((n_src, n_evec), dtype=np.complex128)
    for n in range(n_evec):
        for j in range(n_src):
            rip[j, n] = g.rankInnerProduct(self.evec[n], src[j]) / self.ev[n]
    t_inner = g.time()

    grid.globalsum(rip)
    t_sum = g.time()

    # TODO: simultaneous linear_combinations
    for j, coefficients in enumerate(rip):
        g.linear_combination(dst[j], self.evec, coefficients)
    t_end = g.time()

    if verbose:
        timings = (
            n_src,
            t_end - t_start,
            t_inner - t_start,
            t_sum - t_inner,
            t_end - t_sum,
        )
        g.message(
            "Deflated %d vector(s) in %g s (%g s for rankInnerProduct, %g s for global sum, %g s for linear combinations)"
            % timings)

    return self.inverter(matrix)(dst, src)
def update_psi(self, mmp, V):
    """Add the combination of ``V[0:-1]`` weighted by ``self.y`` to ``self.x``.

    ``mmp`` serves as the destination of ``g.linear_combination`` and is
    then added in place to ``self.x``.  The last entry of ``V`` is not
    part of the combination.
    """
    basis = V[:-1]
    g.linear_combination(mmp, basis, self.y)
    self.x += mmp
eps = abs(host_result_individual - ref) / abs(ref) assert eps < 1e-12 ################################################################################ # Test multi linear_combination against expression engine ################################################################################ for grid in [grid_sp, grid_dp]: nbasis = 7 nblock = 3 nvec = 2 basis = [g.vcomplex(grid, 8) for i in range(nbasis)] rng.cnormal(basis) dst = [g.vcomplex(grid, 8) for i in range(nvec)] coef = [[rng.cnormal() for i in range(nbasis)] for j in range(nvec)] # multi g.linear_combination(dst, basis, coef, nblock) for j in range(nvec): ref = g.vcomplex(grid, 8) ref[:] = 0 for i in range(nbasis): ref += coef[j][i] * basis[i] eps2 = g.norm2(dst[j] - ref) / g.norm2(ref) g.message(f"Test linear combination of vector {j}: {eps2}") assert eps2 < 1e-13 ################################################################################ # Test bilinear_combination against expression engine ################################################################################ for grid in [grid_sp, grid_dp]: left = [g.complex(grid) for i in range(3)]
def __call__(self, mat, src, ckpt=None):
    """Run the restarted Lanczos-type iteration (verbosity tag "irl").

    Parameters are read from ``self.params``: ``Nm``, ``Nk``, ``Nstop``,
    ``maxiter``, ``Nminres``, ``betastp``, ``resid`` and ``advise``.

    Args:
        mat: operator; called as ``mat(v, B)`` here and forwarded to
            ``self.step`` and to the power iteration.
        src: start field; also the template for all work fields.
        ckpt: optional checkpointer; ``g.checkpointer_none()`` is used
            when omitted.  It is stored on ``self.ckpt``.

    Returns:
        Tuple ``(evec[0:Nstop], ev2_copy[0:Nstop])``.
    """
    params = self.params
    verbose = g.default.is_verbose("irl")

    # checkpointer
    if ckpt is None:
        ckpt = g.checkpointer_none()
    ckpt.grid = src.grid
    self.ckpt = ckpt

    # first approximate largest eigenvalue; it normalizes the residuals
    # in the convergence test below
    pit = g.algorithms.eigen.power_iteration(eps=0.05, maxiter=10, real=True)
    lambda_max = pit(mat, src)[0]

    # parameters
    Nm = params["Nm"]
    Nk = params["Nk"]
    Nstop = params["Nstop"]
    assert Nm >= Nk and Nstop <= Nk

    # tensors
    dtype = np.float64
    lme = np.empty((Nm,), dtype)
    lme2 = np.empty((Nm,), dtype)
    ev = np.empty((Nm,), dtype)
    ev2 = np.empty((Nm,), dtype)
    ev2_copy = np.empty((Nm,), dtype)

    # fields
    f = g.lattice(src)
    v = g.lattice(src)
    evec = [g.lattice(src) for _ in range(Nm)]

    # advice memory storage
    if params["advise"] is not None:
        g.advise(evec, params["advise"])

    # scalars
    k1 = 1
    k2 = Nk
    beta_k = 0.0

    # set initial vector (normalized source)
    evec[0] @= src / g.norm2(src)**0.5

    # initial Nk steps
    for k in range(Nk):
        self.step(mat, ev, lme, evec, f, Nm, k)

    # restarting loop
    for it in range(params["maxiter"]):
        if verbose:
            g.message("Restart iteration %d" % it)

        for k in range(Nk, Nm):
            self.step(mat, ev, lme, evec, f, Nm, k)
        f *= lme[Nm - 1]

        # eigenvalues
        shift = k1 - 1
        for k in range(Nm):
            ev2[k] = ev[k + shift]
            lme2[k] = lme[k + shift]

        # diagonalize
        t0 = g.time()
        Qt = np.identity(Nm, dtype)
        self.diagonalize(ev2, lme2, Nm, Qt)
        t1 = g.time()
        if verbose:
            g.message("Diagonalization took %g s" % (t1 - t0))

        # sort: keep an unsorted copy, then order descending (from here on
        # ev2 is a plain list)
        ev2_copy = ev2.copy()
        ev2 = sorted(ev2)[::-1]

        # implicitly shifted QR transformations
        Qt = np.identity(Nm, dtype)
        t0 = g.time()
        for ip in range(k2, Nm):
            g.qr_decomposition(ev, lme, Nm, Nm, Qt, ev2[ip], k1, Nm)
        t1 = g.time()
        if verbose:
            g.message("QR took %g s" % (t1 - t0))

        # rotate
        t0 = g.time()
        g.rotate(evec, Qt, k1 - 1, k2 + 1, 0, Nm)
        t1 = g.time()
        if verbose:
            g.message("Basis rotation took %g s" % (t1 - t0))

        # compression
        f *= Qt[k2 - 1, Nm - 1]
        f += lme[k2 - 1] * evec[k2]
        beta_k = g.norm2(f)**0.5
        betar = 1.0 / beta_k
        evec[k2] @= betar * f
        lme[k2 - 1] = beta_k
        if verbose:
            g.message("beta_k = ", beta_k)

        # convergence test, skipped for the first Nminres restarts
        if it < params["Nminres"]:
            continue

        if verbose:
            g.message("Rotation to test convergence")

        # diagonalize
        for k in range(Nm):
            ev2[k] = ev[k]
            lme2[k] = lme[k]
        Qt = np.identity(Nm, dtype)

        t0 = g.time()
        self.diagonalize(ev2, lme2, Nk, Qt)
        t1 = g.time()
        if verbose:
            g.message("Diagonalization took %g s" % (t1 - t0))

        B = g.copy(evec[0])

        # allconv stays True when beta_k is below betastp (no vector tested)
        allconv = True
        if beta_k >= params["betastp"]:
            # test vectors Nstop-1, Nstop-2, Nstop-4, ... and finally 0
            jj = 1
            while jj <= Nstop:
                j = Nstop - jj
                g.linear_combination(B, evec[0:Nk], Qt[j, 0:Nk])
                B *= 1.0 / g.norm2(B)**0.5

                # only apply the operator when no checkpoint could be loaded
                if not ckpt.load(v):
                    mat(v, B)
                    ckpt.save(v)

                ev_test = g.inner_product(B, v).real
                eps2 = g.norm2(v - ev_test * B) / lambda_max**2.0
                if verbose:
                    g.message("%-65s %-45s %-50s" % (
                        "ev[ %d ] = %s" % (j, ev2_copy[j]),
                        "<B|M|B> = %s" % (ev_test),
                        "|M B - ev B|^2 / ev_max^2 = %s" % (eps2),
                    ))

                if eps2 > params["resid"]:
                    allconv = False
                if jj == Nstop:
                    break
                jj = min([Nstop, 2 * jj])

        if allconv:
            if verbose:
                g.message("Converged in %d iterations" % it)
            break

    t0 = g.time()
    g.rotate(evec, Qt, 0, Nstop, 0, Nk)
    t1 = g.time()
    if verbose:
        g.message("Final basis rotation took %g s" % (t1 - t0))

    return (evec[0:Nstop], ev2_copy[0:Nstop])
def __call__(self, mat, src, ckpt=None):
    """Run the block variant of the restarted iteration (block size ``Nu``).

    Parameters are read from ``self.params``: ``Nm``, ``Nu``, ``Nk``,
    ``Nstop``, ``maxiter``, ``Nminres``, ``betastp``, ``resid`` and
    ``advise``.  The start vectors are drawn from ``g.random("test")``;
    ``src`` is used as the field template, for ``ckpt.grid`` and for the
    power iteration.

    Args:
        mat: operator; called as ``mat(v, B)`` here and forwarded to
            ``self.blockStep`` and to the power iteration.
        src: template field.
        ckpt: optional checkpointer; ``g.checkpointer_none()`` is used
            when omitted.  It is stored on ``self.ckpt``.

    Returns:
        Tuple ``(evec[0:Nstop], ev2_copy[0:Nstop])``.
    """
    params = self.params
    verbose = g.default.is_verbose("irl")

    # checkpointer
    if ckpt is None:
        ckpt = g.checkpointer_none()
    ckpt.grid = src.grid
    self.ckpt = ckpt

    # first approximate largest eigenvalue; it normalizes the residuals
    # in the convergence test below
    pit = g.algorithms.eigen.power_iteration(eps=0.05, maxiter=10, real=True)
    lambda_max = pit(mat, src)[0]

    # parameters
    Nm = params["Nm"]
    Nu = params["Nu"]
    Nk = params["Nk"]
    Nstop = params["Nstop"]
    MaxIter = params["maxiter"]
    # true division: Np may be fractional, it is truncated in Nblock_p
    Np = (Nm - Nk) / MaxIter
    assert Nm >= Nk and Nstop <= Nk
    print("Nm=", Nm, "Nu=", Nu, "Nk=", Nk)

    # tensors
    dtype = np.float64
    ctype = np.complex128
    lme = np.zeros((Nu, Nm), ctype)
    lmd = np.zeros((Nu, Nm), ctype)
    lme2 = np.zeros((Nu, Nm), ctype)
    lmd2 = np.empty((Nu, Nm), ctype)
    Qt = np.zeros((Nm, Nm), ctype)
    Q = np.zeros((Nm, Nm), ctype)
    # NOTE(review): ev is never written in this function, yet it is copied
    # into ev2 in the convergence test below -- confirm intent.
    ev = np.empty((Nm,), dtype)
    ev2_copy = np.empty((Nm,), dtype)

    # fields
    f = g.lattice(src)
    v = g.lattice(src)
    evec = [g.lattice(src) for _ in range(Nm)]
    w = [g.lattice(src) for _ in range(Nu)]
    w_copy = [g.lattice(src) for _ in range(Nu)]

    # advice memory storage
    if params["advise"] is not None:
        g.advise(evec, params["advise"])

    # scalars
    k1 = 1
    k2 = Nk
    # NOTE(review): beta_k is never updated in this function, so the
    # residual test below only runs when params["betastp"] <= 0.
    beta_k = 0.0

    rng = g.random("test")

    # set initial vectors: random, orthogonalized against the previous
    # ones, then normalized
    for i in range(Nu):
        rng.zn(w[i])
        if i > 0:
            g.orthogonalize(w[i], evec[0:i])
        evec[i] = g.copy(w[i])
        evec[i] *= 1.0 / g.norm2(evec[i]) ** 0.5
        g.message("norm(evec[%d]=%e " % (i, g.norm2(evec[i])))
        # report any sizeable overlap with the earlier vectors
        for j in range(i):
            ip = g.innerProduct(evec[j], w[i])
            if np.abs(ip) > 1e-6:
                g.message("inner(evec[%d],w[%d])=%e %e" % (j, i, ip.real, ip.imag))

    # initial Nk steps
    Nblock_k = int(Nk / Nu)
    for b in range(Nblock_k):
        self.blockStep(mat, lmd, lme, evec, w, w_copy, Nm, b, Nu)

    Nblock_p = int(Np / Nu)

    # restarting loop
    for it in range(MaxIter):
        if verbose:
            g.message("Restart iteration %d" % it)

        Nblock_l = Nblock_k + it * Nblock_p
        Nblock_r = Nblock_l + Nblock_p
        Nl = Nblock_l * Nu
        Nr = Nblock_r * Nu
        ev2 = np.empty((Nr,), dtype)

        for b in range(Nblock_l, Nblock_r):
            self.blockStep(mat, lmd, lme, evec, w, w_copy, Nm, b, Nu)

        for u in range(Nu):
            for k in range(Nr):
                lmd2[u, k] = lmd[u, k]
                lme2[u, k] = lme[u, k]

        Qt = np.identity(Nr, ctype)

        # diagonalize
        t0 = g.time()
        self.diagonalize(ev2, lmd2, lme2, Nu, Nr, Qt)
        t1 = g.time()
        if verbose:
            g.message("Diagonalization took %g s" % (t1 - t0))

        # sort: keep an unsorted copy, then order descending (from here on
        # ev2 is a plain list)
        ev2_copy = ev2.copy()
        ev2 = sorted(ev2)[::-1]
        for i in range(Nr):
            g.message("Rval[%d]= %e" % (i, ev2[i]))

        # convergence test, skipped for the first Nminres restarts
        if it < params["Nminres"]:
            continue

        if verbose:
            g.message("Rotation to test convergence")

        # diagonalize
        for k in range(Nr):
            ev2[k] = ev[k]
        for u in range(Nu):
            for k in range(Nr):
                lmd2[u, k] = lmd[u, k]
                lme2[u, k] = lme[u, k]
        # NOTE(review): Nm x Nm here versus Nr x Nr above -- confirm.
        Qt = np.identity(Nm, ctype)

        t0 = g.time()
        self.diagonalize(ev2, lmd2, lme2, Nu, Nr, Qt)
        t1 = g.time()
        if verbose:
            g.message("Diagonalization took %g s" % (t1 - t0))

        B = g.copy(evec[0])

        allconv = True
        if beta_k >= params["betastp"]:
            # test vectors Nstop-1, Nstop-2, Nstop-4, ... and finally 0
            jj = 1
            while jj <= Nstop:
                j = Nstop - jj
                g.linear_combination(B, evec[0:Nr], Qt[j, 0:Nr])
                g.message("norm=%e" % (g.norm2(B)))
                B *= 1.0 / g.norm2(B) ** 0.5

                # only apply the operator when no checkpoint could be loaded
                if not ckpt.load(v):
                    mat(v, B)
                    ckpt.save(v)

                ev_test = g.innerProduct(B, v).real
                eps2 = g.norm2(v - ev_test * B) / lambda_max ** 2.0
                if verbose:
                    g.message(
                        "%-65s %-45s %-50s"
                        % (
                            "ev[ %d ] = %s" % (j, ev2_copy[j]),
                            "<B|M|B> = %s" % (ev_test),
                            "|M B - ev B|^2 / ev_max^2 = %s" % (eps2),
                        )
                    )

                if eps2 > params["resid"]:
                    allconv = False
                if jj == Nstop:
                    break
                jj = min([Nstop, 2 * jj])

        if allconv:
            if verbose:
                g.message("Converged in %d iterations" % it)
            break

    t0 = g.time()
    g.rotate(evec, Qt, 0, Nstop, 0, Nk)
    t1 = g.time()
    if verbose:
        g.message("Final basis rotation took %g s" % (t1 - t0))

    return (evec[0:Nstop], ev2_copy[0:Nstop])
eps = abs(acc_result_individual - acc_result[i, j]) / abs( acc_result[i, j]) assert eps < 1e-13 if i == 0 and j == 0: ref = np.vdot(left[i][:].astype(np.complex128), right[j][:].astype(np.complex128)) eps = abs(host_result_individual - ref) / abs(ref) assert eps < 1e-12 ################################################################################ # Test multi linear_combination against expression engine ################################################################################ for grid in [grid_sp, grid_dp]: nbasis = 7 nblock = 3 nvec = 2 basis = [g.vcomplex(grid, 8) for i in range(nbasis)] rng.cnormal(basis) dst = [g.vcomplex(grid, 8) for i in range(nvec)] coef = [[rng.cnormal() for i in range(nbasis)] for j in range(nvec)] # multi g.linear_combination(dst, basis, coef, nblock) for j in range(nvec): ref = g.vcomplex(grid, 8) ref[:] = 0 for i in range(nbasis): ref += coef[j][i] * basis[i] eps2 = g.norm2(dst[j] - ref) / g.norm2(ref) g.message(f"Test linear combination of vector {j}: {eps2}") assert eps2 < 1e-13
basis = [g.lattice(grid, tp) for i in range(nbasis)] result = g.lattice(grid, tp) rng.cnormal(basis) # Typical usecase: nbasis -> 1 Qt = np.ones((1, nbasis), np.complex128) # Bytes nbytes = (nbasis + 1) * result.global_bytes() * N # Time dt = 0.0 for it in range(N + Nwarmup): if it >= Nwarmup: dt -= g.time() g.linear_combination(result, basis, Qt) if it >= Nwarmup: dt += g.time() # Report GBPerSec = nbytes / dt / 1e9 g.message( f"""{N} linear_combination Object type : {tp.__name__} Number of basis vectors : {nbasis} Time to complete : {dt:.2f} s Effective memory bandwidth : {GBPerSec:.2f} GB/s """ )
def single_evec(self, little_evec, i):
    """Return the combination of the basis weighted by column ``i``.

    A new field modelled on ``self.basis[0]`` receives the linear
    combination of the first ``len(self.H)`` basis vectors with the
    coefficients ``little_evec[:, i]``.
    """
    size = len(self.H)
    coefficients = little_evec[:, i]
    result = g.lattice(self.basis[0])
    g.linear_combination(result, self.basis[0:size], coefficients)
    return result
def update_psi(self, mmp, V, prec):
    """Add the current correction, weighted by ``self.y``, to ``self.x``.

    The combination runs over ``self.Z`` when ``prec`` is given and over
    all but the last entry of ``V`` otherwise.  ``mmp`` serves as the
    destination of ``g.linear_combination`` and is then added in place
    to ``self.x``.
    """
    if prec is not None:
        vectors = self.Z
    else:
        vectors = V[:-1]
    g.linear_combination(mmp, vectors, self.y)
    self.x += mmp
def inv(psi, src, t):
    """Update ``psi`` in place using restarted cycles of Arnoldi steps.

    Each cycle performs ``self.restartlen`` Arnoldi steps, solves the
    Hessenberg problem and adds the resulting combination of the basis
    to ``psi``.  The loop ends once the squared residual reaches
    ``self.eps**2`` times the squared norm of ``src`` (or of the initial
    residual when ``src`` has zero norm), or after ``self.maxiter``
    steps.  The outcome is reported through ``self.log``.

    Args:
        psi: initial guess, updated in place.
        src: right-hand side; must not compare equal to ``psi``.
        t: callable used to label the phases of this function.

    Returns:
        None.
    """
    assert src != psi

    t("setup")
    rlen = self.restartlen
    mmp, r = g.copy(src), g.copy(src)
    r2 = self.calc_res(mat, psi, mmp, src, r)

    ssq = g.norm2(src)
    if ssq == 0.0:
        # zero source: measure convergence against the initial residual
        assert r2 != 0.0
        ssq = r2
    rsq = self.eps**2.0 * ssq

    # silence the "arnoldi" verbosity channel while building the iteration
    g.default.push_verbose("arnoldi", False)
    a = arnoldi_iteration(mat, r)
    g.default.pop_verbose()

    # a restart only happens when more than one cycle is possible
    restarting = self.maxiter != rlen

    def report(msg):
        # append residual information to msg and log it
        if restarting:
            msg += f"; computed squared residual {r2:e} / {rsq:e}"
        if self.checkres:
            res = self.calc_res(mat, psi, mmp, src, r)
            msg += f"; true squared residual {res:e} / {rsq:e}"
        self.log(msg)

    for k in range(0, self.maxiter, rlen):
        t("arnoldi")
        for _ in range(rlen):
            # for sufficiently small restart length
            # should not need second orthogonalization
            # step
            a(second_orthogonalization=False)
        Q, H = a.basis, a.H

        t("solve_hessenberg")
        y, rn = self.solve_hessenberg(H, r2)

        t("update_psi")
        g.linear_combination(mmp, Q[0:-1], y)
        psi += mmp

        if restarting:
            t("update_res")
            r @= g.eval(Q[-1] * rn)

        t("residual")
        r2 = np.abs(rn)**2.0

        t("other")
        self.log_convergence(k, r2, rsq)

        if r2 <= rsq:
            report(f"converged in {k+rlen} iterations")
            return

        if restarting:
            t("restart")
            # start the next cycle from the last basis vector
            a.basis = [Q[-1]]
            a.H = []
            self.debug("performed restart")

    report(f"NOT converged in {k+rlen} iterations")