def next(root, jobs, max_weight=None, stale_seconds=None):
    """Pick the next job by name, run it, and return it.

    Rank 0 asks ``get_next_name`` for the job name and broadcasts it
    (UTF-8 encoded) from rank 0; the other ranks contribute an empty
    buffer.  The first entry of ``jobs`` whose ``name`` matches is
    called with ``root``, with start/completion banners sent through
    ``g.message``.

    Returns the job that was run, or ``None`` if no entry of ``jobs``
    carries the broadcast name.
    """
    encoded_name = (
        get_next_name(root, jobs, max_weight, stale_seconds).encode("utf-8")
        if g.rank() == 0
        else bytes()
    )
    j_name = g.broadcast(0, encoded_name).decode("utf-8")

    for j in jobs:
        if j.name != j_name:
            continue

        g.message(f""" -------------------------------------------------------------------------------- Start job {j.name} -------------------------------------------------------------------------------- """)
        t0 = g.time()
        j(root)
        t1 = g.time()
        g.message(f""" -------------------------------------------------------------------------------- Completed {j.name} in {t1-t0} seconds -------------------------------------------------------------------------------- """)
        return j

    return None
def implicit_restart(self, H, evals, p):
    """Apply ``p`` shifted QR steps to ``H`` and truncate the basis.

    With ``n = len(self.H)`` and ``k = n - p``, each of the first ``p``
    entries of ``evals`` is used as a shift: ``H - shift`` is QR-factored
    and ``H`` is replaced by ``R @ Q + shift`` while the ``Q`` factors are
    accumulated.  A new residual vector is formed from ``self.basis[k]``
    and ``self.basis[-1]`` *before* the basis is rotated, the basis is cut
    to its first ``k`` vectors plus the normalized residual, and
    ``self.H`` is rebuilt column by column (column ``i`` keeps rows
    ``0 .. i + 1``) with its very last entry set to the residual norm.
    """
    n = len(self.H)
    k = n - p
    identity = np.identity(n, np.complex128)
    Q = np.identity(n, np.complex128)

    t0 = g.time()
    for i in range(p):
        shift = evals[i] * identity
        Qi, Ri = np.linalg.qr(H - shift)
        H = Ri @ Qi + shift
        Q = Q @ Qi
    t1 = g.time()

    if self.verbose:
        g.message(f"Arnoldi: QR in {t1-t0} s")

    # The residual must be built from the un-rotated basis and the old
    # self.H, so it is evaluated before g.rotate and before self.H changes.
    r = g.eval(
        self.basis[k] * H[k, k - 1]
        + self.basis[-1] * self.H[-1][-1] * Q[n - 1, k - 1]
    )
    rn = g.norm2(r) ** 0.5

    t0 = g.time()
    g.rotate(self.basis, np.ascontiguousarray(Q.T), 0, k, 0, n)
    t1 = g.time()

    if self.verbose:
        g.message(f"Arnoldi: rotate in {t1-t0} s")

    truncated = self.basis[0:k]
    truncated.append(g.eval(r / rn))
    self.basis = truncated

    self.H = [[H[row, col] for row in range(col + 2)] for col in range(k)]
    self.H[-1][-1] = rn
def save(filename, objs, params):
    """Write ``objs`` through ``gpt_io`` and store the index plus its crc32.

    The index text produced by ``x.create_index`` is UTF-8 encoded and its
    crc32 is computed on every rank; only rank 0 writes the two files
    ``<filename>/index`` and ``<filename>/index.crc32`` (the checksum as
    upper-case hex followed by a newline).  When ``x.verbose`` is set, the
    elapsed time is reported via ``gpt.message``.
    """
    t0 = gpt.time()

    # create io
    x = gpt_io(filename, params, True)

    # create index
    f = io.StringIO("")
    x.create_index(f, "", objs)
    mvidx = memoryview(f.getvalue().encode("utf-8"))

    # write index to fs
    index_crc = gpt.crc32(mvidx)
    if gpt.rank() == 0:
        # Context managers guarantee both files are flushed and closed
        # before x.close() runs, instead of relying on garbage collection.
        with open(filename + "/index", "wb") as index_file:
            index_file.write(mvidx)
        with open(filename + "/index.crc32", "wt") as crc_file:
            crc_file.write("%X\n" % index_crc)

    # close
    x.close()

    # goodbye
    if x.verbose:
        t1 = gpt.time()
        gpt.message("Completed writing %s in %g s" % (filename, t1 - t0))
def inv(dst, src):
    """Fill ``dst`` with the deflated guess, then call ``inverter``.

    For every source vector the rank-local inner products with
    ``self.evec`` are computed in blocks of ``self.params["block"]``
    vectors, divided by the matching entry of ``self.ev``, summed over
    ranks with ``grid.globalsum`` and used as coefficients of a linear
    combination of ``self.evec`` written into ``dst``.

    Returns whatever ``inverter(dst, src)`` returns.
    """
    verbose = g.default.is_verbose("deflate")

    # |dst> = sum_n 1/ev[n] |n><n|src>
    t0 = g.time()
    grid = src[0].grid
    n_evec = len(self.evec)
    block = self.params["block"]
    rip = np.zeros((len(src), n_evec), dtype=np.complex128)

    for i0 in range(0, n_evec, block):
        rip_block = g.rank_inner_product(self.evec[i0:i0 + block], src, True)
        # rip_block is indexed [evec-in-block, source]; rip is [source, evec]
        for i in range(rip_block.shape[0]):
            mode = i0 + i
            for j in range(rip_block.shape[1]):
                rip[j, mode] = rip_block[i, j] / self.ev[mode]
    t1 = g.time()

    grid.globalsum(rip)
    t2 = g.time()

    # TODO: simultaneous linear_combinations
    for j in range(len(src)):
        g.linear_combination(dst[j], self.evec, rip[j])
    t3 = g.time()

    if verbose:
        timing = (len(src), t3 - t0, t1 - t0, t2 - t1, t3 - t2)
        g.message(
            "Deflated %d vector(s) in %g s (%g s for rank_inner_product, %g s for global sum, %g s for linear combinations)"
            % timing)

    return inverter(dst, src)
def save(self, obj):
    """Serialize ``obj`` to ``self.f``.

    Dispatch is on the exact type of ``obj``:

    * ``list``: every element is saved in order;
    * ``gpt.lattice``: its ``mview()`` is saved;
    * ``float`` / ``complex``: packed with ``struct`` (``"d"`` / ``"dd"``)
      and saved as a memoryview;
    * ``memoryview``: written as an 8-byte little-endian length, a 4-byte
      little-endian crc32 and then the payload, followed by a flush.

    Any other type trips ``assert 0``.  With ``self.verbose`` set, size
    and throughput are reported; when ``self.grid`` is not ``None`` the
    size is first summed with ``self.grid.globalsum``.
    """
    kind = type(obj)

    if kind == list:
        for o in obj:
            self.save(o)
        return

    if kind == gpt.lattice:
        self.save(obj.mview())
        return

    if kind == float:
        self.save(memoryview(struct.pack("d", obj)))
        return

    if kind == complex:
        self.save(memoryview(struct.pack("dd", obj.real, obj.imag)))
        return

    if kind != memoryview:
        assert 0
        return

    self.f.seek(0, 1)
    sz = len(obj)
    szGB = sz / 1024.0**3
    self.f.write(sz.to_bytes(8, "little"))

    t0 = gpt.time()
    self.f.write(gpt.crc32(obj).to_bytes(4, "little"))
    t1 = gpt.time()
    self.f.write(obj)
    self.f.flush()
    t2 = gpt.time()

    if not self.verbose:
        return

    if self.grid is None:
        fmt = "Checkpoint %g GB on head node at %g GB/s for crc32 and %g GB/s for write in %g s total"
    else:
        szGB = self.grid.globalsum(szGB)
        fmt = "Checkpoint %g GB at %g GB/s for crc32 and %g GB/s for write in %g s total"

    gpt.message(fmt % (szGB, szGB / (t1 - t0), szGB / (t2 - t1), t2 - t0))
def inv(dst, src):
    """Run ``operation_split`` on split copies of ``src`` / ``dst``.

    ``src`` is split onto ``matrix_split.grid[1]`` and ``dst`` onto
    ``matrix_split.grid[0]`` (both using ``cache``), the split operation
    is applied, and the result is merged back into ``dst`` with
    ``g.unsplit``.

    Raises ``Exception`` when ``len(src)`` is not a multiple of
    ``nparallel``.
    """
    # verbosity
    verbose = g.default.is_verbose("split")

    remainder = len(src) % nparallel
    if remainder != 0:
        raise Exception(
            f"Cannot divide {len(src)} global vectors into {nparallel} groups"
        )

    t0 = g.time()
    src_split = g.split(src, matrix_split.grid[1], cache)
    dst_split = g.split(dst, matrix_split.grid[0], cache)
    t1 = g.time()

    operation_split(dst_split, src_split)
    t2 = g.time()

    g.unsplit(dst, dst_split, cache)
    t3 = g.time()

    if not verbose:
        return

    g.message(
        f"Split {len(src)} global vectors to {len(src_split)} local vectors\n"
        f"Timing: {t1-t0} s (split), {t2-t1} s (operation), {t3-t2} s (unsplit)"
    )
def __init__(self, op, bs):
    """Build the two block operators and coordinate maps for ``op``.

    For each of the two ``eo`` values a ``sap_blk`` over ``op.U_grid`` is
    created, the four ``U`` fields are filled from ``op.U`` at the block's
    coordinates, ``set_BC_Ufld`` is applied and ``op.updated(U)`` is
    appended to ``self.op_blk``.  A second pair of ``sap_blk`` objects
    over ``op.F_grid`` provides ``self.pos`` and ``self.coor``, which are
    marked read-only.  When ``F_grid.nd == len(bs) + 1`` the block size is
    extended in front by ``F_grid.fdimensions[0]``.
    """
    self.op = op
    self.op_blk = []
    dt = -gpt.time()

    # thanks to double copy inside operator, U only temporary
    Ublk = [sap_blk(op.U_grid, bs, eo) for eo in range(2)]
    U = [gpt.mcolor(Ublk[0].grid) for _ in range(4)]

    for u_block in Ublk:
        Ucoor = u_block.coor(op.U_grid)
        for mu in range(4):
            U[mu][u_block.pos] = op.U[mu][Ucoor]
        u_block.set_BC_Ufld(U)
        self.op_blk.append(op.updated(U))

    needs_extra_dim = self.op.F_grid.nd == len(bs) + 1
    _bs = [self.op.F_grid.fdimensions[0]] + bs if needs_extra_dim else bs

    blk = [sap_blk(self.op.F_grid, _bs, eo) for eo in range(2)]

    self.pos = blk[0].pos
    self.pos.flags["WRITEABLE"] = False

    self.coor = [f_block.coor(op.F_grid) for f_block in blk]
    for coor in self.coor:
        coor.flags["WRITEABLE"] = False

    dt += gpt.time()
    gpt.message(f"SAP Initialized in {dt:g} secs")
def sample(self, t, p):
    """Draw random data according to parameters ``p``.

    * ``t`` is a ``list``: every element is sampled in turn; returns ``t``.
    * ``t`` is ``None``: returns ``cgpt.random_sample(self.obj, p)``.
    * ``t`` is a ``gpt.lattice``: ``cgpt.random_sample`` is called with
      ``p`` extended by ``"lattices": [t]``, then ``t`` is swapped with a
      copy of itself; returns ``t``.

    Any other type of ``t`` trips ``assert 0``.  ``p`` must not contain
    the key ``"pos"``.
    """
    if type(t) == list:
        for x in t:
            self.sample(x, p)
        return t
    elif t is None:
        return cgpt.random_sample(self.obj, p)
    elif type(t) == gpt.lattice:
        # to ensure that deprecated code is not used; checked before the
        # sampling call so that a rejected request never reaches cgpt
        assert "pos" not in p

        t0 = gpt.time()
        cgpt.random_sample(self.obj, {**p, "lattices": [t]})
        t1 = gpt.time()

        # optimize memory mapping
        t.swap(gpt.copy(t))

        if self.verbose_performance:
            szGB = t.global_bytes() / 1024.0**3.0
            gpt.message("Generated %g GB of random data at %g GB/s" %
                        (szGB, szGB / (t1 - t0)))
        return t
    else:
        assert 0
def tell(self):
    """Return ``cgpt.ftell`` of the open file handle ``self.f``."""
    assert self.f is not None
    return cgpt.ftell(self.f)
def write(self, d):
    """Write the buffer ``d`` to the open file handle ``self.f``.

    ``d`` is wrapped in a ``memoryview`` unless it already is one.  An
    ``AssertionError`` is raised when ``cgpt.fwrite`` does not return 1.
    """
    assert self.f is not None
    if not isinstance(d, memoryview):
        d = memoryview(d)
    # The write must not live inside the assert expression: with
    # ``python -O`` asserts are removed and the data would silently never
    # be written.
    n_written = cgpt.fwrite(self.f, len(d), d)
    assert n_written == 1
def seek(self, offset, whence):
    """Forward to ``cgpt.fseek`` on ``self.f`` and return its result."""
    assert self.f is not None
    return cgpt.fseek(self.f, offset, whence)
def __init__(self, fn, md):
    """Open ``fn`` with mode ``md`` through ``cgpt.fopen``.

    Raises ``FileNotFoundError`` (after resetting ``self.f`` to ``None``)
    when ``cgpt.fopen`` returns 0.
    """
    self.f = cgpt.fopen(fn, md)
    if self.f == 0:
        self.f = None
        raise FileNotFoundError("Can not open file %s" % fn)
def __call__(self, mat, src, psi):
    """Iterate on ``psi`` (in place) for the system ``mat`` / ``src``.

    ``mat(in, out)`` applies the operator.  Starting from the residual
    ``src - mat(psi)``, up to ``self.maxiter`` iterations are performed;
    the squared residual of every iteration is appended to
    ``self.history``.  Iteration stops early once the squared residual is
    at most ``self.eps**2 * norm2(src)``.  Progress is reported through
    ``g.message`` when the ``"cg"`` verbosity tag is enabled.

    ``src`` and ``psi`` must compare unequal.
    """
    assert src != psi
    self.history = []
    verbose = g.default.is_verbose("cg")
    t0 = g.time()

    p, mmp, r = g.copy(src), g.copy(src), g.copy(src)

    # initial residual r = src - mat(psi); search direction p = r
    mat(psi, mmp)  # in, out
    r @= src - mmp
    p @= r
    cp = g.norm2(p)

    ssq = g.norm2(src)
    rsq = self.eps**2.0 * ssq

    for k in range(1, self.maxiter + 1):
        c = cp
        mat(p, mmp)
        d = g.innerProduct(p, mmp).real
        a = c / d
        # r <- r - a * mmp, returning the new squared residual norm
        cp = g.axpy_norm2(r, -a, mmp, r)
        b = cp / c
        psi += a * p
        p @= b * p + r
        self.history.append(cp)
        if verbose:
            g.message("res^2[ %d ] = %g" % (k, cp))
        if cp <= rsq:
            if verbose:
                t1 = g.time()
                g.message("Converged in %g s" % (t1 - t0))
            break
def sample(self, t, p):
    """Draw random data according to parameters ``p``.

    * ``t`` is a ``list``: every element is sampled in turn; returns ``t``
      (matching the lattice branch, which also returns its argument).
    * ``t`` is ``None``: returns ``cgpt.random_sample(self.obj, t, p)``.
    * ``t`` is a ``gpt.lattice``: samples at ``p["pos"]`` if given,
      otherwise at ``gpt.coordinates(t)``, stores the result into ``t`` at
      those positions and returns ``t``.

    Any other type of ``t`` trips ``assert 0``.
    """
    if type(t) == list:
        for x in t:
            self.sample(x, p)
        return t
    elif t is None:
        return cgpt.random_sample(self.obj, t, p)
    elif type(t) == gpt.lattice:
        pos = p["pos"] if "pos" in p else gpt.coordinates(t)

        t0 = gpt.time()
        mv = cgpt.random_sample(
            self.obj,
            pos,
            {
                **p,
                "shape": list(t.otype.shape),
                "grid": t.grid.obj,
                "precision": t.grid.precision,
            },
        )
        t1 = gpt.time()

        t[pos] = mv

        if self.verbose:
            szGB = mv.size * mv.itemsize / 1024.0**3.0
            gpt.message("Generated %g GB of random data at %g GB/s" %
                        (szGB, szGB / (t1 - t0)))
        return t
    else:
        assert 0
def read(self, sz):
    """Read ``sz`` bytes from the open file handle and return them as ``bytes``.

    An ``AssertionError`` is raised when ``cgpt.fread`` does not return 1.
    """
    assert self.f is not None
    t = bytes(sz)
    # NOTE(review): cgpt.fread receives a memoryview of an (immutable)
    # bytes object as its destination; presumably it fills the buffer at
    # the C level -- confirm against the cgpt implementation.
    #
    # The read must not live inside the assert expression: with
    # ``python -O`` asserts are removed and the buffer would be returned
    # unfilled.
    n_read = cgpt.fread(self.f, sz, memoryview(t))
    assert n_read == 1
    return t
def test(slv, name):
    """Apply ``slv`` to ``src`` and compare against the reference ``dst_cg``.

    Records the wall time in ``timings[name]`` and the square root of the
    relative squared deviation in ``resid[name]``; asserts that the
    relative squared deviation stays below ``5e-7``.
    """
    started = g.time()
    dst = g.eval(slv * src)
    finished = g.time()

    deviation2 = g.norm2(dst_cg - dst)
    eps2 = deviation2 / g.norm2(dst_cg)

    g.message("%s finished: eps^2(CG) = %g" % (name, eps2))

    timings[name] = finished - started
    resid[name] = eps2**0.5

    assert eps2 < 5e-7
def approx(dst, src):
    """Accumulate the mode expansion of ``src`` into ``dst``.

    ``dst`` is zeroed and then, for every index ``i`` of ``left``,
    ``f_evals[i] * left[i] * <right[i], src>`` is added.  ``src`` and
    ``dst`` must compare unequal.
    """
    assert src != dst
    verbose = g.default.is_verbose("modes")

    t0 = g.time()
    dst[:] = 0
    for i in range(len(left)):
        dst += f_evals[i] * left[i] * g.inner_product(right[i], src)

    if not verbose:
        return

    t1 = g.time()
    g.message("Approximation by %d modes took %g s" % (len(left), t1 - t0))
def __call__(self, matrix, src, dst):
    """Seed ``dst`` with the deflated guess and hand over to ``self.inverter``.

    ``dst`` is zeroed and every vector of ``self.evec`` contributes its
    projection of ``src`` divided by the matching entry of ``self.ev``.

    Returns whatever ``self.inverter(matrix, src, dst)`` returns.
    """
    verbose = g.default.is_verbose("deflate")

    # |dst> = sum_n 1/ev[n] |n><n|src>
    started = g.time()
    dst[:] = 0
    for idx, vec in enumerate(self.evec):
        dst += vec * g.innerProduct(vec, src) / self.ev[idx]
    finished = g.time()

    if verbose:
        g.message("Deflated in %g s" % (finished - started))

    return self.inverter(matrix, src, dst)
def inv(psi, src):
    """Iteratively correct ``psi`` using ``inner_inv_mat`` and ``outer_mat``.

    All entries of ``psi`` are zeroed.  Each iteration applies
    ``inner_inv_mat`` to the current defect vectors, subtracts
    ``outer_mat`` times the correction from the defect, adds the
    correction to ``psi`` and records the largest true residual norm over
    all vectors in ``self.history``.  Iteration stops after
    ``self.maxiter`` passes or as soon as that residual drops below
    ``self.eps``.
    """
    # verbosity
    verbose = g.default.is_verbose("dci")
    t_start = g.time()

    # leading order
    n = len(src)
    defect = [g.copy(x) for x in src]
    for j in range(n):
        psi[j][:] = 0

    self.history = []
    for i in range(self.maxiter):
        # correction step
        t0 = g.time()
        correction = g.eval(inner_inv_mat * defect)
        t1 = g.time()
        for j in range(n):
            defect[j] -= outer_mat * correction[j]
        t2 = g.time()
        for j in range(n):
            psi[j] += correction[j]

        # true resid
        eps = max(
            g.norm2(outer_mat * psi[j] - src[j]) ** 0.5 for j in range(n)
        )
        self.history.append(eps)

        if verbose:
            g.message(
                "Defect-correcting inverter: eps[",
                i,
                "] =",
                eps,
                ". Timing:",
                t1 - t0,
                "s (innver_inv), ",
                t2 - t1,
                "s (outer_mat)",
            )

        if not eps < self.eps:
            continue

        if verbose:
            g.message(
                "Defect-correcting inverter: converged at iteration",
                i,
                "after",
                g.time() - t_start,
                "s",
            )
        break
def __call__(self, src_coarse, dst_coarse):
    """Apply ``self.op`` to a coarse vector via the fine space.

    ``src_coarse`` is promoted into ``self.src_fine`` using
    ``self.basis``, ``self.op`` maps it to ``self.dst_fine``, and the
    result is projected into ``dst_coarse``.  The three stage timings are
    reported when ``self.verbose`` is set.
    """
    started = gpt.time()
    gpt.block.promote(src_coarse, self.src_fine, self.basis)
    promoted = gpt.time()

    self.op(self.src_fine, self.dst_fine)
    applied = gpt.time()

    gpt.block.project(dst_coarse, self.dst_fine, self.basis)
    projected = gpt.time()

    if not self.verbose:
        return

    stages = (promoted - started, applied - promoted, projected - applied)
    gpt.message(
        "Timing: %g s (promote), %g s (matrix), %g s (project)" % stages)
def rotate_basis_to_evec(self, little_evec):
    """Rotate the first ``len(self.H)`` basis vectors by ``little_evec``.

    The rotation is performed by ``g.rotate`` with the contiguous
    transpose of ``little_evec``.  Returns the slice
    ``self.basis[0:len(self.H)]``.
    """
    n = len(self.H)
    rotation = np.ascontiguousarray(little_evec.T)

    t0 = g.time()
    g.rotate(self.basis[0:n], rotation, 0, n, 0, n)
    t1 = g.time()

    if self.verbose:
        g.message(f"Arnoldi: rotate in {t1-t0} s")

    return self.basis[0:n]
def test(a, name):
    """Run eigensolver ``a`` on ``(w, start)`` and validate its output.

    The returned eigenvalues are compared with those recomputed by
    ``g.algorithms.eigen.evals``: each must agree to better than ``1e-6``
    and each squared residual, relative to ``largest_eval**2``, must stay
    below ``1e-5``.
    """
    t0 = g.time()
    evec, evals = a(w, start)
    t1 = g.time()

    evals_test, evals_eps2 = g.algorithms.eigen.evals(w, evec)
    largest_eval = 7.437
    scale = largest_eval**2.0

    g.message(f"{name} finished in {t1-t0} s")

    for i, eps2 in enumerate(evals_eps2):
        assert eps2 / scale < 1e-5
        assert abs(evals_test[i] - evals[i]) < 1e-6
def mat(dst_coarse, src_coarse):
    """Apply ``op`` to a coarse vector by way of the fine space.

    ``src_coarse`` is promoted into ``src_fine``, ``op`` fills
    ``dst_fine`` from it, and the result is projected into
    ``dst_coarse``.  Stage timings are reported when ``verbose`` is set.
    """
    started = gpt.time()
    self.promote(src_fine, src_coarse)
    promoted = gpt.time()

    op(dst_fine, src_fine)
    applied = gpt.time()

    self.project(dst_coarse, dst_fine)
    projected = gpt.time()

    if not verbose:
        return

    stages = (promoted - started, applied - promoted, projected - applied)
    gpt.message(
        "Timing: %g s (promote), %g s (matrix), %g s (project)" % stages
    )
def inv(dst, src):
    # One pass over both block sets (eo = 0, 1): the right-hand side eta is
    # copied into the block layout, solved block-wise with solver[eo], and
    # the block solution is copied back and accumulated into dst.
    #
    # NOTE(review): this function was reconstructed from whitespace-collapsed
    # source.  Which statements belong to the body of `if eo == 0:` and
    # whether the two gpt.message calls sit inside or after the `for` loop
    # cannot be determined from the collapsed text -- confirm against the
    # upstream file before relying on the nesting shown here.
    dst[:] = 0
    eta = gpt.copy(src)
    # ws[0]: block solution scattered back to the full layout,
    # ws[1]: output of sap.op applied to ws[0]
    ws = [gpt.copy(src) for _ in range(2)]

    # accumulated wall times; each phase subtracts the start time and adds
    # the end time
    dt_solv = dt_distr = dt_hop = 0.0
    for eo in range(2):
        ws[0][:] = 0
        dt_distr -= gpt.time()
        src_blk[sap.pos] = eta[sap.coor[eo]]  # reminder view interface eta[[pos]], ... eta[...,idx]
        dt_distr += gpt.time()

        dt_solv -= gpt.time()
        solver[eo](dst_blk, src_blk)
        dt_solv += gpt.time()

        dt_distr -= gpt.time()
        ws[0][sap.coor[eo]] = dst_blk[sap.pos]
        dt_distr += gpt.time()

        dt_hop -= gpt.time()
        if eo == 0:
            sap.op(ws[1], ws[0])
        eta -= ws[1]
        dst += ws[0]
        dt_hop += gpt.time()

    gpt.message(
        f"SAP cycle; |rho|^2 = {gpt.norm2(eta):g}; |dst|^2 = {gpt.norm2(dst):g}"
    )
    gpt.message(
        f"SAP Timings: distr {dt_distr:g} secs, blk_solver {dt_solv:g} secs, hop+update {dt_hop:g} secs"
    )
def little_eig(self):
    """Diagonalize the Hessenberg matrix returned by ``self.hessenberg()``.

    Returns ``(evals, evecs)`` from ``np.linalg.eig``, reordered by
    ``evals.argsort()``; the eigenvectors are the correspondingly
    reordered columns.
    """
    t0 = g.time()
    H = self.hessenberg()
    t1 = g.time()

    evals, little_evec = np.linalg.eig(H)
    t2 = g.time()

    order = evals.argsort()

    if self.verbose:
        g.message(
            f"Arnoldi: hessenberg() in {t1-t0} s and eig(H) in {t2-t1} s")

    sorted_evals = evals[order]
    sorted_evec = little_evec[:, order]
    return sorted_evals, sorted_evec
def mat(dst_coarse, src_coarse):
    """Apply ``fine_operator`` to coarse vectors by way of the fine space.

    One temporary fine lattice (modelled on ``self.basis[0]``) is created
    per coarse source vector for both input and output.  The sources are
    promoted, ``fine_operator`` is applied, and the result is projected
    into ``dst_coarse``.  Timings are reported when ``verbose`` is set.
    """
    template = self.basis[0]
    src_fine = [gpt.lattice(template) for _ in src_coarse]
    dst_fine = [gpt.lattice(template) for _ in src_coarse]

    started = gpt.time()
    self.promote(src_fine, src_coarse)
    promoted = gpt.time()

    fine_operator(dst_fine, src_fine)
    applied = gpt.time()

    self.project(dst_coarse, dst_fine)
    projected = gpt.time()

    if not verbose:
        return

    report = (
        len(src_coarse),
        projected - started,
        promoted - started,
        applied - promoted,
        projected - applied,
    )
    gpt.message(
        "coarse_operator acting on %d vector(s) in %g s (promote %g s, fine_operator %g s, project %g s)"
        % report)
def mat(dst, src):
    """Apply ``coarse_operator`` to fine vectors by way of the coarse space.

    One temporary coarse lattice (on ``self.coarse_grid`` with
    ``coarse_otype``) is created per source vector for both input and
    output.  The sources are projected, ``coarse_operator`` is applied,
    and the result is promoted into ``dst``.  Timings are reported when
    ``verbose`` is set.
    """
    csrc = [gpt.lattice(self.coarse_grid, coarse_otype) for _ in src]
    cdst = [gpt.lattice(self.coarse_grid, coarse_otype) for _ in src]

    started = gpt.time()
    self.project(csrc, src)
    projected = gpt.time()

    coarse_operator(cdst, csrc)
    applied = gpt.time()

    self.promote(dst, cdst)
    promoted = gpt.time()

    if not verbose:
        return

    report = (
        len(src),
        promoted - started,
        projected - started,
        applied - projected,
        promoted - applied,
    )
    gpt.message(
        "fine_operator acting on %d vector(s) in %g s (project %g s, coarse_operator %g s, promote %g s)"
        % report)
def message(*a):
    """Print the arguments, prefixed with a time stamp, on rank 0 only.

    Every argument is converted with ``str`` on all ranks before the rank
    check; stdout is flushed after printing.
    """
    # conversion to string can be an mpi process (i.e. for lattice),
    # so need to do it on all ranks
    parts = list(map(str, a))

    if gpt.rank() != 0:
        return

    stamp = "GPT : %14.6f s :" % gpt.time()
    print(stamp, *parts)
    sys.stdout.flush()
def __call__(self, dst, src):
    """Execute the copy plan from ``src`` to ``dst``.

    Both arguments are normalized with ``gpt.util.to_list``.  When
    ``verbose_performance`` is set, the execution is timed and the
    transferred volume (twice the summed ``"size"`` entries of
    ``self.info()``), the rate, and the average block size are reported.
    """
    dst = gpt.util.to_list(dst)
    src = gpt.util.to_list(src)

    if not verbose_performance:
        cgpt.copy_execute_plan(self.obj, dst, src, self.lattice_view_location)
        return

    t0 = gpt.time()
    cgpt.copy_execute_plan(self.obj, dst, src, self.lattice_view_location)
    t1 = gpt.time()

    blocks = 0
    size = 0
    for per_rank in self.info().values():
        for entry in per_rank.values():
            blocks += entry["blocks"]
            size += entry["size"]

    block_size = size // blocks
    GB = 2 * size / 1e9  # read + write = factor of 2
    gpt.message(
        f"copy_plan: execute: {GB:g} GB at {GB/(t1-t0):g} GB/s/rank with block_size {block_size}"
    )
def save(filename, objs, params):
    """Write ``objs`` to ``filename`` through a ``gpt_io`` writer.

    The writer is created with ``gpt_io(filename, True, params)``, receives
    ``objs`` via ``write`` and is closed afterwards.  The elapsed time is
    reported when the writer's ``verbose`` attribute is set.
    """
    started = gpt.time()

    # create io
    x = gpt_io(filename, True, params)

    # create index
    x.write(objs)

    # close
    x.close()

    # goodbye
    if not x.verbose:
        return

    elapsed = gpt.time() - started
    gpt.message("Completed writing %s in %g s" % (filename, elapsed))