def __init__(self, root):
    """Open (or create) the checkpoint file assigned to this rank below *root*."""
    self.verbose = gpt.default.is_verbose("checkpointer")
    self.root = root
    self.grid = None
    # group 32 ranks per subdirectory so no single directory grows too large
    rank_dir = "%s/%2.2d" % (root, gpt.rank() // 32)
    os.makedirs(rank_dir, exist_ok=True)
    self.filename = "%s/%10.10d" % (rank_dir, gpt.rank())
    # prefer reopening an existing checkpoint file; fall back to creating one
    try:
        self.f = gpt.FILE(self.filename, "r+b")
    except FileNotFoundError:
        self.f = gpt.FILE(self.filename, "w+b")
    self.f.seek(0, 1)
def save(filename, objs, params):
    """Write *objs* to directory *filename* using a gpt_io handle.

    The object index is built in memory, then written (with its crc32)
    by rank 0 only. FIX: the index files were previously opened without
    ever being closed; `with` now guarantees flush + close.
    """
    t0 = gpt.time()

    # create io
    x = gpt_io(filename, params, True)

    # create index in an in-memory text buffer
    f = io.StringIO("")
    x.create_index(f, "", objs)
    mvidx = memoryview(f.getvalue().encode("utf-8"))

    # write index and its checksum to fs; only rank 0 touches the filesystem
    index_crc = gpt.crc32(mvidx)
    if gpt.rank() == 0:
        with open(filename + "/index", "wb") as fidx:
            fidx.write(mvidx)
        with open(filename + "/index.crc32", "wt") as fcrc:
            fcrc.write("%X\n" % index_crc)

    # close
    x.close()

    # goodbye
    if x.verbose:
        t1 = gpt.time()
        gpt.message("Completed writing %s in %g s" % (filename, t1 - t0))
def __init__(self, root, write, params):
    """Parallel IO handle rooted at *root*; only rank 0 touches the filesystem."""
    self.root = root
    self.params = params
    self.params["grids"] = {}
    self.verbose = gpt.default.is_verbose("io")
    self.glb = None
    if gpt.rank() == 0:
        os.makedirs(self.root, exist_ok=True)
        if write:
            self.glb = gpt.FILE(root + "/global", "wb")
            # a fresh write invalidates any previously written field data
            for stale in glob.glob("%s/??/*.field" % self.root):
                os.unlink(stale)
        else:
            self.glb = gpt.FILE(root + "/global", "rb")
    self.loc = {}
    self.pos = {}
    self.loc_desc = ""
    self.cache = {}
    # If we write, keep an index buffer
    self.index_file = io.StringIO("") if write else None
    # now sync since only root has created directory
    gpt.barrier()
def __init__(self, root, params, write):
    """IO handle rooted at *root*.

    Normalizes params["grids"]: it may be absent, a single gpt.grid, or a
    list of grids; afterwards it is always a {description: grid} dict.
    Idiom fixes: `"grids" not in` instead of `not ... in`, isinstance()
    instead of type() ==, and a dict comprehension instead of dict([...]).
    """
    self.root = root
    self.params = params
    if "grids" not in self.params:
        self.params["grids"] = {}
    else:
        # accept a single grid as shorthand for a one-element list
        if isinstance(self.params["grids"], gpt.grid):
            self.params["grids"] = [self.params["grids"]]
        self.params["grids"] = {g.describe(): g for g in self.params["grids"]}
    self.verbose = gpt.default.is_verbose("io")
    if gpt.rank() == 0:
        os.makedirs(self.root, exist_ok=True)
        if write:
            self.glb = gpt.FILE(root + "/global", "wb")
            # remove stale field files left over from a previous write
            for f in glob.glob("%s/??/*.field" % self.root):
                os.unlink(f)
        else:
            self.glb = gpt.FILE(root + "/global", "r+b")
    else:
        self.glb = None
    self.loc = {}
    self.pos = {}
    self.loc_desc = ""
    # now sync since only root has created directory
    gpt.barrier()
def next(root, jobs, max_weight=None, stale_seconds=None):
    """Pick the next runnable job (decided on rank 0), run it, and return it.

    Returns None when no job matches the broadcast name.
    """
    # rank 0 decides which job runs next; the name is broadcast to all ranks
    if g.rank() == 0:
        encoded = get_next_name(root, jobs, max_weight, stale_seconds).encode("utf-8")
    else:
        encoded = bytes()
    j_name = g.broadcast(0, encoded).decode("utf-8")
    for job in jobs:
        if job.name != j_name:
            continue
        g.message(
            f"""
--------------------------------------------------------------------------------
Start job {job.name}
--------------------------------------------------------------------------------
"""
        )
        t0 = g.time()
        job(root)
        t1 = g.time()
        g.message(
            f"""
--------------------------------------------------------------------------------
Completed {job.name} in {t1-t0} seconds
--------------------------------------------------------------------------------
"""
        )
        return job
    return None
def message(*a):
    """Print *a* on rank 0, prefixed with the elapsed wall-clock time."""
    # conversion to string can be an mpi process (i.e. for lattice),
    # so need to do it on all ranks
    parts = list(map(str, a))
    if gpt.rank() == 0:
        print("GPT : %14.6f s :" % gpt.time(), *parts)
        sys.stdout.flush()
def perform(self, root):
    """Remove each target's propagators directory below *root* (rank 0 only)."""
    for target in self.targets:
        dst = f"{root}/{target}/propagators"
        g.message(dst, "Delete", os.path.exists(dst))
        # only rank 0 deletes; every rank synchronizes afterwards
        if g.rank() == 0 and os.path.exists(dst):
            shutil.rmtree(dst)
        g.barrier()
def message(*a):
    """Print a timestamped message on rank 0, one prefix per output line.

    Conversion to string can be an mpi process (i.e. for lattice),
    so it needs to be done on all ranks before the rank-0 gate.
    FIX: removed the `len(lines) > 0` guard — str.split always returns
    at least one element, so the check was dead code.
    """
    s = " ".join([str(x) for x in a])
    if gpt.rank() == 0:
        lines = s.split("\n")
        print("GPT : %14.6f s :" % gpt.time(), lines[0])
        for line in lines[1:]:
            # NOTE(review): this continuation prefix may have been intended to
            # align with the timestamp column — confirm against original output
            print(" :", line)
        sys.stdout.flush()
def read_numpy(self, start, end):
    """Read a numpy object stored between byte offsets *start* and *end*.

    On-disk layout: 4-byte little-endian crc32 followed by the npy payload.
    Rank 0 reads from the global file and broadcasts; the checksum is
    verified wherever the stored value is known (rank 0 only).
    Idiom fixes: `is not None`, unparenthesized assert, consistent quotes.
    """
    if gpt.rank() == 0:
        self.glb.seek(start, 0)
        crc32_compare = int.from_bytes(self.glb.read(4), byteorder="little")
        data = self.glb.read(end - start - 4)
    else:
        data = None
        crc32_compare = None
    data = gpt.broadcast(0, data)
    crc32_computed = gpt.crc32(memoryview(data))
    # only rank 0 holds the stored checksum; verify where available
    if crc32_compare is not None:
        assert crc32_computed == crc32_compare
    return numpy.load(io.BytesIO(data))
def flush(self):
    """Write the in-memory index and its crc32 to the filesystem (writers only).

    FIX: the index files were previously opened and closed manually;
    `with` now guarantees flush + close even on error.
    """
    # if we read, no need to flush
    if self.index_file is None:
        return
    # get memoryview of current index
    mvidx = memoryview(self.index_file.getvalue().encode("utf-8"))
    # write index to fs; only rank 0 touches the filesystem
    index_crc = gpt.crc32(mvidx)
    if gpt.rank() == 0:
        with open(self.root + "/index", "wb") as f:
            f.write(mvidx)
        with open(self.root + "/index.crc32", "wt") as f:
            f.write("%X\n" % index_crc)
def download(dst, src):
    """Fetch *src* ("host:/path/...") into local file *dst* on rank 0.

    A local mirror (GPT_REPOSITORY env var) takes precedence over a network
    download from the registered base URL of *host*. All ranks synchronize
    afterwards so the file is visible everywhere.
    FIXES: use `host.endswith(":")` so an empty host fails the assert instead
    of raising IndexError; close the os.scandir iterator instead of leaking
    a directory handle.
    """
    src_a = src.split("/")
    assert len(src_a) >= 3
    host = src_a[0]
    assert host.endswith(":")
    assert src_a[1] == ""
    path = "/".join(src_a[2:])
    assert host in base
    baseurl = base[host]
    if gpt.rank() == 0:
        verbose = gpt.default.is_verbose("repository")
        t0 = gpt.time()
        if "GPT_REPOSITORY" in os.environ:
            # local mirror takes precedence over network download
            root = os.environ["GPT_REPOSITORY"]
            shutil.copy2(f"{root}/{path}", dst)
            mode = "copy"
        else:
            filename, header = request.urlretrieve(f"{baseurl}/{path}", filename=dst)
            mode = "download"
        t1 = gpt.time()
        filesize = os.path.getsize(dst)
        speedMBs = filesize / 1024.0 ** 2.0 / (t1 - t0)
        if verbose:
            gpt.message(f"Repository {mode} {src} in {t1-t0:g} s at {speedMBs:g} MB/s")
    # add a barrier so that all nodes have file after download
    gpt.barrier()
    # os.scandir to trigger network filesystem synchronization;
    # use it as a context manager so the directory handle is released
    with os.scandir(os.path.dirname(dst)):
        pass
def __call__(self, expression, verbose=False, separate_diagrams=False):
    """Perform all wick contractions of *expression* and evaluate the diagrams.

    Parameters:
        expression: expression with open indices to contract and evaluate.
        verbose: print progress information.
        separate_diagrams: if True, return a list with one summed result per
            diagram; otherwise return the sum over all diagrams.
    """
    if verbose:
        g.message(f"Open indices: {expression.indices}")
    f_context = fields_context()
    expression.contract(f_context, "")
    if verbose:
        g.message(f"Performing {len(f_context.fields)} wick contractions")
    # fill context with index - value pairs, initialize some to arguments given here
    # perform all wick contractions and iterate through them here, each contraction
    contractions = f_context.contract(verbose)
    if verbose:
        g.message(f"Resulting in {len(contractions)} diagrams")
    results = []
    diag_index = 0
    for sign, propagators in contractions:
        e_context = evaluation_context()
        e_context["propagators"] = propagators
        # set coordinates of propagators
        for propagator in propagators:
            for j, i in enumerate(f_context.coordinate_arguments[propagator[0]]):
                e_context[propagator[0] + "_c" + str(j)] = i
        # iterate through indices of propagators
        e_context["propagators_indices"] = []
        e_context["n"] = 1
        for propagator in propagators:
            for j, i in enumerate(f_context.index_arguments[propagator[0]]):
                e_context["propagators_indices"].append(
                    (propagator[0] + "_i" + str(j), i[1]))
                e_context["n"] *= i[1]
        # round-robin distribution: the owning rank evaluates the diagram, the
        # others append a placeholder tensor (assumed zero-valued so that the
        # global_sum below reconciles contributions — confirm tensor() zeroes)
        if diag_index % g.ranks() == g.rank():
            r = expression.evaluate(e_context, "")
        else:
            r = tensor(basis(expression.indices), e_context["n"])
        results.append(sign * r)
        # BUGFIX: advance the diagram counter; it was initialized but never
        # incremented, so every diagram was assigned to rank 0 and the
        # round-robin distribution above had no effect
        diag_index += 1
    # for each propagator do a loop over its spin/color indices following the algorithm below
    # set the index in the context for each field that is part of a propagator; they
    # should then evaluate as below; if a field is not part of the diagram, it should evaluate
    # to zero
    # <u(a) ubar(b)> = D^{-1}_{ab} = sum_{c} \delta_{ac} D^{-1}_{cb}
    # outer sum over c, evaluate field(a) to delta_{ac}, field.bar(b) to D^{-1}_{cb}
    if separate_diagrams:
        return [r.sum().global_sum().tensor_remove() for r in results]
    else:
        res = tensor(basis([]), results[0].n_parallel)
        for r in results:
            res = res + r
        return res.sum().global_sum().tensor_remove()
# for conf in groups[group]["confs"]: for group in groups: for conf in groups[group]["confs"]: for job in jobs: jid += 1 if only_on_conf is not None and only_on_conf != conf: continue root_job = f"{root_output}/{conf}/{job}" if not os.path.exists(root_job): os.makedirs(root_job) return group, job, conf, jid, n return None if g.rank() == 0: first_job = get_job() run_jobs = str( list( filter( lambda x: x is not None, [first_job] + [get_job(first_job[2]) for i in range(1, jobs_per_run)], ))).encode("utf-8") else: run_jobs = bytes() run_jobs = eval(g.broadcast(0, run_jobs).decode("utf-8")) # every node now knows what to do g.message(""" ================================================================================
def __init__(self, fn):
    """Rank 0 opens *fn* read/write (truncating); other ranks keep no handle."""
    self.f = open(fn, "w+b") if gpt.rank() == 0 else None
def read_lattice_single(self):
    """Read a lattice stored as a single file.

    Rank 0 reads the full payload, checks its crc32 against the header value,
    converts the byte layout via self.swap, and the result is distributed to
    all ranks through the lattice coordinate assignment `l[pos] = data`.
    """
    # a negative header size means the header was never parsed successfully
    # NOTE(review): bare `raise` outside an except block raises RuntimeError;
    # a dedicated exception type would be clearer
    if self.bytes_header < 0:
        raise
    # define grid from header
    g = gpt.grid(self.fdimensions, self.precision)
    # create lattice
    l = gpt.lattice(g, self.otype)
    # performance counters (seconds accumulated per phase)
    dt_distr, dt_crc, dt_read, dt_misc = 0.0, 0.0, 0.0, 0.0
    szGB = 0.0
    crc_comp = 0
    g.barrier()
    t0 = gpt.time()
    # single file: rank 0 opens it and reads it all, then distributes below
    g.barrier()
    dt_read -= gpt.time()
    # view of the sites this rank owns; non-root ranks own none (asserted below)
    cv = gpt.cartesian_view(gpt.rank(), self.cv_desc, g.fdimensions, g.cb, l.checkerboard())
    pos = gpt.coordinates(cv)
    if gpt.rank() == 0:
        f = gpt.FILE(self.path, "rb")
        f.seek(self.bytes_header, 0)
        sz = self.size * int(numpy.prod(g.fdimensions))
        data = memoryview(bytearray(f.read(sz)))
        f.close()
        dt_crc -= gpt.time()
        crc_comp = gpt.crc32(data)
        # the expected checksum is stored in hexadecimal text form
        crc_comp = f"{crc_comp:8X}"
        assert crc_comp == self.crc_exp
        dt_crc += gpt.time()
        dt_misc -= gpt.time()
        # presumably endianness/layout conversion — confirm self.swap semantics
        self.swap(data)
        dt_misc += gpt.time()
        sys.stdout.flush()
        szGB += len(data) / 1024.0**3.0
    else:
        assert len(pos) == 0
        data = None
    g.barrier()
    dt_read += gpt.time()
    # distributes data accordingly
    dt_distr -= gpt.time()
    l[pos] = data
    g.barrier()
    dt_distr += gpt.time()
    g.barrier()
    t1 = gpt.time()
    szGB = g.globalsum(szGB)
    if self.verbose and dt_crc != 0.0:
        gpt.message(
            "Read %g GB at %g GB/s (%g GB/s for distribution, %g GB/s for reading + checksum, %g GB/s for checksum, %d views per node)"
            % (
                szGB,
                szGB / (t1 - t0),
                szGB / dt_distr,
                szGB / dt_read,
                szGB / dt_crc,
                1,
            ))
    return l
def save(filename, objs, params):
    """Save (basis, cevec, ev) in the compressed eigenvector file format.

    objs = [basis vectors, coarse-grid eigenvectors, eigenvalues]. The first
    `nsingle` basis vectors are stored in fp32, the rest in fp16; coarse
    vectors are stored mixed fp32/fp16. Metadata and per-file crc32 values
    are written by rank 0 into metadata.txt.
    FIXES relative to the original: the eigenvalue file is closed via `with`;
    the per-view data file is closed explicitly; the progress modulo uses
    max(neigen // 10, 1) to avoid ZeroDivisionError when neigen < 10.
    """
    # split data to save
    assert len(objs) == 3
    basis = objs[0]
    cevec = objs[1]
    ev = objs[2]

    # verbosity
    verbose = gpt.default.is_verbose("io")
    if verbose:
        gpt.message(
            "Saving %d basis vectors, %d coarse-grid vectors, %d eigenvalues to %s"
            % (len(basis), len(cevec), len(ev), filename))

    # create directory
    if gpt.rank() == 0:
        os.makedirs(filename, exist_ok=True)

    # now sync since only root has created directory
    gpt.barrier()

    # write eigenvalues
    if gpt.rank() == 0:
        with open("%s/eigen-values.txt" % filename, "wt") as f:
            f.write("%d\n" % len(ev))
            for v in ev:
                f.write("%.15E\n" % v)

    # site checkerboard
    # only odd is used in this file format but
    # would be easy to generalize here
    site_cb = gpt.odd

    # grids
    assert len(basis) > 0
    assert len(cevec) > 0
    fgrid = basis[0].grid
    cgrid = cevec[0].grid

    # mpi layout
    if params["mpi"] is not None:
        mpi = params["mpi"]
    else:
        mpi = fgrid.mpi
    assert mpi[0] == 1  # assert no mpi in 5th direction

    # params
    assert basis[0].checkerboard() == site_cb
    nd = 5
    assert len(fgrid.ldimensions) == nd
    fdimensions = fgrid.fdimensions
    ldimensions = [conformDiv(fdimensions[i], mpi[i]) for i in range(nd)]
    assert fgrid.precision == gpt.single
    s = ldimensions
    b = [
        conformDiv(fgrid.fdimensions[i], cgrid.fdimensions[i])
        for i in range(nd)
    ]
    nb = [conformDiv(s[i], b[i]) for i in range(nd)]
    neigen = len(cevec)
    nbasis = len(basis)
    if "nsingle" in params:
        nsingle = params["nsingle"]
        assert nsingle <= nbasis
    else:
        nsingle = nbasis
    nsingleCap = min([nsingle, nbasis])
    blocks = numpy.prod(nb)
    FP16_COEF_EXP_SHARE_FLOATS = 10

    # write metadata (rank 0 only); note the (i + 1) % nd cyclic shift of axes
    if gpt.rank() == 0:
        fmeta = open("%s/metadata.txt" % filename, "wt")
        for i in range(nd):
            fmeta.write("s[%d] = %d\n" % (i, s[(i + 1) % nd]))
        for i in range(nd):
            fmeta.write("b[%d] = %d\n" % (i, b[(i + 1) % nd]))
        for i in range(nd):
            fmeta.write("nb[%d] = %d\n" % (i, nb[(i + 1) % nd]))
        fmeta.write("neig = %d\n" % neigen)
        fmeta.write("nkeep = %d\n" % nbasis)
        fmeta.write("nkeep_single = %d\n" % nsingle)
        fmeta.write("blocks = %d\n" % blocks)
        fmeta.write("FP16_COEF_EXP_SHARE_FLOATS = %d\n" % FP16_COEF_EXP_SHARE_FLOATS)
        fmeta.flush()
        # write crc32 later

    # create cartesian view on fine grid
    cv0 = gpt.cartesian_view(-1, mpi, fdimensions, fgrid.cb, site_cb)
    views = cv0.views_for_node(fgrid)
    crc32 = numpy.array([0] * cv0.ranks, dtype=numpy.uint64)

    # timing (tiny epsilons avoid division by zero in the rate messages)
    t0 = gpt.time()
    totalSizeGB = 0
    dt_fp16 = 1e-30
    dt_distr = 1e-30
    dt_munge = 1e-30
    dt_crc = 1e-30
    dt_fwrite = 1e-30
    t0 = gpt.time()

    # load all views
    if verbose:
        gpt.message("Saving %s with %d views per node" % (filename, len(views)))
    for i, v in enumerate(views):
        cv = gpt.cartesian_view(v if v is not None else -1, mpi, fdimensions,
                                fgrid.cb, site_cb)
        cvc = gpt.cartesian_view(v if v is not None else -1, mpi,
                                 cgrid.fdimensions, gpt.full, gpt.none)
        pos_coarse = gpt.coordinates(cvc, "canonical")
        dn, fn = get_local_name(filename, cv)
        if fn is not None:
            os.makedirs(dn, exist_ok=True)

        # sizes
        slot_lsites = numpy.prod(cv.view_dimensions)
        assert slot_lsites % blocks == 0
        block_data_size_single = slot_lsites * 12 // 2 // blocks * 2 * 4
        block_data_size_fp16 = FP_16_SIZE(slot_lsites * 12 // 2 // blocks * 2, 24)
        coarse_block_size_part_fp32 = 2 * (4 * nsingleCap)
        coarse_block_size_part_fp16 = 2 * (FP_16_SIZE(
            nbasis - nsingleCap, FP16_COEF_EXP_SHARE_FLOATS))
        coarse_vector_size = (coarse_block_size_part_fp32 +
                              coarse_block_size_part_fp16) * blocks
        totalSize = (
            blocks * (block_data_size_single * nsingleCap +
                      block_data_size_fp16 * (nbasis - nsingleCap)) +
            neigen * coarse_vector_size)
        totalSizeGB += totalSize / 1024.0**3.0 if v is not None else 0.0

        # checksum
        crc32_comp = 0

        # file
        f = gpt.FILE(fn, "wb") if fn is not None else None

        # block positions (b here is the block index, shadowing the outer
        # blocking list b only inside the comprehension scope)
        pos = [
            cgpt.coordinates_from_block(cv.top, cv.bottom, b, nb, "canonicalOdd")
            for b in range(blocks)
        ]

        # group blocks to limit the number of read passes
        read_blocks = blocks
        block_reduce = 1
        max_read_blocks = get_param(params, "max_read_blocks", 8)
        while read_blocks > max_read_blocks and read_blocks % 2 == 0:
            pos = [
                numpy.concatenate((pos[2 * i + 0], pos[2 * i + 1]))
                for i in range(read_blocks // 2)
            ]
            block_data_size_single *= 2
            block_data_size_fp16 *= 2
            read_blocks //= 2
            block_reduce *= 2

        # make read-only to enable caching
        for x in pos:
            x.setflags(write=0)

        # single-precision data
        data = memoryview(bytearray(block_data_size_single * nsingleCap))
        data_munged = memoryview(bytearray(block_data_size_single * nsingleCap))
        for b in range(read_blocks):
            fgrid.barrier()
            dt_distr -= gpt.time()
            # build the copy plan once per block against basis[0]; the same
            # plan is then applied to every basis vector
            lhs_size = basis[0].otype.nfloats * 4 * len(pos[b])
            lhs = data_munged[0:lhs_size]
            distribute_plan = gpt.copy_plan(lhs, basis[0])
            distribute_plan.destination += gpt.global_memory_view(
                fgrid, [[fgrid.processor, lhs, 0, lhs.nbytes]])
            distribute_plan.source += basis[0].view[pos[b]]
            distribute_plan = distribute_plan()
            lhs = None
            for i in range(nsingleCap):
                distribute_plan(
                    data_munged[block_data_size_single * i:block_data_size_single * (i + 1)],
                    basis[i],
                )
            dt_distr += gpt.time()

            if f is not None:
                dt_munge -= gpt.time()
                cgpt.munge_inner_outer(
                    data,
                    data_munged,
                    block_reduce,
                    nsingleCap,
                )
                dt_munge += gpt.time()
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()

            fgrid.barrier()
            dt_fwrite -= gpt.time()
            if f is not None:
                f.write(data)
                globalWriteGB = len(data) / 1024.0**3.0
            else:
                globalWriteGB = 0.0
            globalWriteGB = fgrid.globalsum(globalWriteGB)
            dt_fwrite += gpt.time()
            totalSizeGB += globalWriteGB

            if verbose:
                gpt.message(
                    "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fwrite,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                    ))

        # fp16 data
        if nbasis != nsingleCap:
            # allocate data buffer
            data_fp32 = memoryview(
                bytearray(block_data_size_single * (nbasis - nsingleCap)))
            data_munged = memoryview(
                bytearray(block_data_size_single * (nbasis - nsingleCap)))
            data = memoryview(
                bytearray(block_data_size_fp16 * (nbasis - nsingleCap)))
            for b in range(read_blocks):
                fgrid.barrier()
                dt_distr -= gpt.time()
                lhs_size = basis[0].otype.nfloats * 4 * len(pos[b])
                lhs = data_munged[0:lhs_size]
                distribute_plan = gpt.copy_plan(lhs, basis[0])
                distribute_plan.destination += gpt.global_memory_view(
                    fgrid, [[fgrid.processor, lhs, 0, lhs.nbytes]])
                distribute_plan.source += basis[0].view[pos[b]]
                distribute_plan = distribute_plan()
                lhs = None
                for i in range(nsingleCap, nbasis):
                    j = i - nsingleCap
                    distribute_plan(
                        data_munged[j * block_data_size_single:(j + 1) * block_data_size_single],
                        basis[i],
                    )
                dt_distr += gpt.time()

                if f is not None:
                    dt_munge -= gpt.time()
                    cgpt.munge_inner_outer(
                        data_fp32,
                        data_munged,
                        block_reduce,
                        nbasis - nsingleCap,
                    )
                    dt_munge += gpt.time()
                    dt_fp16 -= gpt.time()
                    cgpt.fp32_to_fp16(data, data_fp32, 24)
                    dt_fp16 += gpt.time()
                    dt_crc -= gpt.time()
                    crc32_comp = gpt.crc32(data, crc32_comp)
                    dt_crc += gpt.time()

                fgrid.barrier()
                dt_fwrite -= gpt.time()
                if f is not None:
                    f.write(data)
                    globalWriteGB = len(data) / 1024.0**3.0
                else:
                    globalWriteGB = 0.0
                globalWriteGB = fgrid.globalsum(globalWriteGB)
                dt_fwrite += gpt.time()
                totalSizeGB += globalWriteGB

                if verbose:
                    gpt.message(
                        "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s"
                        % (
                            totalSizeGB,
                            totalSizeGB / dt_fwrite,
                            totalSizeGB / dt_crc,
                            totalSizeGB / dt_munge,
                            totalSizeGB / dt_distr,
                            totalSizeGB / dt_fp16,
                        ))

        # coarse grid data
        data = memoryview(bytearray(coarse_vector_size))
        data_fp32 = memoryview(
            bytearray(cevec[0].otype.nfloats * 4 * len(pos_coarse)))
        distribute_plan = gpt.copy_plan(data_fp32, cevec[0])
        distribute_plan.destination += gpt.global_memory_view(
            cgrid, [[cgrid.processor, data_fp32, 0, data_fp32.nbytes]])
        distribute_plan.source += cevec[0].view[pos_coarse]
        distribute_plan = distribute_plan()
        for j in range(neigen):
            fgrid.barrier()
            dt_distr -= gpt.time()
            distribute_plan(data_fp32, cevec[j])
            dt_distr += gpt.time()

            if f is not None:
                dt_fp16 -= gpt.time()
                cgpt.fp32_to_mixed_fp32fp16(
                    data,
                    data_fp32,
                    coarse_block_size_part_fp32,
                    coarse_block_size_part_fp16,
                    FP16_COEF_EXP_SHARE_FLOATS,
                )
                dt_fp16 += gpt.time()
                dt_crc -= gpt.time()
                crc32_comp = gpt.crc32(data, crc32_comp)
                dt_crc += gpt.time()

            fgrid.barrier()
            dt_fwrite -= gpt.time()
            if f is not None:
                f.write(data)
                globalWriteGB = len(data) / 1024.0**3.0
            else:
                globalWriteGB = 0.0
            globalWriteGB = fgrid.globalsum(globalWriteGB)
            dt_fwrite += gpt.time()
            totalSizeGB += globalWriteGB

            # BUGFIX: neigen // 10 is zero for neigen < 10, which made the
            # original modulo raise ZeroDivisionError; clamp the stride to 1
            if verbose and j % max(neigen // 10, 1) == 0:
                gpt.message(
                    "* write %g GB: fwrite at %g GB/s, crc32 at %g GB/s, munge at %g GB/s, distribute at %g GB/s, fp16 at %g GB/s"
                    % (
                        totalSizeGB,
                        totalSizeGB / dt_fwrite,
                        totalSizeGB / dt_crc,
                        totalSizeGB / dt_munge,
                        totalSizeGB / dt_distr,
                        totalSizeGB / dt_fp16,
                    ))

        # FIX: close this view's data file explicitly instead of relying on
        # the gpt.FILE destructor
        if f is not None:
            f.close()

        # save crc
        crc32[cv.rank] = crc32_comp

    # synchronize crc32
    fgrid.globalsum(crc32)

    # timing
    t1 = gpt.time()

    # write crc to metadata
    if gpt.rank() == 0:
        for i in range(len(crc32)):
            fmeta.write("crc32[%d] = %X\n" % (i, crc32[i]))
        fmeta.close()

    # verbosity
    if verbose:
        gpt.message("* save %g GB at %g GB/s" % (totalSizeGB, totalSizeGB / (t1 - t0)))
class job_create_file(g.jobs.base): def __init__(self, fn, needfn): self.fn = fn super().__init__("file_" + fn, ["file_" + f for f in needfn]) def perform(self, root): f = open(f"{root}/{self.name}/{self.fn}", "wt") f.write(self.fn + "\n") f.close() def check(self, root): return os.path.exists(f"{root}/{self.name}/{self.fn}") if os.path.exists(root) and g.rank() == 0: shutil.rmtree(root) g.barrier() fail_B = True for jid in range(5): j = g.jobs.next( root, [ job_create_file("A", []), job_create_file("C", ["B", "A"]), job_create_file("B", ["A"]), ], stale_seconds=0.0,
class job_create_file(g.jobs.base): def __init__(self, fn, needfn): self.fn = fn super().__init__("file_" + fn, ["file_" + f for f in needfn]) def perform(self, root): f = open(f"{root}/{self.name}/{self.fn}", "wt") f.write(self.fn + "\n") f.close() def check(self, root): return os.path.exists(f"{root}/{self.name}/{self.fn}") if os.path.exists("test.root") and g.rank() == 0: shutil.rmtree("test.root") g.barrier() fail_B = True for jid in range(5): j = g.jobs.next( "test.root", [ job_create_file("A", []), job_create_file("C", ["B", "A"]), job_create_file("B", ["A"]), ], stale_seconds=0.0,