def reconstruct_third_row(self, data):
    """Expand a two-row-compressed link buffer to its full three-row form.

    The input holds 2/3 of the full data; the output buffer is 3/2 the
    size and the missing third row is recomputed by cgpt.  Returns a raw
    memory view over a freshly allocated float64 array.
    """
    compressed_bytes = len(data)
    assert compressed_bytes % 2 == 0
    full_bytes = (compressed_bytes // 2) * 3
    # destination is addressed in float64 words, so it must be 8-byte aligned
    assert full_bytes % 8 == 0
    out = cgpt.mview(cgpt.ndarray([full_bytes // 8], numpy.float64))
    cgpt.munge_reconstruct_third_row(out, data, self.precision.nbytes)
    return out
def __getitem__(self, key):
    """Extract the values at the sites/indices selected by ``key``.

    Supports an optional plan cache packed into ``key``; with a cache
    present, the copy plan is built once under the key "get" and reused.
    A single element covering the full otype shape is returned as a
    tensor, otherwise the raw ndarray of values is returned.
    """
    # split off an optional plan cache from the key
    cache, key = unpack_cache_key(key)
    cache_key = "get" if cache is not None else None

    # resolve key into site positions, tensor indices, and value shape
    pos, tidx, shape = gpt.map_key(self, key)

    # destination buffer for the gathered values
    value = cgpt.ndarray((len(pos), *shape), self.grid.precision.complex_dtype)

    # reuse a cached copy plan when available, otherwise build one
    if cache_key is not None and cache_key in cache:
        copier = cache[cache_key]
    else:
        plan = gpt.copy_plan(value, self)
        dst_blocks = (
            [[self.grid.processor, value, 0, value.nbytes]] if value.nbytes > 0 else None
        )
        plan.destination += gpt.global_memory_view(self.grid, dst_blocks)
        plan.source += gpt.lattice_view(self, pos, tidx)
        copier = plan()
        if cache_key is not None:
            cache[cache_key] = copier

    copier(value, self)

    # if only a single element is returned and we have the full shape,
    # wrap in a tensor
    if len(value) == 1 and shape == self.otype.shape:
        return gpt.util.value_to_tensor(value[0], self.otype)
    return value
def read_lattice(self):
    """Read all gauge fields from the file and return them as a list of lattices.

    Each rank reads its own cartesian slab of the file, munges and
    checksums the raw bytes, then the data is distributed onto the
    lattice objects.  The NERSC header metadata (CHECKSUM, PLAQUETTE,
    LINK_TRACE) is verified; any mismatch aborts via assert.
    """
    # define grid from header
    g = gpt.grid(self.fdimensions, self.precision)
    # create lattice
    l = [gpt.lattice(g, self.otype) for i in range(self.nfields)]
    # performance counters (seconds) and total volume read (GB)
    dt_distr, dt_cs, dt_read, dt_misc = 0.0, 0.0, 0.0, 0.0
    szGB = 0.0
    g.barrier()
    t0 = gpt.time()
    dt_read -= gpt.time()
    # assign a contiguous chunk of file sites to each reading rank
    pos, nreader = distribute_cartesian_file(self.fdimensions, g, l[0].checkerboard())
    if len(pos) > 0:
        # this rank reads sz bytes at its processor-dependent offset
        sz = self.bytes_per_site * len(pos)
        f = gpt.FILE(self.path, "rb")
        f.seek(self.bytes_header + g.processor * sz, 0)
        data = memoryview(f.read(sz))
        f.close()
        dt_misc -= gpt.time()
        # byte-order / layout munging of the raw file data
        data = self.munge(data)
        dt_misc += gpt.time()
        dt_cs -= gpt.time()
        # NERSC checksum is computed on the munged (pre-reconstruction) data
        cs_comp = cgpt.util_nersc_checksum(data, 0)
        dt_cs += gpt.time()
        dt_misc -= gpt.time()
        # e.g. rebuild the third row of two-row-compressed links
        data = self.reconstruct(data)
        assert len(data) % 8 == 0
        # reorder from per-site interleaved fields to per-field blocks
        data_munged = cgpt.mview(
            cgpt.ndarray([len(data) // 8], numpy.float64))
        cgpt.munge_inner_outer(data_munged, data, self.nfields, len(pos))
        data = data_munged
        dt_misc += gpt.time()
        szGB += len(data) / 1024.0**3.0
    else:
        # non-reading ranks contribute empty data and a zero checksum
        data = memoryview(bytearray())
        cs_comp = 0
    # combine per-rank partial checksums and compare against the header
    cs_comp = g.globalsum(cs_comp) & 0xFFFFFFFF
    cs_exp = int(self.metadata["CHECKSUM"].upper(), 16)
    if cs_comp != cs_exp:
        gpt.message(f"cs_comp={cs_comp:X} cs_exp={cs_exp:X}")
        assert False
    dt_read += gpt.time()
    # distributes data accordingly
    g.barrier()
    dt_distr -= gpt.time()
    # shared plan cache so all nfields scatter operations reuse one plan
    cache = {}
    lblock = len(data) // self.nfields
    for i in range(self.nfields):
        l[i][pos, cache] = data[lblock * i:lblock * (i + 1)]
    g.barrier()
    dt_distr += gpt.time()
    g.barrier()
    t1 = gpt.time()
    szGB = g.globalsum(szGB)
    if self.verbose and dt_cs != 0.0:
        gpt.message(
            "Read %g GB at %g GB/s (%g GB/s for distribution, %g GB/s for munged read, %g GB/s for checksum, %g GB/s for munging, %d readers)"
            % (
                szGB,
                szGB / (t1 - t0),
                szGB / dt_distr,
                szGB / dt_read,
                szGB / dt_cs,
                szGB / dt_misc,
                nreader,
            ))
    # also check plaquette and link trace
    P_comp = gpt.qcd.gauge.plaquette(l)
    P_exp = float(self.metadata["PLAQUETTE"])
    # tolerance scales with the number of digits stored in the header,
    # bounded below by the floating-point precision of the data
    P_digits = len(self.metadata["PLAQUETTE"].split(".")[1])
    P_eps = abs(P_comp - P_exp)
    P_eps_threshold = 10.0**(-P_digits + 2)
    P_eps_threshold = max([1e2 * self.precision.eps, P_eps_threshold])
    assert P_eps < P_eps_threshold
    # average normalized link trace over all fields
    L_comp = (sum([
        gpt.sum(gpt.trace(x)) / x.grid.gsites / x.otype.shape[0] for x in l
    ]).real / self.nfields)
    L_exp = float(self.metadata["LINK_TRACE"])
    # digit count must ignore a possible exponent suffix (e.g. "1.23e-05")
    L_digits = len(
        self.metadata["LINK_TRACE"].split(".")[1].lower().split("e")[0])
    L_eps_threshold = 10.0**(-L_digits + 2)
    L_eps_threshold = max([1e2 * self.precision.eps, L_eps_threshold])
    L_eps = abs(L_comp - L_exp)
    assert L_eps < L_eps_threshold
    return l