Beispiel #1
0
 def reconstruct_third_row(self, data):
     """Return a buffer holding ``data`` with the third row reconstructed.

     The output buffer is sized to 3/2 of ``len(data)`` and is allocated
     as ``float64`` words, so that size must be a multiple of 8.  The
     reconstruction itself is delegated to
     ``cgpt.munge_reconstruct_third_row``, which is also given
     ``self.precision.nbytes``.

     Raises:
         AssertionError: if ``len(data)`` is odd or the enlarged size is
             not a multiple of 8.
     """
     n_in = len(data)
     assert n_in % 2 == 0

     # two stored rows become three
     n_out = n_in // 2 * 3
     assert n_out % 8 == 0

     out = cgpt.mview(cgpt.ndarray([n_out // 8], numpy.float64))
     cgpt.munge_reconstruct_third_row(out, data, self.precision.nbytes)
     return out
Beispiel #2
0
    def __getitem__(self, key):
        """Copy the elements selected by ``key`` into a new array and return it.

        ``key`` is first passed through ``unpack_cache_key``, which yields an
        optional cache mapping and the remaining key.  When a cache is given,
        the executable copy plan is stored in it under ``"get"`` and reused
        on later calls instead of being rebuilt.

        Returns the filled array, or, if exactly one element was selected
        and its shape equals ``self.otype.shape``, the result of
        ``gpt.util.value_to_tensor`` for that single element.
        """
        # split an optional cache mapping off the key
        cache, key = unpack_cache_key(key)
        caching = cache is not None
        slot = "get"

        # general code path: translate the key
        pos, tidx, shape = gpt.map_key(self, key)

        # allocate the array that receives the data
        value = cgpt.ndarray(
            (len(pos), *shape), self.grid.precision.complex_dtype
        )

        if caching and slot in cache:
            # reuse the plan built by an earlier call
            xp = cache[slot]
        else:
            # build a plan copying from the lattice view into `value`
            plan = gpt.copy_plan(value, self)
            if value.nbytes > 0:
                blocks = [[self.grid.processor, value, 0, value.nbytes]]
            else:
                blocks = None
            plan.destination += gpt.global_memory_view(self.grid, blocks)
            plan.source += gpt.lattice_view(self, pos, tidx)
            xp = plan()

            if caching:
                cache[slot] = xp

        # run the copy
        xp(value, self)

        # a single element with the full object shape is wrapped in a tensor
        if len(value) == 1 and shape == self.otype.shape:
            return gpt.util.value_to_tensor(value[0], self.otype)
        return value
Beispiel #3
0
    def read_lattice(self):
        """Read the fields stored in ``self.path`` and return them as lattices.

        Steps, in order:

        1. Build a grid from ``self.fdimensions`` / ``self.precision`` and
           create ``self.nfields`` lattices of type ``self.otype`` on it.
        2. Ask ``distribute_cartesian_file`` which positions this rank reads;
           ranks with a non-empty position list read their slice of the
           file, munge it, checksum it with ``cgpt.util_nersc_checksum``,
           reconstruct it and reorder it with ``cgpt.munge_inner_outer``.
        3. Compare the globally summed checksum (lower 32 bits) with
           ``self.metadata["CHECKSUM"]``.
        4. Distribute the data into the lattices.
        5. Compare the plaquette and the average link trace with
           ``self.metadata["PLAQUETTE"]`` and ``self.metadata["LINK_TRACE"]``.

        Returns:
            list: the ``self.nfields`` lattices that were filled.

        Raises:
            AssertionError: on a checksum mismatch, or if the plaquette or
                link trace deviates from the header value by more than the
                tolerance derived from the number of printed digits.
        """
        # define grid from header
        g = gpt.grid(self.fdimensions, self.precision)
        # create one lattice per field
        l = [gpt.lattice(g, self.otype) for _ in range(self.nfields)]

        # performance counters; each dt_* accumulates via "-= start, += stop"
        dt_distr, dt_cs, dt_read, dt_misc = 0.0, 0.0, 0.0, 0.0
        szGB = 0.0
        g.barrier()
        t0 = gpt.time()

        dt_read -= gpt.time()

        pos, nreader = distribute_cartesian_file(self.fdimensions, g,
                                                 l[0].checkerboard())

        if len(pos) > 0:
            sz = self.bytes_per_site * len(pos)
            f = gpt.FILE(self.path, "rb")
            try:
                # this rank's slice starts after the header, offset by rank
                f.seek(self.bytes_header + g.processor * sz, 0)
                data = memoryview(f.read(sz))
            finally:
                # release the handle even if seek/read fails
                f.close()

            dt_misc -= gpt.time()
            data = self.munge(data)
            dt_misc += gpt.time()

            # checksum is taken on the munged, not yet reconstructed data
            dt_cs -= gpt.time()
            cs_comp = cgpt.util_nersc_checksum(data, 0)
            dt_cs += gpt.time()

            dt_misc -= gpt.time()
            data = self.reconstruct(data)

            # reorder into a float64-word buffer of the same byte length
            assert len(data) % 8 == 0
            data_munged = cgpt.mview(
                cgpt.ndarray([len(data) // 8], numpy.float64))
            cgpt.munge_inner_outer(data_munged, data, self.nfields, len(pos))
            data = data_munged
            dt_misc += gpt.time()

            szGB += len(data) / 1024.0**3.0
        else:
            # this rank reads nothing and contributes nothing to the checksum
            data = memoryview(bytearray())
            cs_comp = 0

        # combine the per-rank checksums and keep the lower 32 bits
        cs_comp = g.globalsum(cs_comp) & 0xFFFFFFFF
        cs_exp = int(self.metadata["CHECKSUM"].upper(), 16)
        if cs_comp != cs_exp:
            gpt.message(f"cs_comp={cs_comp:X} cs_exp={cs_exp:X}")
            assert False

        dt_read += gpt.time()

        # distributes data accordingly
        g.barrier()
        dt_distr -= gpt.time()
        # the same dict is passed for every field
        # NOTE(review): presumably lets the setter reuse work across fields
        cache = {}
        lblock = len(data) // self.nfields
        for i in range(self.nfields):
            l[i][pos, cache] = data[lblock * i:lblock * (i + 1)]
        g.barrier()
        dt_distr += gpt.time()

        g.barrier()
        t1 = gpt.time()

        szGB = g.globalsum(szGB)
        # dt_cs stays 0.0 on ranks that did not read, so they skip the report
        if self.verbose and dt_cs != 0.0:
            gpt.message(
                "Read %g GB at %g GB/s (%g GB/s for distribution, %g GB/s for munged read, %g GB/s for checksum, %g GB/s for munging, %d readers)"
                % (
                    szGB,
                    szGB / (t1 - t0),
                    szGB / dt_distr,
                    szGB / dt_read,
                    szGB / dt_cs,
                    szGB / dt_misc,
                    nreader,
                ))

        # also check plaquette and link trace
        P_comp = gpt.qcd.gauge.plaquette(l)
        P_text = self.metadata["PLAQUETTE"]
        P_exp = float(P_text)
        # count fractional digits of the mantissa only; an exponent suffix
        # (handled the same way as for LINK_TRACE below) must not be counted
        P_digits = len(P_text.split(".")[1].lower().split("e")[0])
        P_eps = abs(P_comp - P_exp)
        # tolerance: two digits looser than printed, never below 100 * eps
        P_eps_threshold = 10.0**(-P_digits + 2)
        P_eps_threshold = max([1e2 * self.precision.eps, P_eps_threshold])
        assert P_eps < P_eps_threshold

        # average over sites, the first otype dimension and fields
        L_comp = (sum([
            gpt.sum(gpt.trace(x)) / x.grid.gsites / x.otype.shape[0] for x in l
        ]).real / self.nfields)
        L_text = self.metadata["LINK_TRACE"]
        L_exp = float(L_text)
        L_digits = len(L_text.split(".")[1].lower().split("e")[0])
        L_eps_threshold = 10.0**(-L_digits + 2)
        L_eps_threshold = max([1e2 * self.precision.eps, L_eps_threshold])
        L_eps = abs(L_comp - L_exp)
        assert L_eps < L_eps_threshold

        return l