def __init__(self, path, bunchsize=4 * 1024 * 1024):
    """ Remember where the halo label files live and count the entries.

    Parameters
    ----------
    path : passed on unchanged to `files.DataStorage`
    bunchsize : int, optional
        stored on the instance as `self.bunchsize`

    Notes
    -----
    `self.comm` must already be available; only its `rank` attribute and
    `bcast` method are used here (presumably an MPI communicator -- confirm
    against the enclosing class).
    """
    self.path = path
    self.bunchsize = bunchsize

    # Only rank 0 opens the files.  The total is computed there and just
    # that number is broadcast, rather than shipping the whole storage
    # object to every rank only to sum `npart` again.
    if self.comm.rank == 0:
        datastorage = files.DataStorage(self.path, files.HaloLabelFile)
        size = sum(datastorage.npart)
    else:
        size = None
    self.size = self.comm.bcast(size)
def parallel_read(self, columns, full=False):
    """ read data in parallel. if Full is True, neglect bunchsize.

    Parameters
    ----------
    columns : sequence of str
        names of the columns to read; any sequence (list, tuple, ...)
        is accepted.
    full : bool, optional
        if True a bunchsize of -1 is handed to the storage iterator
        instead of `self.bunchsize`.

    Yields
    ------
    list
        one entry per name in `columns`, in the same order.  `Position`
        is divided by the `boxsize` found in the header of file 0 and
        multiplied by `self.BoxSize`.

    Raises
    ------
    ValueError
        if `full` is True while more than one ptype is selected.
    KeyError
        if `Velocity` would have to be read, i.e. it is listed in
        `columns` or `self.rsd` is not None.  Velocity is not yet
        supported by this reader.
    """
    columns = list(columns)

    if full and len(self.ptype) > 1:
        raise ValueError("cannot read multple ptype in a full load")

    # Velocity (and therefore RSD, which needs it) is not supported.
    # Reject the request before any file is opened or any broadcast is
    # issued, so the failure does not depend on whether this rank
    # happens to receive data.
    if self.rsd is not None or 'Velocity' in columns:
        raise KeyError('Velocity is not yet supported')

    newcolumns = set(columns)
    bunchsize = -1 if full else self.bunchsize

    for ptype in self.ptype:
        args = dict(ptype=ptype,
                    posdtype=self.posdtype,
                    veldtype=self.veldtype,
                    massdtype=self.massdtype,
                    iddtype=self.iddtype)

        # only rank 0 touches the files; the others receive the results
        if self.comm.rank == 0:
            datastorage = files.DataStorage(self.path,
                                            files.GadgetSnapshotFile, args)
            f0 = files.GadgetSnapshotFile(self.path, 0, args)
            boxsize = f0.header['boxsize']
        else:
            datastorage = None
            boxsize = None
        # the order of these two broadcasts must be the same on every rank
        boxsize = self.comm.bcast(boxsize)
        datastorage = self.comm.bcast(datastorage)

        for chunk in datastorage.iter(comm=self.comm,
                                      columns=newcolumns,
                                      bunchsize=bunchsize):
            # NOTE(review): pairing relies on the iterator returning the
            # data in the iteration order of `newcolumns` -- confirm
            # against files.DataStorage.iter
            P = dict(zip(newcolumns, chunk))
            if 'Position' in P:
                # rescale from the snapshot's box to the requested BoxSize
                P['Position'] /= boxsize
                P['Position'] *= self.BoxSize
            yield [P.get(key, None) for key in columns]
def __init__(self, path, BoxSize, rsd=None, bunchsize=4 * 1024 * 1024):
    """ Store the reader settings and determine the total particle count.

    `path`, `BoxSize`, `rsd` and `bunchsize` are kept on the instance
    under the same names.  Rank 0 of `self.comm` sums `npart` of the
    TPM snapshot storage; the result is broadcast and stored as
    `self.size` on every rank.
    """
    self.path, self.BoxSize = path, BoxSize
    self.rsd, self.bunchsize = rsd, bunchsize

    # non-root ranks contribute nothing and simply receive the total
    total = None
    if self.comm.rank == 0:
        storage = files.DataStorage(self.path, files.TPMSnapshotFile)
        total = sum(storage.npart)
    self.size = self.comm.bcast(total)
def parallel_read(self, columns, full=False):
    """ read data in parallel. if Full is True, neglect bunchsize.

    This supports `Position`, `Velocity` columns.  `Mass` and `Weight`
    may be requested but are never read from the files; they are yielded
    as None.

    Parameters
    ----------
    columns : sequence of str
        names of the columns to read; any sequence (list, tuple, ...)
        is accepted.
    full : bool, optional
        if True a bunchsize of -1 is handed to the storage iterator
        instead of `self.bunchsize`.

    Yields
    ------
    list
        one entry per name in `columns`, in the same order.  `Position`
        and `Velocity` are multiplied by `self.BoxSize`.  When
        `self.rsd` names an axis ('x', 'y' or 'z') and `Position` is
        requested, the velocity along that axis is added to the position
        and the result is wrapped with `self.BoxSize[axis]`.

    Raises
    ------
    ValueError
        if redshift distortion is to be applied and `self.rsd` is not
        one of 'x', 'y', 'z'.
    """
    columns = list(columns)

    # RSD modifies Position only, so Velocity is an extra read just when
    # Position is actually requested.  This avoids reading Velocity for
    # nothing on large snapshots, and avoids a KeyError on the missing
    # Position column.
    apply_rsd = self.rsd is not None and 'Position' in columns
    axis = "xyz".index(self.rsd) if apply_rsd else None

    newcolumns = set(columns)
    if apply_rsd:
        newcolumns.add('Velocity')
    # these two are not read from the snapshot; they come back as None
    newcolumns.discard('Mass')
    newcolumns.discard('Weight')

    bunchsize = -1 if full else self.bunchsize

    # only rank 0 touches the files; the others receive the storage
    if self.comm.rank == 0:
        datastorage = files.DataStorage(self.path, files.TPMSnapshotFile)
    else:
        datastorage = None
    datastorage = self.comm.bcast(datastorage)

    for chunk in datastorage.iter(comm=self.comm,
                                  columns=newcolumns,
                                  bunchsize=bunchsize):
        # NOTE(review): pairing relies on the iterator returning the data
        # in the iteration order of `newcolumns` -- confirm against
        # files.DataStorage.iter
        P = dict(zip(newcolumns, chunk))
        if 'Position' in P:
            P['Position'] *= self.BoxSize
        if 'Velocity' in P:
            P['Velocity'] *= self.BoxSize
        if apply_rsd:
            P['Position'][:, axis] += P['Velocity'][:, axis]
            P['Position'][:, axis] %= self.BoxSize[axis]
        yield [P.get(key, None) for key in columns]
def read(self, columns, full=False):
    """ read data in parallel. if Full is True, neglect bunchsize.

    Parameters
    ----------
    columns : sequence of str
        names of the columns to read; any sequence (list, tuple, ...)
        is accepted.
    full : bool, optional
        if True a bunchsize of -1 is handed to the storage iterator
        instead of `self.bunchsize`.

    Yields
    ------
    list
        one entry per name in `columns`, in the same order.  `Position`
        is divided by `self.mpch`.

    Raises
    ------
    KeyError
        if `Velocity` would have to be read, i.e. it is listed in
        `columns` or `self.rsd` is not None.  Velocity is not yet
        supported by this reader.
    """
    columns = list(columns)

    # Velocity (and therefore RSD, which needs it) is not supported.
    # Reject the request before any file is opened or any broadcast is
    # issued, so the failure does not depend on whether this rank
    # happens to receive data.
    if self.rsd is not None or 'Velocity' in columns:
        raise KeyError('Velocity is not yet supported')

    newcolumns = set(columns)
    bunchsize = -1 if full else self.bunchsize

    args = dict(posdtype=self.posdtype,
                veldtype=self.veldtype,
                massdtype=self.massdtype,
                iddtype=self.iddtype)

    # only rank 0 touches the files; the others receive the storage
    if self.comm.rank == 0:
        datastorage = files.DataStorage(self.path,
                                        files.GadgetGroupTabFile, args)
    else:
        datastorage = None
    datastorage = self.comm.bcast(datastorage)

    for chunk in datastorage.iter(comm=self.comm,
                                  columns=newcolumns,
                                  bunchsize=bunchsize):
        # NOTE(review): pairing relies on the iterator returning the data
        # in the iteration order of `newcolumns` -- confirm against
        # files.DataStorage.iter
        P = dict(zip(newcolumns, chunk))
        if 'Position' in P:
            P['Position'] /= self.mpch
        yield [P[key] for key in columns]
def parallel_read(self, columns, full=False):
    """ Yield the requested halo label columns, one bunch at a time.

    When `full` is True the configured bunchsize is neglected and -1 is
    passed to the storage iterator instead.  Every yielded list holds
    one entry per name in `columns`, in the same order.
    """
    chunk = -1 if full else self.bunchsize

    # rank 0 builds the storage object, everybody else receives it
    storage = None
    if self.comm.rank == 0:
        storage = files.DataStorage(self.path, files.HaloLabelFile)
    storage = self.comm.bcast(storage)

    bunches = storage.iter(comm=self.comm, columns=columns, bunchsize=chunk)
    for bunch in bunches:
        lookup = dict(zip(columns, bunch))
        yield [lookup.get(name, None) for name in columns]