def getPositions(self, first=None, last=None, save=True):

    # Get data pointer
    data = bigfile.BigData(self.fp)

    # Read in positions in Mpc/h
    if (first is None) or (last is None):
        positions = data["Position"][:] * self.Mpc_over_h
        aemit = data["Aemit"][:]
    else:
        positions = data["Position"][first:last] * self.Mpc_over_h
        aemit = data["Aemit"][first:last]

    # Enforce periodic boundary conditions
    for n in (0, 1):
        positions[:, n][positions[:, n] < 0] += self.header["box_size"]
        positions[:, n][positions[:, n] > self.header["box_size"]] -= self.header["box_size"]

    # Maybe save
    if save:
        self.positions = positions
        self.aemit = aemit

    # Initialize unused attributes to None
    self.weights = None
    self.virial_radius = None
    self.concentration = None

    # Return
    return positions
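# Hedged sketch (not part of the original class): a minimal, self-contained
# illustration of the periodic-boundary wrapping used above, assuming a plain
# float box size instead of the snapshot header. Coordinates falling outside
# [0, box_size] along the first two axes are shifted back by one box length.
import numpy as np

def wrap_positions(positions, box_size):
    positions = positions.copy()
    for n in (0, 1):
        positions[:, n][positions[:, n] < 0] += box_size
        positions[:, n][positions[:, n] > box_size] -= box_size
    return positions

# Example: a point slightly outside a 100 Mpc/h box gets folded back inside
print(wrap_positions(np.array([[-1.0, 101.0, 50.0]]), 100.0))  # [[99. 1. 50.]]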
def __init__(self, cachedir, aliases):
    import bigfile
    self.cachedir = cachedir

    # Open (or create) the cache bigfile and record its size and dtype
    with bigfile.BigFile(cachedir, create=True) as bf:
        bd = bigfile.BigData(bf)
        self._size = bd.size
        self._dtype = bd.dtype

    # Map each new column name to its (old name, transform) pair
    self.aliases = dict([(new, (old, transform)) for old, new, transform in aliases])

    ColumnStore.__init__(self)
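# Hedged sketch (hypothetical values): how the 'aliases' argument above is
# reshaped. Each (old, new, transform) triple becomes an entry keyed by the
# new column name, storing the original column name and the transform.
aliases = [("Position", "InitPosition", lambda x: x),
           ("ID", "ParticleID", None)]
alias_map = dict([(new, (old, transform)) for old, new, transform in aliases])
# alias_map == {'InitPosition': ('Position', <lambda>), 'ParticleID': ('ID', None)}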
def read(self, columns, start, stop, step=1):
    """
    Read the specified column(s) over the given range, as a dictionary

    'start' and 'stop' should be between 0 and :attr:`size`,
    which is the total size of the binary file (in particles)
    """
    import bigfile
    if isinstance(columns, string_types):
        columns = [columns]

    with bigfile.BigFile(filename=self.path)[self.dataset] as f:
        ds = bigfile.BigData(f, columns)
        return ds[start:stop][::step]
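# Hedged usage sketch: 'source' is a placeholder instance of the class above
# and the column names are assumptions. read() returns the requested columns
# for particles in [start, stop), thinned by 'step'.
# data = source.read(["Position", "Velocity"], start=0, stop=1000, step=10)
# pos = data["Position"]   # positions of every 10th particle in the range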
def setLimits(self):

    if self.pool is None:
        self._first = None
        self._last = None
    else:
        # Divide equally between tasks
        Nt, Np = self.pool.size + 1, bigfile.BigData(self.fp).size
        part_per_task = Np // Nt
        self._first = part_per_task * self.pool.rank
        self._last = part_per_task * (self.pool.rank + 1)

        # Add the remainder to the last task
        if (Np % Nt) and (self.pool.rank == Nt - 1):
            self._last += Np % Nt
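# Hedged sketch (standalone, no MPI pool): the same even-split arithmetic as
# setLimits above, with the remainder particles assigned to the last task.
def particle_range(rank, Nt, Np):
    part_per_task = Np // Nt
    first = part_per_task * rank
    last = part_per_task * (rank + 1)
    if (Np % Nt) and (rank == Nt - 1):
        last += Np % Nt
    return first, last

# Example: 10 particles over 3 tasks -> (0, 3), (3, 6), (6, 10)
print([particle_range(r, 3, 10) for r in range(3)])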
def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
    if not dataset.endswith('/'):
        dataset = dataset + '/'

    import bigfile
    self.dataset = dataset
    self.path = path

    # store the attributes
    self.attrs = {}

    # the file path
    with bigfile.BigFile(filename=path) as ff:
        columns = ff[self.dataset].blocks
        if header is Automatic:
            for header in ['Header', 'header', './']:
                if header in columns:
                    break

        if exclude is None:
            exclude = [header]

        columns = list(set(columns) - set(exclude))
        ds = bigfile.BigData(ff[self.dataset], columns)

        # set the data type and size
        self.dtype = ds.dtype
        self.size = ds.size

        header = ff[header]
        attrs = header.attrs

        # copy over the attrs
        for k in attrs.keys():

            # load a JSON representation if str starts with json://
            if isinstance(attrs[k], string_types) and attrs[k].startswith('json://'):
                self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
            # copy over an array
            else:
                self.attrs[k] = numpy.array(attrs[k], copy=True)
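# Hedged sketch: how the 'json://' attribute convention above round-trips a
# value, using the standard json module (the plain decoder stands in for the
# custom JSONDecoder class used by the original code).
import json

raw = 'json://' + json.dumps({"Omega0": 0.3, "h": 0.7})
if raw.startswith('json://'):
    value = json.loads(raw[7:])   # strip the 7-character 'json://' prefix
# value == {'Omega0': 0.3, 'h': 0.7}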
def parallel_read(self, columns, full=False):
    f = bigfile.BigFileMPI(self.comm, self.path)
    readcolumns = set(columns)

    # remove columns not in the file (None will be returned)
    for col in list(readcolumns):
        if col not in f:
            readcolumns.remove(col)

    done = False
    i = 0
    while not numpy.all(self.comm.allgather(done)):
        ret = []
        dataset = bigfile.BigData(f, readcolumns)

        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        if not full:
            bunchstart = start + i * self.bunchsize
            bunchend = start + (i + 1) * self.bunchsize
            if bunchend > end:
                bunchend = end
            if bunchstart > end:
                bunchstart = end
        else:
            bunchstart = start
            bunchend = end

        if bunchend == end:
            done = True

        P = {}
        for column in readcolumns:
            data = dataset[column][bunchstart:bunchend]
            P[column] = data

        i = i + 1
        yield [P.get(column, None) for column in columns]
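# Hedged sketch (no MPI, hypothetical numbers): the chunking scheme used in
# parallel_read above. Each rank owns [start, end) of the total particle
# count and walks through it in pieces of 'bunchsize' until it reaches 'end'.
def bunches(rank, size, Ntot, bunchsize):
    start = rank * Ntot // size
    end = (rank + 1) * Ntot // size
    i = 0
    done = False
    while not done:
        bunchstart = min(start + i * bunchsize, end)
        bunchend = min(start + (i + 1) * bunchsize, end)
        if bunchend == end:
            done = True
        yield bunchstart, bunchend
        i += 1

# Example: rank 0 of 2 over 10 particles, bunchsize 2 -> (0, 2), (2, 4), (4, 5)
print(list(bunches(0, 2, 10, 2)))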
def parallel_read(self, columns, full=False):
    f = bigfile.BigFileMPI(self.comm, self.path)
    try:
        header = f['.']
    except:
        header = f['header']

    boxsize = header.attrs['BoxSize'][0]
    RSD = header.attrs['RSDFactor'][0]

    if boxsize != self.BoxSize[0]:
        raise ValueError("Box size mismatch, expecting %g" % boxsize)

    readcolumns = set(columns)
    if self.rsd is not None:
        readcolumns = set(columns + ['Velocity'])

    if 'InitialPosition' in columns:
        readcolumns.add('ID')
        readcolumns.remove('InitialPosition')

    if 'Mass' in readcolumns:
        readcolumns.remove('Mass')

    # remove columns not in the file (None will be returned)
    for col in list(readcolumns):
        if col not in f:
            readcolumns.remove(col)

    done = False
    i = 0
    while not numpy.all(self.comm.allgather(done)):
        ret = []
        dataset = bigfile.BigData(f, readcolumns)

        Ntot = dataset.size
        start = self.comm.rank * Ntot // self.comm.size
        end = (self.comm.rank + 1) * Ntot // self.comm.size

        if not full:
            bunchstart = start + i * self.bunchsize
            bunchend = start + (i + 1) * self.bunchsize
            if bunchend > end:
                bunchend = end
            if bunchstart > end:
                bunchstart = end
        else:
            bunchstart = start
            bunchend = end

        if bunchend == end:
            done = True

        P = {}
        for column in readcolumns:
            data = dataset[column][bunchstart:bunchend]
            P[column] = data

        if 'Velocity' in P:
            P['Velocity'] *= RSD

        if 'Mass' in columns:
            P['Mass'] = numpy.ones(bunchend - bunchstart, dtype='u1') * self.M0

        if self.rsd is not None:
            dir = "xyz".index(self.rsd)
            P['Position'][:, dir] += P['Velocity'][:, dir]
            P['Position'][:, dir] %= self.BoxSize[dir]

        if 'InitialPosition' in columns:
            P['InitialPosition'] = numpy.empty((len(P['ID']), 3), 'f4')
            nc = int(self.size ** (1. / 3) + 0.5)
            id = P['ID'].copy()

            # search near the cube root for the exact grid size
            for nc in range(nc - 10, nc + 10):
                if nc ** 3 == self.size:
                    break

            for d in [2, 1, 0]:
                P['InitialPosition'][:, d] = id % nc
                id[:] //= nc

            cellsize = self.BoxSize[0] / nc
            P['InitialPosition'][:] += 0.5
            P['InitialPosition'][:] *= cellsize

        i = i + 1
        yield [P.get(column, None) for column in columns]
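# Hedged sketch (standalone, hypothetical grid size): the ID-to-grid decoding
# used for 'InitialPosition' above. Particle IDs are treated as row-major
# indices on an nc^3 Lagrangian grid; peeling off the base-nc digits from the
# last axis to the first recovers grid coordinates, which are then shifted to
# cell centres and scaled by the cell size.
import numpy as np

def initial_position(ids, nc, boxsize):
    ids = ids.copy()
    pos = np.empty((len(ids), 3), dtype='f4')
    for d in [2, 1, 0]:
        pos[:, d] = ids % nc
        ids //= nc
    return (pos + 0.5) * (boxsize / nc)

# Example: a 4^3 grid in a 100-unit box; ID 0 sits at the centre of the first cell
print(initial_position(np.array([0, 1, 4, 16], dtype='i8'), 4, 100.0))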