def _read_obj_field(self, obj, field, fid_data):
    """Read one fluid field for a single grid from its Enzo HDF5 file.

    Parameters
    ----------
    obj : grid object
        Grid whose ``filename``, ``id`` and ``ActiveDimensions`` locate
        the on-disk dataset.
    field : tuple of (ftype, fname)
        Field to read; only ``fname`` is used to build the HDF5 node name.
    fid_data : tuple of (fid, data) or None
        Optional already-open low-level h5py file id and preallocated
        buffer.  Either element may be None; missing pieces are created
        here (and a locally-opened file id is closed before returning).

    Returns
    -------
    ndarray
        Transposed view of the on-disk data (Fortran -> C axis order).
    """
    if fid_data is None: fid_data = (None, None)
    fid, data = fid_data
    if fid is None:
        # No caller-supplied handle: open the file ourselves and remember
        # to close it on the way out.
        close = True
        fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
    else:
        close = False
    if data is None:
        # On-disk layout is reversed relative to ActiveDimensions.
        data = np.empty(obj.ActiveDimensions[::-1],
                        dtype=self._field_dtype)
    ftype, fname = field
    try:
        node = "/Grid%08i/%s" % (obj.id, fname)
        dg = h5py.h5d.open(fid, b(node))
    except KeyError:
        # Dark_Matter_Density may legitimately be absent; treat it as zero
        # everywhere instead of failing.
        if fname == "Dark_Matter_Density":
            data[:] = 0
            return data.T
        raise
    dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
    # I don't know why, but on some installations of h5py this works, but
    # on others, nope. Doesn't seem to be a version thing.
    #dg.close()
    if close:
        fid.close()
    return data.T
def _read_chunk_data(self, chunk, fields):
    """Preload *fields* for every grid in *chunk* into a dict keyed by grid id.

    Particle fields are delegated to ``_read_particle_selection`` with an
    AlwaysSelector; fluid fields are read grid-by-grid, reusing a single
    low-level HDF5 file handle for consecutive grids that live in the
    same file.  Results are optionally folded into ``self._cached_fields``.
    """
    fid = fn = None
    rv = {}
    mylog.debug("Preloading fields %s", fields)
    # Split into particles and non-particles
    fluid_fields, particle_fields = [], []
    for ftype, fname in fields:
        if ftype in self.ds.particle_types:
            particle_fields.append((ftype, fname))
        else:
            fluid_fields.append((ftype, fname))
    if len(particle_fields) > 0:
        selector = AlwaysSelector(self.ds)
        rv.update(
            self._read_particle_selection([chunk], selector,
                                          particle_fields))
    if len(fluid_fields) == 0:
        return rv
    h5_type = self._field_dtype
    for g in chunk.objs:
        rv[g.id] = gf = {}
        # Seed with any previously cached fields for this grid.
        if g.id in self._cached_fields:
            rv[g.id].update(self._cached_fields[g.id])
        if g.filename is None:
            continue
        elif g.filename != fn:
            # Grid lives in a different file: drop the old handle.
            if fid is not None:
                fid.close()
            fid = None
        if fid is None:
            fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
            fn = g.filename
        # Disk layout is reversed; data_view presents C-ordered axes.
        data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
        data_view = data.swapaxes(0, -1)
        for field in fluid_fields:
            if field in gf:
                self._hits += 1
                continue
            self._misses += 1
            ftype, fname = field
            try:
                node = "/Grid%08i/%s" % (g.id, fname)
                dg = h5py.h5d.open(fid, b(node))
            except KeyError:
                # Missing dark matter density is tolerated (left absent
                # here, unlike _read_obj_field which zero-fills).
                if fname == "Dark_Matter_Density":
                    continue
                raise
            dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
            # Copy: data is reused as the read buffer for the next field.
            gf[field] = data_view.copy()
    if fid:
        fid.close()
    if self._cache_on:
        for gid in rv:
            self._cached_fields.setdefault(gid, {})
            self._cached_fields[gid].update(rv[gid])
    return rv
def _read_chunk_data(self, chunk, fields):
    """Preload *fields* for every grid in *chunk* into a dict keyed by grid id.

    Particle fields go through ``_read_particle_selection`` with an
    AlwaysSelector; fluid fields are read per grid, keeping one low-level
    HDF5 file handle open across consecutive grids in the same file.
    Read fields are optionally merged into ``self._cached_fields``.
    """
    fid = fn = None
    rv = {}
    mylog.debug("Preloading fields %s", fields)
    # Split into particles and non-particles
    fluid_fields, particle_fields = [], []
    for ftype, fname in fields:
        if ftype in self.ds.particle_types:
            particle_fields.append((ftype, fname))
        else:
            fluid_fields.append((ftype, fname))
    if len(particle_fields) > 0:
        selector = AlwaysSelector(self.ds)
        rv.update(self._read_particle_selection(
            [chunk], selector, particle_fields))
    if len(fluid_fields) == 0:
        return rv
    h5_type = self._field_dtype
    for g in chunk.objs:
        rv[g.id] = gf = {}
        # Seed with any previously cached fields for this grid.
        if g.id in self._cached_fields:
            rv[g.id].update(self._cached_fields[g.id])
        if g.filename is None:
            continue
        elif g.filename != fn:
            # New file: release the handle so it is reopened below.
            if fid is not None:
                fid.close()
            fid = None
        if fid is None:
            fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
            fn = g.filename
        # On-disk axis order is reversed; the swapaxes(0, 2) view exposes
        # the conventional (x, y, z) ordering for 3-D grids.
        data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
        data_view = data.swapaxes(0,2)
        for field in fluid_fields:
            if field in gf:
                self._hits += 1
                continue
            self._misses += 1
            ftype, fname = field
            try:
                node = "/Grid%08i/%s" % (g.id, fname)
                dg = h5py.h5d.open(fid, b(node))
            except KeyError:
                # A missing dark matter density field is tolerated.
                if fname == "Dark_Matter_Density":
                    continue
                raise
            dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
            # Copy: data is reused as the read buffer for the next field.
            gf[field] = data_view.copy()
    if fid:
        fid.close()
    if self._cache_on:
        for gid in rv:
            self._cached_fields.setdefault(gid, {})
            self._cached_fields[gid].update(rv[gid])
    return rv
def _get_hosts(self):
    """Pick the server hostname and a free TCP port on rank 0 and
    broadcast them to every rank, storing both as byte strings on self."""
    addr = port = None
    if self.comm.rank == 0 or self.comm.size == 1:
        # Temporary mac hostname fix: if the advertised hostname does
        # not resolve, fall back to localhost.
        try:
            addr = socket.gethostname()
            socket.gethostbyname(addr)
        except socket.gaierror:
            addr = "localhost"
        # Bind to port 0 so the OS hands us an unused port number.
        probe = socket.socket()
        probe.bind(('', 0))
        port = probe.getsockname()[-1]
        del probe
    self.server_address, self.port = self.comm.mpi_bcast((addr, port))
    self.server_address = six.b(str(self.server_address))
    self.port = six.b(str(self.port))
def _sanitize_list(flist):
    """Return a copy of *flist* with string entries byte-encoded.

    Plain strings are converted with ``b()``; tuples consisting entirely
    of strings are rebuilt with every element encoded (recursively);
    all other entries pass through unchanged.
    """
    sanitized = []
    for entry in flist:
        if isinstance(entry, string_types):
            sanitized.append(b(entry))
            continue
        all_string_tuple = isinstance(entry, tuple) and all(
            isinstance(part, string_types) for part in entry)
        if all_string_tuple:
            sanitized.append(tuple(_sanitize_list(list(entry))))
        else:
            sanitized.append(entry)
    return sanitized
def _read_obj_field(self, obj, field, fid_data):
    """Read one field for a single block from its HDF5 file.

    Parameters
    ----------
    obj : block object
        Supplies ``filename`` and ``block_name`` used to locate the
        on-disk dataset.
    field : tuple of (ftype, fname)
        Field to read; only ``fname`` enters the HDF5 node path.
    fid_data : tuple of (fid, data) or None
        Optional already-open low-level h5py file id plus a buffer slot.
        A locally-opened file id is closed before returning; the ``data``
        element is ignored here (a fresh buffer is always allocated).

    Returns
    -------
    ndarray
        The selected sub-block, transposed, padded with trailing
        singleton axes so the result is always 3-D.
    """
    if fid_data is None: fid_data = (None, None)
    fid, data = fid_data
    if fid is None:
        # No caller-supplied handle: open (and later close) our own.
        close = True
        fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
    else:
        close = False
    ftype, fname = field
    # Note the literal space in "field %s" — part of the on-disk naming
    # scheme used by this frontend.
    node = "/%s/field %s" % (obj.block_name, fname)
    dg = h5py.h5d.open(fid, b(node))
    rdata = np.empty(self.ds.grid_dimensions[:self.ds.dimensionality],
                     dtype=self._field_dtype)
    dg.read(h5py.h5s.ALL, h5py.h5s.ALL, rdata)
    if close:
        fid.close()
    # Strip ghost zones via the precomputed base slice, then transpose.
    data = rdata[self._base].T
    if self.ds.dimensionality < 3:
        # Pad to 3-D with unit-length trailing axes.
        nshape = data.shape + (1, ) * (3 - self.ds.dimensionality)
        data = np.reshape(data, nshape)
    return data
def __init__(self, ts, num_readers=1, num_writers=None,
             outbase="rockstar_halos", particle_type="all",
             force_res=None, total_particles=None, dm_only=False,
             particle_mass=None, min_halo_size=25):
    """Set up a Rockstar halo-finding run over a time series.

    Parameters
    ----------
    ts : DatasetSeries or single dataset
        Snapshots to analyze; a bare dataset is wrapped in a series.
    num_readers, num_writers : int
        Requested reader/writer counts; the actual split is decided by
        the runner (inline vs. standard, chosen from yt config).
    outbase : str
        Output directory name (stored byte-encoded).
    particle_type : str
        Particle type passed through to Rockstar.
    force_res : float or None
        Force resolution; if None, derived from the smallest cell of the
        final snapshot (in Mpc/h).
    total_particles, dm_only, particle_mass, min_halo_size :
        Stored as-is for later use by the Rockstar interface.
    """
    if is_root():
        mylog.info(
            "The citation for the Rockstar halo finder can be found at")
        mylog.info("http://adsabs.harvard.edu/abs/2013ApJ...762..109B")
    ParallelAnalysisInterface.__init__(self)
    # Decide how we're working.
    if ytcfg.getboolean("yt", "inline") is True:
        self.runner = InlineRunner()
    else:
        self.runner = StandardRunner(num_readers, num_writers)
    self.num_readers = self.runner.num_readers
    self.num_writers = self.runner.num_writers
    mylog.info("Rockstar is using %d readers and %d writers",
               self.num_readers, self.num_writers)
    # Note that Rockstar does not support subvolumes.
    # We assume that all of the snapshots in the time series
    # use the same domain info as the first snapshots.
    if not isinstance(ts, DatasetSeries):
        ts = DatasetSeries([ts])
    self.ts = ts
    self.particle_type = particle_type
    self.outbase = six.b(outbase)
    self.min_halo_size = min_halo_size
    if force_res is None:
        tds = ts[-1]  # Cache a reference
        self.force_res = tds.index.get_smallest_dx().in_units("Mpc/h")
        # We have to delete now to wipe the index
        del tds
    else:
        self.force_res = force_res
    self.total_particles = total_particles
    self.dm_only = dm_only
    self.particle_mass = particle_mass
    # Setup pool and workgroups.
    self.pool, self.workgroup = self.runner.setup_pool()
    p = self._setup_parameters(ts)
    # Parameters are computed on the first reader rank and broadcast so
    # every rank agrees.
    params = self.comm.mpi_bcast(p, root=self.pool['readers'].ranks[0])
    self.__dict__.update(params)
    self.handler = rockstar_interface.RockstarInterface(self.ts)
def h5rd(fname, path, dtype=None):
    """Read a dataset from an HDF5 file.

    Return the dataset located at *path* in file *fname* as a numpy
    array, e.g. ``h5rd(fname, '/PartType0/Coordinates')``.

    Parameters
    ----------
    fname : str
        Path of the HDF5 file to open (read-only).
    path : str
        Dataset location inside the file; encoded to ASCII for the
        low-level h5py API.
    dtype : numpy dtype, optional
        Output dtype; defaults to the dataset's on-disk dtype.

    Returns
    -------
    ndarray
        The dataset contents.
    """
    fid = h5py.h5f.open(six.b(fname), h5py.h5f.ACC_RDONLY)
    try:
        dg = h5py.h5d.open(fid, path.encode('ascii'))
        if dtype is None:
            dtype = dg.dtype
        data = np.zeros(dg.shape, dtype=dtype)
        dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
    finally:
        # Close the low-level file id even when the dataset lookup or
        # read raises, so HDF5 handles are never leaked.
        fid.close()
    return data
def io_iter(self, chunks, fields):
    """Generator yielding ``(field, grid, data)`` for every field of every
    grid in *chunks*, keeping one low-level HDF5 handle open per file."""
    for chunk in chunks:
        handle = None
        current_file = -1
        for grid in chunk.objs:
            if grid.filename is None:
                continue
            if grid.filename != current_file:
                # Note one really important thing here: even if we do
                # implement LRU caching in the _read_obj_field function,
                # we'll still be doing file opening and whatnot.  This is
                # a problem, but one we can return to.
                if handle is not None:
                    handle.close()
                handle = h5py.h5f.open(b(grid.filename),
                                       h5py.h5f.ACC_RDONLY)
                current_file = grid.filename
            for field in fields:
                # No preallocated buffer here; _read_obj_field allocates
                # its own when handed None.
                yield field, grid, self._read_obj_field(
                    grid, field, (handle, None))
        if handle is not None:
            handle.close()
def io_iter(self, chunks, fields):
    """Generator yielding ``(field, obj, data)`` for every field of every
    grid in *chunks*.

    One low-level HDF5 file handle is kept open per distinct filename and
    reused across consecutive grids.  A read buffer sized by the grid's
    (reversed) active dimensions plus the field's nodal flags is
    preallocated and handed to ``_read_obj_field``.
    """
    h5_dtype = self._field_dtype
    for chunk in chunks:
        fid = None
        filename = -1
        for obj in chunk.objs:
            if obj.filename is None:
                continue
            if obj.filename != filename:
                # Note one really important thing here: even if we do
                # implement LRU caching in the _read_obj_field function,
                # we'll still be doing file opening and whatnot. This is a
                # problem, but one we can return to.
                if fid is not None:
                    fid.close()
                fid = h5py.h5f.open(b(obj.filename), h5py.h5f.ACC_RDONLY)
                filename = obj.filename
            for field in fields:
                # Nodal fields carry extra points along flagged axes;
                # dims are reversed to match the on-disk layout.
                nodal_flag = self.ds.field_info[field].nodal_flag
                dims = obj.ActiveDimensions[::-1] + nodal_flag[::-1]
                data = np.empty(dims, dtype=h5_dtype)
                yield field, obj, self._read_obj_field(
                    obj, field, (fid, data))
        if fid is not None:
            fid.close()
def _read_fluid_selection(self, chunks, selector, fields, size):
    """Read the selected cells of *fields* across all grids in *chunks*.

    Two paths: a fast whole-grid path when the selector is a
    GridSelector (exactly one chunk with one grid), and a general path
    that reads each grid's data and funnels it through
    ``g.select(selector, ...)`` into flat float64 output arrays.
    A per-grid field cache (``self._cached_fields``) is consulted first
    and optionally populated.
    """
    rv = {}
    # Now we have to do something unpleasant
    chunks = list(chunks)
    if selector.__class__.__name__ == "GridSelector":
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        g = chunks[0].objs[0]
        f = h5py.File(u(g.filename), 'r')
        if g.id in self._cached_fields:
            gf = self._cached_fields[g.id]
            rv.update(gf)
        if len(rv) == len(fields):
            return rv
        gds = f.get("/Grid%08i" % g.id)
        for field in fields:
            if field in rv:
                self._hits += 1
                continue
            self._misses += 1
            ftype, fname = field
            if fname in gds:
                rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0,2)
            else:
                # Missing dataset: return zeros of the grid's shape.
                rv[(ftype, fname)] = np.zeros(g.ActiveDimensions)
        if self._cache_on:
            # NOTE(review): here the keys of rv are (ftype, fname) field
            # tuples, not grid ids, and rv[gid] is an ndarray — so this
            # caches under field-tuple keys and calls dict.update() on an
            # array.  Looks inconsistent with the cache layout used in
            # the general path below; verify intent upstream.
            for gid in rv:
                self._cached_fields.setdefault(gid, {})
                self._cached_fields[gid].update(rv[gid])
        f.close()
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks
                    for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug("Reading %s cells of %s fields in %s grids",
                size, [f2 for f1, f2 in fields], ng)
    ind = 0
    h5_type = self._field_dtype
    for chunk in chunks:
        fid = None
        for g in chunk.objs:
            if g.filename is None:
                continue
            # One handle per chunk; opened lazily on the first grid that
            # actually has a file.
            if fid is None:
                fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
            gf = self._cached_fields.get(g.id, {})
            # Disk layout is reversed; data_view exposes (x, y, z) order.
            data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
            data_view = data.swapaxes(0,2)
            nd = 0
            for field in fields:
                if field in gf:
                    # Cache hit: select straight from the cached array.
                    nd = g.select(selector, gf[field], rv[field], ind)
                    self._hits += 1
                    continue
                self._misses += 1
                ftype, fname = field
                try:
                    node = "/Grid%08i/%s" % (g.id, fname)
                    dg = h5py.h5d.open(fid, b(node))
                except KeyError:
                    # Missing dark matter density is tolerated.
                    if fname == "Dark_Matter_Density":
                        continue
                    raise
                dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
                if self._cache_on:
                    self._cached_fields.setdefault(g.id, {})
                    # Copy because it's a view into an empty temp array
                    self._cached_fields[g.id][field] = data_view.copy()
                nd = g.select(selector, data_view, rv[field], ind)  # caches
            ind += nd
        if fid:
            fid.close()
    return rv
def run(self, block_ratio=1, callbacks=None, restart=False):
    """Run Rockstar over the configured time series.

    Parameters
    ----------
    block_ratio : int
        Must be 1; other values are not implemented.
    callbacks : optional
        Passed through to ``setup_rockstar``.
    restart : bool
        If True, resume from ``<outbase>/restart.cfg``: the snapshot
        number and writer count are read back, the writer count is
        validated against this run, and already-analyzed datasets are
        dropped from the series.
    """
    if block_ratio != 1:
        raise NotImplementedError
    self._get_hosts()
    # Find restart output number
    num_outputs = len(self.ts)
    if restart:
        # NOTE(review): self.outbase is byte-encoded in __init__
        # (six.b); joining/concatenating it with str literals here looks
        # py2-era — confirm against the rest of the module.
        restart_file = os.path.join(self.outbase, "restart.cfg")
        if not os.path.exists(restart_file):
            raise RuntimeError(
                "Restart file %s not found" % (restart_file))
        with open(restart_file) as restart_fh:
            for l in restart_fh:
                if l.startswith("RESTART_SNAP"):
                    restart_num = int(l.split("=")[1])
                if l.startswith("NUM_WRITERS"):
                    num_writers = int(l.split("=")[1])
        # Rockstar cannot resume with a different writer topology.
        if num_writers != self.num_writers:
            raise RuntimeError(
                "Number of writers in restart has changed from the original "
                "run (OLD = %d, NEW = %d). To avoid problems in the "
                "restart, choose the same number of writers." % \
                (num_writers, self.num_writers))
        # Remove the datasets that were already analyzed
        self.ts._pre_outputs = self.ts._pre_outputs[restart_num:]
    else:
        restart_num = 0
    self.handler.setup_rockstar(six.b(self.server_address),
                                six.b(self.port),
                                num_outputs, self.total_particles,
                                self.particle_type,
                                particle_mass=self.particle_mass,
                                parallel=self.comm.size > 1,
                                num_readers=self.num_readers,
                                num_writers=self.num_writers,
                                writing_port=-1,
                                block_ratio=block_ratio,
                                outbase=six.b(self.outbase),
                                force_res=self.force_res,
                                callbacks=callbacks,
                                restart_num=restart_num,
                                min_halo_size=self.min_halo_size)
    # Make the directory to store the halo lists in.
    if not self.outbase:
        self.outbase = os.getcwd()
    if self.comm.rank == 0 and not restart:
        if not os.path.exists(self.outbase):
            os.makedirs(self.outbase)
        # Make a record of which dataset corresponds to which set of
        # output files because it will be easy to lose this connection.
        fp = open(self.outbase + '/datasets.txt', 'w')
        fp.write("# dsname\tindex\n")
        for i, ds in enumerate(self.ts):
            dsloc = path.join(path.relpath(ds.fullpath), ds.basename)
            line = "%s\t%d\n" % (dsloc, i)
            fp.write(line)
        fp.close()
    # This barrier makes sure the directory exists before it might be used.
    self.comm.barrier()
    if self.comm.size == 1:
        self.handler.call_rockstar()
    else:
        # And run it!
        self.runner.run(self.handler, self.workgroup)
    self.comm.barrier()
    self.pool.free_all()