def _parse_index(self):
    self._copy_index_structure()
    mylog.debug("Copying reverse tree")
    reverse_tree = self.enzo.hierarchy_information["GridParentIDs"].ravel().tolist()
    # Initial setup:
    mylog.debug("Reconstructing parent-child relationships")
    grids = []
    # We enumerate, so it's 0-indexed id and 1-indexed pid
    self.filenames = ["-1"] * self.num_grids
    for id, pid in enumerate(reverse_tree):
        grids.append(self.grid(id + 1, self))
        grids[-1].Level = self.grid_levels[id, 0]
        if pid > 0:
            grids[-1]._parent_id = pid
            grids[pid - 1]._children_ids.append(grids[-1].id)
    self.max_level = self.grid_levels.max()
    mylog.debug("Preparing grids")
    self.grids = np.empty(len(grids), dtype='object')
    for i, grid in enumerate(grids):
        if (i % 1e4) == 0:
            mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
        grid.filename = "Inline_processor_%07i" % (self.grid_procs[i, 0])
        grid._prepare_grid()
        grid.proc_num = self.grid_procs[i, 0]
        self.grids[i] = grid
    mylog.debug("Prepared")
def _fill_fields(self, fields, vals, mask, data_file):
    if mask is None:
        size = 0
    else:
        size = mask.sum()
    rv = {}
    for field in fields:
        mylog.debug("Allocating %s values for %s", size, field)
        if field in self._aux_fields:
            # Read each of the auxiliary fields
            rv[field] = self._read_aux_fields(field, mask, data_file)
        elif field in self._vector_fields:
            rv[field] = np.empty((size, 3), dtype="float64")
            if size == 0:
                continue
            rv[field][:, 0] = vals[field]['x'][mask]
            rv[field][:, 1] = vals[field]['y'][mask]
            rv[field][:, 2] = vals[field]['z'][mask]
        else:
            rv[field] = np.empty(size, dtype="float64")
            if size == 0:
                continue
            rv[field][:] = vals[field][mask]
        if field == "Coordinates":
            eps = np.finfo(rv[field].dtype).eps
            for i in range(3):
                rv[field][:, i] = np.clip(
                    rv[field][:, i],
                    self.domain_left_edge[i] + eps,
                    self.domain_right_edge[i] - eps)
    return rv
def _read_fluid_selection(self, chunks, selector, fields, size):
    rv = {}
    # Now we have to do something unpleasant
    chunks = list(chunks)
    if selector.__class__.__name__ == "GridSelector":
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        g = chunks[0].objs[0]
        for ftype, fname in fields:
            rv[(ftype, fname)] = self.grids_in_memory[g.id][fname].swapaxes(0, 2)
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug("Reading %s cells of %s fields in %s grids",
                size, [f2 for f1, f2 in fields], ng)
    ind = 0
    for chunk in chunks:
        for g in chunk.objs:
            # We want a *hard error* here.
            # if g.id not in self.grids_in_memory: continue
            for field in fields:
                ftype, fname = field
                data_view = self.grids_in_memory[g.id][fname][self.my_slice].swapaxes(0, 2)
                nd = g.select(selector, data_view, rv[field], ind)
            ind += nd
    assert(ind == fsize)
    return rv
def _read_fluid_selection(self, chunks, selector, fields, size):
    rv = {}
    chunks = list(chunks)
    fields.sort(key=lambda a: self.field_dict[a[1]])
    if selector.__class__.__name__ == "GridSelector":
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        grid = chunks[0].objs[0]
        for ftype, fname in fields:
            rv[ftype, fname] = self._read_data(grid, fname)
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug("Reading %s cells of %s fields in %s grids",
                size, [f2 for f1, f2 in fields], ng)
    ind = 0
    for chunk in chunks:
        for g in chunk.objs:
            nd = 0
            for field in fields:
                ftype, fname = field
                data = self._read_data(g, fname)
                nd = g.select(selector, data, rv[field], ind)  # caches
            ind += nd
    return rv
def _detect_output_fields(self):
    self.field_list = []
    # Do this only on the root processor to save disk work.
    if self.comm.rank in (0, None):
        mylog.info("Gathering a field list (this may take a moment.)")
        field_list = set()
        random_sample = self._generate_random_grids()
        for grid in random_sample:
            if not hasattr(grid, 'filename'):
                continue
            try:
                gf = self.io._read_field_names(grid)
            except self.io._read_exception:
                raise IOError("Grid %s is a bit funky?" % grid.id)
            mylog.debug("Grid %s has: %s", grid.id, gf)
            field_list = field_list.union(gf)
        if "AppendActiveParticleType" in self.dataset.parameters:
            ap_fields = self._detect_active_particle_fields()
            field_list = list(set(field_list).union(ap_fields))
        ptypes = self.dataset.particle_types
        ptypes_raw = self.dataset.particle_types_raw
    else:
        field_list = None
        ptypes = None
        ptypes_raw = None
    self.field_list = list(self.comm.mpi_bcast(field_list))
    self.dataset.particle_types = list(self.comm.mpi_bcast(ptypes))
    self.dataset.particle_types_raw = list(self.comm.mpi_bcast(ptypes_raw))
def probe_loop(self, tag, callback):
    while 1:
        st = MPI.Status()
        self.comm.Probe(MPI.ANY_SOURCE, tag=tag, status=st)
        try:
            callback(st)
        except StopIteration:
            mylog.debug("Probe loop ending.")
            break
def _initialize_grid_arrays(self):
    mylog.debug("Allocating arrays for %s grids", self.num_grids)
    self.grid_dimensions = np.ones((self.num_grids, 3), 'int32')
    self.grid_left_edge = self.ds.arr(
        np.zeros((self.num_grids, 3), self.float_type), 'code_length')
    self.grid_right_edge = self.ds.arr(
        np.ones((self.num_grids, 3), self.float_type), 'code_length')
    self.grid_levels = np.zeros((self.num_grids, 1), 'int32')
    self.grid_particle_count = np.zeros((self.num_grids, 1), 'int32')
def _generate_random_grids(self):
    my_rank = self.comm.rank
    my_grids = self.grids[self.grid_procs.ravel() == my_rank]
    if len(my_grids) > 40:
        starter = np.random.randint(0, 20)
        random_sample = np.mgrid[starter:len(my_grids)-1:20j].astype("int32")
        mylog.debug("Checking grids: %s", random_sample.tolist())
    else:
        random_sample = np.mgrid[0:max(len(my_grids)-1, 1)].astype("int32")
    return my_grids[(random_sample,)]
def barrierize(*args, **kwargs):
    if not parallel_capable:
        return func(*args, **kwargs)
    mylog.debug("Entering barrier before %s", func.__name__)
    comm = _get_comm(args)
    comm.barrier()
    retval = func(*args, **kwargs)
    mylog.debug("Entering barrier after %s", func.__name__)
    comm.barrier()
    return retval
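# Context sketch (assumption): barrierize above is the inner wrapper produced by
# a decorator factory, with `func`, `parallel_capable`, `_get_comm`, and `mylog`
# supplied by the enclosing scope. A minimal, hypothetical illustration of that
# shape (the factory name `blocking_call` is illustrative, not the library's API):
import functools

def blocking_call(func):
    @functools.wraps(func)
    def barrierize(*args, **kwargs):
        # barrier on entry, call func, barrier on exit -- as in the wrapper above
        retval = func(*args, **kwargs)
        return retval
    return barrierize

@blocking_call
def do_work(x):
    return x * 2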
def mpi_info_dict(self, info):
    if not self._distributed:
        return 0, {0: info}
    data = None
    if self.comm.rank == 0:
        data = {0: info}
        for i in range(1, self.comm.size):
            data[i] = self.comm.recv(source=i, tag=0)
    else:
        self.comm.send(info, dest=0, tag=0)
    mylog.debug("Opening MPI Broadcast on %s", self.comm.rank)
    data = self.comm.bcast(data, root=0)
    return self.comm.rank, data
def _read_data_set(self, grid, field):
    dest = self.proc_map[grid.id]
    msg = dict(grid_id=grid.id, field=field, op="read")
    mylog.debug("Requesting %s for %s from %s", field, grid, dest)
    if self.ds.field_info[field].particle_type:
        data = np.empty(grid.NumberOfParticles, 'float64')
    else:
        data = np.empty(grid.ActiveDimensions, 'float64')
    hook = self.comm.comm.Irecv([data, MPI.DOUBLE], source=dest)
    self.comm.comm.send(msg, dest=dest, tag=YT_TAG_MESSAGE)
    mylog.debug("Waiting for data.")
    MPI.Request.Wait(hook)
    return data
def _read_chunk_data(self, chunk, fields):
    fid = fn = None
    rv = {}
    mylog.debug("Preloading fields %s", fields)
    # Split into particles and non-particles
    fluid_fields, particle_fields = [], []
    for ftype, fname in fields:
        if ftype in self.ds.particle_types:
            particle_fields.append((ftype, fname))
        else:
            fluid_fields.append((ftype, fname))
    if len(particle_fields) > 0:
        selector = AlwaysSelector(self.ds)
        rv.update(self._read_particle_selection(
            [chunk], selector, particle_fields))
    if len(fluid_fields) == 0:
        return rv
    h5_type = self._field_dtype
    for g in chunk.objs:
        rv[g.id] = gf = {}
        if g.id in self._cached_fields:
            rv[g.id].update(self._cached_fields[g.id])
        if g.filename is None:
            continue
        elif g.filename != fn:
            if fid is not None:
                fid.close()
            fid = None
        if fid is None:
            fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY)
            fn = g.filename
        data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type)
        data_view = data.swapaxes(0, 2)
        for field in fluid_fields:
            if field in gf:
                self._hits += 1
                continue
            self._misses += 1
            ftype, fname = field
            try:
                node = "/Grid%08i/%s" % (g.id, fname)
                dg = h5py.h5d.open(fid, b(node))
            except KeyError:
                if fname == "Dark_Matter_Density":
                    continue
                raise
            dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data)
            gf[field] = data_view.copy()
    if fid:
        fid.close()
    if self._cache_on:
        for gid in rv:
            self._cached_fields.setdefault(gid, {})
            self._cached_fields[gid].update(rv[gid])
    return rv
def _setup_geometry(self): mylog.debug("Counting grids.") self._count_grids() mylog.debug("Initializing grid arrays.") self._initialize_grid_arrays() mylog.debug("Parsing index.") self._parse_index() mylog.debug("Constructing grid objects.") self._populate_grid_objects() mylog.debug("Re-examining index") self._initialize_level_stats()
def wait(self):
    status = MPI.Status()
    while 1:
        if self.comm.comm.Iprobe(MPI.ANY_SOURCE, YT_TAG_MESSAGE, status=status):
            msg = self.comm.comm.recv(source=status.source, tag=YT_TAG_MESSAGE)
            if msg['op'] == "end":
                mylog.debug("Shutting down IO.")
                break
            self._send_data(msg, status.source)
            status = MPI.Status()
        else:
            time.sleep(1e-2)
def retrieve_ghost_zones(self, ngz, fields, smoothed=False):
    try:
        new_subset = self._subset_with_gz
        mylog.debug("Reusing previous subset with ghost zone.")
    except AttributeError:
        new_subset = StreamOctreeSubset(
            self.base_region,
            self.ds,
            self.oct_handler,
            self._over_refine_factor,
            num_ghost_zones=ngz,
        )
        self._subset_with_gz = new_subset
    return new_subset
def wait(self):
    status = MPI.Status()
    while 1:
        if self.comm.comm.Iprobe(MPI.ANY_SOURCE, YT_TAG_MESSAGE, status=status):
            msg = self.comm.comm.recv(source=status.source, tag=YT_TAG_MESSAGE)
            if msg['op'] == "end":
                mylog.debug("Shutting down IO.")
                break
            self._send_data(msg, status.source)
            status = MPI.Status()
        else:
            time.sleep(1e-2)
def _generate_random_grids(self):
    if self.num_grids > 40:
        starter = np.random.randint(0, 20)
        random_sample = np.mgrid[starter:len(self.grids)-1:20j].astype("int32")
        # We also add in a bit to make sure that some of the grids have
        # particles
        gwp = self.grid_particle_count > 0
        if np.any(gwp) and not np.any(gwp[(random_sample,)]):
            # We just add one grid. This is not terribly efficient.
            first_grid = np.where(gwp)[0][0]
            random_sample.resize((21,))
            random_sample[-1] = first_grid
            mylog.debug("Added additional grid %s", first_grid)
        mylog.debug("Checking grids: %s", random_sample.tolist())
    else:
        random_sample = np.mgrid[0:max(len(self.grids), 1)].astype("int32")
    return self.grids[(random_sample,)]
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) if any((ftype != "fits" for ftype, fname in fields)): raise NotImplementedError rv = {} dt = "float64" for field in fields: rv[field] = np.empty(size, dtype=dt) ng = sum(len(c.objs) for c in chunks) mylog.debug( "Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng, ) dx = self.ds.domain_width / self.ds.domain_dimensions for field in fields: ftype, fname = field f = self.ds.index._file_map[fname] ds = f[self.ds.index._ext_map[fname]] bzero, bscale = self.ds.index._scale_map[fname] ind = 0 for chunk in chunks: for g in chunk.objs: start = ((g.LeftEdge - self.ds.domain_left_edge) / dx).d.astype("int") end = start + g.ActiveDimensions slices = [slice(start[i], end[i]) for i in range(3)] if self.ds.dimensionality == 2: nx, ny = g.ActiveDimensions[:2] nz = 1 data = np.zeros((nx, ny, nz)) data[:, :, 0] = ds.data[slices[1], slices[0]].T elif self.ds.naxis == 4: idx = self.ds.index._axis_map[fname] data = ds.data[idx, slices[2], slices[1], slices[0]].T else: data = ds.data[slices[2], slices[1], slices[0]].T if fname in self.ds.nan_mask: data[np.isnan(data)] = self.ds.nan_mask[fname] elif "all" in self.ds.nan_mask: data[np.isnan(data)] = self.ds.nan_mask["all"] data = bzero + bscale * data ind += g.select(selector, data.astype("float64"), rv[field], ind) return rv
def io_nodes(fn, n_io, n_work, func, *args, **kwargs):
    from yt.mods import load
    pool, wg = ProcessorPool.from_sizes([(n_io, "io"), (n_work, "work")])
    rv = None
    if wg.name == "work":
        ds = load(fn)
        with remote_io(ds, wg, pool):
            rv = func(ds, *args, **kwargs)
    elif wg.name == "io":
        ds = load(fn)
        io = IOCommunicator(ds, wg, pool)
        io.wait()
    # We should broadcast the result
    rv = pool.comm.mpi_bcast(rv, root=pool['work'].ranks[0])
    pool.free_all()
    mylog.debug("Return value: %s", rv)
    return rv
def _detect_output_fields(self):
    self.field_list = []
    # Do this only on the root processor to save disk work.
    if self.comm.rank in (0, None):
        # Just check the first grid.
        grid = self.grids[0]
        field_list = self.io._read_field_names(grid)
        mylog.debug("Grid %s has: %s", grid.id, field_list)
        ptypes = self.dataset.particle_types
        ptypes_raw = self.dataset.particle_types_raw
    else:
        field_list = None
        ptypes = None
        ptypes_raw = None
    self.field_list = list(self.comm.mpi_bcast(field_list))
    self.dataset.particle_types = list(self.comm.mpi_bcast(ptypes))
    self.dataset.particle_types_raw = list(self.comm.mpi_bcast(ptypes_raw))
def _read_fluid_selection(self, chunks, selector, fields, size):
    rv = {}
    # Now we have to do something unpleasant
    chunks = list(chunks)
    if isinstance(selector, GridSelector):
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        g = chunks[0].objs[0]
        f = h5py.File(g.filename, mode="r")
        gds = f.get("/Grid%08i" % g.id)
        for ftype, fname in fields:
            rv[(ftype, fname)] = np.atleast_3d(gds.get(fname)[()].transpose())
        f.close()
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug(
        "Reading %s cells of %s fields in %s grids",
        size,
        [f2 for f1, f2 in fields],
        ng,
    )
    ind = 0
    for chunk in chunks:
        f = None
        for g in chunk.objs:
            if f is None:
                # print("Opening (count) %s" % g.filename)
                f = h5py.File(g.filename, mode="r")
            gds = f.get("/Grid%08i" % g.id)
            if gds is None:
                gds = f
            for field in fields:
                ftype, fname = field
                ds = np.atleast_3d(gds.get(fname)[()].transpose())
                nd = g.select(selector, ds, rv[field], ind)  # caches
            ind += nd
        f.close()
    return rv
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) if any((ftype != "athena_pp" for ftype, fname in fields)): raise NotImplementedError f = self._handle rv = {} for field in fields: # Always use *native* 64-bit float. rv[field] = np.empty(size, dtype="=f8") ng = sum(len(c.objs) for c in chunks) mylog.debug( "Reading %s cells of %s fields in %s blocks", size, [f2 for f1, f2 in fields], ng, ) last_dname = None for field in fields: ftype, fname = field dname, fdi = self.ds._field_map[fname] if dname != last_dname: ds = f[f"/{dname}"] ind = 0 for chunk in chunks: if self.ds.logarithmic: for mesh in chunk.objs: nx, ny, nz = mesh.mesh_dims // self.ds.index.mesh_factors data = np.empty(mesh.mesh_dims, dtype="=f8") for n, id in enumerate(mesh.mesh_blocks): data[ii[n] * nx:(ii[n] + 1) * nx, jj[n] * ny:(jj[n] + 1) * ny, kk[n] * nz:(kk[n] + 1) * nz, ] = ds[fdi, id, :, :, :].transpose() ind += mesh.select(selector, data, rv[field], ind) # caches else: for gs in grid_sequences(chunk.objs): start = gs[0].id - gs[0]._id_offset end = gs[-1].id - gs[-1]._id_offset + 1 data = ds[fdi, start:end, :, :, :].transpose() for i, g in enumerate(gs): ind += g.select(selector, data[..., i], rv[field], ind) last_dname = dname return rv
def _initialize_refined_index(self):
    mask = self.regions.masks.sum(axis=1).astype('uint8')
    max_npart = max(
        sum(d.total_particles.values()) for d in self.data_files) * 28
    sub_mi1 = np.zeros(max_npart, "uint64")
    sub_mi2 = np.zeros(max_npart, "uint64")
    pb = get_pbar("Initializing refined index", len(self.data_files))
    mask_threshold = getattr(self, '_index_mask_threshold', 2)
    count_threshold = getattr(self, '_index_count_threshold', 256)
    mylog.debug("Using estimated thresholds of %s and %s for refinement",
                mask_threshold, count_threshold)
    total_refined = 0
    total_coarse_refined = (
        (mask >= 2) & (self.regions.particle_counts > count_threshold)).sum()
    mylog.debug(
        "This should produce roughly %s zones, for %s of the domain",
        total_coarse_refined, 100 * total_coarse_refined / mask.size)
    for i, data_file in enumerate(self.data_files):
        coll = None
        pb.update(i)
        nsub_mi = 0
        for ptype, pos in self.io._yield_coordinates(data_file):
            if pos.size == 0:
                continue
            if hasattr(self.ds, '_sph_ptypes') and ptype == self.ds._sph_ptypes[0]:
                hsml = self.io._get_smoothing_length(
                    data_file, pos.dtype, pos.shape)
            else:
                hsml = None
            nsub_mi, coll = self.regions._refined_index_data_file(
                coll, pos, hsml, mask, sub_mi1, sub_mi2,
                data_file.file_id, nsub_mi,
                count_threshold=count_threshold,
                mask_threshold=mask_threshold)
            total_refined += nsub_mi
        self.regions.bitmasks.append(data_file.file_id, coll)
    pb.finish()
    self.regions.find_collisions_refined()
def _read_fluid_selection(self, chunks, selector, fields, size):
    tr = defaultdict(list)
    # Set of field types
    ftypes = {f[0] for f in fields}
    for chunk in chunks:
        # Gather fields by type to minimize i/o operations
        for ft in ftypes:
            # Get all the fields of the same type
            field_subs = list(filter(lambda f: f[0] == ft, fields))
            # Loop over subsets
            for subset in chunk.objs:
                fname = None
                for fh in subset.domain.field_handlers:
                    if fh.ftype == ft:
                        file_handler = fh
                        fname = fh.fname
                        break
                if fname is None:
                    raise YTFieldTypeNotFound(ft)
                # Now we read the entire thing
                with FortranFile(fname) as fd:
                    # This contains the boundary information, so we skim through
                    # and pick off the right vectors
                    rv = subset.fill(fd, field_subs, selector, file_handler)
                for ft, f in field_subs:
                    d = rv.pop(f)
                    mylog.debug(
                        "Filling %s with %s (%0.3e %0.3e) (%s zones)",
                        f,
                        d.size,
                        d.min(),
                        d.max(),
                        d.size,
                    )
                    tr[(ft, f)].append(d)
    d = {}
    for field in fields:
        d[field] = np.concatenate(tr.pop(field))
    return d
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) if any((ftype not in ("gas",) for ftype, fname in fields)): raise NotImplementedError rv = {} for field in fields: rv[field] = self.ds.arr(np.empty(size, dtype="float64")) ng = sum(len(c.objs) for c in chunks) mylog.debug("Reading %s cells of %s fields in %s blocks", size, [f2 for f1, f2 in fields], ng) for field in fields: ftype, fname = field ind = 0 for chunk in chunks: for g in chunk.objs: ds = self.fields[g.id][ftype, fname] ind += g.select(selector, ds, rv[field], ind) # caches return rv
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) if any((ftype not in ("gas", ) for ftype, fname in fields)): raise NotImplementedError rv = {} for field in fields: rv[field] = self.ds.arr(np.empty(size, dtype="float64")) ng = sum(len(c.objs) for c in chunks) mylog.debug("Reading %s cells of %s fields in %s blocks", size, [f2 for f1, f2 in fields], ng) for field in fields: ftype, fname = field ind = 0 for chunk in chunks: for g in chunk.objs: ds = self.fields[g.id][ftype, fname] ind += g.select(selector, ds, rv[field], ind) # caches return rv
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) # generator --> list if any( (ftype != "gamer" for ftype, fname in fields) ): raise NotImplementedError rv = {} for field in fields: rv[field] = np.empty( size, dtype=self._field_dtype ) ng = sum( len(c.objs) for c in chunks ) # c.objs is a list of grids mylog.debug( "Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng ) # shortcuts ps2 = self.patch_size ps1 = ps2//2 for field in fields: ds = self._group_grid[ field[1] ] offset = 0 for chunk in chunks: for gs in grid_sequences(chunk.objs): start = (gs[ 0].id )*self.pgroup end = (gs[-1].id+1)*self.pgroup buf = ds[start:end,:,:,:] ngrid = len( gs ) data = np.empty( (ngrid,ps2,ps2,ps2), dtype=self._field_dtype ) for g in range(ngrid): pid0 = g*self.pgroup data[g, 0:ps1, 0:ps1, 0:ps1] = buf[pid0+0,:,:,:] data[g, 0:ps1, 0:ps1, ps1:ps2] = buf[pid0+1,:,:,:] data[g, 0:ps1, ps1:ps2, 0:ps1] = buf[pid0+2,:,:,:] data[g, ps1:ps2, 0:ps1, 0:ps1] = buf[pid0+3,:,:,:] data[g, 0:ps1, ps1:ps2, ps1:ps2] = buf[pid0+4,:,:,:] data[g, ps1:ps2, ps1:ps2, 0:ps1] = buf[pid0+5,:,:,:] data[g, ps1:ps2, 0:ps1, ps1:ps2] = buf[pid0+6,:,:,:] data[g, ps1:ps2, ps1:ps2, ps1:ps2] = buf[pid0+7,:,:,:] data = data.transpose() for i, g in enumerate(gs): offset += g.select( selector, data[...,i], rv[field], offset ) return rv
def update_data(self, data):
    """
    Update the stream data with a new data dict. If fields already exist,
    they will be replaced, but if they do not, they will be added. Fields
    already in the stream but not part of the data dict will be left
    alone.
    """
    # Alias
    ds = self.ds
    handler = ds.stream_handler
    # Preprocess
    field_units, data, _ = process_data(data)
    pdata = {}
    for key in data.keys():
        if not isinstance(key, tuple):
            field = ("io", key)
            mylog.debug("Reassigning '%s' to '%s'", key, field)
        else:
            field = key
        pdata[field] = data[key]
    data = pdata  # Drop reference count
    particle_types = set_particle_types(data)
    # Update particle types
    handler.particle_types.update(particle_types)
    ds._find_particle_types()
    # Update fields
    handler.field_units.update(field_units)
    fields = handler.fields
    for field in data.keys():
        if field not in fields._additional_fields:
            fields._additional_fields += (field,)
    fields["stream_file"].update(data)
    # Update field list
    for field in self.ds.field_list:
        if field[0] in ["all", "nbody"]:
            self.ds.field_list.remove(field)
    self._detect_output_fields()
    self.ds.create_field_info()
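# Usage sketch (assumption): update_data above lives on a stream dataset's index
# object; the field name "density_squared", the array shape, and the length unit
# below are purely illustrative.
import numpy as np
import yt

arr = np.random.random((32, 32, 32))
ds = yt.load_uniform_grid({"density": arr}, arr.shape, length_unit="cm")
# Add (or replace) a field on the already-loaded stream dataset in place.
ds.index.update_data({"density_squared": arr ** 2})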
def _read_fluid_selection(self, chunks, selector, fields, size): rv = {} # Now we have to do something unpleasant chunks = list(chunks) if selector.__class__.__name__ == "GridSelector": if not (len(chunks) == len(chunks[0].objs) == 1): raise RuntimeError g = chunks[0].objs[0] f = h5py.File(g.filename, 'r') gds = f.get("/Grid%08i" % g.id) for ftype, fname in fields: rv[(ftype, fname)] = np.atleast_3d(gds.get(fname).value) f.close() return rv if size is None: size = sum((g.count(selector) for chunk in chunks for g in chunk.objs)) for field in fields: ftype, fname = field fsize = size rv[field] = np.empty(fsize, dtype="float64") ng = sum(len(c.objs) for c in chunks) mylog.debug("Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng) ind = 0 for chunk in chunks: f = None for g in chunk.objs: if f is None: #print "Opening (count) %s" % g.filename f = h5py.File(g.filename, "r") gds = f.get("/Grid%08i" % g.id) if gds is None: gds = f for field in fields: ftype, fname = field ds = np.atleast_3d(gds.get(fname).value.transpose()) nd = g.select(selector, ds, rv[field], ind) # caches ind += nd f.close() return rv
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) assert(len(chunks) == 1) chunk = chunks[0] rv = {} for field in fields: ftype, fname = field rv[field] = np.empty(size, dtype="float64") ngrids = sum(len(chunk.objs) for chunk in chunks) mylog.debug("Reading %s cells of %s fields in %s blocks", size, [fname for ftype, fname in fields], ngrids) for field in fields: ind = 0 ftype, fname = field for chunk in chunks: for g in chunk.objs: ds = self.fields[g.mesh_id].get(field, None) if ds is None: ds = self.fields[g.mesh_id][fname] ind += g.select(selector, ds, rv[field], ind) # caches return rv
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) assert(len(chunks) == 1) chunk = chunks[0] rv = {} for field in fields: ftype, fname = field rv[field] = np.empty(size, dtype="float64") ngrids = sum(len(chunk.objs) for chunk in chunks) mylog.debug("Reading %s cells of %s fields in %s blocks", size, [fn for ft, fn in fields], ngrids) for field in fields: ind = 0 ftype, fname = field for chunk in chunks: for g in chunk.objs: ds = self.fields[g.mesh_id].get(field, None) if ds is None: ds = self.fields[g.mesh_id][fname] ind += g.select(selector, ds, rv[field], ind) # caches return rv
def _initialize_index(self, data_file, regions): ds = data_file.ds morton = np.empty(sum(data_file.total_particles.values()), dtype="uint64") ind = 0 DLE, DRE = ds.domain_left_edge, ds.domain_right_edge dx = (DRE - DLE) / (2**_ORDER_MAX) self.domain_left_edge = DLE.in_units("code_length").ndarray_view() self.domain_right_edge = DRE.in_units("code_length").ndarray_view() with open(data_file.filename, "rb") as f: f.seek(ds._header_offset) for iptype, ptype in enumerate(self._ptypes): # We'll just add the individual types separately count = data_file.total_particles[ptype] if count == 0: continue start, stop = ind, ind + count while ind < stop: c = min(CHUNKSIZE, stop - ind) pp = np.fromfile(f, dtype = self._pdtypes[ptype], count = c) mis = np.empty(3, dtype="float64") mas = np.empty(3, dtype="float64") for axi, ax in enumerate('xyz'): mi = pp["Coordinates"][ax].min() ma = pp["Coordinates"][ax].max() mylog.debug("Spanning: %0.3e .. %0.3e in %s", mi, ma, ax) mis[axi] = mi mas[axi] = ma pos = np.empty((pp.size, 3), dtype="float64") for i, ax in enumerate("xyz"): eps = np.finfo(pp["Coordinates"][ax].dtype).eps pos[:,i] = pp["Coordinates"][ax] regions.add_data_file(pos, data_file.file_id, data_file.ds.filter_bbox) morton[ind:ind+c] = compute_morton( pos[:,0], pos[:,1], pos[:,2], DLE, DRE, data_file.ds.filter_bbox) ind += c mylog.info("Adding %0.3e particles", morton.size) return morton
def preload(self, chunk, fields, max_size):
    if len(fields) == 0:
        yield self
        return
    old_cache_on = self._cache_on
    old_cached_fields = self._cached_fields
    self._cached_fields = cf = {}
    self._cache_on = True
    for gid in old_cached_fields:
        # Will not copy numpy arrays, which is good!
        cf[gid] = old_cached_fields[gid].copy()
    self._hits = self._misses = 0
    self._cached_fields = self._read_chunk_data(chunk, fields)
    mylog.debug("(1st) Hits = % 10i Misses = % 10i", self._hits, self._misses)
    self._hits = self._misses = 0
    yield self
    mylog.debug("(2nd) Hits = % 10i Misses = % 10i", self._hits, self._misses)
    self._cached_fields = old_cached_fields
    self._cache_on = old_cache_on
    # Randomly remove some grids from the cache.  Note that we're doing
    # this on a grid basis, not a field basis.  Performance will be
    # slightly non-deterministic as a result of this, but it should roughly
    # be statistically alright, assuming (as we do) that this will get
    # called during largely unbalanced stuff.
    if len(self._cached_fields) > max_size:
        to_remove = random.sample(list(self._cached_fields.keys()),
                                  len(self._cached_fields) - max_size)
        mylog.debug("Purging from cache %s", len(to_remove))
        for k in to_remove:
            self._cached_fields.pop(k)
    else:
        mylog.warning("Cache size % 10i (max % 10i)",
                      len(self._cached_fields), max_size)
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) if any((ftype != "fits" for ftype, fname in fields)): raise NotImplementedError rv = {} dt = "float64" for field in fields: rv[field] = np.empty(size, dtype=dt) ng = sum(len(c.objs) for c in chunks) mylog.debug("Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng) dx = self.ds.domain_width/self.ds.domain_dimensions for field in fields: ftype, fname = field f = self.ds.index._file_map[fname] ds = f[self.ds.index._ext_map[fname]] bzero, bscale = self.ds.index._scale_map[fname] ind = 0 for chunk in chunks: for g in chunk.objs: start = ((g.LeftEdge-self.ds.domain_left_edge)/dx).to_ndarray().astype("int") end = start + g.ActiveDimensions slices = [slice(start[i],end[i]) for i in range(3)] if self.ds.dimensionality == 2: nx, ny = g.ActiveDimensions[:2] nz = 1 data = np.zeros((nx,ny,nz)) data[:,:,0] = ds.data[slices[1],slices[0]].transpose() elif self.ds.naxis == 4: idx = self.ds.index._axis_map[fname] data = ds.data[idx,slices[2],slices[1],slices[0]].transpose() else: data = ds.data[slices[2],slices[1],slices[0]].transpose() if fname in self.ds.nan_mask: data[np.isnan(data)] = self.ds.nan_mask[fname] elif "all" in self.ds.nan_mask: data[np.isnan(data)] = self.ds.nan_mask["all"] data = bzero + bscale*data ind += g.select(selector, data.astype("float64"), rv[field], ind) return rv
def _read_fluid_selection(self, chunks, selector, fields, size):
    rv = {}
    # Now we have to do something unpleasant
    chunks = list(chunks)
    if isinstance(selector, GridSelector):
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        g = chunks[0].objs[0]
        for ftype, fname in fields:
            rv[(ftype, fname)] = self.grids_in_memory[g.id][fname].swapaxes(0, 2)
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug(
        "Reading %s cells of %s fields in %s grids",
        size,
        [f2 for f1, f2 in fields],
        ng,
    )
    ind = 0
    for chunk in chunks:
        for g in chunk.objs:
            # We want a *hard error* here.
            # if g.id not in self.grids_in_memory: continue
            for field in fields:
                ftype, fname = field
                data_view = self.grids_in_memory[g.id][fname][
                    self.my_slice].swapaxes(0, 2)
                nd = g.select(selector, data_view, rv[field], ind)
            ind += nd
    assert ind == fsize
    return rv
def _initialize_index(self, data_file, regions): ds = data_file.ds morton = np.empty(sum(data_file.total_particles.values()), dtype="uint64") ind = 0 DLE, DRE = ds.domain_left_edge, ds.domain_right_edge dx = (DRE - DLE) / (2**_ORDER_MAX) self.domain_left_edge = DLE.in_units("code_length").ndarray_view() self.domain_right_edge = DRE.in_units("code_length").ndarray_view() with open(data_file.filename, "rb") as f: f.seek(ds._header_offset) for iptype, ptype in enumerate(self._ptypes): # We'll just add the individual types separately count = data_file.total_particles[ptype] if count == 0: continue start, stop = ind, ind + count while ind < stop: c = min(CHUNKSIZE, stop - ind) pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c) mis = np.empty(3, dtype="float64") mas = np.empty(3, dtype="float64") for axi, ax in enumerate('xyz'): mi = pp["Coordinates"][ax].min() ma = pp["Coordinates"][ax].max() mylog.debug("Spanning: %0.3e .. %0.3e in %s", mi, ma, ax) mis[axi] = mi mas[axi] = ma pos = np.empty((pp.size, 3), dtype="float64") for i, ax in enumerate("xyz"): eps = np.finfo(pp["Coordinates"][ax].dtype).eps pos[:, i] = pp["Coordinates"][ax] regions.add_data_file(pos, data_file.file_id, data_file.ds.filter_bbox) morton[ind:ind + c] = compute_morton( pos[:, 0], pos[:, 1], pos[:, 2], DLE, DRE, data_file.ds.filter_bbox) ind += c mylog.info("Adding %0.3e particles", morton.size) return morton
def _read_fluid_selection(self, chunks, selector, fields, size):
    # Chunks in this case will have affiliated domain subset objects
    # Each domain subset will contain a hydro_offset array, which gives
    # pointers to level-by-level hydro information
    tr = defaultdict(list)
    for chunk in chunks:
        for subset in chunk.objs:
            # Now we read the entire thing
            f = open(subset.domain.hydro_fn, "rb")
            # This contains the boundary information, so we skim through
            # and pick off the right vectors
            content = IO(f.read())
            rv = subset.fill(content, fields, selector)
            for ft, f in fields:
                d = rv.pop(f)
                mylog.debug("Filling %s with %s (%0.3e %0.3e) (%s zones)",
                            f, d.size, d.min(), d.max(), d.size)
                tr[(ft, f)].append(d)
    d = {}
    for field in fields:
        d[field] = np.concatenate(tr.pop(field))
    return d
def create_field_info(self):
    self.field_dependencies = {}
    self.derived_field_list = []
    self.filtered_particle_types = []
    self.field_info = self._field_info_class(self, self.field_list)
    self.coordinates.setup_fields(self.field_info)
    self.field_info.setup_fluid_fields()
    for ptype in self.particle_types:
        self.field_info.setup_particle_fields(ptype)
    self._setup_gas_alias()
    self.field_info.setup_fluid_index_fields()
    if "all" not in self.particle_types:
        mylog.debug("Creating Particle Union 'all'")
        pu = ParticleUnion("all", list(self.particle_types_raw))
        self.add_particle_union(pu)
    self.field_info.setup_extra_union_fields()
    mylog.debug("Loading field plugins.")
    self.field_info.load_all_plugins()
    deps, unloaded = self.field_info.check_derived_fields()
    self.field_dependencies.update(deps)
def _read_chunk_data(self, chunk, fields): fid = fn = None rv = {} mylog.debug("Preloading fields %s", fields) # Split into particles and non-particles fluid_fields, particle_fields = [], [] for ftype, fname in fields: if ftype in self.ds.particle_types: particle_fields.append((ftype, fname)) else: fluid_fields.append((ftype, fname)) if len(particle_fields) > 0: selector = AlwaysSelector(self.ds) rv.update(self._read_particle_selection( [chunk], selector, particle_fields)) if len(fluid_fields) == 0: return rv for g in chunk.objs: rv[g.id] = gf = {} if g.filename is None: continue elif g.filename != fn: if fid is not None: fid.close() fid = None if fid is None: fid = h5py.h5f.open(g.filename.encode('ascii'), h5py.h5f.ACC_RDONLY) fn = g.filename data = np.empty(g.ActiveDimensions[::-1], dtype="float64") data_view = data.swapaxes(0,2) for field in fluid_fields: ftype, fname = field try: node = "/Grid%08i/%s" % (g.id, fname) dg = h5py.h5d.open(fid, node.encode('ascii')) except KeyError: if fname == "Dark_Matter_Density": continue raise dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data) gf[field] = data_view.copy() if fid: fid.close() return rv
def _detect_output_fields(self): self.field_list = [] # Do this only on the root processor to save disk work. if self.comm.rank in (0, None): mylog.info("Gathering a field list (this may take a moment.)") field_list = set() random_sample = self._generate_random_grids() for grid in random_sample: if not hasattr(grid, 'filename'): continue try: gf = self.io._read_field_names(grid) except self.io._read_exception: mylog.debug("Grid %s is a bit funky?", grid.id) continue mylog.debug("Grid %s has: %s", grid.id, gf) field_list = field_list.union(gf) if "AppendActiveParticleType" in self.dataset.parameters: ap_fields = self._detect_active_particle_fields() field_list = list(set(field_list).union(ap_fields)) else: field_list = None self.field_list = list(self.comm.mpi_bcast(field_list))
def __init__(self, points, ds=None, field_parameters=None, data_source=None):
    validate_object(ds, Dataset)
    validate_object(field_parameters, dict)
    validate_object(data_source, YTSelectionContainer)
    validate_object(points, YTArray)
    points = fix_length(points, ds)
    if len(points) < 2:
        raise YTException(
            f"Not enough points. Expected at least 2, got {len(points)}"
        )
    mylog.debug("Building minimal sphere around points.")
    mb = _miniball.Miniball(points)
    if not mb.is_valid():
        raise YTException("Could not build valid sphere around points.")
    center = ds.arr(mb.center(), points.units)
    radius = ds.quan(np.sqrt(mb.squared_radius()), points.units)
    super().__init__(center, ds, field_parameters, data_source)
    self.set_field_parameter("radius", radius)
    self.set_field_parameter("center", self.center)
    self.radius = radius
def _detect_output_fields(self): self.field_list = [] # Do this only on the root processor to save disk work. if self.comm.rank in (0, None): mylog.info("Gathering a field list (this may take a moment.)") field_list = set() random_sample = self._generate_random_grids() for grid in random_sample: if not hasattr(grid, "filename"): continue try: gf = self.io._read_field_names(grid) except self.io._read_exception as e: raise IOError("Grid %s is a bit funky?", grid.id) from e mylog.debug("Grid %s has: %s", grid.id, gf) field_list = field_list.union(gf) if "AppendActiveParticleType" in self.dataset.parameters: ap_fields = self._detect_active_particle_fields() field_list = list(set(field_list).union(ap_fields)) if not any(f[0] == "io" for f in field_list): if "io" in self.dataset.particle_types_raw: ptypes_raw = list(self.dataset.particle_types_raw) ptypes_raw.remove("io") self.dataset.particle_types_raw = tuple(ptypes_raw) if "io" in self.dataset.particle_types: ptypes = list(self.dataset.particle_types) ptypes.remove("io") self.dataset.particle_types = tuple(ptypes) ptypes = self.dataset.particle_types ptypes_raw = self.dataset.particle_types_raw else: field_list = None ptypes = None ptypes_raw = None self.field_list = list(self.comm.mpi_bcast(field_list)) self.dataset.particle_types = list(self.comm.mpi_bcast(ptypes)) self.dataset.particle_types_raw = list(self.comm.mpi_bcast(ptypes_raw))
def _parse_index(self):
    self.grid_dimensions = self.stream_handler.dimensions
    self.grid_left_edge[:] = self.stream_handler.left_edges
    self.grid_right_edge[:] = self.stream_handler.right_edges
    self.grid_levels[:] = self.stream_handler.levels
    self.min_level = self.grid_levels.min()
    self.grid_procs = self.stream_handler.processor_ids
    self.grid_particle_count[:] = self.stream_handler.particle_count
    mylog.debug("Copying reverse tree")
    self.grids = []
    # We enumerate, so it's 0-indexed id and 1-indexed pid
    for id in range(self.num_grids):
        self.grids.append(self.grid(id, self))
        self.grids[id].Level = self.grid_levels[id, 0]
    parent_ids = self.stream_handler.parent_ids
    if parent_ids is not None:
        reverse_tree = self.stream_handler.parent_ids.tolist()
        # Initial setup:
        for gid, pid in enumerate(reverse_tree):
            if pid >= 0:
                self.grids[gid]._parent_id = pid
                self.grids[pid]._children_ids.append(self.grids[gid].id)
    else:
        mylog.debug("Reconstructing parent-child relationships")
        self._reconstruct_parent_child()
    self.max_level = self.grid_levels.max()
    mylog.debug("Preparing grids")
    temp_grids = np.empty(self.num_grids, dtype="object")
    for i, grid in enumerate(self.grids):
        if (i % 1e4) == 0:
            mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
        grid.filename = None
        grid._prepare_grid()
        grid._setup_dx()
        grid.proc_num = self.grid_procs[i]
        temp_grids[i] = grid
    self.grids = temp_grids
    mylog.debug("Prepared")
def _read_fluid_selection(self, chunks, selector, fields, size):
    # Chunks in this case will have affiliated domain subset objects
    # Each domain subset will contain a hydro_offset array, which gives
    # pointers to level-by-level hydro information
    tr = defaultdict(list)
    cp = 0
    for chunk in chunks:
        for subset in chunk.objs:
            # Now we read the entire thing
            f = open(subset.domain.hydro_fn, "rb")
            # This contains the boundary information, so we skim through
            # and pick off the right vectors
            content = IO(f.read())
            rv = subset.fill(content, fields, selector)
            for ft, f in fields:
                d = rv.pop(f)
                mylog.debug("Filling %s with %s (%0.3e %0.3e) (%s zones)",
                            f, d.size, d.min(), d.max(), d.size)
                tr[(ft, f)].append(d)
    d = {}
    for field in fields:
        d[field] = np.concatenate(tr.pop(field))
    return d
def _reconstruct_parent_child(self):
    mask = np.empty(len(self.grids), dtype="int32")
    mylog.debug("First pass; identifying child grids")
    for i, grid in enumerate(self.grids):
        get_box_grids_level(
            self.grid_left_edge[i, :],
            self.grid_right_edge[i, :],
            self.grid_levels[i] + 1,
            self.grid_left_edge,
            self.grid_right_edge,
            self.grid_levels,
            mask,
        )
        ids = np.where(mask.astype("bool"))
        grid._children_ids = ids[0]  # where is a tuple
    mylog.debug("Second pass; identifying parents")
    self.stream_handler.parent_ids = (
        np.zeros(self.stream_handler.num_grids, "int64") - 1)
    for i, grid in enumerate(self.grids):  # Second pass
        for child in grid.Children:
            child._parent_id = i
            # _id_offset = 0
            self.stream_handler.parent_ids[child.id] = i
def partition_index_2d(self, axis):
    if not self._distributed:
        return False, self.index.grid_collection(self.center, self.index.grids)
    xax = self.ds.coordinates.x_axis[axis]
    yax = self.ds.coordinates.y_axis[axis]
    cc = MPI.Compute_dims(self.comm.size, 2)
    mi = self.comm.rank
    cx, cy = np.unravel_index(mi, cc)
    x = np.mgrid[0:1:(cc[0] + 1) * 1j][cx:cx + 2]
    y = np.mgrid[0:1:(cc[1] + 1) * 1j][cy:cy + 2]
    DLE, DRE = self.ds.domain_left_edge.copy(), self.ds.domain_right_edge.copy()
    LE = np.ones(3, dtype="float64") * DLE
    RE = np.ones(3, dtype="float64") * DRE
    LE[xax] = x[0] * (DRE[xax] - DLE[xax]) + DLE[xax]
    RE[xax] = x[1] * (DRE[xax] - DLE[xax]) + DLE[xax]
    LE[yax] = y[0] * (DRE[yax] - DLE[yax]) + DLE[yax]
    RE[yax] = y[1] * (DRE[yax] - DLE[yax]) + DLE[yax]
    mylog.debug("Dimensions: %s %s", LE, RE)
    reg = self.ds.region(self.center, LE, RE)
    return True, reg
def __init__( self, filename, dataset_type="openPMD", storage_filename=None, units_override=None, unit_system="mks", **kwargs, ): self._handle = HDF5FileHandler(filename) self.gridsize = kwargs.pop("open_pmd_virtual_gridsize", 10 ** 9) self.standard_version = StrictVersion(self._handle.attrs["openPMD"].decode()) self.iteration = kwargs.pop("iteration", None) self._set_paths(self._handle, path.dirname(filename), self.iteration) Dataset.__init__( self, filename, dataset_type, units_override=units_override, unit_system=unit_system, ) self.storage_filename = storage_filename self.fluid_types += ("openPMD",) try: particles = tuple( str(c) for c in self._handle[self.base_path + self.particles_path].keys() ) if len(particles) > 1: # Only use on-disk particle names if there is more than one species self.particle_types = particles mylog.debug("self.particle_types: %s", self.particle_types) self.particle_types_raw = self.particle_types self.particle_types = tuple(self.particle_types) except (KeyError, TypeError, AttributeError): pass
def _read_fluid_selection(self, chunks, selector, fields, size):
    rv = {}
    chunks = list(chunks)
    fields.sort(key=lambda a: self.field_dict[a[1]])
    if isinstance(selector, GridSelector):
        if not (len(chunks) == len(chunks[0].objs) == 1):
            raise RuntimeError
        grid = chunks[0].objs[0]
        for ftype, fname in fields:
            rv[ftype, fname] = self._read_data(grid, fname)
        return rv
    if size is None:
        size = sum((g.count(selector) for chunk in chunks for g in chunk.objs))
    for field in fields:
        ftype, fname = field
        fsize = size
        rv[field] = np.empty(fsize, dtype="float64")
    ng = sum(len(c.objs) for c in chunks)
    mylog.debug(
        "Reading %s cells of %s fields in %s grids",
        size,
        [f2 for f1, f2 in fields],
        ng,
    )
    ind = 0
    for chunk in chunks:
        for g in chunk.objs:
            nd = 0
            for field in fields:
                ftype, fname = field
                data = self._read_data(g, fname)
                nd = g.select(selector, data, rv[field], ind)  # caches
            ind += nd
    return rv
def partition_index_2d(self, axis):
    if not self._distributed:
        return False, self.index.grid_collection(self.center, self.index.grids)
    xax = self.ds.coordinates.x_axis[axis]
    yax = self.ds.coordinates.y_axis[axis]
    cc = MPI.Compute_dims(self.comm.size, 2)
    mi = self.comm.rank
    cx, cy = np.unravel_index(mi, cc)
    x = np.mgrid[0:1:(cc[0] + 1) * 1j][cx:cx + 2]
    y = np.mgrid[0:1:(cc[1] + 1) * 1j][cy:cy + 2]
    DLE, DRE = self.ds.domain_left_edge.copy(), self.ds.domain_right_edge.copy()
    LE = np.ones(3, dtype='float64') * DLE
    RE = np.ones(3, dtype='float64') * DRE
    LE[xax] = x[0] * (DRE[xax] - DLE[xax]) + DLE[xax]
    RE[xax] = x[1] * (DRE[xax] - DLE[xax]) + DLE[xax]
    LE[yax] = y[0] * (DRE[yax] - DLE[yax]) + DLE[yax]
    RE[yax] = y[1] * (DRE[yax] - DLE[yax]) + DLE[yax]
    mylog.debug("Dimensions: %s %s", LE, RE)
    reg = self.ds.region(self.center, LE, RE)
    return True, reg
def _read_fluid_selection(self, chunks, selector, fields, size): chunks = list(chunks) # generator --> list if any( (ftype != "gamer" for ftype, fname in fields) ): raise NotImplementedError rv = {} for field in fields: rv[field] = np.empty( size, dtype=self._field_dtype ) ng = sum( len(c.objs) for c in chunks ) # c.objs is a list of grids mylog.debug( "Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng ) for field in fields: ds = self._group_grid[ field[1] ] offset = 0 for chunk in chunks: for gs in grid_sequences(chunk.objs): start = gs[ 0].id end = gs[-1].id + 1 data = ds[start:end,:,:,:].transpose() for i, g in enumerate(gs): offset += g.select( selector, data[...,i], rv[field], offset ) return rv
def __init__(self, ds, dataset_type):
    ParallelAnalysisInterface.__init__(self)
    self.dataset = weakref.proxy(ds)
    self.ds = self.dataset
    self._initialize_state_variables()
    mylog.debug("Initializing data storage.")
    self._initialize_data_storage()
    mylog.debug("Setting up domain geometry.")
    self._setup_geometry()
    mylog.debug("Initializing data grid data IO")
    self._setup_data_io()
    # Note that this falls under the "geometry" object since it's
    # potentially quite expensive, and should be done with the indexing.
    mylog.debug("Detecting fields.")
    self._detect_output_fields()
def _guess_dataset_type(self, rank, test_grid, test_grid_id):
    if test_grid[0] != os.path.sep:
        test_grid = os.path.join(self.directory, test_grid)
    if not os.path.exists(test_grid):
        test_grid = os.path.join(self.directory, os.path.basename(test_grid))
        mylog.debug("Your data uses the annoying hardcoded path.")
        self._strip_path = True
    if self.dataset_type is not None:
        return
    if rank == 3:
        mylog.debug("Detected packed HDF5")
        if self.parameters.get("WriteGhostZones", 0) == 1:
            self.dataset_type = "enzo_packed_3d_gz"
            self.grid = EnzoGridGZ
        else:
            self.dataset_type = 'enzo_packed_3d'
    elif rank == 2:
        mylog.debug("Detect packed 2D")
        self.dataset_type = 'enzo_packed_2d'
    elif rank == 1:
        mylog.debug("Detect packed 1D")
        self.dataset_type = 'enzo_packed_1d'
    else:
        raise NotImplementedError
def _setup_geometry(self): mylog.debug("Initializing Particle Geometry Handler.") self._initialize_particle_handler()
def _get_field(self, field):
    if field in self.cache.keys() and self.caching:
        mylog.debug("Cached %s", str(field))
        return self.cache[field]
    mylog.debug("Reading %s", str(field))
    tr = {}
    ftype, fname = field
    ptmax = self.ws[-1]
    pbool, idxa, idxb = _determine_field_size(self.ds, ftype, self.ls, ptmax)
    npa = idxb - idxa
    sizes = np.diff(np.concatenate(([0], self.ls)))
    rp = lambda ax: read_particles(
        self.file_particle, self.Nrow, idxa=idxa, idxb=idxb, fields=ax)
    for i, ax in enumerate('xyz'):
        if fname.startswith("particle_position_%s" % ax):
            # This is not the same as domain_dimensions
            dd = self.ds.parameters['ng']
            off = 1.0 / dd
            tr[field] = rp([ax])[0] / dd - off
        if fname.startswith("particle_velocity_%s" % ax):
            tr[field], = rp(['v' + ax])
    if fname.startswith("particle_mass"):
        a = 0
        data = np.zeros(npa, dtype='f8')
        for ptb, size, m in zip(pbool, sizes, self.ws):
            if ptb:
                data[a:a + size] = m
                a += size
        tr[field] = data
    elif fname == "particle_index":
        tr[field] = np.arange(idxa, idxb)
    elif fname == "particle_type":
        a = 0
        data = np.zeros(npa, dtype='int')
        for i, (ptb, size) in enumerate(zip(pbool, sizes)):
            if ptb:
                data[a: a + size] = i
                a += size
        tr[field] = data
    if fname == "particle_creation_time":
        self.tb, self.ages, data = interpolate_ages(
            tr[field][-nstars:],
            self.file_stars,
            self.tb,
            self.ages,
            self.ds.current_time)
        temp = tr.get(field, np.zeros(npa, 'f8'))
        temp[-nstars:] = data
        tr[field] = temp
        del data
    # We check again, after it's been filled
    if fname.startswith("particle_mass"):
        # We now divide by NGrid in order to make this match up.  Note that
        # this means that even when requested in *code units*, we are
        # giving them as modified by the ng value.  This only works for
        # dark_matter -- stars are regular matter.
        tr[field] /= self.ds.domain_dimensions.prod()
    if tr == {}:
        tr[field] = np.array([])
    if self.caching:
        self.cache[field] = tr[field]
        return self.cache[field]
    else:
        return tr[field]
def barrier(self):
    if not self._distributed:
        return
    mylog.debug("Opening MPI Barrier on %s", self.comm.rank)
    self.comm.Barrier()
def _read_fluid_selection(self, chunks, selector, fields, size): rv = {} # Now we have to do something unpleasant chunks = list(chunks) if selector.__class__.__name__ == "GridSelector": if not (len(chunks) == len(chunks[0].objs) == 1): raise RuntimeError g = chunks[0].objs[0] f = h5py.File(u(g.filename), 'r') if g.id in self._cached_fields: gf = self._cached_fields[g.id] rv.update(gf) if len(rv) == len(fields): return rv gds = f.get("/Grid%08i" % g.id) for field in fields: if field in rv: self._hits += 1 continue self._misses += 1 ftype, fname = field if fname in gds: rv[(ftype, fname)] = gds.get(fname).value.swapaxes(0,2) else: rv[(ftype, fname)] = np.zeros(g.ActiveDimensions) if self._cache_on: for gid in rv: self._cached_fields.setdefault(gid, {}) self._cached_fields[gid].update(rv[gid]) f.close() return rv if size is None: size = sum((g.count(selector) for chunk in chunks for g in chunk.objs)) for field in fields: ftype, fname = field fsize = size rv[field] = np.empty(fsize, dtype="float64") ng = sum(len(c.objs) for c in chunks) mylog.debug("Reading %s cells of %s fields in %s grids", size, [f2 for f1, f2 in fields], ng) ind = 0 h5_type = self._field_dtype for chunk in chunks: fid = None for g in chunk.objs: if g.filename is None: continue if fid is None: fid = h5py.h5f.open(b(g.filename), h5py.h5f.ACC_RDONLY) gf = self._cached_fields.get(g.id, {}) data = np.empty(g.ActiveDimensions[::-1], dtype=h5_type) data_view = data.swapaxes(0,2) nd = 0 for field in fields: if field in gf: nd = g.select(selector, gf[field], rv[field], ind) self._hits += 1 continue self._misses += 1 ftype, fname = field try: node = "/Grid%08i/%s" % (g.id, fname) dg = h5py.h5d.open(fid, b(node)) except KeyError: if fname == "Dark_Matter_Density": continue raise dg.read(h5py.h5s.ALL, h5py.h5s.ALL, data) if self._cache_on: self._cached_fields.setdefault(g.id, {}) # Copy because it's a view into an empty temp array self._cached_fields[g.id][field] = data_view.copy() nd = g.select(selector, data_view, rv[field], ind) # caches ind += nd if fid: fid.close() return rv