def _chunk_io(self, dobj, cache=True, local_only=False):
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for subset in gobjs:
        yield YTDataChunk(
            dobj, "io", [subset], self._count_selection(dobj, [subset]), cache=cache
        )
def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
    sobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for og in sobjs:
        if ngz > 0:
            g = og.retrieve_ghost_zones(ngz, [], smoothed=True)
        else:
            g = og
        yield YTDataChunk(dobj, "spatial", [g], None)
def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
    sobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for og in sobjs:
        with og._expand_data_files():
            if ngz > 0:
                g = og.retrieve_ghost_zones(ngz, [], smoothed=True)
            else:
                g = og
            yield YTDataChunk(dobj, "spatial", [g])
def _chunk_io(
    self,
    dobj,
    cache=True,
    local_only=False,
    preload_fields=None,
    chunk_sizing="auto",
):
    # local_only is only useful for inline datasets and requires
    # implementation by subclasses.
    if preload_fields is None:
        preload_fields = []
    preload_fields, _ = self._split_fields(preload_fields)
    gfiles = defaultdict(list)
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    fast_index = dobj._current_chunk._fast_index
    for g in gobjs:
        # Force to be a string because sometimes g.filename is None.
        gfiles[str(g.filename)].append(g)
    # We can apply a heuristic here to make sure we aren't loading too
    # many grids all at once.
    if chunk_sizing == "auto":
        chunk_ngrids = len(gobjs)
        if chunk_ngrids > 0:
            nproc = int(ytcfg.get("yt", "internals", "global_parallel_size"))
            chunking_factor = np.ceil(
                self._grid_chunksize * nproc / chunk_ngrids
            ).astype("int")
            size = max(self._grid_chunksize // chunking_factor, 1)
        else:
            size = self._grid_chunksize
    elif chunk_sizing == "config_file":
        size = ytcfg.get("yt", "chunk_size")
    elif chunk_sizing == "just_one":
        size = 1
    elif chunk_sizing == "old":
        size = self._grid_chunksize
    else:
        raise RuntimeError(
            f"{chunk_sizing} is an invalid value for the 'chunk_sizing' argument."
        )
    for fn in sorted(gfiles):
        gs = gfiles[fn]
        for grids in (gs[pos : pos + size] for pos in range(0, len(gs), size)):
            dc = YTDataChunk(
                dobj,
                "io",
                grids,
                self._count_selection(dobj, grids),
                cache=cache,
                fast_index=fast_index,
            )
            # We allow four full chunks to be included.
            with self.io.preload(dc, preload_fields, 4.0 * size):
                yield dc
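# A standalone sketch of the "auto" chunk-sizing heuristic above, under the
# assumption that ``grid_chunksize`` and ``nproc`` stand in for
# self._grid_chunksize and the configured global_parallel_size; the numbers
# used below are hypothetical.
import numpy as np

def _auto_chunk_size(chunk_ngrids, grid_chunksize=1000, nproc=4):
    # Shrink the per-chunk grid count when there are few grids relative to
    # the processor count, but never go below one grid per chunk.
    if chunk_ngrids == 0:
        return grid_chunksize
    chunking_factor = np.ceil(grid_chunksize * nproc / chunk_ngrids).astype("int")
    return max(grid_chunksize // chunking_factor, 1)

# With many grids the full chunk size survives; with few grids each chunk
# shrinks so that every processor still receives work.
assert _auto_chunk_size(10000) == 1000
assert _auto_chunk_size(100) == 25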
def _chunk_spatial(self, dobj, ngz, preload_fields=None):
    # Ghost zones are not implemented for this index; the guard below
    # raises before any retrieve_ghost_zones call could occur, so each
    # object is yielded directly.
    if ngz > 0:
        raise NotImplementedError
    sobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for og in sobjs:
        yield YTDataChunk(dobj, "spatial", [og], None, cache=True)
def _chunk_io(self, dobj, cache=True, local_only=False):
    """
    Since subsets are calculated per domain, i.e. per file, yield each
    domain at a time to organize by IO. We will eventually chunk out
    NMSU ART to be level-by-level.
    """
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for subset in oobjs:
        yield YTDataChunk(dobj, "io", [subset], None, cache=cache)
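# A minimal sketch of the one-subset-per-chunk pattern used above, with
# plain strings standing in for domain subsets and a bare list standing in
# for YTDataChunk; both stand-ins are hypothetical.
def _chunk_per_subset(subsets):
    for subset in subsets:
        yield [subset]  # each IO chunk wraps exactly one domain subset

assert list(_chunk_per_subset(["domain_0", "domain_1"])) == [
    ["domain_0"],
    ["domain_1"],
]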
def _chunk_all(self, dobj, cache=True, fast_index=None):
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    fast_index = fast_index or getattr(dobj._current_chunk, "_fast_index", None)
    yield YTDataChunk(dobj, "all", gobjs, dobj.size, cache, fast_index=fast_index)
def _chunk_io(self, dobj, cache=True, local_only=False):
    # local_only is only useful for inline datasets and requires
    # implementation by subclasses.
    gfiles = defaultdict(list)
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for g in gobjs:
        # Grids are grouped by their id here rather than by filename.
        gfiles[g.id].append(g)
    for fn in sorted(gfiles):
        gs = gfiles[fn]
        yield YTDataChunk(
            dobj, "io", gs, self._count_selection(dobj, gs), cache=cache
        )
def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
    sobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    # This is where we will perform cutting of the Octree and
    # load-balancing. That may require a specialized selector object to
    # cut based on some space-filling curve index.
    for og in sobjs:
        if ngz > 0:
            g = og.retrieve_ghost_zones(ngz, [], smoothed=True)
        else:
            g = og
        yield YTDataChunk(dobj, "spatial", [g])
def _chunk_io(self, dobj, cache=True, local_only=False):
    gfiles = defaultdict(list)
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for g in gobjs:
        gfiles[g.filename].append(g)
    for fn in sorted(gfiles):
        if local_only:
            gobjs = [g for g in gfiles[fn] if g.proc_num == self.comm.rank]
            gfiles[fn] = gobjs
        gs = gfiles[fn]
        count = self._count_selection(dobj, gs)
        yield YTDataChunk(dobj, "io", gs, count, cache=cache)
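# A standalone sketch of the group-by-filename and local_only filtering
# above; ``FakeGrid`` and the rank value are hypothetical stand-ins for the
# real grid objects and self.comm.rank.
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class FakeGrid:
    filename: str
    proc_num: int

def _group_local(grids, rank, local_only=True):
    gfiles = defaultdict(list)
    for g in grids:
        gfiles[g.filename].append(g)
    for fn in sorted(gfiles):
        gs = gfiles[fn]
        if local_only:
            # Keep only the grids assigned to this processor.
            gs = [g for g in gs if g.proc_num == rank]
        yield fn, gs

grids = [FakeGrid("d0", 0), FakeGrid("d0", 1), FakeGrid("d1", 0)]
assert dict(_group_local(grids, rank=0)) == {
    "d0": [FakeGrid("d0", 0)],
    "d1": [FakeGrid("d1", 0)],
}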
def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
    sobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    # We do not actually use the data files except as input to the
    # ParticleOctreeSubset.
    # This is where we will perform cutting of the Octree and
    # load-balancing. That may require a specialized selector object to
    # cut based on some space-filling curve index.
    for og in sobjs:
        if ngz > 0:
            g = og.retrieve_ghost_zones(ngz, [], smoothed=True)
        else:
            g = og
        yield YTDataChunk(dobj, "spatial", [g])
def _chunk_io(self, dobj, cache=True, local_only=False):
    # local_only is only useful for inline datasets and requires
    # implementation by subclasses.
    gfiles = defaultdict(list)
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for g in gobjs:
        gfiles[g.filename].append(g)
    for fn in sorted(gfiles):
        # We can apply a heuristic here to make sure we aren't loading too
        # many grids all at once.
        gs = gfiles[fn]
        size = self._grid_chunksize
        for grids in (gs[pos : pos + size] for pos in range(0, len(gs), size)):
            yield YTDataChunk(
                dobj, "io", grids, self._count_selection(dobj, grids), cache=cache
            )
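# A minimal sketch of the fixed-size slicing generator above, which splits
# one file's grid list into windows of at most ``size`` entries; the input
# list here is hypothetical.
def _sliced(gs, size):
    return (gs[pos : pos + size] for pos in range(0, len(gs), size))

assert list(_sliced([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]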
def _chunk_spatial(self, dobj, ngz, sort=None, preload_fields=None):
    gobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    if sort in ("+level", "level"):
        giter = sorted(gobjs, key=lambda g: g.Level)
    elif sort == "-level":
        giter = sorted(gobjs, key=lambda g: -g.Level)
    elif sort is None:
        giter = gobjs
    else:
        # Fail loudly rather than hitting a NameError on giter below.
        raise ValueError(f"Invalid sort option: {sort!r}")
    if preload_fields is None:
        preload_fields = []
    preload_fields, _ = self._split_fields(preload_fields)
    if self._preload_implemented and len(preload_fields) > 0 and ngz == 0:
        giter = ChunkDataCache(list(giter), preload_fields, self)
    for og in giter:
        if ngz > 0:
            g = og.retrieve_ghost_zones(ngz, [], smoothed=True)
        else:
            g = og
        size = self._count_selection(dobj, [og])
        if size == 0:
            continue
        # We don't want to cache any of the masks or icoords or fcoords for
        # individual grids.
        yield YTDataChunk(dobj, "spatial", [g], size, cache=False)
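# A standalone sketch of the level-sort options above; SimpleNamespace
# objects with a ``Level`` attribute stand in for the real grid objects.
from types import SimpleNamespace

gobjs = [SimpleNamespace(Level=lv) for lv in (2, 0, 1)]
coarse_first = sorted(gobjs, key=lambda g: g.Level)  # sort in ("+level", "level")
fine_first = sorted(gobjs, key=lambda g: -g.Level)   # sort == "-level"
assert [g.Level for g in coarse_first] == [0, 1, 2]
assert [g.Level for g in fine_first] == [2, 1, 0]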
def _chunk_all(self, dobj):
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    # We pass the chunk the full list of objects, as well as the referring
    # data source.
    yield YTDataChunk(dobj, "all", oobjs, None)
def _chunk_all(self, dobj, cache=True):
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    yield YTDataChunk(dobj, "all", oobjs, dobj.size, cache)
def _chunk_io(self, dobj, cache=True, local_only=False):
    # _current_chunk is created by identify_base_chunk
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for chunk in oobjs:
        yield YTDataChunk(dobj, "io", [chunk], None, cache=cache)
def _chunk_io(self, dobj, cache=True, local_only=False):
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    for container in oobjs:
        yield YTDataChunk(dobj, "io", [container], None, cache=cache)
def _chunk_all(self, dobj):
    oobjs = getattr(dobj._current_chunk, "objs", dobj._chunk_info)
    yield YTDataChunk(dobj, "all", oobjs, None)