def _initialize_particle_handler(self):
    self._setup_data_io()
    template = self.dataset.filename_template
    ndoms = self.dataset.file_count
    cls = self.dataset._file_class
    self.data_files = [cls(self.dataset, self.io, template % {'num': i}, i)
                       for i in range(ndoms)]
    self.total_particles = sum(
        sum(d.total_particles.values()) for d in self.data_files)
    ds = self.dataset
    self.oct_handler = ParticleOctreeContainer(
        [1, 1, 1], ds.domain_left_edge, ds.domain_right_edge,
        over_refine=ds.over_refine_factor)
    self.oct_handler.n_ref = ds.n_ref
    mylog.info("Allocating for %0.3e particles", self.total_particles)
    # No more than 256^3 in the region finder.
    N = min(len(self.data_files), 256)
    self.regions = ParticleRegions(
        ds.domain_left_edge, ds.domain_right_edge,
        [N, N, N], len(self.data_files))
    self._initialize_indices()
    self.oct_handler.finalize()
    self.max_level = self.oct_handler.max_level
    tot = sum(self.oct_handler.recursively_count().values())
    mylog.info("Identified %0.3e octs", tot)

def _detect_output_fields(self):
    self.field_list = []
    # Do this only on the root processor to save disk work.
    if self.comm.rank in (0, None):
        mylog.info("Gathering a field list (this may take a moment.)")
        field_list = set()
        random_sample = self._generate_random_grids()
        for grid in random_sample:
            if not hasattr(grid, 'filename'):
                continue
            try:
                gf = self.io._read_field_names(grid)
            except self.io._read_exception:
                raise IOError("Grid %s is a bit funky?" % grid.id)
            mylog.debug("Grid %s has: %s", grid.id, gf)
            field_list = field_list.union(gf)
        if "AppendActiveParticleType" in self.dataset.parameters:
            ap_fields = self._detect_active_particle_fields()
            field_list = list(set(field_list).union(ap_fields))
        ptypes = self.dataset.particle_types
        ptypes_raw = self.dataset.particle_types_raw
    else:
        field_list = None
        ptypes = None
        ptypes_raw = None
    self.field_list = list(self.comm.mpi_bcast(field_list))
    self.dataset.particle_types = list(self.comm.mpi_bcast(ptypes))
    self.dataset.particle_types_raw = list(self.comm.mpi_bcast(ptypes_raw))

def _get_all_outputs(self, find_outputs=False):
    """
    Get all potential datasets and combine into a time-sorted list.
    """
    # Create the set of outputs from which further selection will be done.
    if find_outputs:
        self._find_outputs()
    elif self.parameters['dtDataDump'] > 0 and \
            self.parameters['CycleSkipDataDump'] > 0:
        mylog.info(
            "Simulation %s has both dtDataDump and CycleSkipDataDump set.",
            self.parameter_filename)
        mylog.info(
            "    Unable to calculate datasets.  "
            "Attempting to search in the current directory")
        self._find_outputs()
    else:
        # Get all time or cycle outputs.
        if self.parameters['CycleSkipDataDump'] > 0:
            self._calculate_cycle_outputs()
        else:
            self._calculate_time_outputs()
    # Calculate times for redshift outputs.
    if self.cosmological_simulation:
        for output in self.all_redshift_outputs:
            output["time"] = self.cosmology.t_from_z(output["redshift"])
        self.all_redshift_outputs.sort(key=lambda obj: obj["time"])
    self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
    if self.parameters['CycleSkipDataDump'] <= 0:
        self.all_outputs.sort(key=lambda obj: obj['time'].to_ndarray())

def _parse_parameter_file(self):
    hvals = self._get_hvals()
    self.dimensionality = 3
    self.refine_by = 2
    self.parameters["HydroMethod"] = "sph"
    self.unique_identifier = \
        int(os.stat(self.parameter_filename)[stat.ST_CTIME])
    # Set standard values.
    # We may have an overridden bounding box.
    if self.domain_left_edge is None:
        self.domain_left_edge = np.zeros(3, "float64")
        self.domain_right_edge = np.ones(3, "float64") * hvals["BoxSize"]
    nz = 1 << self.over_refine_factor
    self.domain_dimensions = np.ones(3, "int32") * nz
    self.periodicity = (True, True, True)
    self.cosmological_simulation = 1
    self.current_redshift = hvals["Redshift"]
    self.omega_lambda = hvals["OmegaLambda"]
    self.omega_matter = hvals["Omega0"]
    self.hubble_constant = hvals["HubbleParam"]
    # According to the Gadget manual, OmegaLambda will be zero for
    # non-cosmological datasets.  However, it may be the case that
    # individuals are running cosmological simulations *without* Lambda,
    # in which case we may be doing something incorrect here.
    # It may be possible to deduce whether ComovingIntegration is on
    # somehow, but opinions on this vary.
    if self.omega_lambda == 0.0:
        mylog.info("Omega Lambda is 0.0, so we are turning off Cosmology.")
        self.hubble_constant = 1.0  # So that scaling comes out correct
        self.cosmological_simulation = 0
        self.current_redshift = 0.0
        # This may not be correct.
        self.current_time = hvals["Time"]
    else:
        # Now we calculate our time based on the cosmology, because in
        # ComovingIntegration hvals["Time"] will in fact be the expansion
        # factor, not the actual integration time, so we re-calculate
        # global time from our Cosmology.
        cosmo = Cosmology(self.hubble_constant,
                          self.omega_matter, self.omega_lambda)
        self.current_time = cosmo.hubble_time(self.current_redshift)
        mylog.info("Calculating time from %0.3e to be %0.3e seconds",
                   hvals["Time"], self.current_time)
    self.parameters = hvals
    prefix = os.path.abspath(
        os.path.join(os.path.dirname(self.parameter_filename),
                     os.path.basename(self.parameter_filename).split(".", 1)[0]))
    if hvals["NumFiles"] > 1:
        self.filename_template = "%s.%%(num)s%s" % (prefix, self._suffix)
    else:
        self.filename_template = self.parameter_filename
    self.file_count = hvals["NumFiles"]

def _parse_parameter_file(self):
    if self.parameter_filename.startswith("http"):
        sdf_class = HTTPSDFRead
    else:
        sdf_class = SDFRead
    self.sdf_container = sdf_class(self.parameter_filename,
                                   header=self.sdf_header)
    # Reference the container's parameters directly (not a copy).
    self.parameters = self.sdf_container.parameters
    self.dimensionality = 3
    self.refine_by = 2
    try:
        self.unique_identifier = \
            int(os.stat(self.parameter_filename)[stat.ST_CTIME])
    except OSError:
        # Remote (HTTP) files cannot be stat'ed; fall back to wall time.
        self.unique_identifier = time.time()
    if None in (self.domain_left_edge, self.domain_right_edge):
        R0 = self.parameters['R0']
        if 'offset_center' in self.parameters and \
                self.parameters['offset_center']:
            self.domain_left_edge = np.array([0, 0, 0])
            self.domain_right_edge = np.array([
                2.0 * self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
        else:
            self.domain_left_edge = np.array([
                -self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
            self.domain_right_edge = np.array([
                +self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
        self.domain_left_edge *= self.parameters.get("a", 1.0)
        self.domain_right_edge *= self.parameters.get("a", 1.0)
    nz = 1 << self.over_refine_factor
    self.domain_dimensions = np.ones(3, "int32") * nz
    if "do_periodic" in self.parameters and self.parameters["do_periodic"]:
        self.periodicity = (True, True, True)
    else:
        self.periodicity = (False, False, False)
    self.cosmological_simulation = 1
    self.current_redshift = self.parameters.get("redshift", 0.0)
    self.omega_lambda = self.parameters["Omega0_lambda"]
    self.omega_matter = self.parameters["Omega0_m"]
    if "Omega0_fld" in self.parameters:
        self.omega_lambda += self.parameters["Omega0_fld"]
    if "Omega0_r" in self.parameters:
        # Not correct, but most codes can't handle Omega0_r.
        self.omega_matter += self.parameters["Omega0_r"]
    self.hubble_constant = self.parameters["h_100"]
    self.current_time = units_2HOT_v2_time * self.parameters.get("tpos", 0.0)
    mylog.info("Calculating time to be %0.3e seconds", self.current_time)
    self.filename_template = self.parameter_filename
    self.file_count = 1

def _set_code_unit_attributes(self):
    # Set a sane default for cosmological simulations.
    if self._unit_base is None and self.cosmological_simulation == 1:
        mylog.info("Assuming length units are in Mpc/h (comoving)")
        self._unit_base = dict(length=(1.0, "Mpccm/h"))
    # The remaining defaults we take from the standard Gadget defaults.
    unit_base = self._unit_base or {}
    if "length" in unit_base:
        length_unit = unit_base["length"]
    elif "UnitLength_in_cm" in unit_base:
        if self.cosmological_simulation == 0:
            length_unit = (unit_base["UnitLength_in_cm"], "cm")
        else:
            length_unit = (unit_base["UnitLength_in_cm"], "cmcm/h")
    else:
        raise RuntimeError("No length unit specified in unit_base.")
    length_unit = _fix_unit_ordering(length_unit)
    self.length_unit = self.quan(length_unit[0], length_unit[1])
    if "velocity" in unit_base:
        velocity_unit = unit_base["velocity"]
    elif "UnitVelocity_in_cm_per_s" in unit_base:
        velocity_unit = (unit_base["UnitVelocity_in_cm_per_s"], "cm/s")
    else:
        if self.cosmological_simulation == 0:
            velocity_unit = (1e5, "cm/s")
        else:
            velocity_unit = (1e5, "cmcm/s")
    velocity_unit = _fix_unit_ordering(velocity_unit)
    self.velocity_unit = self.quan(velocity_unit[0], velocity_unit[1])
    # We set hubble_constant = 1.0 for non-cosmology, so this is safe.
    # Default to 1e10 Msun/h if mass is not specified.
    if "mass" in unit_base:
        mass_unit = unit_base["mass"]
    elif "UnitMass_in_g" in unit_base:
        if self.cosmological_simulation == 0:
            mass_unit = (unit_base["UnitMass_in_g"], "g")
        else:
            mass_unit = (unit_base["UnitMass_in_g"], "g/h")
    else:
        # Sane default
        mass_unit = (1.0, "1e10*Msun/h")
    mass_unit = _fix_unit_ordering(mass_unit)
    self.mass_unit = self.quan(mass_unit[0], mass_unit[1])
    if "time" in unit_base:
        time_unit = unit_base["time"]
    elif "UnitTime_in_s" in unit_base:
        time_unit = (unit_base["UnitTime_in_s"], "s")
    else:
        time_unit = (1., "s")
    self.time_unit = self.quan(time_unit[0], time_unit[1])

def upload(self):
    api_key = ytcfg.get("yt", "hub_api_key")
    url = ytcfg.get("yt", "hub_url")
    if api_key == '':
        raise YTHubRegisterError
    metadata, (final_name, chunks) = self._generate_post()
    if hasattr(self, "_ds_mrep"):
        self._ds_mrep.upload()
    for i in metadata:
        if isinstance(metadata[i], np.ndarray):
            metadata[i] = metadata[i].tolist()
        elif hasattr(metadata[i], 'dtype'):
            metadata[i] = np.asscalar(metadata[i])
    metadata['obj_type'] = self.type
    if len(chunks) == 0:
        chunk_info = {'chunks': []}
    else:
        chunk_info = {'final_name': final_name, 'chunks': []}
        for cn, cv in chunks:
            chunk_info['chunks'].append((cn, cv.size * cv.itemsize))
    metadata = json.dumps(metadata)
    chunk_info = json.dumps(chunk_info)
    datagen, headers = multipart_encode({'metadata': metadata,
                                         'chunk_info': chunk_info,
                                         'api_key': api_key})
    request = urllib.request.Request(url, datagen, headers)
    # Actually do the request, and get the response.
    try:
        rv = urllib.request.urlopen(request).read()
    except urllib.error.HTTPError as ex:
        if ex.code == 401:
            mylog.error("You must create an API key before uploading.")
            mylog.error("https://data.yt-project.org/getting_started.html")
            return
        else:
            raise ex
    uploader_info = json.loads(rv)
    new_url = url + "/handler/%s" % uploader_info['handler_uuid']
    for i, (cn, cv) in enumerate(chunks):
        remaining = cv.size * cv.itemsize
        f = TemporaryFile()
        np.save(f, cv)
        f.seek(0)
        pbar = UploaderBar("%s, % 2i/% 2i" %
                           (self.type, i + 1, len(chunks)))
        datagen, headers = multipart_encode({'chunk_data': f}, cb=pbar)
        request = urllib.request.Request(new_url, datagen, headers)
        rv = urllib.request.urlopen(request).read()
    datagen, headers = multipart_encode({'status': 'FINAL'})
    request = urllib.request.Request(new_url, datagen, headers)
    rv = json.loads(urllib.request.urlopen(request).read())
    mylog.info("Upload succeeded!  View here: %s", rv['url'])
    return rv

def _write_cosmology_outputs(self, filename, outputs, start_index,
                             decimals=3):
    r"""
    Write cosmology output parameters for a cosmology splice.
    """
    mylog.info("Writing redshift output list to %s.", filename)
    f = open(filename, "w")
    for output in outputs:
        f.write("%f\n" % (1. / (1. + output["redshift"])))
    f.close()

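# For illustration (not from the source): with outputs at z = 3.0 and
# z = 1.0, the file written above contains one expansion factor
# a = 1/(1+z) per line:
#
#   0.250000
#   0.500000
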
def _write_cosmology_outputs(self, filename, outputs, start_index,
                             decimals=3):
    """
    Write cosmology output parameters for a cosmology splice.
    """
    mylog.info("Writing redshift output list to %s.", filename)
    f = open(filename, 'w')
    for q, output in enumerate(outputs):
        f.write(("CosmologyOutputRedshift[%d] = %." + str(decimals) + "f\n") %
                ((q + start_index), output['redshift']))
    f.close()

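# For illustration (not from the source): with start_index=0 and decimals=3,
# outputs at z = 3.0 and z = 1.0 produce an Enzo-style parameter block of
# the form:
#
#   CosmologyOutputRedshift[0] = 3.000
#   CosmologyOutputRedshift[1] = 1.000
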
def _set_code_unit_attributes(self):
    # If no units were passed in by the user, set a sane default
    # (from the Gadget-2 users guide).
    if self._unit_base is None:
        if self.cosmological_simulation == 1:
            mylog.info("Assuming length units are in kpc/h (comoving)")
            self._unit_base = dict(length=(1.0, "kpccm/h"))
        else:
            mylog.info("Assuming length units are in kpc (physical)")
            self._unit_base = dict(length=(1.0, "kpc"))
    # If units were passed in by the user, decide what to do about
    # co-moving and factors of h.
    unit_base = self._unit_base or {}
    if "length" in unit_base:
        length_unit = unit_base["length"]
    elif "UnitLength_in_cm" in unit_base:
        if self.cosmological_simulation == 0:
            length_unit = (unit_base["UnitLength_in_cm"], "cm")
        else:
            length_unit = (unit_base["UnitLength_in_cm"], "cmcm/h")
    else:
        raise RuntimeError("No length unit specified in unit_base.")
    length_unit = _fix_unit_ordering(length_unit)
    self.length_unit = self.quan(length_unit[0], length_unit[1])
    if "velocity" in unit_base:
        velocity_unit = unit_base["velocity"]
    elif "UnitVelocity_in_cm_per_s" in unit_base:
        velocity_unit = (unit_base["UnitVelocity_in_cm_per_s"], "cm/s")
    else:
        velocity_unit = (1e5, "cm/s")
    velocity_unit = _fix_unit_ordering(velocity_unit)
    self.velocity_unit = self.quan(velocity_unit[0], velocity_unit[1])
    # We set hubble_constant = 1.0 for non-cosmology, so this is safe.
    # Default to 1e10 Msun/h if mass is not specified.
    if "mass" in unit_base:
        mass_unit = unit_base["mass"]
    elif "UnitMass_in_g" in unit_base:
        if self.cosmological_simulation == 0:
            mass_unit = (unit_base["UnitMass_in_g"], "g")
        else:
            mass_unit = (unit_base["UnitMass_in_g"], "g/h")
    else:
        # Sane default
        mass_unit = (1.0, "1e10*Msun/h")
    mass_unit = _fix_unit_ordering(mass_unit)
    self.mass_unit = self.quan(mass_unit[0], mass_unit[1])
    self.time_unit = self.length_unit / self.velocity_unit

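# A hedged usage sketch: the unit_base dict consulted above can be passed to
# yt.load for Gadget snapshots.  The keys mirror the Gadget parameter names
# checked in this method; the snapshot path is hypothetical.
import yt

unit_base = {"UnitLength_in_cm": 3.08568e21,     # 1 kpc
             "UnitVelocity_in_cm_per_s": 1.0e5,  # 1 km/s
             "UnitMass_in_g": 1.989e43}          # 1e10 Msun
ds = yt.load("snapshot_010", unit_base=unit_base)
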
def save(self, name=None, suffix=None):
    r"""
    Saves a 1d profile plot.

    Parameters
    ----------
    name : str
        The output file keyword.
    suffix : str
        Specify the image type by its suffix.  If not specified, the
        output type will be inferred from the filename.  Defaults to PNG.
    """
    if not self._plot_valid:
        self._setup_plots()
    unique = set(self.figures.values())
    if len(unique) < len(self.figures):
        iters = izip(range(len(unique)), sorted(unique))
    else:
        iters = iteritems(self.figures)
    if not suffix:
        suffix = "png"
    suffix = ".%s" % suffix
    if name is None:
        if len(self.profiles) == 1:
            prefix = self.profiles[0].ds
        else:
            prefix = "Multi-data"
        name = "%s%s" % (prefix, suffix)
    else:
        sfx = get_image_suffix(name)
        if sfx != '':
            suffix = sfx
            prefix = name[:name.rfind(suffix)]
        else:
            prefix = name
    xfn = self.profiles[0].x_field
    if isinstance(xfn, tuple):
        xfn = xfn[1]
    canvas_cls = get_canvas(name)
    fns = []
    for uid, fig in iters:
        if isinstance(uid, tuple):
            uid = uid[1]
        canvas = canvas_cls(fig)
        fns.append("%s_1d-Profile_%s_%s%s" % (prefix, xfn, uid, suffix))
        mylog.info("Saving %s", fns[-1])
        canvas.print_figure(fns[-1])
    return fns

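# A minimal usage sketch, assuming this is the save() method of
# yt.ProfilePlot (the 1D profile plot class carrying the .profiles and
# .figures attributes used above); the dataset path is hypothetical.
import yt

ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
plot = yt.ProfilePlot(ds.all_data(), "density", "temperature")
# With prefix "my_profile", x-field "density", and y-field "temperature",
# the naming pattern above yields
# my_profile_1d-Profile_density_temperature.png
plot.save("my_profile")
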
def lock_grids_to_parents(self):
    r"""This function locks grid edges to their parents.

    This is useful in cases where the grid structure may be somewhat
    irregular, or where setting the left and right edges is a lossy
    process.  It is designed to correct situations where left/right edges
    may be set slightly incorrectly, resulting in discontinuities in
    images and the like.
    """
    mylog.info("Locking grids to parents.")
    for i, g in enumerate(self.grids):
        si = g.get_global_startindex()
        g.LeftEdge = self.ds.domain_left_edge + g.dds * si
        g.RightEdge = g.LeftEdge + g.ActiveDimensions * g.dds
        self.grid_left_edge[i, :] = g.LeftEdge
        self.grid_right_edge[i, :] = g.RightEdge

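# A minimal usage sketch, assuming a grid-based (e.g. Enzo) dataset; the
# path reuses the example dataset named elsewhere in this file.
import yt

ds = yt.load("DD0087/DD0087")
ds.index.lock_grids_to_parents()  # snap grid edges onto the parent lattice
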
def interpolate_ages(data, file_stars, interp_tb=None, interp_ages=None,
                     current_time=None):
    if interp_tb is None:
        t_stars, a_stars = read_star_field(file_stars, field="t_stars")
        # The timestamp of the file should match the amr timestamp.
        if current_time:
            tdiff = YTQuantity(b2t(t_stars), 'Gyr') - \
                current_time.in_units('Gyr')
            if np.abs(tdiff) > 1e-4:
                mylog.info("Timestamp mismatch in star particle header: %s",
                           tdiff)
        mylog.info("Interpolating ages")
        interp_tb, interp_ages = b2t(data)
        interp_tb = YTArray(interp_tb, 'Gyr')
        interp_ages = YTArray(interp_ages, 'Gyr')
    temp = np.interp(data, interp_tb, interp_ages)
    return interp_tb, interp_ages, temp

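# A hedged sketch of the intended call pattern: the first call builds the
# interpolation table from the star file, and later calls reuse it.  The
# chunk iterable, star-file path, and current_time are hypothetical.
interp_tb = interp_ages = None
for chunk in birth_time_chunks:  # hypothetical chunks of raw birth times
    interp_tb, interp_ages, ages = interpolate_ages(
        chunk, "stars.dat", interp_tb=interp_tb,
        interp_ages=interp_ages, current_time=current_time)
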
def enable_plugins():
    import yt
    from yt.fields.my_plugin_fields import my_plugins_fields
    from yt.config import ytcfg
    my_plugin_name = ytcfg.get("yt", "pluginfilename")
    # We assume that it is with respect to the $HOME/.yt directory.
    if os.path.isfile(my_plugin_name):
        _fn = my_plugin_name
    else:
        _fn = os.path.expanduser("~/.yt/%s" % my_plugin_name)
    if os.path.isfile(_fn):
        mylog.info("Loading plugins from %s", _fn)
        execdict = yt.__dict__.copy()
        execdict['add_field'] = my_plugins_fields.add_field
        with open(_fn) as f:
            code = compile(f.read(), _fn, 'exec')
            exec(code, execdict)

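# A hedged sketch of a plugin file the loader above would pick up, saved as
# the configured pluginfilename (commonly ~/.yt/my_plugins.py).  The field
# is illustrative; add_field here is the plugin-field add_field injected
# into the execution namespace above.
def _inverse_density(field, data):
    return 1.0 / data["density"]

add_field("inverse_density", function=_inverse_density, units="cm**3/g")
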
def _initialize_index(self, data_file, regions):
    ds = data_file.ds
    morton = np.empty(sum(data_file.total_particles.values()),
                      dtype="uint64")
    ind = 0
    DLE, DRE = ds.domain_left_edge, ds.domain_right_edge
    dx = (DRE - DLE) / (2**_ORDER_MAX)
    self.domain_left_edge = DLE.in_units("code_length").ndarray_view()
    self.domain_right_edge = DRE.in_units("code_length").ndarray_view()
    with open(data_file.filename, "rb") as f:
        f.seek(ds._header_offset)
        for iptype, ptype in enumerate(self._ptypes):
            # We'll just add the individual types separately.
            count = data_file.total_particles[ptype]
            if count == 0:
                continue
            start, stop = ind, ind + count
            while ind < stop:
                c = min(CHUNKSIZE, stop - ind)
                pp = np.fromfile(f, dtype=self._pdtypes[ptype], count=c)
                mis = np.empty(3, dtype="float64")
                mas = np.empty(3, dtype="float64")
                for axi, ax in enumerate('xyz'):
                    mi = pp["Coordinates"][ax].min()
                    ma = pp["Coordinates"][ax].max()
                    mylog.debug("Spanning: %0.3e .. %0.3e in %s",
                                mi, ma, ax)
                    mis[axi] = mi
                    mas[axi] = ma
                pos = np.empty((pp.size, 3), dtype="float64")
                for i, ax in enumerate("xyz"):
                    eps = np.finfo(pp["Coordinates"][ax].dtype).eps
                    pos[:, i] = pp["Coordinates"][ax]
                regions.add_data_file(pos, data_file.file_id,
                                      data_file.ds.filter_bbox)
                morton[ind:ind+c] = compute_morton(
                    pos[:, 0], pos[:, 1], pos[:, 2],
                    DLE, DRE, data_file.ds.filter_bbox)
                ind += c
    mylog.info("Adding %0.3e particles", morton.size)
    return morton

def save_data(self, array, node, name, set_attr=None, force=False,
              passthrough=False):
    """
    Arbitrary numpy data will be saved to the region in the datafile
    described by *node* and *name*.  If the data file does not exist, it
    throws no error and simply does not save.
    """
    if self._data_mode != 'a':
        return
    try:
        node_loc = self._data_file[node]
        if name in node_loc and force:
            mylog.info("Overwriting node %s/%s", node, name)
            del self._data_file[node][name]
        elif name in node_loc and passthrough:
            return
    except KeyError:
        # The node does not exist yet; it will be created below.
        pass
    myGroup = self._data_file['/']
    for q in node.split('/'):
        if q:
            myGroup = myGroup.require_group(q)
    arr = myGroup.create_dataset(name, data=array)
    if set_attr is not None:
        for i, j in set_attr.items():
            arr.attrs[i] = j
    self._data_file.flush()

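# A hedged usage sketch, assuming the dataset's index exposes this method
# and its sidecar data file was opened in mode "a"; the node, dataset
# names, and path are hypothetical.
import numpy as np
import yt

ds = yt.load("DD0087/DD0087")
radii = np.linspace(0.0, 1.0, 8)
ds.index.save_data(radii, "/MyAnalysis", "radii",
                   set_attr={"units": "code_length"}, force=True)
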
def __init__(self, ds, wg, pool):
    mylog.info("Initializing IOCommunicator")
    self.ds = ds
    self.wg = wg  # We don't need to use this!
    self.pool = pool
    self.comm = pool.comm
    # We read our grids here.
    self.grids = []
    storage = {}
    grids = ds.index.grids.tolist()
    grids.sort(key=lambda a: a.filename)
    for sto, g in parallel_objects(grids, storage=storage):
        sto.result = self.comm.rank
        sto.result_id = g.id
        self.grids.append(g)
    self._id_offset = ds.index.grids[0]._id_offset
    mylog.info("Reading from disk ...")
    self.initialize_data()
    mylog.info("Broadcasting ...")
    self.comm.comm.bcast(storage, root=wg.ranks[0])
    mylog.info("Done.")
    self.hooks = []

def _rebuild_top_grids(self, level=0):
    mylog.info("Rebuilding grids on level %s", level)
    cmask = (self.grid_levels.flat == (level + 1))
    cmsum = cmask.sum()
    mask = np.zeros(self.num_grids, dtype='bool')
    for grid in self.select_grids(level):
        mask[:] = 0
        LE = self.grid_left_edge[grid.id - grid._id_offset]
        RE = self.grid_right_edge[grid.id - grid._id_offset]
        grids, grid_i = self.get_box_grids(LE, RE)
        mask[grid_i] = 1
        grid._children_ids = []
        cgrids = self.grids[(mask * cmask).astype('bool')]
        mylog.info("%s: %s / %s", grid, len(cgrids), cmsum)
        for cgrid in cgrids:
            grid._children_ids.append(cgrid.id)
            cgrid._parent_id = grid.id
    mylog.info("Finished rebuilding")

def add_sph_fields(self, n_neighbors=32, kernel="cubic", sph_ptype="io"):
    """Add SPH fields for the specified particle type.

    For a particle type with "particle_position" and "particle_mass"
    already defined, this method adds the "smoothing_length" and
    "density" fields.  "smoothing_length" is computed as the distance to
    the nth nearest neighbor.  "density" is computed as the SPH (gather)
    smoothed mass.  The SPH fields are added only if they don't already
    exist.

    Parameters
    ----------
    n_neighbors : int
        The number of neighbors to use in smoothing length computation.
    kernel : str
        The kernel function to use in density estimation.
    sph_ptype : str
        The SPH particle type.  Each dataset has one sph_ptype only.
        This method will overwrite existing sph_ptype of the dataset.
    """
    mylog.info("Generating SPH fields")

    # Unify units
    l_unit = "code_length"
    m_unit = "code_mass"
    d_unit = "code_mass / code_length**3"

    # Read basic fields
    ad = self.all_data()
    pos = ad[sph_ptype, "particle_position"].to(l_unit).d
    mass = ad[sph_ptype, "particle_mass"].to(m_unit).d

    # Construct k-d tree
    kdtree = PyKDTree(
        pos.astype("float64"),
        left_edge=self.domain_left_edge.to_value(l_unit),
        right_edge=self.domain_right_edge.to_value(l_unit),
        periodic=self.periodicity,
        leafsize=2 * int(n_neighbors),
    )
    order = np.argsort(kdtree.idx)

    def exists(fname):
        if (sph_ptype, fname) in self.derived_field_list:
            mylog.info("Field ('%s','%s') already exists. Skipping",
                       sph_ptype, fname)
            return True
        else:
            mylog.info("Generating field ('%s','%s')", sph_ptype, fname)
            return False

    data = {}

    # Add smoothing length field
    fname = "smoothing_length"
    if not exists(fname):
        hsml = generate_smoothing_length(pos[kdtree.idx], kdtree,
                                         n_neighbors)
        hsml = hsml[order]
        data[(sph_ptype, "smoothing_length")] = (hsml, l_unit)
    else:
        hsml = ad[sph_ptype, fname].to(l_unit).d

    # Add density field
    fname = "density"
    if not exists(fname):
        dens = estimate_density(
            pos[kdtree.idx],
            mass[kdtree.idx],
            hsml[kdtree.idx],
            kdtree,
            kernel_name=kernel,
        )
        dens = dens[order]
        data[(sph_ptype, "density")] = (dens, d_unit)

    # Add fields
    self._sph_ptypes = (sph_ptype,)
    self.index.update_data(data)
    self.num_neighbors = n_neighbors

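# A minimal usage sketch: turn a particle-only (N-body) dataset into one
# with SPH smoothing lengths and densities for the "io" particle type.
# The snapshot path is hypothetical.
import yt

ds = yt.load("snapshot_010")
ds.add_sph_fields(n_neighbors=32, kernel="cubic", sph_ptype="io")
ad = ds.all_data()
print(ad["io", "smoothing_length"].min(), ad["io", "density"].max())
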
def finish(self):
    mylog.info("Finishing '%s'", self.title)

def parallel_objects(objects, njobs=0, storage=None, barrier=True,
                     dynamic=False):
    r"""This function dispatches components of an iterable to different
    processors.

    The parallel_objects function accepts an iterable, *objects*, and based
    on the number of jobs requested and number of available processors,
    decides how to dispatch individual objects to processors or sets of
    processors.  This can implicitly include multi-level parallelism, such
    that the processor groups assigned each object can be composed of
    several or even hundreds of processors.  *storage* is also available,
    for collation of results at the end of the iteration loop.  Calls to
    this function can be nested.

    This should not be used to iterate over datasets --
    :class:`~yt.data_objects.time_series.DatasetSeries` provides a much
    nicer interface for that.

    Parameters
    ----------
    objects : iterable
        The list of objects to dispatch to different processors.
    njobs : int
        How many jobs to spawn.  By default, one job will be dispatched
        for each available processor.
    storage : dict
        This is a dictionary, which will be filled with results during the
        course of the iteration.  The keys will be the dataset indices and
        the values will be whatever is assigned to the *result* attribute
        on the storage during iteration.
    barrier : bool
        Should a barrier be placed at the end of iteration?
    dynamic : bool
        This governs whether or not dynamic load balancing will be
        enabled.  This requires one dedicated processor; if this is
        enabled with a set of 128 processors available, only 127 will be
        available to iterate over objects, as one will be load balancing
        the rest.

    Examples
    --------
    Here is a simple example of iterating over a set of centers and making
    slice plots centered at each.

    >>> for c in parallel_objects(centers):
    ...     SlicePlot(ds, "x", "Density", center=c).save()
    ...

    Here's an example of calculating the angular momentum vector of a set
    of spheres, but with a set of four jobs of multiple processors each.
    Note that we also store the results.

    >>> storage = {}
    >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
    ...     sp = ds.sphere(c, (100, "kpc"))
    ...     sto.result = sp.quantities["AngularMomentumVector"]()
    ...
    >>> for sphere_id, L in sorted(storage.items()):
    ...     print(centers[sphere_id], L)
    ...
    """
    if dynamic:
        from .task_queue import dynamic_parallel_objects
        for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
                                               storage=storage):
            yield my_obj
        return

    if not parallel_capable:
        njobs = 1
    my_communicator = communication_system.communicators[-1]
    my_size = my_communicator.size
    mylog.info("You have %s processors", my_size)
    if njobs <= 0:
        njobs = my_size
    if njobs > my_size:
        mylog.error("You have asked for %s jobs, but you only have %s "
                    "processors.", njobs, my_size)
        raise RuntimeError
    my_rank = my_communicator.rank
    mylog.info("I am processor %s", my_rank)
    all_new_comms = np.array_split(np.arange(my_size), njobs)
    for i, comm_set in enumerate(all_new_comms):
        if my_rank in comm_set:
            my_new_id = i
            break
    if parallel_capable:
        communication_system.push_with_ids(all_new_comms[my_new_id].tolist())
    to_share = {}
    # If our objects object is slice-aware, like time series data objects
    # are, this will prevent intermediate objects from being created.
    oiter = itertools.islice(enumerate(objects), my_new_id, None, njobs)
    for result_id, obj in oiter:
        if storage is not None:
            rstore = ResultsStorage()
            rstore.result_id = result_id
            yield rstore, obj
            to_share[rstore.result_id] = rstore.result
        else:
            yield obj
    if parallel_capable:
        communication_system.pop()
    if storage is not None:
        # Now we have to broadcast it.
        new_storage = my_communicator.par_combine_object(
            to_share, datatype='dict', op='join')
        mylog.info("my storage: %s", type(new_storage))
        storage.update(new_storage)
    if barrier:
        my_communicator.barrier()

def virial_quantities(halo, fields,
                      overdensity_field=("gas", "overdensity"),
                      critical_overdensity=200,
                      profile_storage="profiles"):
    r"""
    Calculate the value of the given fields at the virial radius defined
    at the given critical density by interpolating from radial profiles.

    Parameters
    ----------
    halo : Halo object
        The Halo object to be provided by the HaloCatalog.
    fields : string or list of strings
        The fields whose virial values are to be calculated.
    overdensity_field : string or tuple of strings
        The field used as the overdensity from which interpolation is done
        to calculate virial quantities.
        Default: ("gas", "overdensity")
    critical_overdensity : float
        The value of the overdensity at which to evaluate the virial
        quantities.  Overdensity is with respect to the critical density.
        Default: 200
    profile_storage : string
        Name of the halo attribute that holds the profiles to be used.
        Default: "profiles"
    """
    mylog.info("Calculating virial quantities for halo %d." %
               halo.quantities["particle_identifier"])
    fields = ensure_list(fields)
    fields = [halo.data_object._determine_fields(field)[0]
              for field in fields]
    dds = halo.halo_catalog.data_ds
    profile_data = getattr(halo, profile_storage)

    if overdensity_field not in profile_data:
        raise RuntimeError(
            "virial_quantities callback requires profile of %s." %
            str(overdensity_field))

    overdensity = profile_data[overdensity_field]
    dfilter = np.isfinite(overdensity) & profile_data["used"] & \
        (overdensity > 0)

    v_fields = {}
    for field in fields:
        if isinstance(field, tuple):
            my_field = field[-1]
        else:
            my_field = field
        v_fields[field] = my_field
        v_field = "%s_%d" % (my_field, critical_overdensity)
        if v_field not in halo.halo_catalog.quantities:
            halo.halo_catalog.quantities.append(v_field)
    vquantities = dict([("%s_%d" % (v_fields[field], critical_overdensity),
                         dds.quan(0, profile_data[field].units))
                        for field in fields])

    if dfilter.sum() < 2:
        halo.quantities.update(vquantities)
        return

    # Find the interpolation index: require a negative slope, but not
    # monotonicity.
    vod = overdensity[dfilter].to_ndarray()
    if (vod > critical_overdensity).all():
        if vod[-1] < vod[-2]:
            index = -2
        else:
            halo.quantities.update(vquantities)
            return
    elif (vod < critical_overdensity).all():
        if vod[0] > vod[1]:
            index = 0
        else:
            halo.quantities.update(vquantities)
            return
    else:
        # Take the first instance of a downward intersection with the
        # critical value.
        intersections = (vod[:-1] >= critical_overdensity) & \
            (vod[1:] < critical_overdensity)
        if not intersections.any():
            halo.quantities.update(vquantities)
            return
        index = np.where(intersections)[0][0]

    for field in fields:
        v_prof = profile_data[field][dfilter].to_ndarray()
        slope = np.log(v_prof[index + 1] / v_prof[index]) / \
            np.log(vod[index + 1] / vod[index])
        value = dds.quan(
            np.exp(slope * np.log(critical_overdensity / vod[index])) *
            v_prof[index], profile_data[field].units).in_cgs()
        vquantities["%s_%d" % (v_fields[field], critical_overdensity)] = value

    halo.quantities.update(vquantities)

def load_profiles(halo, storage="profiles", fields=None,
                  filename=None, output_dir="."):
    r"""
    Load profile data from disk.

    Parameters
    ----------
    halo : Halo object
        The Halo object to be provided by the HaloCatalog.
    storage : string
        Name of the dictionary attribute to store profile data.
        Default: "profiles"
    fields : string or list of strings
        The fields to be loaded.  If None, all fields present will be
        loaded.
        Default: None
    filename : string
        The name of the file to be loaded.  The final filename will be
        "<filename>_<id>.h5".  If None, filename is set to the value given
        by the storage keyword.
        Default: None
    output_dir : string
        Name of directory where profile data will be read.  The full path
        will be the output_dir of the halo catalog concatenated with this
        directory.
        Default: "."
    """
    if filename is None:
        filename = storage
    output_file = os.path.join(
        halo.halo_catalog.output_dir, output_dir,
        "%s_%06d.h5" % (filename, halo.quantities["particle_identifier"]))
    if not os.path.exists(output_file):
        raise RuntimeError("Profile file not found: %s." % output_file)
    mylog.info("Loading halo %d profile data from %s." %
               (halo.quantities["particle_identifier"], output_file))

    fh = h5py.File(output_file, "r")
    if fields is None:
        profile_fields = fh["profiles"].keys()
    else:
        profile_fields = fields
    my_profile = {}
    my_group = fh["profiles"]
    for field in profile_fields:
        if field not in my_group:
            raise RuntimeError("%s field not present in %s." %
                               (field, output_file))
        my_profile[field] = _hdf5_yt_array(my_group, field,
                                           ds=halo.halo_catalog.halos_ds)
    setattr(halo, storage, my_profile)

    if "variance" in fh:
        my_variance = {}
        my_group = fh["variance"]
        if fields is None:
            profile_fields = my_group.keys()
        for field in profile_fields:
            if field not in my_group:
                raise RuntimeError("%s field not present in %s." %
                                   (field, output_file))
            my_variance[field] = _hdf5_yt_array(
                my_group, field, ds=halo.halo_catalog.halos_ds)
        setattr(halo, "%s_variance" % storage, my_variance)

    fh.close()

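# A hedged sketch of how these callbacks are typically chained through a
# HaloCatalog (the callback names follow the yt halo-analysis registry;
# the import path matches yt-3 analysis modules and the dataset paths are
# hypothetical): profiles are saved in one pass, then reloaded and used to
# interpolate virial quantities.
import yt
from yt.analysis_modules.halo_analysis.api import HaloCatalog

data_ds = yt.load("DD0087/DD0087")
halos_ds = yt.load("halo_catalogs/catalog/catalog.0.h5")
hc = HaloCatalog(data_ds=data_ds, halos_ds=halos_ds)
hc.add_callback("load_profiles", storage="profiles", filename="profiles")
hc.add_callback("virial_quantities", ["radius"])
hc.create()
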
def enable_parallelism(suppress_logging=False, communicator=None):
    """
    This method is used inside a script to turn on MPI parallelism, via
    mpi4py.  More information about running yt in parallel can be found
    here: https://yt-project.org/docs/3.0/analyzing/parallel_computation.html

    Parameters
    ----------
    suppress_logging : bool
        If set to True, only rank 0 will log information after the initial
        setup of MPI.
    communicator : mpi4py.MPI.Comm
        The MPI communicator to use.  This controls which processes yt can
        see.  If not specified, will be set to COMM_WORLD.
    """
    global parallel_capable, MPI
    try:
        from mpi4py import MPI as _MPI
    except ImportError:
        mylog.info("mpi4py was not found. Disabling parallel computation")
        parallel_capable = False
        return
    MPI = _MPI
    exe_name = os.path.basename(sys.executable)

    # If no communicator was specified, set it to COMM_WORLD.
    if communicator is None:
        communicator = MPI.COMM_WORLD

    parallel_capable = (communicator.size > 1)
    if not parallel_capable:
        return False

    mylog.info("Global parallel computation enabled: %s / %s",
               communicator.rank, communicator.size)
    communication_system.push(communicator)
    ytcfg["yt", "__global_parallel_rank"] = str(communicator.rank)
    ytcfg["yt", "__global_parallel_size"] = str(communicator.size)
    ytcfg["yt", "__parallel"] = "True"
    if exe_name == "embed_enzo" or \
            ("_parallel" in dir(sys) and sys._parallel):
        ytcfg["yt", "inline"] = "True"
    if communicator.rank > 0:
        if ytcfg.getboolean("yt", "LogFile"):
            ytcfg["yt", "LogFile"] = "False"
            yt.utilities.logger.disable_file_logging()
    yt.utilities.logger.uncolorize_logging()
    # Even though the uncolorize function already resets the format string,
    # we reset it again so that it includes the processor.
    f = logging.Formatter("P%03i %s" % (communicator.rank,
                                        yt.utilities.logger.ufstring))
    if len(yt.utilities.logger.ytLogger.handlers) > 0:
        yt.utilities.logger.ytLogger.handlers[0].setFormatter(f)

    if ytcfg.getboolean("yt", "parallel_traceback"):
        sys.excepthook = traceback_writer_hook("_%03i" % communicator.rank)
    else:
        sys.excepthook = default_mpi_excepthook

    if ytcfg.getint("yt", "LogLevel") < 20:
        yt.utilities.logger.ytLogger.warning(
            "Log Level is set low -- this could affect parallel "
            "performance!")
    dtype_names.update(
        dict(
            float32=MPI.FLOAT,
            float64=MPI.DOUBLE,
            int32=MPI.INT,
            int64=MPI.LONG,
            c=MPI.CHAR,
        ))
    op_names.update(dict(sum=MPI.SUM, min=MPI.MIN, max=MPI.MAX))

    # Turn off logging on all but the root rank, if specified.
    if suppress_logging:
        if communicator.rank > 0:
            mylog.addFilter(FilterAllMessages())
    return True

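# A minimal usage sketch: call enable_parallelism() at the top of a script
# and launch it under MPI, e.g.
#
#   $ mpirun -np 4 python my_script.py
#
# The dataset path reuses the example named elsewhere in this file.
import yt
yt.enable_parallelism()

ds = yt.load("DD0087/DD0087")
yt.ProjectionPlot(ds, "x", "density").save()
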
def par_combine_object(self, data, op, datatype=None):
    # op can be chosen from:
    #   cat
    #   join
    # data is selected to be of types:
    #   np.ndarray
    #   dict
    #   data field dict
    if datatype is not None:
        pass
    elif isinstance(data, dict):
        datatype = "dict"
    elif isinstance(data, np.ndarray):
        datatype = "array"
    elif isinstance(data, list):
        datatype = "list"
    # Now we have our datatype, and we conduct our operation.
    if datatype == "dict" and op == "join":
        if self.comm.rank == 0:
            for i in range(1, self.comm.size):
                data.update(self.comm.recv(source=i, tag=0))
        else:
            self.comm.send(data, dest=0, tag=0)
        data = self.comm.bcast(data, root=0)
        return data
    elif datatype == "dict" and op == "cat":
        field_keys = sorted(data.keys())
        size = data[field_keys[0]].shape[-1]
        sizes = np.zeros(self.comm.size, dtype='int64')
        outsize = np.array(size, dtype='int64')
        self.comm.Allgather([outsize, 1, MPI.LONG],
                            [sizes, 1, MPI.LONG])
        # This nested concatenate is to get the shapes to work out
        # correctly; if we just add [0] to sizes, it will broadcast a
        # summation, not a concatenation.
        offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
        arr_size = self.comm.allreduce(size, op=MPI.SUM)
        for key in field_keys:
            dd = data[key]
            rv = self.alltoallv_array(dd, arr_size, offsets, sizes)
            data[key] = rv
        return data
    elif datatype == "array" and op == "cat":
        if data is None:
            ncols = -1
            size = 0
            dtype = 'float64'
            mylog.info('Warning: Array passed to par_combine_object was '
                       'None. Setting dtype to float64. This may break '
                       'things!')
        else:
            dtype = data.dtype
            if len(data) == 0:
                ncols = -1
                size = 0
            elif len(data.shape) == 1:
                ncols = 1
                size = data.shape[0]
            else:
                ncols, size = data.shape
        ncols = self.comm.allreduce(ncols, op=MPI.MAX)
        if ncols == 0:
            data = np.zeros(0, dtype=dtype)  # This only works for
        elif data is None:
            data = np.zeros((ncols, 0), dtype=dtype)
        size = data.shape[-1]
        sizes = np.zeros(self.comm.size, dtype='int64')
        outsize = np.array(size, dtype='int64')
        self.comm.Allgather([outsize, 1, MPI.LONG],
                            [sizes, 1, MPI.LONG])
        # This nested concatenate is to get the shapes to work out
        # correctly; if we just add [0] to sizes, it will broadcast a
        # summation, not a concatenation.
        offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
        arr_size = self.comm.allreduce(size, op=MPI.SUM)
        data = self.alltoallv_array(data, arr_size, offsets, sizes)
        return data
    elif datatype == "list" and op == "cat":
        recv_data = self.comm.allgather(data)
        # Now flatten into a single list, since this returns us a list of
        # lists.
        data = []
        while recv_data:
            data.extend(recv_data.pop(0))
        return data
    raise NotImplementedError

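# A hedged sketch of the "dict"/"join" semantics implemented above: inside
# a parallel run, each rank contributes its entries and every rank receives
# the merged dict back (gather on rank 0, then broadcast).
comm = communication_system.communicators[-1]
local = {comm.rank: "done"}  # one entry per rank
merged = comm.par_combine_object(local, op="join", datatype="dict")
# After the call, every rank holds {0: "done", 1: "done", ...}
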
@contextmanager
def remote_io(ds, wg, pool):
    # Temporarily swap the dataset's IO handler for a remote one that
    # forwards reads to the dedicated IO group, restoring it afterwards.
    original_io = ds.index.io
    ds.index.io = IOHandlerRemote(ds, wg, pool)
    yield
    ds.index.io.terminate()
    ds.index.io = original_io

def io_nodes(fn, n_io, n_work, func, *args, **kwargs):
    from yt.mods import load
    pool, wg = ProcessorPool.from_sizes([(n_io, "io"), (n_work, "work")])
    rv = None
    if wg.name == "work":
        ds = load(fn)
        with remote_io(ds, wg, pool):
            rv = func(ds, *args, **kwargs)
    elif wg.name == "io":
        ds = load(fn)
        io = IOCommunicator(ds, wg, pool)
        io.wait()
    # We should broadcast the result.
    rv = pool.comm.mpi_bcast(rv, root=pool['work'].ranks[0])
    pool.free_all()
    mylog.debug("Return value: %s", rv)
    return rv

# Here is an example of how to use this functionality.
if __name__ == "__main__":
    def gq(ds):
        dd = ds.all_data()
        return dd.quantities["TotalQuantity"]("CellMassMsun")
    q = io_nodes("DD0087/DD0087", 8, 24, gq)
    mylog.info(q)

def rootloginfo(*args):
    from yt.config import ytcfg
    if ytcfg.getint("yt", "__topcomm_parallel_rank") > 0:
        return
    mylog.info(*args)

def __init__(self, title, maxval):
    self.title = title
    mylog.info("Starting '%s'", title)

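# A hedged sketch of the minimal progress-reporter interface implied by
# the __init__ and finish() methods in this class (the class name is an
# assumption here):
pbar = ParallelProgressBar("Reading particles", 100)  # logs "Starting ..."
# ... do the work ...
pbar.finish()  # logs "Finishing 'Reading particles'"
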
def create_spectral_slabs(filename, slab_centers, slab_width, **kwargs):
    r"""
    Given a dictionary of spectral slab centers and a width in spectral
    units, extract data from a spectral cube at these slab centers and
    return a `FITSDataset` instance containing the different slabs as
    separate yt fields.  Useful for extracting individual lines from a
    spectral cube and separating them out as different fields.

    Requires the SpectralCube
    (https://spectral-cube.readthedocs.io/en/latest/) library.

    All keyword arguments will be passed on to the `FITSDataset`
    constructor.

    Parameters
    ----------
    filename : string
        The spectral cube FITS file to extract the data from.
    slab_centers : dict of (float, string) tuples or YTQuantities
        The centers of the slabs, where the keys are the names of the new
        fields and the values are (float, string) tuples or YTQuantities,
        specifying a value for each center and its unit.
    slab_width : YTQuantity or (float, string) tuple
        The width of the slab along the spectral axis.

    Examples
    --------
    >>> slab_centers = {'13CN': (218.03117, 'GHz'),
    ...                 'CH3CH2CHO': (218.284256, 'GHz'),
    ...                 'CH3NH2': (218.40956, 'GHz')}
    >>> slab_width = (0.05, "GHz")
    >>> ds = create_spectral_slabs("intensity_cube.fits",
    ...                            slab_centers, slab_width,
    ...                            nan_mask=0.0)
    """
    from spectral_cube import SpectralCube
    from yt.frontends.fits.api import FITSDataset
    from yt.visualization.fits_image import FITSImageData

    cube = SpectralCube.read(filename)
    if not isinstance(slab_width, YTQuantity):
        slab_width = YTQuantity(slab_width[0], slab_width[1])
    slab_data = {}
    field_units = cube.header.get("bunit", "dimensionless")
    for k, v in slab_centers.items():
        if not isinstance(v, YTQuantity):
            slab_center = YTQuantity(v[0], v[1])
        else:
            slab_center = v
        mylog.info("Adding slab field %s at %g %s",
                   k, slab_center.v, slab_center.units)
        slab_lo = (slab_center - 0.5 * slab_width).to_astropy()
        slab_hi = (slab_center + 0.5 * slab_width).to_astropy()
        subcube = cube.spectral_slab(slab_lo, slab_hi)
        slab_data[k] = YTArray(subcube.filled_data[:, :, :], field_units)
    width = subcube.header["naxis3"] * cube.header["cdelt3"]
    w = subcube.wcs.copy()
    w.wcs.crpix[-1] = 0.5
    w.wcs.crval[-1] = -0.5 * width
    fid = FITSImageData(slab_data, wcs=w)
    for hdu in fid:
        hdu.header.pop("RESTFREQ", None)
        hdu.header.pop("RESTFRQ", None)
    ds = FITSDataset(fid, **kwargs)
    return ds

def load_sample(fn=None, specific_file=None, pbar=True):
    """
    Load sample data with yt.  This is a simple wrapper around yt.load to
    include fetching data with pooch.

    Parameters
    ----------
    fn : str or None
        The name of the sample data to load.  This is generally the name
        of the folder of the dataset.  For IsolatedGalaxy, the name would
        be `IsolatedGalaxy`.  If `None` is supplied, the return value will
        be a list of all known datasets (by name).
    specific_file : str, optional
        The name of the file to load that is located within the sample
        dataset of `name`.  For the dataset `enzo_cosmology_plus`, which
        has a number of timesteps available, one may wish to choose
        DD0003.  The file specifically would be
        `enzo_cosmology_plus/DD0003/DD0003`, and the argument passed to
        this variable would be `DD0003/DD0003`.
    pbar : bool
        Display a progress bar.
    """
    fido = PoochHandle()

    if fn is None:
        keys = []
        for key in fido._registry:
            for ext in _extensions_to_strip:
                if key.endswith(ext):
                    key = key[: -len(ext)]
            keys.append(key)
        return keys

    base_path = fido.pooch_obj.path
    registered_fname, name, extension = fido._validate_sample_fname(
        fn
    )  # todo: make this part of the class

    downloader = None
    if pbar:
        downloader = pooch.pooch.HTTPDownloader(progressbar=True)

    if extension != "h5":
        # We are going to assume most files that exist on the hub are
        # compressed in .tar folders.  Some may not be.
        processor = pooch.pooch.Untar()
    else:
        processor = None

    storage_fname = fido.pooch_obj.fetch(
        registered_fname, processor=processor, downloader=downloader
    )

    # The `folder_path` variable is used here to notify the user where the
    # files have been unpacked to.  However, we can't assume this is
    # reliable because in some cases the common path will overlap with the
    # `load_name` variable of the file.
    folder_path = os.path.commonprefix(storage_fname)
    mylog.info("Files located at %s", folder_path)

    # Location of the file to load automatically, registered in the Fido
    # class.
    info = fido[registered_fname]
    file_lookup = info["load_name"]
    optional_args = info["load_kwargs"]

    if specific_file is None:
        # Right now we work on loading only untarred files; build out h5
        # later.
        mylog.info("Default to loading %s for %s dataset", file_lookup, name)
        loaded_file = os.path.join(
            base_path, f"{registered_fname}.untar", name, file_lookup
        )
    else:
        mylog.info("Loading %s for %s dataset", specific_file, name)
        loaded_file = os.path.join(
            base_path, f"{registered_fname}.untar", name, specific_file
        )

    return load(loaded_file, **optional_args)

def find_particles_by_type(self, ptype, max_num=None, additional_fields=None):
    """
    Returns a structure of arrays with all of the particles' positions,
    velocities, masses, types, IDs, and attributes for a particle type
    **ptype** for a maximum of **max_num** particles.  If non-default
    particle fields are used, provide them in **additional_fields**.
    """
    # Not sure whether this routine should be in the general HierarchyType.
    if self.grid_particle_count.sum() == 0:
        mylog.info("Data contains no particles.")
        return None
    if additional_fields is None:
        additional_fields = [
            "metallicity_fraction",
            "creation_time",
            "dynamical_time",
        ]
    pfields = [f for f in self.field_list if f.startswith("particle_")]
    nattr = self.dataset["NumberOfParticleAttributes"]
    if nattr > 0:
        pfields += additional_fields[:nattr]
    # Find where the particles reside and count them.
    if max_num is None:
        max_num = 1e100
    total = 0
    pstore = []
    for level in range(self.max_level, -1, -1):
        for grid in self.select_grids(level):
            index = np.where(grid["particle_type"] == ptype)[0]
            total += len(index)
            pstore.append(index)
            if total >= max_num:
                break
        if total >= max_num:
            break
    result = None
    if total > 0:
        result = {}
        for p in pfields:
            result[p] = np.zeros(total, "float64")
        # Now we retrieve data for each field.
        ig = count = 0
        for level in range(self.max_level, -1, -1):
            for grid in self.select_grids(level):
                nidx = len(pstore[ig])
                if nidx > 0:
                    for p in pfields:
                        result[p][count:count + nidx] = grid[p][pstore[ig]]
                    count += nidx
                ig += 1
                if count >= total:
                    break
            if count >= total:
                break
        # Crop data if we retrieved more than max_num.
        if count > max_num:
            for p in pfields:
                result[p] = result[p][0:max_num]
    return result

def get_time_series(self, initial_time=None, final_time=None,
                    initial_redshift=None, final_redshift=None,
                    times=None, redshifts=None, tolerance=None,
                    parallel=True, setup_function=None):
    """
    Instantiate a DatasetSeries object for a set of outputs.

    If no additional keywords are given, a DatasetSeries object will be
    created with all potential datasets created by the simulation.

    Outputs can be gathered by specifying a time or redshift range (or a
    combination of time and redshift), with a specific list of times or
    redshifts, or by simply searching all subdirectories within the
    simulation directory.

    initial_time : tuple of type (float, str)
        The earliest time for outputs to be included.  This should be
        given as the value and the string representation of the units.
        For example, (5.0, "Gyr").  If None, the initial time of the
        simulation is used.  This can be used in combination with either
        final_time or final_redshift.
        Default: None.
    final_time : tuple of type (float, str)
        The latest time for outputs to be included.  This should be given
        as the value and the string representation of the units.  For
        example, (13.7, "Gyr").  If None, the final time of the simulation
        is used.  This can be used in combination with either initial_time
        or initial_redshift.
        Default: None.
    times : tuple of type (float array, str)
        A list of times for which outputs will be found and the units of
        those values.  For example, ([0, 1, 2, 3], "s").
        Default: None.
    initial_redshift : float
        The earliest redshift for outputs to be included.  If None, the
        initial redshift of the simulation is used.  This can be used in
        combination with either final_time or final_redshift.
        Default: None.
    final_redshift : float
        The latest redshift for outputs to be included.  If None, the
        final redshift of the simulation is used.  This can be used in
        combination with either initial_time or initial_redshift.
        Default: None.
    redshifts : array_like
        A list of redshifts for which outputs will be found.
        Default: None.
    tolerance : float
        Used in combination with "times" or "redshifts" keywords, this is
        the tolerance within which outputs are accepted given the
        requested times or redshifts.  If None, the nearest output is
        always taken.
        Default: None.
    parallel : bool/int
        If True, the generated DatasetSeries will divide the work such
        that a single processor works on each dataset.  If an integer is
        supplied, the work will be divided into that number of jobs.
        Default: True.
    setup_function : callable, accepts a ds
        This function will be called whenever a dataset is loaded.

    Examples
    --------
    >>> import yt
    >>> gs = yt.simulation("my_simulation.par", "Gadget")
    >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))
    >>> gs.get_time_series(redshifts=[3, 2, 1, 0])
    >>> # after calling get_time_series
    >>> for ds in gs.piter():
    ...     p = ProjectionPlot(ds, "x", "density")
    ...     p.save()
    >>> # An example using the setup_function keyword
    >>> def print_time(ds):
    ...     print(ds.current_time)
    >>> gs.get_time_series(setup_function=print_time)
    >>> for ds in gs:
    ...     SlicePlot(ds, "x", "Density").save()
    """
    if (initial_redshift is not None or
            final_redshift is not None) and \
            not self.cosmological_simulation:
        raise InvalidSimulationTimeSeries(
            "An initial or final redshift has been given for a "
            "noncosmological simulation.")

    my_all_outputs = self.all_outputs
    if not my_all_outputs:
        DatasetSeries.__init__(self, outputs=[], parallel=parallel,
                               unit_base=self.unit_base)
        mylog.info("0 outputs loaded into time series.")
        return

    # Apply selection criteria to the set.
    if times is not None:
        my_outputs = self._get_outputs_by_key(
            "time", times, tolerance=tolerance, outputs=my_all_outputs)
    elif redshifts is not None:
        my_outputs = self._get_outputs_by_key(
            "redshift", redshifts, tolerance=tolerance,
            outputs=my_all_outputs)
    else:
        if initial_time is not None:
            if isinstance(initial_time, float):
                initial_time = self.quan(initial_time, "code_time")
            elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                initial_time = self.quan(*initial_time)
            elif not isinstance(initial_time, YTArray):
                raise RuntimeError(
                    "Error: initial_time must be given as a float or "
                    "tuple of (value, units).")
            my_initial_time = initial_time.in_units("s")
        elif initial_redshift is not None:
            my_initial_time = self.cosmology.t_from_z(initial_redshift)
        else:
            my_initial_time = self.initial_time

        if final_time is not None:
            if isinstance(final_time, float):
                final_time = self.quan(final_time, "code_time")
            elif isinstance(final_time, tuple) and len(final_time) == 2:
                final_time = self.quan(*final_time)
            elif not isinstance(final_time, YTArray):
                raise RuntimeError(
                    "Error: final_time must be given as a float or "
                    "tuple of (value, units).")
            my_final_time = final_time.in_units("s")
        elif final_redshift is not None:
            my_final_time = self.cosmology.t_from_z(final_redshift)
        else:
            my_final_time = self.final_time

        my_initial_time.convert_to_units("s")
        my_final_time.convert_to_units("s")
        my_times = np.array([a["time"] for a in my_all_outputs])
        my_indices = np.digitize([my_initial_time, my_final_time], my_times)
        if my_initial_time == my_times[my_indices[0] - 1]:
            my_indices[0] -= 1
        my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

    init_outputs = []
    for output in my_outputs:
        if os.path.exists(output["filename"]):
            init_outputs.append(output["filename"])
    if len(init_outputs) == 0 and len(my_outputs) > 0:
        mylog.warn("Could not find any datasets.  "
                   "Check the value of OutputDir in your parameter file.")

    DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                           setup_function=setup_function,
                           unit_base=self.unit_base)
    mylog.info("%d outputs loaded into time series.", len(init_outputs))

def _parse_parameter_file(self):
    """
    Parse the SWIFT "parameter file" -- really this actually reads info
    from the main HDF5 file, as everything is replicated there and
    parameter files are usually not transported.

    The header information from the HDF5 file is stored in an un-parsed
    format in self.parameters should users wish to use it.
    """
    self.unique_identifier = uuid4()

    # Read from the HDF5 file; this gives us all the info we need.  The
    # rest of this function is just parsing.
    header = self._get_info_attributes("Header")
    runtime_parameters = self._get_info_attributes("RuntimePars")
    policy = self._get_info_attributes("Policy")
    # These are the parameter-file parameters from *.yml at runtime.
    parameters = self._get_info_attributes("Parameters")

    # Not used in this function, but passed to parameters.
    hydro = self._get_info_attributes("HydroScheme")
    subgrid = self._get_info_attributes("SubgridScheme")

    self.domain_right_edge = header["BoxSize"]
    self.domain_left_edge = np.zeros_like(self.domain_right_edge)

    self.dimensionality = int(header["Dimension"])

    # SWIFT is either all periodic, or not periodic at all.
    periodic = int(runtime_parameters["PeriodicBoundariesOn"])
    if periodic:
        self._periodicity = [True] * self.dimensionality
    else:
        self._periodicity = [False] * self.dimensionality

    # Units get attached to this.
    self.current_time = float(header["Time"])

    # Now cosmology enters the fray, as a runtime parameter.
    self.cosmological_simulation = int(policy["cosmological integration"])
    if self.cosmological_simulation:
        try:
            self.current_redshift = float(header["Redshift"])
            # These won't be present if self.cosmological_simulation is
            # false.
            self.omega_lambda = float(parameters["Cosmology:Omega_lambda"])
            self.omega_matter = float(parameters["Cosmology:Omega_m"])
            # This is "little h".
            self.hubble_constant = float(parameters["Cosmology:h"])
        except KeyError:
            mylog.warning(
                "Could not find cosmology information in Parameters, "
                "despite having run with -c signifying a cosmological "
                "run.")
            mylog.info("Setting up as a non-cosmological run. Check this!")
            self.cosmological_simulation = 0
            self.current_redshift = 0.0
            self.omega_lambda = 0.0
            self.omega_matter = 0.0
            self.hubble_constant = 0.0
    else:
        self.current_redshift = 0.0
        self.omega_lambda = 0.0
        self.omega_matter = 0.0
        self.hubble_constant = 0.0

    # Store the un-parsed information should people want it.
    self.parameters = dict(
        header=header,
        runtime_parameters=runtime_parameters,
        policy=policy,
        parameters=parameters,
        hydro=hydro,
        subgrid=subgrid,
    )

    # SWIFT never has multi-file snapshots.
    self.file_count = 1
    self.filename_template = self.parameter_filename
    return

def load_sample(fn=None, progressbar: bool = True, timeout=None, **kwargs):
    """
    Load sample data with yt.

    This is a simple wrapper around `yt.load` to include fetching data
    with pooch from a remote source.

    yt sample data can be found at: https://yt-project.org/data.

    The data registry table can be retrieved and visualized using
    `yt.sample_data.api.get_data_registry_table()`.

    This function requires pandas and pooch to be installed.

    Parameters
    ----------
    fn : str
        The `filename` of the dataset to load, as defined in the data
        registry table.
    progressbar : bool
        Display a progress bar (tqdm).
    timeout : float or int (optional)
        Maximal waiting time, in seconds, after which download is aborted.
        `None` means "no limit".  This parameter is directly passed down
        to requests.get via pooch.HTTPDownloader.

    Any additional keyword argument is passed down to `yt.load`.  Note
    that in case of collision with predefined keyword arguments as set in
    the data registry, the ones passed to this function take priority.
    """
    if fn is None:
        print(
            "One can see which sample datasets are available at: "
            "https://yt-project.org/data\n"
            "or alternatively by running: "
            "yt.sample_data.api.get_data_registry_table()",
            file=sys.stderr,
        )
        return None

    from yt.sample_data.api import (
        _download_sample_data_file,
        _get_test_data_dir_path,
        get_data_registry_table,
    )

    pooch_logger = pooch.utils.get_logger()

    topdir, _, specific_file = str(fn).partition(os.path.sep)

    registry_table = get_data_registry_table()
    # PR 3089
    # note: in the future the registry table should be reindexed
    # so that the following line can be replaced with
    #
    # specs = registry_table.loc[fn]
    #
    # however we don't want to do it right now because the "filename"
    # column is currently incomplete
    try:
        specs = registry_table.query(f"`filename` == '{topdir}'").iloc[0]
    except IndexError as err:
        raise KeyError(f"Could not find '{fn}' in the registry.") from err

    if not specs["load_name"]:
        raise ValueError(
            "Registry appears to be corrupted: could not find a "
            "'load_name' entry for this dataset."
        )

    kwargs = {**specs["load_kwargs"], **kwargs}

    try:
        data_dir = lookup_on_disk_data(fn)
    except FileNotFoundError:
        mylog.info("'%s' is not available locally. Looking up online.", fn)
    else:
        # If the data is already available locally, `load_sample`
        # only acts as a thin wrapper around `load`.
        loadable_path = data_dir.joinpath(specs["load_name"], specific_file)
        mylog.info("Sample dataset found in '%s'", data_dir)
        if timeout is not None:
            mylog.info("Ignoring the `timeout` keyword argument received.")
        return load(loadable_path, **kwargs)

    try:
        save_dir = _get_test_data_dir_path()
        assert save_dir.is_dir()
    except (OSError, AssertionError):
        mylog.warning(
            "yt test data directory is not properly set up. "
            "Data will be saved to the current work directory instead.")
        save_dir = Path.cwd()

    # Effectively silence pooch's logger and create our own log instead.
    pooch_logger.setLevel(100)
    mylog.info("Downloading from %s", specs["url"])

    # Downloading via a pooch.Pooch instance behind the scenes.
    filename = urlsplit(specs["url"]).path.split("/")[-1]
    tmp_file = _download_sample_data_file(filename, progressbar=progressbar,
                                          timeout=timeout)

    # pooch has functionalities to unpack downloaded archive files,
    # but it needs to be told in advance that we are downloading a tarball.
    # Since that information is not necessarily trivial to guess from the
    # filename, we rely on the standard library to perform a conditional
    # unpacking instead.
    if tarfile.is_tarfile(tmp_file):
        mylog.info("Untarring downloaded file to '%s'", save_dir)
        with tarfile.open(tmp_file) as fh:
            fh.extractall(save_dir)
        os.remove(tmp_file)
    else:
        # os.replace onto an existing directory raises; move the file *into*
        # the save directory instead, keeping its downloaded name.
        os.replace(tmp_file, os.path.join(save_dir, filename))

    loadable_path = Path.joinpath(save_dir, fn, specs["load_name"], specific_file)

    if specific_file and not loadable_path.exists():
        raise ValueError(f"Could not find file '{specific_file}'.")

    return load(loadable_path, **kwargs)
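# A short usage sketch for load_sample, assuming yt >= 4 exposes it at the top
# level. "IsolatedGalaxy" is a dataset name from the public registry at
# https://yt-project.org/data; whether it is already on disk or needs to be
# downloaded first is handled transparently by the function above.
import yt

ds = yt.load_sample("IsolatedGalaxy", progressbar=True, timeout=300)
print(ds.domain_width)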
def get_time_series(self, time_data=True, redshift_data=True, initial_time=None, final_time=None, initial_redshift=None, final_redshift=None, initial_cycle=None, final_cycle=None, times=None, redshifts=None, tolerance=None, parallel=True, setup_function=None): """ Instantiate a DatasetSeries object for a set of outputs. If no additional keywords given, a DatasetSeries object will be created with all potential datasets created by the simulation. Outputs can be gather by specifying a time or redshift range (or combination of time and redshift), with a specific list of times or redshifts, a range of cycle numbers (for cycle based output), or by simply searching all subdirectories within the simulation directory. time_data : bool Whether or not to include time outputs when gathering datasets for time series. Default: True. redshift_data : bool Whether or not to include redshift outputs when gathering datasets for time series. Default: True. initial_time : tuple of type (float, str) The earliest time for outputs to be included. This should be given as the value and the string representation of the units. For example, (5.0, "Gyr"). If None, the initial time of the simulation is used. This can be used in combination with either final_time or final_redshift. Default: None. final_time : tuple of type (float, str) The latest time for outputs to be included. This should be given as the value and the string representation of the units. For example, (13.7, "Gyr"). If None, the final time of the simulation is used. This can be used in combination with either initial_time or initial_redshift. Default: None. times : tuple of type (float array, str) A list of times for which outputs will be found and the units of those values. For example, ([0, 1, 2, 3], "s"). Default: None. initial_redshift : float The earliest redshift for outputs to be included. If None, the initial redshift of the simulation is used. This can be used in combination with either final_time or final_redshift. Default: None. final_redshift : float The latest redshift for outputs to be included. If None, the final redshift of the simulation is used. This can be used in combination with either initial_time or initial_redshift. Default: None. redshifts : array_like A list of redshifts for which outputs will be found. Default: None. initial_cycle : float The earliest cycle for outputs to be included. If None, the initial cycle of the simulation is used. This can only be used with final_cycle. Default: None. final_cycle : float The latest cycle for outputs to be included. If None, the final cycle of the simulation is used. This can only be used in combination with initial_cycle. Default: None. tolerance : float Used in combination with "times" or "redshifts" keywords, this is the tolerance within which outputs are accepted given the requested times or redshifts. If None, the nearest output is always taken. Default: None. parallel : bool/int If True, the generated DatasetSeries will divide the work such that a single processor works on each dataset. If an integer is supplied, the work will be divided into that number of jobs. Default: True. setup_function : callable, accepts a ds This function will be called whenever a dataset is loaded. 
        Examples
        --------

        >>> import yt
        >>> es = yt.simulation("my_simulation.par", "Enzo")
        >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
        ...                    redshift_data=False)
        >>> es.get_time_series(redshifts=[3, 2, 1, 0])
        >>> es.get_time_series(final_cycle=100000)

        >>> # after calling get_time_series
        >>> for ds in es.piter():
        ...     p = yt.ProjectionPlot(ds, 'x', "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> es.get_time_series(setup_function=print_time)
        >>> for ds in es:
        ...     yt.SlicePlot(ds, "x", "Density").save()

        """

    if (initial_redshift is not None or
        final_redshift is not None) and \
            not self.cosmological_simulation:
        raise InvalidSimulationTimeSeries(
            "An initial or final redshift has been given for a "
            "non-cosmological simulation.")

    if time_data and redshift_data:
        my_all_outputs = self.all_outputs
    elif time_data:
        my_all_outputs = self.all_time_outputs
    elif redshift_data:
        my_all_outputs = self.all_redshift_outputs
    else:
        raise InvalidSimulationTimeSeries(
            "Both time_data and redshift_data are False.")

    if not my_all_outputs:
        DatasetSeries.__init__(self, outputs=[], parallel=parallel)
        mylog.info("0 outputs loaded into time series.")
        return

    # Apply selection criteria to the set.
    if times is not None:
        my_outputs = self._get_outputs_by_key("time", times,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)

    elif redshifts is not None:
        my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                              tolerance=tolerance,
                                              outputs=my_all_outputs)

    elif initial_cycle is not None or final_cycle is not None:
        if initial_cycle is None:
            initial_cycle = 0
        else:
            initial_cycle = max(initial_cycle, 0)
        if final_cycle is None:
            final_cycle = self.parameters['StopCycle']
        else:
            final_cycle = min(final_cycle, self.parameters['StopCycle'])

        # Use integer division so the result can serve as a slice index.
        my_outputs = my_all_outputs[
            int(ceil(float(initial_cycle) /
                     self.parameters['CycleSkipDataDump'])):
            final_cycle // self.parameters['CycleSkipDataDump'] + 1]

    else:
        if initial_time is not None:
            if isinstance(initial_time, float):
                initial_time = self.quan(initial_time, "code_time")
            elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                initial_time = self.quan(*initial_time)
            elif not isinstance(initial_time, YTArray):
                raise RuntimeError(
                    "Error: initial_time must be given as a float or "
                    "tuple of (value, units).")
            my_initial_time = initial_time.in_units("s")
        elif initial_redshift is not None:
            my_initial_time = self.cosmology.t_from_z(initial_redshift)
        else:
            my_initial_time = self.initial_time

        if final_time is not None:
            if isinstance(final_time, float):
                final_time = self.quan(final_time, "code_time")
            elif isinstance(final_time, tuple) and len(final_time) == 2:
                final_time = self.quan(*final_time)
            elif not isinstance(final_time, YTArray):
                raise RuntimeError(
                    "Error: final_time must be given as a float or "
                    "tuple of (value, units).")
            my_final_time = final_time.in_units("s")
        elif final_redshift is not None:
            my_final_time = self.cosmology.t_from_z(final_redshift)
        else:
            my_final_time = self.final_time

        my_initial_time.convert_to_units("s")
        my_final_time.convert_to_units("s")
        my_times = np.array([a['time'] for a in my_all_outputs])
        my_indices = np.digitize([my_initial_time, my_final_time], my_times)
        if my_initial_time == my_times[my_indices[0] - 1]:
            my_indices[0] -= 1
        my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

    init_outputs = []
    for output in my_outputs:
        if os.path.exists(output['filename']):
            init_outputs.append(output['filename'])

    DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                           setup_function=setup_function)
    mylog.info("%d outputs loaded into time series.", len(init_outputs))
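# A self-contained sketch of the time-window selection used above: np.digitize
# returns, for each requested boundary time, the index of the first output
# whose time exceeds it, and the extra decrement keeps an output that falls
# exactly on the initial time. The numbers below are made up for illustration.
import numpy as np

output_times = np.array([1.0, 2.0, 3.0, 4.0, 5.0])     # sorted output times (s)
initial, final = 2.0, 4.5

indices = np.digitize([initial, final], output_times)   # -> array([2, 4])
if initial == output_times[indices[0] - 1]:
    indices[0] -= 1                                      # keep the t = 2.0 output

selected = output_times[indices[0]:indices[1]]
print(selected)                                          # [2. 3. 4.]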
def uuid_serve_functions(pre_routed=None, open_browser=False, port=9099,
                         repl=None, token=None):
    if pre_routed is None:
        pre_routed = route_functions
    debug(mode=True)
    if token is None:
        token = uuid.uuid1()
    for r in pre_routed:
        args, kwargs, f = pre_routed[r]
        if r[0] == "/":
            r = r[1:]
        rp = "/%s/%s" % (token, r)
        func_name = getattr(f, 'func_name', str(f))
        print("Routing from %s => %s" % (rp, func_name))
        route(rp, *args, **kwargs)(f)
    for w in route_watchers:
        if not hasattr(w, "_route_prefix"):
            print("WARNING: %s has no _route_prefix attribute.  Not notifying." % w)
            continue
        w._route_prefix = token
    # The default repl is None, so guard before touching it.
    if repl is not None:
        repl._global_token = token
        repl.activate()
        repl.execution_thread.wait()
    print()
    print()
    print("=============================================================================")
    print("=============================================================================")
    print("Greetings, and welcome to Reason!")
    print("Your private token is %s ." % token)
    print("DO NOT SHARE THIS TOKEN.")
    print()
    print("Please direct your browser to:")
    print()
    print("     http://localhost:%s/%s/" % (port, token))
    print()
    print("=============================================================================")
    print()
    print("If you are currently ssh'd into a remote machine, you should be able")
    print("to create a new SSH tunnel by typing or copy/pasting this text")
    print("verbatim, while waiting to see the 'ssh>' prompt after the first line.")
    print()
    print("~C")
    print("-L%s:localhost:%s" % (port, port))
    print()
    print("and then pointing a web browser on your local machine to the above URL.")
    print()
    print("=============================================================================")
    print("=============================================================================")
    print()
    print()
    if open_browser:
        # We do some fancy footwork so that we can open the browser while the
        # server starts up.  I got this from some recipe whose URL escapes me.
        # Thank you, to whoever wrote it!
        def local_browse():
            """Start a browser after waiting for half a second."""
            import threading
            import webbrowser

            def _local_browse():
                webbrowser.open('http://localhost:%s/%s/' % (port, token))
            thread = threading.Timer(0.5, _local_browse)
            thread.start()
        local_browse()
    try:
        import yt.extern.rocket as rocket
        server_type = YTRocketServer
        log = logging.getLogger('Rocket')
        log.setLevel(logging.WARNING)
        kwargs = {'timeout': 600, 'max_threads': 2}
        if repl is not None:
            repl.server = YTRocketServer.server_info
    except ImportError:
        server_type = server_names.get("wsgiref")
        kwargs = {}
    server = server_type(host='localhost', port=port, **kwargs)
    mylog.info("Starting up the server.")
    run(server=server)
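# A minimal, framework-free sketch of the token-prefixed routing idea used
# above: every public path is re-rooted under a random UUID so the served URLs
# are unguessable. The route table and handler names below are made up; the
# real function registers the handlers with the web framework's `route`.
import uuid


def prefix_routes(routes, token=None):
    """Map '/path' -> '/<token>/path' for every entry in `routes`."""
    token = token or uuid.uuid1()
    prefixed = {"/%s/%s" % (token, path.lstrip("/")): handler
                for path, handler in routes.items()}
    return prefixed, token


prefixed, token = prefix_routes({"/": "index", "/map": "map_handler"})
for path in prefixed:
    print(path)   # e.g. /6f1d.../  and  /6f1d.../map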
def profile(halo, bin_fields, profile_fields, n_bins=32, extrema=None, logs=None, units=None, weight_field="cell_mass", accumulation=False, fractional=False, storage="profiles", output_dir="."): r""" Create 1, 2, or 3D profiles of a halo. Store profile data in a dictionary associated with the halo object. Parameters ---------- halo : Halo object The Halo object to be provided by the HaloCatalog. bin_fields : list of strings The binning fields for the profile. profile_fields : string or list of strings The fields to be profiled. n_bins : int or list of ints The number of bins in each dimension. If None, 32 bins for each bin are used for each bin field. Default: 32. extrema : dict of min, max tuples Minimum and maximum values of the bin_fields for the profiles. The keys correspond to the field names. Defaults to the extrema of the bin_fields of the dataset. If a units dict is provided, extrema are understood to be in the units specified in the dictionary. logs : dict of boolean values Whether or not to log the bin_fields for the profiles. The keys correspond to the field names. Defaults to the take_log attribute of the field. units : dict of strings The units of the fields in the profiles, including the bin_fields. weight_field : string Weight field for profiling. Default : "cell_mass" accumulation : bool or list of bools If True, the profile values for a bin n are the cumulative sum of all the values from bin 0 to n. If -True, the sum is reversed so that the value for bin n is the cumulative sum from bin N (total bins) to n. If the profile is 2D or 3D, a list of values can be given to control the summation in each dimension independently. Default: False. fractional : If True the profile values are divided by the sum of all the profile data such that the profile represents a probability distribution function. storage : string Name of the dictionary to store profiles. Default: "profiles" output_dir : string Name of directory where profile data will be written. The full path will be the output_dir of the halo catalog concatenated with this directory. Default : "." """ mylog.info("Calculating 1D profile for halo %d." % halo.quantities["particle_identifier"]) dds = halo.halo_catalog.data_ds if dds is None: raise RuntimeError("Profile callback requires a data ds.") if not hasattr(halo, "data_object"): raise RuntimeError("Profile callback requires a data container.") if halo.data_object is None: mylog.info("Skipping halo %d since data_object is None." 
% halo.quantities["particle_identifier"]) return if output_dir is None: output_dir = storage output_dir = os.path.join(halo.halo_catalog.output_dir, output_dir) bin_fields = ensure_list(bin_fields) my_profile = create_profile(halo.data_object, bin_fields, profile_fields, n_bins=n_bins, extrema=extrema, logs=logs, units=units, weight_field=weight_field, accumulation=accumulation, fractional=fractional) prof_store = dict([(field, my_profile[field]) \ for field in my_profile.field_data]) prof_store[my_profile.x_field] = my_profile.x if len(bin_fields) > 1: prof_store[my_profile.y_field] = my_profile.y if len(bin_fields) > 2: prof_store[my_profile.z_field] = my_profile.z if hasattr(halo, storage): halo_store = getattr(halo, storage) if "used" in halo_store: halo_store["used"] &= my_profile.used else: halo_store = {"used": my_profile.used} setattr(halo, storage, halo_store) halo_store.update(prof_store) if my_profile.standard_deviation is not None: variance_store = dict([(field, my_profile.standard_deviation[field]) \ for field in my_profile.standard_deviation]) variance_storage = "%s_variance" % storage if hasattr(halo, variance_storage): halo_variance_store = getattr(halo, variance_storage) else: halo_variance_store = {} setattr(halo, variance_storage, halo_variance_store) halo_variance_store.update(variance_store)
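# A usage sketch for the profile callback above, assuming the yt 3.x
# HaloCatalog workflow in which callbacks are registered by name and applied
# to every halo. The dataset paths are placeholders; the "sphere" callback is
# used here to give each halo the data_object that the profile callback needs.
import yt
from yt.analysis_modules.halo_analysis.api import HaloCatalog

data_ds = yt.load("DD0046/DD0046")                        # placeholder paths
halos_ds = yt.load("rockstar_halos/halos_0.0.bin")

hc = HaloCatalog(data_ds=data_ds, halos_ds=halos_ds)
hc.add_callback("sphere")
hc.add_callback("profile", ["radius"],
                [("gas", "temperature"), ("gas", "density")],
                weight_field="cell_mass", storage="profiles")
hc.create()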
def par_combine_object(self, data, op, datatype=None):
    # op can be chosen from:
    #   cat
    #   join
    # data is selected to be of types:
    #   np.ndarray
    #   dict
    #   data field dict
    if datatype is not None:
        pass
    elif isinstance(data, dict):
        datatype = "dict"
    elif isinstance(data, np.ndarray):
        datatype = "array"
    elif isinstance(data, list):
        datatype = "list"
    # Now we have our datatype, and we conduct our operation
    if datatype == "dict" and op == "join":
        if self.comm.rank == 0:
            for i in range(1, self.comm.size):
                data.update(self.comm.recv(source=i, tag=0))
        else:
            self.comm.send(data, dest=0, tag=0)
        if self.comm.rank == 0:
            mylog.info("datatype %s", type(data))
            for i in range(1, self.comm.size):
                self.comm.send(data, dest=i, tag=i)
        else:
            data = self.comm.recv(source=0, tag=self.comm.rank)
        return data
    elif datatype == "dict" and op == "cat":
        field_keys = sorted(data)
        size = data[field_keys[0]].shape[-1]
        sizes = np.zeros(self.comm.size, dtype='int64')
        outsize = np.array(size, dtype='int64')
        self.comm.Allgather([outsize, 1, MPI.LONG],
                            [sizes, 1, MPI.LONG])
        # This nested concatenate is to get the shapes to work out correctly;
        # if we just add [0] to sizes, it will broadcast a summation, not a
        # concatenation.
        offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
        arr_size = self.comm.allreduce(size, op=MPI.SUM)
        for key in field_keys:
            dd = data[key]
            rv = self.alltoallv_array(dd, arr_size, offsets, sizes)
            data[key] = rv
        return data
    elif datatype == "array" and op == "cat":
        if data is None:
            ncols = -1
            size = 0
            dtype = 'float64'
            mylog.warning(
                "Array passed to par_combine_object was None. "
                "Setting dtype to float64. This may break things!")
        else:
            dtype = data.dtype
            if len(data) == 0:
                ncols = -1
                size = 0
            elif len(data.shape) == 1:
                ncols = 1
                size = data.shape[0]
            else:
                ncols, size = data.shape
        ncols = self.comm.allreduce(ncols, op=MPI.MAX)
        if ncols == 0:
            data = np.zeros(0, dtype=dtype)  # This only works for
        elif data is None:
            data = np.zeros((ncols, 0), dtype=dtype)
        size = data.shape[-1]
        sizes = np.zeros(self.comm.size, dtype='int64')
        outsize = np.array(size, dtype='int64')
        self.comm.Allgather([outsize, 1, MPI.LONG],
                            [sizes, 1, MPI.LONG])
        # This nested concatenate is to get the shapes to work out correctly;
        # if we just add [0] to sizes, it will broadcast a summation, not a
        # concatenation.
        offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
        arr_size = self.comm.allreduce(size, op=MPI.SUM)
        data = self.alltoallv_array(data, arr_size, offsets, sizes)
        return data
    elif datatype == "list" and op == "cat":
        recv_data = self.comm.allgather(data)
        # Now flatten into a single list, since this
        # returns us a list of lists.
        data = []
        while recv_data:
            data.extend(recv_data.pop(0))
        return data
    raise NotImplementedError
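# A small standalone illustration of the "nested concatenate" comment above:
# prepending a literal 0 and accumulating gives each rank's starting offset in
# the gathered array, whereas naively adding 0 to `sizes` would broadcast an
# addition instead of prepending an element. The sizes are made-up counts.
import numpy as np

sizes = np.array([4, 2, 5, 3], dtype='int64')            # elements per rank
offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
print(offsets)       # [ 0  4  6 11] -> where each rank's chunk starts
print(sizes.sum())   # 14            -> total length of the combined array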
def setup_fluid_fields(self): setup_magnetic_field_aliases(self, "amrvac", [f"mag{ax}" for ax in "xyz"]) self._setup_velocity_fields() # gas velocities self._setup_dust_fields() # dust derived fields (including velocities) # fields with nested dependencies are defined thereafter # by increasing level of complexity us = self.ds.unit_system def _kinetic_energy_density(field, data): # devnote : have a look at issue 1301 return 0.5 * data["gas", "density"] * data["gas", "velocity_magnitude"]**2 self.add_field( ("gas", "kinetic_energy_density"), function=_kinetic_energy_density, units=us["density"] * us["velocity"]**2, dimensions=dimensions.density * dimensions.velocity**2, sampling_type="cell", ) # magnetic energy density if ("amrvac", "b1") in self.field_list: def _magnetic_energy_density(field, data): emag = 0.5 * data["gas", "magnetic_1"]**2 for idim in "23": if not ("amrvac", f"b{idim}") in self.field_list: break emag += 0.5 * data["gas", f"magnetic_{idim}"]**2 # in AMRVAC the magnetic field is defined in units where mu0 = 1, # such that # Emag = 0.5*B**2 instead of Emag = 0.5*B**2 / mu0 # To correctly transform the dimensionality from gauss**2 -> rho*v**2, # we have to take mu0 into account. If we divide here, units when adding # the field should be us["density"]*us["velocity"]**2. # If not, they should be us["magnetic_field"]**2 and division should # happen elsewhere. emag /= 4 * np.pi # divided by mu0 = 4pi in cgs, # yt handles 'mks' and 'code' unit systems internally. return emag self.add_field( ("gas", "magnetic_energy_density"), function=_magnetic_energy_density, units=us["density"] * us["velocity"]**2, dimensions=dimensions.density * dimensions.velocity**2, sampling_type="cell", ) # Adding the thermal pressure field. # In AMRVAC we have multiple physics possibilities: # - if HD/MHD + energy equation P = (gamma-1)*(e - ekin (- emag)) for (M)HD # - if HD/MHD but solve_internal_e is true in parfile, P = (gamma-1)*e for both # - if (m)hd_energy is false in parfile (isothermal), P = c_adiab * rho**gamma def _full_thermal_pressure_HD(field, data): # energy density and pressure are actually expressed in the same unit pthermal = (data.ds.gamma - 1) * (data["gas", "energy_density"] - data["gas", "kinetic_energy_density"]) return pthermal def _full_thermal_pressure_MHD(field, data): pthermal = ( _full_thermal_pressure_HD(field, data) - (data.ds.gamma - 1) * data["gas", "magnetic_energy_density"]) return pthermal def _polytropic_thermal_pressure(field, data): return (data.ds.gamma - 1) * data["gas", "energy_density"] def _adiabatic_thermal_pressure(field, data): return data.ds._c_adiab * data["gas", "density"]**data.ds.gamma pressure_recipe = None if ("amrvac", "e") in self.field_list: if self.ds._e_is_internal: pressure_recipe = _polytropic_thermal_pressure mylog.info("Using polytropic EoS for thermal pressure.") elif ("amrvac", "b1") in self.field_list: pressure_recipe = _full_thermal_pressure_MHD mylog.info("Using full MHD energy for thermal pressure.") else: pressure_recipe = _full_thermal_pressure_HD mylog.info("Using full HD energy for thermal pressure.") elif self.ds._c_adiab is not None: pressure_recipe = _adiabatic_thermal_pressure mylog.info( "Using adiabatic EoS for thermal pressure (isothermal).") mylog.warning("If you used usr_set_pthermal you should " "redefine the thermal_pressure field.") if pressure_recipe is not None: self.add_field( ("gas", "thermal_pressure"), function=pressure_recipe, units=us["density"] * us["velocity"]**2, dimensions=dimensions.density * 
            dimensions.velocity**2,
            sampling_type="cell",
        )

        # sound speed and temperature depend on thermal pressure
        def _sound_speed(field, data):
            return np.sqrt(data.ds.gamma * data["gas", "thermal_pressure"]
                           / data["gas", "density"])

        self.add_field(
            ("gas", "sound_speed"),
            function=_sound_speed,
            units=us["velocity"],
            dimensions=dimensions.velocity,
            sampling_type="cell",
        )
    else:
        mylog.warning(
            "Field 'e' not found and no parfile passed; "
            "cannot set up thermal_pressure.")
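# A standalone numerical check of the pressure recipes described above, with
# made-up values in code-like units (mu0 = 1, as in AMRVAC): for an (M)HD run
# carrying a total energy density e, the thermal pressure is
# (gamma - 1) * (e - e_kin - e_mag), while an isothermal run falls back to
# p = c_adiab * rho**gamma. This is only an illustration, not AMRVAC code.
import numpy as np

gamma, c_adiab = 5.0 / 3.0, 1.0
rho, v, b = 1.0, 0.5, 0.2             # density, speed, magnetic field
e_kin = 0.5 * rho * v**2
e_mag = 0.5 * b**2
e_total = 1.5 + e_kin + e_mag          # chosen internal energy density = 1.5

p_mhd = (gamma - 1) * (e_total - e_kin - e_mag)
p_iso = c_adiab * rho**gamma
print(p_mhd)                           # (gamma - 1) * 1.5 = 1.0
print(np.sqrt(gamma * p_mhd / rho))    # corresponding adiabatic sound speed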
def make_light_ray(self, seed=None, periodic=True, left_edge=None, right_edge=None, min_level=None, start_position=None, end_position=None, trajectory=None, fields=None, setup_function=None, solution_filename=None, data_filename=None, get_los_velocity=None, use_peculiar_velocity=True, redshift=None, field_parameters=None, njobs=-1): """ make_light_ray(seed=None, periodic=True, left_edge=None, right_edge=None, min_level=None, start_position=None, end_position=None, trajectory=None, fields=None, setup_function=None, solution_filename=None, data_filename=None, use_peculiar_velocity=True, redshift=None, njobs=-1) Create a light ray and get field values for each lixel. A light ray consists of a list of field values for cells intersected by the ray and the path length of the ray through those cells. Light ray data must be written out to an hdf5 file. Parameters ---------- seed : optional, int Seed for the random number generator. Default: None. periodic : optional, bool If True, ray trajectories will make use of periodic boundaries. If False, ray trajectories will not be periodic. Default : True. left_edge : optional, iterable of floats or YTArray The left corner of the region in which rays are to be generated. If None, the left edge will be that of the domain. If specified without units, it is assumed to be in code units. Default: None. right_edge : optional, iterable of floats or YTArray The right corner of the region in which rays are to be generated. If None, the right edge will be that of the domain. If specified without units, it is assumed to be in code units. Default: None. min_level : optional, int The minimum refinement level of the spatial region in which the ray passes. This can be used with zoom-in simulations where the high resolution region does not keep a constant geometry. Default: None. start_position : optional, iterable of floats or YTArray. Used only if creating a light ray from a single dataset. The coordinates of the starting position of the ray. If specified without units, it is assumed to be in code units. Default: None. end_position : optional, iterable of floats or YTArray. Used only if creating a light ray from a single dataset. The coordinates of the ending position of the ray. If specified without units, it is assumed to be in code units. Default: None. trajectory : optional, list of floats Used only if creating a light ray from a single dataset. The (r, theta, phi) direction of the light ray. Use either end_position or trajectory, not both. Default: None. fields : optional, list A list of fields for which to get data. Default: None. setup_function : optional, callable, accepts a ds This function will be called on each dataset that is loaded to create the light ray. For, example, this can be used to add new derived fields. Default: None. solution_filename : optional, string Path to a text file where the trajectories of each subray is written out. Default: None. data_filename : optional, string Path to output file for ray data. Default: None. use_peculiar_velocity : optional, bool If True, the peculiar velocity along the ray will be sampled for calculating the effective redshift combining the cosmological redshift and the doppler redshift. Default: True. redshift : optional, float Used with light rays made from single datasets to specify a starting redshift for the ray. If not used, the starting redshift will be 0 for a non-cosmological dataset and the dataset redshift for a cosmological dataset. Default: None. 
njobs : optional, int The number of parallel jobs over which the segments will be split. Choose -1 for one processor per segment. Default: -1. Examples -------- Make a light ray from multiple datasets: >>> import yt >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \ ... LightRay >>> my_ray = LightRay("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo", ... 0., 0.1, time_data=False) ... >>> my_ray.make_light_ray(seed=12345, ... solution_filename="solution.txt", ... data_filename="my_ray.h5", ... fields=["temperature", "density"], ... use_peculiar_velocity=True) Make a light ray from a single dataset: >>> import yt >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \ ... LightRay >>> my_ray = LightRay("IsolatedGalaxy/galaxy0030/galaxy0030") ... >>> my_ray.make_light_ray(start_position=[0., 0., 0.], ... end_position=[1., 1., 1.], ... solution_filename="solution.txt", ... data_filename="my_ray.h5", ... fields=["temperature", "density"], ... use_peculiar_velocity=True) """ if self.simulation_type is None: domain = self.ds else: domain = self.simulation assumed_units = "code_length" if left_edge is None: left_edge = domain.domain_left_edge elif not hasattr(left_edge, 'units'): left_edge = domain.arr(left_edge, assumed_units) left_edge.convert_to_units('unitary') if right_edge is None: right_edge = domain.domain_right_edge elif not hasattr(right_edge, 'units'): right_edge = domain.arr(right_edge, assumed_units) right_edge.convert_to_units('unitary') if start_position is not None: if hasattr(start_position, 'units'): start_position = start_position else: start_position = self.ds.arr(start_position, assumed_units) start_position.convert_to_units('unitary') if end_position is not None: if hasattr(end_position, 'units'): end_position = end_position else: end_position = self.ds.arr(end_position, assumed_units) end_position.convert_to_units('unitary') if get_los_velocity is not None: use_peculiar_velocity = get_los_velocity mylog.warn("'get_los_velocity' kwarg is deprecated. " + \ "Use 'use_peculiar_velocity' instead.") # Calculate solution. self._calculate_light_ray_solution(seed=seed, left_edge=left_edge, right_edge=right_edge, min_level=min_level, periodic=periodic, start_position=start_position, end_position=end_position, trajectory=trajectory, filename=solution_filename) if field_parameters is None: field_parameters = {} # Initialize data structures. self._data = {} # temperature field is automatically added to fields if fields is None: fields = [] if (('gas', 'temperature') not in fields) and \ ('temperature' not in fields): fields.append(('gas', 'temperature')) data_fields = fields[:] all_fields = fields[:] all_fields.extend(['dl', 'dredshift', 'redshift']) all_fields.extend(['x', 'y', 'z', 'dx', 'dy', 'dz']) data_fields.extend(['x', 'y', 'z', 'dx', 'dy', 'dz']) if use_peculiar_velocity: all_fields.extend(['velocity_x', 'velocity_y', 'velocity_z', 'velocity_los', 'redshift_eff', 'redshift_dopp']) data_fields.extend(['velocity_x', 'velocity_y', 'velocity_z']) all_ray_storage = {} for my_storage, my_segment in parallel_objects(self.light_ray_solution, storage=all_ray_storage, njobs=njobs): # In case of simple rays, use the already loaded dataset: self.ds, # otherwise, load dataset for segment. 
if self.ds is None: ds = load(my_segment['filename'], **self.load_kwargs) else: ds = self.ds my_segment['unique_identifier'] = ds.unique_identifier if redshift is not None: if ds.cosmological_simulation and redshift != ds.current_redshift: mylog.warn("Generating light ray with different redshift than " + "the dataset itself.") my_segment["redshift"] = redshift if setup_function is not None: setup_function(ds) if not ds.cosmological_simulation: next_redshift = my_segment["redshift"] elif self.near_redshift == self.far_redshift: if isinstance(my_segment["traversal_box_fraction"], YTArray) and \ not my_segment["traversal_box_fraction"].units.is_dimensionless: segment_length = \ my_segment["traversal_box_fraction"].in_units("Mpccm / h") else: segment_length = my_segment["traversal_box_fraction"] * \ ds.domain_width[0].in_units("Mpccm / h") next_redshift = my_segment["redshift"] - \ self._deltaz_forward(my_segment["redshift"], segment_length) elif my_segment.get("next", None) is None: next_redshift = self.near_redshift else: next_redshift = my_segment['next']['redshift'] # Make sure start, end, left, right # are using the dataset's unit system. my_start = ds.arr(my_segment['start']) my_end = ds.arr(my_segment['end']) my_left = ds.arr(left_edge) my_right = ds.arr(right_edge) mylog.info("Getting segment at z = %s: %s to %s." % (my_segment['redshift'], my_start, my_end)) # Break periodic ray into non-periodic segments. sub_segments = periodic_ray(my_start, my_end, left=my_left, right=my_right) # Prepare data structure for subsegment. sub_data = {} sub_data['segment_redshift'] = my_segment['redshift'] for field in all_fields: sub_data[field] = [] # Get data for all subsegments in segment. for sub_segment in sub_segments: mylog.info("Getting subsegment: %s to %s." % (list(sub_segment[0]), list(sub_segment[1]))) sub_ray = ds.ray(sub_segment[0], sub_segment[1]) for key, val in field_parameters.items(): sub_ray.set_field_parameter(key, val) asort = np.argsort(sub_ray["t"]) sub_data['dl'].extend(sub_ray['dts'][asort] * vector_length(sub_ray.start_point, sub_ray.end_point)) for field in data_fields: sub_data[field].extend(sub_ray[field][asort]) if use_peculiar_velocity: line_of_sight = sub_segment[0] - sub_segment[1] line_of_sight /= ((line_of_sight**2).sum())**0.5 sub_vel = ds.arr([sub_ray['velocity_x'], sub_ray['velocity_y'], sub_ray['velocity_z']]) # Line of sight velocity = vel_los sub_vel_los = (np.rollaxis(sub_vel, 1) * \ line_of_sight).sum(axis=1) sub_data['velocity_los'].extend(sub_vel_los[asort]) # doppler redshift: # See https://en.wikipedia.org/wiki/Redshift and # Peebles eqns: 5.48, 5.49 # 1 + redshift_dopp = (1 + v*cos(theta)/c) / # sqrt(1 - v**2/c**2) # where v is the peculiar velocity (ie physical velocity # without the hubble flow, but no hubble flow in sim, so # just the physical velocity). # the bulk of the doppler redshift is from line of sight # motion, but there is a small amount from time dilation # of transverse motion, hence the inclusion of theta (the # angle between line of sight and the velocity). # theta is the angle between the ray vector (i.e. line of # sight) and the velocity vectors: a dot b = ab cos(theta) sub_vel_mag = sub_ray['velocity_magnitude'] cos_theta = line_of_sight.dot(sub_vel) / sub_vel_mag # Protect against stituations where velocity mag is exactly # zero, in which case zero / zero = NaN. 
cos_theta = np.nan_to_num(cos_theta) redshift_dopp = \ (1 + sub_vel_mag * cos_theta / speed_of_light_cgs) / \ np.sqrt(1 - sub_vel_mag**2 / speed_of_light_cgs**2) - 1 sub_data['redshift_dopp'].extend(redshift_dopp[asort]) del sub_vel, sub_vel_los, sub_vel_mag, cos_theta, \ redshift_dopp sub_ray.clear_data() del sub_ray, asort for key in sub_data: sub_data[key] = ds.arr(sub_data[key]).in_cgs() # Get redshift for each lixel. Assume linear relation between l # and z. sub_data['dredshift'] = (my_segment['redshift'] - next_redshift) * \ (sub_data['dl'] / vector_length(my_start, my_end).in_cgs()) sub_data['redshift'] = my_segment['redshift'] - \ sub_data['dredshift'].cumsum() + sub_data['dredshift'] # When using the peculiar velocity, create effective redshift # (redshift_eff) field combining cosmological redshift and # doppler redshift. # then to add cosmological redshift and doppler redshifts, follow # eqn 3.75 in Peacock's Cosmological Physics: # 1 + z_eff = (1 + z_cosmo) * (1 + z_doppler) if use_peculiar_velocity: sub_data['redshift_eff'] = ((1 + sub_data['redshift_dopp']) * \ (1 + sub_data['redshift'])) - 1 # Remove empty lixels. sub_dl_nonzero = sub_data['dl'].nonzero() for field in all_fields: sub_data[field] = sub_data[field][sub_dl_nonzero] del sub_dl_nonzero # Add to storage. my_storage.result = sub_data del ds # Reconstruct ray data from parallel_objects storage. all_data = [my_data for my_data in all_ray_storage.values()] # This is now a list of segments where each one is a dictionary # with all the fields. all_data.sort(key=lambda a:a['segment_redshift'], reverse=True) # Flatten the list into a single dictionary containing fields # for the whole ray. all_data = _flatten_dict_list(all_data, exceptions=['segment_redshift']) self._data = all_data if data_filename is not None: self._write_light_ray(data_filename, all_data) ray_ds = load(data_filename) return ray_ds else: return None
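# A standalone sketch of the doppler-redshift relation referenced in the
# comments above (Peebles eqns 5.48-5.49):
#     1 + z_dopp = (1 + v*cos(theta)/c) / sqrt(1 - v**2/c**2)
# The velocity value is illustrative, in cgs, with c hard-coded here rather
# than taken from yt's physical_constants.
import numpy as np

c = 2.99792458e10                       # speed of light [cm/s]


def doppler_redshift(v_mag, cos_theta):
    """Redshift from a peculiar velocity v_mag at angle theta to the sightline."""
    return (1.0 + v_mag * cos_theta / c) / np.sqrt(1.0 - (v_mag / c)**2) - 1.0


print(doppler_redshift(3.0e7, 1.0))     # ~1e-3 for 300 km/s along the sightline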