def _save_catalog(self, filename, ds, halos, fields=None):
    """
    Save halo catalog with descendent information.
    """
    if self.comm is None:
        rank = 0
    else:
        rank = self.comm.rank
    filename = get_output_filename(
        filename, "%s.%d" % (_get_tree_basename(ds), rank), ".h5")

    if fields is None:
        my_fields = []
    else:
        my_fields = fields[:]

    default_fields = \
      ["particle_identifier", "descendent_identifier", "particle_mass"] + \
      ["particle_position_%s" % ax for ax in "xyz"] + \
      ["particle_velocity_%s" % ax for ax in "xyz"]
    for field in default_fields:
        if field not in my_fields:
            my_fields.append(field)

    if isinstance(halos, list):
        num_halos = len(halos)
        data = self._create_halo_data_lists(halos, my_fields)
    else:
        num_halos = ds.index.particle_count[halos]
        data = dict((field, ds.r[halos, field].in_base())
                    for field in my_fields
                    if field != "descendent_identifier")
        data["descendent_identifier"] = -1 * np.ones(num_halos)

    ftypes = dict([(field, ".") for field in data])
    extra_attrs = {"num_halos": num_halos,
                   "data_type": "halo_catalog"}
    mylog.info("Saving catalog with %d halos to %s." %
               (num_halos, filename))
    save_as_dataset(ds, filename, data,
                    field_types=ftypes, extra_attrs=extra_attrs)
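# Hedged usage sketch (not part of the source): how a catalog written by
# _save_catalog might be read back, assuming the output is a yt-loadable
# halo catalog that exposes its particles under the "halos" field type.
# The file name is hypothetical; the real name is built from the dataset
# basename and the communicator rank ("<basename>.<rank>.h5").
import yt

cat_ds = yt.load("merger_trees/DD0046.0.h5")  # hypothetical path
ad = cat_ds.all_data()

# "descendent_identifier" is written as -1 for halos whose descendent has
# not been identified, matching the fill value used above.
print(ad["halos", "particle_identifier"])
print(ad["halos", "descendent_identifier"])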
def save_as_dataset(self, filename=None, fields=None):
    r"""Export a fixed resolution buffer to a reloadable yt dataset.

    This function will take a fixed resolution buffer and output a
    dataset containing either the fields presently existing or fields
    given in the ``fields`` list.  The resulting dataset can be
    reloaded as a yt dataset.

    Parameters
    ----------
    filename : str, optional
        The name of the file to be written.  If None, the name will be
        a combination of the original dataset and the type of data
        container.
    fields : list of strings or tuples, optional
        If this is supplied, it is the list of fields to be saved to
        disk.  If not supplied, all the fields that have been queried
        will be saved.

    Returns
    -------
    filename : str
        The name of the file that has been created.

    Examples
    --------

    >>> import yt
    >>> ds = yt.load("enzo_tiny_cosmology/DD0046/DD0046")
    >>> proj = ds.proj("density", "x", weight_field="density")
    >>> frb = proj.to_frb(1.0, (800, 800))
    >>> fn = frb.save_as_dataset(fields=["density"])
    >>> ds2 = yt.load(fn)
    >>> print (ds2.data["density"])
    [[ 1.25025353e-30  1.25025353e-30  1.25025353e-30 ...,  7.90820691e-31
       7.90820691e-31  7.90820691e-31]
     [ 1.25025353e-30  1.25025353e-30  1.25025353e-30 ...,  7.90820691e-31
       7.90820691e-31  7.90820691e-31]
     [ 1.25025353e-30  1.25025353e-30  1.25025353e-30 ...,  7.90820691e-31
       7.90820691e-31  7.90820691e-31]
     ...,
     [ 1.55834239e-30  1.55834239e-30  1.55834239e-30 ...,  8.51353199e-31
       8.51353199e-31  8.51353199e-31]
     [ 1.55834239e-30  1.55834239e-30  1.55834239e-30 ...,  8.51353199e-31
       8.51353199e-31  8.51353199e-31]
     [ 1.55834239e-30  1.55834239e-30  1.55834239e-30 ...,  8.51353199e-31
       8.51353199e-31  8.51353199e-31]] g/cm**3

    """
    keyword = "%s_%s_frb" % (str(self.ds), self.data_source._type_name)
    filename = get_output_filename(filename, keyword, ".h5")

    data = {}
    if fields is not None:
        for f in self.data_source._determine_fields(fields):
            data[f] = self[f]
    else:
        data.update(self.data)

    ftypes = dict([(field, "grid") for field in data])
    extra_attrs = dict([(arg, getattr(self.data_source, arg, None))
                        for arg in self.data_source._con_args +
                        self.data_source._tds_attrs])
    extra_attrs["con_args"] = self.data_source._con_args
    extra_attrs["left_edge"] = self.ds.arr(
        [self.bounds[0], self.bounds[2]])
    extra_attrs["right_edge"] = self.ds.arr(
        [self.bounds[1], self.bounds[3]])
    extra_attrs["ActiveDimensions"] = self.buff_size
    extra_attrs["level"] = 0
    extra_attrs["data_type"] = "yt_frb"
    extra_attrs["container_type"] = self.data_source._type_name
    extra_attrs["dimensionality"] = self.data_source._dimensionality
    save_as_dataset(self.ds, filename, data, field_types=ftypes,
                    extra_attrs=extra_attrs)
    return filename
def save_as_dataset(self, filename=None, fields=None):
    r"""Export clump tree to a reloadable yt dataset.

    This function will take a clump object and output a dataset
    containing the fields given in the ``fields`` list and all info
    items.  The resulting dataset can be reloaded as a yt dataset.

    Parameters
    ----------
    filename : str, optional
        The name of the file to be written.  If None, the name will be
        a combination of the original dataset and the clump index.
    fields : list of strings or tuples, optional
        If this is supplied, it is the list of fields to be saved to
        disk.

    Returns
    -------
    filename : str
        The name of the file that has been created.

    Examples
    --------

    >>> import numpy as np
    >>> import yt
    >>> from yt.data_objects.level_sets.api import \
    ...         Clump, find_clumps
    >>> ds = yt.load("IsolatedGalaxy/galaxy0030/galaxy0030")
    >>> data_source = ds.disk([0.5, 0.5, 0.5], [0., 0., 1.],
    ...                       (8, 'kpc'), (1, 'kpc'))
    >>> field = ("gas", "density")
    >>> step = 2.0
    >>> c_min = 10**np.floor(np.log10(data_source[field]).min())
    >>> c_max = 10**np.floor(np.log10(data_source[field]).max() + 1)
    >>> master_clump = Clump(data_source, field)
    >>> master_clump.add_info_item("center_of_mass")
    >>> master_clump.add_validator("min_cells", 20)
    >>> find_clumps(master_clump, c_min, c_max, step)
    >>> fn = master_clump.save_as_dataset(
    ...     fields=["density", "particle_mass"])
    >>> new_ds = yt.load(fn)
    >>> print (new_ds.tree["clump", "cell_mass"])
    1296926163.91 Msun
    >>> print (new_ds.tree["grid", "density"])
    [  2.54398434e-26   2.46620353e-26   2.25120154e-26 ...,
       1.12879234e-25   1.59561490e-25   1.09824903e-24] g/cm**3
    >>> print (new_ds.tree["all", "particle_mass"])
    [  4.25472446e+38   4.25472446e+38   4.25472446e+38 ...,
       2.04238266e+38   2.04523901e+38   2.04770938e+38] g
    >>> print (new_ds.tree.children[0]["clump", "cell_mass"])
    909636495.312 Msun
    >>> print (new_ds.leaves[0]["clump", "cell_mass"])
    3756566.99809 Msun
    >>> print (new_ds.leaves[0]["grid", "density"])
    [  6.97820274e-24   6.58117370e-24   7.32046082e-24   6.76202430e-24
       7.41184837e-24   6.76981480e-24   6.94287213e-24   6.56149658e-24
       6.76584569e-24   6.94073710e-24   7.06713082e-24   7.22556526e-24
       7.08338898e-24   6.78684331e-24   7.40647040e-24   7.03050456e-24
       7.12438678e-24   6.56310217e-24   7.23201662e-24   7.17314333e-24] g/cm**3

    """
    ds = self.data.ds
    keyword = "%s_clump_%d" % (str(ds), self.clump_id)
    filename = get_output_filename(filename, keyword, ".h5")

    # collect clump info fields
    clump_info = dict([(ci.name, []) for ci in self.base.clump_info])
    clump_info.update(
        dict([(field, []) for field in
              ["clump_id", "parent_id", "contour_key", "contour_id"]]))
    for clump in self:
        clump_info["clump_id"].append(clump.clump_id)
        if clump.parent is None:
            parent_id = -1
        else:
            parent_id = clump.parent.clump_id
        clump_info["parent_id"].append(parent_id)

        contour_key = clump.contour_key
        if contour_key is None:
            contour_key = -1
        clump_info["contour_key"].append(contour_key)

        contour_id = clump.contour_id
        if contour_id is None:
            contour_id = -1
        clump_info["contour_id"].append(contour_id)

        for ci in self.base.clump_info:
            ci(clump)
            clump_info[ci.name].append(clump.info[ci.name][1])

    for ci in clump_info:
        if hasattr(clump_info[ci][0], "units"):
            clump_info[ci] = ds.arr(clump_info[ci])
        else:
            clump_info[ci] = np.array(clump_info[ci])

    ftypes = dict([(ci, "clump") for ci in clump_info])

    # collect data fields
    if fields is not None:
        contour_fields = \
          [("index", "contours_%s" % ckey)
           for ckey in np.unique(clump_info["contour_key"])
           if str(ckey) != "-1"]

        ptypes = []
        field_data = {}
        need_grid_positions = False
        for f in self.base.data._determine_fields(fields) + contour_fields:
            if ds.field_info[f].particle_type:
                if f[0] not in ptypes:
                    ptypes.append(f[0])
                ftypes[f] = f[0]
            else:
                need_grid_positions = True
                if f[1] in ('x', 'y', 'z', 'dx', 'dy', 'dz'):
                    # skip 'xyz' if a user passes that in because they
                    # will be added to ftypes below
                    continue
                ftypes[f] = "grid"
            field_data[f] = self.base[f]

        if len(ptypes) > 0:
            for ax in "xyz":
                for ptype in ptypes:
                    p_field = (ptype, "particle_position_%s" % ax)
                    if p_field in ds.field_info and \
                      p_field not in field_data:
                        ftypes[p_field] = p_field[0]
                        field_data[p_field] = self.base[p_field]

            for clump in self:
                if clump.contour_key is None:
                    continue
                for ptype in ptypes:
                    cfield = (ptype, "contours_%s" % clump.contour_key)
                    if cfield not in field_data:
                        field_data[cfield] = \
                          clump.data._part_ind(ptype).astype(np.int64)
                        ftypes[cfield] = ptype
                    field_data[cfield][clump.data._part_ind(ptype)] = \
                      clump.contour_id

        if need_grid_positions:
            for ax in "xyz":
                g_field = ("index", ax)
                if g_field in ds.field_info and \
                  g_field not in field_data:
                    field_data[g_field] = self.base[g_field]
                    ftypes[g_field] = "grid"
                g_field = ("index", "d" + ax)
                if g_field in ds.field_info and \
                  g_field not in field_data:
                    ftypes[g_field] = "grid"
                    field_data[g_field] = self.base[g_field]

        if self.contour_key is not None:
            cfilters = {}
            for field in field_data:
                if ftypes[field] == "grid":
                    ftype = "index"
                else:
                    ftype = field[0]
                cfield = (ftype, "contours_%s" % self.contour_key)
                if cfield not in cfilters:
                    cfilters[cfield] = field_data[cfield] == self.contour_id
                field_data[field] = field_data[field][cfilters[cfield]]

        clump_info.update(field_data)

    extra_attrs = {"data_type": "yt_clump_tree",
                   "container_type": "yt_clump_tree"}
    save_as_dataset(ds, filename, clump_info,
                    field_types=ftypes, extra_attrs=extra_attrs)
    return filename
def trace_descendents(self, halo_type, fields=None, filename=None):
    """
    Trace the descendents of all halos.

    A merger tree for all halos will be created, starting with the
    first halo catalog and moving forward.

    Parameters
    ----------
    halo_type : string
        The type of halo, typically "FOF" for FoF groups or
        "Subfind" for subhalos.
    fields : optional, list of strings
        List of additional fields to be saved to halo catalogs.
    filename : optional, string
        Directory in which merger-tree catalogs will be saved.
    """

    output_dir = os.path.dirname(filename)
    if self.comm.rank == 0 and len(output_dir) > 0:
        ensure_dir(output_dir)

    all_outputs = self.ts.outputs[:]
    ds1 = ds2 = None

    for i, fn2 in enumerate(all_outputs[1:]):
        fn1 = all_outputs[i]
        target_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn1), 0), ".h5")
        catalog_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn2), 0), ".h5")
        if os.path.exists(target_filename):
            continue

        if ds1 is None:
            ds1 = self._load_ds(fn1, index_ptype=halo_type)
        ds2 = self._load_ds(fn2, index_ptype=halo_type)

        if self.comm.rank == 0:
            _print_link_info(ds1, ds2)

        target_halos = []
        if ds1.index.particle_count[halo_type] == 0:
            self._save_catalog(filename, ds1, target_halos, fields)
            ds1 = ds2
            continue

        target_ids = \
          ds1.r[halo_type, "particle_identifier"].d.astype(np.int64)

        njobs = min(self.comm.size, target_ids.size)
        pbar = get_pbar("Linking halos", target_ids.size, parallel=True)
        my_i = 0
        for halo_id in parallel_objects(target_ids, njobs=njobs):
            my_halo = ds1.halo(halo_type, halo_id)

            target_halos.append(my_halo)
            self._find_descendent(my_halo, ds2)
            my_i += njobs
            pbar.update(my_i)
        pbar.finish()

        self._save_catalog(filename, ds1, target_halos, fields)
        ds1 = ds2
        clear_id_cache()

    if os.path.exists(catalog_filename):
        return

    if ds2 is None:
        ds2 = self._load_ds(fn2, index_ptype=halo_type)
    if self.comm.rank == 0:
        self._save_catalog(filename, ds2, halo_type, fields)
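# Hedged usage sketch (not from the source): "MergerTreeBuilder" is a
# hypothetical stand-in for whatever class defines trace_descendents; its
# constructor argument and the extra field name are assumptions.
mtb = MergerTreeBuilder("snapshots/snapshot_*.hdf5")  # hypothetical class

# Walk the time series forward, linking each FoF group to its descendent
# and writing one catalog per snapshot into the "merger_trees" directory.
mtb.trace_descendents("FOF", fields=["virial_radius"],
                      filename="merger_trees/")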
def trace_ancestors(self, halo_type, root_ids, fields=None, filename=None):
    """
    Trace the ancestry of a given set of halos.

    A merger tree for a specific set of halos will be created,
    starting with the last halo catalog and moving backward.

    Parameters
    ----------
    halo_type : string
        The type of halo, typically "FOF" for FoF groups or
        "Subfind" for subhalos.
    root_ids : integer or array of integers
        The halo IDs from the last halo catalog for the targeted halos.
    fields : optional, list of strings
        List of additional fields to be saved to halo catalogs.
    filename : optional, string
        Directory in which merger-tree catalogs will be saved.
    """

    output_dir = os.path.dirname(filename)
    if self.comm.rank == 0 and len(output_dir) > 0:
        ensure_dir(output_dir)

    all_outputs = self.ts.outputs[::-1]
    ds1 = None

    for i, fn2 in enumerate(all_outputs[1:]):
        fn1 = all_outputs[i]
        target_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn1), 0), ".h5")
        catalog_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn2), 0), ".h5")
        if os.path.exists(catalog_filename):
            continue

        if ds1 is None:
            ds1 = self._load_ds(fn1, index_ptype=halo_type)
        ds2 = self._load_ds(fn2, index_ptype=halo_type)

        if self.comm.rank == 0:
            _print_link_info(ds1, ds2)

        if ds2.index.particle_count[halo_type] == 0:
            mylog.info("%s has no halos of type %s, ending." %
                       (ds2, halo_type))
            break

        if i == 0:
            target_ids = root_ids
            if not iterable(target_ids):
                target_ids = np.array([target_ids])
            if isinstance(target_ids, YTArray):
                target_ids = target_ids.d
            if target_ids.dtype != np.int64:
                target_ids = target_ids.astype(np.int64)
        else:
            mylog.info("Loading target ids from %s.", target_filename)
            ds_target = yt_load(target_filename)
            target_ids = \
              ds_target.r["halos", "particle_identifier"].d.astype(np.int64)
            del ds_target

        id_store = []
        target_halos = []
        ancestor_halos = []

        njobs = min(self.comm.size, target_ids.size)
        pbar = get_pbar("Linking halos", target_ids.size, parallel=True)
        my_i = 0
        for halo_id in parallel_objects(target_ids, njobs=njobs):
            my_halo = ds1.halo(halo_type, halo_id)

            target_halos.append(my_halo)
            my_ancestors = self._find_ancestors(my_halo, ds2,
                                                id_store=id_store)
            ancestor_halos.extend(my_ancestors)
            my_i += njobs
            pbar.update(my_i)
        pbar.finish()

        if i == 0:
            for halo in target_halos:
                halo.descendent_identifier = -1
            self._save_catalog(filename, ds1, target_halos, fields)
        self._save_catalog(filename, ds2, ancestor_halos, fields)

        if len(ancestor_halos) == 0:
            break

        ds1 = ds2
        clear_id_cache()
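# Continuing the hypothetical sketch above: trace_ancestors only needs the
# IDs of the target halos in the last (latest-time) catalog; the class name
# and extra field name remain assumptions.
import numpy as np

root_ids = np.array([0, 4, 27], dtype=np.int64)  # illustrative halo IDs

# Walk the time series backward from those halos, stopping once a snapshot
# contributes no further ancestors.
mtb.trace_ancestors("FOF", root_ids, fields=["virial_radius"],
                    filename="merger_trees/")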
def save_as_dataset(self, filename=None):
    r"""Export a profile to a reloadable yt dataset.

    This function will take a profile and output a dataset containing
    all relevant fields.  The resulting dataset can be reloaded as a
    yt dataset.

    Parameters
    ----------
    filename : str, optional
        The name of the file to be written.  If None, the name will be
        a combination of the original dataset plus the type of object,
        e.g., Profile1D.

    Returns
    -------
    filename : str
        The name of the file that has been created.

    Examples
    --------

    >>> import yt
    >>> ds = yt.load("enzo_tiny_cosmology/DD0046/DD0046")
    >>> ad = ds.all_data()
    >>> profile = yt.create_profile(ad, ["density", "temperature"],
    ...                             "cell_mass", weight_field=None,
    ...                             n_bins=(128, 128))
    >>> fn = profile.save_as_dataset()
    >>> prof_ds = yt.load(fn)
    >>> print (prof_ds.data["cell_mass"].shape)
    (128, 128)
    >>> print (prof_ds.data["x"].shape)  # x bins as 1D array
    (128,)
    >>> print (prof_ds.data["density"].shape)  # x bins as 2D array
    (128, 128)
    >>> p = yt.PhasePlot(prof_ds.data, "density", "temperature",
    ...                  "cell_mass", weight_field=None)
    >>> p.save()

    """
    keyword = "%s_%s" % (str(self.ds), self.__class__.__name__)
    filename = get_output_filename(filename, keyword, ".h5")

    args = ("field", "log")
    extra_attrs = {"data_type": "yt_profile",
                   "profile_dimensions": self.size,
                   "weight_field": self.weight_field,
                   "fractional": self.fractional,
                   "accumulation": self.accumulation}
    data = {}
    data.update(self.field_data)
    data["weight"] = self.weight
    data["used"] = self.used.astype("float64")

    dimensionality = 0
    bin_data = []
    for ax in "xyz":
        if hasattr(self, ax):
            dimensionality += 1
            data[ax] = getattr(self, ax)
            bin_data.append(data[ax])
            bin_field_name = "%s_bins" % ax
            data[bin_field_name] = getattr(self, bin_field_name)
            extra_attrs["%s_range" % ax] = self.ds.arr(
                [data[bin_field_name][0], data[bin_field_name][-1]])
            for arg in args:
                key = "%s_%s" % (ax, arg)
                extra_attrs[key] = getattr(self, key)

    bin_fields = np.meshgrid(*bin_data)
    for i, ax in enumerate("xyz"[:dimensionality]):
        data[getattr(self, "%s_field" % ax)] = bin_fields[i]

    extra_attrs["dimensionality"] = dimensionality
    ftypes = dict([(field, "data") for field in data])
    save_as_dataset(self.ds, filename, data, field_types=ftypes,
                    extra_attrs=extra_attrs)
    return filename