def _yield_halos(self, njobs="auto", dynamic=False):
    """
    Generate a Halo object for every halo in the data source.

    Parameters
    ----------
    njobs : int or "auto"
        Forwarded to ``parallel_objects``. With "auto", -1 is passed and
        the ``dynamic`` argument is ignored.
    dynamic : bool
        Forwarded to ``parallel_objects`` unless ``njobs`` is "auto".

    Yields
    ------
    Halo
        One object per halo index assigned to this process.
    """
    n_procs = self.comm.size

    if njobs == "auto":
        # use task queue if odd number of cores more than 2
        use_dynamic, use_njobs = (n_procs > 2 and n_procs % 2), -1
    else:
        use_dynamic, use_njobs = dynamic, njobs

    for chunk in self.data_source.chunks([], "io"):
        # Only the root process reads; the field data is then broadcast
        # to the other processes when there is more than one.
        if self.comm.rank == 0:
            chunk.get_data(self.pipeline.field_quantities)
        if n_procs > 1:
            shared = self.comm.comm.bcast(chunk.field_data, root=0)
            chunk.field_data.update(shared)

        n_halos = chunk[self.halo_field_type, self._id_field].size
        for halo_index in parallel_objects(
            range(n_halos), njobs=use_njobs, dynamic=use_dynamic
        ):
            yield Halo(self, chunk, halo_index)
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    For each candidate, the dataset path is rebuilt from the simulation
    parameters (GlobalDir plus the data-dump or redshift-dump directory
    and file prefixes) and loaded if it exists on disk.

    Returns
    -------
    list of dict
        One entry per loadable dataset with keys "filename" and "time",
        plus "redshift" for cosmological simulations.
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    found = {}
    for slot, candidate in parallel_objects(potential_outputs, storage=found):
        # Pick the parameter family that matches this candidate.
        if self.parameters['DataDumpDir'] in candidate:
            family = 'DataDump'
        else:
            family = 'RedshiftDump'
        dir_key = self.parameters[family + 'Dir']
        output_key = self.parameters[family + 'Name']

        # Everything after the directory prefix is the dump index.
        start = candidate.find(dir_key) + len(dir_key)
        index = candidate[start:]
        filename = os.path.join(self.parameters['GlobalDir'],
                                "%s%s" % (dir_key, index),
                                "%s%s" % (output_key, index))
        if not os.path.exists(filename):
            continue

        try:
            ds = load(filename)
            if ds is not None:
                slot.result = {'filename': filename,
                               'time': ds.current_time.in_units("s")}
                if ds.cosmological_simulation:
                    slot.result['redshift'] = ds.current_redshift
        except YTOutputNotIdentified:
            mylog.error('Failed to load %s', filename)

    return [entry for entry in found.values() if entry is not None]
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Returns
    -------
    list of dict
        One entry per existing, loadable file with keys "filename" and
        "time", plus "redshift" for cosmological simulations.
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    results = {}
    for slot, path in parallel_objects(potential_outputs, storage=results):
        if not os.path.exists(path):
            continue
        try:
            ds = load(path)
            if ds is None:
                continue
            slot.result = {
                "filename": path,
                "time": ds.current_time.in_units("s"),
            }
            if ds.cosmological_simulation:
                slot.result["redshift"] = ds.current_redshift
        except YTOutputNotIdentified:
            mylog.error("Failed to load %s", path)

    # Slots that never received a result are dropped.
    return [item for item in results.values() if item is not None]
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Returns
    -------
    list of dict
        One entry per existing, loadable file with keys "filename" and
        "num_steps".
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    collected = {}
    for slot, path in parallel_objects(potential_outputs, storage=collected):
        if not os.path.exists(path):
            continue
        try:
            ds = load(path)
            if ds is not None:
                slot.result = {"filename": path, "num_steps": ds.num_steps}
        except YTOutputNotIdentified:
            mylog.error("Failed to load %s", path)

    valid = []
    for entry in collected.values():
        if entry is not None:
            valid.append(entry)
    return valid
def _check_for_outputs(self, potential_outputs):
    """
    Check a list of files to see if they are valid datasets.

    The on-disk path of each candidate is reconstructed from the
    GlobalDir parameter and the matching dump directory/name prefixes
    before it is tested and loaded.

    Returns
    -------
    list of dict
        One entry per loadable dataset with keys "filename" and "time",
        plus "redshift" for cosmological simulations.
    """
    params = self.parameters
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    storage = {}
    for sto, output in parallel_objects(potential_outputs, storage=storage):
        is_data_dump = params['DataDumpDir'] in output
        if is_data_dump:
            dir_key, output_key = params['DataDumpDir'], params['DataDumpName']
        else:
            dir_key = params['RedshiftDumpDir']
            output_key = params['RedshiftDumpName']

        # The dump index is whatever follows the directory prefix.
        index = output[output.find(dir_key) + len(dir_key):]
        subdir = "%s%s" % (dir_key, index)
        basename = "%s%s" % (output_key, index)
        filename = os.path.join(params['GlobalDir'], subdir, basename)

        if os.path.exists(filename):
            try:
                ds = load(filename)
                if ds is not None:
                    sto.result = {'filename': filename,
                                  'time': ds.current_time.in_units("s")}
                    if ds.cosmological_simulation:
                        sto.result['redshift'] = ds.current_redshift
            except YTOutputNotIdentified:
                mylog.error('Failed to load %s', filename)

    return [res for res in storage.values() if res is not None]
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Returns
    -------
    list of dict
        One entry per loadable file with keys "filename" and "time",
        plus "redshift" for cosmological simulations. Files that are
        missing or of unidentified type are logged and skipped.
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    per_output = {}
    for slot, path in parallel_objects(potential_outputs, storage=per_output):
        try:
            ds = load(path)
        except (FileNotFoundError, YTUnidentifiedDataType):
            mylog.error("Failed to load %s", path)
        else:
            slot.result = {
                "filename": path,
                "time": ds.current_time.in_units("s"),
            }
            if ds.cosmological_simulation:
                slot.result["redshift"] = ds.current_redshift

    return [entry for entry in per_output.values() if entry is not None]
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Logging is temporarily raised to level 40 while the datasets are
    loaded (unless the logger is already at debug level or at 40 and
    above) and is always restored afterwards, even if loading raises.

    Returns
    -------
    list of dict
        One entry per existing, loadable file with keys "filename" and
        "time", plus "redshift" for cosmological simulations.
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    my_outputs = {}
    llevel = mylog.level
    # suppress logging as we load every dataset, unless set to debug
    if llevel > 10 and llevel < 40:
        mylog.setLevel(40)
    try:
        for my_storage, output in parallel_objects(potential_outputs,
                                                   storage=my_outputs):
            if not os.path.exists(output):
                continue
            try:
                ds = load(output)
                if ds is not None:
                    my_storage.result = {
                        "filename": output,
                        "time": ds.current_time.in_units("s"),
                    }
                    if ds.cosmological_simulation:
                        my_storage.result["redshift"] = ds.current_redshift
            except YTUnidentifiedDataType:
                mylog.error("Failed to load %s", output)
    finally:
        # Restore the caller's log level even when an unexpected error
        # escapes the loop; otherwise logging would stay suppressed.
        mylog.setLevel(llevel)

    return [my_output for my_output in my_outputs.values()
            if my_output is not None]
def add_fields(self, fields):
    """
    Add fields to profile

    Parameters
    ----------
    fields : list of field names
        A list of fields to create profile histograms for
    """
    source = self.data_source
    resolved = source._determine_fields(fields)

    # Record the dataset's field info for each profiled field.
    for name in resolved:
        self.field_info[name] = source.ds.field_info[name]

    accumulator = ProfileFieldAccumulator(len(resolved), self.size)

    # Bin chunk by chunk, then combine the accumulated storage.
    for chunk in parallel_objects(source.chunks([], "io")):
        self._bin_chunk(chunk, resolved, accumulator)
    self._finalize_storage(resolved, accumulator)
def _check_for_outputs(self, potential_outputs):
    """
    Check a list of files to see if they are valid datasets.

    The on-disk path of each candidate is rebuilt from the GlobalDir
    parameter and the matching dump directory/name prefixes. Logging is
    temporarily raised to level 40 while the datasets are loaded (unless
    the logger is already at debug level or at 40 and above) and is
    always restored afterwards, even if loading raises.

    Returns
    -------
    list of dict
        One entry per loadable dataset with keys "filename" and "time",
        plus "redshift" for cosmological simulations.
    """
    only_on_root(
        mylog.info, "Checking %d potential outputs.", len(potential_outputs)
    )

    my_outputs = {}
    llevel = mylog.level
    # suppress logging as we load every dataset, unless set to debug
    if llevel > 10 and llevel < 40:
        mylog.setLevel(40)
    try:
        for my_storage, output in parallel_objects(
            potential_outputs, storage=my_outputs
        ):
            if self.parameters["DataDumpDir"] in output:
                dir_key = self.parameters["DataDumpDir"]
                output_key = self.parameters["DataDumpName"]
            else:
                dir_key = self.parameters["RedshiftDumpDir"]
                output_key = self.parameters["RedshiftDumpName"]
            # Everything after the directory prefix is the dump index.
            index = output[output.find(dir_key) + len(dir_key):]
            filename = os.path.join(
                self.parameters["GlobalDir"],
                f"{dir_key}{index}",
                f"{output_key}{index}",
            )
            try:
                ds = load(filename)
            except (FileNotFoundError, YTUnidentifiedDataType):
                mylog.error("Failed to load %s", filename)
                continue
            my_storage.result = {
                "filename": filename,
                "time": ds.current_time.in_units("s"),
            }
            if ds.cosmological_simulation:
                my_storage.result["redshift"] = ds.current_redshift
    finally:
        # Restore the caller's log level even when an unexpected error
        # escapes the loop; otherwise logging would stay suppressed.
        mylog.setLevel(llevel)

    return [
        my_output for my_output in my_outputs.values() if my_output is not None
    ]
def __call__(self, *args, **kwargs):
    """Calculate results for the derived quantity"""
    self.count_values(*args, **kwargs)

    per_chunk = {}
    io_chunks = self.data_source.chunks([], chunking_style="io")
    for slot, chunk in parallel_objects(io_chunks, -1, storage=per_chunk):
        slot.result = self.process_chunk(chunk, *args, **kwargs)

    # Now storage will have everything, and will be done via pickling, so
    # the units will be preserved.  (Credit to Nathan for this
    # idea/implementation.)
    ordered = [per_chunk[key] for key in sorted(per_chunk)]

    # Regroup: one list per returned value, holding that value from
    # every chunk in key order.
    columns = [
        [result[i] for result in ordered] for i in range(self.num_vals)
    ]

    # These will be YTArrays
    arrays = [self.data_source.ds.arr(column) for column in columns]
    return self.reduce_intermediate(arrays)
def prep_field_data(ds, field, offset=1):
    """
    Prepare the grid data.

    Read the field data grid by grid, replace bad values (via
    ``np.nan_to_num``), and sum the per-process arrays with an MPI
    reduce.

    Parameters
    ----------
    ds : dataset
        Dataset whose ``index.grids`` are iterated.
    field : field name
        Field read from every grid.
    offset : int
        Subtracted from each grid id to obtain its row in the output.

    Returns
    -------
    numpy array with shape [ngrid, *grid_dimensions[0]].
    NOTE(review): the reduce is called without an explicit root, so the
    summed result presumably lands only on the root process -- confirm.
    """
    mylog.info('Calculating field: %s', field)
    comm = communication_system.communicators[-1]
    on_root = comm.rank == 0
    dtype_name = 'float64'
    n_grids = ds.index.num_grids

    # data.shape should be (ngrid, nxb, nyb, nzb)
    data = np.zeros([n_grids, *ds.index.grid_dimensions[0]], dtype=dtype_name)

    # Timing and progress output happen on the root process only.
    if on_root:
        t_start = time.time()
        t_last = t_start

    # Go through all the grids in the index
    for grid in parallel_objects(ds.index.grids, njobs=0):
        if on_root:
            t_now = time.time()
            sys.stdout.write('\rWorking on Grid %7i / %7i - %7.3f s'
                             % (grid.id, ds.index.num_grids, t_now - t_last))
            t_last = t_now
        # Use numpy nan_to_num to convert the bad values.
        # Transpose the array since the array in FLASH is fortran-ordered.
        data[grid.id - offset] = np.nan_to_num(grid[field].v.transpose())

    if on_root:
        sys.stdout.write(' - mpi_reduce')
        t_reduce = time.time()

    # Reduce needs distinct send and receive buffers.
    send_buffer = data.copy()
    comm.comm.Reduce([send_buffer, get_mpi_type(dtype_name)],
                     [data, get_mpi_type(dtype_name)],
                     op=op_names['sum'])

    if on_root:
        t_end = time.time()
        sys.stdout.write(
            ' - Done!\nGrid Calculation: %.1f MPI: %.1f Total: %.1f\n'
            % (t_reduce - t_start, t_end - t_reduce, t_end - t_start))
    return data
def __call__(self, *args, **kwargs):
    """Calculate results for the derived quantity"""
    self.count_values(*args, **kwargs)
    storage = {}
    for sto, chunk in parallel_objects(
        self.data_source.chunks([], chunking_style="io"), -1, storage=storage
    ):
        sto.result = self.process_chunk(chunk, *args, **kwargs)

    # Now storage will have everything, and will be done via pickling, so
    # the units will be preserved.  (Credit to Nathan for this
    # idea/implementation.)
    buckets = [[] for _ in range(self.num_vals)]
    for key in sorted(storage):
        chunk_result = storage[key]
        # Bucket k collects the k-th value from every chunk.
        for slot, bucket in enumerate(buckets):
            bucket.append(chunk_result[slot])

    # These will be YTArrays
    as_arrays = []
    for bucket in buckets:
        as_arrays.append(self.data_source.ds.arr(bucket))

    return self.reduce_intermediate(as_arrays)
def _initialize_coarse_index(self):
    """
    Build the coarse index over all data files.

    Every data file's coordinates are registered with ``self.regions``;
    the masks and particle counts are then summed across processes,
    collisions are located, and, when smoothing lengths were found and
    there is more than one data file, the second index order is updated
    from the largest smoothing length seen by this process.
    """
    # Bound once at function scope: the dataset is needed both inside
    # the loops and after them, so it must not depend on the inner loop
    # having executed.
    ds = self.ds
    max_hsml = 0.0
    pb = get_pbar("Initializing coarse index ", len(self.data_files))
    for i, data_file in parallel_objects(enumerate(self.data_files)):
        pb.update(i + 1)
        for ptype, pos in self.io._yield_coordinates(data_file):
            # Smoothing lengths are only read for the first SPH
            # particle type, when the dataset declares any.
            if hasattr(ds, "_sph_ptypes") and ptype == ds._sph_ptypes[0]:
                hsml = self.io._get_smoothing_length(
                    data_file, pos.dtype, pos.shape)
                if hsml is not None and hsml.size > 0.0:
                    max_hsml = max(max_hsml, hsml.max())
            else:
                hsml = None
            self.regions._coarse_index_data_file(pos, hsml, data_file.file_id)
    pb.finish()

    # Combine the per-process pieces of the index.
    self.regions.masks = self.comm.mpi_allreduce(self.regions.masks, op="sum")
    self.regions.particle_counts = self.comm.mpi_allreduce(
        self.regions.particle_counts, op="sum")
    for data_file in self.data_files:
        self.regions._set_coarse_index_data_file(data_file.file_id)
    self.regions.find_collisions_coarse()

    if max_hsml > 0.0 and len(self.data_files) > 1:
        # By passing this in, we only allow index_order2 to be increased by
        # two at most.  One place this becomes particularly useful is in
        # the case of an extremely small section of gas particles embedded
        # in a much much larger domain.  The max smoothing length will be
        # quite small, so based on the larger domain, it will correspond to
        # a very very high index order, which is a large amount of memory!
        # Having multiple indexes, one for each particle type, would fix
        # this.
        new_order2 = self.regions.update_mi2(max_hsml, ds.index_order[1] + 2)
        mylog.info("Updating index_order2 from %s to %s",
                   ds.index_order[1], new_order2)
        ds.index_order = (ds.index_order[0], new_order2)
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Returns
    -------
    list of dict
        One entry per loadable file with keys "filename" and
        "num_steps". Files that are missing or of unidentified type are
        logged and skipped.
    """
    only_on_root(mylog.info, "Checking %d potential outputs.",
                 len(potential_outputs))

    gathered = {}
    for slot, path in parallel_objects(potential_outputs, storage=gathered):
        try:
            ds = load(path)
        except (FileNotFoundError, YTUnidentifiedDataType):
            mylog.error("Failed to load %s", path)
        else:
            slot.result = {"filename": path, "num_steps": ds.num_steps}

    return [entry for entry in gathered.values() if entry is not None]
def _check_for_outputs(self, potential_outputs):
    r"""
    Check a list of files to see if they are valid datasets.

    Returns
    -------
    list of dict
        One entry per existing, loadable file with keys "filename" and
        "time", plus "redshift" for cosmological simulations.
    """
    n_candidates = len(potential_outputs)
    only_on_root(mylog.info, "Checking %d potential outputs.", n_candidates)

    storage = {}
    for sto, fn in parallel_objects(potential_outputs, storage=storage):
        if os.path.exists(fn):
            try:
                ds = load(fn)
                if ds is not None:
                    sto.result = {"filename": fn,
                                  "time": ds.current_time.in_units("s")}
                    if ds.cosmological_simulation:
                        sto.result["redshift"] = ds.current_redshift
            except YTOutputNotIdentified:
                mylog.error("Failed to load %s", fn)

    # Keep only the slots that were actually filled.
    kept = []
    for result in storage.values():
        if result is not None:
            kept.append(result)
    return kept
def from_data_source(cls, data_source, redshift, area,
                     exp_time, source_model, point_sources=False,
                     parameters=None, center=None, dist=None,
                     cosmology=None, velocity_fields=None):
    r"""
    Initialize a :class:`~pyxsim.photon_list.PhotonList` from a yt data
    source. The redshift, collecting area, exposure time, and cosmology
    are stored in the *parameters* dictionary which is passed to the
    *source_model* function.

    Parameters
    ----------
    data_source : :class:`~yt.data_objects.data_containers.YTSelectionContainer`
        The data source from which the photons will be generated.
    redshift : float
        The cosmological redshift for the photons.
    area : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The collecting area to determine the number of photons. If units are
        not specified, it is assumed to be in cm^2.
    exp_time : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The exposure time to determine the number of photons. If units are
        not specified, it is assumed to be in seconds.
    source_model : :class:`~pyxsim.source_models.SourceModel`
        A source model used to generate the photons.
    point_sources : boolean, optional
        If True, the photons will be assumed to be generated from the exact
        positions of the cells or particles and not smeared around within
        a volume. Default: False
    parameters : dict, optional
        A dictionary of parameters to be passed for the source model to use,
        if necessary. The dictionary is updated in place.
    center : string or array_like, optional
        The origin of the photon spatial coordinates. Accepts "c", "max", or
        a coordinate. If not specified, pyxsim attempts to use the "center"
        field parameter of the data_source.
    dist : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The angular diameter distance, used for nearby sources. This may be
        optionally supplied instead of it being determined from the
        *redshift* and given *cosmology*. If units are not specified, it is
        assumed to be in kpc. To use this, the redshift must be set to zero.
    cosmology : :class:`~yt.utilities.cosmology.Cosmology`, optional
        Cosmological information. If not supplied, we try to get the
        cosmology from the dataset. Otherwise, LCDM with the default yt
        parameters is assumed.
    velocity_fields : list of fields
        The yt fields to use for the velocity. If not specified, the
        following will be assumed:
        ['velocity_x', 'velocity_y', 'velocity_z'] for grid datasets
        ['particle_velocity_x', 'particle_velocity_y', 'particle_velocity_z'] for particle datasets

    Raises
    ------
    ValueError
        If *redshift* <= 0 and no *dist* is given, or if *center* cannot
        be interpreted and *parameters* carries no "center" entry.
    RuntimeError
        If *center* is a tuple whose first element is neither "min" nor
        "max".

    Examples
    --------
    >>> thermal_model = ThermalSourceModel(apec_model, Zmet=0.3)
    >>> redshift = 0.05
    >>> area = 6000.0 # assumed here in cm**2
    >>> time = 2.0e5 # assumed here in seconds
    >>> sp = ds.sphere("c", (500., "kpc"))
    >>> my_photons = PhotonList.from_data_source(sp, redshift, area,
    ...                                          time, thermal_model)
    """
    ds = data_source.ds

    if parameters is None:
        parameters = {}
    if cosmology is None:
        if hasattr(ds, 'cosmology'):
            cosmo = ds.cosmology
        else:
            cosmo = Cosmology()
    else:
        cosmo = cosmology
    if dist is None:
        if redshift <= 0.0:
            msg = "If redshift <= 0.0, you must specify a distance to the " \
                  "source using the 'dist' argument!"
            mylog.error(msg)
            raise ValueError(msg)
        D_A = cosmo.angular_diameter_distance(0.0, redshift).in_units("Mpc")
    else:
        D_A = parse_value(dist, "kpc")
        if redshift > 0.0:
            mylog.warning("Redshift must be zero for nearby sources. "
                          "Resetting redshift to 0.0.")
            redshift = 0.0

    # Resolve the origin of the photon coordinates.
    if isinstance(center, string_types):
        if center == "center" or center == "c":
            parameters["center"] = ds.domain_center
        elif center == "max" or center == "m":
            parameters["center"] = ds.find_max("density")[-1]
    elif iterable(center):
        if isinstance(center, YTArray):
            parameters["center"] = center.in_units("code_length")
        elif isinstance(center, tuple):
            if center[0] == "min":
                parameters["center"] = ds.find_min(center[1])[-1]
            elif center[0] == "max":
                parameters["center"] = ds.find_max(center[1])[-1]
            else:
                raise RuntimeError(
                    "A tuple 'center' must start with 'min' or 'max', "
                    "got %r." % (center[0],))
        else:
            parameters["center"] = ds.arr(center, "code_length")
    elif center is None:
        if hasattr(data_source, "left_edge"):
            parameters["center"] = 0.5*(data_source.left_edge +
                                        data_source.right_edge)
        else:
            parameters["center"] = data_source.get_field_parameter("center")

    # Fail fast: without this check an unrecognised center only surfaces
    # as a bare KeyError after all photons have been generated.
    if "center" not in parameters:
        raise ValueError("Could not determine a center from center=%r."
                         % (center,))

    parameters["fid_exp_time"] = parse_value(exp_time, "s")
    parameters["fid_area"] = parse_value(area, "cm**2")
    parameters["fid_redshift"] = redshift
    parameters["fid_d_a"] = D_A
    parameters["hubble"] = cosmo.hubble_constant
    parameters["omega_matter"] = cosmo.omega_matter
    parameters["omega_lambda"] = cosmo.omega_lambda

    if redshift > 0.0:
        mylog.info("Cosmology: h = %g, omega_matter = %g, omega_lambda = %g",
                   cosmo.hubble_constant, cosmo.omega_matter,
                   cosmo.omega_lambda)
    else:
        mylog.info("Observing local source at distance %s.", D_A)

    # Normalisation handed to the source model: area * exposure time
    # divided by 4*pi*D_A^2*(1+z)^2, with D_A converted to cgs.
    D_A = parameters["fid_d_a"].in_cgs()
    dist_fac = 1.0/(4.*np.pi*D_A.value*D_A.value*(1.+redshift)**2)
    spectral_norm = parameters["fid_area"].v*parameters["fid_exp_time"].v*dist_fac

    source_model.setup_model(data_source, redshift, spectral_norm)

    p_fields, v_fields, w_field = determine_fields(ds,
                                                   source_model.source_type,
                                                   point_sources)

    if velocity_fields is not None:
        v_fields = velocity_fields

    if p_fields[0] == ("index", "x"):
        parameters["data_type"] = "cells"
    else:
        parameters["data_type"] = "particles"

    citer = data_source.chunks([], "io")

    photons = defaultdict(list)

    for chunk in parallel_objects(citer):

        chunk_data = source_model(chunk)

        if chunk_data is not None:
            ncells, number_of_photons, idxs, energies = chunk_data
            photons["num_photons"].append(number_of_photons)
            photons["energy"].append(energies)
            photons["pos"].append(np.array([chunk[p_fields[0]].d[idxs],
                                            chunk[p_fields[1]].d[idxs],
                                            chunk[p_fields[2]].d[idxs]]))
            photons["vel"].append(np.array([chunk[v_fields[0]].d[idxs],
                                            chunk[v_fields[1]].d[idxs],
                                            chunk[v_fields[2]].d[idxs]]))
            # Without a width field, zeros are stored as the width.
            if w_field is None:
                photons["dx"].append(np.zeros(ncells))
            else:
                photons["dx"].append(chunk[w_field].d[idxs])

    source_model.cleanup_model()

    photon_units = {"pos": ds.field_info[p_fields[0]].units,
                    "vel": ds.field_info[v_fields[0]].units,
                    "energy": "keV"}
    if w_field is None:
        photon_units["dx"] = "kpc"
    else:
        photon_units["dx"] = ds.field_info[w_field].units

    concatenate_photons(ds, photons, photon_units)

    c = parameters["center"].to("kpc")

    if sum(ds.periodicity) > 0:
        # Fix photon coordinates for regions crossing a periodic boundary
        dw = ds.domain_width.to("kpc")
        left_bound, right_bound = find_object_bounds(data_source)
        for i in range(3):
            if ds.periodicity[i] and photons["pos"].shape[0] > 0:
                tfl = photons["pos"][:, i] < left_bound[i]
                tfr = photons["pos"][:, i] > right_bound[i]
                photons["pos"][tfl, i] += dw[i]
                photons["pos"][tfr, i] -= dw[i]

    # Re-center all coordinates
    if photons["pos"].shape[0] > 0:
        photons["pos"] -= c

    mylog.info("Finished generating photons.")
    mylog.info("Number of photons generated: %d",
               int(np.sum(photons["num_photons"])))
    mylog.info("Number of cells with photons: %d", photons["dx"].size)

    return cls(photons, parameters, cosmo)
def trace_ancestors(self, halo_type, root_ids, fields=None, filename=None):
    """
    Trace the ancestry of a given set of halos.

    A merger-tree for a specific set of halos will be created,
    starting with the last halo catalog and moving backward.

    Parameters
    ----------
    halo_type : string
        The type of halo, typically "FOF" for FoF groups or
        "Subfind" for subhalos.
    root_ids : integer or array of integers
        The halo IDs from the last halo catalog for the targeted
        halos. Scalars, lists, tuples, and arrays are accepted.
    fields : optional, list of strings
        List of additional fields to be saved to halo catalogs.
    filename : optional, string
        Directory in which merger-tree catalogs will be saved.
        When None, no output directory is created here and None is
        forwarded to the helpers that build the catalog file names.
    """
    # os.path.dirname rejects None, which is this argument's default.
    output_dir = "" if filename is None else os.path.dirname(filename)
    if self.comm.rank == 0 and len(output_dir) > 0:
        ensure_dir(output_dir)

    # Walk the outputs from last to first.
    all_outputs = self.ts.outputs[::-1]
    ds1 = None

    for i, fn2 in enumerate(all_outputs[1:]):
        fn1 = all_outputs[i]
        target_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn1), 0), ".h5")
        catalog_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn2), 0), ".h5")
        # Skip links whose ancestor catalog already exists.
        if os.path.exists(catalog_filename):
            continue

        if ds1 is None:
            ds1 = self._load_ds(fn1, index_ptype=halo_type)
        ds2 = self._load_ds(fn2, index_ptype=halo_type)

        if self.comm.rank == 0:
            _print_link_info(ds1, ds2)

        if ds2.index.particle_count[halo_type] == 0:
            mylog.info("%s has no halos of type %s, ending." %
                       (ds2, halo_type))
            break

        if i == 0:
            target_ids = root_ids
            if isinstance(target_ids, YTArray):
                target_ids = target_ids.d
            # Normalise scalars, lists, and arrays to a 1-d int64 array;
            # a plain list has no ``dtype`` to inspect.
            target_ids = np.atleast_1d(np.asarray(target_ids))
            if target_ids.dtype != np.int64:
                target_ids = target_ids.astype(np.int64)
        else:
            # Later links target the ancestors saved by the previous one.
            mylog.info("Loading target ids from %s.", target_filename)
            ds_target = yt_load(target_filename)
            target_ids = \
              ds_target.r["halos", "particle_identifier"].d.astype(np.int64)
            del ds_target

        id_store = []
        target_halos = []
        ancestor_halos = []

        njobs = min(self.comm.size, target_ids.size)
        pbar = get_pbar("Linking halos", target_ids.size, parallel=True)
        my_i = 0
        for halo_id in parallel_objects(target_ids, njobs=njobs):
            my_halo = ds1.halo(halo_type, halo_id)

            target_halos.append(my_halo)
            my_ancestors = self._find_ancestors(my_halo, ds2,
                                                id_store=id_store)
            ancestor_halos.extend(my_ancestors)
            # Each process advances the shared progress count by njobs.
            my_i += njobs
            pbar.update(my_i)
        pbar.finish()

        if i == 0:
            # The root halos have no descendent.
            for halo in target_halos:
                halo.descendent_identifier = -1
            self._save_catalog(filename, ds1, target_halos, fields)
        self._save_catalog(filename, ds2, ancestor_halos, fields)

        if len(ancestor_halos) == 0:
            break

        ds1 = ds2
        clear_id_cache()
def trace_descendents(self, halo_type, fields=None, filename=None):
    """
    Trace the descendents of all halos.

    A merger-tree for all halos will be created, starting
    with the first halo catalog and moving forward.

    With fewer than two outputs there is nothing to link and the
    method returns without doing anything.

    Parameters
    ----------
    halo_type : string
        The type of halo, typically "FOF" for FoF groups or
        "Subfind" for subhalos.
    fields : optional, list of strings
        List of additional fields to be saved to halo catalogs.
    filename : optional, string
        Directory in which merger-tree catalogs will be saved.
        When None, no output directory is created here and None is
        forwarded to the helpers that build the catalog file names.
    """
    # os.path.dirname rejects None, which is this argument's default.
    output_dir = "" if filename is None else os.path.dirname(filename)
    if self.comm.rank == 0 and len(output_dir) > 0:
        ensure_dir(output_dir)

    all_outputs = self.ts.outputs[:]
    # The loop below would never run, leaving the names used after it
    # (catalog_filename, fn2) unbound.
    if len(all_outputs) < 2:
        return

    ds1 = ds2 = None

    for i, fn2 in enumerate(all_outputs[1:]):
        fn1 = all_outputs[i]
        target_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn1), 0), ".h5")
        catalog_filename = get_output_filename(
            filename, "%s.%d" % (_get_tree_basename(fn2), 0), ".h5")
        # Skip links whose target catalog already exists.
        if os.path.exists(target_filename):
            continue

        if ds1 is None:
            ds1 = self._load_ds(fn1, index_ptype=halo_type)
        ds2 = self._load_ds(fn2, index_ptype=halo_type)

        if self.comm.rank == 0:
            _print_link_info(ds1, ds2)

        target_halos = []
        if ds1.index.particle_count[halo_type] == 0:
            # No halos here: save an empty catalog and move on.
            self._save_catalog(filename, ds1, target_halos, fields)
            ds1 = ds2
            continue

        target_ids = \
          ds1.r[halo_type, "particle_identifier"].d.astype(np.int64)

        njobs = min(self.comm.size, target_ids.size)
        pbar = get_pbar("Linking halos", target_ids.size, parallel=True)
        my_i = 0
        for halo_id in parallel_objects(target_ids, njobs=njobs):
            my_halo = ds1.halo(halo_type, halo_id)

            target_halos.append(my_halo)
            self._find_descendent(my_halo, ds2)
            # Each process advances the shared progress count by njobs.
            my_i += njobs
            pbar.update(my_i)
        pbar.finish()

        self._save_catalog(filename, ds1, target_halos, fields)
        ds1 = ds2
        clear_id_cache()

    # Finally save the catalog for the last output, unless it exists.
    if os.path.exists(catalog_filename):
        return

    if ds2 is None:
        ds2 = self._load_ds(fn2, index_ptype=halo_type)
    if self.comm.rank == 0:
        # Here the halo type is passed in place of a list of halos.
        self._save_catalog(filename, ds2, halo_type, fields)
def from_data_source(cls, data_source, redshift, area,
                     exp_time, source_model, point_sources=False,
                     parameters=None, center=None, dist=None,
                     cosmology=None, velocity_fields=None):
    r"""
    Initialize a :class:`~pyxsim.photon_list.PhotonList` from a yt data
    source. The redshift, collecting area, exposure time, and cosmology
    are stored in the *parameters* dictionary which is passed to the
    *source_model* function.

    Parameters
    ----------
    data_source : :class:`~yt.data_objects.data_containers.YTSelectionContainer`
        The data source from which the photons will be generated.
    redshift : float
        The cosmological redshift for the photons.
    area : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The collecting area to determine the number of photons. If units are
        not specified, it is assumed to be in cm^2.
    exp_time : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The exposure time to determine the number of photons. If units are
        not specified, it is assumed to be in seconds.
    source_model : :class:`~pyxsim.source_models.SourceModel`
        A source model used to generate the photons.
    point_sources : boolean, optional
        If True, the photons will be assumed to be generated from the exact
        positions of the cells or particles and not smeared around within
        a volume. Default: False
    parameters : dict, optional
        A dictionary of parameters to be passed for the source model to use,
        if necessary. The dictionary is updated in place.
    center : string or array_like, optional
        The origin of the photon spatial coordinates. Accepts "c", "max", or
        a coordinate. If not specified, pyxsim attempts to use the "center"
        field parameter of the data_source.
    dist : float, (value, unit) tuple, :class:`~yt.units.yt_array.YTQuantity`, or :class:`~astropy.units.Quantity`
        The angular diameter distance, used for nearby sources. This may be
        optionally supplied instead of it being determined from the
        *redshift* and given *cosmology*. If units are not specified, it is
        assumed to be in kpc. To use this, the redshift must be set to zero.
    cosmology : :class:`~yt.utilities.cosmology.Cosmology`, optional
        Cosmological information. If not supplied, we try to get the
        cosmology from the dataset. Otherwise, LCDM with the default yt
        parameters is assumed.
    velocity_fields : list of fields
        The yt fields to use for the velocity. If not specified, the
        following will be assumed:
        ['velocity_x', 'velocity_y', 'velocity_z'] for grid datasets
        ['particle_velocity_x', 'particle_velocity_y', 'particle_velocity_z'] for particle datasets

    Raises
    ------
    ValueError
        If *redshift* <= 0 and no *dist* is given, or if *center* cannot
        be interpreted and *parameters* carries no "center" entry.
    RuntimeError
        If *center* is a tuple whose first element is neither "min" nor
        "max".

    Examples
    --------
    >>> thermal_model = ThermalSourceModel(apec_model, Zmet=0.3)
    >>> redshift = 0.05
    >>> area = 6000.0 # assumed here in cm**2
    >>> time = 2.0e5 # assumed here in seconds
    >>> sp = ds.sphere("c", (500., "kpc"))
    >>> my_photons = PhotonList.from_data_source(sp, redshift, area,
    ...                                          time, thermal_model)
    """
    ds = data_source.ds

    if parameters is None:
        parameters = {}
    if cosmology is None:
        if hasattr(ds, 'cosmology'):
            cosmo = ds.cosmology
        else:
            cosmo = Cosmology()
    else:
        cosmo = cosmology
    if dist is None:
        if redshift <= 0.0:
            msg = "If redshift <= 0.0, you must specify a distance to the " \
                  "source using the 'dist' argument!"
            mylog.error(msg)
            raise ValueError(msg)
        D_A = cosmo.angular_diameter_distance(0.0, redshift).in_units("Mpc")
    else:
        D_A = parse_value(dist, "kpc")
        if redshift > 0.0:
            mylog.warning("Redshift must be zero for nearby sources. "
                          "Resetting redshift to 0.0.")
            redshift = 0.0

    # Resolve the origin of the photon coordinates.
    if isinstance(center, string_types):
        if center == "center" or center == "c":
            parameters["center"] = ds.domain_center
        elif center == "max" or center == "m":
            parameters["center"] = ds.find_max("density")[-1]
    elif iterable(center):
        if isinstance(center, YTArray):
            parameters["center"] = center.in_units("code_length")
        elif isinstance(center, tuple):
            if center[0] == "min":
                parameters["center"] = ds.find_min(center[1])[-1]
            elif center[0] == "max":
                parameters["center"] = ds.find_max(center[1])[-1]
            else:
                raise RuntimeError(
                    "A tuple 'center' must start with 'min' or 'max', "
                    "got %r." % (center[0],))
        else:
            parameters["center"] = ds.arr(center, "code_length")
    elif center is None:
        if hasattr(data_source, "left_edge"):
            parameters["center"] = 0.5 * (data_source.left_edge +
                                          data_source.right_edge)
        else:
            parameters["center"] = data_source.get_field_parameter(
                "center")

    # Fail fast: without this check an unrecognised center only surfaces
    # as a bare KeyError after all photons have been generated.
    if "center" not in parameters:
        raise ValueError("Could not determine a center from center=%r."
                         % (center,))

    parameters["fid_exp_time"] = parse_value(exp_time, "s")
    parameters["fid_area"] = parse_value(area, "cm**2")
    parameters["fid_redshift"] = redshift
    parameters["fid_d_a"] = D_A
    parameters["hubble"] = cosmo.hubble_constant
    parameters["omega_matter"] = cosmo.omega_matter
    parameters["omega_lambda"] = cosmo.omega_lambda

    if redshift > 0.0:
        mylog.info(
            "Cosmology: h = %g, omega_matter = %g, omega_lambda = %g",
            cosmo.hubble_constant, cosmo.omega_matter, cosmo.omega_lambda)
    else:
        mylog.info("Observing local source at distance %s.", D_A)

    # Normalisation handed to the source model: area * exposure time
    # divided by 4*pi*D_A^2*(1+z)^2, with D_A converted to cgs.
    D_A = parameters["fid_d_a"].in_cgs()
    dist_fac = 1.0 / (4. * np.pi * D_A.value * D_A.value *
                      (1. + redshift)**2)
    spectral_norm = parameters["fid_area"].v * parameters[
        "fid_exp_time"].v * dist_fac

    source_model.setup_model(data_source, redshift, spectral_norm)

    p_fields, v_fields, w_field = determine_fields(
        ds, source_model.source_type, point_sources)

    if velocity_fields is not None:
        v_fields = velocity_fields

    if p_fields[0] == ("index", "x"):
        parameters["data_type"] = "cells"
    else:
        parameters["data_type"] = "particles"

    citer = data_source.chunks([], "io")

    photons = defaultdict(list)

    for chunk in parallel_objects(citer):

        chunk_data = source_model(chunk)

        if chunk_data is not None:
            ncells, number_of_photons, idxs, energies = chunk_data
            photons["num_photons"].append(number_of_photons)
            photons["energy"].append(energies)
            photons["pos"].append(
                np.array([
                    chunk[p_fields[0]].d[idxs],
                    chunk[p_fields[1]].d[idxs],
                    chunk[p_fields[2]].d[idxs]
                ]))
            photons["vel"].append(
                np.array([
                    chunk[v_fields[0]].d[idxs],
                    chunk[v_fields[1]].d[idxs],
                    chunk[v_fields[2]].d[idxs]
                ]))
            # Without a width field, zeros are stored as the width.
            if w_field is None:
                photons["dx"].append(np.zeros(ncells))
            else:
                photons["dx"].append(chunk[w_field].d[idxs])

    source_model.cleanup_model()

    photon_units = {
        "pos": ds.field_info[p_fields[0]].units,
        "vel": ds.field_info[v_fields[0]].units,
        "energy": "keV"
    }
    if w_field is None:
        photon_units["dx"] = "kpc"
    else:
        photon_units["dx"] = ds.field_info[w_field].units

    concatenate_photons(ds, photons, photon_units)

    c = parameters["center"].to("kpc")

    if sum(ds.periodicity) > 0:
        # Fix photon coordinates for regions crossing a periodic boundary
        dw = ds.domain_width.to("kpc")
        left_bound, right_bound = find_object_bounds(data_source)
        for i in range(3):
            if ds.periodicity[i] and photons["pos"].shape[0] > 0:
                tfl = photons["pos"][:, i] < left_bound[i]
                tfr = photons["pos"][:, i] > right_bound[i]
                photons["pos"][tfl, i] += dw[i]
                photons["pos"][tfr, i] -= dw[i]

    # Re-center all coordinates
    if photons["pos"].shape[0] > 0:
        photons["pos"] -= c

    mylog.info("Finished generating photons.")
    mylog.info("Number of photons generated: %d",
               int(np.sum(photons["num_photons"])))
    mylog.info("Number of cells with photons: %d", photons["dx"].size)

    return cls(photons, parameters, cosmo)
def setup_model(self, data_source, redshift, spectral_norm):
    """
    Configure this source model for ``data_source`` before photons are made.

    In order, this method:

    * derives the metallicity conversion factor (``self.Zconvert``) and the
      per-element conversion factors (``self.mconvert``) from the units of
      the corresponding fields;
    * picks the emission measure field, defining a particle-based one on
      the dataset when a known particle density field is present;
    * picks the temperature field, setting the ``mean_molecular_weight``
      field parameter when the temperature has to be derived for particles
      without an ``ElectronAbundance`` field;
    * prepares the spectral model and builds the temperature bins;
    * counts the cells/particles inside the temperature range and creates
      the progress bar stored on ``self.pbar``.

    Parameters
    ----------
    data_source : data container
        Object exposing ``ds``, ``chunks`` and the field-parameter
        accessors used below.
    redshift : float
        Stored on ``self.redshift`` and handed to the spectral model.
    spectral_norm : float
        Stored on ``self.spectral_norm``.

    Raises
    ------
    RuntimeError
        If a metallicity or element field has units that are neither
        dimensionless-like nor ``Zsun``.
    """
    ds = data_source.ds
    unitless = ("dimensionless", "", "code_metallicity")

    self.redshift = redshift
    ptype = None

    # Conversion factor for a field-based total metallicity.
    if not (self.nei or isinstance(self.Zmet, float)):
        Z_units = str(ds._get_field_info(self.Zmet).units)
        if Z_units == "Zsun":
            self.Zconvert = 1.0
        elif Z_units in unitless:
            self.Zconvert = 1.0 / metal_abund[self.abund_table]
        else:
            raise RuntimeError(
                "I don't understand metallicity units of %s!" % Z_units)

    # Conversion factors for individually varying elements.
    if self.num_var_elem > 0:
        for key, value in self.var_elem.items():
            if isinstance(value, float):
                continue
            # Keys may look like "Elem^charge"; without a "^" the split
            # simply returns the whole key.
            elem = key.split("^")[0]
            n_elem = elem_names.index(elem)
            m_units = str(ds._get_field_info(value).units)
            if m_units == "Zsun":
                self.mconvert[key] = 1.0
            elif m_units in unitless:
                self.mconvert[key] = atomic_weights[1] / (
                    self.atable[n_elem] * atomic_weights[n_elem] *
                    solar_H_abund)
            else:
                raise RuntimeError(
                    "I don't understand units of %s for element %s!" %
                    (m_units, key))

    # Emission measure field: prefer a particle density field if present.
    if self.emission_measure_field is None:
        found_dfield = [
            fd for fd in particle_dens_fields if fd in ds.field_list
        ]
        if found_dfield:
            dens_field = found_dfield[0]
            ptype = dens_field[0]

            def _emission_measure(field, data):
                nenh = data[dens_field] * data['particle_mass']
                nenh /= mp * mp
                nenh.convert_to_units("cm**-3")
                X_H = (data.get_field_parameter("X_H")
                       if data.has_field_parameter("X_H")
                       else primordial_H_abund)
                if (ptype, 'ElectronAbundance') in ds.field_list:
                    nenh *= X_H * data[ptype, 'ElectronAbundance']
                    nenh *= X_H * (1. -
                                   data[ptype, 'NeutralHydrogenAbundance'])
                else:
                    nenh *= 0.5 * (1. + X_H) * X_H
                return nenh

            ds.add_field((ptype, 'emission_measure'),
                         function=_emission_measure,
                         particle_type=True,
                         units="cm**-3")
            self.emission_measure_field = (ptype, 'emission_measure')
        else:
            self.emission_measure_field = ('gas', 'emission_measure')
    mylog.info("Using emission measure field '(%s, %s)'." %
               self.emission_measure_field)

    # Temperature field.
    if self.temperature_field is None:
        found_tfield = [
            fd for fd in particle_temp_fields if fd in ds.derived_field_list
        ]
        if found_tfield:
            self.temperature_field = found_tfield[0]
            # For SPH datasets whose temperature is not stored on disk the
            # temperature is derived, so the mean molecular weight must be
            # that of a fully ionized gas whenever the dataset carries no
            # ionization fraction.
            if (self.temperature_field not in ds.field_list
                    and ptype is not None
                    and (ptype, 'ElectronAbundance') not in ds.field_list):
                if data_source.has_field_parameter("X_H"):
                    X_H = data_source.get_field_parameter("X_H")
                else:
                    X_H = 0.76
                data_source.set_field_parameter(
                    "mean_molecular_weight", 4.0 / (5 * X_H + 3))
        else:
            self.temperature_field = ('gas', 'temperature')
    mylog.info("Using temperature field '(%s, %s)'." %
               self.temperature_field)

    self.spectral_model.prepare_spectrum(redshift)
    self.spectral_norm = spectral_norm

    # Temperature bin edges, linear or logarithmic.
    n_edges = self.n_kT + 1
    if self.kT_scale == "linear":
        self.kT_bins = np.linspace(self.kT_min, self.kT_max, num=n_edges)
    elif self.kT_scale == "log":
        self.kT_bins = np.logspace(np.log10(self.kT_min),
                                   np.log10(self.kT_max),
                                   num=n_edges)
    self.dkT = np.diff(self.kT_bins)

    # Count the elements strictly inside the temperature range, summed
    # over all processors, to size the progress bar.
    T_min = self.kT_min * K_per_keV
    T_max = self.kT_max * K_per_keV
    num_cells = 0
    for chunk in parallel_objects(data_source.chunks([], "io")):
        T = chunk[self.temperature_field].d
        inside = (T > T_min) & (T < T_max)
        num_cells += np.count_nonzero(inside)
    num_cells = comm.mpi_allreduce(num_cells)

    self.source_type = ds._get_field_info(
        self.emission_measure_field).name[0]
    self.pbar = get_pbar("Processing cells/particles ", num_cells)
def __call__(self, data_source, parameters):
    """
    Generate thermal photons for every non-empty chunk of ``data_source``.

    Cells are sorted by temperature and grouped into temperature bins; one
    spectrum is fetched per occupied bin and each cell in the bin draws a
    number of photons and their energies from it.

    Parameters
    ----------
    data_source : data container
        Object exposing ``ds``, ``chunks``, ``quantities`` and ``ires``.
    parameters : dict
        Must provide ``"FiducialExposureTime"``, ``"FiducialArea"``,
        ``"FiducialRedshift"``, ``"FiducialAngularDiameterDistance"`` and
        ``"center"``.

    Returns
    -------
    dict
        Keys ``x, y, z, vx, vy, vz, dx, Energy, NumberOfPhotons``; each
        value is the concatenation over chunks (or an empty array when no
        photons were produced).

    Raises
    ------
    RuntimeError
        If a chunk produces more photons than ``self.photons_per_chunk``.
    """
    ds = data_source.ds

    exp_time = parameters["FiducialExposureTime"]
    area = parameters["FiducialArea"]
    redshift = parameters["FiducialRedshift"]
    D_A = parameters["FiducialAngularDiameterDistance"].in_cgs()
    dist_fac = 1.0/(4.*np.pi*D_A.value*D_A.value*(1.+redshift)**2)
    src_ctr = parameters["center"]

    # Only the maximum is needed; it may extend the upper bin edge.
    my_kT_max = data_source.quantities.extrema("kT")[1]

    self.spectral_model.prepare_spectrum(redshift)
    emid = self.spectral_model.emid
    ebins = self.spectral_model.ebins
    nchan = len(emid)

    citer = data_source.chunks([], "io")

    photons = {}
    for key in ("x", "y", "z", "vx", "vy", "vz", "dx",
                "Energy", "NumberOfPhotons"):
        photons[key] = []

    spectral_norm = area.v*exp_time.v*dist_fac

    tot_num_cells = data_source.ires.shape[0]

    pbar = get_pbar("Generating photons ", tot_num_cells)

    # The bin edges do not depend on the chunk, so build them once.
    # float() strips units so a unitful maximum is never compared against
    # the bare-float module-level kT_max.
    kT_bins = np.linspace(kT_min, max(float(my_kT_max), kT_max),
                          num=n_kT+1)
    dkT = kT_bins[1]-kT_bins[0]

    cell_counter = 0

    for chunk in parallel_objects(citer):

        kT = chunk["kT"].v
        num_cells = len(kT)
        if num_cells == 0:
            continue
        vol = chunk["cell_volume"].in_cgs().v
        # Convert to CGS *before* stripping units so the emission measure
        # is in cm**-3 regardless of the units the density field is in.
        EM = (chunk["density"]/mp).in_cgs().v**2
        EM *= 0.5*(1.+self.X_H)*self.X_H*vol

        if isinstance(self.Zmet, string_types):
            metalZ = chunk[self.Zmet].v
        else:
            metalZ = self.Zmet*np.ones(num_cells)

        # Everything below works in temperature-sorted order.
        idxs = np.argsort(kT)
        # Put the metallicities in the same sorted order as cell_em;
        # otherwise metalZ[ibegin:iend] would refer to unrelated cells.
        metalZ = metalZ[idxs]

        kT_idxs = np.digitize(kT[idxs], kT_bins)
        kT_idxs = np.minimum(np.maximum(1, kT_idxs), n_kT) - 1
        bcounts = np.bincount(kT_idxs).astype("int")
        bcounts = bcounts[bcounts > 0]
        # Start/end offsets (in sorted order) of each occupied bin.
        ecell = np.cumsum(bcounts)
        bcell = ecell - bcounts
        kT_idxs = np.unique(kT_idxs)

        cell_em = EM[idxs]*spectral_norm

        number_of_photons = np.zeros(num_cells, dtype="uint64")
        energies = np.zeros(self.photons_per_chunk)

        start_e = 0
        end_e = 0

        for ibegin, iend, ikT in zip(bcell, ecell, kT_idxs):

            # Temperature at the centre of this bin.
            kT_mid = kT_bins[ikT] + 0.5*dkT

            n_current = iend-ibegin

            cem = cell_em[ibegin:iend]

            cspec, mspec = self.spectral_model.get_spectrum(kT_mid)

            tot_ph_c = cspec.d.sum()
            tot_ph_m = mspec.d.sum()

            u = np.random.random(size=n_current)

            cell_norm_c = tot_ph_c*cem
            cell_norm_m = tot_ph_m*metalZ[ibegin:iend]*cem
            # Integer part is always emitted; the fractional part is
            # emitted with matching probability.
            cell_norm = np.modf(cell_norm_c + cell_norm_m)
            cell_n = np.uint64(cell_norm[1]) + np.uint64(cell_norm[0] >= u)

            number_of_photons[ibegin:iend] = cell_n

            end_e += int(cell_n.sum())

            if end_e > self.photons_per_chunk:
                raise RuntimeError(
                    "Number of photons generated for this chunk " +
                    "exceeds photons_per_chunk (%d)! " %
                    self.photons_per_chunk +
                    "Increase photons_per_chunk!")

            if self.method == "invert_cdf":
                cumspec_c = np.cumsum(cspec.d)
                cumspec_m = np.cumsum(mspec.d)
                cumspec_c = np.insert(cumspec_c, 0, 0.0)
                cumspec_m = np.insert(cumspec_m, 0, 0.0)

            ei = start_e
            for cn, Z in zip(number_of_photons[ibegin:iend],
                             metalZ[ibegin:iend]):
                # Plain int keeps the slice bounds integral; mixing a
                # Python int with np.uint64 can promote to float.
                cn = int(cn)
                if cn == 0:
                    continue
                if self.method == "invert_cdf":
                    cumspec = cumspec_c + Z*cumspec_m
                    cumspec /= cumspec[-1]
                    randvec = np.random.uniform(size=cn)
                    randvec.sort()
                    cell_e = np.interp(randvec, cumspec, ebins)
                elif self.method == "accept_reject":
                    tot_spec = cspec.d+Z*mspec.d
                    tot_spec /= tot_spec.sum()
                    eidxs = np.random.choice(nchan, size=cn, p=tot_spec)
                    cell_e = emid[eidxs]
                energies[ei:ei+cn] = cell_e
                cell_counter += 1
                pbar.update(cell_counter)
                ei += cn

            start_e = end_e

        active_cells = number_of_photons > 0
        # Map the sorted positions of active cells back to chunk indices.
        idxs = idxs[active_cells]

        photons["NumberOfPhotons"].append(number_of_photons[active_cells])
        photons["Energy"].append(ds.arr(energies[:end_e].copy(), "keV"))
        photons["x"].append((chunk["x"][idxs]-src_ctr[0]).in_units("kpc"))
        photons["y"].append((chunk["y"][idxs]-src_ctr[1]).in_units("kpc"))
        photons["z"].append((chunk["z"][idxs]-src_ctr[2]).in_units("kpc"))
        photons["vx"].append(chunk["velocity_x"][idxs].in_units("km/s"))
        photons["vy"].append(chunk["velocity_y"][idxs].in_units("km/s"))
        photons["vz"].append(chunk["velocity_z"][idxs].in_units("km/s"))
        photons["dx"].append(chunk["dx"][idxs].in_units("kpc"))

    pbar.finish()

    for key in photons:
        if len(photons[key]) > 0:
            photons[key] = uconcatenate(photons[key])
        elif key == "NumberOfPhotons":
            photons[key] = np.array([])
        else:
            photons[key] = YTArray([], photon_units[key])

    mylog.info("Number of photons generated: %d" %
               int(np.sum(photons["NumberOfPhotons"])))
    mylog.info("Number of cells with photons: %d" % len(photons["x"]))

    return photons
def _initialize_refined_index(self):
    """
    Build the refined index bitmask for every data file.

    Each data file is processed (in parallel via ``parallel_objects``) by
    feeding its particle coordinates to
    ``self.regions._refined_index_data_file``; the resulting collection is
    serialized into the shared storage. Afterwards every stored collection
    is deserialized, appended to ``self.regions.bitmasks`` in file order,
    and refined collisions are computed.
    """
    mask = self.regions.masks.sum(axis=1).astype("uint8")

    # Scratch buffers, sized from the data file with the most particles.
    largest_file = max(
        sum(df.total_particles.values()) for df in self.data_files)
    max_npart = largest_file * 28
    sub_mi1 = np.zeros(max_npart, "uint64")
    sub_mi2 = np.zeros(max_npart, "uint64")

    pb = get_pbar("Initializing refined index", len(self.data_files))

    # Thresholds can be overridden through private attributes.
    mask_threshold = getattr(self, "_index_mask_threshold", 2)
    count_threshold = getattr(self, "_index_count_threshold", 256)
    mylog.debug(
        "Using estimated thresholds of %s and %s for refinement",
        mask_threshold,
        count_threshold,
    )

    total_refined = 0
    well_populated = self.regions.particle_counts > count_threshold
    total_coarse_refined = ((mask >= 2) & well_populated).sum()
    mylog.debug(
        "This should produce roughly %s zones, for %s of the domain",
        total_coarse_refined,
        100 * total_coarse_refined / mask.size,
    )

    storage = {}
    numbered_files = enumerate(self.data_files)
    for sto, (i, data_file) in parallel_objects(numbered_files,
                                                storage=storage):
        pb.update(i + 1)
        coll = None
        nsub_mi = 0
        for ptype, pos in self.io._yield_coordinates(data_file):
            if pos.size == 0:
                continue
            # Smoothing lengths are only fetched for the first SPH
            # particle type, when the dataset declares one.
            is_sph = (hasattr(self.ds, "_sph_ptypes")
                      and ptype == self.ds._sph_ptypes[0])
            hsml = (self.io._get_smoothing_length(
                        data_file, pos.dtype, pos.shape)
                    if is_sph else None)
            nsub_mi, coll = self.regions._refined_index_data_file(
                coll,
                pos,
                hsml,
                mask,
                sub_mi1,
                sub_mi2,
                data_file.file_id,
                nsub_mi,
                count_threshold=count_threshold,
                mask_threshold=mask_threshold,
            )
            total_refined += nsub_mi
        sto.result_id = i
        # A file that contributed no coordinates stores an empty payload.
        serialized = b"" if coll is None else coll.dumps()
        sto.result = (data_file.file_id, serialized)
    pb.finish()

    # Rebuild the collections in file order and register them.
    for i in sorted(storage):
        file_id, serialized = storage[i]
        restored = BoolArrayCollection()
        restored.loads(serialized)
        self.regions.bitmasks.append(file_id, restored)
    self.regions.find_collisions_refined()
def piter(self, storage=None):
    r"""Iterate over the components of this time series in parallel.

    Individual datasets of the series are dispatched to different
    processors or processor groups. If the parallelism strategy was set
    to be multi-processor (by "parallel = N" where N is an integer when
    the DatasetSeries was created), each dataset is issued to an
    N-processor group. For instance, a 1024 processor job loading 100
    datasets could create 8 processor groups of 128 processors each,
    each group being assigned a different dataset; see the examples
    below.

    The *storage* option is as seen in
    :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`:
    a mechanism for storing results of analysis on an individual dataset
    and then combining the results at the end, so that the entire set of
    processors has access to those results.

    Note that supplying a *storage* changes the iteration mechanism; see
    below.

    Parameters
    ----------
    storage : dict
        A dictionary that will be filled with results during the course
        of the iteration. The keys will be the dataset indices and the
        values will be whatever is assigned to the *result* attribute on
        the storage during iteration.

    Examples
    --------
    Iteration when the results do not need to be stored. One processor
    will be assigned to each dataset.

    >>> ts = DatasetSeries("DD*/DD*.index")
    >>> for ds in ts.piter():
    ...    SlicePlot(ds, "x", "Density").save()
    ...

    Storing results:

    >>> def print_time(ds):
    ...     print(ds.current_time)
    ...
    >>> ts = DatasetSeries("DD*/DD*.index",
    ...             setup_function = print_time )
    ...
    >>> my_storage = {}
    >>> for sto, ds in ts.piter(storage=my_storage):
    ...     v, c = ds.find_max("density")
    ...     sto.result = (v, c)
    ...
    >>> for i, (v, c) in sorted(my_storage.items()):
    ...     print("% 4i  %0.3e" % (i, v))
    ...

    Dispatching 4 processors to each dataset:

    >>> ts = DatasetSeries("DD*/DD*.index",
    ...                     parallel = 4)
    >>> for ds in ts.piter():
    ...     ProjectionPlot(ds, "x", "Density").save()
    ...

    """
    # Equality (not identity) is intentional here: it is what the
    # established behaviour relies on, so 0/1 behave like False/True.
    if self.parallel == False:
        njobs = 1
    elif self.parallel == True:
        njobs = -1
    else:
        njobs = self.parallel
    return parallel_objects(self, njobs=njobs, storage=storage,
                            dynamic=False)
def project_light_cone(self, field_of_view, image_resolution, field,
                       weight_field=None, photon_field=False,
                       save_stack=True, save_final_image=True,
                       save_slice_images=False,
                       cmap_name="algae",
                       njobs=1, dynamic=False):
    r"""Create projections for light cone, then add them together.

    Parameters
    ----------
    field_of_view : YTQuantity or tuple of (float, str)
        The field of view of the image and the units.
    image_resolution : YTQuantity or tuple of (float, str)
        The size of each image pixel and the units.
    field : string
        The projected field.
    weight_field : string
        The weight field of the projection.  This has the same meaning
        as in standard projections.
        Default: None.
    photon_field : bool
        If True, the projection data for each slice is decremented by
        4 Pi R^2, where R is the luminosity distance between the
        observer and the slice redshift.
        Default: False.
    save_stack : bool
        If True, the light cone data including each individual
        slice is written to an hdf5 file.
        Default: True.
    save_final_image : bool
        If True, save an image of the final light cone projection.
        Default: True.
    save_slice_images : bool
        Save images for each individual projection slice.
        Default: False.
    cmap_name : string
        Color map for images.
        Default: "algae".
    njobs : int
        The number of parallel jobs over which the light cone projection
        will be split.  Choose -1 for one processor per individual
        projection and 1 to have all processors work together on each
        projection.
        Default: 1.
        NOTE(review): this value is accepted but not forwarded to
        ``parallel_objects`` below -- confirm whether that is intended.
    dynamic : bool
        If True, use dynamic load balancing to create the projections.
        Default: False.

    Raises
    ------
    RuntimeError
        If ``field_of_view`` or ``image_resolution`` is neither a
        YTArray/YTQuantity nor a 2-tuple.

    """

    if isinstance(field_of_view, tuple) and len(field_of_view) == 2:
        field_of_view = self.simulation.quan(field_of_view[0],
                                             field_of_view[1])
    elif not isinstance(field_of_view, YTArray):
        raise RuntimeError(
            "field_of_view argument must be either a YTQuantity "
            "or a tuple of type (float, str).")
    if isinstance(image_resolution, tuple) and len(image_resolution) == 2:
        image_resolution = self.simulation.quan(image_resolution[0],
                                                image_resolution[1])
    elif not isinstance(image_resolution, YTArray):
        raise RuntimeError(
            "image_resolution argument must be either a YTQuantity "
            "or a tuple of type (float, str).")

    # Calculate number of pixels on a side.
    pixels = (field_of_view / image_resolution).in_units("")

    # Clear projection stack.
    projection_stack = []
    projection_weight_stack = []
    if "object" in self.light_cone_solution[-1]:
        del self.light_cone_solution[-1]["object"]

    all_storage = {}
    for my_storage, output in parallel_objects(self.light_cone_solution,
                                               storage=all_storage,
                                               dynamic=dynamic):
        output["object"] = load(output["filename"])
        output["object"].parameters.update(self.set_parameters)

        # Calculate fraction of box required for width corresponding to
        # requested image size.
        output["box_width_fraction"] = (output["box_width_per_angle"] *
                                        field_of_view).in_units("")

        frb = _light_cone_projection(output, field, pixels,
                                     weight_field=weight_field)

        if photon_field:
            # Decrement the flux by the luminosity distance.
            # Assume field in frb is in erg/s/cm^2/Hz
            dL = self.cosmology.luminosity_distance(
                self.observer_redshift, output["redshift"])
            proper_box_size = self.simulation.box_size / \
              (1.0 + output["redshift"])
            # in proper cm^2
            pixel_area = (proper_box_size.in_cgs() / pixels)**2
            factor = pixel_area / (4.0 * np.pi * dL.in_cgs()**2)
            mylog.info("Distance to slice = %s" % dL)
            # in erg/s/cm^2/Hz on observer's image plane.
            frb[field] *= factor

        if weight_field is None:
            my_storage.result = {"field": frb[field]}
        else:
            # Store the weighted sum and the weights separately so slices
            # can be combined before the final division.
            my_storage.result = {
                "field": (frb[field] * frb["weight_field"]),
                "weight_field": frb["weight_field"],
            }

        del output["object"]

    # Combine results from each slice.
    for my_slice in sorted(all_storage):
        slice_result = all_storage[my_slice]
        if save_slice_images:
            name = os.path.join(self.output_dir,
                                "%s_%04d_%04d" %
                                (self.output_prefix,
                                 my_slice, len(self.light_cone_solution)))
            if weight_field is None:
                my_image = slice_result["field"]
            else:
                my_image = slice_result["field"] / \
                  slice_result["weight_field"]
            only_on_root(write_image, np.log10(my_image),
                         "%s_%s.png" % (name, field), cmap_name=cmap_name)

        projection_stack.append(slice_result["field"])
        if weight_field is not None:
            # The weight stack must hold the weights themselves; stacking
            # the weighted field here would make the final ratio trivial.
            projection_weight_stack.append(slice_result["weight_field"])

    projection_stack = self.simulation.arr(projection_stack)
    projection_weight_stack = self.simulation.arr(projection_weight_stack)

    # Add up slices to make light cone projection.
    if weight_field is None:
        light_cone_projection = projection_stack.sum(axis=0)
    else:
        light_cone_projection = \
          projection_stack.sum(axis=0) / \
          projection_weight_stack.sum(axis=0)

    filename = os.path.join(self.output_dir, self.output_prefix)

    # Write image.
    if save_final_image:
        only_on_root(write_image, np.log10(light_cone_projection),
                     "%s_%s.png" % (filename, field), cmap_name=cmap_name)

    # Write stack to hdf5 file.
    if save_stack:
        self._save_light_cone_stack(
            field, weight_field,
            projection_stack, projection_weight_stack,
            filename=filename,
            attrs={"field_of_view": str(field_of_view),
                   "image_resolution": str(image_resolution)})
def parallel_tree_nodes(tree, group="forest",
                        njobs=0, dynamic=False):
    """
    Iterate over nodes in a single tree in parallel.

    Nodes are divided up between the available processor groups.
    Analysis field values can then be assigned to each node (halo).

    Note, unlike the parallel_trees and parallel_nodes functions, no
    saving is performed internally. Results saving with the
    :func:`~ytree.data_structures.arbor.Arbor.save_arbor` must be done
    manually.

    This uses the yt
    :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
    function, which is parallelized with MPI underneath and so is
    suitable for parallelism across compute nodes.

    Parameters
    ----------
    tree : :class:`~ytree.data_structures.tree_node.TreeNode`
        The tree whose nodes will be iterated over.
    group : optional, str ("forest", "tree", or "prog")
        Determines the nodes to be iterated over in the tree: "forest"
        for all nodes in the forest, "tree" for all nodes in the tree,
        or "prog" for all nodes in the line of main progenitors.
        Default: "forest"
    njobs : optional, int
        The number of process groups for parallel iteration. Set to 0
        to make the same number of process groups as available
        processors. Hence, each node will be allocated to a single
        processor. Set to a number less than the total number of
        processors to create groups with multiple processors, which
        will allow for further parallelization. For example, running
        with 8 processors and setting njobs to 4 will result in 4
        groups of 2 processors each.
        Default: 0
    dynamic : optional, bool
        Set to False to divide iterations evenly among process groups.
        Set to True to allocate iterations with a task queue. If True,
        the number of processors available will be one fewer than the
        total as one will act as the task queue server.
        Default: False

    Examples
    --------

    >>> import ytree
    >>> a = ytree.load("arbor/arbor.h5")
    >>> a.add_analysis_field("test_field", default=-1, units="Msun")
    >>> trees = list(a[:])
    >>> for tree in trees:
    ...     for node in ytree.parallel_tree_nodes(tree):
    ...         node["test_field"] = 2 * node["mass"] # some analysis

    See Also
    --------
    parallel_trees, parallel_nodes

    """

    analysis_fields = _get_analysis_fields(tree.arbor)
    nodes = list(tree[group])

    collected = {}
    node_indices = range(len(nodes))
    for store, index in parallel_objects(
            node_indices, storage=collected,
            njobs=njobs, dynamic=dynamic):

        node = nodes[index]
        # Hand the node to the caller; whatever it assigns is harvested
        # once control comes back here.
        yield node

        if not is_root():
            # Mark the entry so it is ignored when results are merged.
            store.result_id = -1
            continue
        store.result_id = node.tree_id
        store.result = {name: node[name] for name in analysis_fields}

    # Combine results for this tree.
    if not is_root():
        return
    for tree_id, values in sorted(collected.items()):
        if tree_id == -1:
            continue
        target = tree.get_node("forest", tree_id)
        for name, value in values.items():
            target[name] = value
def project_light_cone(self, field_of_view, image_resolution, field,
                       weight_field=None, photon_field=False,
                       save_stack=True, save_final_image=True,
                       save_slice_images=False,
                       cmap_name=None,
                       njobs=1, dynamic=False):
    r"""Create projections for light cone, then add them together.

    Parameters
    ----------
    field_of_view : YTQuantity or tuple of (float, str)
        The field of view of the image and the units.
    image_resolution : YTQuantity or tuple of (float, str)
        The size of each image pixel and the units.
    field : string
        The projected field.
    weight_field : string
        The weight field of the projection.  This has the same meaning
        as in standard projections.
        Default: None.
    photon_field : bool
        If True, the projection data for each slice is decremented by
        4 Pi R^2, where R is the luminosity distance between the
        observer and the slice redshift.
        Default: False.
    save_stack : bool
        If True, the light cone data including each individual
        slice is written to an hdf5 file.
        Default: True.
    save_final_image : bool
        If True, save an image of the final light cone projection.
        Default: True.
    save_slice_images : bool
        Save images for each individual projection slice.
        Default: False.
    cmap_name : string
        Color map for images.
        Default: your default colormap.
    njobs : int
        The number of parallel jobs over which the light cone projection
        will be split.  Choose -1 for one processor per individual
        projection and 1 to have all processors work together on each
        projection.
        Default: 1.
        NOTE(review): this value is accepted but not forwarded to
        ``parallel_objects`` below -- confirm whether that is intended.
    dynamic : bool
        If True, use dynamic load balancing to create the projections.
        Default: False.

    Raises
    ------
    RuntimeError
        If ``field_of_view`` or ``image_resolution`` is neither a
        YTArray/YTQuantity nor a 2-tuple.

    """

    if cmap_name is None:
        cmap_name = ytcfg.get("yt", "default_colormap")

    if isinstance(field_of_view, tuple) and len(field_of_view) == 2:
        field_of_view = self.simulation.quan(field_of_view[0],
                                             field_of_view[1])
    elif not isinstance(field_of_view, YTArray):
        raise RuntimeError(
            "field_of_view argument must be either a YTQuantity " +
            "or a tuple of type (float, str).")
    if isinstance(image_resolution, tuple) and len(image_resolution) == 2:
        image_resolution = self.simulation.quan(image_resolution[0],
                                                image_resolution[1])
    elif not isinstance(image_resolution, YTArray):
        raise RuntimeError(
            "image_resolution argument must be either a YTQuantity " +
            "or a tuple of type (float, str).")

    # Calculate number of pixels on a side.
    pixels = int((field_of_view / image_resolution).in_units(""))

    # Clear projection stack.
    projection_stack = []
    projection_weight_stack = []
    if "object" in self.light_cone_solution[-1]:
        del self.light_cone_solution[-1]["object"]

    all_storage = {}
    for my_storage, output in parallel_objects(self.light_cone_solution,
                                               storage=all_storage,
                                               dynamic=dynamic):
        output["object"] = load(output["filename"])
        output["object"].parameters.update(self.set_parameters)

        # Calculate fraction of box required for width corresponding to
        # requested image size.
        output["box_width_fraction"] = (output["box_width_per_angle"] *
                                        field_of_view).in_units("")

        frb = _light_cone_projection(output, field, pixels,
                                     weight_field=weight_field)

        if photon_field:
            # Decrement the flux by the luminosity distance.
            # Assume field in frb is in erg/s/cm^2/Hz
            dL = self.cosmology.luminosity_distance(
                self.observer_redshift, output["redshift"])
            proper_box_size = self.simulation.box_size / \
              (1.0 + output["redshift"])
            # in proper cm^2
            pixel_area = (proper_box_size.in_cgs() / pixels)**2
            factor = pixel_area / (4.0 * np.pi * dL.in_cgs()**2)
            mylog.info("Distance to slice = %s" % dL)
            # in erg/s/cm^2/Hz on observer's image plane.
            frb[field] *= factor

        if weight_field is None:
            my_storage.result = {"field": frb[field]}
        else:
            # Store the weighted sum and the weights separately so slices
            # can be combined before the final division.
            my_storage.result = {
                "field": (frb[field] * frb["weight_field"]),
                "weight_field": frb["weight_field"],
            }

        del output["object"]

    # Combine results from each slice.
    for my_slice in sorted(all_storage):
        slice_result = all_storage[my_slice]
        if save_slice_images:
            name = os.path.join(
                self.output_dir, "%s_%04d_%04d" %
                (self.output_prefix, my_slice,
                 len(self.light_cone_solution)))
            if weight_field is None:
                my_image = slice_result["field"]
            else:
                my_image = slice_result["field"] / \
                  slice_result["weight_field"]
            only_on_root(write_image, np.log10(my_image),
                         "%s_%s.png" % (name, field), cmap_name=cmap_name)

        projection_stack.append(slice_result["field"])
        if weight_field is not None:
            # The weight stack must hold the weights themselves; stacking
            # the weighted field here would make the final ratio trivial.
            projection_weight_stack.append(slice_result["weight_field"])

    projection_stack = self.simulation.arr(projection_stack)
    projection_weight_stack = self.simulation.arr(projection_weight_stack)

    # Add up slices to make light cone projection.
    if weight_field is None:
        light_cone_projection = projection_stack.sum(axis=0)
    else:
        light_cone_projection = \
          projection_stack.sum(axis=0) / \
          projection_weight_stack.sum(axis=0)

    filename = os.path.join(self.output_dir, self.output_prefix)

    # Write image.
    if save_final_image:
        only_on_root(write_image, np.log10(light_cone_projection),
                     "%s_%s.png" % (filename, field), cmap_name=cmap_name)

    # Write stack to hdf5 file.
    if save_stack:
        self._save_light_cone_stack(
            field, weight_field,
            projection_stack, projection_weight_stack,
            filename=filename,
            attrs={
                "field_of_view": str(field_of_view),
                "image_resolution": str(image_resolution),
            })
def __call__(self, data_source, parameters):
    """
    Generate thermal photons for every non-empty chunk of ``data_source``.

    Cells are sorted by temperature and grouped into temperature bins; one
    spectrum is fetched per occupied bin and each cell in the bin draws a
    number of photons and their energies from it, using ``self.prng`` as
    the random number source.

    Parameters
    ----------
    data_source : data container
        Object exposing ``ds``, ``chunks``, ``quantities`` and ``ires``.
    parameters : dict
        Must provide ``"FiducialExposureTime"``, ``"FiducialArea"``,
        ``"FiducialRedshift"``, ``"FiducialAngularDiameterDistance"`` and
        ``"center"``.

    Returns
    -------
    dict
        Keys ``x, y, z, vx, vy, vz, dx, Energy, NumberOfPhotons``; each
        value is the concatenation over chunks (or an empty array when no
        photons were produced).

    Raises
    ------
    RuntimeError
        If a chunk produces more photons than ``self.photons_per_chunk``.
    """
    ds = data_source.ds

    exp_time = parameters["FiducialExposureTime"]
    area = parameters["FiducialArea"]
    redshift = parameters["FiducialRedshift"]
    D_A = parameters["FiducialAngularDiameterDistance"].in_cgs()
    dist_fac = 1.0 / (4. * np.pi * D_A.value * D_A.value *
                      (1. + redshift)**2)
    src_ctr = parameters["center"]

    # Only the maximum is needed; it may extend the upper bin edge.
    my_kT_max = data_source.quantities.extrema("kT")[1]

    self.spectral_model.prepare_spectrum(redshift)
    emid = self.spectral_model.emid
    ebins = self.spectral_model.ebins
    nchan = len(emid)

    citer = data_source.chunks([], "io")

    photons = {}
    for key in ("x", "y", "z", "vx", "vy", "vz", "dx",
                "Energy", "NumberOfPhotons"):
        photons[key] = []

    spectral_norm = area.v * exp_time.v * dist_fac

    tot_num_cells = data_source.ires.shape[0]

    pbar = get_pbar("Generating photons ", tot_num_cells)

    # The bin edges do not depend on the chunk, so build them once.
    kT_bins = np.linspace(kT_min, max(my_kT_max.v, kT_max), num=n_kT + 1)
    dkT = kT_bins[1] - kT_bins[0]

    cell_counter = 0

    for chunk in parallel_objects(citer):

        kT = chunk["kT"].v
        num_cells = len(kT)
        if num_cells == 0:
            continue
        vol = chunk["cell_volume"].in_cgs().v
        EM = (chunk["density"] / mp).in_cgs().v**2
        EM *= 0.5 * (1. + self.X_H) * self.X_H * vol

        if isinstance(self.Zmet, string_types):
            metalZ = chunk[self.Zmet].v
        else:
            metalZ = self.Zmet * np.ones(num_cells)

        # Everything below works in temperature-sorted order.
        idxs = np.argsort(kT)
        # Put the metallicities in the same sorted order as cell_em;
        # otherwise metalZ[ibegin:iend] would refer to unrelated cells.
        metalZ = metalZ[idxs]

        kT_idxs = np.digitize(kT[idxs], kT_bins)
        kT_idxs = np.minimum(np.maximum(1, kT_idxs), n_kT) - 1
        bcounts = np.bincount(kT_idxs).astype("int")
        bcounts = bcounts[bcounts > 0]
        # Start/end offsets (in sorted order) of each occupied bin.
        ecell = np.cumsum(bcounts)
        bcell = ecell - bcounts
        kT_idxs = np.unique(kT_idxs)

        cell_em = EM[idxs] * spectral_norm

        number_of_photons = np.zeros(num_cells, dtype="uint64")
        energies = np.zeros(self.photons_per_chunk)

        start_e = 0
        end_e = 0

        for ibegin, iend, ikT in zip(bcell, ecell, kT_idxs):

            # Temperature at the centre of this bin.
            kT_mid = kT_bins[ikT] + 0.5 * dkT

            n_current = iend - ibegin

            cem = cell_em[ibegin:iend]

            cspec, mspec = self.spectral_model.get_spectrum(kT_mid)
            # Plain-array views of the two spectral components; these are
            # only read below, never written to.
            cspec_d = cspec.d
            mspec_d = mspec.d

            tot_ph_c = cspec_d.sum()
            tot_ph_m = mspec_d.sum()

            u = self.prng.uniform(size=n_current)

            cell_norm_c = tot_ph_c * cem
            cell_norm_m = tot_ph_m * metalZ[ibegin:iend] * cem
            # Integer part is always emitted; the fractional part is
            # emitted with matching probability.
            cell_norm = np.modf(cell_norm_c + cell_norm_m)
            cell_n = np.uint64(cell_norm[1]) + np.uint64(cell_norm[0] >= u)

            number_of_photons[ibegin:iend] = cell_n

            end_e += int(cell_n.sum())

            if end_e > self.photons_per_chunk:
                raise RuntimeError(
                    "Number of photons generated for this chunk " +
                    "exceeds photons_per_chunk (%d)! " %
                    self.photons_per_chunk +
                    "Increase photons_per_chunk!")

            if self.method == "invert_cdf":
                cumspec_c = np.cumsum(cspec_d)
                cumspec_m = np.cumsum(mspec_d)
                cumspec_c = np.insert(cumspec_c, 0, 0.0)
                cumspec_m = np.insert(cumspec_m, 0, 0.0)

            ei = start_e
            for cn, Z in zip(number_of_photons[ibegin:iend],
                             metalZ[ibegin:iend]):
                # Plain int keeps the slice bounds integral.
                cn = int(cn)
                if cn == 0:
                    continue
                # Each cell needs its own combined spectrum.  The first
                # statement allocates a fresh array (Z * ...), and the
                # remaining updates are done in place on that array only,
                # so the per-bin base spectra stay untouched for the next
                # cell while still avoiding extra temporaries.
                if self.method == "invert_cdf":
                    cumspec = Z * cumspec_m
                    cumspec += cumspec_c
                    norm_factor = 1.0 / cumspec[-1]
                    cumspec *= norm_factor
                    randvec = self.prng.uniform(size=cn)
                    randvec.sort()
                    cell_e = np.interp(randvec, cumspec, ebins)
                elif self.method == "accept_reject":
                    tot_spec = Z * mspec_d
                    tot_spec += cspec_d
                    norm_factor = 1.0 / tot_spec.sum()
                    tot_spec *= norm_factor
                    eidxs = self.prng.choice(nchan, size=cn, p=tot_spec)
                    cell_e = emid[eidxs]
                energies[ei:ei + cn] = cell_e
                cell_counter += 1
                pbar.update(cell_counter)
                ei += cn

            start_e = end_e

        active_cells = number_of_photons > 0
        # Map the sorted positions of active cells back to chunk indices.
        idxs = idxs[active_cells]

        photons["NumberOfPhotons"].append(number_of_photons[active_cells])
        photons["Energy"].append(ds.arr(energies[:end_e].copy(), "keV"))
        photons["x"].append(
            (chunk["x"][idxs] - src_ctr[0]).in_units("kpc"))
        photons["y"].append(
            (chunk["y"][idxs] - src_ctr[1]).in_units("kpc"))
        photons["z"].append(
            (chunk["z"][idxs] - src_ctr[2]).in_units("kpc"))
        photons["vx"].append(chunk["velocity_x"][idxs].in_units("km/s"))
        photons["vy"].append(chunk["velocity_y"][idxs].in_units("km/s"))
        photons["vz"].append(chunk["velocity_z"][idxs].in_units("km/s"))
        photons["dx"].append(chunk["dx"][idxs].in_units("kpc"))

    pbar.finish()

    for key in photons:
        if len(photons[key]) > 0:
            photons[key] = uconcatenate(photons[key])
        elif key == "NumberOfPhotons":
            photons[key] = np.array([])
        else:
            photons[key] = YTArray([], photon_units[key])

    mylog.info("Number of photons generated: %d" %
               int(np.sum(photons["NumberOfPhotons"])))
    mylog.info("Number of cells with photons: %d" % len(photons["x"]))

    self.spectral_model.cleanup_spectrum()

    return photons
def _add_lines_to_spectrum(self, field_data, use_peculiar_velocity,
                           output_absorbers_file, store_observables,
                           subgrid_resolution=10, observing_redshift=0.,
                           njobs=-1, min_tau=1e-3):
    """
    Add the absorption lines to the spectrum.

    For every entry of ``self.line_list`` a Voigt profile is deposited
    for each absorber along the ray: the profile is evaluated with
    ``tau_profile`` on finely spaced "virtual" bins, integrated back onto
    the coarse spectral bins and accumulated in ``self.current_tau_field``,
    which is then added to ``self.tau_field``.  At the end ``tau_field``
    is summed over all processes.

    Parameters
    ----------
    field_data : mapping
        Ray data.  Indexed here with ``line['field_name']``, ``'dl'``,
        ``'temperature'`` and, when *use_peculiar_velocity* is set,
        ``'velocity_los'``.
    use_peculiar_velocity : bool
        If true, the wavelength offset uses ``redshift_eff`` instead of
        ``redshift`` and the line-of-sight velocity is recorded for
        labelled absorbers.
    output_absorbers_file : bool
        If true, absorbers whose column density reaches the line's
        ``label_threshold`` are appended to ``self.absorbers_list``,
        which is concatenated across processes at the end.
    store_observables : bool
        If true, per-line observables are stored through the ``store``
        object handed out by ``parallel_objects`` (backed by
        ``self.line_observables_dict``).
    subgrid_resolution : int, optional
        Target number of virtual bins per line width; used to derive
        ``n_vbins_per_bin``.  Default: 10.
    observing_redshift : float, optional
        Passed to ``self._apply_observing_redshift``.  Default: 0.
    njobs : int, optional
        Passed to ``parallel_objects`` for the loop over lines.
        Default: -1.
    min_tau : float, optional
        The deposition window is doubled until the optical depth at both
        of its edges is below this value.  Default: 1e-3.

    Raises
    ------
    RuntimeError
        If ``self.bin_space`` is neither ``'wavelength'`` nor
        ``'velocity'``.
    """
    if len(self.line_list) == 0:
        return

    # In velocity space all offsets are measured relative to the rest
    # wavelength of the first line in the list.
    if self.bin_space == 'velocity':
        wavelength_zero_point = self.line_list[0]['wavelength']

    # Change the redshifts of individual absorbers to account for the
    # redshift at which the observer sits
    redshift, redshift_eff = self._apply_observing_redshift(field_data,
                             use_peculiar_velocity, observing_redshift)

    # step through each ionic transition (e.g. HI, HII, MgII) specified
    # and deposit the lines into the spectrum
    for store, line in parallel_objects(self.line_list, njobs=njobs,
                                        storage=self.line_observables_dict):
        column_density = field_data[line['field_name']] * field_data['dl']
        if (column_density < 0).any():
            mylog.warning(
                "Setting negative densities for field %s to 0! Bad!" % line['field_name'])
            np.clip(column_density, 0, np.inf, out=column_density)
        if (column_density == 0).all():
            mylog.info("Not adding line %s: insufficient column density" % line['label'])
            continue

        # redshift_eff field combines cosmological and velocity redshifts
        # so delta_lambda gives the offset in angstroms from the rest frame
        # wavelength to the observed wavelength of the transition
        if use_peculiar_velocity:
            delta_lambda = line['wavelength'] * redshift_eff
        else:
            delta_lambda = line['wavelength'] * redshift
        # lambda_obs is central wavelength of line after redshift
        lambda_obs = (line['wavelength'] + delta_lambda).to('angstrom')
        # either the observed wavelength or velocity offset
        if self.bin_space == 'wavelength':
            my_obs = lambda_obs[:]
        elif self.bin_space == 'velocity':
            my_obs = c_kms * \
              (lambda_obs - wavelength_zero_point) / \
              wavelength_zero_point
            my_obs.convert_to_units(_bin_space_units[self.bin_space])
        else:
            raise RuntimeError('What bin_space is this?')

        # the total number of absorbers per transition
        n_absorbers = len(lambda_obs)

        # thermal broadening b parameter
        thermal_b = np.sqrt((2 * boltzmann_constant_cgs *
                             field_data['temperature']) /
                            line['atomic_mass'])

        # the actual thermal width of the lines
        thermal_width = (lambda_obs * thermal_b /
                         c_kms).to('angstrom')

        # Sanitize units for faster runtime of the tau_profile machinery.
        lambda_0 = line['wavelength'].d  # line's rest frame; angstroms
        cdens = column_density.in_units("cm**-2").d  # cm**-2
        thermb = thermal_b.to('cm/s').d  # thermal b coefficient; cm / s
        dlambda = delta_lambda.d  # lambda offset; angstroms

        # Array to store sum of the tau values for each index in the
        # light ray that is deposited to the final spectrum
        if store_observables:
            tau_ray = np.zeros(cdens.size)
        if use_peculiar_velocity:
            vlos = field_data['velocity_los'].in_units("km/s").d  # km/s
        else:
            vlos = np.zeros(field_data['temperature'].size)

        # When we actually deposit the voigt profile, sometimes we will
        # have underresolved lines (ie lines with smaller widths than
        # the spectral bin size).  Here, we create virtual wavelength bins
        # small enough in width to well resolve each line, deposit the
        # voigt profile into them, then numerically integrate their tau
        # values and sum them to redeposit them into the actual spectral
        # bins.

        # virtual bins (vbins) will be:
        # 1) <= the bin_width; assures at least as good as spectral bins
        # 2) <= 1/10th the thermal width; assures resolving voigt profiles
        #   (actually 1/subgrid_resolution value, default is 1/10)
        # 3) a bin width will be divisible by vbin_width times a power of
        #    10; this will assure we don't get spikes in the deposited
        #    spectra from uneven numbers of vbins per bin
        if self.bin_space == 'wavelength':
            my_width = thermal_width
        elif self.bin_space == 'velocity':
            my_width = thermal_b
        else:
            raise RuntimeError('What bin space is this?')
        resolution = my_width / self.bin_width
        n_vbins_per_bin = (10 ** (np.ceil( np.log10( subgrid_resolution /
                           resolution) ).clip(0, np.inf) ) ).astype('int')
        vbin_width = self.bin_width.d / n_vbins_per_bin

        # a note to the user about which lines components are unresolved
        if (my_width < self.bin_width).any():
            mylog.info("%d out of %d line components will be " +
                       "deposited as unresolved lines.",
                       (my_width < self.bin_width).sum(),
                       n_absorbers)

        # Keep track of the lambda field before depositing a new line
        # so we can add the current_tau_field and the tau_field together.
        last_lambda_field = self.lambda_field

        # provide a progress bar with information about lines processed
        pbar = get_pbar("Adding line - %s [%f A]: " % \
                        (line['label'], line['wavelength']), n_absorbers)

        # for a given transition, step through each location in the
        # observed spectrum where it occurs and deposit a voigt profile
        # NOTE(review): this inner loop always passes njobs=-1 and does
        # not use the ``njobs`` argument -- confirm that is intended.
        for i in parallel_objects(np.arange(n_absorbers), njobs=-1):

            # if there is a ray element with temperature = 0 or column
            # density = 0, skip it
            if (thermal_b[i] == 0.) or (cdens[i] == 0.):
                pbar.update(i)
                continue

            # the virtual window into which the line is deposited initially
            # spans a region of 2 coarse spectral bins
            # (one on each side of the center_index) but the window
            # can expand as necessary.
            # it will continue to expand until the tau value in the far
            # edge of the wings is less than the min_tau value or it
            # reaches the edge of the spectrum
            window_width_in_bins = 2

            # Widen wavelength window until optical depth falls below min_tau
            # value at the ends to assure that the wings of a line have been
            # fully resolved.
            while True:

                # calculate wavelength window
                if self._auto_lambda and self.lambda_field is None:
                    my_lambda_min = my_obs[i] - \
                      window_width_in_bins * self.bin_width / 2
                    # round off to multiple of bin_width
                    my_lambda_min = self.bin_width * \
                      np.ceil(my_lambda_min / self.bin_width)
                    my_lambda = my_lambda_min + \
                      self.bin_width * np.arange(window_width_in_bins)
                else:
                    my_lambda = self.lambda_field

                # we want to know the bin index in the lambda_field array
                # where each line has its central wavelength after being
                # redshifted.  however, because we don't know a priori how wide
                # a line will be (ie DLAs), we have to include bin indices
                # *outside* the spectral range of the AbsorptionSpectrum
                # object.  Thus, we find the "equivalent" bin index, which
                # may be <0 or >the size of the array.  In the end, we deposit
                # the bins that actually overlap with the AbsorptionSpectrum's
                # range in lambda.
                left_index, center_index, right_index = \
                  self._get_bin_indices(
                      my_lambda, self.bin_width,
                      my_obs[i], window_width_in_bins)
                n_vbins = window_width_in_bins * n_vbins_per_bin[i]

                # the array of virtual bins in lambda space
                vbins = \
                  np.linspace(my_lambda.d[0] + self.bin_width.d * left_index,
                              my_lambda.d[0] + self.bin_width.d * right_index,
                              n_vbins, endpoint=False)

                # tau_profile is handed wavelengths, so velocity-space
                # bins are converted back using the zero-point wavelength
                if self.bin_space == 'wavelength':
                    my_vbins = vbins
                elif self.bin_space == 'velocity':
                    my_vbins = vbins * \
                      wavelength_zero_point.d / c_kms.d + \
                      wavelength_zero_point.d
                else:
                    raise RuntimeError('What bin_space is this?')

                # the virtual bins and their corresponding opacities
                my_vbins, vtau = \
                  tau_profile(
                      lambda_0, line['f_value'], line['gamma'],
                      thermb[i], cdens[i],
                      delta_lambda=dlambda[i], lambda_bins=my_vbins)

                # If tau has not dropped below min tau threshold by the
                # edges (ie the wings), then widen the wavelength
                # window and repeat process.
                if (vtau[0] < min_tau and vtau[-1] < min_tau):
                    if self._auto_lambda:
                        self._create_auto_field_arrays(
                            left_index, right_index, my_lambda)
                        # indices are recomputed against the (possibly
                        # resized) lambda_field
                        left_index, center_index, right_index = \
                          self._get_bin_indices(
                              self.lambda_field, self.bin_width,
                              my_obs[i], window_width_in_bins)
                    break
                window_width_in_bins *= 2

            if center_index is None:
                pbar.update(i)
                continue

            # Numerically integrate the virtual bins to calculate a
            # virtual "equivalent width" of optical depth; then sum these
            # virtual equivalent widths in tau and deposit back into each
            # original spectral tau bin
            # Please note: this is not a true equivalent width in the
            # normal use of the word by observers.  It is an equivalent
            # width in tau, not in flux, and is only used internally in
            # this subgrid deposition as EW_tau.
            vEW_tau = vtau * vbin_width[i]
            EW_tau = np.zeros(right_index - left_index)
            EW_tau_indices = np.arange(left_index, right_index)
            for k, val in enumerate(EW_tau_indices):
                EW_tau[k] = vEW_tau[n_vbins_per_bin[i] * k:
                                    n_vbins_per_bin[i] * (k + 1)].sum()
            EW_tau = EW_tau/self.bin_width.d

            # only deposit EW_tau bins that actually intersect the original
            # spectral wavelength range (i.e. lambda_field)

            # if EW_tau bins don't intersect the original spectral range at
            # all then skip the deposition
            if ((left_index >= self.lambda_field.size) or \
                (right_index < 0)):
                pbar.update(i)
                continue

            # otherwise, determine how much of the original spectrum
            # is intersected by the expanded line window to be deposited,
            # and deposit the Equivalent Width in tau into that intersecting
            # window in the original spectrum's tau array
            else:
                intersect_left_index = max(left_index, 0)
                intersect_right_index = min(right_index, self.lambda_field.size)
                EW_tau_deposit = EW_tau[(intersect_left_index - left_index): \
                                        (intersect_right_index - left_index)]
                self.current_tau_field[intersect_left_index:intersect_right_index] \
                    += EW_tau_deposit
                if store_observables:
                    tau_ray[i] = np.sum(EW_tau_deposit)

            # write out absorbers to file if the column density of
            # an absorber is greater than the specified "label_threshold"
            # of that absorption line
            if output_absorbers_file and \
               line['label_threshold'] is not None and \
               cdens[i] >= line['label_threshold']:

                if use_peculiar_velocity:
                    peculiar_velocity = vlos[i]
                else:
                    peculiar_velocity = 0.0
                self.absorbers_list.append({'label': line['label'],
                                            'wavelength': (lambda_0 + dlambda[i]),
                                            'column_density': column_density[i],
                                            'b_thermal': thermal_b[i],
                                            'redshift': redshift[i],
                                            'redshift_eff': redshift_eff[i],
                                            'v_pec': peculiar_velocity})
            pbar.update(i)
        pbar.finish()

        # Expand the tau_field array to match the updated wavelength
        # array from the last line deposition.
        self._adjust_field_array(last_lambda_field, self.lambda_field,
                                 "tau_field")

        if self.current_tau_field is not None:
            # Now add the current_tau_field.
            self.tau_field += self.current_tau_field

        ## Check keyword before storing any observables
        if store_observables:
            # If running in parallel, make sure that the observable
            # quantities for the dictionary are combined correctly.
            comm = _get_comm(())
            if self._auto_lambda:
                global_lambda_field = self._get_global_lambda_field(comm=comm)
                self._adjust_field_array(self.lambda_field, global_lambda_field,
                                         "current_tau_field")
            if comm.size > 1:
                # NOTE(review): the reduced values end up only in the
                # local list ``obs_dict_fields``, which is not read again
                # below; ``obs_dict`` is built from the un-reduced
                # variables.  Confirm whether the reduction was meant to
                # be assigned back.
                obs_dict_fields = \
                  [column_density, tau_ray, self.current_tau_field,
                   delta_lambda, lambda_obs, thermal_b, thermal_width]
                obs_dict_fields = [comm.mpi_allreduce(field,op="sum")
                                   for field in obs_dict_fields]

            # Calculate the flux decrement equivalent width (the true
            # equivalent width!) for use in post-processing
            if self.current_tau_field is None:
                EW = 0.
            else:
                EW = np.sum(1-np.exp(-self.current_tau_field))*self.bin_width

            # Update the line_observables_dict with values for this line
            obs_dict = {"column_density":column_density,
                        "tau_ray":tau_ray,
                        "EW":EW,
                        "delta_lambda":delta_lambda,
                        "lambda_obs":lambda_obs,
                        "thermal_b":thermal_b,
                        "thermal_width":thermal_width}
            if self.bin_space == 'velocity':
                obs_dict['velocity_offset'] = my_obs

            store.result_id = line['label']
            store.result = obs_dict
            ## Can only delete these if in this statement:
            del obs_dict, tau_ray

        # reset the per-line accumulator before the next transition
        self.current_tau_field = None

        # These always need to be deleted
        # NOTE(review): ``n_vbins`` is only assigned inside the ``while``
        # loop above; if this process deposits no absorber for the line
        # (every element skipped, or no iterations assigned to it) the
        # name looks unbound here and ``del`` would raise NameError --
        # confirm.
        del column_density, delta_lambda, lambda_obs, my_obs, \
            thermal_b, thermal_width, cdens, thermb, dlambda, \
            vlos, resolution, vbin_width, n_vbins, n_vbins_per_bin

    # Combine the per-process results: bring every process onto the same
    # wavelength grid, then sum the optical depths.
    comm = _get_comm(())
    if self._auto_lambda:
        new_lambda = self._get_global_lambda_field(comm=comm)
        self._adjust_field_array(self.lambda_field, new_lambda,
                                 "tau_field")
        self.lambda_field = new_lambda

    self.tau_field = comm.mpi_allreduce(self.tau_field, op="sum")
    if output_absorbers_file:
        self.absorbers_list = comm.par_combine_object(
            self.absorbers_list, "cat", datatype="list")
def _write_fields_to_gdf(ds, fhandle, fields, particle_type_name,
                         field_parameters=None):
    """
    Write field metadata and per-grid field data into an open GDF file.

    The work is done in three passes:

    1. one subgroup per field is created under ``fhandle["field_types"]``
       and given ``field_name``, ``field_units`` and ``staggering``
       attributes;
    2. an empty float64 dataset per (grid, field) pair is created under
       ``fhandle["data"]`` for every grid in ``ds.index.grids``;
    3. the datasets are filled with the grid data converted to the units
       recorded in pass 1, iterating the chunks handed out by
       ``parallel_objects``.

    Parameters
    ----------
    ds : dataset
        Source dataset.
    fhandle : open file handle
        Must already contain the ``field_types`` and ``data`` groups and,
        for each grid, a ``particles/<particle_type_name>`` group.
    fields : iterable
        Field names; for a tuple only element ``[1]`` is used.
    particle_type_name : str
        Name of the particle subgroup that receives particle fields.
    field_parameters : dict, optional
        Field parameters set on every grid before its data is read.

    Notes
    -----
    If a ``field_types`` subgroup already exists (``ValueError`` from
    ``create_group``) a message is printed and ``sys.exit(1)`` is called.
    """
    # Pass 1: describe every field in the "field_types" group.
    for field_name in fields:
        types_group = fhandle["field_types"]
        name = field_name[1] if isinstance(field_name, tuple) else field_name
        finfo = ds._get_field_info(name)
        try:
            meta = types_group.create_group(name)
        except ValueError:
            print("Error - File already contains field called " + name)
            sys.exit(1)

        # Fall back to the bare field name / the string "None" when the
        # field info container has nothing to offer.
        label = finfo.display_name
        unit_str = finfo.units
        meta.attrs["field_name"] = np.string_(label if label else name)
        meta.attrs["field_units"] = np.string_(unit_str if unit_str else "None")

        # @todo: is this always true?
        meta.attrs["staggering"] = 0

    # Pass 2: the datasets have to be created on all processes.
    data_group = fhandle["data"]
    for grid in ds.index.grids:
        for field_name in fields:
            name = field_name[1] if isinstance(field_name, tuple) else field_name
            finfo = ds._get_field_info(name)

            grid_group = data_group["grid_%010i" % (grid.id - grid._id_offset)]
            pt_group = grid_group["particles"][particle_type_name]

            # particle fields live in the particle-type subgroup,
            # everything else directly in the grid group
            owner = pt_group if finfo.particle_type else grid_group
            owner.create_dataset(name, grid.ActiveDimensions,
                                 dtype="float64")

    # Pass 3: fill in the actual data, grid by grid.
    data_group = fhandle["data"]
    all_data = ds.all_data()
    io_regions = all_data.chunks([], "io", local_only=True)
    for region in parallel_objects(io_regions):
        # is there a better way to the get the grids on each chunk?
        for chunk in ds.index._chunk_io(region):
            for grid in chunk.objs:
                for field_name in fields:
                    name = (field_name[1] if isinstance(field_name, tuple)
                            else field_name)
                    finfo = ds._get_field_info(name)

                    # set field parameters, if specified
                    if field_parameters is not None:
                        for key, value in field_parameters.items():
                            grid.set_field_parameter(key, value)

                    grid_group = data_group[
                        "grid_%010i" % (grid.id - grid._id_offset)]
                    pt_group = grid_group["particles"][particle_type_name]

                    # read the data and store it in the units written
                    # to the file in pass 1
                    grid.get_data(name)
                    units = fhandle["field_types"][name].attrs[
                        "field_units"]
                    owner = pt_group if finfo.particle_type else grid_group
                    dset = owner[name]
                    dset[:] = grid[name].in_units(units)
def _write_fields_to_gdf(ds, fhandle, fields, particle_type_name,
                         field_parameters=None):
    """
    Write field metadata and per-grid field data into an open GDF file.

    Three passes are made: the ``field_types`` subgroups are created and
    annotated, empty float64 datasets are created for every grid in
    ``ds.index.grids``, and finally the datasets are filled chunk by
    chunk via ``parallel_objects``.

    Parameters
    ----------
    ds : dataset
        Source dataset.
    fhandle : open file handle
        Must already contain the ``field_types`` and ``data`` groups and,
        for each grid, a ``particles/<particle_type_name>`` group.
    fields : iterable
        Field names; for a tuple only element ``[1]`` is used.
    particle_type_name : str
        Name of the particle subgroup that receives particle fields.
    field_parameters : dict, optional
        Field parameters set on every grid before its data is read.

    Notes
    -----
    If a ``field_types`` subgroup already exists (``ValueError`` from
    ``create_group``) a message is printed and ``sys.exit(1)`` is called.
    """
    for field_name in fields:
        # add field info to field_types group
        g = fhandle["field_types"]
        # create the subgroup with the field's name
        if isinstance(field_name, tuple):
            field_name = field_name[1]
        fi = ds._get_field_info(field_name)
        try:
            sg = g.create_group(field_name)
        except ValueError:
            print("Error - File already contains field called " + field_name)
            sys.exit(1)

        # grab the display name and units from the field info container,
        # falling back to the field name / "None" when they are empty.
        display_name = fi.display_name
        units = fi.units
        sg.attrs["field_name"] = np.string_(
            display_name if display_name else field_name)
        sg.attrs["field_units"] = np.string_(units if units else "None")

        # @todo: is this always true?
        sg.attrs["staggering"] = 0

    # first we must create the datasets on all processes.
    g = fhandle["data"]
    for grid in ds.index.grids:
        for field_name in fields:
            # sanitize and get the field info object
            if isinstance(field_name, tuple):
                field_name = field_name[1]
            fi = ds._get_field_info(field_name)

            grid_group = g["grid_%010i" % (grid.id - grid._id_offset)]
            particles_group = grid_group["particles"]
            pt_group = particles_group[particle_type_name]

            if fi.particle_type:  # particle data
                pt_group.create_dataset(field_name, grid.ActiveDimensions,
                                        dtype="float64")
            else:  # a field
                grid_group.create_dataset(field_name, grid.ActiveDimensions,
                                          dtype="float64")

    # now add the actual data, grid by grid
    g = fhandle["data"]
    data_source = ds.all_data()
    citer = data_source.chunks([], "io", local_only=True)
    for chunk in parallel_objects(citer):
        # Walk every I/O sub-chunk rather than only the first one, so no
        # grid is skipped and an empty sub-chunk list is not an error.
        for io_chunk in ds.index._chunk_io(chunk):
            for grid in io_chunk.objs:
                for field_name in fields:
                    # sanitize and get the field info object
                    if isinstance(field_name, tuple):
                        field_name = field_name[1]
                    fi = ds._get_field_info(field_name)

                    # set field parameters, if specified
                    # (dict.items() works on both Python 2 and 3)
                    if field_parameters is not None:
                        for k, v in field_parameters.items():
                            grid.set_field_parameter(k, v)

                    grid_group = g["grid_%010i" % (grid.id - grid._id_offset)]
                    particles_group = grid_group["particles"]
                    pt_group = particles_group[particle_type_name]

                    # add the field data to the grid group, converted to
                    # the units recorded in the field_types group
                    grid.get_data(field_name)
                    units = fhandle["field_types"][field_name].attrs[
                        "field_units"]
                    if fi.particle_type:  # particle data
                        dset = pt_group[field_name]
                    else:  # a field
                        dset = grid_group[field_name]
                    dset[:] = grid[field_name].in_units(units)
def setup_model(self, data_source, redshift, spectral_norm):
    """
    Prepare the model for generating photons from *data_source*.

    Works out the metallicity / element-abundance unit conversions,
    selects (or defines) the emission measure and temperature fields,
    prepares the spectral model at *redshift*, builds the temperature
    bins and counts the cells/particles inside the temperature range so
    a progress bar can be created.

    Parameters
    ----------
    data_source : data container
        Object whose ``ds`` attribute is the dataset and whose
        ``chunks`` method is iterated to count cells.
    redshift : float
        Stored on the instance and passed to
        ``self.spectral_model.prepare_spectrum``.
    spectral_norm : float
        Stored on the instance as ``self.spectral_norm``.

    Raises
    ------
    RuntimeError
        If a metallicity or element field has units other than
        dimensionless, code_metallicity or Zsun.
    """
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm against the original layout.
    ds = data_source.ds
    # units for which an abundance still has to be rescaled to solar
    unscaled_units = ("dimensionless", "", "code_metallicity")

    self.redshift = redshift
    ptype = None

    if not self.nei and not isinstance(self.Zmet, float):
        Z_units = str(ds._get_field_info(self.Zmet).units)
        if Z_units == "Zsun":
            self.Zconvert = 1.0
        elif Z_units in unscaled_units:
            self.Zconvert = 1.0/metal_abund[self.abund_table]
        else:
            raise RuntimeError("I don't understand metallicity units of %s!" % Z_units)

    if self.num_var_elem > 0:
        for key, value in self.var_elem.items():
            # constant abundances need no conversion factor
            if isinstance(value, float):
                continue
            # strip an ionization-state suffix such as "O^6", if present
            elem = key.split("^")[0]
            n_elem = elem_names.index(elem)
            m_units = str(ds._get_field_info(value).units)
            if m_units == "Zsun":
                self.mconvert[key] = 1.0
            elif m_units in unscaled_units:
                self.mconvert[key] = atomic_weights[1]/(self.atable[n_elem] *
                                                        atomic_weights[n_elem] *
                                                        solar_H_abund)
            else:
                raise RuntimeError("I don't understand units of %s for element %s!" % (m_units, key))

    if self.emission_measure_field is None:
        dens_candidates = [fd for fd in particle_dens_fields
                           if fd in ds.field_list]
        if dens_candidates:
            dens_field = dens_candidates[0]
            ptype = dens_field[0]

            def _emission_measure(field, data):
                nenh = data[dens_field]*data['particle_mass']
                nenh /= mp*mp
                nenh.convert_to_units("cm**-3")
                if data.has_field_parameter("X_H"):
                    X_H = data.get_field_parameter("X_H")
                else:
                    X_H = primordial_H_abund
                if (ptype, 'ElectronAbundance') in ds.field_list:
                    nenh *= X_H * data[ptype, 'ElectronAbundance']
                    nenh *= X_H * (1.-data[ptype, 'NeutralHydrogenAbundance'])
                else:
                    nenh *= 0.5*(1.+X_H)*X_H
                return nenh

            ds.add_field((ptype, 'emission_measure'),
                         function=_emission_measure,
                         particle_type=True,
                         units="cm**-3")
            self.emission_measure_field = (ptype, 'emission_measure')
        else:
            self.emission_measure_field = ('gas', 'emission_measure')
    mylog.info("Using emission measure field '(%s, %s)'." % self.emission_measure_field)

    if self.temperature_field is None:
        temp_candidates = [fd for fd in particle_temp_fields
                           if fd in ds.derived_field_list]
        if temp_candidates:
            self.temperature_field = temp_candidates[0]
            # What we have to do here is make sure that the temperature is
            # set correctly for SPH datasets that don't have the
            # temperature field defined.  What this means is that we must
            # set the mean molecular weight to the value for a fully
            # ionized gas if the ionization fraction is not available in
            # the dataset.
            needs_mu = (self.temperature_field not in ds.field_list
                        and ptype is not None)
            if needs_mu and (ptype, 'ElectronAbundance') not in ds.field_list:
                if data_source.has_field_parameter("X_H"):
                    X_H = data_source.get_field_parameter("X_H")
                else:
                    X_H = 0.76
                data_source.set_field_parameter("mean_molecular_weight",
                                                4.0/(5*X_H+3))
        else:
            self.temperature_field = ('gas', 'temperature')
    mylog.info("Using temperature field '(%s, %s)'." % self.temperature_field)

    self.spectral_model.prepare_spectrum(redshift)
    self.spectral_norm = spectral_norm

    # temperature bin edges, linear or logarithmic in kT
    if self.kT_scale == "linear":
        self.kT_bins = np.linspace(self.kT_min, self.kT_max, num=self.n_kT+1)
    elif self.kT_scale == "log":
        self.kT_bins = np.logspace(np.log10(self.kT_min), np.log10(self.kT_max),
                                   num=self.n_kT+1)
    self.dkT = np.diff(self.kT_bins)

    # count the cells/particles strictly inside the temperature range,
    # summed over all processes
    io_chunks = data_source.chunks([], "io")
    T_lo = self.kT_min*K_per_keV
    T_hi = self.kT_max*K_per_keV
    num_cells = 0
    for chunk in parallel_objects(io_chunks):
        T = chunk[self.temperature_field].d
        num_cells += np.count_nonzero((T > T_lo) & (T < T_hi))
    num_cells = comm.mpi_allreduce(num_cells)

    self.source_type = ds._get_field_info(self.emission_measure_field).name[0]
    self.pbar = get_pbar("Processing cells/particles ", num_cells)
def __init__(self, ds, normal, field, velocity_bounds, center="c",
             width=(1.0, "unitary"), dims=100, thermal_broad=False,
             atomic_weight=56., depth=(1.0, "unitary"), depth_res=256,
             method="integrate", weight_field=None, no_shifting=False,
             north_vector=None, no_ghost=True):
    r""" Initialize a PPVCube object.

    Parameters
    ----------
    ds : dataset
        The dataset.
    normal : array_like or string
        The normal vector along which to make the projections. If an
        array, it will be normalized. If a string, it will be assumed to
        be along one of the principal axes of the domain ("x", "y", or
        "z").
    field : string
        The field to project.
    velocity_bounds : tuple
        A 4-tuple of (vmin, vmax, nbins, units) for the velocity bounds to
        integrate over.
    center : A sequence of floats, a string, or a tuple.
        The coordinate of the center of the image. If set to 'c', 'center'
        or left blank, the plot is centered on the middle of the domain.
        If set to 'max' or 'm', the center will be located at the maximum
        of the ('gas', 'density') field. Centering on the max or min of a
        specific field is supported by providing a tuple such as
        ("min","temperature") or ("max","dark_matter_density"). Units can
        be specified by passing in *center* as a tuple containing a
        coordinate and string unit name or by passing in a YTArray. If a
        list or unitless array is supplied, code units are assumed.
    width : float, tuple, or YTQuantity.
        The width of the projection. A float will assume the width is in
        code units. A (value, unit) tuple or YTQuantity allows for the
        units of the width to be specified. Implies width = height, e.g.
        the aspect ratio of the PPVCube's spatial dimensions is 1.
    dims : integer, optional
        The spatial resolution of the cube. Implies nx = ny, e.g. the
        aspect ratio of the PPVCube's spatial dimensions is 1.
    thermal_broad : boolean, optional
        Whether or not to broaden the line using the gas temperature.
        Default: False.
    atomic_weight : float, optional
        Set this value to the atomic weight of the particle that is
        emitting the line if *thermal_broad* is True. Defaults to 56 (Fe).
    depth : A tuple or a float, optional
        A tuple containing the depth to project through and the string
        key of the unit: (width, 'unit'). If set to a float, code units
        are assumed. Only for off-axis cubes.
    depth_res : integer, optional
        The resolution of integration along the line of sight for
        off-axis cubes. Default: 256
    method : string, optional
        Set the projection method to be used.
        "integrate" : line of sight integration over the line element.
        "sum" : straight summation over the line of sight.
    weight_field : string, optional
        The name of the weighting field. Set to None for no weight.
    no_shifting : boolean, optional
        If set, no shifting due to velocity will occur but only thermal
        broadening. Should not be set when *thermal_broad* is False,
        otherwise nothing happens!
    north_vector : a sequence of floats
        A vector defining the 'up' direction. This option sets the
        orientation of the plane of projection. If not set, an arbitrary
        grid-aligned north_vector is chosen. Ignored in the case of
        on-axis cubes.
    no_ghost: bool, optional
        Optimization option for off-axis cases. If True, homogenized
        bricks will extrapolate out from grid instead of interpolating
        from ghost zones that have to first be calculated. This can lead
        to large speed improvements, but at a loss of
        accuracy/smoothness in resulting image. The effects are less
        notable when the transfer function is smooth and broad.
        Default: True

    Raises
    ------
    RuntimeError
        If *no_shifting* is True while *thermal_broad* is False, or if
        *method* is not "integrate" or "sum".

    Examples
    --------
    >>> i = 60*np.pi/180.
    >>> L = [0.0,np.sin(i),np.cos(i)]
    >>> cube = PPVCube(ds, L, "density", (-5.,4.,100,"km/s"), width=(10.,"kpc"))
    """

    self.ds = ds
    self.field = field
    self.width = width
    self.particle_mass = atomic_weight * mh
    self.thermal_broad = thermal_broad
    self.no_shifting = no_shifting

    # Off-axis cubes need the width as plain code_length numbers.
    if not isinstance(normal, string_types):
        width = ds.coordinates.sanitize_width(normal, width, depth)
        width = tuple(el.in_units('code_length').v for el in width)

    if no_shifting and not thermal_broad:
        raise RuntimeError(
            "no_shifting cannot be True when thermal_broad is False!")

    self.center = ds.coordinates.sanitize_center(center, normal)[0]

    self.nx = dims
    self.ny = dims
    self.nv = velocity_bounds[2]

    if method not in ["integrate", "sum"]:
        raise RuntimeError("Only the 'integrate' and 'sum' projection "
                           "methods are supported in PPVCube.")

    dd = ds.all_data()
    fd = dd._determine_fields(field)[0]
    self.field_units = ds._get_field_info(fd).units

    # nv velocity channels -> nv + 1 bin edges
    self.vbins = ds.arr(
        np.linspace(velocity_bounds[0], velocity_bounds[1],
                    velocity_bounds[2] + 1),
        velocity_bounds[3])

    self._vbins = self.vbins.copy()
    self.vmid = 0.5 * (self.vbins[1:] + self.vbins[:-1])
    self.vmid_cgs = self.vmid.in_cgs().v
    self.dv = self.vbins[1] - self.vbins[0]
    self.dv_cgs = self.dv.in_cgs().v

    self.current_v = 0.0

    # Temporary derived fields used to build each velocity channel;
    # both are removed again at the end of the constructor.
    _vlos = create_vlos(normal, self.no_shifting)
    self.ds.add_field(("gas", "v_los"), function=_vlos, units="cm/s")

    _intensity = self._create_intensity()
    self.ds.add_field(("gas", "intensity"), function=_intensity,
                      units=self.field_units)

    if method == "integrate" and weight_field is None:
        # an unweighted line integral picks up a length unit
        self.proj_units = str(ds.quan(1.0, self.field_units + "*cm").units)
    else:
        # "sum", and a weighted "integrate" (a weighted average along
        # the line of sight), keep the units of the field itself.
        self.proj_units = self.field_units

    storage = {}
    pbar = get_pbar("Generating cube.", self.nv)
    for sto, i in parallel_objects(range(self.nv), storage=storage):
        # the intensity field reads current_v to select this channel
        self.current_v = self.vmid_cgs[i]
        if isinstance(normal, string_types):
            prj = ds.proj("intensity", ds.coordinates.axis_id[normal],
                          method=method, weight_field=weight_field)
            buf = prj.to_frb(width, self.nx,
                             center=self.center)["intensity"]
        else:
            buf = off_axis_projection(ds, self.center, normal, width,
                                      (self.nx, self.ny, depth_res),
                                      "intensity",
                                      north_vector=north_vector,
                                      no_ghost=no_ghost, method=method,
                                      weight=weight_field).swapaxes(0, 1)
        sto.result_id = i
        sto.result = buf
        pbar.update(i)
    pbar.finish()

    # Only the root process assembles the channels into the cube.
    self.data = ds.arr(np.zeros((self.nx, self.ny, self.nv)),
                       self.proj_units)
    if is_root():
        for i, buf in sorted(storage.items()):
            self.data[:, :, i] = buf.transpose()

    self.axis_type = "velocity"

    # Now fix the width
    if iterable(self.width):
        self.width = ds.quan(self.width[0], self.width[1])
    elif not isinstance(self.width, YTQuantity):
        self.width = ds.quan(self.width, "code_length")

    self.ds.field_info.pop(("gas", "intensity"))
    self.ds.field_info.pop(("gas", "v_los"))
def parallel_trees(trees, save_every=None, filename=None,
                   njobs=0, dynamic=False):
    """
    Iterate over a list of trees in parallel.

    The trees are handed out to the available process groups with the yt
    :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
    function (MPI underneath, so suitable for parallelism across compute
    nodes). Inside the loop, analysis field values can be assigned to
    the halos of each yielded tree. The values are collected on the root
    process and the arbor is saved either once at the end or after every
    ``save_every`` trees.

    Parameters
    ----------
    trees : list of :class:`~ytree.data_structures.tree_node.TreeNode` objects
        The trees to be iterated over in parallel.
    save_every : optional, int or False
        Number of trees to be completed before results are saved, so
        that intermediate results survive a restart. If None, a single
        save happens after all trees are done. If False, nothing is
        saved.
        Default: None
    filename : optional, string
        The name of the new arbor to be saved. If None, the naming
        convention follows the filename keyword of the
        :func:`~ytree.data_structures.arbor.Arbor.save_arbor` function.
        Default: None
    njobs : optional, int
        The number of process groups for parallel iteration. With 0
        there are as many groups as processors, i.e. one tree per
        processor. A smaller number creates groups with several
        processors each, allowing further parallelization within a
        tree; e.g. 8 processors with njobs=4 gives 4 groups of 2.
        Default: 0
    dynamic : optional, bool
        False divides iterations evenly among process groups. True
        allocates iterations with a task queue, in which case one
        processor acts as the task queue server and is not available
        for work.
        Default: False

    Examples
    --------

    >>> import ytree
    >>> a = ytree.load("arbor/arbor.h5")
    >>> a.add_analysis_field("test_field", default=-1, units="Msun")
    >>> trees = list(a[:])
    >>> for tree in ytree.parallel_trees(trees):
    ...     for node in tree["forest"]:
    ...         node["test_field"] = 2 * node["mass"] # some analysis

    See Also
    --------
    parallel_tree_nodes, parallel_nodes

    """
    arbor = trees[0].arbor
    afields = _get_analysis_fields(arbor)

    n_trees = len(trees)
    # None and False both mean "one batch holding every tree";
    # only False switches saving off.
    save = save_every is not False
    if save_every is None or save_every is False:
        save_every = n_trees
    n_batches = int(np.ceil(n_trees / save_every))

    for batch in range(n_batches):
        start = batch * save_every
        end = min(start + save_every, n_trees)

        arbor_storage = {}
        for tree_store, itree in parallel_objects(
                range(start, end), storage=arbor_storage,
                njobs=njobs, dynamic=dynamic):

            my_tree = trees[itree]
            yield my_tree

            # Control returns here once the caller has finished with the
            # tree; only root processes report the analysis results.
            if not is_root():
                tree_store.result_id = None
                continue

            my_root = my_tree.find_root()
            tree_store.result_id = (my_root._arbor_index, my_tree.tree_id)

            # If the tree is not a root, only save the "tree" selection
            # as we could overwrite other trees in the forest.
            selection = "forest" if my_tree.is_root else "tree"
            tree_store.result = {field: my_tree[selection, field]
                                 for field in afields}

        # combine results for all trees
        if is_root():
            for itree in range(start, end):
                my_tree = trees[itree]
                my_root = my_tree.find_root()
                stored = arbor_storage[
                    (my_root._arbor_index, my_tree.tree_id)]

                # a root tree owns the whole field array of its forest;
                # a sub-tree only its own slots within it
                indices = (slice(None) if my_tree.is_root
                           else [my_tree._tree_field_indices])

                for field in afields:
                    if field not in my_root.field_data:
                        arbor._node_io._initialize_analysis_field(
                            my_root, field)
                    my_root.field_data[field][indices] = stored[field]

        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; the save/reload step is assumed to run for every batch
        # outside the ``is_root()`` guard -- confirm.
        if save:
            fn = arbor.save_arbor(filename=filename, trees=trees)
            # continue with nodes that belong to the freshly saved arbor
            arbor = ytree_load(fn)
            trees = [regenerate_node(arbor, tree, new_index=i)
                     for i, tree in enumerate(trees)]
def make_light_ray(self, seed=None, start_position=None, end_position=None,
                   trajectory=None, fields=None, solution_filename=None,
                   data_filename=None, get_los_velocity=True, njobs=-1):
    """
    Build a light ray and sample field values along it.

    The ray solution is computed first, then each segment of the solution
    is processed (in parallel over segments): the segment's dataset is
    loaded, the segment is split into non-periodic pieces, and the
    requested fields plus the path length ``dl`` are collected for every
    cell crossed. A redshift is assigned to each element assuming a
    linear relation between path length and redshift, and elements with
    zero path length are dropped. The combined result is stored on
    ``self._data`` and optionally written with ``_write_light_ray``.

    Parameters
    ----------
    seed : int
        Seed for the random number generator.
        Default: None.
    start_position : list of floats
        Used only if creating a light ray from a single dataset.
        The coordinates of the starting position of the ray.
        Default: None.
    end_position : list of floats
        Used only if creating a light ray from a single dataset.
        The coordinates of the ending position of the ray.
        Default: None.
    trajectory : list of floats
        Used only if creating a light ray from a single dataset.
        The (r, theta, phi) direction of the light ray. Use either
        end_position or trajectory, not both.
        Default: None.
    fields : list
        A list of fields for which to get data.
        Default: None.
    solution_filename : string
        Path to a text file where the trajectories of each
        subray is written out.
        Default: None.
    data_filename : string
        Path to output file for ray data.
        Default: None.
    get_los_velocity : bool
        If True, the line of sight velocity is calculated for
        each point in the ray.
        Default: True.
    njobs : int
        The number of parallel jobs over which the segments will
        be split. Choose -1 for one processor per segment.
        Default: -1.

    Returns
    -------
    The flattened ray data produced by ``_flatten_dict_list``, holding
    the fields for the whole ray.

    Examples
    --------

    >>> from yt.mods import *
    >>> from yt.analysis_modules.cosmological_analysis.light_ray.api import (
    ...     LightRay)
    >>> my_ray = LightRay('enzo_tiny_simulation/32Mpc_32.enzo', 'Enzo',
    ...                   0., 0.1, time_data=False)
    ...
    >>> my_ray.make_light_ray(seed=12345,
    ...                       solution_filename='solution.txt',
    ...                       data_filename='my_ray.h5',
    ...                       fields=['temperature', 'density'],
    ...                       get_los_velocity=True)

    """

    # Work out which datasets and trajectories make up the ray.
    self._calculate_light_ray_solution(
        seed=seed,
        start_position=start_position,
        end_position=end_position,
        trajectory=trajectory,
        filename=solution_filename)

    # data_fields are read straight from each ray object; all_fields
    # additionally holds the quantities derived in this method.
    self._data = {}
    if fields is None:
        fields = []
    velocity_fields = ['x-velocity', 'y-velocity', 'z-velocity']
    data_fields = fields[:]
    all_fields = fields[:]
    all_fields.extend(['dl', 'dredshift', 'redshift'])
    if get_los_velocity:
        all_fields.extend(velocity_fields + ['los_velocity'])
        data_fields.extend(velocity_fields)

    ray_storage = {}
    segment_iter = parallel_objects(self.light_ray_solution,
                                    storage=ray_storage,
                                    njobs=njobs)
    for segment_store, segment in segment_iter:

        # Each segment is sampled from its own dataset.
        ds = load(segment['filename'])

        # Scale the stored start/end by the domain width and offset them
        # by the domain left edge. This overwrites the solution entries.
        segment["start"] = \
            ds.domain_width * segment["start"] + ds.domain_left_edge
        segment["end"] = \
            ds.domain_width * segment["end"] + ds.domain_left_edge

        # Redshift at the far end of this segment.
        if self.near_redshift == self.far_redshift:
            traversed = ds.domain_width[0].in_units("Mpccm / h") * \
                segment["traversal_box_fraction"]
            next_redshift = segment["redshift"] - \
                self._deltaz_forward(segment["redshift"], traversed)
        elif segment['next'] is None:
            next_redshift = self.near_redshift
        else:
            next_redshift = segment['next']['redshift']

        segment_message = "Getting segment at z = %s: %s to %s." % \
            (segment['redshift'], segment['start'], segment['end'])
        mylog.info(segment_message)

        # A ray that wraps around the box is cut into non-periodic pieces.
        pieces = periodic_ray(segment['start'], segment['end'],
                              left=ds.domain_left_edge,
                              right=ds.domain_right_edge)

        # One list per field, filled piece by piece.
        sub_data = {'segment_redshift': segment['redshift']}
        for name in all_fields:
            sub_data[name] = []

        for piece in pieces:
            piece_start, piece_end = piece[0], piece[1]
            piece_message = "Getting subsegment: %s to %s." % \
                (list(piece_start), list(piece_end))
            mylog.info(piece_message)

            ray = ds.ray(piece_start, piece_end)
            # Order the ray elements by their "t" value.
            order = np.argsort(ray["t"])

            sorted_dts = ray['dts'][order]
            ray_length = vector_length(ray.start_point, ray.end_point)
            sub_data['dl'].extend(sorted_dts * ray_length)

            for name in data_fields:
                sub_data[name].extend(ray[name][order])

            if get_los_velocity:
                # Unit vector along the piece.
                line_of_sight = piece_end - piece_start
                line_of_sight /= ((line_of_sight**2).sum())**0.5
                velocity = ds.arr([ray['x-velocity'],
                                   ray['y-velocity'],
                                   ray['z-velocity']])
                projected = (np.rollaxis(velocity, 1) *
                             line_of_sight).sum(axis=1)
                sub_data['los_velocity'].extend(projected[order])
                del velocity

            ray.clear_data()
            del ray, order

        # Turn the accumulated lists into cgs arrays. The membership test
        # against the string "xyz" is a substring check and is kept
        # exactly as written.
        for key in sub_data:
            if key not in "xyz":
                sub_data[key] = ds.arr(sub_data[key]).in_cgs()

        # Get redshift for each lixel.  Assume linear relation between
        # l and z.
        segment_length = vector_length(segment['start'],
                                       segment['end']).in_cgs()
        redshift_span = segment['redshift'] - next_redshift
        sub_data['dredshift'] = \
            redshift_span * (sub_data['dl'] / segment_length)
        sub_data['redshift'] = segment['redshift'] - \
            sub_data['dredshift'].cumsum() + sub_data['dredshift']

        # Remove empty lixels.
        keep = sub_data['dl'].nonzero()
        for name in all_fields:
            sub_data[name] = sub_data[name][keep]
        del keep

        # Hand the segment result to parallel_objects storage.
        segment_store.result = sub_data

        del ds

    # Collect the per-segment dictionaries and order them from the
    # highest segment redshift to the lowest.
    all_data = sorted(ray_storage.values(),
                      key=lambda entry: entry['segment_redshift'],
                      reverse=True)
    # Merge into a single dictionary containing fields for the whole ray.
    all_data = _flatten_dict_list(all_data,
                                  exceptions=['segment_redshift'])

    if data_filename is not None:
        self._write_light_ray(data_filename, all_data)

    self._data = all_data
    return all_data
def make_light_ray(self, seed=None, periodic=True,
                   left_edge=None, right_edge=None, min_level=None,
                   start_position=None, end_position=None,
                   trajectory=None, fields=None, setup_function=None,
                   solution_filename=None, data_filename=None,
                   get_los_velocity=None, use_peculiar_velocity=True,
                   redshift=None, field_parameters=None, njobs=-1):
    """
    Actually generate the LightRay by traversing the desired dataset.

    A light ray consists of a list of field values for cells
    intersected by the ray and the path length of the ray through
    those cells. Light ray data is written out to an hdf5 file when
    ``data_filename`` is given.

    The arguments passed in are not modified: ``fields`` is copied
    before the temperature field is added, and positions and edges
    supplied with units are converted into new arrays.

    **Parameters**

    :seed: optional, int

        Seed for the random number generator.
        Default: None.

    :periodic: optional, bool

        If True, ray trajectories will make use of periodic
        boundaries. If False, ray trajectories will not be
        periodic.
        Default : True.

    :left_edge: optional, iterable of floats or YTArray

        The left corner of the region in which rays are to be
        generated. If None, the left edge will be that of the
        domain. If specified without units, it is assumed to
        be in code units.
        Default: None.

    :right_edge: optional, iterable of floats or YTArray

        The right corner of the region in which rays are to be
        generated. If None, the right edge will be that of the
        domain. If specified without units, it is assumed to
        be in code units.
        Default: None.

    :min_level: optional, int

        The minimum refinement level of the spatial region in which
        the ray passes. This can be used with zoom-in simulations
        where the high resolution region does not keep a constant
        geometry.
        Default: None.

    :start_position: optional, iterable of floats or YTArray.

        Used only if creating a light ray from a single dataset.
        The coordinates of the starting position of the ray.
        If specified without units, it is assumed to be in code units.
        Default: None.

    :end_position: optional, iterable of floats or YTArray.

        Used only if creating a light ray from a single dataset.
        The coordinates of the ending position of the ray.
        If specified without units, it is assumed to be in code units.
        Default: None.

    :trajectory: optional, list of floats

        Used only if creating a light ray from a single dataset.
        The (r, theta, phi) direction of the light ray. Use either
        end_position or trajectory, not both.
        Default: None.

    :fields: optional, list

        A list of fields for which to get data. The gas temperature
        field is added automatically if it is not present.
        Default: None.

    :setup_function: optional, callable, accepts a ds

        This function will be called on each dataset that is loaded
        to create the light ray. For, example, this can be used to
        add new derived fields.
        Default: None.

    :solution_filename: optional, string

        Path to a text file where the trajectories of each
        subray is written out.
        Default: None.

    :data_filename: optional, string

        Path to output file for ray data.
        Default: None.

    :get_los_velocity: optional, bool

        Deprecated. If not None, its value replaces
        ``use_peculiar_velocity`` and a warning is logged.
        Default: None.

    :use_peculiar_velocity: optional, bool

        If True, the peculiar velocity along the ray will be sampled for
        calculating the effective redshift combining the cosmological
        redshift and the doppler redshift.
        Default: True.

    :redshift: optional, float

        Used with light rays made from single datasets to specify a
        starting redshift for the ray. If not used, the starting
        redshift will be 0 for a non-cosmological dataset and
        the dataset redshift for a cosmological dataset.
        Default: None.

    :field_parameters: optional, dict

        Used to set field parameters in light rays. For example,
        if the 'bulk_velocity' field parameter is set, the relative
        velocities used to calculate peculiar velocity will be adjusted
        accordingly.
        Default: None.

    :njobs: optional, int

        The number of parallel jobs over which the segments will
        be split. Choose -1 for one processor per segment.
        Default: -1.

    **Returns**

    The dataset obtained by loading ``data_filename`` after the ray has
    been written, or None when ``data_filename`` is None. In both cases
    the ray data is also stored on ``self._data``.

    **Examples**

    Make a light ray from multiple datasets:

    >>> import yt
    >>> from trident import LightRay
    >>> my_ray = LightRay("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo",
    ...                   0., 0.1, time_data=False)
    ...
    >>> my_ray.make_light_ray(seed=12345,
    ...                       solution_filename="solution.txt",
    ...                       data_filename="my_ray.h5",
    ...                       fields=["temperature", "density"],
    ...                       use_peculiar_velocity=True)

    Make a light ray from a single dataset:

    >>> import yt
    >>> from trident import LightRay
    >>> my_ray = LightRay("IsolatedGalaxy/galaxy0030/galaxy0030")
    ...
    >>> my_ray.make_light_ray(start_position=[0., 0., 0.],
    ...                       end_position=[1., 1., 1.],
    ...                       solution_filename="solution.txt",
    ...                       data_filename="my_ray.h5",
    ...                       fields=["temperature", "density"],
    ...                       use_peculiar_velocity=True)

    """
    if self.simulation_type is None:
        domain = self.ds
    else:
        domain = self.simulation

    assumed_units = "code_length"

    # in_units() returns a converted copy, so neither the caller's arrays
    # nor the domain's own edge arrays are converted in place.
    if left_edge is None:
        left_edge = domain.domain_left_edge
    elif not hasattr(left_edge, 'units'):
        left_edge = domain.arr(left_edge, assumed_units)
    left_edge = left_edge.in_units('unitary')

    if right_edge is None:
        right_edge = domain.domain_right_edge
    elif not hasattr(right_edge, 'units'):
        right_edge = domain.arr(right_edge, assumed_units)
    right_edge = right_edge.in_units('unitary')

    if start_position is not None:
        if not hasattr(start_position, 'units'):
            start_position = self.ds.arr(start_position, assumed_units)
        start_position = start_position.in_units('unitary')

    if end_position is not None:
        if not hasattr(end_position, 'units'):
            end_position = self.ds.arr(end_position, assumed_units)
        end_position = end_position.in_units('unitary')

    if get_los_velocity is not None:
        use_peculiar_velocity = get_los_velocity
        mylog.warning("'get_los_velocity' kwarg is deprecated. " +
                      "Use 'use_peculiar_velocity' instead.")

    # Calculate solution.
    self._calculate_light_ray_solution(seed=seed,
                                       left_edge=left_edge,
                                       right_edge=right_edge,
                                       min_level=min_level,
                                       periodic=periodic,
                                       start_position=start_position,
                                       end_position=end_position,
                                       trajectory=trajectory,
                                       filename=solution_filename)

    if field_parameters is None:
        field_parameters = {}

    # Initialize data structures.
    self._data = {}
    # temperature field is automatically added to fields; work on a copy
    # so the list passed in by the caller is left untouched.
    fields = [] if fields is None else list(fields)
    if (('gas', 'temperature') not in fields) and \
            ('temperature' not in fields):
        fields.append(('gas', 'temperature'))
    data_fields = fields[:]
    all_fields = fields[:]
    all_fields.extend(['l', 'dl', 'redshift'])
    all_fields.extend(['x', 'y', 'z'])
    data_fields.extend(['x', 'y', 'z'])
    if use_peculiar_velocity:
        all_fields.extend(['relative_velocity_x', 'relative_velocity_y',
                           'relative_velocity_z',
                           'velocity_los', 'redshift_eff',
                           'redshift_dopp'])
        data_fields.extend(['relative_velocity_x', 'relative_velocity_y',
                            'relative_velocity_z'])

    all_ray_storage = {}
    for my_storage, my_segment in parallel_objects(self.light_ray_solution,
                                                   storage=all_ray_storage,
                                                   njobs=njobs):

        # In case of simple rays, use the already loaded dataset: self.ds,
        # otherwise, load dataset for segment.
        if self.ds is None:
            ds = load(my_segment['filename'], **self.load_kwargs)
        else:
            ds = self.ds

        if redshift is not None:
            if ds.cosmological_simulation and \
                    redshift != ds.current_redshift:
                mylog.warning(
                    "Generating light ray with different redshift than " +
                    "the dataset itself.")
            my_segment["redshift"] = redshift

        if setup_function is not None:
            setup_function(ds)

        # Redshift at the far end of this segment.
        if not ds.cosmological_simulation:
            next_redshift = my_segment["redshift"]
        elif self.near_redshift == self.far_redshift:
            # The traversal fraction may already carry length units;
            # otherwise it is a fraction of the box width.
            if isinstance(my_segment["traversal_box_fraction"], YTArray) and \
                    not my_segment["traversal_box_fraction"].units.is_dimensionless:
                segment_length = \
                    my_segment["traversal_box_fraction"].in_units("Mpccm / h")
            else:
                segment_length = my_segment["traversal_box_fraction"] * \
                    ds.domain_width[0].in_units("Mpccm / h")
            next_redshift = my_segment["redshift"] - \
                self._deltaz_forward(my_segment["redshift"],
                                     segment_length)
        elif my_segment.get("next", None) is None:
            next_redshift = self.near_redshift
        else:
            next_redshift = my_segment['next']['redshift']

        # Make sure start, end, left, right
        # are using the dataset's unit system.
        my_start = ds.arr(my_segment['start'])
        my_end = ds.arr(my_segment['end'])
        my_left = ds.arr(left_edge)
        my_right = ds.arr(right_edge)
        mylog.info("Getting segment at z = %s: %s to %s." %
                   (my_segment['redshift'], my_start, my_end))

        # Break periodic ray into non-periodic segments.
        sub_segments = periodic_ray(my_start, my_end,
                                    left=my_left, right=my_right)

        # Prepare data structure for subsegment.
        sub_data = {}
        # Put supplementary data that we want communicated across
        # processors in here.
        sub_data['extra_data'] = {}
        sub_data['extra_data']['segment_redshift'] = \
            my_segment['redshift']
        sub_data['extra_data']['unique_identifier'] = \
            ds.unique_identifier
        for field in all_fields:
            sub_data[field] = []

        # Get data for all subsegments in segment.
        for sub_segment in sub_segments:
            mylog.info("Getting subsegment: %s to %s." %
                       (list(sub_segment[0]), list(sub_segment[1])))
            sub_ray = ds.ray(sub_segment[0], sub_segment[1])
            for key, val in field_parameters.items():
                sub_ray.set_field_parameter(key, val)
            # Order the ray elements by their "t" value.
            asort = np.argsort(sub_ray["t"])
            sub_data['l'].extend(
                sub_ray['t'][asort] *
                vector_length(sub_ray.start_point, sub_ray.end_point))
            sub_data['dl'].extend(
                sub_ray['dts'][asort] *
                vector_length(sub_ray.start_point, sub_ray.end_point))

            for field in data_fields:
                sub_data[field].extend(sub_ray[field][asort])

            if use_peculiar_velocity:
                # Unit vector pointing from the end of the subsegment
                # back to its start.
                line_of_sight = sub_segment[0] - sub_segment[1]
                line_of_sight /= ((line_of_sight**2).sum())**0.5
                sub_vel = ds.arr([sub_ray['relative_velocity_x'],
                                  sub_ray['relative_velocity_y'],
                                  sub_ray['relative_velocity_z']])
                # Line of sight velocity = vel_los
                sub_vel_los = (np.rollaxis(sub_vel, 1) *
                               line_of_sight).sum(axis=1)
                sub_data['velocity_los'].extend(sub_vel_los[asort])

                # doppler redshift:
                # See https://en.wikipedia.org/wiki/Redshift and
                # Peebles eqns: 5.48, 5.49

                # 1 + redshift_dopp = (1 + v*cos(theta)/c) /
                # sqrt(1 - v**2/c**2)

                # where v is the peculiar velocity (ie physical velocity
                # without the hubble flow, but no hubble flow in sim, so
                # just the physical velocity).

                # the bulk of the doppler redshift is from line of sight
                # motion, but there is a small amount from time dilation
                # of transverse motion, hence the inclusion of theta (the
                # angle between line of sight and the velocity).
                # theta is the angle between the ray vector (i.e. line of
                # sight) and the velocity vectors: a dot b = ab cos(theta)

                sub_vel_mag = sub_ray['velocity_magnitude']
                cos_theta = line_of_sight.dot(sub_vel) / sub_vel_mag
                # Protect against situations where velocity mag is exactly
                # zero, in which case zero / zero = NaN.
                cos_theta = np.nan_to_num(cos_theta)
                redshift_dopp = \
                    (1 + sub_vel_mag * cos_theta / speed_of_light_cgs) / \
                    np.sqrt(1 - sub_vel_mag**2 / speed_of_light_cgs**2) - 1
                sub_data['redshift_dopp'].extend(redshift_dopp[asort])
                del sub_vel, sub_vel_los, sub_vel_mag, cos_theta, \
                    redshift_dopp

            sub_ray.clear_data()
            del sub_ray, asort

        # Turn the accumulated lists into cgs arrays; 'extra_data' is
        # plain metadata and is left alone.
        for key in sub_data:
            if key == "extra_data":
                continue
            sub_data[key] = ds.arr(sub_data[key]).in_cgs()

        # Get redshift for each lixel.  Assume linear relation between l
        # and z.  so z = z_start - (l * (z_range / l_range))
        sub_data['redshift'] = my_segment['redshift'] - \
            (sub_data['l'] *
             (my_segment['redshift'] - next_redshift) /
             vector_length(my_start, my_end).in_cgs())

        # When using the peculiar velocity, create effective redshift
        # (redshift_eff) field combining cosmological redshift and
        # doppler redshift.

        # then to add cosmological redshift and doppler redshifts, follow
        # eqn 3.75 in Peacock's Cosmological Physics:
        # 1 + z_eff = (1 + z_cosmo) * (1 + z_doppler)

        if use_peculiar_velocity:
            sub_data['redshift_eff'] = ((1 + sub_data['redshift_dopp']) *
                                        (1 + sub_data['redshift'])) - 1

        # Remove empty lixels.
        sub_dl_nonzero = sub_data['dl'].nonzero()
        for field in all_fields:
            sub_data[field] = sub_data[field][sub_dl_nonzero]
        del sub_dl_nonzero

        # Add to storage.
        my_storage.result = sub_data

        del ds

    # Reconstruct ray data from parallel_objects storage.
    all_data = [my_data for my_data in all_ray_storage.values()]
    # This is now a list of segments where each one is a dictionary
    # with all the fields.
    all_data.sort(key=lambda a: a['extra_data']['segment_redshift'],
                  reverse=True)

    # Gather segment data to add to the light ray solution.
    for segment_data, my_segment in \
            zip(all_data, self.light_ray_solution):
        my_segment["unique_identifier"] = \
            segment_data["extra_data"]["unique_identifier"]

    # Flatten the list into a single dictionary containing fields
    # for the whole ray.
    all_data = _flatten_dict_list(all_data, exceptions=['extra_data'])

    self._data = all_data

    if data_filename is not None:
        self._write_light_ray(data_filename, all_data)
        ray_ds = load(data_filename)
        return ray_ds
    else:
        return None
def _add_lines_to_spectrum(self, field_data, use_peculiar_velocity,
                           output_absorbers_file, subgrid_resolution=10,
                           observing_redshift=0., njobs=-1):
    """
    Add the absorption lines to the spectrum.

    For every entry of ``self.line_list`` a Voigt profile is computed
    for each absorber along the ray and its optical depth is added to
    ``self.tau_field``. After all lines are processed, ``self.tau_field``
    is summed across processors.

    Parameters
    ----------
    field_data : mapping of field name to array
        Ray data. This method reads ``'dl'``, ``'temperature'``, the
        field named by each line's ``'field_name'`` and, when
        ``use_peculiar_velocity`` is True, ``'velocity_los'``.
    use_peculiar_velocity : bool
        If True, the effective redshift returned by
        ``_apply_observing_redshift`` is used to shift the lines;
        otherwise the plain redshift is used.
    output_absorbers_file : bool or string
        If truthy, absorbers whose column density is at least the
        line's ``'label_threshold'`` are appended to
        ``self.absorbers_list``, which is then combined across
        processors.
    subgrid_resolution : optional, int
        Target number of virtual bins per thermal width used when
        depositing lines narrower than the spectral bins.
        Default: 10
    observing_redshift : optional, float
        Passed to ``_apply_observing_redshift``.
        Default: 0.
    njobs : optional, int
        Passed to ``parallel_objects`` for the loop over lines.
        Default: -1
    """
    # Change the redshifts of individual absorbers to account for the
    # redshift at which the observer sits
    redshift, redshift_eff = self._apply_observing_redshift(
        field_data, use_peculiar_velocity, observing_redshift)

    # Widen wavelength window until optical depth falls below this tau
    # value at the ends to assure that the wings of a line have been
    # fully resolved.
    min_tau = 1e-3

    # step through each ionic transition (e.g. HI, HII, MgII) specified
    # and deposit the lines into the spectrum
    for line in parallel_objects(self.line_list, njobs=njobs):
        column_density = field_data[line['field_name']] * field_data['dl']
        if (column_density < 0).any():
            mylog.warning(
                "Setting negative densities for field %s to 0! Bad!",
                line['field_name'])
            np.clip(column_density, 0, np.inf, out=column_density)
        if (column_density == 0).all():
            mylog.info("Not adding line %s: insufficient column density",
                       line['label'])
            continue

        # redshift_eff field combines cosmological and velocity redshifts
        # so delta_lambda gives the offset in angstroms from the rest frame
        # wavelength to the observed wavelength of the transition
        if use_peculiar_velocity:
            delta_lambda = line['wavelength'] * redshift_eff
        else:
            delta_lambda = line['wavelength'] * redshift
        # lambda_obs is central wavelength of line after redshift
        lambda_obs = line['wavelength'] + delta_lambda
        # the total number of absorbers per transition
        n_absorbers = len(lambda_obs)

        # we want to know the bin index in the lambda_field array
        # where each line has its central wavelength after being
        # redshifted.  however, because we don't know a priori how wide
        # a line will be (ie DLAs), we have to include bin indices
        # *outside* the spectral range of the AbsorptionSpectrum
        # object.  Thus, we find the "equivalent" bin index, which
        # may be <0 or >the size of the array.  In the end, we deposit
        # the bins that actually overlap with the AbsorptionSpectrum's
        # range in lambda.

        # this equation gives us the "equivalent" bin index for each line
        # if it were placed into the self.lambda_field array
        center_index = (lambda_obs.in_units('Angstrom').d - self.lambda_min) \
            / self.bin_width.d
        center_index = np.ceil(center_index).astype('int')

        # thermal broadening b parameter
        thermal_b = np.sqrt(
            (2 * boltzmann_constant_cgs * field_data['temperature']) /
            line['atomic_mass'])

        # the actual thermal width of the lines. in_units() returns the
        # converted array, so the result does not depend on what the
        # in-place convert_to_units() happens to return.
        thermal_width = (lambda_obs * thermal_b /
                         speed_of_light_cgs).in_units("angstrom")

        # Sanitize units for faster runtime of the tau_profile machinery.
        lambda_0 = line['wavelength'].d  # line's rest frame; angstroms
        cdens = column_density.in_units("cm**-2").d  # cm**-2
        thermb = thermal_b.in_cgs().d  # thermal b coefficient; cm / s
        dlambda = delta_lambda.d  # lambda offset; angstroms
        if use_peculiar_velocity:
            vlos = field_data['velocity_los'].in_units("km/s").d  # km/s
        else:
            vlos = np.zeros(field_data['temperature'].size)

        # When we actually deposit the voigt profile, sometimes we will
        # have underresolved lines (ie lines with smaller widths than
        # the spectral bin size).  Here, we create virtual wavelength bins
        # small enough in width to well resolve each line, deposit the
        # voigt profile into them, then numerically integrate their tau
        # values and sum them to redeposit them into the actual spectral
        # bins.

        # virtual bins (vbins) will be:
        # 1) <= the bin_width; assures at least as good as spectral bins
        # 2) <= 1/10th the thermal width; assures resolving voigt profiles
        #   (actually 1/subgrid_resolution value, default is 1/10)
        # 3) a bin width will be divisible by vbin_width times a power of
        #    10; this will assure we don't get spikes in the deposited
        #    spectra from uneven numbers of vbins per bin
        resolution = thermal_width / self.bin_width
        n_vbins_per_bin = (10**(np.ceil(
            np.log10(subgrid_resolution / resolution)).clip(
                0, np.inf))).astype('int')
        vbin_width = self.bin_width.d / n_vbins_per_bin

        # a note to the user about which lines components are unresolved
        if (thermal_width < self.bin_width).any():
            mylog.info(
                "%d out of %d line components will be " +
                "deposited as unresolved lines.",
                (thermal_width < self.bin_width).sum(), n_absorbers)

        # provide a progress bar with information about lines processed
        pbar = get_pbar("Adding line - %s [%f A]: " %
                        (line['label'], line['wavelength']), n_absorbers)

        # for a given transition, step through each location in the
        # observed spectrum where it occurs and deposit a voigt profile
        for i in parallel_objects(np.arange(n_absorbers), njobs=-1):

            # if there is a ray element with temperature = 0 or column
            # density = 0, skip it
            if (thermal_b[i] == 0.) or (cdens[i] == 0.):
                pbar.update(i)
                continue

            # the virtual window into which the line is deposited initially
            # spans a region of 2 coarse spectral bins
            # (one on each side of the center_index) but the window
            # can expand as necessary.
            # it will continue to expand until the tau value in the far
            # edge of the wings is less than the min_tau value.
            # NOTE(review): no check against the edge of the spectrum is
            # made in this loop - confirm that is intended.
            window_width_in_bins = 2

            while True:
                left_index = (center_index[i] - window_width_in_bins // 2)
                right_index = (center_index[i] + window_width_in_bins // 2)
                n_vbins = (right_index - left_index) * n_vbins_per_bin[i]

                # the array of virtual bins in lambda space
                vbins = \
                    np.linspace(
                        self.lambda_min + self.bin_width.d * left_index,
                        self.lambda_min + self.bin_width.d * right_index,
                        n_vbins, endpoint=False)

                # the virtual bins and their corresponding opacities
                vbins, vtau = \
                    tau_profile(
                        lambda_0, line['f_value'], line['gamma'],
                        thermb[i], cdens[i],
                        delta_lambda=dlambda[i], lambda_bins=vbins)

                # If tau has not dropped below min tau threshold by the
                # edges (ie the wings), then widen the wavelength
                # window and repeat process.
                if (vtau[0] < min_tau and vtau[-1] < min_tau):
                    break
                window_width_in_bins *= 2

            # numerically integrate the virtual bins to calculate a
            # virtual equivalent width; then sum the virtual equivalent
            # widths and deposit into each spectral bin
            vEW = vtau * vbin_width[i]
            EW = np.zeros(right_index - left_index)
            for k in range(len(EW)):
                EW[k] = vEW[n_vbins_per_bin[i] * k:
                            n_vbins_per_bin[i] * (k + 1)].sum()
            EW = EW / self.bin_width.d

            # only deposit EW bins that actually intersect the original
            # spectral wavelength range (i.e. lambda_field)

            # if EW bins don't intersect the original spectral range at all
            # then skip the deposition
            if ((left_index >= self.n_lambda) or
                    (right_index < 0)):
                pbar.update(i)
                continue

            # otherwise, determine how much of the original spectrum
            # is intersected by the expanded line window to be deposited,
            # and deposit the Equivalent Width data into that intersecting
            # window in the original spectrum's tau
            else:
                intersect_left_index = max(left_index, 0)
                # NOTE(review): right_index is used as an exclusive bound
                # (EW has right_index - left_index entries), so capping
                # at n_lambda - 1 means index n_lambda - 1 of tau_field
                # is never deposited into. If tau_field has n_lambda
                # entries this looks like an off-by-one - confirm.
                intersect_right_index = min(right_index, self.n_lambda - 1)
                self.tau_field[intersect_left_index:intersect_right_index] \
                    += EW[(intersect_left_index - left_index):
                          (intersect_right_index - left_index)]

            # write out absorbers to file if the column density of
            # an absorber is greater than the specified "label_threshold"
            # of that absorption line
            if output_absorbers_file and \
                    line['label_threshold'] is not None and \
                    cdens[i] >= line['label_threshold']:

                if use_peculiar_velocity:
                    peculiar_velocity = vlos[i]
                else:
                    peculiar_velocity = 0.0
                self.absorbers_list.append({
                    'label': line['label'],
                    'wavelength': (lambda_0 + dlambda[i]),
                    'column_density': column_density[i],
                    'b_thermal': thermal_b[i],
                    'redshift': redshift[i],
                    'redshift_eff': redshift_eff[i],
                    'v_pec': peculiar_velocity
                })
            pbar.update(i)
        pbar.finish()

        # Release the per-line arrays. n_vbins is deliberately not listed:
        # it is only bound once an absorber has actually been deposited,
        # so deleting it unconditionally fails when every absorber
        # handled here was skipped.
        del column_density, delta_lambda, lambda_obs, center_index, \
            thermal_b, thermal_width, cdens, thermb, dlambda, \
            vlos, resolution, vbin_width, n_vbins_per_bin

    comm = _get_comm(())
    self.tau_field = comm.mpi_allreduce(self.tau_field, op="sum")
    if output_absorbers_file:
        self.absorbers_list = comm.par_combine_object(
            self.absorbers_list, "cat", datatype="list")
def _run(self, save_halos, save_catalog, njobs=-1, dynamic=False):
    r"""
    Run the requested halo analysis.

    Every halo is passed through the registered actions (callbacks,
    filters and quantities) in order. Quantities of halos that pass all
    filters are collected in the "catalog" attribute, which is sorted
    by particle identifier at the end.

    Parameters
    ----------
    save_halos : bool
        If True, a list of all Halo objects is retained under the
        "halo_list" attribute.  If False, only the compiled quantities
        are saved under the "catalog" attribute.
    save_catalog : bool
        If True, save the final catalog to disk.
    njobs : int
        The number of jobs over which to divide halo analysis.  Choose -1
        to allocate one processor per halo.
        Default: -1
    dynamic : bool
        If False, halo analysis is divided evenly between all available
        processors.  If True, parallelism is performed via a task queue.
        Default: False

    Raises
    ------
    RuntimeError
        If an action is not a callback, filter, or quantity.

    See Also
    --------
    create, load

    """
    self.catalog = []
    if save_halos:
        self.halo_list = []

    if self.halos_ds is None:
        # No halo dataset yet: run the configured finder on the data.
        self.halos_ds = self.finder_method(self.data_ds)
        if self.halos_ds is None:
            message = "No halos were found for {0}".format(
                self.data_ds.basename)
            mylog.warning(message)
            if save_catalog:
                # Point halos_ds at the data dataset only for the
                # duration of the save, then reset it.
                self.halos_ds = self.data_ds
                self.save_catalog()
                self.halos_ds = None
            return
        # Bare attribute access; presumably forces the index of the new
        # halo dataset to be built - TODO confirm.
        self.halos_ds.index

    # Every halo in the halo dataset is a candidate.
    self.data_source = self.halos_ds.all_data()

    # Quantities that all halos must have.
    self.add_default_quantities('all')

    id_order = np.argsort(self.data_source["all", "particle_identifier"])
    for halo_index in parallel_objects(id_order, njobs=njobs,
                                       dynamic=dynamic):
        halo = Halo(self)
        passed = True

        for kind, payload in self.actions:
            if kind == "callback":
                payload(halo)
                continue

            if kind == "filter":
                passed = payload(halo)
                if not passed:
                    # A failed filter ends processing of this halo.
                    break
                continue

            if kind != "quantity":
                raise RuntimeError(
                    "Action must be a callback, filter, or quantity.")

            key, quantity = payload
            if quantity in self.halos_ds.field_info:
                # Known field of the halo dataset: read this halo's value.
                halo.quantities[key] = \
                    self.data_source[quantity][int(halo_index)].in_cgs()
            elif callable(quantity):
                halo.quantities[key] = quantity(halo)

        if passed:
            self.catalog.append(halo.quantities)
            if save_halos:
                self.halo_list.append(halo)
                continue

        # Drop the local reference to halos that are not being retained.
        del halo

    def _identifier(entry):
        return entry['particle_identifier'].to_ndarray()

    self.catalog.sort(key=_identifier)
    if save_catalog:
        self.save_catalog()
def _run(self, save_halos, save_catalog, njobs=-1, dynamic=False):
    r"""
    Run the requested halo analysis.

    Every halo is passed through the registered actions (callbacks,
    filters and quantities) in order. For halos that pass all filters,
    each quantity is converted with ``convert_to_base`` and the
    quantities are collected in the "catalog" attribute, which is
    sorted by particle identifier at the end.

    Parameters
    ----------
    save_halos : bool
        If True, a list of all Halo objects is retained under the
        "halo_list" attribute.  If False, only the compiled quantities
        are saved under the "catalog" attribute.
    save_catalog : bool
        If True, save the final catalog to disk.
    njobs : int
        The number of jobs over which to divide halo analysis.  Choose -1
        to allocate one processor per halo.
        Default: -1
    dynamic : bool
        If False, halo analysis is divided evenly between all available
        processors.  If True, parallelism is performed via a task queue.
        Default: False

    Raises
    ------
    RuntimeError
        If an action is not a callback, filter, or quantity.

    See Also
    --------
    create, load

    """
    self.catalog = []
    if save_halos:
        self.halo_list = []

    if self.halos_ds is None:
        # No halo dataset yet: run the configured finder on the data.
        self.halos_ds = self.finder_method(self.data_ds)
        if self.halos_ds is None:
            message = "No halos were found for {0}".format(
                self.data_ds.basename)
            mylog.warning(message)
            if save_catalog:
                # Point halos_ds at the data dataset only for the
                # duration of the save, then reset it.
                self.halos_ds = self.data_ds
                self.save_catalog()
                self.halos_ds = None
            return
        # Bare attribute access; presumably forces the index of the new
        # halo dataset to be built - TODO confirm.
        self.halos_ds.index

    # Every halo in the halo dataset is a candidate.
    self.data_source = self.halos_ds.all_data()

    # Quantities that all halos must have.
    self.add_default_quantities('all')

    id_order = np.argsort(self.data_source["all", "particle_identifier"])
    for halo_index in parallel_objects(id_order, njobs=njobs,
                                       dynamic=dynamic):
        halo = Halo(self)
        passed = True

        for kind, payload in self.actions:
            if kind == "callback":
                payload(halo)
                continue

            if kind == "filter":
                passed = payload(halo)
                if not passed:
                    # A failed filter ends processing of this halo.
                    break
                continue

            if kind != "quantity":
                raise RuntimeError(
                    "Action must be a callback, filter, or quantity.")

            key, quantity = payload
            if quantity in self.halos_ds.field_info:
                # Known field of the halo dataset: read this halo's value.
                halo.quantities[key] = \
                    self.data_source[quantity][int(halo_index)]
            elif callable(quantity):
                halo.quantities[key] = quantity(halo)

        if passed:
            # Convert every stored quantity before it joins the catalog.
            for stored in halo.quantities.values():
                stored.convert_to_base()
            self.catalog.append(halo.quantities)
            if save_halos:
                self.halo_list.append(halo)
                continue

        # Drop the local reference to halos that are not being retained.
        del halo

    def _identifier(entry):
        return entry['particle_identifier'].to_ndarray()

    self.catalog.sort(key=_identifier)
    if save_catalog:
        self.save_catalog()
def make_light_ray(self, seed=None, start_position=None, end_position=None,
                   trajectory=None, fields=None, setup_function=None,
                   solution_filename=None, data_filename=None,
                   get_los_velocity=True, redshift=None, njobs=-1):
    r"""
    make_light_ray(seed=None, start_position=None, end_position=None,
                   trajectory=None, fields=None, setup_function=None,
                   solution_filename=None, data_filename=None,
                   get_los_velocity=True, redshift=None, njobs=-1)

    Create a light ray and get field values for each lixel.  A light
    ray consists of a list of field values for cells intersected by
    the ray and the path length of the ray through those cells.
    Light ray data can be written out to an hdf5 file.

    Parameters
    ----------
    seed : optional, int
        Seed for the random number generator.
        Default: None.
    start_position : optional, list of floats
        Used only if creating a light ray from a single dataset.
        The coordinates of the starting position of the ray.
        Default: None.
    end_position : optional, list of floats
        Used only if creating a light ray from a single dataset.
        The coordinates of the ending position of the ray.
        Default: None.
    trajectory : optional, list of floats
        Used only if creating a light ray from a single dataset.
        The (r, theta, phi) direction of the light ray.  Use either
        end_position or trajectory, not both.
        Default: None.
    fields : optional, list
        A list of fields for which to get data.
        Default: None.
    setup_function : optional, callable, accepts a ds
        This function will be called on each dataset that is loaded
        to create the light ray.  For example, this can be used to
        add new derived fields.
        Default: None.
    solution_filename : optional, string
        Path to a text file where the trajectories of each
        subray are written out.
        Default: None.
    data_filename : optional, string
        Path to output file for ray data.
        Default: None.
    get_los_velocity : optional, bool
        If True, the line of sight velocity is calculated for
        each point in the ray.
        Default: True.
    redshift : optional, float
        Used with light rays made from single datasets to specify a
        starting redshift for the ray.  If not used, the starting
        redshift will be 0 for a non-cosmological dataset and
        the dataset redshift for a cosmological dataset.
        Default: None.
    njobs : optional, int
        The number of parallel jobs over which the segments will
        be split.  Choose -1 for one processor per segment.
        Default: -1.

    Returns
    -------
    dict
        The flattened ray data, one entry per field.  The same
        dictionary is also stored on ``self._data``.

    Examples
    --------

    Make a light ray from multiple datasets:

    >>> import yt
    >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \
    ...     LightRay
    >>> my_ray = LightRay("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo",
    ...                   0., 0.1, time_data=False)
    ...
    >>> my_ray.make_light_ray(seed=12345,
    ...                       solution_filename="solution.txt",
    ...                       data_filename="my_ray.h5",
    ...                       fields=["temperature", "density"],
    ...                       get_los_velocity=True)

    Make a light ray from a single dataset:

    >>> import yt
    >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \
    ...     LightRay
    >>> my_ray = LightRay("IsolatedGalaxy/galaxy0030/galaxy0030")
    ...
    >>> my_ray.make_light_ray(start_position=[0., 0., 0.],
    ...                       end_position=[1., 1., 1.],
    ...                       solution_filename="solution.txt",
    ...                       data_filename="my_ray.h5",
    ...                       fields=["temperature", "density"],
    ...                       get_los_velocity=True)

    """

    # Calculate solution.
    self._calculate_light_ray_solution(seed=seed,
                                       start_position=start_position,
                                       end_position=end_position,
                                       trajectory=trajectory,
                                       filename=solution_filename)

    # Initialize data structures.
    self._data = {}
    if fields is None:
        fields = []
    # data_fields are read straight from each ray object; all_fields
    # additionally holds the quantities derived in this method.
    data_fields = fields[:]
    all_fields = fields[:]
    all_fields.extend(['dl', 'dredshift', 'redshift'])
    if get_los_velocity:
        all_fields.extend(['velocity_x', 'velocity_y',
                           'velocity_z', 'velocity_los'])
        data_fields.extend(['velocity_x', 'velocity_y', 'velocity_z'])

    all_ray_storage = {}
    for my_storage, my_segment in parallel_objects(self.light_ray_solution,
                                                   storage=all_ray_storage,
                                                   njobs=njobs):

        # Load dataset for segment.
        ds = load(my_segment['filename'], **self.load_kwargs)

        my_segment['unique_identifier'] = ds.unique_identifier
        if redshift is not None:
            if ds.cosmological_simulation and \
                    redshift != ds.current_redshift:
                mylog.warning("Generating light ray with different "
                              "redshift than the dataset itself.")
            my_segment["redshift"] = redshift

        if setup_function is not None:
            setup_function(ds)

        if start_position is not None:
            # Explicit positions are interpreted as code_length values.
            my_segment["start"] = ds.arr(my_segment["start"], "code_length")
            my_segment["end"] = ds.arr(my_segment["end"], "code_length")
        else:
            # Otherwise the stored positions are scaled by the domain
            # width and offset by the domain left edge.
            my_segment["start"] = ds.domain_width * my_segment["start"] + \
                ds.domain_left_edge
            my_segment["end"] = ds.domain_width * my_segment["end"] + \
                ds.domain_left_edge

        # Redshift at the far end of this segment.
        if not ds.cosmological_simulation:
            next_redshift = my_segment["redshift"]
        elif self.near_redshift == self.far_redshift:
            next_redshift = my_segment["redshift"] - \
                self._deltaz_forward(my_segment["redshift"],
                                     ds.domain_width[0].in_units("Mpccm / h") *
                                     my_segment["traversal_box_fraction"])
        elif my_segment.get("next") is None:
            next_redshift = self.near_redshift
        else:
            next_redshift = my_segment['next']['redshift']

        mylog.info("Getting segment at z = %s: %s to %s.",
                   my_segment['redshift'], my_segment['start'],
                   my_segment['end'])

        # Break periodic ray into non-periodic segments.
        sub_segments = periodic_ray(my_segment['start'], my_segment['end'],
                                    left=ds.domain_left_edge,
                                    right=ds.domain_right_edge)

        # Prepare data structure for subsegment.
        sub_data = {'segment_redshift': my_segment['redshift']}
        for field in all_fields:
            sub_data[field] = []

        # Get data for all subsegments in segment.
        for sub_segment in sub_segments:
            mylog.info("Getting subsegment: %s to %s.",
                       list(sub_segment[0]), list(sub_segment[1]))
            sub_ray = ds.ray(sub_segment[0], sub_segment[1])
            # Order the ray's cells by the ray's "t" field.
            asort = np.argsort(sub_ray["t"])
            sub_data['dl'].extend(sub_ray['dts'][asort] *
                                  vector_length(sub_ray.start_point,
                                                sub_ray.end_point))
            for field in data_fields:
                sub_data[field].extend(sub_ray[field][asort])

            if get_los_velocity:
                # Unit vector along the subsegment.
                line_of_sight = sub_segment[1] - sub_segment[0]
                line_of_sight /= ((line_of_sight**2).sum())**0.5
                sub_vel = ds.arr([sub_ray['velocity_x'],
                                  sub_ray['velocity_y'],
                                  sub_ray['velocity_z']])
                # Project each cell's velocity onto the line of sight.
                sub_data['velocity_los'].extend(
                    (np.rollaxis(sub_vel, 1) *
                     line_of_sight).sum(axis=1)[asort])
                del sub_vel

            sub_ray.clear_data()
            del sub_ray, asort

        for key in sub_data:
            sub_data[key] = ds.arr(sub_data[key]).in_cgs()

        # Get redshift for each lixel.  Assume linear relation between l and z.
        sub_data['dredshift'] = (my_segment['redshift'] - next_redshift) * \
            (sub_data['dl'] / vector_length(my_segment['start'],
                                            my_segment['end']).in_cgs())
        sub_data['redshift'] = my_segment['redshift'] - \
            sub_data['dredshift'].cumsum() + sub_data['dredshift']

        # Remove empty lixels.
        sub_dl_nonzero = sub_data['dl'].nonzero()
        for field in all_fields:
            sub_data[field] = sub_data[field][sub_dl_nonzero]
        del sub_dl_nonzero

        # Add to storage.
        my_storage.result = sub_data

        del ds

    # Reconstruct ray data from parallel_objects storage.  This is a list
    # of segments, each a dictionary with all the fields, ordered from
    # highest to lowest segment redshift.
    all_data = sorted(all_ray_storage.values(),
                      key=lambda a: a['segment_redshift'], reverse=True)
    # Flatten the list into a single dictionary containing fields
    # for the whole ray.
    all_data = _flatten_dict_list(all_data, exceptions=['segment_redshift'])

    if data_filename is not None:
        self._write_light_ray(data_filename, all_data)

    self._data = all_data
    return all_data
def piter(self, storage=None, dynamic=False):
    r"""Iterate over time series components in parallel.

    This allows you to iterate over a time series while dispatching
    individual components of that time series to different processors or
    processor groups.  If the parallelism strategy was set to be
    multi-processor (by "parallel = N" where N is an integer when the
    DatasetSeries was created) this will issue each dataset to an
    N-processor group.  For instance, this would allow you to start a 1024
    processor job, loading up 100 datasets in a time series and creating 8
    processor groups of 128 processors each, each of which would be
    assigned a different dataset.  This could be accomplished as shown in
    the examples below.  The *storage* option is as seen in
    :func:`~yt.utilities.parallel_tools.parallel_analysis_interface.parallel_objects`
    which is a mechanism for storing results of analysis on an individual
    dataset and then combining the results at the end, so that the entire
    set of processors have access to those results.

    Note that supplying a *storage* changes the iteration mechanism; see
    below.

    Parameters
    ----------
    storage : dict
        This is a dictionary, which will be filled with results during the
        course of the iteration.  The keys will be the dataset
        indices and the values will be whatever is assigned to the *result*
        attribute on the storage during iteration.
    dynamic : boolean
        This governs whether or not dynamic load balancing will be
        enabled.  This requires one dedicated processor; if this
        is enabled with a set of 128 processors available, only
        127 will be available to iterate over objects as one will
        be load balancing the rest.

    Yields
    ------
    ds or (sto, ds)
        The loaded dataset when *storage* is None; otherwise a tuple of
        the storage object and the dataset.

    Examples
    --------
    Here is an example of iteration when the results do not need to be
    stored.  One processor will be assigned to each dataset.

    >>> ts = DatasetSeries("DD*/DD*.index")
    >>> for ds in ts.piter():
    ...     SlicePlot(ds, "x", ("gas", "density")).save()
    ...

    This demonstrates how one might store results:

    >>> def print_time(ds):
    ...     print(ds.current_time)
    ...
    >>> ts = DatasetSeries("DD*/DD*.index",
    ...                    setup_function = print_time )
    ...
    >>> my_storage = {}
    >>> for sto, ds in ts.piter(storage=my_storage):
    ...     v, c = ds.find_max(("gas", "density"))
    ...     sto.result = (v, c)
    ...
    >>> for i, (v, c) in sorted(my_storage.items()):
    ...     print("% 4i  %0.3e" % (i, v))
    ...

    This shows how to dispatch 4 processors to each dataset:

    >>> ts = DatasetSeries("DD*/DD*.index",
    ...                    parallel = 4)
    >>> for ds in ts.piter():
    ...     ProjectionPlot(ds, "x", ("gas", "density")).save()
    ...

    """
    if not self.parallel:
        njobs = 1
    elif not dynamic:
        # self.parallel is truthy here, so a plain truthiness test cannot
        # tell "parallel = True" from "parallel = N".  Only the literal
        # True maps to -1; an integer N is forwarded as the job count.
        if self.parallel is True:
            njobs = -1
        else:
            njobs = self.parallel
    else:
        my_communicator = communication_system.communicators[-1]
        nsize = my_communicator.size
        if nsize == 1:
            # A single processor cannot spare one for load balancing,
            # so fall back to serial, non-dynamic iteration.
            self.parallel = False
            dynamic = False
            njobs = 1
        else:
            # One processor is dedicated to load balancing.
            njobs = nsize - 1

    for output in parallel_objects(self._pre_outputs, njobs=njobs,
                                   storage=storage, dynamic=dynamic):
        if storage is not None:
            # With storage, parallel_objects hands back (storage, item).
            sto, output = output

        if isinstance(output, str):
            # Entries given as strings are loaded on demand.
            ds = self._load(output, **self.kwargs)
            self._setup_function(ds)
        else:
            ds = output

        if storage is not None:
            next_ret = (sto, ds)
        else:
            next_ret = ds

        yield next_ret
def __init__(self, ds, normal, field, velocity_bounds, center="c",
             width=(1.0,"unitary"), dims=100, thermal_broad=False,
             atomic_weight=56., depth=(1.0,"unitary"), depth_res=256,
             method="integrate", weight_field=None, no_shifting=False,
             north_vector=None, no_ghost=True):
    r""" Initialize a PPVCube object.

    Parameters
    ----------
    ds : dataset
        The dataset.
    normal : array_like or string
        The normal vector along which to make the projections. If an array, it
        will be normalized. If a string, it will be assumed to be along one of the
        principal axes of the domain ("x", "y", or "z").
    field : string
        The field to project.
    velocity_bounds : tuple
        A 4-tuple of (vmin, vmax, nbins, units) for the velocity bounds to
        integrate over.
    center : A sequence of floats, a string, or a tuple.
        The coordinate of the center of the image. If set to 'c', 'center' or
        left blank, the plot is centered on the middle of the domain. If set to
        'max' or 'm', the center will be located at the maximum of the
        ('gas', 'density') field. Centering on the max or min of a specific
        field is supported by providing a tuple such as ("min","temperature") or
        ("max","dark_matter_density"). Units can be specified by passing in *center*
        as a tuple containing a coordinate and string unit name or by passing
        in a YTArray. If a list or unitless array is supplied, code units are
        assumed.
    width : float, tuple, or YTQuantity.
        The width of the projection. A float will assume the width is in code units.
        A (value, unit) tuple or YTQuantity allows for the units of the width to be
        specified. Implies width = height, e.g. the aspect ratio of the PPVCube's
        spatial dimensions is 1.
    dims : integer, optional
        The spatial resolution of the cube. Implies nx = ny, e.g. the
        aspect ratio of the PPVCube's spatial dimensions is 1.
    thermal_broad : boolean, optional
        Whether or not to broaden the line using the gas temperature. Default: False.
    atomic_weight : float, optional
        Set this value to the atomic weight of the particle that is emitting the line
        if *thermal_broad* is True. Defaults to 56 (Fe).
    depth : A tuple or a float, optional
        A tuple containing the depth to project through and the string
        key of the unit: (width, 'unit').  If set to a float, code units
        are assumed. Only for off-axis cubes.
    depth_res : integer, optional
        The resolution of integration along the line of sight for off-axis cubes. Default: 256
    method : string, optional
        Set the projection method to be used.
        "integrate" : line of sight integration over the line element.
        "sum" : straight summation over the line of sight.
    weight_field : string, optional
        The name of the weighting field.  Set to None for no weight.
    no_shifting : boolean, optional
        If set, no shifting due to velocity will occur but only thermal broadening.
        Should not be set when *thermal_broad* is False, otherwise nothing happens!
    north_vector : a sequence of floats
        A vector defining the 'up' direction. This option sets the orientation of
        the plane of projection. If not set, an arbitrary grid-aligned north_vector
        is chosen. Ignored in the case of on-axis cubes.
    no_ghost: bool, optional
        Optimization option for off-axis cases. If True, homogenized bricks will
        extrapolate out from grid instead of interpolating from
        ghost zones that have to first be calculated.  This can
        lead to large speed improvements, but at a loss of
        accuracy/smoothness in resulting image.  The effects are
        less notable when the transfer function is smooth and
        broad. Default: True

    Raises
    ------
    RuntimeError
        If *no_shifting* is True while *thermal_broad* is False, or if
        *method* is neither "integrate" nor "sum".

    Examples
    --------
    >>> i = 60*np.pi/180.
    >>> L = [0.0,np.sin(i),np.cos(i)]
    >>> cube = PPVCube(ds, L, "density", (-5.,4.,100,"km/s"), width=(10.,"kpc"))
    """

    self.ds = ds
    self.field = field
    # Keep the width as given; it is converted to a quantity at the end.
    self.width = width
    self.particle_mass = atomic_weight*mh
    self.thermal_broad = thermal_broad
    self.no_shifting = no_shifting

    if not isinstance(normal, string_types):
        # Off-axis case: the local width becomes a tuple of plain
        # code_length values for off_axis_projection.
        width = ds.coordinates.sanitize_width(normal, width, depth)
        width = tuple(el.in_units('code_length').v for el in width)

    if no_shifting and not thermal_broad:
        raise RuntimeError("no_shifting cannot be True when thermal_broad is False!")

    self.center = ds.coordinates.sanitize_center(center, normal)[0]

    self.nx = dims
    self.ny = dims
    self.nv = velocity_bounds[2]

    if method not in ["integrate","sum"]:
        raise RuntimeError("Only the 'integrate' and 'sum' projection "
                           "methods are supported in PPVCube.")

    dd = ds.all_data()
    fd = dd._determine_fields(field)[0]
    self.field_units = ds._get_field_info(fd).units

    # nv bins require nv + 1 edges.
    self.vbins = ds.arr(np.linspace(velocity_bounds[0],
                                    velocity_bounds[1],
                                    velocity_bounds[2]+1), velocity_bounds[3])

    self._vbins = self.vbins.copy()
    self.vmid = 0.5*(self.vbins[1:]+self.vbins[:-1])
    self.vmid_cgs = self.vmid.in_cgs().v
    self.dv = self.vbins[1]-self.vbins[0]
    self.dv_cgs = self.dv.in_cgs().v

    self.current_v = 0.0

    # Temporary derived fields; both are removed again at the end.
    _vlos = create_vlos(normal, self.no_shifting)
    self.ds.add_field(("gas","v_los"), function=_vlos, units="cm/s")

    _intensity = self._create_intensity()
    self.ds.add_field(("gas","intensity"), function=_intensity, units=self.field_units)

    if method == "integrate" and weight_field is None:
        # Unweighted line integral: field units times a length.
        self.proj_units = str(ds.quan(1.0, self.field_units+"*cm").units)
    else:
        # "sum" projections, and "integrate" projections with a weight
        # field (a weighted average along the line of sight), keep the
        # field's own units.  Without this branch the weighted case left
        # proj_units unset and failed when self.data was allocated below.
        self.proj_units = self.field_units

    storage = {}
    pbar = get_pbar("Generating cube.", self.nv)
    for sto, i in parallel_objects(range(self.nv), storage=storage):
        # Velocity of the channel being projected (presumably read by
        # the intensity field function -- see _create_intensity).
        self.current_v = self.vmid_cgs[i]
        if isinstance(normal, string_types):
            prj = ds.proj("intensity", ds.coordinates.axis_id[normal], method=method,
                          weight_field=weight_field)
            buf = prj.to_frb(width, self.nx, center=self.center)["intensity"]
        else:
            buf = off_axis_projection(ds, self.center, normal, width,
                                      (self.nx, self.ny, depth_res), "intensity",
                                      north_vector=north_vector, no_ghost=no_ghost,
                                      method=method, weight=weight_field).swapaxes(0,1)
        sto.result_id = i
        sto.result = buf
        pbar.update(i)
    pbar.finish()

    self.data = ds.arr(np.zeros((self.nx,self.ny,self.nv)), self.proj_units)
    if is_root():
        # Only the root process assembles the cube from the stored slices.
        for i, buf in sorted(storage.items()):
            self.data[:,:,i] = buf.transpose()

    self.axis_type = "velocity"

    # Now fix the width
    if iterable(self.width):
        self.width = ds.quan(self.width[0], self.width[1])
    elif not isinstance(self.width, YTQuantity):
        self.width = ds.quan(self.width, "code_length")

    self.ds.field_info.pop(("gas","intensity"))
    self.ds.field_info.pop(("gas","v_los"))