def _check_for_outputs(self, potential_outputs): r""" Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): if os.path.exists(output): try: ds = load(output) if ds is not None: num_steps = ds.num_steps my_storage.result = { "filename": output, "num_steps": num_steps } except YTOutputNotIdentified: mylog.error("Failed to load %s", output) my_outputs = [ my_output for my_output in my_outputs.values() if my_output is not None ] return my_outputs
def _check_for_outputs(self, potential_outputs): r""" Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} llevel = mylog.level # suppress logging as we load every dataset, unless set to debug if llevel > 10 and llevel < 40: mylog.setLevel(40) for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): if os.path.exists(output): try: ds = load(output) if ds is not None: my_storage.result = {"filename": output, "time": ds.current_time.in_units("s")} if ds.cosmological_simulation: my_storage.result["redshift"] = ds.current_redshift except YTUnidentifiedDataType: mylog.error("Failed to load %s", output) mylog.setLevel(llevel) my_outputs = [my_output for my_output in my_outputs.values() \ if my_output is not None] return my_outputs
def get_parameter(self, parameter, type=None): """ Gets a parameter not in the parameterDict. """ if parameter in self.parameters: return self.parameters[parameter] for line in open(self.parameter_filename): if line.find("#") >= 1: # Keep the commented lines line = line[:line.find("#")] line = line.strip().rstrip() if len(line) < 2: continue try: param, vals = map(string.strip, map(string.rstrip, line.split("="))) except ValueError: mylog.error("ValueError: '%s'", line) if parameter == param: if type is None: t = vals.split() else: t = map(type, vals.split()) if len(t) == 1: self.parameters[param] = t[0] else: self.parameters[param] = t if param.endswith( "Units") and not param.startswith("Temperature"): dataType = param[:-5] self.conversion_factors[dataType] = self.parameters[param] return self.parameters[parameter] return ""
def _check_for_outputs(self, potential_outputs): """ Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): if self.parameters['DataDumpDir'] in output: dir_key = self.parameters['DataDumpDir'] output_key = self.parameters['DataDumpName'] else: dir_key = self.parameters['RedshiftDumpDir'] output_key = self.parameters['RedshiftDumpName'] index = output[output.find(dir_key) + len(dir_key):] filename = os.path.join(self.parameters['GlobalDir'], "%s%s" % (dir_key, index), "%s%s" % (output_key, index)) if os.path.exists(filename): try: ds = load(filename) if ds is not None: my_storage.result = {'filename': filename, 'time': ds.current_time.in_units("s")} if ds.cosmological_simulation: my_storage.result['redshift'] = ds.current_redshift except YTOutputNotIdentified: mylog.error('Failed to load %s', filename) my_outputs = [my_output for my_output in my_outputs.values() \ if my_output is not None] return my_outputs
def default_mpi_excepthook(exception_type, exception_value, tb): traceback.print_tb(tb) mylog.error("%s: %s", exception_type.__name__, exception_value) comm = yt.communication_system.communicators[-1] if comm.size > 1: mylog.error("Error occurred on rank %d.", comm.rank) MPI.COMM_WORLD.Abort(1)
def _check_for_outputs(self, potential_outputs): r""" Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): try: ds = load(output) except (FileNotFoundError, YTUnidentifiedDataType): mylog.error("Failed to load %s", output) continue my_storage.result = { "filename": output, "time": ds.current_time.in_units("s"), } if ds.cosmological_simulation: my_storage.result["redshift"] = ds.current_redshift my_outputs = [ my_output for my_output in my_outputs.values() if my_output is not None ] return my_outputs
def _check_for_outputs(self, potential_outputs): r""" Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): if os.path.exists(output): try: ds = load(output) if ds is not None: my_storage.result = { "filename": output, "time": ds.current_time.in_units("s") } if ds.cosmological_simulation: my_storage.result["redshift"] = ds.current_redshift except YTOutputNotIdentified: mylog.error("Failed to load %s", output) my_outputs = [my_output for my_output in my_outputs.values() \ if my_output is not None] return my_outputs
def add_field(self, name, function=None, sampling_type=None, **kwargs): if not isinstance(name, tuple): if kwargs.setdefault('particle_type', False): name = ('all', name) else: name = ('gas', name) override = kwargs.get("force_override", False) # Handle the case where the field has already been added. if not override and name in self: mylog.error( "Field %s already exists. To override use " + "force_override=True.", name) if kwargs.setdefault('particle_type', False): if sampling_type is not None and sampling_type != "particle": raise RuntimeError( "Clashing definition of 'sampling_type' and " "'particle_type'. Note that 'particle_type' is " "deprecated. Please just use 'sampling_type'.") else: sampling_type = "particle" if sampling_type is None: warnings.warn("Because 'sampling_type' is not specified, yt will " "assume a 'cell' sampling_type for the %s field" % (name, ), stacklevel=3) sampling_type = "cell" return super(LocalFieldInfoContainer, self).add_field(name, sampling_type, function, **kwargs)
def _read_fluid_file_descriptor(fname): """ Read a file descriptor and return the list of fields found. """ # Mapping mapping = [ ("density", "Density"), ("velocity_x", "x-velocity"), ("velocity_y", "y-velocity"), ("velocity_z", "z-velocity"), ("pressure", "Pressure"), ("metallicity", "Metallicity"), ] # Add mapping for magnetic fields mapping += [ (key, key) for key in ( f"B_{dim}_{side}" for side in ["left", "right"] for dim in ["x", "y", "z"] ) ] # Convert to dictionary mapping = {k: v for k, v in mapping} with open(fname) as f: line = f.readline() tmp = VERSION_RE.match(line) mylog.debug("Reading fluid file descriptor %s.", fname) if not tmp: return [] version = int(tmp.group(1)) if version == 1: # Skip one line (containing the headers) line = f.readline() fields = [] for i, line in enumerate(f.readlines()): tmp = VAR_DESC_RE.match(line) if not tmp: raise YTFileNotParseable(fname, i + 1) # ivar = tmp.group(1) varname = tmp.group(2) dtype = tmp.group(3) if varname in mapping: varname = mapping[varname] else: varname = f"hydro_{varname}" fields.append((varname, dtype)) else: mylog.error("Version %s", version) raise YTParticleOutputFormatNotImplemented() return fields
def __init__(self, ds, data_source=None, star_mass=None, star_creation_time=None, bins=300, volume=None, star_filter=None): self._ds = ds self._data_source = data_source self._filter = star_filter self.ds_provided = self._data_source is not None self.filter_provided = self._filter is not None self.bin_count = bins # Set up for time conversion. self.cosm = Cosmology(hubble_constant=self._ds.hubble_constant, omega_matter=self._ds.omega_matter, omega_lambda=self._ds.omega_lambda) # Find the time right now. self.time_now = self._ds.current_time if not self.ds_provided: # Check to make sure we have the right set of information. if star_mass is None or star_creation_time is None \ or volume is None: mylog.error(""" If data_source is not provided, all of these parameters need to be set: star_mass (array, Msun), star_creation_time (array, code units), volume (float, cMpc**3).""") return None if isinstance(star_mass, YTArray): assert star_mass.units.same_dimensions_as(g.units) elif star_mass is not None: star_mass = YTArray(star_mass, 'Msun') self.star_mass = star_mass if isinstance(star_creation_time, YTArray): assert star_creation_time.units.same_dimensions_as(s.units) elif star_creation_time is not None: star_creation_time = self._ds.arr(star_creation_time, 'code_time') self.star_creation_time = star_creation_time if isinstance(volume, YTQuantity): assert volume.units.same_dimensions_as( self._ds.quan(1.0, 'Mpccm**3').units) elif volume is not None: volume = self._ds.quan(volume, 'Mpccm**3') self.volume = volume # Build the distribution. self.build_dist() # Attach some convenience arrays. self.attach_arrays()
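# Usage sketch for the constructor above (hedged): assuming this is the
# StarFormationRate.__init__ from yt's star_analysis module, the two supported
# call patterns are either a data_source, or explicit star arrays plus a volume.
# The import path and dataset name are taken from the SpectrumBuilder docstring
# later in this collection and may not match every yt version.
import yt
from yt.analysis_modules.star_analysis.api import StarFormationRate

ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006")
sp = ds.sphere([0.5, 0.5, 0.5], 0.1)
# data_source path: star properties are pulled from the sphere's particles
sfr = StarFormationRate(ds, data_source=sp, bins=300)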
def upload(self): api_key = ytcfg.get("yt", "hub_api_key") url = ytcfg.get("yt", "hub_url") if api_key == '': raise YTHubRegisterError metadata, (final_name, chunks) = self._generate_post() if hasattr(self, "_ds_mrep"): self._ds_mrep.upload() for i in metadata: if isinstance(metadata[i], np.ndarray): metadata[i] = metadata[i].tolist() elif hasattr(metadata[i], 'dtype'): metadata[i] = np.asscalar(metadata[i]) metadata['obj_type'] = self.type if len(chunks) == 0: chunk_info = {'chunks': []} else: chunk_info = {'final_name': final_name, 'chunks': []} for cn, cv in chunks: chunk_info['chunks'].append((cn, cv.size * cv.itemsize)) metadata = json.dumps(metadata) chunk_info = json.dumps(chunk_info) datagen, headers = multipart_encode({ 'metadata': metadata, 'chunk_info': chunk_info, 'api_key': api_key }) request = urllib.request.Request(url, datagen, headers) # Actually do the request, and get the response try: rv = urllib.request.urlopen(request).read() except urllib.error.HTTPError as ex: if ex.code == 401: mylog.error("You must create an API key before uploading.") mylog.error("https://data.yt-project.org/getting_started.html") return else: raise ex uploader_info = json.loads(rv) new_url = url + "/handler/%s" % uploader_info['handler_uuid'] for i, (cn, cv) in enumerate(chunks): remaining = cv.size * cv.itemsize f = TemporaryFile() np.save(f, cv) f.seek(0) pbar = UploaderBar("%s, % 2i/% 2i" % (self.type, i + 1, len(chunks))) datagen, headers = multipart_encode({'chunk_data': f}, cb=pbar) request = urllib.request.Request(new_url, datagen, headers) rv = urllib.request.urlopen(request).read() datagen, headers = multipart_encode({'status': 'FINAL'}) request = urllib.request.Request(new_url, datagen, headers) rv = json.loads(urllib.request.urlopen(request).read()) mylog.info("Upload succeeded! View here: %s", rv['url']) return rv
def enable_plugins(pluginfilename=None): """Forces a plugin file to be parsed. A plugin file is a means of creating custom fields, quantities, data objects, colormaps, and other code classes and objects to be used in yt scripts without modifying the yt source directly. If <pluginfilename> is omitted, this function will look for a plugin file at ``$HOME/.config/yt/my_plugins.py``, which is the preferred behaviour for a system-level configuration. Warning: a script using this function will only be reproducible if your plugin file is shared with it. """ import yt from yt.config import CONFIG_DIR, ytcfg from yt.fields.my_plugin_fields import my_plugins_fields if pluginfilename is not None: _fn = pluginfilename if not os.path.isfile(_fn): raise FileNotFoundError(_fn) else: # Determine global plugin location. By decreasing priority order: # - absolute path # - CONFIG_DIR # - obsolete config dir. my_plugin_name = ytcfg.get("yt", "pluginfilename") old_config_dir = os.path.join(os.path.expanduser("~"), ".yt") for base_prefix in ("", CONFIG_DIR, old_config_dir): if os.path.isfile(os.path.join(base_prefix, my_plugin_name)): _fn = os.path.join(base_prefix, my_plugin_name) break else: mylog.error("Could not find a global system plugin file.") return if _fn.startswith(old_config_dir): mylog.warning( "Your plugin file is located in a deprecated directory. " "Please move it from %s to %s", os.path.join(old_config_dir, my_plugin_name), os.path.join(CONFIG_DIR, my_plugin_name), ) mylog.info("Loading plugins from %s", _fn) ytdict = yt.__dict__ execdict = ytdict.copy() execdict["add_field"] = my_plugins_fields.add_field with open(_fn) as f: code = compile(f.read(), _fn, "exec") exec(code, execdict, execdict) ytnamespace = list(ytdict.keys()) for k in execdict.keys(): if k not in ytnamespace: if callable(execdict[k]): setattr(yt, k, execdict[k])
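# Sketch of a minimal plugin file that enable_plugins() would execute (hedged):
# the function above exec's the file with yt's namespace plus a plugin-aware
# add_field injected, so my_plugins.py can define derived fields directly.
# The field name, formula, and units below are purely illustrative.
def _density_squared(field, data):
    # any valid derived-field function would do here
    return data["gas", "density"] ** 2

add_field(
    ("gas", "density_squared"),
    function=_density_squared,
    sampling_type="cell",
    units="g**2/cm**6",
)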
def _read_fluid_file_descriptor(fname): """ Read a file descriptor and return the list of fields found. """ VERSION_RE = re.compile(r'# version: *(\d+)') VAR_DESC_RE = re.compile(r'\s*(\d+),\s*(\w+),\s*(\w+)') # Mapping mapping = [ ('density', 'Density'), ('velocity_x', 'x-velocity'), ('velocity_y', 'y-velocity'), ('velocity_z', 'z-velocity'), ('pressure', 'Pressure'), ('metallicity', 'Metallicity'), ] # Convert to dictionary mapping = {k: v for k, v in mapping} with open(fname, 'r') as f: line = f.readline() tmp = VERSION_RE.match(line) mylog.info('Reading fluid file descriptor.') if not tmp: return [] version = int(tmp.group(1)) if version == 1: # Skip one line (containing the headers) line = f.readline() fields = [] for i, line in enumerate(f.readlines()): tmp = VAR_DESC_RE.match(line) if not tmp: raise YTFileNotParseable(fname, i + 1) # ivar = tmp.group(1) varname = tmp.group(2) dtype = tmp.group(3) if varname in mapping: varname = mapping[varname] else: varname = 'hydro_%s' % varname fields.append((varname, dtype)) else: mylog.error('Version %s', version) raise YTParticleOutputFormatNotImplemented() return fields
def add_workgroup(self, size=None, ranks=None, name=None): if size is None: size = len(self.available_ranks) if len(self.available_ranks) < size: mylog.error('Not enough resources available, asked for %d, have %d', size, len(self.available_ranks)) raise RuntimeError if ranks is None: ranks = [self.available_ranks.pop(0) for i in range(size)] # Default name to the workgroup number. if name is None: name = str(len(self.workgroups)) group = self.comm.comm.Get_group().Incl(ranks) new_comm = self.comm.comm.Create(group) if self.comm.rank in ranks: communication_system.communicators.append(Communicator(new_comm)) self.workgroups.append(Workgroup(len(ranks), ranks, new_comm, name))
def parse_unit_dimension(unit_dimension): r"""Transforms an openPMD unitDimension into a string. Parameters ---------- unit_dimension : array_like integer array of length 7 with one entry for the dimensional component of every SI unit [0] length L, [1] mass M, [2] time T, [3] electric current I, [4] thermodynamic temperature theta, [5] amount of substance N, [6] luminous intensity J References ---------- https://github.com/openPMD/openPMD-standard/blob/latest/STANDARD.md#unit-systems-and-dimensionality Returns ------- str Examples -------- >>> velocity = [1.0, 0.0, -1.0, 0.0, 0.0, 0.0, 0.0] >>> print(parse_unit_dimension(velocity)) m**1*s**-1 >>> magnetic_field = [0.0, 1.0, -2.0, -1.0, 0.0, 0.0, 0.0] >>> print(parse_unit_dimension(magnetic_field)) kg**1*s**-2*A**-1 """ if len(unit_dimension) != 7: mylog.error("SI must have 7 base dimensions!") unit_dimension = np.asarray(unit_dimension, dtype="int64") dim = [] # SI base units in openPMD order; index 4 is kelvin (thermodynamic temperature) si = ["m", "kg", "s", "A", "K", "mol", "cd"] for i in np.arange(7): if unit_dimension[i] != 0: dim.append(f"{si[i]}**{unit_dimension[i]}") return "*".join(dim)
def _check_for_outputs(self, potential_outputs): """ Check a list of files to see if they are valid datasets. """ only_on_root( mylog.info, "Checking %d potential outputs.", len(potential_outputs) ) my_outputs = {} llevel = mylog.level # suppress logging as we load every dataset, unless set to debug if llevel > 10 and llevel < 40: mylog.setLevel(40) for my_storage, output in parallel_objects( potential_outputs, storage=my_outputs ): if self.parameters["DataDumpDir"] in output: dir_key = self.parameters["DataDumpDir"] output_key = self.parameters["DataDumpName"] else: dir_key = self.parameters["RedshiftDumpDir"] output_key = self.parameters["RedshiftDumpName"] index = output[output.find(dir_key) + len(dir_key) :] filename = os.path.join( self.parameters["GlobalDir"], f"{dir_key}{index}", f"{output_key}{index}", ) try: ds = load(filename) except (FileNotFoundError, YTUnidentifiedDataType): mylog.error("Failed to load %s", filename) continue my_storage.result = { "filename": filename, "time": ds.current_time.in_units("s"), } if ds.cosmological_simulation: my_storage.result["redshift"] = ds.current_redshift mylog.setLevel(llevel) my_outputs = [ my_output for my_output in my_outputs.values() if my_output is not None ] return my_outputs
def build_dist(self): """ Build the data for plotting. """ # Pick out the stars. if self.filter_provided: ct = self._filter['creation_time'] mass_stars = self._data_source[self._filter, "particle_mass"] else: if self.ds_provided: ct = self._data_source['creation_time'] if ct is None: errmsg = 'data source must have particle_age!' mylog.error(errmsg) raise RuntimeError(errmsg) mask = ct > 0 if not any(mask): errmsg = 'all particles have age < 0' mylog.error(errmsg) raise RuntimeError(errmsg) # type = self._data_source['particle_type'] ct_stars = ct[mask] mass_stars = self._data_source[ 'particle_mass'][mask].in_units('Msun') del mask else: ct_stars = self.star_creation_time mass_stars = self.star_mass # Find the oldest stars in units of code time. tmin = ct_stars.min().in_units("s") # Multiply the end to prevent numerical issues. self.time_bins = np.linspace( tmin * 1.01, self._ds.current_time.in_units("s"), num=self.bin_count + 1) # Figure out which bins the stars go into. inds = np.digitize(ct_stars.in_units("s"), self.time_bins) - 1 # Sum up the stars created in each time bin. self.mass_bins = YTArray( np.zeros(self.bin_count + 1, dtype='float64'), "Msun" ) for index in np.unique(inds): self.mass_bins[index] += (mass_stars[inds == index]).sum() # We will want the time taken between bins. self.time_bins_dt = self.time_bins[1:] - self.time_bins[:-1]
def _check_for_outputs(self, potential_outputs): """ Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} llevel = mylog.level # suppress logging as we load every dataset, unless set to debug if llevel > 10 and llevel < 40: mylog.setLevel(40) for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): if self.parameters['DataDumpDir'] in output: dir_key = self.parameters['DataDumpDir'] output_key = self.parameters['DataDumpName'] else: dir_key = self.parameters['RedshiftDumpDir'] output_key = self.parameters['RedshiftDumpName'] index = output[output.find(dir_key) + len(dir_key):] filename = os.path.join(self.parameters['GlobalDir'], "%s%s" % (dir_key, index), "%s%s" % (output_key, index)) if os.path.exists(filename): try: ds = load(filename) if ds is not None: my_storage.result = { 'filename': filename, 'time': ds.current_time.in_units("s") } if ds.cosmological_simulation: my_storage.result['redshift'] = ds.current_redshift except YTOutputNotIdentified: mylog.error('Failed to load %s', filename) mylog.setLevel(llevel) my_outputs = [my_output for my_output in my_outputs.values() \ if my_output is not None] return my_outputs
def _setup_dust_fields(self): idust = 1 imax = self.__class__.MAXN_DUST_SPECIES while ("amrvac", "rhod%d" % idust) in self.field_list: if idust > imax: mylog.error( "Only the first %d dust species are currently read by yt. " "If you read this, please consider issuing a ticket. ", imax, ) break self._setup_velocity_fields(idust) idust += 1 n_dust_found = idust - 1 us = self.ds.unit_system if n_dust_found > 0: def _total_dust_density(field, data): tot = np.zeros_like(data[("gas", "density")]) for idust in range(1, n_dust_found + 1): tot += data["dust%d_density" % idust] return tot self.add_field( ("gas", "total_dust_density"), function=_total_dust_density, dimensions=dimensions.density, units=us["density"], sampling_type="cell", ) def dust_to_gas_ratio(field, data): return data[("gas", "total_dust_density")] / data[("gas", "density")] self.add_field( ("gas", "dust_to_gas_ratio"), function=dust_to_gas_ratio, dimensions=dimensions.dimensionless, sampling_type="cell", )
def _check_for_outputs(self, potential_outputs): r""" Check a list of files to see if they are valid datasets. """ only_on_root(mylog.info, "Checking %d potential outputs.", len(potential_outputs)) my_outputs = {} for my_storage, output in parallel_objects(potential_outputs, storage=my_outputs): try: ds = load(output) except (FileNotFoundError, YTUnidentifiedDataType): mylog.error("Failed to load %s", output) continue my_storage.result = {"filename": output, "num_steps": ds.num_steps} my_outputs = [ my_output for my_output in my_outputs.values() if my_output is not None ] return my_outputs
def parallel_objects(objects, njobs=0, storage=None, barrier=True, dynamic=False): r"""This function dispatches components of an iterable to different processors. The parallel_objects function accepts an iterable, *objects*, and based on the number of jobs requested and number of available processors, decides how to dispatch individual objects to processors or sets of processors. This can implicitly include multi-level parallelism, such that the processor groups assigned each object can be composed of several or even hundreds of processors. *storage* is also available, for collation of results at the end of the iteration loop. Calls to this function can be nested. This should not be used to iterate over datasets -- :class:`~yt.data_objects.time_series.DatasetSeries` provides a much nicer interface for that. Parameters ---------- objects : Iterable The list of objects to dispatch to different processors. njobs : int How many jobs to spawn. By default, one job will be dispatched for each available processor. storage : dict This is a dictionary, which will be filled with results during the course of the iteration. The keys will be the dataset indices and the values will be whatever is assigned to the *result* attribute on the storage during iteration. barrier : bool Should a barrier be placed at the end of iteration? dynamic : bool This governs whether or not dynamic load balancing will be enabled. This requires one dedicated processor; if this is enabled with a set of 128 processors available, only 127 will be available to iterate over objects as one will be load balancing the rest. Examples -------- Here is a simple example of iterating over a set of centers and making slice plots centered at each. >>> for c in parallel_objects(centers): ... SlicePlot(ds, "x", "Density", center=c).save() ... Here's an example of calculating the angular momentum vector of a set of spheres, but with a set of four jobs of multiple processors each. Note that we also store the results. >>> storage = {} >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage): ... sp = ds.sphere(c, (100, "kpc")) ... sto.result = sp.quantities["AngularMomentumVector"]() ... >>> for sphere_id, L in sorted(storage.items()): ... print(centers[sphere_id], L) ... """ if dynamic: from .task_queue import dynamic_parallel_objects yield from dynamic_parallel_objects(objects, njobs=njobs, storage=storage) return if not parallel_capable: njobs = 1 my_communicator = communication_system.communicators[-1] my_size = my_communicator.size if njobs <= 0: njobs = my_size if njobs > my_size: mylog.error( "You have asked for %s jobs, but you only have %s processors.", njobs, my_size, ) raise RuntimeError my_rank = my_communicator.rank all_new_comms = np.array_split(np.arange(my_size), njobs) for i, comm_set in enumerate(all_new_comms): if my_rank in comm_set: my_new_id = i break if parallel_capable: communication_system.push_with_ids(all_new_comms[my_new_id].tolist()) to_share = {} # If our objects object is slice-aware, like time series data objects are, # this will prevent intermediate objects from being created. 
oiter = itertools.islice(enumerate(objects), my_new_id, None, njobs) for result_id, obj in oiter: if storage is not None: rstore = ResultsStorage() rstore.result_id = result_id yield rstore, obj to_share[rstore.result_id] = rstore.result else: yield obj if parallel_capable: communication_system.pop() if storage is not None: # Now we have to broadcast it new_storage = my_communicator.par_combine_object(to_share, datatype="dict", op="join") storage.update(new_storage) if barrier: my_communicator.barrier()
def calculate_spectrum(self, data_source=None, star_mass=None, star_creation_time=None, star_metallicity_fraction=None, star_metallicity_constant=None, min_age=YTQuantity(0.0, 'yr')): r"""For the set of stars, calculate the collective spectrum. Attached to the output are several useful objects: Attributes ---------- final_spec: array The collective spectrum in units of flux binned in wavelength. wavelength: array The wavelength for the spectrum bins, in Angstroms. total_mass: float Total mass of all the stars. avg_mass: float Average mass of all the stars. avg_metal: float Average metallicity of all the stars. Parameters ---------- data_source : AMRRegion object, optional The region from which stars are extracted for analysis. If this is not specified, the next three parameters must be supplied. star_mass : Array or list of floats An array of star masses in Msun units. star_creation_time : Array or list of floats An array of star creation times in code units. star_metallicity_fraction : Array or list of floats An array of star metallicity fractions, in code units (which is not Z/Zsun, rather just Z). star_metallicity_constant : Float If desired, override the star metallicity fraction of all the stars to the given value. min_age : Float Removes young stars younger than this number (in years) from the spectrum. Default: 0 (all stars). Examples -------- >>> import yt >>> from yt.analysis_modules.star_analysis.api import SpectrumBuilder >>> ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006") >>> spec = SpectrumBuilder(ds, "bc", model="salpeter") >>> sp = ds.sphere([0.5, 0.5, 0.5], 0.1) >>> spec.calculate_spectrum(data_source=sp, min_age=1.e6) """ # Initialize values self.final_spec = np.zeros(self.wavelength.size, dtype='float64') self._data_source = data_source if isinstance(star_mass, YTArray): assert star_mass.units.same_dimensions_as(g.units) elif star_mass is not None: star_mass = YTArray(star_mass, 'Msun') self.star_mass = star_mass if isinstance(star_creation_time, YTArray): assert star_creation_time.units.same_dimensions_as(s.units) elif star_creation_time is not None: star_creation_time = self._ds.arr(star_creation_time, 'code_time') self.star_creation_time = star_creation_time if isinstance(star_metallicity_fraction, YTArray): assert \ star_metallicity_fraction.units.same_dimensions_as(Zsun.units) elif star_metallicity_fraction is not None: star_metallicity_fraction = self._ds.arr( star_metallicity_fraction, 'code_metallicity' ) self.star_metallicity_fraction = star_metallicity_fraction if isinstance(min_age, YTQuantity): assert min_age.units.same_dimensions_as(s.units) elif min_age is not None: min_age = YTQuantity(min_age, 'yr') self.min_age = min_age # Check to make sure we have the right set of data. if data_source is None: if self.star_mass is None or self.star_creation_time is None or \ (star_metallicity_fraction is None and star_metallicity_constant is None): mylog.error( """ If data_source is not provided, all of these parameters need to be set: star_mass (array, Msun), star_creation_time (array, code units), And one of: star_metallicity_fraction (array, code units). --OR-- star_metallicity_constant (float, code units). """) return None if star_metallicity_fraction is not None: self.star_metal = star_metallicity_fraction else: self.star_metal = \ self._ds.arr(np.ones_like(self.star_mass) * star_metallicity_constant, 'Zsun') else: # Get the data we need. 
if self.filter_provided: ct = self._filter['creation_time'] # mass_stars = self._data_source[self._filter, "particle_mass"] if star_metallicity_constant is None: self.star_metal = self._data_source[ self._filter, "metallicity_fraction"].in_units('Zsun') else: self.star_metal = \ self._ds.arr(np.ones_like( self._data_source[self._filter, "metallicity_fraction"]) * star_metallicity_constant, "Zsun") else: ct = self._data_source["creation_time"] if ct is None: errmsg = 'data source must have particle_age!' mylog.error(errmsg) raise RuntimeError(errmsg) mask = ct > 0 if not any(mask): errmsg = 'all particles have age < 0' mylog.error(errmsg) raise RuntimeError(errmsg) # type = self._data_source['particle_type'] self.star_creation_time = ct[mask] self.star_mass = self._data_source[ 'particle_mass'][mask].in_units('Msun') if star_metallicity_constant is not None: self.star_metal = self._ds.arr( np.ones_like(self.star_mass) * star_metallicity_constant, 'Zsun') else: self.star_metal = self._data_source[ "metallicity_fraction"][mask].in_units('Zsun') # Age of star in years. dt = (self.time_now - self.star_creation_time).in_units('yr') dt[dt < 0.0] = 0.0 # Remove young stars sub = dt >= self.min_age if len(sub) == 0: return self.star_metal = self.star_metal[sub] dt = dt[sub] self.star_creation_time = self.star_creation_time[sub] # Figure out which METALS bin the star goes into. Mindex = np.digitize(self.star_metal.in_units('Zsun'), METALS) # Replace the indices with strings. Mname = MtoD[Mindex] # Figure out which age bin this star goes into. Aindex = np.digitize(dt, self.age) # Ratios used for the interpolation. ratio1 = (dt - self.age[Aindex - 1]) / \ (self.age[Aindex] - self.age[Aindex - 1]) ratio2 = (self.age[Aindex] - dt) / \ (self.age[Aindex] - self.age[Aindex - 1]) # Sort the stars by metallicity and then by age, which should reduce # memory access time by a little bit in the loop. indexes = np.arange(self.star_metal.size) sort = np.asarray([indexes[i] for i in np.lexsort([indexes, Aindex, Mname])]) Mname = Mname[sort] Aindex = Aindex[sort] ratio1 = ratio1[sort] ratio2 = ratio2[sort] self.star_mass = self.star_mass[sort] self.star_creation_time = self.star_creation_time[sort] self.star_metal = self.star_metal[sort] # Interpolate the flux for each star, adding to the total by weight. pbar = get_pbar("Calculating fluxes", len(self.star_mass)) for i, star in enumerate(izip(Mname, Aindex, ratio1, ratio2, self.star_mass)): # Pick the right age bin for the right flux array. flux = self.flux[star[0]][star[1], :] # Get the one just before the one above. flux_1 = self.flux[star[0]][star[1] - 1, :] # interpolate in log(flux), linear in time. int_flux = star[3] * np.log10(flux_1) + star[2] * np.log10(flux) # Add this flux to the total, weighted by mass. self.final_spec += np.power(10., int_flux) * star[4] pbar.update(i) pbar.finish() # Normalize. self.total_mass = self.star_mass.sum() self.avg_mass = self.star_mass.mean() tot_metal = (self.star_metal * self.star_mass).sum() if tot_metal > 0: self.avg_metal = math.log10( (tot_metal / self.total_mass).in_units('Zsun')) else: self.avg_metal = -99
def _read_fluid_file_descriptor(fname: Union[str, "os.PathLike[str]"]): """ Read a file descriptor and return the list of fields found. """ # Mapping mapping_list = [ ("density", "Density"), ("velocity_x", "x-velocity"), ("velocity_y", "y-velocity"), ("velocity_z", "z-velocity"), ("pressure", "Pressure"), ("metallicity", "Metallicity"), # Add mapping for ionized species # Note: we expect internally that these names use the HII, HeII, # HeIII, ... convention for historical reasons. So we need to map # the names read from `hydro_file_descriptor.txt` to this # convention. # This will create fields like ("ramses", "HII") which are mapped # to ("gas", "H_p1_fraction") in fields.py ("H_p1_fraction", "HII"), ("He_p1_fraction", "HeII"), ("He_p2_fraction", "HeIII"), ] # Add mapping for magnetic fields mapping_list += [(key, key) for key in (f"B_{dim}_{side}" for side in ["left", "right"] for dim in ["x", "y", "z"])] # Convert to dictionary mapping = {k: v for k, v in mapping_list} with open(fname) as f: line = f.readline() tmp = VERSION_RE.match(line) mylog.debug("Reading fluid file descriptor %s.", fname) if not tmp: return [] version = int(tmp.group(1)) if version == 1: # Skip one line (containing the headers) line = f.readline() fields = [] for i, line in enumerate(f.readlines()): tmp = VAR_DESC_RE.match(line) if not tmp: raise YTFileNotParseable(fname, i + 1) # ivar = tmp.group(1) varname = tmp.group(2) dtype = tmp.group(3) if varname in mapping: varname = mapping[varname] else: varname = f"hydro_{varname}" fields.append((varname, dtype)) else: mylog.error("Version %s", version) raise YTParticleOutputFormatNotImplemented() return fields
def parallel_objects(objects, njobs = 0, storage = None, barrier = True, dynamic = False): r"""This function dispatches components of an iterable to different processors. The parallel_objects function accepts an iterable, *objects*, and based on the number of jobs requested and number of available processors, decides how to dispatch individual objects to processors or sets of processors. This can implicitly include multi-level parallelism, such that the processor groups assigned each object can be composed of several or even hundreds of processors. *storage* is also available, for collation of results at the end of the iteration loop. Calls to this function can be nested. This should not be used to iterate over datasets -- :class:`~yt.data_objects.time_series.DatasetSeries` provides a much nicer interface for that. Parameters ---------- objects : iterable The list of objects to dispatch to different processors. njobs : int How many jobs to spawn. By default, one job will be dispatched for each available processor. storage : dict This is a dictionary, which will be filled with results during the course of the iteration. The keys will be the dataset indices and the values will be whatever is assigned to the *result* attribute on the storage during iteration. barrier : bool Should a barrier be placed at the end of iteration? dynamic : bool This governs whether or not dynamic load balancing will be enabled. This requires one dedicated processor; if this is enabled with a set of 128 processors available, only 127 will be available to iterate over objects as one will be load balancing the rest. Examples -------- Here is a simple example of iterating over a set of centers and making slice plots centered at each. >>> for c in parallel_objects(centers): ... SlicePlot(ds, "x", "Density", center = c).save() ... Here's an example of calculating the angular momentum vector of a set of spheres, but with a set of four jobs of multiple processors each. Note that we also store the results. >>> storage = {} >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage): ... sp = ds.sphere(c, (100, "kpc")) ... sto.result = sp.quantities["AngularMomentumVector"]() ... >>> for sphere_id, L in sorted(storage.items()): ... print(centers[sphere_id], L) ... """ if dynamic: from .task_queue import dynamic_parallel_objects for my_obj in dynamic_parallel_objects(objects, njobs=njobs, storage=storage): yield my_obj return if not parallel_capable: njobs = 1 my_communicator = communication_system.communicators[-1] my_size = my_communicator.size mylog.info("you have %s processors",my_size) if njobs <= 0: njobs = my_size if njobs > my_size: mylog.error("You have asked for %s jobs, but you only have %s processors.", njobs, my_size) raise RuntimeError my_rank = my_communicator.rank mylog.info("I am %s processor",my_rank) all_new_comms = np.array_split(np.arange(my_size), njobs) for i,comm_set in enumerate(all_new_comms): if my_rank in comm_set: my_new_id = i break if parallel_capable: communication_system.push_with_ids(all_new_comms[my_new_id].tolist()) to_share = {} # If our objects object is slice-aware, like time series data objects are, # this will prevent intermediate objects from being created. 
oiter = itertools.islice(enumerate(objects), my_new_id, None, njobs) for result_id, obj in oiter: if storage is not None: rstore = ResultsStorage() rstore.result_id = result_id yield rstore, obj to_share[rstore.result_id] = rstore.result else: yield obj if parallel_capable: communication_system.pop() if storage is not None: # Now we have to broadcast it new_storage = my_communicator.par_combine_object( to_share, datatype = 'dict', op = 'join') mylog.info("my storage: %s",type(new_storage)) storage.update(new_storage) if barrier: my_communicator.barrier()
def enable_parallelism(suppress_logging: bool = False, communicator=None) -> bool: """ This method is used inside a script to turn on MPI parallelism, via mpi4py. More information about running yt in parallel can be found here: https://yt-project.org/docs/3.0/analyzing/parallel_computation.html Parameters ---------- suppress_logging : bool If set to True, only rank 0 will log information after the initial setup of MPI. communicator : mpi4py.MPI.Comm The MPI communicator to use. This controls which processes yt can see. If not specified, will be set to COMM_WORLD. Returns ------- parallel_capable: bool True if the call was successful. False otherwise. """ global parallel_capable, MPI try: from mpi4py import MPI as _MPI except ImportError: mylog.error("Could not enable parallelism: mpi4py is not installed") return False MPI = _MPI exe_name = os.path.basename(sys.executable) # if no communicator specified, set to COMM_WORLD if communicator is None: communicator = MPI.COMM_WORLD parallel_capable = communicator.size > 1 if not parallel_capable: mylog.error( "Could not enable parallelism: only one mpi process is running. " "To remedy this, launch the Python interpreter as\n" " mpirun -n <X> python3 <yourscript>.py # with X > 1 ", ) return False mylog.info( "Global parallel computation enabled: %s / %s", communicator.rank, communicator.size, ) communication_system.push(communicator) ytcfg["yt", "internals", "global_parallel_rank"] = communicator.rank ytcfg["yt", "internals", "global_parallel_size"] = communicator.size ytcfg["yt", "internals", "parallel"] = True if exe_name == "embed_enzo" or ("_parallel" in dir(sys) and sys._parallel): # type: ignore ytcfg["yt", "inline"] = True yt.utilities.logger.uncolorize_logging() # Even though the uncolorize function already resets the format string, # we reset it again so that it includes the processor. f = logging.Formatter("P%03i %s" % (communicator.rank, yt.utilities.logger.ufstring)) if len(yt.utilities.logger.ytLogger.handlers) > 0: yt.utilities.logger.ytLogger.handlers[0].setFormatter(f) if ytcfg.get("yt", "parallel_traceback"): sys.excepthook = traceback_writer_hook("_%03i" % communicator.rank) else: sys.excepthook = default_mpi_excepthook if ytcfg.get("yt", "log_level") < 20: yt.utilities.logger.ytLogger.warning( "Log Level is set low -- this could affect parallel performance!") dtype_names.update( dict( float32=MPI.FLOAT, float64=MPI.DOUBLE, int32=MPI.INT, int64=MPI.LONG, c=MPI.CHAR, )) op_names.update(dict(sum=MPI.SUM, min=MPI.MIN, max=MPI.MAX)) # Turn off logging on all but the root rank, if specified. if suppress_logging: if communicator.rank > 0: mylog.addFilter(FilterAllMessages()) return True
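# Usage sketch (hedged): the pattern described in the docstring above is to call
# enable_parallelism() at the top of a script launched under MPI, e.g.
#   mpirun -n 4 python3 script.py
# The dataset path below is illustrative.
import yt

yt.enable_parallelism()
ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006")
ad = ds.all_data()
# derived quantities like this are computed cooperatively across the MPI ranks
print(ad.quantities.total_mass())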
def load_hexahedral_mesh( data, connectivity, coordinates, length_unit=None, bbox=None, sim_time=0.0, mass_unit=None, time_unit=None, velocity_unit=None, magnetic_unit=None, periodicity=(True, True, True), geometry="cartesian", unit_system="cgs", ): r"""Load a hexahedral mesh of data into yt as a :class:`~yt.frontends.stream.data_structures.StreamHandler`. This should allow a semistructured grid of data to be loaded directly into yt and analyzed as would any others. This comes with several caveats: * Units will be incorrect unless the data has already been converted to cgs. * Some functions may behave oddly, and parallelism will be disappointing or non-existent in most cases. * Particles may be difficult to integrate. Particle fields are detected as one-dimensional fields. The number of particles is set by the "number_of_particles" key in data. Parameters ---------- data : dict This is a dict of numpy arrays, where the keys are the field names. There must only be one. Note that the data in the numpy arrays should define the cell-averaged value of the quantity in the hexahedral cell. connectivity : array_like This should be of size (N,8) where N is the number of zones. coordinates : array_like This should be of size (M,3) where M is the number of vertices indicated in the connectivity matrix. bbox : array_like (xdim:zdim, LE:RE), optional Size of computational domain in units of the length unit. sim_time : float, optional The simulation time in seconds mass_unit : string Unit to use for masses. Defaults to unitless. time_unit : string Unit to use for times. Defaults to unitless. velocity_unit : string Unit to use for velocities. Defaults to unitless. magnetic_unit : string Unit to use for magnetic fields. Defaults to unitless. periodicity : tuple of booleans Determines whether the data will be treated as periodic along each axis geometry : string or tuple "cartesian", "cylindrical", "polar", "spherical", "geographic" or "spectral_cube". Optionally, a tuple can be provided to specify the axis ordering -- for instance, to specify that the axis ordering should be z, x, y, this would be: ("cartesian", ("z", "x", "y")). The same can be done for other coordinates, for instance: ("spherical", ("theta", "phi", "r")). """ from yt.frontends.stream.data_structures import ( StreamDictFieldHandler, StreamHandler, StreamHexahedralDataset, ) from yt.frontends.stream.definitions import process_data, set_particle_types domain_dimensions = np.ones(3, "int32") * 2 nprocs = 1 if bbox is None: bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], "float64") domain_left_edge = np.array(bbox[:, 0], "float64") domain_right_edge = np.array(bbox[:, 1], "float64") grid_levels = np.zeros(nprocs, dtype="int32").reshape((nprocs, 1)) field_units, data, _ = process_data(data) sfh = StreamDictFieldHandler() particle_types = set_particle_types(data) sfh.update({"connectivity": connectivity, "coordinates": coordinates, 0: data}) # Simple check for axis length correctness if len(data) > 0: fn = list(sorted(data))[0] array_values = data[fn] if array_values.size != connectivity.shape[0]: mylog.error( "Dimensions of array must be one fewer than the coordinate set." 
) raise RuntimeError grid_left_edges = domain_left_edge grid_right_edges = domain_right_edge grid_dimensions = domain_dimensions.reshape(nprocs, 3).astype("int32") if length_unit is None: length_unit = "code_length" if mass_unit is None: mass_unit = "code_mass" if time_unit is None: time_unit = "code_time" if velocity_unit is None: velocity_unit = "code_velocity" if magnetic_unit is None: magnetic_unit = "code_magnetic" # I'm not sure we need any of this. handler = StreamHandler( grid_left_edges, grid_right_edges, grid_dimensions, grid_levels, -np.ones(nprocs, dtype="int64"), np.zeros(nprocs, dtype="int64").reshape(nprocs, 1), # Temporary np.zeros(nprocs).reshape((nprocs, 1)), sfh, field_units, (length_unit, mass_unit, time_unit, velocity_unit, magnetic_unit), particle_types=particle_types, periodicity=periodicity, ) handler.name = "HexahedralMeshData" handler.domain_left_edge = domain_left_edge handler.domain_right_edge = domain_right_edge handler.refine_by = 2 handler.dimensionality = 3 handler.domain_dimensions = domain_dimensions handler.simulation_time = sim_time handler.cosmology_simulation = 0 sds = StreamHexahedralDataset(handler, geometry=geometry, unit_system=unit_system) return sds
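# Usage sketch (hedged): a single hexahedral cell spanning the unit cube, with
# one cell-averaged field, fed through load_hexahedral_mesh above. The vertex
# ordering in `conn` assumes the common bottom-face-then-top-face convention
# and may need adjusting for a given mesh source.
import numpy as np
import yt

coords = np.array(
    [[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
     [0, 0, 1], [1, 0, 1], [1, 1, 1], [0, 1, 1]],
    dtype="float64",
)
conn = np.array([[0, 1, 2, 3, 4, 5, 6, 7]], dtype="int64")
data = {"density": np.array([1.0])}  # one cell-averaged value per zone
bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
ds = yt.load_hexahedral_mesh(data, conn, coords, bbox=bbox)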